[Monorail] Limit size of col_spec, sort_spec, and group_by.

R=jojwang@chromium.org

Bug: b/77508774
Change-Id: Icce233f49e9746386bf39053d2b1269a3208aa91
Reviewed-on: https://chromium-review.googlesource.com/994117
Reviewed-by: Joanna Wang <jojwang@chromium.org>
Commit-Queue: Joanna Wang <jojwang@chromium.org>
diff --git a/appengine/monorail/framework/framework_constants.py b/appengine/monorail/framework/framework_constants.py
index 29efdac..4cba304 100644
--- a/appengine/monorail/framework/framework_constants.py
+++ b/appengine/monorail/framework/framework_constants.py
@@ -53,9 +53,11 @@
 # Used to loosely validate column spec. Mainly guards against malicious input.
 COLSPEC_RE = re.compile(r'^[-.\w\s/]*$', re.UNICODE)
 COLSPEC_COL_RE = re.compile(r'[-.\w/]+', re.UNICODE)
+MAX_COL_PARTS = 25
 
 # Used to loosely validate sort spec. Mainly guards against malicious input.
 SORTSPEC_RE = re.compile(r'^[-.\w\s/]*$', re.UNICODE)
+MAX_SORT_PARTS = 6
 
 # For the artifact search box autosizing when the user types a long query.
 MIN_ARTIFACT_SEARCH_FIELD_SIZE = 38
diff --git a/appengine/monorail/framework/monorailrequest.py b/appengine/monorail/framework/monorailrequest.py
index a05a0af..31dbb95 100644
--- a/appengine/monorail/framework/monorailrequest.py
+++ b/appengine/monorail/framework/monorailrequest.py
@@ -187,7 +187,9 @@
 
     self.query_project_names = self.GetParam('projects')
     self.group_by_spec = self.GetParam('groupby')
+    self.group_by_spec = ' '.join(ParseColSpec(self.group_by_spec))
     self.sort_spec = self.GetParam('sort')
+    self.sort_spec = ' '.join(ParseColSpec(self.sort_spec))
     self.query = self.GetParam('q')
     self.can = self.GetParam('can')
     self.start = self.GetParam('start')
@@ -332,6 +334,7 @@
     self.sort_spec = self.GetParam(
         'sort', default_value='',
         antitamper_re=framework_constants.SORTSPEC_RE)
+    self.sort_spec = ' '.join(ParseColSpec(self.sort_spec))
 
     # Note: This is set later in request handling by ComputeColSpec().
     self.col_spec = None
@@ -514,7 +517,8 @@
       # If col spec is still empty then default to the global col spec.
       col_spec = tracker_constants.DEFAULT_COL_SPEC
 
-    self.col_spec = ' '.join(ParseColSpec(col_spec))
+    self.col_spec = ' '.join(ParseColSpec(col_spec,
+                             max_parts=framework_constants.MAX_COL_PARTS))
 
   def PrepareForReentry(self, echo_data):
     """Expose the results of form processing as if it was a new GET.
@@ -669,14 +673,30 @@
   return viewed_email
 
 
-def ParseColSpec(col_spec):
+def ParseColSpec(col_spec, max_parts=framework_constants.MAX_SORT_PARTS):
   """Split a string column spec into a list of column names.
 
+  We dedup col parts because an attacker could try to DoS us or guess
+  zero or one result by measuring the time to process a request that
+  has a very long column list.
+
   Args:
     col_spec: a unicode string containing a list of labels.
+    max_parts: optional int maximum number of parts to consider.
 
   Returns:
     A list of the extracted labels. Non-alphanumeric
     characters other than the period will be stripped from the text.
   """
-  return framework_constants.COLSPEC_COL_RE.findall(col_spec)
+  cols = framework_constants.COLSPEC_COL_RE.findall(col_spec)
+  result = []  # List of column headers with no duplicates.
+  seen = set()  # Set of column parts that we have processed so far.
+  for col in cols:
+    parts = []
+    for part in col.split('/'):
+      if part.lower() not in seen and len(seen) < max_parts:
+        parts.append(part)
+        seen.add(part.lower())
+    if parts:
+      result.append('/'.join(parts))
+  return result
diff --git a/appengine/monorail/framework/test/monorailrequest_test.py b/appengine/monorail/framework/test/monorailrequest_test.py
index c01dce8..e5992ab 100644
--- a/appengine/monorail/framework/test/monorailrequest_test.py
+++ b/appengine/monorail/framework/test/monorailrequest_test.py
@@ -416,6 +416,16 @@
         parse('\xe7\xaa\xbf\xe8\x8b\xa5\xe7\xb9\xb9 '
               '\xe5\x9f\xba\xe5\x9c\xb0\xe3\x81\xaf'.decode('utf-8')))
 
+  def testParseColSpec_Dedup(self):
+    parse = monorailrequest.ParseColSpec
+    self.assertEqual([], parse(''))
+    self.assertEqual(
+      ['Aa', 'b', 'c/d'],
+      parse(u'Aa Aa AA AA AA b Aa aa c/d d c aA b aa B C/D D/aa/c'))
+    self.assertEqual(
+      ['A', 'b', 'c/d', 'e', 'f'],
+      parse(u'A b c/d e f g h i j a/k l m/c/a n/o'))
+
 
 class TestPermissionLookup(unittest.TestCase):
   OWNER_ID = 1
diff --git a/appengine/monorail/tracker/issueadmin.py b/appengine/monorail/tracker/issueadmin.py
index 0c01d6b..90ed1f3 100644
--- a/appengine/monorail/tracker/issueadmin.py
+++ b/appengine/monorail/tracker/issueadmin.py
@@ -429,7 +429,8 @@
   # Don't allow empty colum spec
   if not default_col_spec:
     default_col_spec = tracker_constants.DEFAULT_COL_SPEC
-  col_spec_words = monorailrequest.ParseColSpec(default_col_spec)
+  col_spec_words = monorailrequest.ParseColSpec(
+      default_col_spec, max_parts=framework_constants.MAX_COL_PARTS)
   col_spec = ' '.join(word for word in col_spec_words)
 
   default_sort_spec = ''