Add LogDog stream name normalization function.

Add a LogDog stream normalization function. This will take an arbitrary
stream name and make a best-effort attempt to convert it to a valid
LogDog stream name.

BUG=chromium:628770
TEST=unittest
  - Unit test for the new function runs successfully.

Review-Url: https://codereview.chromium.org/2243483003
NOKEYCHECK=True
GitOrigin-RevId: 6a5120210cd8214bcbaec031333e3148520d57df
diff --git a/streamname.py b/streamname.py
index 3b92fa7..8aaffb8 100644
--- a/streamname.py
+++ b/streamname.py
@@ -3,8 +3,10 @@
 # that can be found in the LICENSE file.
 
 import re
+import string
 import types
 
+_ALNUM_CHARS = string.ascii_letters + string.digits
 _SEGMENT_RE_BASE = r'[a-zA-Z0-9][a-zA-Z0-9:_\-.]*'
 _STREAM_NAME_RE = re.compile('^(' + _SEGMENT_RE_BASE + ')(/' +
                              _SEGMENT_RE_BASE + ')*$')
@@ -13,6 +15,7 @@
 _MAX_TAG_KEY_LENGTH = 64
 _MAX_TAG_VALUE_LENGTH = 4096
 
+
 def validate_stream_name(v, maxlen=None):
   """Verifies that a given stream name is valid.
 
@@ -42,3 +45,63 @@
   """
   validate_stream_name(key, maxlen=_MAX_TAG_KEY_LENGTH)
   validate_stream_name(value, maxlen=_MAX_TAG_VALUE_LENGTH)
+
+
+def normalize(v, prefix=None):
+  """Given a string, "v", mutate it into a valid stream name.
+
+  This operates by replacing invalid stream name characters with underscores (_)
+  when encountered.
+
+  A special case is when "v" begins with an invalid character. In this case, we
+  will prepend the "prefix" to the result, if one is supplied.
+
+  See _STREAM_NAME_RE for a description of a valid stream name.
+
+  Raises:
+    ValueError: If normalization could not be successfully performed.
+  """
+  if len(v) == 0:
+    if not prefix:
+      raise ValueError('Cannot normalize empty name with no prefix.')
+    v = prefix
+  else:
+    out = []
+    for i, ch in enumerate(v):
+      if i == 0 and not _is_valid_stream_char(ch, first=True):
+        # The first letter is special, and must be alphanumeric.
+        # If we have a prefix, prepend that to the resulting string.
+        if prefix is None:
+          raise ValueError('Name has invalid beginning, and no prefix was '
+                           'provided.')
+        out.append(prefix)
+
+      if not _is_valid_stream_char(ch):
+        ch = '_'
+      out.append(ch)
+    v = ''.join(out)
+
+  # Validate the resulting string.
+  validate_stream_name(v)
+  return v
+
+
+def _is_valid_stream_char(ch, first=False):
+  """Returns (bool): True if a character is valid in a stream name.
+
+  The first character must be alphanumeric, matching [a-zA-Z0-9].
+  Additional characters must either be alphanumeric or one of: (: _ - .).
+
+  Args:
+    ch (str): the character to evaluate.
+    first (bool): if true, apply special first-character constraints.
+  """
+  # Alphanumeric check.
+  if ch in _ALNUM_CHARS:
+    return True
+  if first:
+    # The first character must be alphanumeric.
+    return False
+
+  # Check additional middle-name characters:
+  return ch in ':_-./'
diff --git a/tests/streamname_test.py b/tests/streamname_test.py
index f5898a4..c84292a 100755
--- a/tests/streamname_test.py
+++ b/tests/streamname_test.py
@@ -43,6 +43,24 @@
         raised = True
       self.assertFalse(raised, "Stream name '%s' raised ValueError" % (name,))
 
+  def testNormalize(self):
+    for name, normalized in (
+        ('', 'PFX'),
+        ('_invalid_start_char', 'PFX_invalid_start_char'),
+        ('valid_stream_name.1:2-3', 'valid_stream_name.1:2-3'),
+        ('some stream (with stuff)', 'some_stream__with_stuff_'),
+        ('_invalid/st!ream/name entry', 'PFX_invalid/st_ream/name_entry'),
+        ('     ', 'PFX_____'),
+    ):
+      self.assertEqual(streamname.normalize(name, prefix='PFX'), normalized)
+
+    # Assert that an empty stream name with no prefix will raise a ValueError.
+    self.assertRaises(ValueError, streamname.normalize, '')
+
+    # Assert that a stream name with an invalid starting character and no prefix
+    # will raise a ValueError.
+    self.assertRaises(ValueError, streamname.normalize, '_invalid_start_char')
+
 
 if __name__ == '__main__':
   unittest.main()