Add LogDog stream name normalization function.
Add a LogDog stream name normalization function. This will take an arbitrary
stream name and make a best-effort attempt to convert it to a valid
LogDog stream name.
BUG=chromium:628770
TEST=unittest
- Unit test for the new function runs successfully.
Review-Url: https://codereview.chromium.org/2243483003
NOKEYCHECK=True
GitOrigin-RevId: 6a5120210cd8214bcbaec031333e3148520d57df
diff --git a/streamname.py b/streamname.py
index 3b92fa7..8aaffb8 100644
--- a/streamname.py
+++ b/streamname.py
@@ -3,8 +3,10 @@
# that can be found in the LICENSE file.
import re
+import string
import types
+# ASCII letters and digits: the characters that _is_valid_stream_char treats
+# as alphanumeric.
+_ALNUM_CHARS = string.ascii_letters + string.digits
_SEGMENT_RE_BASE = r'[a-zA-Z0-9][a-zA-Z0-9:_\-.]*'
_STREAM_NAME_RE = re.compile('^(' + _SEGMENT_RE_BASE + ')(/' +
_SEGMENT_RE_BASE + ')*$')
@@ -13,6 +15,7 @@
_MAX_TAG_KEY_LENGTH = 64
_MAX_TAG_VALUE_LENGTH = 4096
+
def validate_stream_name(v, maxlen=None):
"""Verifies that a given stream name is valid.
@@ -42,3 +45,63 @@
"""
validate_stream_name(key, maxlen=_MAX_TAG_KEY_LENGTH)
validate_stream_name(value, maxlen=_MAX_TAG_VALUE_LENGTH)
+
+
+def normalize(v, prefix=None):
+  """Converts an arbitrary string, "v", into a valid stream name.
+
+  Every character of "v" that is not permitted in a stream name is replaced
+  with an underscore (_).
+
+  The first character is special: a stream name must begin with an
+  alphanumeric character, so when "v" does not, "prefix" is prepended to the
+  result. The offending character itself is still kept, or replaced with an
+  underscore, by the rule above. An empty "v" normalizes to "prefix" alone.
+
+  The segment separator (/) is passed through untouched. An input with an
+  empty segment, or with a later segment that starts with a non-alphanumeric
+  character, is therefore not repaired here; the final validation is expected
+  to reject it.
+
+  See _STREAM_NAME_RE for a description of a valid stream name.
+
+  Args:
+    v (str): the name to normalize.
+    prefix (str or None): text to use when "v" is empty, or to prepend when
+        "v" has an invalid first character.
+
+  Returns (str): the normalized stream name.
+
+  Raises:
+    ValueError: If normalization could not be successfully performed.
+  """
+  if len(v) == 0:
+    # Nothing to sanitize: the prefix is the only possible result.
+    if not prefix:
+      raise ValueError('Cannot normalize empty name with no prefix.')
+    validate_stream_name(prefix)
+    return prefix
+
+  pieces = []
+  if not _is_valid_stream_char(v[0], first=True):
+    # The name must start with an alphanumeric character, so lead with the
+    # prefix. The original first character is still handled below.
+    if prefix is None:
+      raise ValueError('Name has invalid beginning, and no prefix was '
+                       'provided.')
+    pieces.append(prefix)
+
+  # Swap every disallowed character for an underscore.
+  pieces.extend(c if _is_valid_stream_char(c) else '_' for c in v)
+  name = ''.join(pieces)
+
+  # Validate the resulting string.
+  validate_stream_name(name)
+  return name
+
+
+def _is_valid_stream_char(ch, first=False):
+  """Returns (bool): True if "ch" is acceptable at its position in a name.
+
+  With "first" set, only alphanumeric characters, [a-zA-Z0-9], are accepted.
+  Otherwise "ch" may be alphanumeric or one of: (: _ - . /).
+
+  Args:
+    ch (str): the character to evaluate.
+    first (bool): if true, apply special first-character constraints.
+  """
+  if first:
+    # The first character must be alphanumeric; no punctuation allowance.
+    return ch in _ALNUM_CHARS
+
+  # Anywhere else, the extra punctuation and the "/" separator also pass.
+  return ch in _ALNUM_CHARS or ch in ':_-./'
diff --git a/tests/streamname_test.py b/tests/streamname_test.py
index f5898a4..c84292a 100755
--- a/tests/streamname_test.py
+++ b/tests/streamname_test.py
@@ -43,6 +43,24 @@
raised = True
self.assertFalse(raised, "Stream name '%s' raised ValueError" % (name,))
+  def testNormalize(self):
+    # Each pair is (input name, expected result of normalizing it with
+    # prefix='PFX').
+    for name, normalized in (
+        # An empty name becomes the prefix alone.
+        ('', 'PFX'),
+        # An invalid first character causes the prefix to be prepended.
+        ('_invalid_start_char', 'PFX_invalid_start_char'),
+        # An already-valid name is returned unchanged.
+        ('valid_stream_name.1:2-3', 'valid_stream_name.1:2-3'),
+        # Each invalid character is replaced by one underscore.
+        ('some stream (with stuff)', 'some_stream__with_stuff_'),
+        # The "/" segment separator is preserved.
+        ('_invalid/st!ream/name entry', 'PFX_invalid/st_ream/name_entry'),
+        # Five spaces: the prefix is prepended and every space becomes an
+        # underscore, giving five underscores after the prefix.
+        ('     ', 'PFX_____'),
+    ):
+      self.assertEqual(streamname.normalize(name, prefix='PFX'), normalized)
+
+    # Assert that an empty stream name with no prefix will raise a ValueError.
+    self.assertRaises(ValueError, streamname.normalize, '')
+
+    # Assert that a stream name with an invalid starting character and no prefix
+    # will raise a ValueError.
+    self.assertRaises(ValueError, streamname.normalize, '_invalid_start_char')
+
if __name__ == '__main__':
unittest.main()