| # (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org) |
| # Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php |
| |
| """ |
| Creates a human-readable identifier, using numbers and digits, |
| avoiding ambiguous numbers and letters. hash_identifier can be used |
| to create compact representations that are unique for a certain string |
| (or concatenation of strings) |
| """ |
| |
| try: |
| from hashlib import md5 |
| except ImportError: |
| from md5 import md5 |
| |
| import six |
| |
| good_characters = "23456789abcdefghjkmnpqrtuvwxyz" |
| |
| base = len(good_characters) |
| |
| def make_identifier(number): |
| """ |
| Encodes a number as an identifier. |
| """ |
| if not isinstance(number, six.integer_types): |
| raise ValueError( |
| "You can only make identifiers out of integers (not %r)" |
| % number) |
| if number < 0: |
| raise ValueError( |
| "You cannot make identifiers out of negative numbers: %r" |
| % number) |
| result = [] |
| while number: |
| next = number % base |
| result.append(good_characters[next]) |
| # Note, this depends on integer rounding of results: |
| number = number // base |
| return ''.join(result) |
| |
| def hash_identifier(s, length, pad=True, hasher=md5, prefix='', |
| group=None, upper=False): |
| """ |
| Hashes the string (with the given hashing module), then turns that |
| hash into an identifier of the given length (using modulo to |
| reduce the length of the identifier). If ``pad`` is False, then |
| the minimum-length identifier will be used; otherwise the |
| identifier will be padded with 0's as necessary. |
| |
| ``prefix`` will be added last, and does not count towards the |
| target length. ``group`` will group the characters with ``-`` in |
| the given lengths, and also does not count towards the target |
| length. E.g., ``group=4`` will cause a identifier like |
| ``a5f3-hgk3-asdf``. Grouping occurs before the prefix. |
| """ |
| if not callable(hasher): |
| # Accept sha/md5 modules as well as callables |
| hasher = hasher.new |
| if length > 26 and hasher is md5: |
| raise ValueError( |
| "md5 cannot create hashes longer than 26 characters in " |
| "length (you gave %s)" % length) |
| if isinstance(s, six.text_type): |
| s = s.encode('utf-8') |
| elif not isinstance(s, six.binary_type): |
| s = str(s) |
| if six.PY3: |
| s = s.encode('utf-8') |
| h = hasher(s) |
| bin_hash = h.digest() |
| modulo = base ** length |
| number = 0 |
| for c in list(bin_hash): |
| number = (number * 256 + six.byte2int([c])) % modulo |
| ident = make_identifier(number) |
| if pad: |
| ident = good_characters[0]*(length-len(ident)) + ident |
| if group: |
| parts = [] |
| while ident: |
| parts.insert(0, ident[-group:]) |
| ident = ident[:-group] |
| ident = '-'.join(parts) |
| if upper: |
| ident = ident.upper() |
| return prefix + ident |
| |
| # doctest tests: |
| __test__ = { |
| 'make_identifier': """ |
| >>> make_identifier(0) |
| '' |
| >>> make_identifier(1000) |
| 'c53' |
| >>> make_identifier(-100) |
| Traceback (most recent call last): |
| ... |
| ValueError: You cannot make identifiers out of negative numbers: -100 |
| >>> make_identifier('test') |
| Traceback (most recent call last): |
| ... |
| ValueError: You can only make identifiers out of integers (not 'test') |
| >>> make_identifier(1000000000000) |
| 'c53x9rqh3' |
| """, |
| 'hash_identifier': """ |
| >>> hash_identifier(0, 5) |
| 'cy2dr' |
| >>> hash_identifier(0, 10) |
| 'cy2dr6rg46' |
| >>> hash_identifier('this is a test of a long string', 5) |
| 'awatu' |
| >>> hash_identifier(0, 26) |
| 'cy2dr6rg46cx8t4w2f3nfexzk4' |
| >>> hash_identifier(0, 30) |
| Traceback (most recent call last): |
| ... |
| ValueError: md5 cannot create hashes longer than 26 characters in length (you gave 30) |
| >>> hash_identifier(0, 10, group=4) |
| 'cy-2dr6-rg46' |
| >>> hash_identifier(0, 10, group=4, upper=True, prefix='M-') |
| 'M-CY-2DR6-RG46' |
| """} |
| |
| if __name__ == '__main__': |
| import doctest |
| doctest.testmod() |
| |