blob: 3f80107efc4ff4bfd6ec9fa0edea8791d486eed4 [file] [log] [blame]
# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
"""
Creates a human-readable identifier, using letters and digits,
avoiding ambiguous numbers and letters. hash_identifier can be used
to create compact representations that are unique for a certain string
(or concatenation of strings).
"""
try:
from hashlib import md5
except ImportError:
from md5 import md5
import six
# Alphabet used for identifiers: the digits and lowercase letters with
# 0, 1, i, l, o and s left out (the module docstring describes this as
# avoiding ambiguous numbers and letters).
good_characters = "23456789abcdefghjkmnpqrtuvwxyz"
# Radix of the encoding: each identifier character is one digit in this base.
base = len(good_characters)
def make_identifier(number):
    """
    Encodes a number as an identifier.

    ``number`` must be a non-negative integer.  It is written in base
    ``len(good_characters)`` using ``good_characters`` as the digits,
    least-significant digit first.  Zero encodes to the empty string.

    Raises ``ValueError`` if ``number`` is not an integer or is
    negative.
    """
    if not isinstance(number, six.integer_types):
        # Wrap the value in a 1-tuple: a bare tuple on the right of ``%``
        # would be unpacked as the format arguments and raise TypeError
        # instead of the intended ValueError.
        raise ValueError(
            "You can only make identifiers out of integers (not %r)"
            % (number,))
    if number < 0:
        raise ValueError(
            "You cannot make identifiers out of negative numbers: %r"
            % (number,))
    result = []
    while number:
        # Peel off the lowest digit; the quotient carries the rest.
        number, digit = divmod(number, base)
        result.append(good_characters[digit])
    return ''.join(result)
def hash_identifier(s, length, pad=True, hasher=md5, prefix='',
                    group=None, upper=False):
    """
    Hashes the string (with the given hashing module), then turns that
    hash into an identifier of the given length (using modulo to
    reduce the length of the identifier).  If ``pad`` is False, then
    the minimum-length identifier will be used; otherwise the
    identifier will be left-padded to ``length`` with the first
    character of ``good_characters`` (the zero digit of the encoding).

    ``s`` may be text (hashed as UTF-8), bytes (hashed as-is), or any
    other object (its ``str()`` is hashed).  ``hasher`` may be a
    callable returning a hash object, or an object with a ``new``
    attribute (such as the old ``md5``/``sha`` modules).

    ``prefix`` will be added last, and does not count towards the
    target length.  ``group`` will group the characters with ``-`` in
    the given lengths, and also does not count towards the target
    length.  E.g., ``group=4`` with a length of 10 gives an identifier
    like ``cy-2dr6-rg46``.  Grouping is done from the right, so a short
    group (if any) comes first, and it occurs before the prefix is
    added.  ``upper`` upper-cases the identifier (but not the prefix).

    Raises ``ValueError`` if ``length`` is more than 26 with the
    default md5 hasher, or if ``group`` is negative.
    """
    if not callable(hasher):
        # Accept sha/md5 modules as well as callables
        hasher = hasher.new
    if length > 26 and hasher is md5:
        # A 128-bit md5 digest holds fewer than 27 base-30 digits.
        raise ValueError(
            "md5 cannot create hashes longer than 26 characters in "
            "length (you gave %s)" % length)
    if group is not None and group < 0:
        # A negative group size would never shorten the identifier in
        # the grouping loop below, so it would loop forever.
        raise ValueError(
            "group cannot be negative (you gave %r)" % (group,))
    if isinstance(s, six.text_type):
        s = s.encode('utf-8')
    elif not isinstance(s, six.binary_type):
        s = str(s)
        if six.PY3:
            s = s.encode('utf-8')
    h = hasher(s)
    bin_hash = h.digest()
    modulo = base ** length
    number = 0
    # bytearray yields integer byte values on both Python 2 and 3.
    for byte in bytearray(bin_hash):
        number = (number * 256 + byte) % modulo
    ident = make_identifier(number)
    if pad:
        ident = good_characters[0] * (length - len(ident)) + ident
    if group:
        # Cut groups off the right-hand end, then restore their order.
        parts = []
        while ident:
            parts.append(ident[-group:])
            ident = ident[:-group]
        ident = '-'.join(reversed(parts))
    if upper:
        ident = ident.upper()
    return prefix + ident
# Doctests for this module, keyed by the function they exercise.  They are
# run by the ``doctest.testmod()`` call in the ``__main__`` block below.
# The strings are the tests themselves: do not re-indent or reflow them.
__test__ = {
# make_identifier: encoding of zero, small and large integers, and the
# ValueError raised for negative and non-integer input.
'make_identifier': """
>>> make_identifier(0)
''
>>> make_identifier(1000)
'c53'
>>> make_identifier(-100)
Traceback (most recent call last):
...
ValueError: You cannot make identifiers out of negative numbers: -100
>>> make_identifier('test')
Traceback (most recent call last):
...
ValueError: You can only make identifiers out of integers (not 'test')
>>> make_identifier(1000000000000)
'c53x9rqh3'
""",
# hash_identifier: several target lengths, the md5 length limit, and the
# ``group`` / ``upper`` / ``prefix`` options.
'hash_identifier': """
>>> hash_identifier(0, 5)
'cy2dr'
>>> hash_identifier(0, 10)
'cy2dr6rg46'
>>> hash_identifier('this is a test of a long string', 5)
'awatu'
>>> hash_identifier(0, 26)
'cy2dr6rg46cx8t4w2f3nfexzk4'
>>> hash_identifier(0, 30)
Traceback (most recent call last):
...
ValueError: md5 cannot create hashes longer than 26 characters in length (you gave 30)
>>> hash_identifier(0, 10, group=4)
'cy-2dr6-rg46'
>>> hash_identifier(0, 10, group=4, upper=True, prefix='M-')
'M-CY-2DR6-RG46'
"""}
if __name__ == '__main__':
    # Executed as a script: run the doctests held in ``__test__``.
    import doctest

    doctest.testmod()