| #!/usr/bin/python2.4 |
| # Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| '''Utilities used by GRIT. |
| ''' |
| |
| import sys |
| import os.path |
| import codecs |
| import htmlentitydefs |
| import re |
| import time |
| from xml.sax import saxutils |
| |
# Normalized path of the parent of the directory that contains this module.
# PathFromRoot() joins relative paths onto this value.
_root_dir = os.path.normpath(
    os.path.join(os.path.dirname(__file__), '..'))
| |
| |
# Matches all of the resource IDs predefined by Windows.
# The '\b' before and after each word makes sure these match only whole words
# and not the beginning of a longer word, e.g. ID_FILE_NEW will not match
# inside ID_FILE_NEW_PROJECT.
# See http://www.amk.ca/python/howto/regex/ (search for "\bclass\b" inside the
# html page).
# The pattern is compiled with re.VERBOSE, so the whitespace and line breaks
# inside it are layout only and are not part of what is matched.
SYSTEM_IDENTIFIERS = re.compile(
  r'''\bIDOK\b | \bIDCANCEL\b | \bIDC_STATIC\b | \bIDYES\b | \bIDNO\b |
      \bID_FILE_NEW\b | \bID_FILE_OPEN\b | \bID_FILE_CLOSE\b | \bID_FILE_SAVE\b |
      \bID_FILE_SAVE_AS\b | \bID_FILE_PAGE_SETUP\b | \bID_FILE_PRINT_SETUP\b |
      \bID_FILE_PRINT\b | \bID_FILE_PRINT_DIRECT\b | \bID_FILE_PRINT_PREVIEW\b |
      \bID_FILE_UPDATE\b | \bID_FILE_SAVE_COPY_AS\b | \bID_FILE_SEND_MAIL\b |
      \bID_FILE_MRU_FIRST\b | \bID_FILE_MRU_LAST\b |
      \bID_EDIT_CLEAR\b | \bID_EDIT_CLEAR_ALL\b | \bID_EDIT_COPY\b |
      \bID_EDIT_CUT\b | \bID_EDIT_FIND\b | \bID_EDIT_PASTE\b | \bID_EDIT_PASTE_LINK\b |
      \bID_EDIT_PASTE_SPECIAL\b | \bID_EDIT_REPEAT\b | \bID_EDIT_REPLACE\b |
      \bID_EDIT_SELECT_ALL\b | \bID_EDIT_UNDO\b | \bID_EDIT_REDO\b |
      \bVS_VERSION_INFO\b | \bIDRETRY\b''', re.VERBOSE)
| |
| |
# Matches character entities, whether specified by name, decimal or hex.
# The named groups 'decimal', 'hex' and 'named' tell the caller which form
# was matched.
_HTML_ENTITY = re.compile(
  '&(#(?P<decimal>[0-9]+)|#x(?P<hex>[a-fA-F0-9]+)|(?P<named>[a-z0-9]+));',
  re.IGNORECASE)

# Matches characters that should be HTML-escaped.  This is ", <, > and &, but
# only if the & is not the start of an HTML character entity.
# The alternatives inside the lookahead are grouped so that the terminating
# ';' is required for all three entity forms, and the entity shapes mirror
# those accepted by _HTML_ENTITY (hex digits only after '#x', digits allowed
# in entity names such as 'sup2').
_HTML_CHARS_TO_ESCAPE = re.compile(
  '"|<|>|&(?!(?:#[0-9]+|#x[0-9a-f]+|[a-z0-9]+);)',
  re.IGNORECASE | re.MULTILINE)
| |
| |
def WrapInputStream(stream, encoding = 'utf-8'):
  '''Wraps 'stream' in the stream reader registered for 'encoding'.

  Args:
    stream: the underlying stream to read from
    encoding: name of a codec known to the codecs module

  Return:
    A codecs stream reader that decodes what it reads from 'stream'.
  '''
  reader_factory = codecs.getreader(encoding)
  return reader_factory(stream)
| |
| |
def WrapOutputStream(stream, encoding = 'utf-8'):
  '''Wraps 'stream' in the stream writer registered for 'encoding'.

  Args:
    stream: the underlying stream to write to
    encoding: name of a codec known to the codecs module

  Return:
    A codecs stream writer that encodes what is written before passing it on
    to 'stream'.
  '''
  writer_factory = codecs.getwriter(encoding)
  return writer_factory(stream)
| |
| |
def ChangeStdoutEncoding(encoding = 'utf-8'):
  '''Rebinds sys.stdout to a writer that encodes output using 'encoding'.

  The current sys.stdout is wrapped with WrapOutputStream() and the wrapper
  is installed in its place, so the change affects the whole process.
  '''
  encoded_stdout = WrapOutputStream(sys.stdout, encoding)
  sys.stdout = encoded_stdout
| |
| |
def EscapeHtml(text, escape_quotes = False):
  '''Returns 'text' with <, > and & (and optionally ") escaped to named HTML
  entities.  Any existing named entity or HTML entity defined by decimal or
  hex code will be left untouched.  This is appropriate for escaping text for
  inclusion in HTML, but not for XML.

  Args:
    text: the string to escape
    escape_quotes: if True, double quotes are replaced by &quot; as well;
                   otherwise they are left as they are

  Return:
    The escaped string.
  '''
  entity_for = {
    '&' : '&amp;',
    '<' : '&lt;',
    '>' : '&gt;',
    '"' : '&quot;',
  }

  def Replace(match):
    char = match.group()
    # The pattern always matches double quotes; whether they are actually
    # escaped is decided here.
    if char == '"' and not escape_quotes:
      return char
    # _HTML_CHARS_TO_ESCAPE only matches the four characters in the table.
    assert char in entity_for
    return entity_for[char]

  return _HTML_CHARS_TO_ESCAPE.sub(Replace, text)
| |
| |
def UnescapeHtml(text, replace_nbsp=True):
  '''Returns 'text' with all HTML character entities (both named character
  entities and those specified by decimal or hexadecimal Unicode ordinal)
  replaced by their Unicode characters.

  The only exception is that &nbsp; will not be unescaped if 'replace_nbsp'
  is False.  Named entities that are not listed in
  htmlentitydefs.name2codepoint are left in the text as they are.

  Args:
    text: the string to unescape
    replace_nbsp: whether &nbsp; should be replaced like any other entity

  Return:
    The unescaped string.
  '''
  def Replace(match):
    groups = match.groupdict()
    if groups['hex']:
      return unichr(int(groups['hex'], 16))
    if groups['decimal']:
      return unichr(int(groups['decimal'], 10))

    name = groups['named']
    if name == 'nbsp' and not replace_nbsp:
      return match.group()  # Don't replace
    assert name is not None
    # Membership is tested on the dict itself rather than on .keys(), which
    # would build a list of every entity name for each match on Python 2.
    if name in htmlentitydefs.name2codepoint:
      return unichr(htmlentitydefs.name2codepoint[name])
    return match.group()  # Unknown HTML character entity - don't replace

  return _HTML_ENTITY.sub(Replace, text)
| |
| |
def EncodeCdata(cdata):
  '''Returns the provided cdata in either escaped format or <![CDATA[xxx]]>
  format, depending on which is more appropriate for easy editing.  The data
  is escaped for inclusion in an XML element's body.

  The CDATA form is used when the text contains more than one '<' or more
  than one '>', but never when it contains ']]>', because that sequence would
  terminate the CDATA section early.  In every other case the text is escaped
  with saxutils.escape().

  Args:
    cdata: 'If x < y and y < z then x < z'

  Return:
    '<![CDATA[If x < y and y < z then x < z]]>'
  '''
  has_many_brackets = cdata.count('<') > 1 or cdata.count('>') > 1
  # The ']]>' check must apply to both bracket counts, hence the explicit
  # grouping instead of relying on 'and' binding tighter than 'or'.
  if has_many_brackets and ']]>' not in cdata:
    return '<![CDATA[%s]]>' % cdata
  return saxutils.escape(cdata)
| |
| |
def FixupNamedParam(function, param_name, param_value):
  '''Wraps 'function' so that the keyword argument 'param_name' defaults to
  'param_value'.  A caller of the wrapper that passes 'param_name' as a
  keyword argument keeps its own value.

  Args:
    function: callable
    param_name: 'bingo'
    param_value: 'bongo' (any type)

  Return:
    callable
  '''
  def FixupClosure(*args, **kw):
    # Only fills the value in when the caller did not supply the keyword.
    kw.setdefault(param_name, param_value)
    return function(*args, **kw)
  return FixupClosure
| |
| |
def PathFromRoot(path):
  r'''Resolves 'path', given relative to the root directory for GRIT (the one
  that grit.py resides in), into a path that is either absolute or relative
  to the current working directory (i.e. a path you can use to open the
  file).

  Args:
    path: 'rel_dir\file.ext'

  Return:
    'c:\src\tools\rel_dir\file.ext'
  '''
  joined = os.path.join(_root_dir, path)
  return os.path.normpath(joined)
| |
| |
def FixRootForUnittest(root_node, dir=PathFromRoot('.')):
  '''Adds GetBaseDir() and GetSourceLanguage() functions to 'root_node',
  making unittesting easier.

  Args:
    root_node: object that receives the functions as attributes
    dir: value returned by the added GetBaseDir(); the default is computed
         once, when this function is defined, as PathFromRoot('.')
  '''
  def GetBaseDir():
    '''Returns a fake base directory.'''
    return dir
  def GetSourceLanguage():
    '''Returns the fixed language code 'en'.'''
    return 'en'
  # An existing GetBaseDir attribute on 'root_node' is never overwritten.
  # NOTE(review): the indentation of this file was lost, so it is not visible
  # whether the GetSourceLanguage assignment is also covered by this check;
  # it is kept inside the guard here - confirm against the original file.
  if not hasattr(root_node, 'GetBaseDir'):
    setattr(root_node, 'GetBaseDir', GetBaseDir)
    setattr(root_node, 'GetSourceLanguage', GetSourceLanguage)
| |
| |
def dirname(filename):
  '''Like os.path.dirname(), except that '.' is returned whenever
  os.path.dirname() would return the empty string.
  '''
  parent = os.path.dirname(filename)
  if parent != '':
    return parent
  return '.'
| |
| |
def normpath(path):
  '''Like os.path.normpath(), except that every backward slash in 'path' is
  first turned into a forward slash.
  '''
  # The replacement is done unconditionally: the Windows version of
  # os.path.normpath turns forward slashes back into backward slashes, so
  # nothing is lost there.
  return os.path.normpath(path.replace('\\', '/'))
| |
| |
# Separators accepted between the parts of a language code: '-', '_' or '/'.
_LANGUAGE_SPLIT_RE = re.compile('-|_|/')


def CanonicalLanguage(code):
  '''Puts a language code into canonical form: the parts are joined by
  dashes and every part after the first is upper-cased.  The first part is
  kept as given, so one-part language codes come back unchanged.

  Args:
    code: 'zh_cn'

  Return:
    code: 'zh-CN'
  '''
  parts = _LANGUAGE_SPLIT_RE.split(code)
  # split() always yields at least one element, so parts[:1] is the one-item
  # list holding the first part.
  return '-'.join(parts[:1] + [part.upper() for part in parts[1:]])
| |
| |
# Maps language codes, written in the form CanonicalLanguage() produces, to
# codepage numbers.  LanguageToCodepage() falls back to 1252 for any language
# that is not listed here.
_LANG_TO_CODEPAGE = {
  # Languages mapped to codepage 1252.
  'en': 1252, 'fr': 1252, 'it': 1252, 'de': 1252, 'es': 1252, 'nl': 1252,
  'sv': 1252, 'no': 1252, 'da': 1252, 'fi': 1252, 'pt-BR': 1252,
  # Languages that each map to a codepage of their own.
  'ru': 1251,
  'ja': 932,
  'zh-TW': 950,
  'zh-CN': 936,
  'ko': 949,
}
| |
| |
def LanguageToCodepage(lang):
  '''Returns the codepage _number_ that can be used to represent 'lang', which
  may be given in formats such as 'en', 'pt_br', 'pt-BR', etc.

  'lang' is canonicalized with CanonicalLanguage() before it is looked up.
  For a language without an entry in the table a message is printed and 1252
  is returned.

  Args:
    lang: 'de'

  Return:
    1252
  '''
  canonical = CanonicalLanguage(lang)
  codepage = _LANG_TO_CODEPAGE.get(canonical)
  if codepage is not None:
    return codepage
  # A single parenthesised expression prints the same text whether 'print' is
  # a statement or a function.
  print("Not sure which codepage to use for %s, assuming cp1252" % canonical)
  return 1252
| |
def NewClassInstance(class_name, class_type):
  '''Imports the module named in 'class_name', instantiates the class and
  returns the instance if it is of the requested type.

  Args:
    class_name: the fully qualified, dot separated package + classname,
      i.e. "my.package.name.MyClass". Short class names are not supported.
    class_type: the class or superclass this object must implement

  Return:
    An instance of the class, or None if 'class_name' has no module part, the
    module has no attribute of that name, or the created instance is not an
    instance of 'class_type'.
  '''
  dot_index = class_name.rfind('.')
  if dot_index < 0:
    return None
  module_name = class_name[:dot_index]
  if not module_name:
    return None

  short_name = class_name[dot_index + 1:]
  # A non-empty fromlist makes __import__ return the innermost module of a
  # dotted name instead of the top-level package.
  module = __import__(module_name, globals(), locals(), [''])
  if not hasattr(module, short_name):
    return None

  # The object is created first and only then checked against 'class_type'.
  instance = getattr(module, short_name)()
  if isinstance(instance, class_type):
    return instance
  return None
| |
| |
def FixLineEnd(text, line_end):
  '''Returns 'text' with every line ending replaced by 'line_end'.

  Both '\\r\\n' and a lone '\\r' are first normalized to '\\n'; every '\\n'
  is then replaced by 'line_end'.
  '''
  normalized = text.replace('\r\n', '\n').replace('\r', '\n')
  return line_end.join(normalized.split('\n'))
| |
| |
def BoolToString(bool):
  '''Returns 'true' if the argument is truthy and 'false' otherwise.'''
  if not bool:
    return 'false'
  return 'true'
| |
| |
# Module-level verbosity flags, both off by default.  They are read through
# IsVerbose() and IsExtraVerbose(); nothing in the visible part of this file
# assigns them again, so they are presumably set from outside this module.
verbose = False
extra_verbose = False


def IsVerbose():
  '''Returns the current value of the module-level 'verbose' flag.'''
  return verbose


def IsExtraVerbose():
  '''Returns the current value of the module-level 'extra_verbose' flag.'''
  return extra_verbose
| |
def GetCurrentYear():
  '''Returns the year of the current local time as an integer.'''
  now = time.localtime()
  return now.tm_year
| |