blob: c0538ed9f71157d5f9295d272035df3d6aae40b5 [file] [log] [blame]
#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""The 'grit android2grd' tool."""
import getopt
import os.path
import StringIO
from xml.dom import Node
import xml.dom.minidom
import grit.node.empty
from grit.node import io
from grit.node import message
from grit.tool import interface
from grit import grd_reader
from grit import lazy_re
from grit import tclib
from grit import util
# The name of a string in strings.xml: lowercase letters, digits and
# underscores only.
_STRING_NAME = lazy_re.compile(r'[a-z0-9_]+\Z')

# A string's character limit, written as "[CHAR-LIMIT=n]" inside the
# description comment of a string in strings.xml.
_CHAR_LIMIT = lazy_re.compile(r'\[CHAR-LIMIT=(\d+)\]')

# Finds String.Format() style format specifiers such as "%-5.2f".
# Every piece is a raw string so that regex escapes such as \$ and \. reach
# the regex engine untouched instead of relying on Python leaving unknown
# string escapes alone.
_FORMAT_SPECIFIER = lazy_re.compile(
    r'%'
    r'([1-9][0-9]*\$|<)?'  # argument_index
    r'([-#+ 0,(]*)'  # flags
    r'([0-9]+)?'  # width
    r'(\.[0-9]+)?'  # precision
    r'([bBhHsScCdoxXeEfgGaAtT%n])')  # conversion
class Android2Grd(interface.Tool):
  """Tool for converting Android strings.xml files into chrome Grd files.

  Usage: grit [global options] android2grd [OPTIONS] STRINGS_XML

  The Android2Grd tool will convert an Android strings.xml file (whose path is
  specified by STRINGS_XML) and create a chrome style grd file containing the
  relevant information.

  Because grd documents are much richer than strings.xml documents we supplement
  the information required by grds using OPTIONS with sensible defaults.

  OPTIONS may be any of the following:

    --name FILENAME          Specify the base FILENAME. This should be without
                             any file type suffix. By default
                             "chrome_android_strings" will be used.

    --languages LANGUAGES    Comma separated list of ISO language codes (e.g.
                             en-US, en-GB, ru, zh-CN). These codes will be used
                             to determine the names of resource and translations
                             files that will be declared by the output grd file.

    --grd-dir GRD_DIR        Specify where the resultant grd file
                             (FILENAME.grd) should be output. By default this
                             will be the present working directory.

    --header-dir HEADER_DIR  Specify the location of the directory where grit
                             generated C++ headers (whose name will be
                             FILENAME.h) will be placed. Use an empty string to
                             disable header generation. Default: empty.

    --rc-dir RC_DIR          Specify the directory where resource files will
                             be located relative to grit build's output
                             directory. Use an empty string to disable rc
                             generation. Default: empty.

    --xml-dir XML_DIR        Specify where to place localized strings.xml files
                             relative to grit build's output directory. For each
                             language xx a values-xx/strings.xml file will be
                             generated. Use an empty string to disable
                             strings.xml generation. Default: '.'.

    --xtb-dir XTB_DIR        Specify where the xtb files containing translations
                             will be located relative to the grd file. Default:
                             '.'.
  """

  # Names of the long command-line flags (without the leading '--'). Each one
  # takes a value and sets one attribute of the tool.
  _NAME_FLAG = 'name'
  _LANGUAGES_FLAG = 'languages'
  _GRD_DIR_FLAG = 'grd-dir'
  _RC_DIR_FLAG = 'rc-dir'
  _HEADER_DIR_FLAG = 'header-dir'
  _XTB_DIR_FLAG = 'xtb-dir'
  _XML_DIR_FLAG = 'xml-dir'
def __init__(self):
self.name = 'chrome_android_strings'
self.languages = []
self.grd_dir = '.'
self.rc_dir = None
self.xtb_dir = '.'
self.xml_res_dir = '.'
self.header_dir = None
def ShortDescription(self):
  """Describes the android2grd tool in one sentence.

  Overridden from the interface.Tool base class.

  Returns:
    A string holding the one-line description of the tool.
  """
  summary = 'Converts Android string.xml files into Chrome grd files.'
  return summary
def ParseOptions(self, args):
  """Sets this object's options from flags and returns the other arguments.

  Args:
    args: List of command-line argument strings for this tool: zero or more
        '--flag value' (or '--flag=value') options followed by the
        positional arguments.

  Returns:
    The list of arguments left over once the leading options are consumed.

  Raises:
    getopt.GetoptError: If an unknown option is given or an option is
        missing its value.
  """
  # Each flag paired with the attribute it sets. All flags take a value.
  flag_attrs = (
      (self._NAME_FLAG, 'name'),
      (self._LANGUAGES_FLAG, 'languages'),
      (self._GRD_DIR_FLAG, 'grd_dir'),
      (self._RC_DIR_FLAG, 'rc_dir'),
      (self._HEADER_DIR_FLAG, 'header_dir'),
      (self._XTB_DIR_FLAG, 'xtb_dir'),
      (self._XML_DIR_FLAG, 'xml_res_dir'),
  )
  long_options = ['%s=' % flag_attr[0] for flag_attr in flag_attrs]
  # The short-option string must be '' rather than None: getopt inspects it
  # whenever it meets a single-dash argument, so None would turn an argument
  # such as '-x' into a TypeError instead of a getopt.GetoptError.
  opts, args = getopt.getopt(args, '', long_options)

  attr_by_flag = dict(flag_attrs)
  for key, val in opts:
    # Get rid of the preceding hyphens.
    flag = key[2:]
    if flag == self._LANGUAGES_FLAG:
      # Languages arrive as one comma separated value.
      val = val.split(',')
    setattr(self, attr_by_flag[flag], val)
  return args
def Run(self, opts, args):
  """Runs the Android2Grd tool.

  Inherited from grit.interface.Tool.

  Reads the strings.xml file named by the single positional argument,
  converts it into a grd document and writes that document to
  <grd_dir>/<name>.grd.

  Args:
    opts: Options object handed unchanged to self.SetOptions() (presumably
        grit's global options -- confirm against the base class).
    args: List of this tool's command-line arguments: the flags understood
        by ParseOptions() followed by the path of the strings.xml file to
        be converted.

  Returns:
    2 if the positional arguments are not exactly one path. Otherwise the
    method falls off the end and returns None.
  """
  args = self.ParseOptions(args)
  if len(args) != 1:
    print ('Tool requires one argument, the path to the Android '
           'strings.xml resource file to be converted.')
    return 2
  self.SetOptions(opts)

  android_path = args[0]

  # Read and parse the Android strings.xml file.
  with open(android_path) as android_file:
    android_dom = xml.dom.minidom.parse(android_file)

  # Do the hard work -- convert the Android dom to grd file contents.
  grd_dom = self.AndroidDomToGrdDom(android_dom)
  grd_string = unicode(grd_dom)

  # Write the grd string to a file in grd_dir.
  grd_filename = self.name + '.grd'
  grd_path = os.path.join(self.grd_dir, grd_filename)
  with open(grd_path, 'w') as grd_file:
    # grd_string is a unicode object. Writing it directly would implicitly
    # encode it as ASCII and fail on the first non-ASCII character, so it is
    # encoded explicitly as UTF-8, the encoding the grd XML header declares.
    grd_file.write(grd_string.encode('utf-8'))
def AndroidDomToGrdDom(self, android_dom):
  """Converts a strings.xml DOM into a DOM representing the contents of
  a grd file.

  The grd document starts from a fixed skeleton. It then receives one
  <output> element per requested kind of output and language, one
  translation <file> element per non-'en' language, and one <message> per
  <string> of the Android document.

  Args:
    android_dom: A xml.dom.Document containing the contents of the Android
        string.xml document.

  Returns:
    The DOM for the grd xml document produced by converting the Android DOM.
  """
  # Start with a basic skeleton for the .grd file. The XML text is kept
  # exactly as it is, hence the flush-left lines.
  root = grd_reader.Parse(StringIO.StringIO(
      '''<?xml version="1.0" encoding="UTF-8"?>
<grit base_dir="." latest_public_release="0"
current_release="1" source_lang_id="en">
<outputs />
<translations />
<release allow_pseudo="false" seq="1">
<messages fallback_to_english="true" />
</release>
</grit>'''), dir='.')
  outputs = root.children[0]
  translations = root.children[1]
  messages = root.children[2].children[0]
  # Sanity check that the skeleton parsed into the expected node types.
  assert (isinstance(messages, grit.node.empty.MessagesNode) and
          isinstance(translations, grit.node.empty.TranslationsNode) and
          isinstance(outputs, grit.node.empty.OutputsNode))

  # The header output is shared by all languages, so it is declared once.
  if self.header_dir:
    self.__CreateCppHeaderOutputNode(outputs, self.header_dir)

  for lang in self.languages:
    # Create an output element for each language.
    if self.rc_dir:
      self.__CreateRcOutputNode(outputs, lang, self.rc_dir)
    if self.xml_res_dir:
      self.__CreateAndroidXmlOutputNode(outputs, lang, self.xml_res_dir)
    # 'en' matches the skeleton's source_lang_id and gets no translation file.
    if lang != 'en':
      self.__CreateFileNode(translations, lang)

  # Convert all the strings.xml strings into grd messages.
  self.__CreateMessageNodes(messages, android_dom.documentElement)

  return root
def __CreateMessageNodes(self, messages, resources):
  """Adds one <message> child to <messages> for every <string> in resources.

  The description of a <string> comes from the most recent comment node seen
  before it; once a string has been converted the description starts out
  empty again.

  Args:
    messages: the <messages> element of the grd document being built.
    resources: the <resources> element of the strings.xml dom.
  """
  pending_description = ''
  for node in resources.childNodes:
    kind = node.nodeType
    if kind == Node.COMMENT_NODE:
      # Remove leading/trailing whitespace; collapse consecutive whitespaces.
      pending_description = ' '.join(node.data.split())
      continue
    if kind != Node.ELEMENT_NODE:
      # Text and any other node kinds between the strings are skipped.
      continue
    if node.tagName != 'string':
      print('Warning: ignoring unknown tag <%s>' % node.tagName)
      continue

    translatable = self.IsTranslatable(node)
    raw_name = node.getAttribute('name')
    if not _STRING_NAME.match(raw_name):
      # Only reported; the message is still created below.
      print('Error: illegal string name: %s' % raw_name)
    grd_name = 'IDS_' + raw_name.upper()
    # Transform the <string> node contents into a tclib.Message, taking
    # care to handle whitespace transformations and escaped characters,
    # and converting <xliff:g> placeholders into <ph> placeholders.
    msg = self.CreateTclibMessage(node)
    messages.AddChild(self.__CreateMessageNode(
        messages, grd_name, pending_description, msg, translatable))
    # Reset the description once a message has been parsed.
    pending_description = ''
def CreateTclibMessage(self, android_string):
  """Transforms a <string/> element from strings.xml into a tclib.Message.

  Interprets whitespace, quotes, and escaped characters in the android_string
  according to Android's formatting and styling rules for strings. Also
  converts <xliff:g> placeholders into <ph> placeholders, e.g.:

    <xliff:g id="website" example="google.com">%s</xliff:g>
      becomes
    <ph name="website"><ex>google.com</ex>%s</ph>

  Tags other than <xliff:g> are dropped with a warning and only their text
  is kept.

  Args:
    android_string: The <string> DOM element to convert.

  Returns:
    The tclib.Message.
  """
  msg = tclib.Message()
  current_text = ''  # Accumulated text that hasn't yet been added to msg.
  nodes = android_string.childNodes

  for i, node in enumerate(nodes):
    # Handle text nodes.
    if node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
      current_text += node.data

    # Handle <xliff:g> and other tags.
    elif node.nodeType == Node.ELEMENT_NODE:
      if node.tagName == 'xliff:g':
        # Element nodes have no data attribute, so the offending tag is
        # shown through toxml(); this keeps the failure an AssertionError
        # that names the tag.
        assert node.hasAttribute('id'), 'missing id: ' + node.toxml()
        placeholder_id = node.getAttribute('id')
        placeholder_text = self.__FormatPlaceholderText(node)
        placeholder_example = node.getAttribute('example')
        if not placeholder_example:
          print('Info: placeholder does not contain an example: %s' %
                node.toxml())
          # Fall back to the upper-cased id as the example.
          placeholder_example = placeholder_id.upper()
        msg.AppendPlaceholder(tclib.Placeholder(placeholder_id,
            placeholder_text, placeholder_example))
      else:
        print('Warning: removing tag <%s> which must be inside a '
              'placeholder: %s' % (node.tagName, node.toxml()))
        msg.AppendText(self.__FormatPlaceholderText(node))

    # Handle other nodes.
    elif node.nodeType != Node.COMMENT_NODE:
      assert False, 'Unknown node type: %s' % node.nodeType

    # Flush the accumulated text at the end of the string or right before an
    # element, so that text and placeholders keep their document order.
    is_last_node = (i == len(nodes) - 1)
    if (current_text and
        (is_last_node or nodes[i + 1].nodeType == Node.ELEMENT_NODE)):
      # For messages containing just text and comments (no xml tags) Android
      # strips leading and trailing whitespace.  We mimic that behavior.
      if not msg.GetContent() and is_last_node:
        current_text = current_text.strip()
      msg.AppendText(self.__FormatAndroidString(current_text))
      current_text = ''

  return msg
def __FormatAndroidString(self, android_string, inside_placeholder=False):
  r"""Converts text from strings.xml into the form used in a .grd file.

    * Consecutive whitespaces outside double-quotes collapse to one space.
    * The double-quote characters that delimit quoted sections are dropped.
    * \\, \n, \t, \", \' become \, newline, tab, ", '.

  Warnings are printed for unbalanced quotes, for a trailing backslash and,
  unless inside_placeholder is true, for format specifiers in the result.

  Args:
    android_string: The raw text to convert.
    inside_placeholder: Whether the text comes from inside a placeholder
        tag, where format specifiers are expected.

  Returns:
    The converted string.
  """
  unescaped = {'\\' : '\\', 'n' : '\n', 't' : '\t', '"' : '"', "'" : "'"}
  in_quotes = False  # True while between a pair of double quotes.
  after_backslash = False  # True right after an unescaped backslash.
  last_char = ''
  pieces = []

  for char in android_string:
    if after_backslash:
      # Unescape \\, \n, \t, \", and \'.
      assert char in unescaped, 'Illegal escape sequence: \\%s' % char
      pieces.append(unescaped[char])
      after_backslash = False
    elif char == '\\':
      after_backslash = True
    elif char == '"':
      in_quotes = not in_quotes
    elif in_quotes or not char.isspace():
      # Ordinary character, or whitespace protected by double quotes.
      pieces.append(char)
    elif not last_char.isspace():
      # First whitespace of an unquoted run: keep a single ' ' for the run.
      pieces.append(' ')
    last_char = char

  formatted = ''.join(pieces)

  if in_quotes:
    print('Warning: unbalanced quotes in string: %s' % android_string)

  if after_backslash:
    print('Warning: trailing backslash in string: %s' % android_string)

  # Check for format specifiers outside of placeholder tags.
  if not inside_placeholder and _FORMAT_SPECIFIER.search(formatted):
    print ('Warning: format specifiers are not inside a placeholder '
           '<xliff:g/> tag: %s' % formatted)

  return formatted
def __FormatPlaceholderText(self, placeholder_node):
  """Returns the formatted text found directly inside placeholder_node.

  Text and CDATA children are concatenated and comments are skipped. Any
  other kind of child fails an assertion.
  """
  pieces = []
  for child in placeholder_node.childNodes:
    kind = child.nodeType
    if kind == Node.TEXT_NODE or kind == Node.CDATA_SECTION_NODE:
      pieces.append(child.data)
      continue
    # Only comments may appear besides text.
    assert kind == Node.COMMENT_NODE, (
        'Unknown node type in ' + placeholder_node.toxml())
  raw_text = ''.join(pieces)
  return self.__FormatAndroidString(raw_text, inside_placeholder=True)
def __CreateMessageNode(self, messages_node, grd_name, description, msg,
                        translatable):
  """Builds the <message> element for one string resource.

  A <message> plays the role of an Android <string>: it declares a string
  resource together with a programmatic id. Warnings are printed when the
  description is empty and when the message is longer than the
  [CHAR-LIMIT=n] given in the description.

  Args:
    messages_node: The <messages> node passed as the parent of the message.
    grd_name: The name (programmatic id) of the message.
    description: The description text; may be empty.
    msg: The tclib.Message holding the message contents.
    translatable: Whether the message should be translated.

  Returns:
    The node built by message.MessageNode.Construct().
  """
  if not description:
    print('Warning: no description for %s' % grd_name)

  # Check that we actually fit within the character limit we've specified.
  limit_match = _CHAR_LIMIT.search(description)
  if limit_match:
    limit = int(limit_match.group(1))
    content = msg.GetRealContent()
    length = len(content)
    if length > limit:
      print('Warning: char-limit for %s is %d, but length is %d: %s' %
            (grd_name, limit, length, content))

  return message.MessageNode.Construct(
      parent=messages_node,
      name=grd_name,
      message=msg,
      desc=description,
      translateable=translatable)
def __CreateFileNode(self, translations_node, lang):
  """Adds the <file> element for one language to <translations>.

  A <file> element tells where the translation file (xtb) of a language is
  located: <xtb_dir>/<name>_<lang>.xtb, normalized.

  Args:
    translations_node: The <translations> node that receives the new child.
    lang: The language code of the translation file.

  Returns:
    The new file node.
  """
  xtb_name = '%s_%s.xtb' % (self.name, lang)
  xtb_path = os.path.normpath(os.path.join(self.xtb_dir, xtb_name))

  file_node = io.FileNode()
  file_node.StartParsing(u'file', translations_node)
  for attribute, value in (('path', xtb_path), ('lang', lang)):
    file_node.HandleAttribute(attribute, value)
  file_node.EndParsing()

  translations_node.AddChild(file_node)
  return file_node
def __CreateCppHeaderOutputNode(self, outputs_node, header_dir):
  """Adds the <output> element for the generated C header to <outputs>.

  The output has type 'rc_header', is named <header_dir>/<name>.h and has a
  single <emit emit_type="prepend"> child.

  Args:
    outputs_node: The <outputs> node that receives the new child.
    header_dir: Directory of the header file.

  Returns:
    The new output node.
  """
  header_path = os.path.join(header_dir, self.name + '.h')

  output = io.OutputNode()
  output.StartParsing(u'output', outputs_node)
  output.HandleAttribute('filename', header_path)
  output.HandleAttribute('type', 'rc_header')

  # The <emit> child is completed and attached before its parent is closed.
  emit = io.EmitNode()
  emit.StartParsing(u'emit', output)
  emit.HandleAttribute('emit_type', 'prepend')
  emit.EndParsing()
  output.AddChild(emit)

  output.EndParsing()
  outputs_node.AddChild(output)
  return output
def __CreateRcOutputNode(self, outputs_node, lang, rc_dir):
  """Adds the <output> element for one language's rc file to <outputs>.

  The output has type 'rc_all' and is named <rc_dir>/<name>_<lang>.rc.

  Args:
    outputs_node: The <outputs> node that receives the new child.
    lang: The language code of the rc file.
    rc_dir: Directory of the rc file.

  Returns:
    The new output node.
  """
  rc_path = os.path.join(rc_dir, ''.join([self.name, '_', lang, '.rc']))

  output = io.OutputNode()
  output.StartParsing(u'output', outputs_node)
  for attribute, value in (('filename', rc_path),
                           ('lang', lang),
                           ('type', 'rc_all')):
    output.HandleAttribute(attribute, value)
  output.EndParsing()

  outputs_node.AddChild(output)
  return output
def __CreateAndroidXmlOutputNode(self, outputs_node, locale, xml_res_dir):
  """Adds the <output> element for one locale's strings.xml to <outputs>.

  The output has type 'android' and is named
  <xml_res_dir>/values-<android locale>/strings.xml, or
  <xml_res_dir>/values/strings.xml when the Android locale is 'en'.

  Args:
    outputs_node: The <outputs> node that receives the new child.
    locale: The locale code, e.g. 'en-GB'. It is also used unchanged as the
        lang attribute of the output.
    xml_res_dir: Directory that contains the values* directories.

  Returns:
    The new output node.
  """
  # Need to check to see if the locale has a region, e.g. the GB in en-GB.
  # When a locale has a region Android expects the region to be prefixed
  # with an 'r'. For example for en-GB Android expects a values-en-rGB
  # directory.  Also, Android expects nb, tl, in, iw, ji as the language
  # codes for Norwegian, Tagalog/Filipino, Indonesian, Hebrew, and Yiddish:
  # http://developer.android.com/reference/java/util/Locale.html
  if locale == 'es-419':
    # Special case that is mapped directly.
    android_locale = 'es-rUS'
  else:
    language, _, region = locale.partition('-')
    renamed = {'no': 'nb', 'fil': 'tl', 'id': 'in', 'he': 'iw', 'yi': 'ji'}
    android_locale = renamed.get(language, language)
    if region:
      android_locale += '-r' + region

  if android_locale == 'en':
    values_dir = 'values'
  else:
    values_dir = 'values-' + android_locale
  strings_path = os.path.normpath(
      os.path.join(xml_res_dir, values_dir, 'strings.xml'))

  output = io.OutputNode()
  output.StartParsing(u'output', outputs_node)
  output.HandleAttribute('filename', strings_path)
  output.HandleAttribute('lang', locale)
  output.HandleAttribute('type', 'android')
  output.EndParsing()

  outputs_node.AddChild(output)
  return output
def IsTranslatable(self, android_string):
  """Tells whether a <string> element is a candidate for translation.

  A <string> without a translatable attribute is translatable. With the
  attribute, only the value 'true' (in any letter case) makes the string
  translatable; a value other than 'true' or 'false' also prints a warning.

  Args:
    android_string: The <string> DOM element to inspect.

  Returns:
    True if the string should be translated, False otherwise.
  """
  if not android_string.hasAttribute('translatable'):
    return True
  flag = android_string.getAttribute('translatable').lower()
  if flag not in ('true', 'false'):
    print('Warning: translatable attribute has invalid value: %s' % flag)
  return flag == 'true'