grit/tool/xmb.py - external/grit-i18n - Git at Google

 #!/usr/bin/env python
 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """The 'grit xmb' tool.
 """

 import getopt
 import os

 from xml.sax import saxutils

 from grit import grd_reader
 from grit import lazy_re
 from grit import tclib
 from grit import util
 from grit.tool import interface


 # Matches a run of one or more whitespace characters.  Used to collapse
 # presentable content to determine if xml:space="preserve" is needed.
 _WHITESPACES_REGEX = lazy_re.compile(ur'\s\s*')


 # Extra entities handed to saxutils.escape() by _XmlEscape below, so that
 # both quote characters are escaped in addition to the markup characters.
 _XML_QUOTE_ESCAPES = {
     u"'":  u'&apos;',
     u'"':  u'&quot;',
 }
 # Matches every character outside tab, LF, CR, U+0020-U+D7FF and
 # U+E000-U+FFFD; _XmlEscape removes these characters from its result.
 _XML_BAD_CHAR_REGEX = lazy_re.compile(u'[^\u0009\u000A\u000D'
                                       u'\u0020-\uD7FF\uE000-\uFFFD]')


 def _XmlEscape(s):
   """Escapes text for use in XML attribute values or element content.

   The escaping is meant to be compatible with Google's internal
   Translation Console tool: markup characters and both quote characters
   are replaced by entities, and characters matched by
   _XML_BAD_CHAR_REGEX are removed.

   Args:
     s: the value to escape; anything that is not exactly a unicode object
        is first converted with unicode().

   Returns:
     The escaped text as a UTF-8 encoded byte string.
   """
   text = s if type(s) == unicode else unicode(s)
   escaped = saxutils.escape(text, _XML_QUOTE_ESCAPES)
   cleaned = _XML_BAD_CHAR_REGEX.sub(u'', escaped)
   return cleaned.encode('utf-8')


 def _WriteAttribute(file, name, value):
   """Emits ' name="value"' to the file, escaping the value for XML.

   Nothing is written when the value is falsy (e.g. None or empty).

   Args:
     file: file-like object to write to
     name: name of the attribute (written as-is, not escaped)
     value: (unescaped) value of the attribute
   """
   if not value:
     return
   escaped_value = _XmlEscape(value)
   file.write(' %s="%s"' % (name, escaped_value))


 def _WriteMessage(file, message):
   """Writes one message as a <msg> element to the specified file.

   The desc, id and meaning attributes are emitted only when they have a
   non-empty value.  xml:space="preserve" is added when collapsing
   whitespace runs and stripping the presentable content would change it;
   otherwise the content is put on its own indented line.

   Args:
     file: file-like object to write to
     message: the message to write; its content parts are either
              tclib.Placeholder objects or plain text
   """
   presentable_content = message.GetPresentableContent()
   # The placeholder test used to be written as type(x == y), which is always
   # truthy; check the type of the single part itself.
   assert (type(presentable_content) == unicode or
           (len(message.parts) == 1 and
            isinstance(message.parts[0], tclib.Placeholder)))
   collapsed = _WHITESPACES_REGEX.sub(u' ', presentable_content.strip())
   preserve_space = presentable_content != collapsed

   file.write('<msg')
   _WriteAttribute(file, 'desc', message.GetDescription())
   _WriteAttribute(file, 'id', message.GetId())
   _WriteAttribute(file, 'meaning', message.GetMeaning())
   if preserve_space:
     _WriteAttribute(file, 'xml:space', 'preserve')
   file.write('>')
   if not preserve_space:
     # Whitespace is not significant here, so lay the content out readably.
     file.write('\n  ')

   for part in message.GetContent():
     if isinstance(part, tclib.Placeholder):
       # <ph name="..."><ex>example</ex>original</ph>
       file.write('<ph')
       _WriteAttribute(file, 'name', part.GetPresentation())
       file.write('><ex>')
       file.write(_XmlEscape(part.GetExample()))
       file.write('</ex>')
       file.write(_XmlEscape(part.GetOriginal()))
       file.write('</ph>')
     else:
       file.write(_XmlEscape(part))
   if not preserve_space:
     file.write('\n')
   file.write('</msg>\n')


 def WriteXmbFile(file, messages):
   """Serializes messages as an XMB document into an open file-like object.

   The XML declaration and an inline DTD are written first, then one <msg>
   element per message (in the order given), then the closing
   </messagebundle> tag, which is not followed by a newline.

   Args:
     file: open file-like object to write to
     messages: iterable of grit.tclib.Message items
   """
   prologue = """<?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE messagebundle [
 <!ELEMENT messagebundle (msg)*>
 <!ATTLIST messagebundle class CDATA #IMPLIED>

 <!ELEMENT msg (#PCDATA|ph|source)*>
 <!ATTLIST msg id CDATA #IMPLIED>
 <!ATTLIST msg seq CDATA #IMPLIED>
 <!ATTLIST msg name CDATA #IMPLIED>
 <!ATTLIST msg desc CDATA #IMPLIED>
 <!ATTLIST msg meaning CDATA #IMPLIED>
 <!ATTLIST msg obsolete (obsolete) #IMPLIED>
 <!ATTLIST msg xml:space (default|preserve) "default">
 <!ATTLIST msg is_hidden CDATA #IMPLIED>

 <!ELEMENT source (#PCDATA)>

 <!ELEMENT ph (#PCDATA|ex)*>
 <!ATTLIST ph name CDATA #REQUIRED>

 <!ELEMENT ex (#PCDATA)>
 ]>
 <messagebundle>
 """
   file.write(prologue)
   for item in messages:
     _WriteMessage(file, item)
   file.write('</messagebundle>')


 class OutputXmb(interface.Tool):
   """Outputs all translateable messages in the .grd input file to an
 .xmb file, which is the format used to give source messages to
 Google's internal Translation Console tool.  The format could easily
 be used for other systems.

 Usage: grit xmb [-i|-h] [-l LIMITFILE] OUTPUTPATH

 OUTPUTPATH is the path you want to output the .xmb file to.

 The -l option can be used to output only some of the resources to the .xmb file.
 LIMITFILE is the path to a file that is used to limit the items output to the
 xmb file.  If the filename extension is .grd, the file must be a .grd file
 and the tool only output the contents of nodes from the input file that also
 exist in the limit file (as compared on the 'name' attribute). Otherwise it must
 contain a list of the IDs that output should be limited to, one ID per line, and
 the tool will only output nodes with 'name' attributes that match one of the
 IDs.

 The -i option causes 'grit xmb' to output an "IDs only" file instead of an XMB
 file.  The "IDs only" file contains the message ID of each message that would
 normally be output to the XMB file, one message ID per line.  It is designed for
 use with the 'grit transl2tc' tool's -l option.

 Other options:

   -D NAME[=VAL]     Specify a C-preprocessor-like define NAME with optional
                     value VAL (defaults to 1) which will be used to control
                     conditional inclusion of resources.

   -E NAME=VALUE     Set environment variable NAME to VALUE (within grit).

 """
   # The different output formats supported by this tool
   FORMAT_XMB = 0
   FORMAT_IDS_ONLY = 1

   def __init__(self, defines=None):
     """Creates the tool with the default (XMB) output format.

     Args:
       defines: optional dict of define name -> value; -D options passed to
                Run() are added to it.
     """
     super(OutputXmb, self).__init__()
     self.format = self.FORMAT_XMB
     self.defines = defines or {}

   def ShortDescription(self):
     """Returns a one-line description of this tool."""
     return 'Exports all translateable messages into an XMB file.'

   def Run(self, opts, args):
     """Parses this tool's arguments and writes the requested output file.

     Args:
       opts: global grit options; opts.input and opts.extra_verbose are read
       args: command-line arguments for this tool (see the class docstring)

     Returns:
       2 if the number of positional arguments is wrong, otherwise None.
     """
     self.SetOptions(opts)

     limit_file = None
     limit_is_grd = False
     limit_file_dir = None
     # 'E:' has to be listed, otherwise getopt rejects the documented -E option
     # before the loop below gets a chance to handle it.  -h is accepted but
     # has no effect here.
     own_opts, args = getopt.getopt(args, 'l:D:E:ih')
     try:
       for key, val in own_opts:
         if key == '-l':
           if limit_file:
             # A repeated -l replaces the earlier limit file; don't leak it.
             limit_file.close()
           limit_file = open(val, 'r')
           limit_file_dir = util.dirname(val)
           if not len(limit_file_dir):
             limit_file_dir = '.'
           limit_is_grd = os.path.splitext(val)[1] == '.grd'
         elif key == '-i':
           self.format = self.FORMAT_IDS_ONLY
         elif key == '-D':
           name, val = util.ParseDefine(val)
           self.defines[name] = val
         elif key == '-E':
           (env_name, env_value) = val.split('=', 1)
           os.environ[env_name] = env_value
       if len(args) != 1:
         print ('grit xmb takes exactly one argument, the path to the XMB file '
                'to output.')
         return 2

       xmb_path = args[0]
       res_tree = grd_reader.Parse(opts.input, debug=opts.extra_verbose)
       res_tree.SetOutputLanguage('en')
       res_tree.SetDefines(self.defines)
       res_tree.OnlyTheseTranslations([])
       res_tree.RunGatherers()

       with open(xmb_path, 'wb') as output_file:
         self.Process(
           res_tree, output_file, limit_file, limit_is_grd, limit_file_dir)
     finally:
       # Close the limit file on every exit path, including the early return
       # above and any exception raised while parsing or writing.
       if limit_file:
         limit_file.close()
     print('Wrote %s' % xmb_path)

   def Process(self, res_tree, output_file, limit_file=None, limit_is_grd=False,
               dir=None):
     """Writes a document with the contents of res_tree into output_file,
     limiting output to the IDs specified in limit_file, which is a GRD file if
     limit_is_grd is true, otherwise a file with one ID per line.

     The format of the output document depends on this object's format attribute.
     It can be FORMAT_XMB or FORMAT_IDS_ONLY.

     The FORMAT_IDS_ONLY format causes this function to write just a list
     of the IDs of all messages that would have been added to the XMB file, one
     ID per line.

     The FORMAT_XMB format causes this function to output the (default) XMB
     format.

     Args:
       res_tree: base.Node()
       output_file: file open for writing
       limit_file: None or file open for reading
       limit_is_grd: True | False
       dir: Directory of the limit file
     """
     # Names that output is restricted to, or None when there is no limit.
     # A set keeps the per-node membership test below cheap.
     limit_names = None
     if limit_file:
       if limit_is_grd:
         limit_tree = grd_reader.Parse(limit_file,
                                       dir=dir,
                                       debug=self.o.extra_verbose)
         limit_names = set(node.attrs['name'] for node in limit_tree
                           if 'name' in node.attrs)
       else:
         # Not a GRD file, so it's just a file with one ID per line
         limit_names = set(
             item.strip() for item in limit_file.read().split('\n'))

     seen_ids = set()
     messages = []
     for node in res_tree:
       if (limit_names is not None and
           not ('name' in node.attrs and node.attrs['name'] in limit_names)):
         continue
       if not node.IsTranslateable():
         continue

       for clique in node.GetCliques():
         if not clique.IsTranslateable():
           continue
         if not clique.GetMessage().GetRealContent():
           continue

         # Some explanation is in order here.  Note that we can have
         # many messages with the same ID.
         #
         # The way we work around this is to maintain a list of cliques
         # per message ID (in the UberClique) and select the "best" one
         # (the first one that has a description, or an arbitrary one
         # if there is no description) for inclusion in the XMB file.
         # The translations are all going to be the same for messages
         # with the same ID, although the way we replace placeholders
         # might be slightly different.
         msg_id = clique.GetMessage().GetId()
         if msg_id in seen_ids:
           continue
         seen_ids.add(msg_id)

         messages.append(node.UberClique().BestClique(msg_id).GetMessage())

     # Ensure a stable order of messages, to help regression testing.
     messages.sort(key=lambda x: x.GetId())

     if self.format == self.FORMAT_IDS_ONLY:
       # We just print the list of IDs to the output file.
       for msg in messages:
         output_file.write(msg.GetId())
         output_file.write('\n')
     else:
       assert self.format == self.FORMAT_XMB
       WriteXmbFile(output_file, messages)
	#!/usr/bin/env python
	# Copyright (c) 2012 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	"""The 'grit xmb' tool.
	"""

	import getopt
	import os

	from xml.sax import saxutils

	from grit import grd_reader
	from grit import lazy_re
	from grit import tclib
	from grit import util
	from grit.tool import interface


	# Used to collapse presentable content to determine if
	# xml:space="preserve" is needed.
	_WHITESPACES_REGEX = lazy_re.compile(ur'\s\s*')


	# See XmlEscape below.
	_XML_QUOTE_ESCAPES = {
	u"'": u''',
	u'"': u'"',
	}
	_XML_BAD_CHAR_REGEX = lazy_re.compile(u'[^\u0009\u000A\u000D'
	u'\u0020-\uD7FF\uE000-\uFFFD]')


	def _XmlEscape(s):
	  """Escapes text for use in XML attribute values or element content.

	  The escaping is meant to be compatible with Google's internal
	  Translation Console tool: markup and quote characters are escaped via
	  saxutils.escape() with _XML_QUOTE_ESCAPES, and characters matched by
	  _XML_BAD_CHAR_REGEX are removed.

	  Args:
	    s: the value to escape; anything that is not exactly a unicode object
	       is first converted with unicode().

	  Returns:
	    The escaped text as a UTF-8 encoded byte string.
	  """
	  text = s if type(s) == unicode else unicode(s)
	  escaped = saxutils.escape(text, _XML_QUOTE_ESCAPES)
	  cleaned = _XML_BAD_CHAR_REGEX.sub(u'', escaped)
	  return cleaned.encode('utf-8')


	def _WriteAttribute(file, name, value):
	  """Emits ' name="value"' to the file, escaping the value for XML.

	  Nothing is written when the value is falsy (e.g. None or empty).

	  Args:
	    file: file-like object to write to
	    name: name of the attribute (written as-is, not escaped)
	    value: (unescaped) value of the attribute
	  """
	  if not value:
	    return
	  escaped_value = _XmlEscape(value)
	  file.write(' %s="%s"' % (name, escaped_value))


	def _WriteMessage(file, message):
	  """Writes one message as a <msg> element to the specified file.

	  The desc, id and meaning attributes are emitted only when they have a
	  non-empty value.  xml:space="preserve" is added when collapsing
	  whitespace runs and stripping the presentable content would change it;
	  otherwise the content is put on its own indented line.

	  Args:
	    file: file-like object to write to
	    message: the message to write; its content parts are either
	             tclib.Placeholder objects or plain text
	  """
	  presentable_content = message.GetPresentableContent()
	  # The placeholder test used to be written as type(x == y), which is always
	  # truthy; check the type of the single part itself.
	  assert (type(presentable_content) == unicode or
	          (len(message.parts) == 1 and
	           isinstance(message.parts[0], tclib.Placeholder)))
	  collapsed = _WHITESPACES_REGEX.sub(u' ', presentable_content.strip())
	  preserve_space = presentable_content != collapsed

	  file.write('<msg')
	  _WriteAttribute(file, 'desc', message.GetDescription())
	  _WriteAttribute(file, 'id', message.GetId())
	  _WriteAttribute(file, 'meaning', message.GetMeaning())
	  if preserve_space:
	    _WriteAttribute(file, 'xml:space', 'preserve')
	  file.write('>')
	  if not preserve_space:
	    # Whitespace is not significant here, so lay the content out readably.
	    file.write('\n  ')

	  for part in message.GetContent():
	    if isinstance(part, tclib.Placeholder):
	      # <ph name="..."><ex>example</ex>original</ph>
	      file.write('<ph')
	      _WriteAttribute(file, 'name', part.GetPresentation())
	      file.write('><ex>')
	      file.write(_XmlEscape(part.GetExample()))
	      file.write('</ex>')
	      file.write(_XmlEscape(part.GetOriginal()))
	      file.write('</ph>')
	    else:
	      file.write(_XmlEscape(part))
	  if not preserve_space:
	    file.write('\n')
	  file.write('</msg>\n')


	def WriteXmbFile(file, messages):
	  """Writes the given grit.tclib.Message items to the specified open
	  file-like object in the XMB format.

	  The XML declaration and an inline DTD are written first, then one <msg>
	  element per message (in the order given), then the closing
	  </messagebundle> tag, which is not followed by a newline.

	  Args:
	    file: open file-like object to write to
	    messages: iterable of grit.tclib.Message items
	  """
	  # The alternatives in the DTD content models are separated by a bare '|';
	  # a backslash in front of it would make the DTD invalid.
	  file.write("""<?xml version="1.0" encoding="UTF-8"?>
	<!DOCTYPE messagebundle [
	<!ELEMENT messagebundle (msg)*>
	<!ATTLIST messagebundle class CDATA #IMPLIED>

	<!ELEMENT msg (#PCDATA|ph|source)*>
	<!ATTLIST msg id CDATA #IMPLIED>
	<!ATTLIST msg seq CDATA #IMPLIED>
	<!ATTLIST msg name CDATA #IMPLIED>
	<!ATTLIST msg desc CDATA #IMPLIED>
	<!ATTLIST msg meaning CDATA #IMPLIED>
	<!ATTLIST msg obsolete (obsolete) #IMPLIED>
	<!ATTLIST msg xml:space (default|preserve) "default">
	<!ATTLIST msg is_hidden CDATA #IMPLIED>

	<!ELEMENT source (#PCDATA)>

	<!ELEMENT ph (#PCDATA|ex)*>
	<!ATTLIST ph name CDATA #REQUIRED>

	<!ELEMENT ex (#PCDATA)>
	]>
	<messagebundle>
	""")
	  for message in messages:
	    _WriteMessage(file, message)
	  file.write('</messagebundle>')


	class OutputXmb(interface.Tool):
	  """Outputs all translateable messages in the .grd input file to an
	.xmb file, which is the format used to give source messages to
	Google's internal Translation Console tool.  The format could easily
	be used for other systems.

	Usage: grit xmb [-i|-h] [-l LIMITFILE] OUTPUTPATH

	OUTPUTPATH is the path you want to output the .xmb file to.

	The -l option can be used to output only some of the resources to the .xmb file.
	LIMITFILE is the path to a file that is used to limit the items output to the
	xmb file.  If the filename extension is .grd, the file must be a .grd file
	and the tool only output the contents of nodes from the input file that also
	exist in the limit file (as compared on the 'name' attribute). Otherwise it must
	contain a list of the IDs that output should be limited to, one ID per line, and
	the tool will only output nodes with 'name' attributes that match one of the
	IDs.

	The -i option causes 'grit xmb' to output an "IDs only" file instead of an XMB
	file.  The "IDs only" file contains the message ID of each message that would
	normally be output to the XMB file, one message ID per line.  It is designed for
	use with the 'grit transl2tc' tool's -l option.

	Other options:

	  -D NAME[=VAL]     Specify a C-preprocessor-like define NAME with optional
	                    value VAL (defaults to 1) which will be used to control
	                    conditional inclusion of resources.

	  -E NAME=VALUE     Set environment variable NAME to VALUE (within grit).

	"""
	  # The different output formats supported by this tool
	  FORMAT_XMB = 0
	  FORMAT_IDS_ONLY = 1

	  def __init__(self, defines=None):
	    """Creates the tool with the default (XMB) output format.

	    Args:
	      defines: optional dict of define name -> value; -D options passed to
	               Run() are added to it.
	    """
	    super(OutputXmb, self).__init__()
	    self.format = self.FORMAT_XMB
	    self.defines = defines or {}

	  def ShortDescription(self):
	    """Returns a one-line description of this tool."""
	    return 'Exports all translateable messages into an XMB file.'

	  def Run(self, opts, args):
	    """Parses this tool's arguments and writes the requested output file.

	    Args:
	      opts: global grit options; opts.input and opts.extra_verbose are read
	      args: command-line arguments for this tool (see the class docstring)

	    Returns:
	      2 if the number of positional arguments is wrong, otherwise None.
	    """
	    self.SetOptions(opts)

	    limit_file = None
	    limit_is_grd = False
	    limit_file_dir = None
	    # 'E:' has to be listed, otherwise getopt rejects the documented -E option
	    # before the loop below gets a chance to handle it.  -h is accepted but
	    # has no effect here.
	    own_opts, args = getopt.getopt(args, 'l:D:E:ih')
	    try:
	      for key, val in own_opts:
	        if key == '-l':
	          if limit_file:
	            # A repeated -l replaces the earlier limit file; don't leak it.
	            limit_file.close()
	          limit_file = open(val, 'r')
	          limit_file_dir = util.dirname(val)
	          if not len(limit_file_dir):
	            limit_file_dir = '.'
	          limit_is_grd = os.path.splitext(val)[1] == '.grd'
	        elif key == '-i':
	          self.format = self.FORMAT_IDS_ONLY
	        elif key == '-D':
	          name, val = util.ParseDefine(val)
	          self.defines[name] = val
	        elif key == '-E':
	          (env_name, env_value) = val.split('=', 1)
	          os.environ[env_name] = env_value
	      if len(args) != 1:
	        print ('grit xmb takes exactly one argument, the path to the XMB file '
	               'to output.')
	        return 2

	      xmb_path = args[0]
	      res_tree = grd_reader.Parse(opts.input, debug=opts.extra_verbose)
	      res_tree.SetOutputLanguage('en')
	      res_tree.SetDefines(self.defines)
	      res_tree.OnlyTheseTranslations([])
	      res_tree.RunGatherers()

	      with open(xmb_path, 'wb') as output_file:
	        self.Process(
	          res_tree, output_file, limit_file, limit_is_grd, limit_file_dir)
	    finally:
	      # Close the limit file on every exit path, including the early return
	      # above and any exception raised while parsing or writing.
	      if limit_file:
	        limit_file.close()
	    print('Wrote %s' % xmb_path)

	  def Process(self, res_tree, output_file, limit_file=None, limit_is_grd=False,
	              dir=None):
	    """Writes a document with the contents of res_tree into output_file,
	    limiting output to the IDs specified in limit_file, which is a GRD file if
	    limit_is_grd is true, otherwise a file with one ID per line.

	    The format of the output document depends on this object's format attribute.
	    It can be FORMAT_XMB or FORMAT_IDS_ONLY.

	    The FORMAT_IDS_ONLY format causes this function to write just a list
	    of the IDs of all messages that would have been added to the XMB file, one
	    ID per line.

	    The FORMAT_XMB format causes this function to output the (default) XMB
	    format.

	    Args:
	      res_tree: base.Node()
	      output_file: file open for writing
	      limit_file: None or file open for reading
	      limit_is_grd: True | False
	      dir: Directory of the limit file
	    """
	    # Names that output is restricted to, or None when there is no limit.
	    # A set keeps the per-node membership test below cheap.
	    limit_names = None
	    if limit_file:
	      if limit_is_grd:
	        limit_tree = grd_reader.Parse(limit_file,
	                                      dir=dir,
	                                      debug=self.o.extra_verbose)
	        limit_names = set(node.attrs['name'] for node in limit_tree
	                          if 'name' in node.attrs)
	      else:
	        # Not a GRD file, so it's just a file with one ID per line
	        limit_names = set(
	            item.strip() for item in limit_file.read().split('\n'))

	    seen_ids = set()
	    messages = []
	    for node in res_tree:
	      if (limit_names is not None and
	          not ('name' in node.attrs and node.attrs['name'] in limit_names)):
	        continue
	      if not node.IsTranslateable():
	        continue

	      for clique in node.GetCliques():
	        if not clique.IsTranslateable():
	          continue
	        if not clique.GetMessage().GetRealContent():
	          continue

	        # Some explanation is in order here.  Note that we can have
	        # many messages with the same ID.
	        #
	        # The way we work around this is to maintain a list of cliques
	        # per message ID (in the UberClique) and select the "best" one
	        # (the first one that has a description, or an arbitrary one
	        # if there is no description) for inclusion in the XMB file.
	        # The translations are all going to be the same for messages
	        # with the same ID, although the way we replace placeholders
	        # might be slightly different.
	        msg_id = clique.GetMessage().GetId()
	        if msg_id in seen_ids:
	          continue
	        seen_ids.add(msg_id)

	        messages.append(node.UberClique().BestClique(msg_id).GetMessage())

	    # Ensure a stable order of messages, to help regression testing.
	    messages.sort(key=lambda x: x.GetId())

	    if self.format == self.FORMAT_IDS_ONLY:
	      # We just print the list of IDs to the output file.
	      for msg in messages:
	        output_file.write(msg.GetId())
	        output_file.write('\n')
	    else:
	      assert self.format == self.FORMAT_XMB
	      WriteXmbFile(output_file, messages)