third_party/pystache/parser.py - chromium/src - Git at Google

 # coding: utf-8

 """
 Exposes a parse() function to parse template strings.

 """

 import re

 from pystache import defaults
 from pystache.parsed import ParsedTemplate


 # Characters that end a line; the parser checks the characters on either
 # side of a tag against this list to decide whether the tag stands alone.
 END_OF_LINE_CHARACTERS = [u'\r', u'\n']
 # Captures the first character of each line that has one ("." does not
 # match a newline, and re.M lets "^" match at every line start).  The
 # pattern contains no backslashes, so the plain u'' literal has the same
 # value as the former ur'' literal, which Python 3 rejects as a syntax error.
 NON_BLANK_RE = re.compile(u'^(.)', re.M)


 # TODO: add some unit tests for this.
 # TODO: add a test case that checks for spurious spaces.
 # TODO: add test cases for delimiters.
 def parse(template, delimiters=None):
     """
     Turn a unicode template string into a ParsedTemplate instance.

     Arguments:

       template: the template to parse.  Its type must be exactly unicode;
         any other type is rejected.

       delimiters: a 2-tuple of delimiters, handed to the parser.  When
         None, the parser falls back to the package default.

     Raises:

       Exception: if template is not a unicode string.

     Examples:

     >>> parsed = parse(u"Hey {{#who}}{{name}}!{{/who}}")
     >>> print str(parsed).replace('u', '')  # This is a hack to get the test to pass both in Python 2 and 3.
     ['Hey ', _SectionNode(key='who', index_begin=12, index_end=21, parsed=[_EscapeNode(key='name'), '!'])]

     """
     template_type = type(template)
     if template_type is unicode:
         return _Parser(delimiters).parse(template)

     raise Exception("Template is not unicode: %s" % template_type)


 def _compile_template_re(delimiters):
     """
     Return a regular expression object (re.RegexObject) instance.

     """
     # The possible tag type characters following the opening tag,
     # excluding "=" and "{".
     tag_types = "!>&/#^"

     # TODO: are we following this in the spec?
     #
     #   The tag's content MUST be a non-whitespace character sequence
     #   NOT containing the current closing delimiter.
     #
     tag = r"""
         (?P<whitespace>[\ \t]*)
         %(otag)s \s*
         (?:
           (?P<change>=) \s* (?P<delims>.+?)   \s* = |
           (?P<raw>{)    \s* (?P<raw_name>.+?) \s* } |
           (?P<tag>[%(tag_types)s]?)  \s* (?P<tag_key>[\s\S]+?)
         )
         \s* %(ctag)s
     """ % {'tag_types': tag_types, 'otag': re.escape(delimiters[0]), 'ctag': re.escape(delimiters[1])}

     return re.compile(tag, re.VERBOSE)


 class ParsingError(Exception):
     """Raised when a template cannot be parsed, e.g. on a mismatched section end tag."""


 ## Node types

 def _format(obj, exclude=None):
     if exclude is None:
         exclude = []
     exclude.append('key')
     attrs = obj.__dict__
     names = list(set(attrs.keys()) - set(exclude))
     names.sort()
     names.insert(0, 'key')
     args = ["%s=%s" % (name, repr(attrs[name])) for name in names]
     return "%s(%s)" % (obj.__class__.__name__, ", ".join(args))


 class _CommentNode(object):

     def __repr__(self):
         return _format(self)

     def render(self, engine, context):
         return u''


 class _ChangeNode(object):

     def __init__(self, delimiters):
         self.delimiters = delimiters

     def __repr__(self):
         return _format(self)

     def render(self, engine, context):
         return u''


 class _EscapeNode(object):

     def __init__(self, key):
         self.key = key

     def __repr__(self):
         return _format(self)

     def render(self, engine, context):
         s = engine.fetch_string(context, self.key)
         return engine.escape(s)


 class _LiteralNode(object):

     def __init__(self, key):
         self.key = key

     def __repr__(self):
         return _format(self)

     def render(self, engine, context):
         s = engine.fetch_string(context, self.key)
         return engine.literal(s)


 class _PartialNode(object):

     def __init__(self, key, indent):
         self.key = key
         self.indent = indent

     def __repr__(self):
         return _format(self)

     def render(self, engine, context):
         template = engine.resolve_partial(self.key)
         # Indent before rendering.
         template = re.sub(NON_BLANK_RE, self.indent + ur'\1', template)

         return engine.render(template, context)


 class _InvertedNode(object):

     def __init__(self, key, parsed_section):
         self.key = key
         self.parsed_section = parsed_section

     def __repr__(self):
         return _format(self)

     def render(self, engine, context):
         # TODO: is there a bug because we are not using the same
         #   logic as in fetch_string()?
         data = engine.resolve_context(context, self.key)
         # Note that lambdas are considered truthy for inverted sections
         # per the spec.
         if data:
             return u''
         return self.parsed_section.render(engine, context)


 class _SectionNode(object):

     # TODO: the template_ and parsed_template_ arguments don't both seem
     # to be necessary.  Can we remove one of them?  For example, if
     # callable(data) is True, then the initial parsed_template isn't used.
     def __init__(self, key, parsed, delimiters, template, index_begin, index_end):
         self.delimiters = delimiters
         self.key = key
         self.parsed = parsed
         self.template = template
         self.index_begin = index_begin
         self.index_end = index_end

     def __repr__(self):
         return _format(self, exclude=['delimiters', 'template'])

     def render(self, engine, context):
         values = engine.fetch_section_data(context, self.key)

         parts = []
         for val in values:
             if callable(val):
                 # Lambdas special case section rendering and bypass pushing
                 # the data value onto the context stack.  From the spec--
                 #
                 #   When used as the data value for a Section tag, the
                 #   lambda MUST be treatable as an arity 1 function, and
                 #   invoked as such (passing a String containing the
                 #   unprocessed section contents).  The returned value
                 #   MUST be rendered against the current delimiters, then
                 #   interpolated in place of the section.
                 #
                 #  Also see--
                 #
                 #   https://github.com/defunkt/pystache/issues/113
                 #
                 # TODO: should we check the arity?
                 val = val(self.template[self.index_begin:self.index_end])
                 val = engine._render_value(val, context, delimiters=self.delimiters)
                 parts.append(val)
                 continue

             context.push(val)
             parts.append(self.parsed.render(engine, context))
             context.pop()

         return unicode(''.join(parts))


 class _Parser(object):

     _delimiters = None
     _template_re = None

     def __init__(self, delimiters=None):
         if delimiters is None:
             delimiters = defaults.DELIMITERS

         self._delimiters = delimiters

     def _compile_delimiters(self):
         self._template_re = _compile_template_re(self._delimiters)

     def _change_delimiters(self, delimiters):
         self._delimiters = delimiters
         self._compile_delimiters()

     def parse(self, template):
         """
         Parse a template string starting at some index.

         This method uses the current tag delimiter.

         Arguments:

           template: a unicode string that is the template to parse.

           index: the index at which to start parsing.

         Returns:

           a ParsedTemplate instance.

         """
         self._compile_delimiters()

         start_index = 0
         content_end_index, parsed_section, section_key = None, None, None
         parsed_template = ParsedTemplate()

         states = []

         while True:
             match = self._template_re.search(template, start_index)

             if match is None:
                 break

             match_index = match.start()
             end_index = match.end()

             matches = match.groupdict()

             # Normalize the matches dictionary.
             if matches['change'] is not None:
                 matches.update(tag='=', tag_key=matches['delims'])
             elif matches['raw'] is not None:
                 matches.update(tag='&', tag_key=matches['raw_name'])

             tag_type = matches['tag']
             tag_key = matches['tag_key']
             leading_whitespace = matches['whitespace']

             # Standalone (non-interpolation) tags consume the entire line,
             # both leading whitespace and trailing newline.
             did_tag_begin_line = match_index == 0 or template[match_index - 1] in END_OF_LINE_CHARACTERS
             did_tag_end_line = end_index == len(template) or template[end_index] in END_OF_LINE_CHARACTERS
             is_tag_interpolating = tag_type in ['', '&']

             if did_tag_begin_line and did_tag_end_line and not is_tag_interpolating:
                 if end_index < len(template):
                     end_index += template[end_index] == '\r' and 1 or 0
                 if end_index < len(template):
                     end_index += template[end_index] == '\n' and 1 or 0
             elif leading_whitespace:
                 match_index += len(leading_whitespace)
                 leading_whitespace = ''

             # Avoid adding spurious empty strings to the parse tree.
             if start_index != match_index:
                 parsed_template.add(template[start_index:match_index])

             start_index = end_index

             if tag_type in ('#', '^'):
                 # Cache current state.
                 state = (tag_type, end_index, section_key, parsed_template)
                 states.append(state)

                 # Initialize new state
                 section_key, parsed_template = tag_key, ParsedTemplate()
                 continue

             if tag_type == '/':
                 if tag_key != section_key:
                     raise ParsingError("Section end tag mismatch: %s != %s" % (tag_key, section_key))

                 # Restore previous state with newly found section data.
                 parsed_section = parsed_template

                 (tag_type, section_start_index, section_key, parsed_template) = states.pop()
                 node = self._make_section_node(template, tag_type, tag_key, parsed_section,
                                                section_start_index, match_index)

             else:
                 node = self._make_interpolation_node(tag_type, tag_key, leading_whitespace)

             parsed_template.add(node)

         # Avoid adding spurious empty strings to the parse tree.
         if start_index != len(template):
             parsed_template.add(template[start_index:])

         return parsed_template

     def _make_interpolation_node(self, tag_type, tag_key, leading_whitespace):
         """
         Create and return a non-section node for the parse tree.

         """
         # TODO: switch to using a dictionary instead of a bunch of ifs and elifs.
         if tag_type == '!':
             return _CommentNode()

         if tag_type == '=':
             delimiters = tag_key.split()
             self._change_delimiters(delimiters)
             return _ChangeNode(delimiters)

         if tag_type == '':
             return _EscapeNode(tag_key)

         if tag_type == '&':
             return _LiteralNode(tag_key)

         if tag_type == '>':
             return _PartialNode(tag_key, leading_whitespace)

         raise Exception("Invalid symbol for interpolation tag: %s" % repr(tag_type))

     def _make_section_node(self, template, tag_type, tag_key, parsed_section,
                            section_start_index, section_end_index):
         """
         Create and return a section node for the parse tree.

         """
         if tag_type == '#':
             return _SectionNode(tag_key, parsed_section, self._delimiters,
                                template, section_start_index, section_end_index)

         if tag_type == '^':
             return _InvertedNode(tag_key, parsed_section)

         raise Exception("Invalid symbol for section tag: %s" % repr(tag_type))
	# coding: utf-8

	"""
	Exposes a parse() function to parse template strings.

	"""

	import re

	from pystache import defaults
	from pystache.parsed import ParsedTemplate


	# Characters that end a line; the parser checks the characters on either
	# side of a tag against this list to decide whether the tag stands alone.
	END_OF_LINE_CHARACTERS = [u'\r', u'\n']
	# Captures the first character of each line that has one ("." does not
	# match a newline, and re.M lets "^" match at every line start).  The
	# pattern contains no backslashes, so the plain u'' literal has the same
	# value as the former ur'' literal, which Python 3 rejects as a syntax error.
	NON_BLANK_RE = re.compile(u'^(.)', re.M)


	# TODO: add some unit tests for this.
	# TODO: add a test case that checks for spurious spaces.
	# TODO: add test cases for delimiters.
	def parse(template, delimiters=None):
	    """
	    Turn a unicode template string into a ParsedTemplate instance.

	    Arguments:

	      template: the template to parse.  Its type must be exactly unicode;
	        any other type is rejected.

	      delimiters: a 2-tuple of delimiters, handed to the parser.  When
	        None, the parser falls back to the package default.

	    Raises:

	      Exception: if template is not a unicode string.

	    Examples:

	    >>> parsed = parse(u"Hey {{#who}}{{name}}!{{/who}}")
	    >>> print str(parsed).replace('u', '')  # This is a hack to get the test to pass both in Python 2 and 3.
	    ['Hey ', _SectionNode(key='who', index_begin=12, index_end=21, parsed=[_EscapeNode(key='name'), '!'])]

	    """
	    if type(template) is not unicode:
	        raise Exception("Template is not unicode: %s" % type(template))
	    parser = _Parser(delimiters)
	    return parser.parse(template)


	def _compile_template_re(delimiters):
	"""
	Return a regular expression object (re.RegexObject) instance.

	"""
	# The possible tag type characters following the opening tag,
	# excluding "=" and "{".
	tag_types = "!>&/#^"

	# TODO: are we following this in the spec?
	#
	# The tag's content MUST be a non-whitespace character sequence
	# NOT containing the current closing delimiter.
	#
	tag = r"""
	(?P<whitespace>[\ \t]*)
	%(otag)s \s*
	(?:
	(?P<change>=) \s* (?P<delims>.+?) \s* = \|
	(?P<raw>{) \s* (?P<raw_name>.+?) \s* } \|
	(?P<tag>[%(tag_types)s]?) \s* (?P<tag_key>[\s\S]+?)
	)
	\s* %(ctag)s
	""" % {'tag_types': tag_types, 'otag': re.escape(delimiters[0]), 'ctag': re.escape(delimiters[1])}

	return re.compile(tag, re.VERBOSE)


	class ParsingError(Exception):
	    """Raised when a template cannot be parsed, e.g. on a mismatched section end tag."""


	## Node types

	def _format(obj, exclude=None):
	if exclude is None:
	exclude = []
	exclude.append('key')
	attrs = obj.__dict__
	names = list(set(attrs.keys()) - set(exclude))
	names.sort()
	names.insert(0, 'key')
	args = ["%s=%s" % (name, repr(attrs[name])) for name in names]
	return "%s(%s)" % (obj.__class__.__name__, ", ".join(args))


	class _CommentNode(object):

	def __repr__(self):
	return _format(self)

	def render(self, engine, context):
	return u''


	class _ChangeNode(object):

	def __init__(self, delimiters):
	self.delimiters = delimiters

	def __repr__(self):
	return _format(self)

	def render(self, engine, context):
	return u''


	class _EscapeNode(object):

	def __init__(self, key):
	self.key = key

	def __repr__(self):
	return _format(self)

	def render(self, engine, context):
	s = engine.fetch_string(context, self.key)
	return engine.escape(s)


	class _LiteralNode(object):

	def __init__(self, key):
	self.key = key

	def __repr__(self):
	return _format(self)

	def render(self, engine, context):
	s = engine.fetch_string(context, self.key)
	return engine.literal(s)


	class _PartialNode(object):

	def __init__(self, key, indent):
	self.key = key
	self.indent = indent

	def __repr__(self):
	return _format(self)

	def render(self, engine, context):
	template = engine.resolve_partial(self.key)
	# Indent before rendering.
	template = re.sub(NON_BLANK_RE, self.indent + ur'\1', template)

	return engine.render(template, context)


	class _InvertedNode(object):

	def __init__(self, key, parsed_section):
	self.key = key
	self.parsed_section = parsed_section

	def __repr__(self):
	return _format(self)

	def render(self, engine, context):
	# TODO: is there a bug because we are not using the same
	# logic as in fetch_string()?
	data = engine.resolve_context(context, self.key)
	# Note that lambdas are considered truthy for inverted sections
	# per the spec.
	if data:
	return u''
	return self.parsed_section.render(engine, context)


	class _SectionNode(object):

	# TODO: the template_ and parsed_template_ arguments don't both seem
	# to be necessary. Can we remove one of them? For example, if
	# callable(data) is True, then the initial parsed_template isn't used.
	def __init__(self, key, parsed, delimiters, template, index_begin, index_end):
	self.delimiters = delimiters
	self.key = key
	self.parsed = parsed
	self.template = template
	self.index_begin = index_begin
	self.index_end = index_end

	def __repr__(self):
	return _format(self, exclude=['delimiters', 'template'])

	def render(self, engine, context):
	values = engine.fetch_section_data(context, self.key)

	parts = []
	for val in values:
	if callable(val):
	# Lambdas special case section rendering and bypass pushing
	# the data value onto the context stack. From the spec--
	#
	# When used as the data value for a Section tag, the
	# lambda MUST be treatable as an arity 1 function, and
	# invoked as such (passing a String containing the
	# unprocessed section contents). The returned value
	# MUST be rendered against the current delimiters, then
	# interpolated in place of the section.
	#
	# Also see--
	#
	# https://github.com/defunkt/pystache/issues/113
	#
	# TODO: should we check the arity?
	val = val(self.template[self.index_begin:self.index_end])
	val = engine._render_value(val, context, delimiters=self.delimiters)
	parts.append(val)
	continue

	context.push(val)
	parts.append(self.parsed.render(engine, context))
	context.pop()

	return unicode(''.join(parts))


	class _Parser(object):

	_delimiters = None
	_template_re = None

	def __init__(self, delimiters=None):
	if delimiters is None:
	delimiters = defaults.DELIMITERS

	self._delimiters = delimiters

	def _compile_delimiters(self):
	self._template_re = _compile_template_re(self._delimiters)

	def _change_delimiters(self, delimiters):
	self._delimiters = delimiters
	self._compile_delimiters()

	def parse(self, template):
	"""
	Parse a template string starting at some index.

	This method uses the current tag delimiter.

	Arguments:

	template: a unicode string that is the template to parse.

	index: the index at which to start parsing.

	Returns:

	a ParsedTemplate instance.

	"""
	self._compile_delimiters()

	start_index = 0
	content_end_index, parsed_section, section_key = None, None, None
	parsed_template = ParsedTemplate()

	states = []

	while True:
	match = self._template_re.search(template, start_index)

	if match is None:
	break

	match_index = match.start()
	end_index = match.end()

	matches = match.groupdict()

	# Normalize the matches dictionary.
	if matches['change'] is not None:
	matches.update(tag='=', tag_key=matches['delims'])
	elif matches['raw'] is not None:
	matches.update(tag='&', tag_key=matches['raw_name'])

	tag_type = matches['tag']
	tag_key = matches['tag_key']
	leading_whitespace = matches['whitespace']

	# Standalone (non-interpolation) tags consume the entire line,
	# both leading whitespace and trailing newline.
	did_tag_begin_line = match_index == 0 or template[match_index - 1] in END_OF_LINE_CHARACTERS
	did_tag_end_line = end_index == len(template) or template[end_index] in END_OF_LINE_CHARACTERS
	is_tag_interpolating = tag_type in ['', '&']

	if did_tag_begin_line and did_tag_end_line and not is_tag_interpolating:
	if end_index < len(template):
	end_index += template[end_index] == '\r' and 1 or 0
	if end_index < len(template):
	end_index += template[end_index] == '\n' and 1 or 0
	elif leading_whitespace:
	match_index += len(leading_whitespace)
	leading_whitespace = ''

	# Avoid adding spurious empty strings to the parse tree.
	if start_index != match_index:
	parsed_template.add(template[start_index:match_index])

	start_index = end_index

	if tag_type in ('#', '^'):
	# Cache current state.
	state = (tag_type, end_index, section_key, parsed_template)
	states.append(state)

	# Initialize new state
	section_key, parsed_template = tag_key, ParsedTemplate()
	continue

	if tag_type == '/':
	if tag_key != section_key:
	raise ParsingError("Section end tag mismatch: %s != %s" % (tag_key, section_key))

	# Restore previous state with newly found section data.
	parsed_section = parsed_template

	(tag_type, section_start_index, section_key, parsed_template) = states.pop()
	node = self._make_section_node(template, tag_type, tag_key, parsed_section,
	section_start_index, match_index)

	else:
	node = self._make_interpolation_node(tag_type, tag_key, leading_whitespace)

	parsed_template.add(node)

	# Avoid adding spurious empty strings to the parse tree.
	if start_index != len(template):
	parsed_template.add(template[start_index:])

	return parsed_template

	def _make_interpolation_node(self, tag_type, tag_key, leading_whitespace):
	"""
	Create and return a non-section node for the parse tree.

	"""
	# TODO: switch to using a dictionary instead of a bunch of ifs and elifs.
	if tag_type == '!':
	return _CommentNode()

	if tag_type == '=':
	delimiters = tag_key.split()
	self._change_delimiters(delimiters)
	return _ChangeNode(delimiters)

	if tag_type == '':
	return _EscapeNode(tag_key)

	if tag_type == '&':
	return _LiteralNode(tag_key)

	if tag_type == '>':
	return _PartialNode(tag_key, leading_whitespace)

	raise Exception("Invalid symbol for interpolation tag: %s" % repr(tag_type))

	def _make_section_node(self, template, tag_type, tag_key, parsed_section,
	section_start_index, section_end_index):
	"""
	Create and return a section node for the parse tree.

	"""
	if tag_type == '#':
	return _SectionNode(tag_key, parsed_section, self._delimiters,
	template, section_start_index, section_end_index)

	if tag_type == '^':
	return _InvertedNode(tag_key, parsed_section)

	raise Exception("Invalid symbol for section tag: %s" % repr(tag_type))