blob: 931f96a03943444c617eec89b02ca312a25c6e5a [file] [log] [blame]
#!/usr/bin/env python
"""ezt.py -- EaZy Templating
For documentation, please see: http://code.google.com/p/ezt/wiki/Syntax
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
#
# Copyright (C) 2001-2011 Greg Stein. All Rights Reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
#
# This software is maintained by Greg and is available at:
# http://code.google.com/p/ezt/
#
__author__ = 'Greg Stein'
__version__ = '1.0'
__license__ = 'BSD'
import numbers
import os
import re
import StringIO
import urllib
from types import IntType, FloatType, LongType

from six import string_types, text_type
#
# Formatting types
#
# Names accepted by _parse_format() (and so by the base_format argument and
# the [format "..."] directive). Each one is a key of FORMATTERS.
# 'raw': values are written out unchanged (no escaping function).
FORMAT_RAW = 'raw'
# 'html': values are escaped with _html_escape.
FORMAT_HTML = 'html'
# 'xml': currently escaped exactly like 'html'.
FORMAT_XML = 'xml'
# 'js': values are escaped with _js_escape.
FORMAT_JS = 'js'
# 'url': values are escaped with _url_escape.
FORMAT_URL = 'url'
#
# This regular expression matches four alternatives:
# expr: NEWLINE | DIRECTIVE | BRACKET | COMMENT
# DIRECTIVE: '[' ITEM (whitespace ARG)* ']'
# ITEM: STRING | NAME
# ARG: STRING | NAME | NUMBER
# STRING: '"' (not-slash-or-dquote | '\' anychar)* '"'
# NAME: (alpha | '_') (alphanum | '_' | '-' | '.')*
# NUMBER: digit+
# BRACKET: '[[]'
# COMMENT: '[#' not-rbracket* ']'
#
# Note: the above BNF is a bit loose around ITEM/ARG/NAME/NUMBER. The
# important point is that the first value in a directive must
# start with '_' or an alpha character (no digits). This greatly
# helps to avoid simple errors like '[0]' in templates.
#
# When used with the split() method, the return value will be composed of
# non-matching text and the three paren groups (NEWLINE, DIRECTIVE and
# BRACKET). Since the COMMENT matches are not placed into a group, they are
# considered a "splitting" value and simply dropped.
#
# ITEM: a double-quoted string (backslash escapes allowed inside) or a name
# that begins with a letter or underscore.
_item = r'(?:"(?:[^\\"]|\\.)*"|[A-Za-z_][-\w.]*)'
# ARG: a double-quoted string, or any run of word characters, '-' and '.'
# (unlike ITEM, an ARG may begin with a digit).
_arg = r'(?:"(?:[^\\"]|\\.)*"|[-\w.]+)'
# The template splitter. It has exactly three capture groups -- NEWLINE, the
# text of a DIRECTIVE *without* its surrounding brackets, and the '[[]'
# BRACKET escape -- so split() yields TEXT followed by three entries per
# match. The final alternative (COMMENT) captures nothing.
_re_parse = re.compile(r'(\r?\n)|\[(%s(?: +%s)*)\]|(\[\[\])|\[#[^\]]*\]' %
(_item, _arg))
# Pulls the individual arguments (quoted strings or bare words) out of the
# text of a single directive.
_re_args = re.compile(r'"(?:[^\\"]|\\.)*"|[-\w.]+')
# block commands and their argument counts
_block_cmd_specs = { 'if-index':2, 'for':1, 'is':2, 'define':1, 'format':1 }
_block_cmds = list(_block_cmd_specs.keys())
# two regular expressions for compressing whitespace. the first is used to
# compress any whitespace including a newline into a single newline. the
# second regex is used to compress runs of whitespace into a single space.
# NOTE(review): only _re_whitespace is referenced by the code visible in this
# file; _re_newline looks unused -- confirm before relying on or removing it.
_re_newline = re.compile('[ \t\r\f\v]*\n\\s*')
# matches two or more consecutive whitespace characters
_re_whitespace = re.compile(r'\s\s+')
# this regex is used to substitute arguments into a value. we split the value,
# replace the relevant pieces, and then put it all back together. splitting
# will produce a list of: TEXT ( splitter TEXT )*. splitter will be '%' or
# an integer.
# example: 'a %0 b %% c' splits into ['a ', '0', ' b ', '%', ' c'].
_re_subst = re.compile('%(%|[0-9]+)')
class Template:
    """A parsed EZT template which can be rendered against a data object.

    Parsing turns the template text into a 'program': a list whose items are
    either literal strings or (method, args, filename, line_number) tuples.
    generate() walks that program, writing strings as-is and calling the
    methods to produce the dynamic parts.
    """

    def __init__(self, fname=None, compress_whitespace=1,
                 base_format=FORMAT_RAW):
        """Create a template and, if fname is given, parse that file.

        fname -> optional pathname of a template file to parse immediately.
        compress_whitespace -> when true, whitespace in the template text is
          compressed while parsing.
        base_format -> format name(s) applied to printed values outside of
          any [format] block.
        """
        self.compress_whitespace = compress_whitespace
        if fname:
            self.parse_file(fname, base_format)

    def parse_file(self, fname, base_format=FORMAT_RAW):
        "fname -> a string object with pathname of file containing an EZT template."
        self.parse(_FileReader(fname), base_format)

    def parse(self, text_or_reader, base_format=FORMAT_RAW):
        """Parse the template specified by text_or_reader.

        The argument should be a string containing the template, or it should
        specify a subclass of ezt.Reader which can read templates. The base
        format for printing values is given by base_format.
        """
        if not isinstance(text_or_reader, Reader):
            # assume the argument is a plain text string
            text_or_reader = _TextReader(text_or_reader)

        self.program = self._parse(text_or_reader,
                                   base_printer=_parse_format(base_format))

    def generate(self, fp, data):
        """Render the parsed template to fp using the values found in data.

        fp -> an object with a write() method.
        data -> either an object whose attributes are the template values, or
          a dictionary-like object (it is then wrapped so that its keys
          become attributes).
        """
        if hasattr(data, '__getitem__') or callable(getattr(data, 'keys', None)):
            # a dictionary-like object was passed. convert it to an
            # attribute-based object.
            class _data_ob:
                def __init__(self, d):
                    vars(self).update(d)
            data = _data_ob(data)

        ctx = _context()
        ctx.data = data
        ctx.for_index = {}
        ctx.defines = {}
        self._execute(self.program, fp, ctx)

    def _parse(self, reader, for_names=None, file_args=(), base_printer=None):
        """reader -> a Reader object providing the template text.

        This is a private helper function doing the real work for method parse.
        It returns the parsed template as a 'program'. This program is a
        sequence made out of strings or
        (function, argument, filename, line_number) tuples.

        for_names -> names of the [for] loops active around an include.
        file_args -> prepared references passed as arguments to an include.
        base_printer -> sequence of formatting functions applied to values
          printed outside of any [format] block.

        Raises ArgCountSyntaxError, UnmatchedEndError, UnclosedBlocksError or
        BadFormatConstantError when the template is malformed.

        Note: comment directives [# ...] are automatically dropped by _re_parse.
        """
        filename = reader.filename()
        # parse the template program into: (TEXT NEWLINE DIRECTIVE BRACKET)* TEXT
        parts = _re_parse.split(reader.text)

        program = []
        # one entry per open block:
        #   [cmd, program index, args, true-section, line number]
        stack = []
        if not for_names:
            for_names = []

        if base_printer is None:
            base_printer = ()
        printers = [base_printer]

        one_newline_copied = False
        line_number = 1
        for i, piece in enumerate(parts):
            which = i % 4  # discriminate between: TEXT NEWLINE DIRECTIVE BRACKET
            if which == 0:
                # TEXT. append if non-empty.
                if piece:
                    if self.compress_whitespace:
                        piece = _re_whitespace.sub(' ', piece)
                    program.append(piece)
                    one_newline_copied = False
            elif which == 1:
                # NEWLINE. append unless compress_whitespace requested
                if piece:
                    line_number += 1
                    if self.compress_whitespace:
                        # only the first of a run of newlines is kept
                        if not one_newline_copied:
                            program.append('\n')
                            one_newline_copied = True
                    else:
                        program.append(piece)
            elif which == 3:
                # BRACKET directive. append '[' if present.
                if piece:
                    program.append('[')
                    one_newline_copied = False
            elif piece:
                # DIRECTIVE is present.
                one_newline_copied = False
                args = _re_args.findall(piece)
                cmd = args[0]
                if cmd == 'else':
                    if len(args) > 1:
                        raise ArgCountSyntaxError(str(args[1:]), filename,
                                                  line_number)
                    if not stack:
                        # an [else] outside of any block has nothing to split
                        raise UnmatchedEndError('[else] without an open block',
                                                filename, line_number)
                    ### check: don't allow for 'for' cmd
                    idx = stack[-1][1]
                    true_section = program[idx:]
                    del program[idx:]
                    stack[-1][3] = true_section
                elif cmd == 'end':
                    if len(args) > 1:
                        raise ArgCountSyntaxError(str(args[1:]), filename,
                                                  line_number)
                    # note: true-section may be None
                    try:
                        cmd, idx, args, true_section, _ = stack.pop()
                    except IndexError:
                        raise UnmatchedEndError(None, filename, line_number)
                    else_section = program[idx:]
                    if cmd == 'format':
                        # a format block only changes the active printer
                        printers.pop()
                    else:
                        func = getattr(self, '_cmd_' + cmd.replace('-', '_'))
                        program[idx:] = [(func,
                                          (args, true_section, else_section),
                                          filename, line_number)]
                        if cmd == 'for':
                            for_names.pop()
                elif cmd in _block_cmd_specs:
                    # every block command takes an exact number of arguments;
                    # reject too few as well as too many so that a malformed
                    # directive is reported here rather than as an
                    # IndexError/ValueError later on.
                    if len(args) != _block_cmd_specs[cmd] + 1:
                        raise ArgCountSyntaxError(str(args[1:]), filename,
                                                  line_number)
                    ### this assumes arg1 is always a ref unless cmd is 'define'
                    if cmd != 'define':
                        args[1] = _prepare_ref(args[1], for_names, file_args)

                    # handle arg2 for the 'is' command
                    if cmd == 'is':
                        args[2] = _prepare_ref(args[2], for_names, file_args)
                    elif cmd == 'for':
                        for_names.append(args[1][0])  # append the refname
                    elif cmd == 'format':
                        # a non-empty refname means the argument was not a
                        # quoted string constant
                        if args[1][0]:
                            raise BadFormatConstantError(str(args[1:]),
                                                         filename, line_number)
                        printers.append(_parse_format(args[1][1]))

                    # remember the cmd, current pos, args, and a section
                    # placeholder
                    stack.append([cmd, len(program), args[1:], None,
                                  line_number])
                elif cmd == 'include' or cmd == 'insertfile':
                    is_insertfile = (cmd == 'insertfile')
                    # the file to include/insert is mandatory
                    if len(args) < 2:
                        raise ArgCountSyntaxError(str(args), filename,
                                                  line_number)
                    # extra arguments are meaningless when using insertfile
                    if is_insertfile and len(args) != 2:
                        raise ArgCountSyntaxError(str(args), filename,
                                                  line_number)
                    if args[1][0] == '"':
                        # constant filename: resolve it now, at parse time
                        include_filename = args[1][1:-1]
                        if is_insertfile:
                            program.append(
                                reader.read_other(include_filename).text)
                        else:
                            f_args = [_prepare_ref(arg, for_names, file_args)
                                      for arg in args[2:]]
                            program.extend(
                                self._parse(reader.read_other(include_filename),
                                            for_names, f_args, printers[-1]))
                    else:
                        # filename comes from the data: resolve it at
                        # generate time
                        if len(args) != 2:
                            raise ArgCountSyntaxError(str(args), filename,
                                                      line_number)
                        if is_insertfile:
                            cmd = self._cmd_insertfile
                        else:
                            cmd = self._cmd_include
                        program.append(
                            (cmd,
                             (_prepare_ref(args[1], for_names, file_args),
                              reader, printers[-1]),
                             filename, line_number))
                elif cmd == 'if-any':
                    f_args = [_prepare_ref(arg, for_names, file_args)
                              for arg in args[1:]]
                    stack.append(['if-any', len(program), f_args, None,
                                  line_number])
                else:
                    # implied PRINT command
                    if len(args) > 1:
                        f_args = [_prepare_ref(arg, for_names, file_args)
                                  for arg in args]
                        program.append((self._cmd_subst,
                                        (printers[-1], f_args[0], f_args[1:]),
                                        filename, line_number))
                    else:
                        valref = _prepare_ref(args[0], for_names, file_args)
                        program.append((self._cmd_print,
                                        (printers[-1], valref),
                                        filename, line_number))

        if stack:
            raise UnclosedBlocksError('Block opened at line %s' % stack[-1][4],
                                      filename=filename)
        return program

    def _execute(self, program, fp, ctx):
        """This private helper function takes a 'program' sequence as created
        by the method '_parse' and executes it step by step. strings are
        written to the file object 'fp' and functions are called.
        """
        for step in program:
            if isinstance(step, string_types):
                fp.write(step)
            else:
                method, method_args, filename, line_number = step
                method(method_args, fp, ctx, filename, line_number)

    def _cmd_print(self, transforms_and_valref, fp, ctx, filename,
                   line_number):
        "Write one value to fp after applying the active formatting functions."
        transforms, valref = transforms_and_valref
        value = _get_value(valref, ctx, filename, line_number)
        # if the value has a 'read' attribute, then it is a stream: copy it
        if hasattr(value, 'read'):
            while 1:
                chunk = value.read(16384)
                if not chunk:
                    break
                for t in transforms:
                    chunk = t(chunk)
                fp.write(chunk)
        else:
            for t in transforms:
                value = t(value)
            fp.write(value)

    def _cmd_subst(self, transforms_valref_args, fp, ctx, filename,
                   line_number):
        """Write a format value with its %N placeholders substituted.

        '%%' produces a literal '%'. '%N' produces the N-th argument, or
        '<undef>' when there is no such argument. Only substituted arguments
        are passed through the formatting functions.
        """
        transforms, valref, args = transforms_valref_args
        fmt = _get_value(valref, ctx, filename, line_number)
        # splitting yields TEXT at even positions and splitters at odd ones
        for i, piece in enumerate(_re_subst.split(fmt)):
            if i % 2 == 1 and piece != '%':
                idx = int(piece)
                if idx < len(args):
                    piece = _get_value(args[idx], ctx, filename, line_number)
                else:
                    piece = '<undef>'
                for t in transforms:
                    piece = t(piece)
            fp.write(piece)

    def _cmd_include(self, valref_reader_printer, fp, ctx, filename,
                     line_number):
        "Parse and execute the template whose name is taken from the data."
        valref, reader, printer = valref_reader_printer
        fname = _get_value(valref, ctx, filename, line_number)
        ### note: we don't have the set of for_names to pass into this parse.
        ### I don't think there is anything to do but document it
        self._execute(self._parse(reader.read_other(fname),
                                  base_printer=printer),
                      fp, ctx)

    def _cmd_insertfile(self, valref_reader_printer, fp, ctx, filename,
                        line_number):
        "Write, unparsed, the text of the file whose name is taken from the data."
        valref, reader, _ = valref_reader_printer
        fname = _get_value(valref, ctx, filename, line_number)
        fp.write(reader.read_other(fname).text)

    def _cmd_if_any(self, args, fp, ctx, filename, line_number):
        "If any value is a non-empty string or non-empty list, then T else F."
        (valrefs, t_section, f_section) = args
        value = 0
        for valref in valrefs:
            if _get_value(valref, ctx, filename, line_number):
                value = 1
                break
        self._do_if(value, t_section, f_section, fp, ctx)

    def _cmd_if_index(self, args, fp, ctx, filename, line_number):
        """Test the position of the current [for] iteration.

        The second argument is 'even', 'odd', 'first', 'last' or an integer
        which is compared with the zero-based index.
        """
        ((valref, value), t_section, f_section) = args
        seq, idx = ctx.for_index[valref[0]]
        if value == 'even':
            value = idx % 2 == 0
        elif value == 'odd':
            value = idx % 2 == 1
        elif value == 'first':
            value = idx == 0
        elif value == 'last':
            value = idx == len(seq) - 1
        else:
            value = idx == int(value)
        self._do_if(value, t_section, f_section, fp, ctx)

    def _cmd_is(self, args, fp, ctx, filename, line_number):
        "Compare two values for equality, ignoring case."
        ((left_ref, right_ref), t_section, f_section) = args
        right_value = _get_value(right_ref, ctx, filename, line_number)
        left_value = _get_value(left_ref, ctx, filename, line_number)
        value = left_value.lower() == right_value.lower()
        self._do_if(value, t_section, f_section, fp, ctx)

    def _do_if(self, value, t_section, f_section, fp, ctx):
        "Execute t_section when value is true, otherwise f_section (if any)."
        if t_section is None:
            # there was no [else]: the only section collected is the true one
            t_section = f_section
            f_section = None
        if value:
            section = t_section
        else:
            section = f_section
        if section is not None:
            self._execute(section, fp, ctx)

    def _cmd_for(self, args, fp, ctx, filename, line_number):
        """Execute the block once for each item of the referenced sequence.

        Raises NeedSequenceError if the referenced value is a string.
        """
        ((valref,), unused, section) = args
        seq = _get_value(valref, ctx, filename, line_number)
        refname = valref[0]
        if isinstance(seq, string_types):
            raise NeedSequenceError(refname, filename, line_number)
        # _get_value() and [if-index] read the current item through this entry
        ctx.for_index[refname] = idx = [seq, 0]
        for _ in seq:
            self._execute(section, fp, ctx)
            idx[1] = idx[1] + 1
        del ctx.for_index[refname]

    def _cmd_define(self, args, fp, ctx, filename, line_number):
        "Render the block into a string and store it under the given name."
        ((name,), unused, section) = args
        valfp = StringIO.StringIO()
        if section is not None:
            self._execute(section, valfp, ctx)
        ctx.defines[name] = valfp.getvalue()
def boolean(value):
    "Map a truth value to 'yes' or None, for use with [if-any bool_var]."
    return 'yes' if value else None
def _prepare_ref(refname, for_names, file_args):
"""refname -> a string containing a dotted identifier. example:"foo.bar.bang"
for_names -> a list of active for sequences.
Returns a `value reference', a 3-tuple made out of (refname, start, rest),
for fast access later.
"""
# is the reference a string constant?
if refname[0] == '"':
return None, refname[1:-1], None
parts = refname.split('.')
start = parts[0]
rest = parts[1:]
# if this is an include-argument, then just return the prepared ref
if start[:3] == 'arg':
try:
idx = int(start[3:])
except ValueError:
pass
else:
if idx < len(file_args):
orig_refname, start, more_rest = file_args[idx]
if more_rest is None:
# the include-argument was a string constant
return None, start, None
# prepend the argument's "rest" for our further processing
rest[:0] = more_rest
# rewrite the refname to ensure that any potential 'for' processing
# has the correct name
### this can make it hard for debugging include files since we lose
### the 'argNNN' names
if not rest:
return start, start, [ ]
refname = start + '.' + '.'.join(rest)
if for_names:
# From last to first part, check if this reference is part of a for loop
for i in range(len(parts), 0, -1):
name = '.'.join(parts[:i])
if name in for_names:
return refname, name, parts[i:]
return refname, start, rest
def _get_value(refname_start_rest, ctx, filename, line_number):
"""(refname, start, rest) -> a prepared `value reference' (see above).
ctx -> an execution context instance.
Does a name space lookup within the template name space. Active
for blocks take precedence over data dictionary members with the
same name.
"""
refname, start, rest = refname_start_rest
if rest is None:
# it was a string constant
return start
# get the starting object
if ctx.for_index.has_key(start):
list, idx = ctx.for_index[start]
ob = list[idx]
elif ctx.defines.has_key(start):
ob = ctx.defines[start]
elif hasattr(ctx.data, start):
ob = getattr(ctx.data, start)
else:
raise UnknownReference(refname, filename, line_number)
# walk the rest of the dotted reference
for attr in rest:
try:
if isinstance(ob, dict):
ob = ob[attr]
else:
ob = getattr(ob, attr)
except AttributeError, KeyError:
raise UnknownReference(refname, filename, line_number)
# make sure we return a string instead of some various Python types
if isinstance(ob, (IntType, FloatType, LongType)):
return str(ob)
if ob is None:
return ''
# string or a sequence
return ob
def _replace(s, replace_map):
for orig, repl in replace_map:
s = s.replace(orig, repl)
return s
# (original, replacement) pairs for JavaScript escaping, applied in order by
# _replace(). The backslash comes first so that the backslashes introduced by
# the later replacements are not escaped again.
REPLACE_JS_MAP = (
    ('\\', r'\\'),
    ('\t', r'\t'),
    ('\n', r'\n'),
    ('\r', r'\r'),
    ('"', r'\x22'),
    ('\'', r'\x27'),
    ('&', r'\x26'),
    ('<', r'\x3c'),
    ('>', r'\x3e'),
    ('=', r'\x3d'),
)

# Various unicode whitespace
REPLACE_JS_UNICODE_MAP = (
    (u'\u0085', r'\u0085'),
    (u'\u2028', r'\u2028'),
    (u'\u2029', r'\u2029'),
)

# Why not cgi.escape? It doesn't do single quotes which are occasionally
# used to contain HTML attributes and event handler definitions (unfortunately)
# The ampersand comes first so that the entities produced by the later
# replacements are not escaped again.
REPLACE_HTML_MAP = (
    ('&', '&amp;'),
    ('<', '&lt;'),
    ('>', '&gt;'),
    ('"', '&quot;'),
    ('\'', '&#39;'),
)
def _js_escape(s):
    """Escape s for use inside a JavaScript string literal.

    The REPLACE_JS_MAP replacements are always applied; the unicode
    whitespace replacements are applied only to text (unicode) strings.
    """
    escaped = _replace(s, REPLACE_JS_MAP)
    ### perhaps attempt to coerce the string to unicode and then replace?
    if not isinstance(escaped, text_type):
        return escaped
    return _replace(escaped, REPLACE_JS_UNICODE_MAP)
def _html_escape(s):
    "Escape s for HTML by applying the REPLACE_HTML_MAP replacements."
    escaped = _replace(s, REPLACE_HTML_MAP)
    return escaped
def _url_escape(s):
    "Escape s for use in a URL; text (unicode) strings are UTF-8 encoded first."
    ### quote_plus barfs on non-ASCII characters. According to
    ### http://www.w3.org/International/O-URL-code.html URIs should be
    ### UTF-8 encoded first.
    encoded = s.encode('utf8') if isinstance(s, text_type) else s
    return urllib.quote_plus(encoded)
# Maps each format name to the escaping function that _parse_format() selects
# for it. None means no function is applied, i.e. the value is written as-is.
FORMATTERS = {
FORMAT_RAW: None,
FORMAT_HTML: _html_escape,
FORMAT_XML: _html_escape, ### use the same quoting as HTML for now
FORMAT_JS: _js_escape,
FORMAT_URL: _url_escape,
}
def _parse_format(format_string=FORMAT_RAW):
    """Turn a comma-separated list of format names into escaping functions.

    Returns the list of functions in the order the names were given; names
    mapped to None in FORMATTERS contribute nothing.
    Raises UnknownFormatConstantError if any name is not in FORMATTERS.
    """
    selected = []
    for name in format_string.split(','):
        try:
            escaper = FORMATTERS[name]
        except KeyError:
            raise UnknownFormatConstantError(format_string)
        if escaper is not None:
            selected.append(escaper)
    return selected
class _context:
"""A container for the execution context"""
class Reader:
    """Base class used to recognise objects that can supply template text."""

    def filename(self):
        "Return a placeholder naming this object; subclasses return a real name."
        description = repr(self)
        return '(%s does not provide filename() method)' % (description,)
class _FileReader(Reader):
    """Reads templates from the filesystem."""

    def __init__(self, fname):
        "Read the whole of the file fname and remember its directory."
        # close the file as soon as it has been read instead of leaving that
        # to the garbage collector
        with open(fname, 'rb') as fp:
            self.text = fp.read()
        self._dir = os.path.dirname(fname)
        self.fname = fname

    def read_other(self, relative):
        "Return a reader for a file named relative to this file's directory."
        return _FileReader(os.path.join(self._dir, relative))

    def filename(self):
        "Return the pathname this reader was created with."
        return self.fname
class _TextReader(Reader):
    """Wraps template text that was supplied directly as a string."""

    def __init__(self, text):
        self.text = text

    def filename(self):
        "Return the fixed label used for in-memory templates."
        return '(text)'

    def read_other(self, relative):
        "Always fails: plain text has no location to resolve other files from."
        raise BaseUnavailableError()
class EZTException(Exception):
    """Common base of every exception raised by EZT."""

    def __init__(self, message=None, filename=None, line_number=None):
        self.message = message
        self.filename = filename
        self.line_number = line_number

    def __str__(self):
        "Join whichever of message, file name and line number were supplied."
        message = self.message
        filename = self.filename
        line_number = self.line_number

        fragments = []
        if message is not None:
            fragments.append(message)
        if filename is not None:
            fragments.append('in file ' + str(filename))
        if line_number is not None:
            fragments.append('at line ' + str(line_number))
        return ' '.join(fragments)
class ArgCountSyntaxError(EZTException):
    """Raised while parsing when a directive has the wrong number of arguments."""
class UnknownReference(EZTException):
    """Raised when a name used by the template cannot be found in the data."""
class NeedSequenceError(EZTException):
    """Raised when a [for] directive is given a string instead of a sequence."""
class UnclosedBlocksError(EZTException):
    """Raised when the template ends with a block still open (missing [end]?)."""
class UnmatchedEndError(EZTException):
    """Raised for an [end] with no open block (a misspelled if directive?)."""
class BaseUnavailableError(EZTException):
    """Raised when there is no base location, so other files cannot be read."""
class BadFormatConstantError(EZTException):
    """Raised when a [format] argument is not a quoted string constant."""
class UnknownFormatConstantError(EZTException):
    """Raised when a format name is not one of the known FORMAT_* values."""
# --- standard test environment ---
def test_parse():
    """Check how _re_parse.split() slices a template.

    _re_parse has three capture groups, so every match contributes three
    entries to the result -- NEWLINE, DIRECTIVE (without its brackets) and
    BRACKET -- of which the ones that did not take part are None. Together
    with the TEXT in front of each match this is the four-entry cycle that
    Template._parse relies on.
    """
    assert _re_parse.split('[a]') == ['', None, 'a', None, '']
    assert _re_parse.split('[a] [b]') == \
        ['', None, 'a', None, ' ', None, 'b', None, '']
    assert _re_parse.split('[a c] [b]') == \
        ['', None, 'a c', None, ' ', None, 'b', None, '']
    assert _re_parse.split('x [a] y [b] z') == \
        ['x ', None, 'a', None, ' y ', None, 'b', None, ' z']
    assert _re_parse.split('[a "b" c "d"]') == \
        ['', None, 'a "b" c "d"', None, '']
    assert _re_parse.split(r'["a \"b[foo]" c.d f]') == \
        ['', None, '"a \\"b[foo]" c.d f', None, '']
    # newline, bracket escape and comment: a comment matches without
    # filling any of the three groups
    assert _re_parse.split('x\n[[]y[# note]z') == \
        ['x', '\n', None, None, '', None, None, '[[]',
         'y', None, None, None, 'z']
def _test(argv):
    "Run this module's doctests (verbosely if '-v' is in argv); return the result."
    import doctest
    import ezt
    return doctest.testmod(ezt, verbose=("-v" in argv))
if __name__ == "__main__":
    # run the module's self-test; the exit status is the first item of the
    # result returned by _test()
    import sys
    status = _test(sys.argv)[0]
    sys.exit(status)