#default_value:foo
#include: other.manifest
#
#[test_name.js]
#  expected: ERROR
#
#  [subtest 1]
#  expected:
#    if os == win: FAIL #This is a comment
#    PASS
#
# TODO: keep comments in the tree
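#
# Example usage (illustrative; input must be a byte string or a stream):
#
#   tree = parse(b"[test_name.js]\n  expected: ERROR\n")
#   # `tree` is the root DataNode of the parsed manifest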
from __future__ import unicode_literals
from cStringIO import StringIO
from node import (AtomNode, BinaryExpressionNode, BinaryOperatorNode,
ConditionalNode, DataNode, IndexNode, KeyValueNode, ListNode,
NumberNode, StringNode, UnaryExpressionNode,
UnaryOperatorNode, ValueNode, VariableNode)
class ParseError(Exception):
def __init__(self, filename, line, detail):
self.line = line
self.filename = filename
self.detail = detail
self.message = "%s: %s line %s" % (self.detail, self.filename, self.line)
Exception.__init__(self, self.message)
# Unique sentinel values (distinct instances, so each compares unequal to
# the others and to any input character).
eol = object()
group_start = object()
group_end = object()
digits = "0123456789"
open_parens = "[("
close_parens = "])"
parens = open_parens + close_parens
operator_chars = "=!"
unary_operators = ["not"]
binary_operators = ["==", "!=", "and", "or"]
operators = ["==", "!=", "not", "and", "or"]
atoms = {"True": True,
"False": False,
"Reset": object()}
def decode(s):
assert isinstance(s, unicode)
return s
def precedence(operator_node):
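    # Operators earlier in the `operators` list bind more tightly, so "=="
    # and "!=" have the highest precedence and "or" the lowest.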
return len(operators) - operators.index(operator_node.data)
class TokenTypes(object):
def __init__(self):
for type in ["group_start", "group_end", "paren", "list_start", "list_end", "separator", "ident", "string", "number", "atom", "eof"]:
setattr(self, type, type)
token_types = TokenTypes()
class Tokenizer(object):
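    """Generator-based state machine that turns manifest text into
    (token_type, value) pairs.

    `self.state` holds the state method to run next; state methods may
    yield tokens as they consume characters. Indentation changes are
    reported as group_start/group_end tokens tracked via `indent_levels`.
    """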
def __init__(self):
self.reset()
def reset(self):
self.indent_levels = [0]
self.state = self.line_start_state
self.next_state = self.data_line_state
self.line_number = 0
def tokenize(self, stream):
self.reset()
assert not isinstance(stream, unicode)
if isinstance(stream, str):
stream = StringIO(stream)
if not hasattr(stream, "name"):
self.filename = ""
else:
self.filename = stream.name
self.next_line_state = self.line_start_state
for i, line in enumerate(stream):
assert isinstance(line, str)
self.state = self.next_line_state
assert self.state is not None
states = []
self.next_line_state = None
self.line_number = i + 1
self.index = 0
self.line = line.decode('utf-8').rstrip()
assert isinstance(self.line, unicode)
while self.state != self.eol_state:
states.append(self.state)
tokens = self.state()
if tokens:
for token in tokens:
yield token
self.state()
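        # Pad the end of the stream with eof tokens so the parser can always
        # consume another token without exhausting this generator.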
while True:
yield (token_types.eof, None)
def char(self):
if self.index == len(self.line):
return eol
return self.line[self.index]
def consume(self):
if self.index < len(self.line):
self.index += 1
def peek(self, length):
return self.line[self.index:self.index + length]
def skip_whitespace(self):
while self.char() == " ":
self.consume()
def eol_state(self):
if self.next_line_state is None:
self.next_line_state = self.line_start_state
def line_start_state(self):
self.skip_whitespace()
if self.char() == eol:
self.state = self.eol_state
return
if self.index > self.indent_levels[-1]:
self.indent_levels.append(self.index)
yield (token_types.group_start, None)
else:
while self.index < self.indent_levels[-1]:
self.indent_levels.pop()
yield (token_types.group_end, None)
                # This is terrible; if we were parsing an expression then
                # next_state will be expr_or_value_state, but after a dedent
                # the next line must be a heading or a key, so we go back to
                # data_line_state.
self.next_state = self.data_line_state
if self.index != self.indent_levels[-1]:
raise ParseError(self.filename, self.line_number, "Unexpected indent")
self.state = self.next_state
def data_line_state(self):
if self.char() == "[":
yield (token_types.paren, self.char())
self.consume()
self.state = self.heading_state
else:
self.state = self.key_state
def heading_state(self):
rv = ""
while True:
c = self.char()
if c == "\\":
rv += self.consume_escape()
elif c == "]":
break
elif c == eol:
raise ParseError(self.filename, self.line_number, "EOL in heading")
else:
rv += c
self.consume()
yield (token_types.string, decode(rv))
yield (token_types.paren, "]")
self.consume()
self.state = self.line_end_state
self.next_state = self.data_line_state
def key_state(self):
rv = ""
while True:
c = self.char()
if c == " ":
self.skip_whitespace()
if self.char() != ":":
raise ParseError(self.filename, self.line_number, "Space in key name")
break
elif c == ":":
break
elif c == eol:
raise ParseError(self.filename, self.line_number, "EOL in key name (missing ':'?)")
elif c == "\\":
rv += self.consume_escape()
else:
rv += c
self.consume()
yield (token_types.string, decode(rv))
yield (token_types.separator, ":")
self.consume()
self.state = self.after_key_state
def after_key_state(self):
self.skip_whitespace()
c = self.char()
if c == "#":
self.next_state = self.expr_or_value_state
self.state = self.comment_state
elif c == eol:
self.next_state = self.expr_or_value_state
self.state = self.eol_state
elif c == "[":
self.state = self.list_start_state
else:
self.state = self.value_state
def after_expr_state(self):
self.skip_whitespace()
c = self.char()
if c == "#":
self.next_state = self.after_expr_state
self.state = self.comment_state
elif c == eol:
self.next_state = self.after_expr_state
self.state = self.eol_state
elif c == "[":
self.state = self.list_start_state
else:
self.state = self.value_state
def list_start_state(self):
yield (token_types.list_start, "[")
self.consume()
self.state = self.list_value_start_state
def list_value_start_state(self):
self.skip_whitespace()
if self.char() == "]":
self.state = self.list_end_state
elif self.char() in ("'", '"'):
quote_char = self.char()
self.consume()
yield (token_types.string, self.consume_string(quote_char))
self.skip_whitespace()
if self.char() == "]":
self.state = self.list_end_state
elif self.char() != ",":
raise ParseError(self.filename, self.line_number, "Junk after quoted string")
self.consume()
elif self.char() == "#":
self.state = self.comment_state
self.next_line_state = self.list_value_start_state
elif self.char() == eol:
self.next_line_state = self.list_value_start_state
self.state = self.eol_state
elif self.char() == ",":
raise ParseError(self.filename, self.line_number, "List item started with separator")
elif self.char() == "@":
self.state = self.list_value_atom_state
else:
self.state = self.list_value_state
def list_value_state(self):
rv = ""
spaces = 0
while True:
c = self.char()
if c == "\\":
escape = self.consume_escape()
rv += escape
elif c == eol:
raise ParseError(self.filename, self.line_number, "EOL in list value")
elif c == "#":
raise ParseError(self.filename, self.line_number, "EOL in list value (comment)")
elif c == ",":
self.state = self.list_value_start_state
self.consume()
break
elif c == " ":
spaces += 1
self.consume()
elif c == "]":
self.state = self.list_end_state
self.consume()
break
else:
rv += " " * spaces
spaces = 0
rv += c
self.consume()
if rv:
yield (token_types.string, decode(rv))
def list_value_atom_state(self):
self.consume()
for _, value in self.list_value_state():
yield token_types.atom, value
def list_end_state(self):
self.consume()
yield (token_types.list_end, "]")
self.state = self.line_end_state
def value_state(self):
self.skip_whitespace()
if self.char() in ("'", '"'):
quote_char = self.char()
self.consume()
yield (token_types.string, self.consume_string(quote_char))
if self.char() == "#":
self.state = self.comment_state
else:
self.state = self.line_end_state
elif self.char() == "@":
self.consume()
for _, value in self.value_inner_state():
yield token_types.atom, value
else:
self.state = self.value_inner_state
def value_inner_state(self):
rv = ""
spaces = 0
while True:
c = self.char()
if c == "\\":
rv += self.consume_escape()
elif c == "#":
self.state = self.comment_state
break
elif c == " ":
# prevent whitespace before comments from being included in the value
spaces += 1
self.consume()
elif c == eol:
self.state = self.line_end_state
break
else:
rv += " " * spaces
spaces = 0
rv += c
self.consume()
yield (token_types.string, decode(rv))
def comment_state(self):
while self.char() is not eol:
self.consume()
self.state = self.eol_state
def line_end_state(self):
self.skip_whitespace()
c = self.char()
if c == "#":
self.state = self.comment_state
elif c == eol:
self.state = self.eol_state
else:
raise ParseError(self.filename, self.line_number, "Junk before EOL %s" % c)
def consume_string(self, quote_char):
rv = ""
while True:
c = self.char()
if c == "\\":
rv += self.consume_escape()
elif c == quote_char:
self.consume()
break
elif c == eol:
raise ParseError(self.filename, self.line_number, "EOL in quoted string")
else:
rv += c
self.consume()
return decode(rv)
def expr_or_value_state(self):
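        # A block under "key:" holds conditional values; a line starting
        # with "if " begins an expression, anything else is a plain value.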
if self.peek(3) == "if ":
self.state = self.expr_state
else:
self.state = self.value_state
def expr_state(self):
self.skip_whitespace()
c = self.char()
if c == eol:
raise ParseError(self.filename, self.line_number, "EOL in expression")
elif c in "'\"":
self.consume()
yield (token_types.string, self.consume_string(c))
elif c == "#":
raise ParseError(self.filename, self.line_number, "Comment before end of expression")
elif c == ":":
yield (token_types.separator, c)
self.consume()
self.state = self.after_expr_state
elif c in parens:
self.consume()
yield (token_types.paren, c)
elif c in ("!", "="):
self.state = self.operator_state
elif c in digits:
self.state = self.digit_state
else:
self.state = self.ident_state
def operator_state(self):
# Only symbolic operators
index_0 = self.index
while True:
c = self.char()
            if c == eol or c not in operator_chars:
                break
            self.consume()
        # Set the next state after the loop so that hitting eol mid-operator
        # falls through to expr_state, which reports "EOL in expression",
        # instead of leaving the tokenizer stuck in this state.
        self.state = self.expr_state
        yield (token_types.ident, self.line[index_0:self.index])
def digit_state(self):
index_0 = self.index
seen_dot = False
while True:
c = self.char()
if c == eol:
break
elif c in digits:
self.consume()
elif c == ".":
if seen_dot:
raise ParseError(self.filename, self.line_number, "Invalid number")
self.consume()
seen_dot = True
elif c in parens:
break
elif c in operator_chars:
break
elif c == " ":
break
elif c == ":":
break
else:
raise ParseError(self.filename, self.line_number, "Invalid character in number")
self.state = self.expr_state
yield (token_types.number, self.line[index_0:self.index])
def ident_state(self):
index_0 = self.index
while True:
c = self.char()
if c == eol:
break
elif c == ".":
break
elif c in parens:
break
elif c in operator_chars:
break
elif c == " ":
break
elif c == ":":
break
else:
self.consume()
self.state = self.expr_state
yield (token_types.ident, self.line[index_0:self.index])
def consume_escape(self):
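        """Consume a backslash escape sequence and return the character it
        denotes."""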
assert self.char() == "\\"
self.consume()
c = self.char()
self.consume()
if c == "x":
return self.decode_escape(2)
elif c == "u":
return self.decode_escape(4)
elif c == "U":
return self.decode_escape(6)
elif c in ["a", "b", "f", "n", "r", "t", "v"]:
return eval("'\%s'" % c)
elif c is eol:
raise ParseError(self.filename, self.line_number, "EOL in escape")
else:
return c
def decode_escape(self, length):
value = 0
for i in xrange(length):
c = self.char()
value *= 16
value += self.escape_value(c)
self.consume()
return unichr(value)
def escape_value(self, c):
if '0' <= c <= '9':
return ord(c) - ord('0')
elif 'a' <= c <= 'f':
return ord(c) - ord('a') + 10
elif 'A' <= c <= 'F':
return ord(c) - ord('A') + 10
else:
raise ParseError(self.filename, self.line_number, "Invalid character escape")
class Parser(object):
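    """Recursive-descent parser over the Tokenizer's token stream.

    Builds a tree of nodes from the `node` module via a Treebuilder,
    delegating conditional expressions to an ExpressionBuilder.
    """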
def __init__(self):
self.reset()
def reset(self):
self.token = None
self.tokenizer = Tokenizer()
self.token_generator = None
self.tree = Treebuilder(DataNode(None))
self.expr_builder = None
self.expr_builders = []
def parse(self, input):
self.reset()
self.token_generator = self.tokenizer.tokenize(input)
self.consume()
self.manifest()
return self.tree.node
def consume(self):
self.token = self.token_generator.next()
def expect(self, type, value=None):
if self.token[0] != type:
raise ParseError(self.tokenizer.filename, self.tokenizer.line_number,
"Token '{}' doesn't equal expected type '{}'".format(self.token[0], type))
if value is not None:
if self.token[1] != value:
raise ParseError(self.tokenizer.filename, self.tokenizer.line_number,
"Token '{}' doesn't equal expected value '{}'".format(self.token[1], value))
self.consume()
def manifest(self):
self.data_block()
self.expect(token_types.eof)
def data_block(self):
while self.token[0] == token_types.string:
self.tree.append(KeyValueNode(self.token[1]))
self.consume()
self.expect(token_types.separator)
self.value_block()
self.tree.pop()
while self.token == (token_types.paren, "["):
self.consume()
if self.token[0] != token_types.string:
raise ParseError(self.tokenizer.filename, self.tokenizer.line_number,
"Token '{}' is not a string".format(self.token[0]))
self.tree.append(DataNode(self.token[1]))
self.consume()
self.expect(token_types.paren, "]")
if self.token[0] == token_types.group_start:
self.consume()
self.data_block()
self.eof_or_end_group()
self.tree.pop()
def eof_or_end_group(self):
if self.token[0] != token_types.eof:
self.expect(token_types.group_end)
def value_block(self):
if self.token[0] == token_types.list_start:
self.consume()
self.list_value()
elif self.token[0] == token_types.string:
self.value()
elif self.token[0] == token_types.group_start:
self.consume()
self.expression_values()
if self.token[0] == token_types.string:
self.value()
self.eof_or_end_group()
elif self.token[0] == token_types.atom:
self.atom()
else:
raise ParseError(self.tokenizer.filename, self.tokenizer.line_number,
"Token '{}' is not a known type".format(self.token[0]))
def list_value(self):
self.tree.append(ListNode())
while self.token[0] in (token_types.atom, token_types.string):
if self.token[0] == token_types.atom:
self.atom()
else:
self.value()
self.expect(token_types.list_end)
self.tree.pop()
def expression_values(self):
while self.token == (token_types.ident, "if"):
self.consume()
self.tree.append(ConditionalNode())
self.expr_start()
self.expect(token_types.separator)
self.value_block()
self.tree.pop()
def value(self):
self.tree.append(ValueNode(self.token[1]))
self.consume()
self.tree.pop()
def atom(self):
if self.token[1] not in atoms:
raise ParseError(self.tokenizer.filename, self.tokenizer.line_number, "Unrecognised symbol @%s" % self.token[1])
self.tree.append(AtomNode(atoms[self.token[1]]))
self.consume()
self.tree.pop()
def expr_start(self):
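        # Index expressions (variable[key]) nest a subexpression inside the
        # current one, so builders are kept on a stack.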
self.expr_builder = ExpressionBuilder(self.tokenizer)
self.expr_builders.append(self.expr_builder)
self.expr()
expression = self.expr_builder.finish()
self.expr_builders.pop()
self.expr_builder = self.expr_builders[-1] if self.expr_builders else None
if self.expr_builder:
self.expr_builder.operands[-1].children[-1].append(expression)
else:
self.tree.append(expression)
self.tree.pop()
def expr(self):
self.expr_operand()
while (self.token[0] == token_types.ident and self.token[1] in binary_operators):
self.expr_bin_op()
self.expr_operand()
def expr_operand(self):
if self.token == (token_types.paren, "("):
self.consume()
self.expr_builder.left_paren()
self.expr()
self.expect(token_types.paren, ")")
self.expr_builder.right_paren()
elif self.token[0] == token_types.ident and self.token[1] in unary_operators:
self.expr_unary_op()
self.expr_operand()
elif self.token[0] in [token_types.string, token_types.ident]:
self.expr_value()
elif self.token[0] == token_types.number:
self.expr_number()
else:
raise ParseError(self.tokenizer.filename, self.tokenizer.line_number, "Unrecognised operand")
def expr_unary_op(self):
if self.token[1] in unary_operators:
self.expr_builder.push_operator(UnaryOperatorNode(self.token[1]))
self.consume()
else:
raise ParseError(self.tokenizer.filename, self.tokenizer.line_number, "Expected unary operator")
def expr_bin_op(self):
if self.token[1] in binary_operators:
self.expr_builder.push_operator(BinaryOperatorNode(self.token[1]))
self.consume()
else:
raise ParseError(self.tokenizer.filename, self.tokenizer.line_number, "Expected binary operator")
def expr_value(self):
node_type = {token_types.string: StringNode,
token_types.ident: VariableNode}[self.token[0]]
self.expr_builder.push_operand(node_type(self.token[1]))
self.consume()
if self.token == (token_types.paren, "["):
self.consume()
self.expr_builder.operands[-1].append(IndexNode())
self.expr_start()
self.expect(token_types.paren, "]")
def expr_number(self):
self.expr_builder.push_operand(NumberNode(self.token[1]))
self.consume()
class Treebuilder(object):
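    """Tracks the current attachment point while the parse tree is built."""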
def __init__(self, root):
self.root = root
self.node = root
def append(self, node):
self.node.append(node)
self.node = node
return node
def pop(self):
node = self.node
self.node = self.node.parent
return node
class ExpressionBuilder(object):
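    """Shunting-yard style expression builder.

    Operands and operators are kept on stacks; pushing an operator first
    reduces any pending operators of higher precedence, and None on the
    operator stack marks an open parenthesis (or the stack bottom).
    """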
def __init__(self, tokenizer):
self.operands = []
self.operators = [None]
self.tokenizer = tokenizer
def finish(self):
while self.operators[-1] is not None:
self.pop_operator()
rv = self.pop_operand()
assert self.is_empty()
return rv
def left_paren(self):
self.operators.append(None)
def right_paren(self):
while self.operators[-1] is not None:
self.pop_operator()
if not self.operators:
            raise ParseError(self.tokenizer.filename, self.tokenizer.line_number,
                             "Unbalanced parens")
assert self.operators.pop() is None
def push_operator(self, operator):
assert operator is not None
while self.precedence(self.operators[-1]) > self.precedence(operator):
self.pop_operator()
self.operators.append(operator)
def pop_operator(self):
operator = self.operators.pop()
if isinstance(operator, BinaryOperatorNode):
operand_1 = self.operands.pop()
operand_0 = self.operands.pop()
self.operands.append(BinaryExpressionNode(operator, operand_0, operand_1))
else:
operand_0 = self.operands.pop()
self.operands.append(UnaryExpressionNode(operator, operand_0))
def push_operand(self, node):
self.operands.append(node)
def pop_operand(self):
return self.operands.pop()
def is_empty(self):
return len(self.operands) == 0 and all(item is None for item in self.operators)
def precedence(self, operator):
if operator is None:
return 0
return precedence(operator)
def parse(stream):
p = Parser()
return p.parse(stream)
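if __name__ == "__main__":
    # Minimal smoke test: an illustrative sketch, not part of the module
    # proper (it assumes the sibling `node` module is importable). Note the
    # byte string: tokenize() rejects unicode input.
    example = b"[test_name.js]\n  expected: ERROR\n"
    root = parse(example)
    assert root is not None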