blob: d156f0357fe5ff9e500b8614c81a91f52f26863d [file] [log] [blame]
#!/usr/bin/python
# Copyright (c) 2013 The Native Client Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from __future__ import print_function
import sys
import os
import dfa_parser
class Generator(object):
def __init__(self, dfa, c_file):
self.dfa = dfa
self.c_file = c_file
self.code_table1 = {}
self.code_table2 = {}
def TransitionCode(self, transition):
result = [a.body for a in transition.actions]
result.append(
'current_state = %s; current_position++; goto _again;\n' %
transition.to_state.index)
return tuple(result)
def WriteData(self):
self.c_file.write(
'static const int _dfa_accepting[] = {%s};\n' %
', '.join('1' if state.is_accepting else '0'
for state in self.dfa.states))
table = []
for i, state in enumerate(self.dfa.states):
elems = []
for byte in range(256):
t = state.forward_transitions.get(byte)
if t is not None:
code = self.TransitionCode(t)
else:
code = (self.dfa.error_action.body,)
# Instead of a single switch with all possible sequences of actions,
# we use two switches: one for the first 'half' of action sequence,
# and one for the second 'half'.
# It significantly reduces generated source size and compilation time
# (because different action sequences can share 'halves')
# at a cost of runtime performance.
code1 = code[:1]
code2 = code[1:]
if code1 not in self.code_table1:
self.code_table1[code1] = len(self.code_table1)
if code2 not in self.code_table2:
self.code_table2[code2] = len(self.code_table2)
assert self.code_table1[code1] < 2**16
assert self.code_table2[code2] < 2**16
elems.append('{%s, %s}' % (self.code_table1[code1],
self.code_table2[code2]))
table.append(' {%s}' % ', '.join(map(str, elems)))
self.c_file.write(
'static const uint16_t _dfa_transitions[][256][2] = {\n%s\n};\n' %
',\n'.join(table))
def WriteInit(self):
self.c_file.write(
'current_state = %d;\n' % self.dfa.initial_state.index)
def WriteCodeTable(self, code_table, selector):
self.c_file.write(
'switch (%s) {\n' % selector)
for code, index in sorted(code_table.items(), key=lambda (_, index): index):
self.c_file.write(
' case %d:\n' % index)
self.c_file.write(' '.join(code))
self.c_file.write(' break;\n')
self.c_file.write(
'}\n')
def WriteExec(self):
self.c_file.write('_again:\n')
self.c_file.write(
'if (current_position == end_position) {\n'
' if (_dfa_accepting[current_state]) goto _done;\n'
' %s'
'}\n' % self.dfa.error_action.body)
self.WriteCodeTable(
self.code_table1,
'_dfa_transitions[current_state][*current_position][0]')
self.WriteCodeTable(
self.code_table2,
'_dfa_transitions[current_state][*current_position][1]')
self.c_file.write(
'_done: ;\n')
def main():
if len(sys.argv[1:]) != 3:
print('Usage:')
print(' %s <rl file> <xml file> <output c file>' %
os.path.basename(__file__))
sys.exit(1)
rl_filename, xml_filename, c_filename = sys.argv[1:]
dfa = dfa_parser.ParseXml(xml_filename)
with open(rl_filename) as rl_file:
lines = list(rl_file)
with open(c_filename, 'wt') as c_file:
c_file.write(
'/*\n'
' * THIS FILE IS AUTO-GENERATED. DO NOT EDIT.\n'
' * Generated by %s from %s and %s.\n'
' */\n\n' % (
os.path.basename(__file__),
os.path.basename(rl_filename),
os.path.basename(xml_filename)))
generator = Generator(dfa, c_file)
in_ragel_block = False
for line in lines:
if line.strip() == '%%{':
assert not in_ragel_block
in_ragel_block = True
elif line.strip() == '}%%':
assert in_ragel_block
in_ragel_block = False
elif line.strip() == '%% write data;':
assert not in_ragel_block
generator.WriteData()
elif line.strip() == '%% write init;':
assert not in_ragel_block
generator.WriteInit()
elif line.strip() == '%% write exec;':
assert not in_ragel_block
generator.WriteExec()
else:
if not in_ragel_block:
c_file.write(line)
assert not in_ragel_block
if __name__ == '__main__':
main()