| #!/usr/bin/env python |
| |
| # Copyright 2017 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """ |
| Script for generating .proto and a conversion .cc file for a templated library |
| based JavaScript parser fuzzer. |
| """ |
| |
| import sys |
| |
def ParseWord(word_string):
  """Parses one dictionary line into its component parts.

  Every part of the word is either a string surrounded by "" or a
  placeholder $<int>.

  Args:
    word_string: one line from the fuzzer dictionary file.

  Returns:
    A list whose elements are str (literal text) or int (placeholder index);
    empty for a blank line.

  Raises:
    ValueError: if a part is neither a quoted string nor a $<int>
        placeholder (or a quoted part has no closing quote).
  """
  word_string = word_string.strip()

  parts = []
  while word_string:
    if word_string[0] == '"':
      # Quoted literal: everything up to the next closing quote.
      end_ix = 1 + word_string[1:].index('"')
      parts.append(word_string[1:end_ix])
      word_string = word_string[(end_ix + 1):]
    elif word_string[0] == '$':
      # Placeholder: $ followed by an integer, ended by a space or EOL.
      if ' ' in word_string:
        end_ix = word_string.index(' ')
      else:
        end_ix = len(word_string)
      parts.append(int(word_string[1:end_ix]))
      word_string = word_string[end_ix:]
    else:
      # Previously assert(False): raise a real error so a malformed
      # dictionary line is reported even when Python runs with -O
      # (which strips assert statements).
      raise ValueError('Invalid dictionary entry: %r' % word_string)
    word_string = word_string.lstrip()
  return parts
| |
def GenerateProtoContents(words):
  """Generates one proto enum entry line per dictionary word."""
  entries = [' token_value_%d = %d;\n' % (ix, ix) for ix in range(len(words))]
  return ''.join(entries)
| |
def GenerateConversionContents(words):
  """Generates the switch cases for the generated token_to_string().

  Args:
    words: list of parsed words as returned by ParseWord; each element is a
        list of str literals and int placeholder indices.

  Returns:
    A string with one C++ "case" per word that rebuilds the word's text,
    recursing into token_to_string for every $<int> placeholder.
  """
  contents = ''
  # enumerate replaces the previous hand-maintained ix counter.
  for ix, word in enumerate(words):
    contents += ' case ' + str(ix) + ':\n'
    max_part = -1  # Highest placeholder index referenced by this word.
    first = True
    building_string = ''
    for part in word:
      if not first:
        building_string += ' + std::string(" ") + '
      if isinstance(part, str):
        building_string += 'std::string("' + part + '")'
      else:
        if part > max_part:
          max_part = part
        building_string += ('token_to_string(token.inner_tokens(' + str(part) +
                            '), depth)')
      first = False
    if max_part >= 0:
      # Guard against protos carrying fewer inner tokens than referenced.
      contents += (' if (token.inner_tokens().size() < ' +
                   str(max_part + 1) + ') return std::string("");\n')
    contents += ' return ' + building_string + ';\n'
  return contents
| |
def ReadDictionary(filename):
  """Reads the fuzzer dictionary and parses every non-comment line.

  Args:
    filename: path of the dictionary file.

  Returns:
    A list of parsed words (see ParseWord); blank lines and lines starting
    with '#' are skipped.
  """
  words = []
  with open(filename) as input_file:
    for line in input_file:
      if line.startswith('#'):
        continue  # Comment line.
      parsed = ParseWord(line)
      if parsed:
        words.append(parsed)
  return words
| |
def main(argv):
  """Writes the generated .proto file and .cc converter for the fuzzer.

  Args:
    argv: command-line arguments; argv[1] is the output .proto path,
        argv[2] the output .cc path and argv[3] the input dictionary path.
        No validation is performed; missing arguments raise IndexError.
  """
  output_proto_file = argv[1]
  output_cc_file = argv[2]
  input_dict_file = argv[3]

  words = ReadDictionary(input_dict_file)

  # Fixed prefix of the generated proto: a Token message whose Value enum
  # receives one entry per dictionary word (from GenerateProtoContents).
  proto_header = ('// Generated by generate_javascript_parser_proto.py.\n'
                  '\n'
                  'syntax = "proto2";\n'
                  'package javascript_parser_proto_fuzzer;\n'
                  '\n'
                  'message Token {\n'
                  ' enum Value {\n')

  # Fixed suffix: closes the enum, declares the recursive inner_tokens
  # field and defines the top-level Source message.
  proto_footer = (' }\n'
                  ' required Value value = 1;\n'
                  ' repeated Token inner_tokens = 2;\n'
                  '}\n'
                  '\n'
                  'message Source {\n'
                  ' required bool is_module = 1;\n'
                  ' repeated Token tokens = 2;\n'
                  '}\n')

  proto_contents = proto_header + GenerateProtoContents(words) + proto_footer

  with open(output_proto_file, 'w') as f:
    f.write(proto_contents)

  # Prefix of the generated C++ converter: token_to_string() turns a Token
  # proto into source text, recursion bounded by kMaxRecursiveDepth. The
  # per-word switch cases come from GenerateConversionContents.
  conversion_header = (
      '// Generated by generate_javascript_parser_proto.py.\n'
      '\n'
      '#include "testing/libfuzzer/fuzzers/'
      'javascript_parser_proto_to_string.h"\n'
      '\n'
      '// Bound calls to token_to_string to prevent memory usage from growing\n'
      '// too much.\n'
      'const int kMaxRecursiveDepth = 9;\n'
      '\n'
      'std::string token_to_string(\n'
      ' const javascript_parser_proto_fuzzer::Token& token, int depth)'
      ' {\n'
      ' if (++depth == kMaxRecursiveDepth) return std::string("");\n'
      ' switch(token.value()) {\n')

  # Suffix: default case for enum values without a word, then closes the
  # switch and the function.
  conversion_footer = (' default: break;\n'
                       ' }\n'
                       ' return std::string("");\n'
                       '}\n')

  conversion_contents = (conversion_header + GenerateConversionContents(words)
                         + conversion_footer)

  with open(output_cc_file, 'w') as f:
    f.write(conversion_contents)

if __name__ == "__main__":
  main(sys.argv)