#!/usr/bin/env python
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
Script for generating .proto and a conversion .cc file for a templated library
based JavaScript parser fuzzer.
"""
import sys
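
# A hypothetical invocation (file names are illustrative, not the checked-in
# ones):
#
#   python generate_javascript_parser_proto.py \
#       javascript_parser.proto \
#       javascript_parser_proto_to_string.cc \
#       javascript_parser.dict
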
def ParseWord(word_string):
  # Every part of the word is either a string surrounded by "" or a
  # placeholder $<int>.
  word_string = word_string.strip()

  parts = []
  while len(word_string) > 0:
    if word_string[0] == '"':
      # A quoted literal; consume everything up to the closing quote.
      end_ix = 1 + word_string[1:].index('"')
      parts.append(word_string[1:end_ix])
      word_string = word_string[(end_ix + 1):]
    elif word_string[0] == '$':
      # A placeholder; consume the integer index that follows '$'.
      if ' ' in word_string:
        end_ix = word_string.index(' ')
      else:
        end_ix = len(word_string)
      parts.append(int(word_string[1:end_ix]))
      word_string = word_string[end_ix:]
    else:
      assert False, 'Unexpected character in word: ' + word_string
    word_string = word_string.lstrip()
  return parts
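
# A sketch of ParseWord on a hypothetical dictionary entry (illustrative, not
# necessarily present in the real dictionary):
#
#   ParseWord('"var" $0 "=" $1')  ->  ['var', 0, '=', 1]
#
# Quoted pieces become literal strings; $<int> becomes an integer index into
# the token's inner tokens.
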
def GenerateProtoContents(words):
  # Emit one proto enum value per dictionary word; the numeric value is the
  # word's index in the word list.
  contents = ''
  for ix in range(len(words)):
    contents += '    token_value_' + str(ix) + ' = ' + str(ix) + ';\n'
  return contents
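
# For a three-word dictionary this yields enum entries of the form (sketch of
# the generated output):
#
#     token_value_0 = 0;
#     token_value_1 = 1;
#     token_value_2 = 2;
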
def GenerateConversionContents(words):
  contents = ''
  for ix, word in enumerate(words):
    contents += '    case ' + str(ix) + ':\n'
    # Build the C++ expression that concatenates the word's parts, tracking
    # the highest placeholder index used along the way.
    max_part = -1
    first = True
    building_string = ''
    for part in word:
      if not first:
        building_string += ' + std::string(" ") + '
      if isinstance(part, str):
        building_string += 'std::string("' + part + '")'
      else:
        if part > max_part:
          max_part = part
        building_string += ('token_to_string(token.inner_tokens(' + str(part) +
                            '), depth)')
      first = False
    if max_part >= 0:
      # Guard against protos that don't carry enough inner tokens.
      contents += ('      if (token.inner_tokens().size() < ' +
                   str(max_part + 1) + ') return std::string("");\n')
    contents += '      return ' + building_string + ';\n'
  return contents
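
# Sketch of the generated case for the hypothetical word ['var', 0, '=', 1]
# at index 0 (line-wrapped here for readability; the script emits it on one
# line):
#
#     case 0:
#       if (token.inner_tokens().size() < 2) return std::string("");
#       return std::string("var") + std::string(" ") +
#              token_to_string(token.inner_tokens(0), depth) +
#              std::string(" ") + std::string("=") + std::string(" ") +
#              token_to_string(token.inner_tokens(1), depth);
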
def ReadDictionary(filename):
  with open(filename) as input_file:
    lines = input_file.readlines()
  words = []
  for line in lines:
    # Skip comment lines in the dictionary.
    if not line.startswith('#'):
      word = ParseWord(line)
      if len(word) > 0:
        words.append(word)
  return words
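
# A dictionary file is plain text with one word per line; lines starting with
# '#' are comments. A hypothetical fragment:
#
#   # assignment
#   "var" $0 "=" $1
#   # addition
#   $0 "+" $1
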
def main(argv):
  output_proto_file = argv[1]
  output_cc_file = argv[2]
  input_dict_file = argv[3]

  words = ReadDictionary(input_dict_file)

  proto_header = ('// Generated by generate_javascript_parser_proto.py.\n'
                  '\n'
                  'syntax = "proto2";\n'
                  'package javascript_parser_proto_fuzzer;\n'
                  '\n'
                  'message Token {\n'
                  '  enum Value {\n')

  proto_footer = ('  }\n'
                  '  required Value value = 1;\n'
                  '  repeated Token inner_tokens = 2;\n'
                  '}\n'
                  '\n'
                  'message Source {\n'
                  '  required bool is_module = 1;\n'
                  '  repeated Token tokens = 2;\n'
                  '}\n')

  proto_contents = proto_header + GenerateProtoContents(words) + proto_footer

  with open(output_proto_file, 'w') as f:
    f.write(proto_contents)

  conversion_header = (
      '// Generated by generate_javascript_parser_proto.py.\n'
      '\n'
      '#include "testing/libfuzzer/fuzzers/'
      'javascript_parser_proto_to_string.h"\n'
      '\n'
      '// Bound calls to token_to_string to prevent memory usage from growing\n'
      '// too much.\n'
      'const int kMaxRecursiveDepth = 9;\n'
      '\n'
      'std::string token_to_string(\n'
      '    const javascript_parser_proto_fuzzer::Token& token, int depth)'
      ' {\n'
      '  if (++depth == kMaxRecursiveDepth) return std::string("");\n'
      '  switch(token.value()) {\n')

  conversion_footer = ('    default: break;\n'
                       '  }\n'
                       '  return std::string("");\n'
                       '}\n')

  conversion_contents = (conversion_header + GenerateConversionContents(words)
                         + conversion_footer)

  with open(output_cc_file, 'w') as f:
    f.write(conversion_contents)
if __name__ == "__main__":
main(sys.argv)