syzygy/pdb/generate_type_info_records.py - syzygy - Git at Google

 #!python
 # Copyright 2015 Google Inc. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 # This script generates wrappers around record types from CVInfo.h. The
 # description of the records is loaded from given json file. Example of a valid
 # .json file:
 #
 # {
 #   // List of all the structs that will be generated.
 #   "structs": [
 #     {
 #       // Name of the generated struct.
 #       "name": "LeafClass",
 #       // Optional entry with the name of the struct from CVInfo.h header
 #       // which this struct encapsulates.
 #       "original_name": "LeafClass",
 #       // Optional entry with the name of the first field that can't directly
 #       // be parsed by direct byte coercion (ie: variable length fields,
 #       // optional fields, etc). All bytes up to this field will be read
 #       // directly from the stream as one piece. If this is not specified then
 #       // the entire struct will be read in one piece. This should not be
 #       // specified unless "extra_fields" are present.
 #       "first_extra_field": "data",
 #       // Optional entry describing the additional fields.
 #       "extra_fields": [
 #         {
 #           // Name of the additional field.
 #           "name": "size",
 #           // Reference to the type dictionary.
 #           "type_name": "UnsignedNumeric",
 #           // Optional entry with C++ declaration that can be coerced to a
 #           // bool. The field gets populated only when the result is true.
 #           // This entry can also contain newline character if the condition
 #           // is too long.
 #           "condition": "property().fwdref != 0"
 #         }
 #       ],
 #       // Optional entry describing the fields that will get retyped.
 #       "retyped_fields": [
 #         {
 #           // Name of the field in the CVInfo.h struct.
 #           "name": "property",
 #           // The new type of this field.
 #           "type": "LeafPropertyField"
 #         }
 #       ]
 #     }
 #   ],
 #   // List of all the types used for extra fields.
 #   "types": {
 #     // Keys are referenced from the struct entries.
 #     "UnsignedNumeric": {
 #       // Name of function with the signature bool(common::BinaryStreamParser*, type*) which
 #       // populates object of the given type from the common::BinaryStreamParser.
 #       "parser": "ReadUnsignedNumeric",
 #       // Optional entry containing True if type can be returned by value.
 #       "ret_by_value": "True",
 #       // Actual type of this value.
 #       "type": "uint64_t"
 #     }
 #   }
 # }

 import datetime
 import json
 import os.path
 import string

 from optparse import OptionParser


 # The constants below are str.format() templates that are expanded through
 # _Substitute(); literal C++ braces are therefore written doubled.

 # License banner emitted first in both generated files. {year} and {basename}
 # are supplied by _COMMON_SUBSTITUTES.
 _LICENSE = """\
 // Copyright {year} Google Inc. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 // This file is generated by {basename}, DO NOT MODIFY.

 """

 # Include guard, includes and namespace opening of the generated header.
 _HEADER_H = """\
 #ifndef SYZYGY_PDB_GEN_PDB_TYPE_INFO_RECORDS_H_
 #define SYZYGY_PDB_GEN_PDB_TYPE_INFO_RECORDS_H_

 #include "base/strings/string16.h"
 #include "syzygy/common/binary_stream.h"
 #include "syzygy/pdb/pdb_stream_record.h"
 #include "syzygy/pe/cvinfo_ext.h"

 namespace pdb {{

 """

 # Include and namespace opening of the generated .cc file.
 _HEADER_CC = """\
 #include "syzygy/pdb/gen/pdb_type_info_records.h"

 namespace pdb {{

 """

 # Namespace closing and end of the include guard of the generated header.
 _FOOTER_H = """\
 }}  // namespace pdb

 #endif  // SYZYGY_PDB_GEN_PDB_TYPE_INFO_RECORDS_H_
 """

 # Namespace closing of the generated .cc file.
 _FOOTER_CC = """\
 }}  // namespace pdb
 """

 # Start of a generated class: constructor declaration and the body() accessor.
 _CLASS_HEADER = """\
 class {name} {{
  public:
   {name}();

   // @name Accessors.
   // @{{
   const {cci}::{original_name}& body() const {{ return body_; }}
 """

 # Getter of an extra field that is returned by const reference.
 _GETTER_REF_DECL_IMPL = """\
   const {type}& {name}() const {{ return {name}_; }}
 """

 # Getter of an extra field that is returned by value.
 _GETTER_VAL_DECL_IMPL = """\
   {type} {name}() const {{ return {name}_; }}
 """

 # Accessor of a retyped field: returns the body_ member brace-initialized into
 # the new type.
 _RETYPE_DECL_IMPL = """\
   {type} {name}() const {{ return {{body_.{name}}}; }}
 """

 # Closes the accessors documentation group opened in _CLASS_HEADER.
 _ACCESSORS_END = """\
   // @}}
 """

 # has_<name>() predicate generated for extra fields that carry a condition.
 _CONDITION_DECL_IMPL = """\

   bool has_{name}() const {{
     return ({condition_indented});
   }}
 """

 # Declaration of the data member that stores one extra field.
 _MEMBER_DECL = """\
   {type} {name}_;
 """

 # Initialize() declaration and the start of the private section.
 _CLASS_MIDDLE = """\

   // Initializes the class from the given pdb stream.
   // @param stream pointer to the pdb stream.
   // @returns true on success, false on failure.
   bool Initialize(common::BinaryStreamParser* stream);

  private:
   // The struct from CVInfo.h which represents this record.
   {cci}::{original_name} body_;
 """

 # Comment placed in front of the extra field members (only emitted when the
 # struct has extra fields, see _GenerateClass).
 _EXTRA_FIELDS_DESCRIPTION = """\

   // Additional fields parsed from the pdb stream.
 """

 # End of a generated class.
 _CLASS_FOOTER = """\
 }};

 """

 # Number of bytes read into body_ when "first_extra_field" is specified: only
 # the part of the struct that precedes that field.
 _OFFSET_OF = """\
 offsetof({cci}::{original_name}, \
 {first_extra_field});"""

 # Number of bytes read into body_ otherwise: the whole struct.
 _SIZE_OF = """\
 sizeof(body_);"""

 # Start of the constructor definition; _GenerateInit appends the initializer
 # list right after it.
 _INIT_HEADER = """\
 {name}::{name}() : """

 # Empty constructor body followed by the start of Initialize(), which reads
 # {bytes_to_read} bytes into body_.
 _INIT_MIDDLE = """\
  {{}}

 bool {name}::Initialize(common::BinaryStreamParser* stream) {{
   size_t to_read = {bytes_to_read}
   if (!stream->ReadBytes(to_read, &body_))
     return false;
 """

 # Parsing of an extra field that is only read when its condition holds.
 _INIT_CONDITION = """\
   if (({condition_indented}) &&
       !{parser}(stream, &{name}_)) {{
     return false;
   }}
 """

 # Parsing of an extra field that is always read.
 _INIT_FIELD = """\
   if (!{parser}(stream, &{name}_))
     return false;
 """

 # End of Initialize().
 _INIT_FOOTER = """\

   return true;
 }}

 """

 # Values available to every template. Note that 'year' is evaluated once, when
 # this module is loaded.
 _COMMON_SUBSTITUTES = {
   'cci': 'Microsoft_Cci_Pdb',
   'year': datetime.datetime.now().year,
   'basename': os.path.basename(__file__)
 }

 # Description applied by _FillFields() to extra fields whose "type_name" has no
 # entry in the type dictionary; the type name itself is used as the C++ type.
 _DEFAULT_TYPE = {
   "parser": "ReadBasicType",
   "ret_by_value": "True"
 }

 def _Substitute(str, **more):
   """Expands a template with the common and the call specific values.

   Args:
     str: a str.format() style template.
     **more: values specific to this call; they take precedence over the
         entries of _COMMON_SUBSTITUTES with the same name.

   Returns:
     The formatted string.
   """
   values = dict(_COMMON_SUBSTITUTES, **more)
   return str.format(**values)


 def _IncreaseIndent(field, indent):
   """Increases indent of each new line in the given string."""
   return field.replace('\n', '\n' + ' ' * indent)


 def _GenerateClass(pdb_class):
   """Builds the C++ class declaration of one struct description.

   As a side effect the 'condition_indented' entry of every conditional extra
   field is (re)computed for the indentation used inside the class body.

   Args:
     pdb_class: struct description completed by _FillFields().

   Returns:
     The class declaration as a string.
   """
   extra_fields = pdb_class.get('extra_fields', [])
   pieces = [_Substitute(_CLASS_HEADER, **pdb_class)]

   # Accessors of the extra fields, by value or by const reference.
   for extra in extra_fields:
     by_value = extra.get('ret_by_value') == 'True'
     getter = _GETTER_VAL_DECL_IMPL if by_value else _GETTER_REF_DECL_IMPL
     pieces.append(_Substitute(getter, **extra))

   # Accessors of the retyped fields.
   pieces.extend(_Substitute(_RETYPE_DECL_IMPL, **retyped)
                 for retyped in pdb_class.get('retyped_fields', []))
   pieces.append(_Substitute(_ACCESSORS_END))

   # has_<name>() predicates of the conditional fields.
   for extra in extra_fields:
     if 'condition' not in extra:
       continue
     extra['condition_indented'] = _IncreaseIndent(extra['condition'], 11)
     pieces.append(_Substitute(_CONDITION_DECL_IMPL, **extra))

   pieces.append(_Substitute(_CLASS_MIDDLE, **pdb_class))

   # Data members holding the extra fields.
   if extra_fields:
     pieces.append(_Substitute(_EXTRA_FIELDS_DESCRIPTION))
   pieces.extend(_Substitute(_MEMBER_DECL, **extra) for extra in extra_fields)

   pieces.append(_Substitute(_CLASS_FOOTER, **pdb_class))
   return ''.join(pieces)


 def _GenerateInit(pdb_class):
   """Builds the constructor and Initialize() definitions of one struct.

   As a side effect 'bytes_to_read' is stored in |pdb_class| and the
   'condition_indented' entry of every conditional extra field is (re)computed
   for the indentation used inside Initialize().

   Args:
     pdb_class: struct description completed by _FillFields().

   Returns:
     The definitions as a string.
   """
   extra_fields = pdb_class.get('extra_fields', [])

   # Read only up to the first extra field when there is one, otherwise read
   # the whole struct.
   size_template = _SIZE_OF
   if 'first_extra_field' in pdb_class:
     size_template = _OFFSET_OF
   pdb_class['bytes_to_read'] = _Substitute(size_template, **pdb_class)

   # The members after body_ are aligned below it in the initializer list.
   ctor_start = _Substitute(_INIT_HEADER, **pdb_class)
   padding = ' ' * len(ctor_start)
   init_list = ['body_{}']
   init_list.extend(padding + extra['name'] + '_{}' for extra in extra_fields)

   pieces = [ctor_start,
             ',\n'.join(init_list),
             _Substitute(_INIT_MIDDLE, **pdb_class)]

   for extra in extra_fields:
     if 'condition' in extra:
       extra['condition_indented'] = _IncreaseIndent(extra['condition'], 6)
       template = _INIT_CONDITION
     else:
       template = _INIT_FIELD
     pieces.append(_Substitute(template, **extra))

   pieces.append(_Substitute(_INIT_FOOTER, **pdb_class))
   return ''.join(pieces)


 def _GenerateHeaderFile(data):
   """Returns the contents of the generated header file.

   Args:
     data: iterable of struct descriptions completed by _FillFields().
   """
   parts = [_Substitute(_LICENSE), _Substitute(_HEADER_H)]
   parts.extend(_GenerateClass(pdb_class) for pdb_class in data)
   parts.append(_Substitute(_FOOTER_H))
   return ''.join(parts)


 def _GenerateCcFile(data):
   """Returns the contents of the generated cc file.

   Args:
     data: iterable of struct descriptions completed by _FillFields().
   """
   parts = [_Substitute(_LICENSE), _Substitute(_HEADER_CC)]
   parts.extend(_GenerateInit(pdb_class) for pdb_class in data)
   parts.append(_Substitute(_FOOTER_CC))
   return ''.join(parts)


 def _WriteFile(file_name, contents):
   """Write given string to a given file path."""
   dir = os.path.dirname(__file__)
   with open(os.path.join(dir, file_name), 'wb') as f:
     f.write(contents)


 # Usage text handed to OptionParser in main(); optparse replaces %prog with the
 # name of the running program.
 _USAGE = """\
 %prog [file to process]

 Generate wrappers around CVInfo structs as described in the given json file."""


 def ascii_encode_dict(data):
   """Converts the unicode keys and values of a dict to regular strings.

   Needed because of a bug in Python 2.6 which causes a failure when a unicode
   keyword is passed to _Substitute(). On Python 3, where every string already
   is a valid keyword, the entries are copied unchanged.

   Args:
     data: the dict to convert, as handed to a json object_hook.

   Returns:
     A new dict. Only the top-level keys and values are converted; everything
     that is not a unicode string is kept as is.
   """
   try:
     text_type = unicode
   except NameError:
     # Python 3 has no separate unicode type, there is nothing to convert.
     return dict(data.items())

   def _ToNative(value):
     return value.encode('ascii') if isinstance(value, text_type) else value

   return dict((_ToNative(key), _ToNative(value))
               for key, value in data.items())


 def _FillFields(pdb_class, types_definition):
   """Adds type information to fields of a struct."""
   pdb_class.setdefault('original_name', pdb_class['name'])
   for field in pdb_class.get('extra_fields', []):
     if field['type_name'] in types_definition:
       field.update(types_definition[field['type_name']])
     else:
       default_definition = _DEFAULT_TYPE.copy()
       default_definition.setdefault('type', field['type_name'])
       field.update(default_definition)
   return pdb_class


 def _CompleteTypes(pdb_structs, types_definition):
   """Adds types information of fields to each struct"""
   return [_FillFields(struct, types_definition) for struct in pdb_structs]


 def main():
   """Generates the wrappers described by the json file given as argument.

   The .h and .cc files are written to the 'gen' directory located next to
   this script and are named 'pdb_' followed by the base name of the input
   file. Exits through parser.error() unless exactly one input file is given.
   """
   parser = OptionParser(usage=_USAGE)
   (_, input_files) = parser.parse_args()

   # The first argument might be the current script name, remove it and make sure
   # that there's exactly one input file.
   if __file__ in input_files:
     input_files.remove(__file__)
   if len(input_files) != 1:
     parser.error('You must specify exactly one input file.')
   input_file = input_files[0]

   # Only the parsing needs the input file, close it before generating.
   with open(input_file) as data_file:
     data = json.load(data_file, object_hook=ascii_encode_dict)

   # Without a "types" entry every extra field falls back to the default type.
   pdb_structs = _CompleteTypes(data['structs'], data.get('types', {}))
   header_file = _GenerateHeaderFile(pdb_structs)
   cc_file = _GenerateCcFile(pdb_structs)

   # os.path.join() keeps the output path valid on platforms whose separator
   # is not a backslash.
   file_name = os.path.splitext(os.path.basename(input_file))[0]
   _WriteFile(os.path.join('gen', 'pdb_' + file_name + '.h'), header_file)
   _WriteFile(os.path.join('gen', 'pdb_' + file_name + '.cc'), cc_file)


 # Run the generator only when this file is executed as a script.
 if __name__ == '__main__':
   main()
	#!python
	# Copyright 2015 Google Inc. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	# This script generates wrappers around record types from CVInfo.h. The
	# description of the records is loaded from given json file. Example of a valid
	# .json file:
	#
	# {
	# // List of all the structs that will be generated.
	# "structs": [
	# {
	# // Name of the generated struct.
	# "name": "LeafClass",
	# // Optional entry with the name of the struct from CVInfo.h header
	# // which this struct encapsulates.
	# "original_name": "LeafClass",
	# // Optional entry with the name of the first field that can't directly
	# // be parsed by direct byte coercion (ie: variable length fields,
	# // optional fields, etc). All bytes up to this field will be read
	# // directly from the stream as one piece. If this is not specified then
	# // the entire struct will be read in one piece. This should not be
	# // specified unless "extra_fields" are present.
	# "first_extra_field": "data",
	# // Optional entry describing the additional fields.
	# "extra_fields": [
	# {
	# // Name of the additional field.
	# "name": "size",
	# // Reference to the type dictionary.
	# "type_name": "UnsignedNumeric",
	# // Optional entry with C++ declaration that can be coerced to a
	# // bool. The field gets populated only when the result is true.
	# // This entry can also contain newline character if the condition
	# // is too long.
	# "condition": "property().fwdref != 0"
	# }
	# ],
	# // Optional entry describing the fields that will get retyped.
	# "retyped_fields": [
	# {
	# // Name of the field in the CVInfo.h struct.
	# "name": "property",
	# // The new type of this field.
	# "type": "LeafPropertyField"
	# }
	# ]
	# }
	# ],
	# // List of all the types used for extra fields.
	# "types": {
	# // Keys are referenced from the struct entries.
	# "UnsignedNumeric": {
	# // Name of function with the signature bool(common::BinaryStreamParser*, type*) which
	# // populates object of the given type from the common::BinaryStreamParser.
	# "parser": "ReadUnsignedNumeric",
	# // Optional entry containing True if type can be returned by value.
	# "ret_by_value": "True",
	# // Actual type of this value.
	# "type": "uint64_t"
	# }
	# }
	# }

	import datetime
	import json
	import os.path
	import string

	from optparse import OptionParser


	# The constants below are str.format() templates that are expanded through
	# _Substitute(); literal C++ braces are therefore written doubled.
	# NOTE(review): every line of the template text starts with a tab and has no
	# further indentation, so the generated C++ is not indented - confirm whether
	# this is intended or the result of lost whitespace.

	# License banner emitted first in both generated files. {year} and {basename}
	# are supplied by _COMMON_SUBSTITUTES.
	_LICENSE = """\
	// Copyright {year} Google Inc. All Rights Reserved.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	// This file is generated by {basename}, DO NOT MODIFY.

	"""

	# Include guard, includes and namespace opening of the generated header.
	_HEADER_H = """\
	#ifndef SYZYGY_PDB_GEN_PDB_TYPE_INFO_RECORDS_H_
	#define SYZYGY_PDB_GEN_PDB_TYPE_INFO_RECORDS_H_

	#include "base/strings/string16.h"
	#include "syzygy/common/binary_stream.h"
	#include "syzygy/pdb/pdb_stream_record.h"
	#include "syzygy/pe/cvinfo_ext.h"

	namespace pdb {{

	"""

	# Include and namespace opening of the generated .cc file.
	_HEADER_CC = """\
	#include "syzygy/pdb/gen/pdb_type_info_records.h"

	namespace pdb {{

	"""

	# Namespace closing and end of the include guard of the generated header.
	_FOOTER_H = """\
	}} // namespace pdb

	#endif // SYZYGY_PDB_GEN_PDB_TYPE_INFO_RECORDS_H_
	"""

	# Namespace closing of the generated .cc file.
	_FOOTER_CC = """\
	}} // namespace pdb
	"""

	# Start of a generated class: constructor declaration and the body() accessor.
	_CLASS_HEADER = """\
	class {name} {{
	public:
	{name}();

	// @name Accessors.
	// @{{
	const {cci}::{original_name}& body() const {{ return body_; }}
	"""

	# Getter of an extra field that is returned by const reference.
	_GETTER_REF_DECL_IMPL = """\
	const {type}& {name}() const {{ return {name}_; }}
	"""

	# Getter of an extra field that is returned by value.
	_GETTER_VAL_DECL_IMPL = """\
	{type} {name}() const {{ return {name}_; }}
	"""

	# Accessor of a retyped field: returns the body_ member brace-initialized into
	# the new type.
	_RETYPE_DECL_IMPL = """\
	{type} {name}() const {{ return {{body_.{name}}}; }}
	"""

	# Closes the accessors documentation group opened in _CLASS_HEADER.
	_ACCESSORS_END = """\
	// @}}
	"""

	# has_<name>() predicate generated for extra fields that carry a condition.
	_CONDITION_DECL_IMPL = """\

	bool has_{name}() const {{
	return ({condition_indented});
	}}
	"""

	# Declaration of the data member that stores one extra field.
	_MEMBER_DECL = """\
	{type} {name}_;
	"""

	# Initialize() declaration and the start of the private section.
	_CLASS_MIDDLE = """\

	// Initializes the class from the given pdb stream.
	// @param stream pointer to the pdb stream.
	// @returns true on success, false on failure.
	bool Initialize(common::BinaryStreamParser* stream);

	private:
	// The struct from CVInfo.h which represents this record.
	{cci}::{original_name} body_;
	"""

	# Comment placed in front of the extra field members.
	_EXTRA_FIELDS_DESCRIPTION = """\

	// Additional fields parsed from the pdb stream.
	"""

	# End of a generated class.
	_CLASS_FOOTER = """\
	}};

	"""

	# Number of bytes read into body_ when "first_extra_field" is specified: only
	# the part of the struct that precedes that field.
	_OFFSET_OF = """\
	offsetof({cci}::{original_name}, \
	{first_extra_field});"""

	# Number of bytes read into body_ otherwise: the whole struct.
	_SIZE_OF = """\
	sizeof(body_);"""

	# Start of the constructor definition; the initializer list follows it.
	_INIT_HEADER = """\
	{name}::{name}() : """

	# Empty constructor body followed by the start of Initialize(), which reads
	# {bytes_to_read} bytes into body_.
	_INIT_MIDDLE = """\
	{{}}

	bool {name}::Initialize(common::BinaryStreamParser* stream) {{
	size_t to_read = {bytes_to_read}
	if (!stream->ReadBytes(to_read, &body_))
	return false;
	"""

	# Parsing of an extra field that is only read when its condition holds.
	_INIT_CONDITION = """\
	if (({condition_indented}) &&
	!{parser}(stream, &{name}_)) {{
	return false;
	}}
	"""

	# Parsing of an extra field that is always read.
	_INIT_FIELD = """\
	if (!{parser}(stream, &{name}_))
	return false;
	"""

	# End of Initialize().
	_INIT_FOOTER = """\

	return true;
	}}

	"""

	# Values available to every template. Note that 'year' is evaluated once, when
	# this module is loaded.
	_COMMON_SUBSTITUTES = {
	'cci': 'Microsoft_Cci_Pdb',
	'year': datetime.datetime.now().year,
	'basename': os.path.basename(__file__)
	}

	# Description applied by _FillFields() to extra fields whose "type_name" has no
	# entry in the type dictionary; the type name itself is used as the C++ type.
	_DEFAULT_TYPE = {
	"parser": "ReadBasicType",
	"ret_by_value": "True"
	}

	def _Substitute(str, **more):
	  """Expands a template with the common and the call specific values.

	  Args:
	    str: a str.format() style template.
	    **more: values specific to this call; they take precedence over the
	        entries of _COMMON_SUBSTITUTES with the same name.

	  Returns:
	    The formatted string.
	  """
	  values = dict(_COMMON_SUBSTITUTES, **more)
	  return str.format(**values)


	def _IncreaseIndent(field, indent):
	"""Increases indent of each new line in the given string."""
	return field.replace('\n', '\n' + ' ' * indent)


	def _GenerateClass(pdb_class):
	  """Builds the C++ class declaration of one struct description.

	  As a side effect the 'condition_indented' entry of every conditional extra
	  field is (re)computed for the indentation used inside the class body.

	  Args:
	    pdb_class: struct description completed by _FillFields().

	  Returns:
	    The class declaration as a string.
	  """
	  extra_fields = pdb_class.get('extra_fields', [])
	  pieces = [_Substitute(_CLASS_HEADER, **pdb_class)]

	  # Accessors of the extra fields, by value or by const reference.
	  for extra in extra_fields:
	    if extra.get('ret_by_value') == 'True':
	      pieces.append(_Substitute(_GETTER_VAL_DECL_IMPL, **extra))
	    else:
	      pieces.append(_Substitute(_GETTER_REF_DECL_IMPL, **extra))

	  # Accessors of the retyped fields.
	  for retyped in pdb_class.get('retyped_fields', []):
	    pieces.append(_Substitute(_RETYPE_DECL_IMPL, **retyped))

	  pieces.append(_Substitute(_ACCESSORS_END))

	  # has_<name>() predicates of the conditional fields.
	  for extra in extra_fields:
	    if 'condition' in extra:
	      extra['condition_indented'] = _IncreaseIndent(extra['condition'], 11)
	      pieces.append(_Substitute(_CONDITION_DECL_IMPL, **extra))

	  pieces.append(_Substitute(_CLASS_MIDDLE, **pdb_class))

	  # Data members holding the extra fields.
	  if extra_fields:
	    pieces.append(_Substitute(_EXTRA_FIELDS_DESCRIPTION))

	  for extra in extra_fields:
	    pieces.append(_Substitute(_MEMBER_DECL, **extra))

	  pieces.append(_Substitute(_CLASS_FOOTER, **pdb_class))
	  return ''.join(pieces)


	def _GenerateInit(pdb_class):
	  """Builds the constructor and Initialize() definitions of one struct.

	  As a side effect 'bytes_to_read' is stored in |pdb_class| and the
	  'condition_indented' entry of every conditional extra field is (re)computed
	  for the indentation used inside Initialize().

	  Args:
	    pdb_class: struct description completed by _FillFields().

	  Returns:
	    The definitions as a string.
	  """
	  # Read only up to the first extra field when there is one, otherwise read
	  # the whole struct.
	  size = _OFFSET_OF if 'first_extra_field' in pdb_class else _SIZE_OF
	  pdb_class['bytes_to_read'] = _Substitute(size, **pdb_class)

	  code = _Substitute(_INIT_HEADER, **pdb_class)

	  # The members after body_ are aligned below it in the initializer list.
	  indent = len(code)
	  initializers = ['body_{}']
	  for field in pdb_class.get('extra_fields', []):
	    initializers.append(' ' * indent + field['name'] + '_{}')

	  code += ',\n'.join(initializers)
	  code += _Substitute(_INIT_MIDDLE, **pdb_class)

	  for field in pdb_class.get('extra_fields', []):
	    if 'condition' in field:
	      field['condition_indented'] = _IncreaseIndent(field['condition'], 6)
	      code += _Substitute(_INIT_CONDITION, **field)
	    else:
	      code += _Substitute(_INIT_FIELD, **field)

	  code += _Substitute(_INIT_FOOTER, **pdb_class)
	  return code


	def _GenerateHeaderFile(data):
	  """Returns the contents of the generated header file.

	  Args:
	    data: iterable of struct descriptions completed by _FillFields().
	  """
	  code = _Substitute(_LICENSE)
	  code += _Substitute(_HEADER_H)

	  for pdb_class in data:
	    code += _GenerateClass(pdb_class)

	  code += _Substitute(_FOOTER_H)
	  return code


	def _GenerateCcFile(data):
	  """Returns the contents of the generated cc file.

	  Args:
	    data: iterable of struct descriptions completed by _FillFields().
	  """
	  code = _Substitute(_LICENSE)
	  code += _Substitute(_HEADER_CC)

	  for pdb_class in data:
	    code += _GenerateInit(pdb_class)

	  code += _Substitute(_FOOTER_CC)
	  return code


	def _WriteFile(file_name, contents):
	"""Write given string to a given file path."""
	dir = os.path.dirname(__file__)
	with open(os.path.join(dir, file_name), 'wb') as f:
	f.write(contents)


	# Usage text handed to OptionParser in main(); optparse replaces %prog with the
	# name of the running program.
	_USAGE = """\
	%prog [file to process]

	Generate wrappers around CVInfo structs as described in the given json file."""


	def ascii_encode_dict(data):
	  """Converts the unicode keys and values of a dict to regular strings.

	  Needed because of a bug in Python 2.6 which causes a failure when a unicode
	  keyword is passed to _Substitute(). On Python 3, where every string already
	  is a valid keyword, the entries are copied unchanged.

	  Args:
	    data: the dict to convert, as handed to a json object_hook.

	  Returns:
	    A new dict. Only the top-level keys and values are converted; everything
	    that is not a unicode string is kept as is.
	  """
	  try:
	    text_type = unicode
	  except NameError:
	    # Python 3 has no separate unicode type, there is nothing to convert.
	    return dict(data.items())

	  def _ToNative(value):
	    return value.encode('ascii') if isinstance(value, text_type) else value

	  return dict((_ToNative(key), _ToNative(value))
	              for key, value in data.items())


	def _FillFields(pdb_class, types_definition):
	"""Adds type information to fields of a struct."""
	pdb_class.setdefault('original_name', pdb_class['name'])
	for field in pdb_class.get('extra_fields', []):
	if field['type_name'] in types_definition:
	field.update(types_definition[field['type_name']])
	else:
	default_definition = _DEFAULT_TYPE.copy()
	default_definition.setdefault('type', field['type_name'])
	field.update(default_definition)
	return pdb_class


	def _CompleteTypes(pdb_structs, types_definition):
	"""Adds types information of fields to each struct"""
	return [_FillFields(struct, types_definition) for struct in pdb_structs]


	def main():
	  """Generates the wrappers described by the json file given as argument.

	  The .h and .cc files are written to the 'gen' directory located next to
	  this script and are named 'pdb_' followed by the base name of the input
	  file. Exits through parser.error() unless exactly one input file is given.
	  """
	  parser = OptionParser(usage=_USAGE)
	  (_, input_files) = parser.parse_args()

	  # The first argument might be the current script name, remove it and make
	  # sure that there's exactly one input file.
	  if __file__ in input_files:
	    input_files.remove(__file__)
	  if len(input_files) != 1:
	    parser.error('You must specify exactly one input file.')
	  input_file = input_files[0]

	  # Only the parsing needs the input file, close it before generating.
	  with open(input_file) as data_file:
	    data = json.load(data_file, object_hook=ascii_encode_dict)

	  # Without a "types" entry every extra field falls back to the default type.
	  pdb_structs = _CompleteTypes(data['structs'], data.get('types', {}))
	  header_file = _GenerateHeaderFile(pdb_structs)
	  cc_file = _GenerateCcFile(pdb_structs)

	  # os.path.join() keeps the output path valid on platforms whose separator
	  # is not a backslash.
	  file_name = os.path.splitext(os.path.basename(input_file))[0]
	  _WriteFile(os.path.join('gen', 'pdb_' + file_name + '.h'), header_file)
	  _WriteFile(os.path.join('gen', 'pdb_' + file_name + '.cc'), cc_file)


	# Run the generator only when this file is executed as a script.
	if __name__ == '__main__':
	  main()