scripts/slave/chromium/package_index.py - chromium/tools/build - Git at Google

 #!/usr/bin/env python
 # Copyright 2014 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """A tool to create a compilation index pack and upload it to Google Storage."""


 import argparse
 import hashlib
 import json
 import os
 import shutil
 import sys
 import tempfile
 import time

 from common import chromium_utils
 from contextlib import closing


 class IndexPack(object):
   """Class used to create an index pack to be indexed by Kythe."""

   def __init__(self, compdb_path, corpus=None, root=None, revision=None,
                out_dir='src/out/Debug'):
     """Initializes IndexPack.

     Args:
       compdb_path: path to the compilation database.
       corpus: the corpus to use for the generated Kythe VNames, e.g. 'chromium'.
         A VName identifies a node in the Kythe index. For more details, see:
         https://kythe.io/docs/kythe-storage.html
       root: the root to use for the generated Kythe VNames (optional)
       revision: the revision of the files being indexed
       out_dir: The output directory from which compilation was run.
     """
     if corpus is None:
       raise Exception('ERROR: --corpus required')
     if revision is None:
       raise Exception('ERROR: --revision required')

     with open(compdb_path, 'rb') as json_commands_file:
       # The list of JSON dictionaries, each describing one compilation unit.
       self.json_dictionaries = json.load(json_commands_file)
     self.corpus = corpus
     self.root = root
     self.revision = revision
     self.out_dir = out_dir
     # Maps from source file name to the SHA256 hash of its content.
     self.filehashes = {}
     # Maps from source file name to the file size.
     self.filesizes = {}
     # Create a temporary data directory. The structure is as follows:
     # A root directory (arbitrary name) with two subdirectories. The
     # subdirectory 'files' should contain all source files involved in any
     # compilation unit. The subdirectory 'units' describes the compilation units
     # in JSON format.
     self.index_directory = tempfile.mkdtemp()
     print 'Storing the index pack files in ' + self.index_directory
     # Path for the files directory within the index directory
     self.files_directory = os.path.join(self.index_directory, 'files')
     # Path for the units directory within the index directory
     self.units_directory = os.path.join(self.index_directory, 'units')
     os.makedirs(self.files_directory)
     os.makedirs(self.units_directory)

   def close(self):
     """Cleans up any temporary dirs created in the constructor."""
     shutil.rmtree(self.index_directory)

   def _GenerateDataFiles(self):
     """A function which produces the data files for the index pack.

     Each file is a copy of a source file which is needed for at least one
     compilation unit.
     """

     # Keeps track of the '*.filepaths' files already processed.
     filepaths = set()
     # Process all entries in the compilation database.
     for entry in self.json_dictionaries:
       filepath = os.path.join(entry['directory'], entry['file'] + '.filepaths')
       print 'Extract source files from %s' % filepath

       # We don't want to fail if one of the filepaths doesn't exist. However we
       # keep track of it.
       if not os.path.exists(filepath):
         print 'missing ' + filepath
         continue

       # For some reason, the compilation database contains the same targets more
       # than once. However we have just one file containing the file paths of
       # the involved files. So we can skip this entry if we already processed
       # it.
       if filepath in filepaths:
         continue
       filepaths.add(filepath)

       # All file paths given in the *.filepaths file are either absolute paths
       # or relative to the directory entry in the compilation database.
       with open(filepath, 'rb') as filepaths_file:
         # Each line in the '*.filepaths' file references the path to a source
         # file involved in the compilation.
         for line in filepaths_file:
           fname = os.path.join(entry['directory'],
                                line.strip().replace('//', '/'))
           # We should not package builtin clang header files, see
           # crbug.com/513826
           if 'third_party/llvm-build' in fname:
             continue
           if fname not in self.filehashes:
             # Derive the new filename from the SHA256 hash.
             with open(fname, 'rb') as source_file:
               content = source_file.read()
             content_hash = hashlib.sha256(content).hexdigest()
             self.filehashes[fname] = content_hash
             self.filesizes[fname] = len(content)
             file_name = os.path.join(self.files_directory, content_hash)
             print ' Including source file %s as %s for compilation' % (
                 fname, file_name)
             with open(file_name, 'wb') as f:
               f.write(content)

   def _GenerateUnitFiles(self):
     """A function which produces the unit files for the index pack.

     A unit file consists of a JSON dump of this format:
     {
       'source_file': [<name of the cc or c file>],
       'output_key': <path to the file generated by this compilation unit>,
       'argument': <list of compilation parameters>,
       'v_name': {
         'corpus': <a corpus such as chromium>,
         'root': <a build config such as chromium-linux>,
       },
       'revision': <the hash of the commit containing the files being indexed>,
       'required_input': [
         {
           'v_name': {
             'corpus': <a corpus such as chromium>,
             'root': <a build config such as chromium-linux>,
             'path': '<path to the source file relative to the root and with
                       relativizing particles ('.', '..') removed>
           },
           'info': {
             'path': <path to the source file>,
             'digest': <SHA256 hash of the contents of the source file>,
           }
         },
         ...
       ]
     }
     """

     # Keeps track of the '*.filepaths' files already processed.
     filepaths = set()

     # Process all entries in the compilation database.
     for entry in self.json_dictionaries:
       filepath = os.path.join(entry['directory'], entry['file'] + '.filepaths')
       if not os.path.exists(filepath) or filepath in filepaths:
         continue
       filepaths.add(filepath)

       # For each compilation unit, generate a dictionary in the format described
       # above.
       unit_dictionary = {}

       print 'Generating Translation Unit data for %s' % entry['file']
       print 'Compile command: %s' % entry['command']

       command_list = entry['command'].split()
       # The |command_list| starts with the compiler that was used for the
       # compilation. In the unit file we want to have just the parameters passed
       # to the compiler (which always needs to be clang/clang++). Currently,
       # |command_list| starts with the path to the goma executable followed by
       # the path to the clang executable, but it is safe to assume that the
       # first entry in the list after the clang executable will be the first
       # real parameter.
       for i in range(len(command_list)):
         if 'clang' in command_list[i]:
           # Shorten the list of commands such that it starts after the path to
           # the clang executable with the first real parameter.
           command_list = command_list[i + 1:]
           break

       # Extract the output file argument
       output_file = None
       for i in range(len(command_list)):
         if command_list[i] == '-o' and i + 1 < len(command_list):
           output_file = command_list[i + 1]
           break
         elif command_list[i].startswith('/Fo'):
           # Handle the Windows case.
           output_file = command_list[i][len('/Fo'):]
           break
       if not output_file:
         print 'No output file path found for %s' % entry['file']

       required_inputs = []
       include_paths = set()
       with open(filepath, 'rb') as filepaths_file:
         for line in filepaths_file:
           fname = line.strip()
           # We should not package builtin clang header files, see
           # crbug.com/513826
           if 'third_party/llvm-build' in fname:
             continue
           # The clang tool uses '//' to separate the system path where system
           # headers can be found from the relative path used in the #include
           # statement.
           if '//' in fname:
             path = fname.split('//')
             fname = '/'.join(path)
           fname_fullpath = os.path.join(entry['directory'], fname)
           if fname_fullpath not in self.filesizes:
             print 'No information about required input file %s' % fname_fullpath
             continue

           # Handle absolute paths - when normalizing we assume paths are
           # relative to the output directory (e.g. src/out/Debug).
           if os.path.isabs(fname):
             fname = os.path.relpath(fname, entry['directory'])

           normalized_fname = os.path.normpath(os.path.join(self.out_dir, fname))
           required_input = {
               'v_name': {
                   'corpus': self.corpus,
                   'path': normalized_fname,
               }
           }

           # Add the VName root only if it was specified.
           if self.root:
             required_input['v_name']['root'] = self.root

           required_input['info'] = {
               'path': fname,
               'digest': self.filehashes[fname_fullpath],
           }

           required_inputs.append(required_input)

       unit_dictionary['source_file'] = [entry['file']]
       unit_dictionary['output_key'] = output_file
       unit_dictionary['v_name'] = {
           'corpus': self.corpus,
       }
       # Add the VName root only if it was specified.
       if self.root:
         unit_dictionary['v_name']['root'] = self.root
       unit_dictionary['revision'] = self.revision

       # Add the include paths to the list of compile arguments; also disable all
       # warnings so that the indexer can run successfully. The job of the
       # indexer is to index the code, not to verify it. Warnings we actually
       # care about would show up in the compile step. And the -nostdinc++ flag
       # tells the indexer that it does not need to add any additional -isystem
       # arguments itself.
       unit_dictionary['argument'] = (
           list(include_paths) + command_list + ['-w', '-nostdinc++']
       )
       unit_dictionary['required_input'] = required_inputs

       print "Unit argument: %s" % unit_dictionary['argument']

       wrapper = {
           'unit': unit_dictionary
       }

       # Dump the dictionary in JSON format.
       unit_file_content = json.dumps(wrapper)
       unit_file_content_hash = hashlib.sha256(unit_file_content).hexdigest()
       unit_file_path = os.path.join(self.units_directory,
                                     unit_file_content_hash)
       with open(unit_file_path, 'wb') as unit_file:
         unit_file.write(unit_file_content)
       print 'Wrote compilation unit file %s' % unit_file_path

   def GenerateIndexPack(self):
     """Generates the index pack.

     An index pack consists of data files (the source and header files) and unit
     files (describing one compilation unit each).
     """

     # Generate the source files.
     # This needs to be called first before calling _GenerateUnitFiles()
     self._GenerateDataFiles()

     # Generate the unit files.
     self._GenerateUnitFiles()

   def CreateArchive(self, filepath):
     """Creates a zip archive containing the index pack.

     Args:
       filepath: The filepath where the index pack archive should be stored.
     Raises:
       Exception: The zip command failed to create the archive
     """

     # Remove the old zip archive (if it exists). This avoids that the new index
     # pack is just added to the old zip archive.
     if os.path.exists(filepath):
       os.remove(filepath)

     # Run the command in the parent directory of the index pack and use a
     # relative path for the index pack to get rid of any path prefix. The format
     # specification requires that the archive contains one folder with an
     # arbitrary name directly containing the 'units' and 'files' directories.
     if chromium_utils.RunCommand(
         ['zip', '-r', filepath, os.path.basename(self.index_directory)],
         cwd=os.path.dirname(self.index_directory)) != 0:
       raise Exception('ERROR: failed to create %s, exiting' % filepath)


 def main():
   parser = argparse.ArgumentParser()
   parser.add_argument('--path-to-archive-output',
                       required=True,
                       help='path to index pack archive to be generated')
   parser.add_argument('--path-to-compdb',
                       required=True,
                       help='path to the compilation database')
   parser.add_argument('--corpus',
                       default='chromium-linux',
                       help='the kythe corpus to use for the vname')
   parser.add_argument('--root',
                       help='the kythe root to use for the vname')
   parser.add_argument('--revision',
                       help='the revision of the files being indexed')
   parser.add_argument('--out_dir',
                       default='src/out/Debug',
                       help='the output directory from which compilation is run')
   parser.add_argument('--keep-filepaths-files',
                       help='keep the .filepaths files used for index pack '
                       'generation',
                       action='store_true')
   options = parser.parse_args()

   print '%s: Index generation...' % time.strftime('%X')
   with closing(IndexPack(options.path_to_compdb, options.corpus, options.root,
                          options.revision, options.out_dir)) as index_pack:
     index_pack.GenerateIndexPack()

     if not options.keep_filepaths_files:
       # Clean up the *.filepaths files.
       chromium_utils.RemoveFilesWildcards(
           '*.filepaths', os.path.join(os.getcwd(), 'src'))

     # Create the archive containing the generated files.
     index_pack.CreateArchive(options.path_to_archive_output)

   print '%s: Done.' % time.strftime('%X')
   return 0


 if '__main__' == __name__:
   sys.exit(main())
	#!/usr/bin/env python
	# Copyright 2014 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	"""A tool to create a compilation index pack and upload it to Google Storage."""


	import argparse
	import hashlib
	import json
	import os
	import shutil
	import sys
	import tempfile
	import time

	from common import chromium_utils
	from contextlib import closing


	class IndexPack(object):
	"""Class used to create an index pack to be indexed by Kythe."""

	def __init__(self, compdb_path, corpus=None, root=None, revision=None,
	out_dir='src/out/Debug'):
	"""Initializes IndexPack.

	Args:
	compdb_path: path to the compilation database.
	corpus: the corpus to use for the generated Kythe VNames, e.g. 'chromium'.
	A VName identifies a node in the Kythe index. For more details, see:
	https://kythe.io/docs/kythe-storage.html
	root: the root to use for the generated Kythe VNames (optional)
	revision: the revision of the files being indexed
	out_dir: The output directory from which compilation was run.
	"""
	if corpus is None:
	raise Exception('ERROR: --corpus required')
	if revision is None:
	raise Exception('ERROR: --revision required')

	with open(compdb_path, 'rb') as json_commands_file:
	# The list of JSON dictionaries, each describing one compilation unit.
	self.json_dictionaries = json.load(json_commands_file)
	self.corpus = corpus
	self.root = root
	self.revision = revision
	self.out_dir = out_dir
	# Maps from source file name to the SHA256 hash of its content.
	self.filehashes = {}
	# Maps from source file name to the file size.
	self.filesizes = {}
	# Create a temporary data directory. The structure is as follows:
	# A root directory (arbitrary name) with two subdirectories. The
	# subdirectory 'files' should contain all source files involved in any
	# compilation unit. The subdirectory 'units' describes the compilation units
	# in JSON format.
	self.index_directory = tempfile.mkdtemp()
	print 'Storing the index pack files in ' + self.index_directory
	# Path for the files directory within the index directory
	self.files_directory = os.path.join(self.index_directory, 'files')
	# Path for the units directory within the index directory
	self.units_directory = os.path.join(self.index_directory, 'units')
	os.makedirs(self.files_directory)
	os.makedirs(self.units_directory)

	def close(self):
	"""Cleans up any temporary dirs created in the constructor."""
	shutil.rmtree(self.index_directory)

	def _GenerateDataFiles(self):
	"""A function which produces the data files for the index pack.

	Each file is a copy of a source file which is needed for at least one
	compilation unit.
	"""

	# Keeps track of the '*.filepaths' files already processed.
	filepaths = set()
	# Process all entries in the compilation database.
	for entry in self.json_dictionaries:
	filepath = os.path.join(entry['directory'], entry['file'] + '.filepaths')
	print 'Extract source files from %s' % filepath

	# We don't want to fail if one of the filepaths doesn't exist. However we
	# keep track of it.
	if not os.path.exists(filepath):
	print 'missing ' + filepath
	continue

	# For some reason, the compilation database contains the same targets more
	# than once. However we have just one file containing the file paths of
	# the involved files. So we can skip this entry if we already processed
	# it.
	if filepath in filepaths:
	continue
	filepaths.add(filepath)

	# All file paths given in the *.filepaths file are either absolute paths
	# or relative to the directory entry in the compilation database.
	with open(filepath, 'rb') as filepaths_file:
	# Each line in the '*.filepaths' file references the path to a source
	# file involved in the compilation.
	for line in filepaths_file:
	fname = os.path.join(entry['directory'],
	line.strip().replace('//', '/'))
	# We should not package builtin clang header files, see
	# crbug.com/513826
	if 'third_party/llvm-build' in fname:
	continue
	if fname not in self.filehashes:
	# Derive the new filename from the SHA256 hash.
	with open(fname, 'rb') as source_file:
	content = source_file.read()
	content_hash = hashlib.sha256(content).hexdigest()
	self.filehashes[fname] = content_hash
	self.filesizes[fname] = len(content)
	file_name = os.path.join(self.files_directory, content_hash)
	print ' Including source file %s as %s for compilation' % (
	fname, file_name)
	with open(file_name, 'wb') as f:
	f.write(content)

	def _GenerateUnitFiles(self):
	"""A function which produces the unit files for the index pack.

	A unit file consists of a JSON dump of this format:
	{
	'source_file': [<name of the cc or c file>],
	'output_key': <path to the file generated by this compilation unit>,
	'argument': <list of compilation parameters>,
	'v_name': {
	'corpus': <a corpus such as chromium>,
	'root': <a build config such as chromium-linux>,
	},
	'revision': <the hash of the commit containing the files being indexed>,
	'required_input': [
	{
	'v_name': {
	'corpus': <a corpus such as chromium>,
	'root': <a build config such as chromium-linux>,
	'path': '<path to the source file relative to the root and with
	relativizing particles ('.', '..') removed>
	},
	'info': {
	'path': <path to the source file>,
	'digest': <SHA256 hash of the contents of the source file>,
	}
	},
	...
	]
	}
	"""

	# Keeps track of the '*.filepaths' files already processed.
	filepaths = set()

	# Process all entries in the compilation database.
	for entry in self.json_dictionaries:
	filepath = os.path.join(entry['directory'], entry['file'] + '.filepaths')
	if not os.path.exists(filepath) or filepath in filepaths:
	continue
	filepaths.add(filepath)

	# For each compilation unit, generate a dictionary in the format described
	# above.
	unit_dictionary = {}

	print 'Generating Translation Unit data for %s' % entry['file']
	print 'Compile command: %s' % entry['command']

	command_list = entry['command'].split()
	# The \|command_list\| starts with the compiler that was used for the
	# compilation. In the unit file we want to have just the parameters passed
	# to the compiler (which always needs to be clang/clang++). Currently,
	# \|command_list\| starts with the path to the goma executable followed by
	# the path to the clang executable, but it is safe to assume that the
	# first entry in the list after the clang executable will be the first
	# real parameter.
	for i in range(len(command_list)):
	if 'clang' in command_list[i]:
	# Shorten the list of commands such that it starts after the path to
	# the clang executable with the first real parameter.
	command_list = command_list[i + 1:]
	break

	# Extract the output file argument
	output_file = None
	for i in range(len(command_list)):
	if command_list[i] == '-o' and i + 1 < len(command_list):
	output_file = command_list[i + 1]
	break
	elif command_list[i].startswith('/Fo'):
	# Handle the Windows case.
	output_file = command_list[i][len('/Fo'):]
	break
	if not output_file:
	print 'No output file path found for %s' % entry['file']

	required_inputs = []
	include_paths = set()
	with open(filepath, 'rb') as filepaths_file:
	for line in filepaths_file:
	fname = line.strip()
	# We should not package builtin clang header files, see
	# crbug.com/513826
	if 'third_party/llvm-build' in fname:
	continue
	# The clang tool uses '//' to separate the system path where system
	# headers can be found from the relative path used in the #include
	# statement.
	if '//' in fname:
	path = fname.split('//')
	fname = '/'.join(path)
	fname_fullpath = os.path.join(entry['directory'], fname)
	if fname_fullpath not in self.filesizes:
	print 'No information about required input file %s' % fname_fullpath
	continue

	# Handle absolute paths - when normalizing we assume paths are
	# relative to the output directory (e.g. src/out/Debug).
	if os.path.isabs(fname):
	fname = os.path.relpath(fname, entry['directory'])

	normalized_fname = os.path.normpath(os.path.join(self.out_dir, fname))
	required_input = {
	'v_name': {
	'corpus': self.corpus,
	'path': normalized_fname,
	}
	}

	# Add the VName root only if it was specified.
	if self.root:
	required_input['v_name']['root'] = self.root

	required_input['info'] = {
	'path': fname,
	'digest': self.filehashes[fname_fullpath],
	}

	required_inputs.append(required_input)

	unit_dictionary['source_file'] = [entry['file']]
	unit_dictionary['output_key'] = output_file
	unit_dictionary['v_name'] = {
	'corpus': self.corpus,
	}
	# Add the VName root only if it was specified.
	if self.root:
	unit_dictionary['v_name']['root'] = self.root
	unit_dictionary['revision'] = self.revision

	# Add the include paths to the list of compile arguments; also disable all
	# warnings so that the indexer can run successfully. The job of the
	# indexer is to index the code, not to verify it. Warnings we actually
	# care about would show up in the compile step. And the -nostdinc++ flag
	# tells the indexer that it does not need to add any additional -isystem
	# arguments itself.
	unit_dictionary['argument'] = (
	list(include_paths) + command_list + ['-w', '-nostdinc++']
	)
	unit_dictionary['required_input'] = required_inputs

	print "Unit argument: %s" % unit_dictionary['argument']

	wrapper = {
	'unit': unit_dictionary
	}

	# Dump the dictionary in JSON format.
	unit_file_content = json.dumps(wrapper)
	unit_file_content_hash = hashlib.sha256(unit_file_content).hexdigest()
	unit_file_path = os.path.join(self.units_directory,
	unit_file_content_hash)
	with open(unit_file_path, 'wb') as unit_file:
	unit_file.write(unit_file_content)
	print 'Wrote compilation unit file %s' % unit_file_path

	def GenerateIndexPack(self):
	"""Generates the index pack.

	An index pack consists of data files (the source and header files) and unit
	files (describing one compilation unit each).
	"""

	# Generate the source files.
	# This needs to be called first before calling _GenerateUnitFiles()
	self._GenerateDataFiles()

	# Generate the unit files.
	self._GenerateUnitFiles()

	def CreateArchive(self, filepath):
	"""Creates a zip archive containing the index pack.

	Args:
	filepath: The filepath where the index pack archive should be stored.
	Raises:
	Exception: The zip command failed to create the archive
	"""

	# Remove the old zip archive (if it exists). This avoids that the new index
	# pack is just added to the old zip archive.
	if os.path.exists(filepath):
	os.remove(filepath)

	# Run the command in the parent directory of the index pack and use a
	# relative path for the index pack to get rid of any path prefix. The format
	# specification requires that the archive contains one folder with an
	# arbitrary name directly containing the 'units' and 'files' directories.
	if chromium_utils.RunCommand(
	['zip', '-r', filepath, os.path.basename(self.index_directory)],
	cwd=os.path.dirname(self.index_directory)) != 0:
	raise Exception('ERROR: failed to create %s, exiting' % filepath)


	def main():
	parser = argparse.ArgumentParser()
	parser.add_argument('--path-to-archive-output',
	required=True,
	help='path to index pack archive to be generated')
	parser.add_argument('--path-to-compdb',
	required=True,
	help='path to the compilation database')
	parser.add_argument('--corpus',
	default='chromium-linux',
	help='the kythe corpus to use for the vname')
	parser.add_argument('--root',
	help='the kythe root to use for the vname')
	parser.add_argument('--revision',
	help='the revision of the files being indexed')
	parser.add_argument('--out_dir',
	default='src/out/Debug',
	help='the output directory from which compilation is run')
	parser.add_argument('--keep-filepaths-files',
	help='keep the .filepaths files used for index pack '
	'generation',
	action='store_true')
	options = parser.parse_args()

	print '%s: Index generation...' % time.strftime('%X')
	with closing(IndexPack(options.path_to_compdb, options.corpus, options.root,
	options.revision, options.out_dir)) as index_pack:
	index_pack.GenerateIndexPack()

	if not options.keep_filepaths_files:
	# Clean up the *.filepaths files.
	chromium_utils.RemoveFilesWildcards(
	'*.filepaths', os.path.join(os.getcwd(), 'src'))

	# Create the archive containing the generated files.
	index_pack.CreateArchive(options.path_to_archive_output)

	print '%s: Done.' % time.strftime('%X')
	return 0


	if '__main__' == __name__:
	sys.exit(main())