blob: ab2ca0b989ecf5ab16d10cfd56935efa18ad6843 [file] [log] [blame]
# Copyright 2020 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Functions to merge multiple JavaScript coverage files into one"""
import base64
import logging
import json
import os
import sys
_HERE_PATH = os.path.dirname(__file__)
_THIRD_PARTY_PATH = os.path.normpath(
os.path.join(_HERE_PATH, '..', '..', '..', 'third_party'))
_SRC_PATH = os.path.normpath(os.path.join(_HERE_PATH, '..', '..', '..'))
sys.path.append(os.path.join(_THIRD_PARTY_PATH, 'node'))
sys.path.append(os.path.join(_THIRD_PARTY_PATH, 'js_code_coverage'))
import node
import coverage_modules
logging.basicConfig(format='[%(asctime)s %(levelname)s] %(message)s',
level=logging.DEBUG)
def _parse_json_file(path):
"""Opens file and parses data into JSON
Args:
path (str): The path to a JSON file to parse.
"""
with open(path, 'r') as json_file:
return json.load(json_file)
def _peek_last(stack):
"""Returns the top element of stack or None"""
return stack[-1] if stack else None
def _convert_to_disjoint_segments(ranges):
    """Converts a list of v8 CoverageRanges into a list of disjoint segments.

    A v8 CoverageRange is a JSON object describing the start and end
    character offsets of a block of instrumented JavaScript code:
    https://chromedevtools.github.io/devtools-protocol/tot/Profiler/#type-CoverageRange
    CoverageRange is defined by the ranges field from a v8 FunctionCoverage:
    https://chromedevtools.github.io/devtools-protocol/tot/Profiler/#type-FunctionCoverage

    The |ranges| list is stable-sorted in ascending order of startOffset,
    which pulls each group of related CoverageRanges together. Groups nest
    recursively:
      - The first range in a group covers the whole function being measured.
      - Child ranges mark unexecuted code, unless they are themselves
        parents, in which case the nesting continues.

    For example, for the arrow function:
        exports.test = arg => { return arg ? 'y' : 'n' }
    a single call test(true) produces:
        [
          { "startOffset": 0,  "endOffset": 48, "count": 1 },  // Range 1
          { "startOffset": 15, "endOffset": 48, "count": 1 },  // Range 2
          { "startOffset": 41, "endOffset": 46, "count": 0 },  // Range 3
        ]
    Range 1 covers the entire script, Range 2 the function from its
    parameter through the closing brace, and Range 3 records that offsets
    [41, 46) never ran. Calling test(true) twice only bumps Range 2's count
    to 2 while Range 1 stays at 1 — i.e. the innermost parent range's count
    is the total invocation count.

    TODO(benreich): Write up more extensive documentation.

    Args:
        ranges (list): v8 CoverageRanges merged from multiple
            FunctionCoverage entries. Their original relative order from the
            v8 output must be preserved.

    Returns:
        A list of dicts, each of the form:
            {
                count: Number of invocations of this range
                end: Exclusive character offset for the end of this range
            }
    """
    open_ranges = []
    result = []

    # pylint: disable=unsupported-assignment-operation
    def _push_segment(end, count):
        """Record that invocation |count| extends up to offset |end|.

        If the most recent segment already ends at |end|, do nothing;
        if it carries the same count, just extend it. Zero-length
        leading segments (end == 0) are dropped.

        Args:
            end (number): The end character offset for the range
            count (number): The invocation count for the range
        """
        newest = _peek_last(result)
        if newest is not None:
            if newest['end'] == end:
                return
            if newest['count'] == count:
                newest['end'] = end
                return
        if end == 0:
            return
        result.append({'end': end, 'count': count})

    # Stable sort (Python's sort is stable) by start offset.
    ranges.sort(key=lambda r: r['startOffset'])
    for coverage_range in ranges:
        innermost = _peek_last(open_ranges)
        # Close out every open range that ends before this one starts.
        while innermost and innermost['endOffset'] <= coverage_range[
                'startOffset']:
            _push_segment(innermost['endOffset'], innermost['count'])
            open_ranges.pop()
            innermost = _peek_last(open_ranges)
        _push_segment(coverage_range['startOffset'],
                      innermost['count'] if innermost else 0)
        open_ranges.append(coverage_range)

    # Flush whatever is still open once all starts are processed.
    while open_ranges:
        closed = open_ranges.pop()
        _push_segment(closed['endOffset'], closed['count'])
    return result
# pylint: enable=unsupported-assignment-operation
# pylint: disable=unsupported-assignment-operation
def _merge_segments(segments_a, segments_b):
    """Merges 2 lists of disjoint segments into one.

    Both inputs must be in the format produced by
    _convert_to_disjoint_segments. Overlapping segments have their
    invocation counts summed; partial overlaps are split into contiguous
    pieces carrying the appropriate combined counts.

    Args:
        segments_a (list): A list of disjoint segments.
        segments_b (list): A list of disjoint segments.

    Returns:
        A list of disjoint segments.
    """
    merged = []
    idx_a = 0
    idx_b = 0
    while idx_a < len(segments_a) and idx_b < len(segments_b):
        seg_a = segments_a[idx_a]
        seg_b = segments_b[idx_b]
        combined_count = seg_a.get('count', 0) + seg_b.get('count', 0)
        boundary = min(seg_a['end'], seg_b['end'])
        newest = _peek_last(merged)
        # Extend the most recent segment when its count matches,
        # otherwise start a fresh segment at this boundary.
        if newest is not None and newest['count'] == combined_count:
            newest['end'] = boundary
        else:
            merged.append({'end': boundary, 'count': combined_count})
        if seg_a['end'] <= seg_b['end']:
            idx_a += 1
        if seg_b['end'] <= seg_a['end']:
            idx_b += 1
    # One list may run past the other; carry its remainder over as-is.
    merged.extend(segments_a[idx_a:])
    merged.extend(segments_b[idx_b:])
    return merged
# pylint: enable=unsupported-assignment-operation
def _get_paths_with_suffix(input_dir, suffix):
"""Gets all JSON files in the input directory.
Args:
input_dir (str): The path to recursively search for
JSON files.
Returns:
A list of absolute file paths.
"""
paths = []
for dir_path, _sub_dirs, file_names in os.walk(input_dir):
paths.extend([
os.path.join(dir_path, fn) for fn in file_names
if fn.endswith(suffix)
])
return paths
def merge_coverage_files(coverage_dir, output_path):
    """Merge all coverages in the coverage dir into a single file.

    Args:
        coverage_dir (str): Path to all the raw JavaScript coverage files.
        output_path (str): Path to the location to output merged coverage.

    Returns:
        None when no coverage files are found, otherwise the number of
        characters written to |output_path|.
    """
    merged_by_url = {}
    coverage_files = _get_paths_with_suffix(coverage_dir, '.cov.json')
    if not coverage_files:
        logging.info('No JavaScript coverage files found in %s', coverage_dir)
        return None
    for coverage_file in coverage_files:
        contents = _parse_json_file(coverage_file)
        if 'result' not in contents:
            raise RuntimeError('%r does not have a result field' %
                               coverage_file)
        for script_coverage in contents['result']:
            url = script_coverage['url']
            # Ignore files with paths that have not been rewritten.
            # Files can rewrite paths by appending a //# sourceURL=
            # comment.
            if not url.startswith('//'):
                continue
            # Flatten every function's ranges into one list, preserving
            # their original relative order.
            flattened_ranges = [
                range_coverage
                for function_coverage in script_coverage['functions']
                for range_coverage in function_coverage['ranges']
            ]
            merged_by_url[url] = _merge_segments(
                merged_by_url.get(url, []),
                _convert_to_disjoint_segments(flattened_ranges))
    with open(output_path, 'w') as merged_coverage_file:
        return merged_coverage_file.write(json.dumps(merged_by_url))
def write_parsed_scripts(task_output_dir, source_dir=_SRC_PATH):
    """Extract parsed script contents and write back to original folder
    structure.

    Args:
        task_output_dir (str): The output directory for the sharded task. This will
            contain the raw JavaScript v8 parsed files that are identified by
            their ".js.json" suffix.
        source_dir (str): Root directory that sourcemap source paths are
            made relative to. Defaults to the chromium/src root.

    Returns:
        The absolute file path to the raw parsed scripts or None if no parsed
        scripts were identified (or any of the raw data contains invalid JSON).
    """
    _SOURCEMAPPING_DATA_URL_PREFIX = 'data:application/json;base64,'
    scripts = _get_paths_with_suffix(task_output_dir, '.js.json')
    output_dir = os.path.join(task_output_dir, 'parsed_scripts')
    # The original file is extracted from the inline sourcemaps, this
    # information is not available from the coverage data. So we have to
    # maintain a URL to path map to ensure the coverage data knows the original
    # source location.
    url_to_path_map = {}
    if not scripts:
        return None
    for file_path in scripts:
        # TODO(crbug.com/1224786): Some of the raw script data is being saved
        # with a trailing curly brace leading to invalid JSON. Bail out if this
        # is encountered and ensure we log the file path.
        script_data = None
        try:
            script_data = _parse_json_file(file_path)
        except ValueError as e:
            logging.error('Failed to parse %s: %s', file_path, e)
            return None
        if any(key not in script_data
               for key in ('url', 'text', 'sourceMapURL')):
            logging.info('File %s is missing key url, text or sourceMapURL',
                         file_path)
            continue
        # TODO(crbug/1373753): For now we exclude any sourcemaps that are 0
        # length and also that don't begin with a data URL designation.
        if len(script_data['sourceMapURL']) == 0 or not script_data[
                'sourceMapURL'].startswith(_SOURCEMAPPING_DATA_URL_PREFIX):
            continue
        decoded_sourcemap = base64.b64decode(
            script_data['sourceMapURL'].replace(_SOURCEMAPPING_DATA_URL_PREFIX,
                                                ''))
        json_sourcemap = json.loads(decoded_sourcemap)
        if len(json_sourcemap['sources']) == 0:
            logging.warning('File %s has a valid sourcemap with no sources',
                            file_path)
            continue
        # "sourceRoot" is optional per the Source Map v3 spec; fall back to
        # an empty prefix rather than raising KeyError when it is absent.
        source_root = json_sourcemap.get('sourceRoot', '')
        for source_idx in range(len(json_sourcemap['sources'])):
            source_path = os.path.relpath(
                os.path.normpath(
                    os.path.join(source_root,
                                 json_sourcemap['sources'][source_idx])),
                source_dir)
            source_directory = os.path.join(output_dir,
                                            os.path.dirname(source_path))
            # exist_ok avoids the race between an existence check and the
            # directory creation.
            os.makedirs(source_directory, exist_ok=True)
            with open(os.path.join(output_dir, source_path), 'wb') as f:
                f.write(script_data['text'].encode('utf8'))
            # Only write the first instance of the sources to the map.
            # Sourcemaps require stability in their indexing as the mapping
            # derived are based on the index location of the file in the
            # "sources" and "sourcesContent" fields. Therefore the first index
            # of the "sources" field will be the first file that was encountered
            # during source map generation, i.e. this should be the actual
            # chromium/src original file.
            if script_data['url'] not in url_to_path_map:
                url_to_path_map[script_data['url']] = source_path
    if not url_to_path_map:
        return None
    with open(os.path.join(output_dir, 'parsed_scripts.json'),
              'w+',
              encoding='utf-8') as f:
        json.dump(url_to_path_map, f)
    return output_dir
def exclude_uninteresting_lines(coverage_file_path):
    """Removes lines from Istanbul coverage reports that correspond to lines in
    the source file that are empty. These lines provide no additional coverage
    information and in fact inflate the coverage metrics.

    Args:
        coverage_file_path (str): The path to the merged coverage.json file.
    """

    def _is_uninteresting(source_line):
        """True for lines with no coverage signal: blank lines, full-line
        // comments and import statements."""
        stripped = source_line.strip()
        return (stripped == '' or stripped.startswith('//')
                or stripped.startswith('import '))

    def _drop_statement(coverage_map, key):
        """Remove statement |key| from both the statement-to-line mapping
        ('statementMap') and the per-statement invocation counts ('s')."""
        del coverage_map['statementMap'][key]
        del coverage_map['s'][key]

    with open(coverage_file_path, 'r+') as coverage_file:
        coverage = json.load(coverage_file)
        for source_path, istanbul_coverage in coverage.items():
            with open(source_path) as source_file:
                source_lines = source_file.readlines()
            # Force list of the keys to allow removal of items whilst
            # iterating.
            for key in list(istanbul_coverage['statementMap']):
                statement = istanbul_coverage['statementMap'][key]
                # Only single-line statements are expected here.
                assert statement['start']['line'] == statement['end']['line']
                line_number = statement['start']['line']
                if _is_uninteresting(source_lines[line_number - 1]):
                    _drop_statement(istanbul_coverage, key)
        # Overwrite the current coverage file with new contents.
        coverage_file.seek(0)
        coverage_file.truncate()
        json.dump(coverage, coverage_file)
def get_raw_coverage_dirs(task_output_dir):
    """Returns the set of directories containing raw v8 coverage.

    Args:
        task_output_dir (str): The output directory for the sharded task. This will
            contain the raw JavaScript v8 coverage files that are identified by
            their ".cov.json" suffix.
    """
    return {
        dir_path
        for dir_path, _sub_dirs, file_names in os.walk(task_output_dir)
        if any(name.endswith('.cov.json') for name in file_names)
    }
def convert_raw_coverage_to_istanbul(raw_coverage_dirs, source_dir,
                                     task_output_dir):
    """Calls the node helper script convert_to_istanbul.js

    Args:
        raw_coverage_dirs (list): Directories that contain raw v8 code
            coverage.
        source_dir (str): Root directory containing the instrumented source.
        task_output_dir (str): Directory passed as --output-dir to the
            helper script.

    Raises:
        RuntimeError: If the underlying node command fails.
    """
    return node.RunNode([
        os.path.join(_HERE_PATH, 'convert_to_istanbul.js'),
        '--source-dir',
        source_dir,
        '--output-dir',
        task_output_dir,
        '--raw-coverage-dirs',
        ' '.join(raw_coverage_dirs),
    ])
def merge_istanbul_reports(istanbul_coverage_dir, source_dir, output_file):
    """Merges all disparate istanbul reports into a single report.

    Args:
        istanbul_coverage_dir (str): Directory containing separate coverage
            files.
        source_dir (str): Directory containing instrumented source code.
        output_file (str): File path to output merged coverage.

    Raises:
        RuntimeError: If the underlying node command fails.
    """
    nyc_args = [
        coverage_modules.PathToNyc(),
        'merge',
        istanbul_coverage_dir,
        output_file,
        '--cwd',
        source_dir,
    ]
    return node.RunNode(nyc_args)
def generate_coverage_reports(coverage_file_dir, source_dir, output_dir):
    """Generate a LCOV report.

    Args:
        coverage_file_dir (str): Directory containing the coverage.json file.
        source_dir (str): Directory containing the instrumented source code.
        output_dir (str): Directory to output the reports.
    """
    nyc_args = [
        coverage_modules.PathToNyc(),
        'report',
        '--reporter',
        'lcov',
        '--temp-dir',
        coverage_file_dir,
        '--cwd',
        source_dir,
        '--report-dir',
        output_dir,
    ]
    return node.RunNode(nyc_args)