| # Copyright 2020 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Functions to merge multiple JavaScript coverage files into one""" |
| |
| import logging |
| import json |
| import os |
| import sys |
| |
| logging.basicConfig(format='[%(asctime)s %(levelname)s] %(message)s', |
| level=logging.DEBUG) |
| |
| |
| def _parse_json_file(path): |
| """Opens file and parses data into JSON |
| |
| Args: |
| path (str): The path to a JSON file to parse. |
| """ |
| with open(path, 'r') as json_file: |
| return json.load(json_file) |
| |
| |
| def _peek_last(stack): |
| """Returns the top element of stack or None""" |
| return stack[-1] if stack else None |
| |
| |
| def _convert_to_disjoint_segments(ranges): |
| """Converts a list of v8 CoverageRanges into a list of disjoint segments. |
| |
| A v8 CoverageRange is a JSON object that describes the start and end |
| character offsets for a block of instrumented JavaScript code: |
| https://chromedevtools.github.io/devtools-protocol/tot/Profiler/#type-CoverageRange |
| CoverageRange is defined by the ranges field from a v8 FunctionCoverage: |
| https://chromedevtools.github.io/devtools-protocol/tot/Profiler/#type-FunctionCoverage |
| |
| To compute the list of disjoint segments, we sort (must be a stable sort) |
| the |ranges| list in ascending order by their startOffset. This |
| has the effect of bringing CoverageRange groups closer together. Each |
| group of CoverageRange's has a recursive relationship such that: |
| - The first range in the group defines the character offsets for the |
| function we are capturing coverage for |
| - Children of this range identify unexcuted code unless they are |
| also parents, in which case they continue the recursive relationship |
| |
| To give an example, consider the following arrow function: |
| |
| exports.test = arg => { return arg ? 'y' : 'n' } |
| |
| An invocation of test(true) would produce the following |ranges| |
| |
| [ |
| { "startOffset": 0, "endOffset": 48, "count": 1 }, // Range 1 |
| { "startOffset": 15, "endOffset": 48, "count": 1 }, // Range 2 |
| { "startOffset": 41, "endOffset": 46, "count": 0 }, // Range 3 |
| ] |
| |
| Range 1 identifies the entire script. |
| Range 2 identifies the function from the arg parameter through |
| to the closing brace |
| Range 3 identifies that the code from offset [41, 46) was |
| not executed. |
| |
| If we were to make the function calls, e.g. test(true); test(true); |
| this would produce the following |ranges| |
| |
| [ |
| { "startOffset": 0, "endOffset": 48, "count": 1 }, // Range 1 |
| { "startOffset": 15, "endOffset": 48, "count": 2 }, // Range 2 |
| { "startOffset": 41, "endOffset": 46, "count": 0 }, // Range 3 |
| ] |
| |
| All the offsets are maintained, however the count on Range |
| 2 has increased while the count on Range 1 is unchanged. This |
| shows another implicit assumption such that the inner most parent |
| range count identifies the total invocation count. |
| |
| TODO(benreich): Write up more extensive documentation. |
| |
| Args: |
| ranges (list): A list of v8 CoverageRange that have been |
| merged from multiple FunctionCoverage. The order in which they |
| appear in the original v8 coverage output must be maintained. |
| |
| Returns: |
| A list of dictionaries where each entry is defined as: |
| { |
| count: Number of invocations of this range |
| end: Exclusive character offset for the end of this range |
| } |
| """ |
| stack = [] |
| segments = [] |
| |
| def _append(end, count): |
| """Append a new range segment to |segments|. |
| |
| If the top range on |segments| has the same ending as |end| |
| return early, otherwise extend the segment if the same count |
| exists. |
| |
| Args: |
| end (number): The end character offset for the range |
| count (number): The invocation count for the range |
| """ |
| last = _peek_last(segments) |
| if last is not None: |
| if last['end'] == end: |
| return |
| |
| if last['count'] == count: |
| last['end'] = end |
| return |
| |
| if end == 0: |
| return |
| |
| segments.append({'end': end, 'count': count}) |
| return |
| |
| # Stable sort the range segments. |
| ranges.sort(key=lambda entry: entry['startOffset']) |
| |
| for entry in ranges: |
| top = _peek_last(stack) |
| |
| while top and top['endOffset'] <= entry['startOffset']: |
| _append(top['endOffset'], top['count']) |
| stack.pop() |
| top = _peek_last(stack) |
| |
| top_count = 0 if not top else top['count'] |
| _append(entry['startOffset'], top_count) |
| stack.append(entry) |
| |
| while stack: |
| top = stack.pop() |
| _append(top['endOffset'], top['count']) |
| |
| return segments |
| |
| def _merge_segments(segments_a, segments_b): |
| """Merges 2 lists of disjoint segments into one |
| |
| Take in two lists that have been output by _convert_to_disjoint_segments |
| and merge them into a single list. Any segments that are |
| overlapping sum their invocation counts. If the overlap |
| is partial, split the ranges into contiguous segments and |
| assign the invocation counts appropriately. |
| |
| Args: |
| segments_a (list): A list of disjoint segments. |
| segments_b (list): A list of disjoint segments. |
| |
| Returns: |
| A list of disjoint segments. |
| """ |
| segments = [] |
| i = 0 |
| j = 0 |
| |
| while i < len(segments_a) and j < len(segments_b): |
| a = segments_a[i] |
| b = segments_b[j] |
| |
| count = a.get('count', 0) + b.get('count', 0) |
| end = min(a['end'], b['end']) |
| last = _peek_last(segments) |
| |
| # Get the segment from the top of the stack and |
| # extend the segment if the invocation counts match |
| # otherwise push a new range segment onto the stack. |
| if last is None or last['count'] != count: |
| segments.append({'end': end, 'count': count}) |
| else: |
| last['end'] = end |
| |
| if a['end'] <= b['end']: |
| i += 1 |
| |
| if a['end'] >= b['end']: |
| j += 1 |
| |
| while i < len(segments_a): |
| segments.append(segments_a[i]) |
| i += 1 |
| |
| while j < len(segments_b): |
| segments.append(segments_b[j]) |
| j += 1 |
| |
| return segments |
| |
| |
| def _get_coverage_paths(input_dir): |
| """Gets all JSON files in the input directory. |
| |
| Args: |
| input_dir (str): The path to recursively search for |
| JSON files. |
| |
| Returns: |
| A list of absolute file paths. |
| """ |
| paths = [] |
| for dir_path, _sub_dirs, file_names in os.walk(input_dir): |
| paths.extend([ |
| os.path.join(dir_path, fn) for fn in file_names |
| if fn.endswith('.cov.json') |
| ]) |
| return paths |
| |
| |
| def merge_coverage_files(coverage_dir, output_path): |
| """Merge all coverages in the coverage dir into a single file. |
| |
| Args: |
| coverage_dir (str): Path to all the raw JavaScript coverage files. |
| output_path (str): Path to the location to output merged coverage. |
| """ |
| coverage_by_path = {} |
| json_files = _get_coverage_paths(coverage_dir) |
| |
| if not json_files: |
| logging.info('No JavaScript coverage files found in %s', coverage_dir) |
| return |
| |
| for file_path in json_files: |
| coverage_data = _parse_json_file(file_path) |
| |
| if 'result' not in coverage_data: |
| raise RuntimeError('%r does not have a result field' % |
| json_file_path) |
| |
| for script_coverage in coverage_data['result']: |
| script_url = script_coverage['url'] |
| |
| # Ignore files with paths that have not been rewritten. |
| # Files can rewrite paths by appending a //# sourceURL= |
| # comment. |
| if not script_url.startswith('//'): |
| continue |
| |
| previous_coverage = coverage_by_path.get(script_url, []) |
| |
| ranges = [] |
| for function_coverage in script_coverage['functions']: |
| for range_coverage in function_coverage['ranges']: |
| ranges.append(range_coverage) |
| |
| disjoint_segments = _convert_to_disjoint_segments(ranges) |
| merged_segments = _merge_segments(previous_coverage, |
| disjoint_segments) |
| |
| coverage_by_path[script_url] = merged_segments |
| |
| with open(output_path, 'w') as merged_coverage_file: |
| return merged_coverage_file.write(json.dumps(coverage_by_path)) |