| # Copyright (c) 2014 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| import os |
| from threading import Lock |
| |
| import blame |
| from common import utils |
| import component_dictionary |
| import crash_utils |
| import git_repository_parser |
| import match_set |
| import svn_repository_parser |
| |
| |
| LINE_CHANGE_PRIORITY = 1 |
| FILE_CHANGE_PRIORITY = 2 |
| _THIS_DIR = os.path.abspath(os.path.dirname(__file__)) |
| CONFIG = crash_utils.ParseURLsFromConfig(os.path.join(_THIS_DIR, |
| 'config.ini')) |
| |
| |
| def GenerateMatchEntry( |
| matches, revision_info, revision_number, file_path, function, |
| component_path, component_name, crashed_line_numbers, stack_frame_indices, |
| file_change_type, repository_parser): |
| """Generates a match object and adds it to the match set. |
| |
| Args: |
| matches: A matchset object, a map from CL to a match object. |
| revision_info: The revision information, a map from fields (message, |
| changed files, etc) to its values. |
| revision_number: The SVN revision number or git hash. |
| file_path: The path of the file. |
| function: The function that caused an crash. |
| component_path: The path of the component this file is from. |
| component_name: The name of the component the file is from. |
| crashed_line_numbers: The list of the lines in the file that caused |
| the crash. |
| stack_frame_indices: The list of positions of this file within a stack. |
| file_change_type: Whether file is modified, added or deleted. |
| repository_parser: The parser object to parse line diff. |
| """ |
| # Check if this CL should be ignored. |
| with matches.matches_lock: |
| if revision_number in matches.cls_to_ignore: |
| return |
| |
| # If this CL is not already identified as suspected, create a new entry. |
| if revision_number not in matches.matches: |
| match = match_set.Match(revision_info, component_name) |
| message = revision_info['message'] |
| # TODO(jeun): Don't hold lock while issuing http request. |
| match.ParseMessage(message, matches.codereview_api_url) |
| |
| # If this match is a revert, add to the set of CLs to be ignored. |
| if match.is_revert: |
| matches.cls_to_ignore.add(revision_number) |
| |
| # If this match has info on what CL it is reverted from, add that CL. |
| if match.revert_of: |
| matches.cls_to_ignore.add(match.revert_of) |
| |
| return |
| |
| matches.matches[revision_number] = match |
| |
| else: |
| match = matches.matches[revision_number] |
| |
| (diff_url, changed_line_numbers, changed_line_contents) = ( |
| repository_parser.ParseLineDiff( |
| file_path, component_path, file_change_type, revision_number)) |
| |
| # Ignore this match if the component is not supported for svn. |
| if not diff_url: |
| return |
| |
| # Find the intersection between the lines that this file crashed on and |
| # the changed lines. |
| (line_number_intersection, stack_frame_index_intersection, functions) = ( |
| crash_utils.Intersection( |
| crashed_line_numbers, stack_frame_indices, changed_line_numbers, |
| function)) |
| |
| # Find the minimum distance between the changed lines and crashed lines. |
| (min_distance, min_crashed_line, min_changed_line) = \ |
| crash_utils.FindMinLineDistance(crashed_line_numbers, |
| changed_line_numbers) |
| |
| # Check whether this CL changes the crashed lines or not. |
| if line_number_intersection: |
| priority = LINE_CHANGE_PRIORITY |
| else: |
| priority = FILE_CHANGE_PRIORITY |
| |
| # Add the parsed information to the object. |
| with matches.matches_lock: |
| match.crashed_line_numbers.append(line_number_intersection) |
| |
| file_name = file_path.split('/')[-1] |
| match.changed_files.append(file_name) |
| |
| # Update the min distance only if it is less than the current one. |
| if min_distance < match.min_distance: |
| match.min_distance = min_distance |
| match.min_distance_info = (file_name, min_crashed_line, min_changed_line) |
| |
| # If this CL does not change the crashed line, all occurrence of this |
| # file in the stack has the same priority. |
| if not stack_frame_index_intersection: |
| stack_frame_index_intersection = stack_frame_indices |
| functions = function |
| match.stack_frame_indices.append(stack_frame_index_intersection) |
| match.changed_file_urls.append(diff_url) |
| match.priorities.append(priority) |
| match.function_list.append(functions) |
| |
| |
| def FindMatch(revisions_info_map, file_to_revision_info, file_to_crash_info, |
| component_path, component_name, repository_parser, |
| codereview_api_url): |
| """Finds a CL that modifies file in the stacktrace. |
| |
| Args: |
| revisions_info_map: A dictionary mapping revision number to the CL |
| information. |
| file_to_revision_info: A dictionary mapping file to the revision that |
| modifies it. |
| file_to_crash_info: A dictionary mapping file to its occurrence in |
| stacktrace. |
| component_path: The path of the component to search for. |
| component_name: The name of the component to search for. |
| repository_parser: The parser object to parse the line diff. |
| codereview_api_url: A code review url to retrieve data from. |
| |
| Returns: |
| Matches, a set of match objects. |
| """ |
| matches = match_set.MatchSet(codereview_api_url) |
| |
| tasks = [] |
| # Iterate through the crashed files in the stacktrace. |
| for crashed_file_path in file_to_crash_info: |
| # Ignore header file. |
| if crashed_file_path.endswith('.h'): |
| continue |
| |
| # If the file in the stacktrace is not changed in any commits, continue. |
| for changed_file_path in file_to_revision_info: |
| changed_file_name = changed_file_path.split('/')[-1].lower() |
| crashed_file_name = crashed_file_path.split('/')[-1].lower() |
| if changed_file_name != crashed_file_name: |
| continue |
| |
| if not crash_utils.GuessIfSameSubPath( |
| changed_file_path.lower(), crashed_file_path.lower()): |
| continue |
| |
| crashed_line_numbers = file_to_crash_info.GetCrashedLineNumbers( |
| crashed_file_path) |
| stack_frame_nums = file_to_crash_info.GetCrashStackFrameIndices( |
| crashed_file_path) |
| functions = file_to_crash_info.GetCrashFunctions(crashed_file_path) |
| |
| # Iterate through the CLs that this file path is changed. |
| for (cl, file_change_type) in file_to_revision_info[changed_file_path]: |
| # If the file change is delete, ignore this CL. |
| if file_change_type == 'D': |
| continue |
| |
| revision = revisions_info_map[cl] |
| |
| tasks.append({ |
| 'function': GenerateMatchEntry, |
| 'args':[matches, revision, cl, changed_file_path, functions, |
| component_path, component_name, crashed_line_numbers, |
| stack_frame_nums, file_change_type, |
| repository_parser]}) |
| |
| # Run all the tasks. |
| crash_utils.RunTasks(tasks) |
| |
| matches.RemoveRevertedCLs() |
| |
| return matches |
| |
| |
| def FindMatchForComponent(component_path, file_to_crash_info, changelog, |
| callstack_priority, results, results_lock): |
| """Parses changelog and finds suspected CLs for a given component. |
| |
| Args: |
| component_path: The path of component to look for the culprit CL. |
| file_to_crash_info: A dictionary mapping file to its occurrence in |
| stackframe. |
| changelog: The parsed changelog for this component. |
| callstack_priority: The priority of this call stack, 0 if from crash stack, |
| 1 if from freed, 2 if from previously allocated. |
| results: A dictionary to store the result. |
| results_lock: A lock that guards results. |
| """ |
| (repository_parser, component_name, revisions, file_to_revision_map) = \ |
| changelog |
| |
| # Find match for this component. |
| codereview_api_url = CONFIG['codereview']['review_url'] |
| component_result = FindMatch( |
| revisions, file_to_revision_map, file_to_crash_info, component_path, |
| component_name, repository_parser, codereview_api_url) |
| matches = component_result.matches |
| |
| # For all the match results in a dictionary, add to the list so that it |
| # can be sorted. |
| with results_lock: |
| for cl in matches: |
| match = matches[cl] |
| results.append((callstack_priority, cl, match)) |
| |
| |
| def FindMatchForCallstack( |
| callstack, components, component_to_changelog_map, results, |
| results_lock): |
| """Finds culprit cl for a stack within a stacktrace. |
| |
| For each components to look for, create new thread that computes the matches |
| and join the results at the end. |
| |
| Args: |
| callstack: A callstack in a stacktrace to find the result for. |
| components: A set of components to look for. |
| component_to_changelog_map: A map from component to its parsed changelog. |
| results: A list to aggregrate results from all stacktraces. |
| results_lock: A lock that guards results. |
| """ |
| # Create component dictionary from the component and call stack. |
| component_dict = component_dictionary.ComponentDictionary(callstack, |
| components) |
| callstack_priority = callstack.priority |
| |
| # Iterate through all components. |
| for component_path in component_dict: |
| # If the component to consider in this callstack is not in the parsed list |
| # of components, ignore this one. |
| if component_path not in component_to_changelog_map: |
| continue |
| |
| changelog = component_to_changelog_map[component_path] |
| file_to_crash_info = component_dict.GetFileDict(component_path) |
| FindMatchForComponent(component_path, file_to_crash_info, changelog, |
| callstack_priority, results, results_lock) |
| |
| |
| def FindMatchForStacktrace(stacktrace, components, |
| component_to_regression_dict): |
| """Finds the culprit CL for stacktrace. |
| |
| The passed stacktrace is either from release build stacktrace |
| or debug build stacktrace. |
| |
| Args: |
| stacktrace: A list of parsed stacks within a stacktrace. |
| components: A set of components to look for. |
| component_to_regression_dict: A dictionary mapping component path to |
| its regression. |
| |
| Returns: |
| A list of match results from all stacks. |
| """ |
| # A list to aggregate results from all the callstacks in the stacktrace. |
| results = [] |
| results_lock = Lock() |
| |
| # Setup parsers. |
| svn_parser = svn_repository_parser.SVNParser(CONFIG['svn']) |
| git_parser = git_repository_parser.GitParser(component_to_regression_dict, |
| CONFIG['git']) |
| |
| # Create a cache of parsed revisions. |
| component_to_changelog_map = {} |
| for component_path in components: |
| component_object = component_to_regression_dict[component_path] |
| range_start = component_object['old_revision'] |
| range_end = component_object['new_revision'] |
| |
| # If range start is 0, the range is too large and the crash has been |
| # introduced the archived build, so ignore this case. |
| if range_start == '0': |
| continue |
| |
| component_name = component_to_regression_dict[component_path]['name'] |
| |
| is_git = utils.IsGitHash(range_start) |
| if is_git: |
| repository_parser = git_parser |
| else: |
| repository_parser = svn_parser |
| |
| (revisions, file_to_revision_map) = repository_parser.ParseChangelog( |
| component_path, range_start, range_end) |
| |
| # If the returned map from ParseChangeLog is empty, we don't need to look |
| # further because either the parsing failed or the changelog is empty. |
| if not (revisions and file_to_revision_map): |
| continue |
| |
| component_to_changelog_map[component_path] = (repository_parser, |
| component_name, |
| revisions, |
| file_to_revision_map) |
| |
| # Analyze each of the call stacks in the stacktrace. |
| for callstack in stacktrace.stack_list: |
| FindMatchForCallstack(callstack, components, component_to_changelog_map, |
| results, results_lock) |
| |
| return results |
| |
| |
| def SortMatchesFunction(match_with_stack_priority): |
| """A function to sort the match triple. |
| |
| Currently, it sorts the list by: |
| 1) The highest priority file change in the CL (changing crashed line is |
| higher priority than just changing the file). |
| 2) The callstack this match is computed (crash stack, freed, allocation). |
| 3) The minimum stack frame number of the changed file in the match. |
| 4) The number of files this CL changes (higher the better). |
| 5) The minimum distance between the lines that the CL changes and crashed |
| lines. |
| |
| Args: |
| match_with_stack_priority: A match object, with the CL it is from and what |
| callstack it is from. |
| |
| Returns: |
| A sort key. |
| """ |
| (stack_priority, _, match) = match_with_stack_priority |
| |
| return (min(match.priorities), |
| stack_priority, |
| match.min_distance, |
| crash_utils.FindMinStackFrameNumber(match.stack_frame_indices, |
| match.priorities), |
| -len(match.changed_files)) |
| |
| |
| def SortAndFilterMatches(matches, num_important_frames=5): |
| """Filters the list of potential culprit CLs to remove noise. |
| |
| Args: |
| matches: A list containing match results. |
| num_important_frames: A number of frames on the top of the frame to Check |
| for when filtering the results. A match with a file |
| that is in top num_important_frames of the stacktrace |
| is regarded more probable then others. |
| |
| Returns: |
| Filtered match results. |
| """ |
| new_matches = [] |
| line_changed = False |
| is_important_frame = False |
| highest_priority_stack = crash_utils.INFINITY |
| matches.sort(key=SortMatchesFunction) |
| # Iterate through the matches to find out what results are significant. |
| for stack_priority, cl, match in matches: |
| # Check if the current match changes crashed line. |
| is_line_change = (min(match.priorities) == LINE_CHANGE_PRIORITY) |
| |
| # Check which stack this match is from, and finds the highest priority |
| # callstack up to this point. |
| current_stack = stack_priority |
| if current_stack < highest_priority_stack: |
| highest_priority_stack = current_stack |
| |
| # Check if current match changes a file that occurs in crash state. |
| flattened_stack_frame_indices = [frame for frame_indices in |
| match.stack_frame_indices |
| for frame in frame_indices] |
| current_is_important = ( |
| min(flattened_stack_frame_indices) < num_important_frames) |
| |
| # This match and anything lower than this should be ignored if: |
| # - Current match does not change crashed lines but there are matches |
| # that do so. |
| # - Current match is not in crash state but there are matches in it. |
| # - There are other matches that came from higher priority stack. |
| if (line_changed and not is_line_change) or ( |
| is_important_frame and not current_is_important) or ( |
| current_stack > highest_priority_stack): |
| break |
| |
| # Update the variables. |
| if is_line_change: |
| line_changed = True |
| if current_is_important: |
| is_important_frame = True |
| |
| # Add current match to the filtered result. |
| new_matches.append((stack_priority, cl, match)) |
| |
| return new_matches |
| |
| |
| def GenerateReasonForMatches(matches): |
| """Generates a reason that a match (CL) is a culprit cl. |
| |
| Args: |
| matches: A list of match objects. |
| """ |
| # Iterate through the matches in the list. |
| for i, _, match in matches: |
| reason = [] |
| |
| # Zip the files in the match by the reason they are suspected |
| # (how the file is modified). |
| match_by_priority = zip( |
| match.priorities, match.crashed_line_numbers, match.changed_files, |
| match.stack_frame_indices, match.function_list) |
| |
| # Sort the zipped changed files in the match by their priority so that the |
| # changed lines comes first in the reason. |
| match_by_priority.sort( |
| key=lambda (priority, crashed_line_numbers, file_name, |
| stack_frame_indices, function_list): priority) |
| |
| # Iterate through the sorted match. |
| for i in range(len(match_by_priority)): |
| (priority, crashed_line_numbers, file_name, stack_frame_indices, |
| function_list) = match_by_priority[i] |
| |
| # If the file in the match is a line change, append a explanation. |
| if priority == LINE_CHANGE_PRIORITY: |
| crashed_line_numbers = [crashed_line_number |
| for lines in crashed_line_numbers |
| for crashed_line_number in lines] |
| reason.append( |
| 'Lines %s of file %s which potentially caused crash ' |
| 'are changed in this cl (%s).\n' % |
| (utils.JoinLineNumbers(crashed_line_numbers, accepted_gap=4), |
| file_name, |
| crash_utils.PrettifyFrameInfo(stack_frame_indices, function_list))) |
| |
| else: |
| # Get all the files that are not line change. |
| rest_of_the_files = match_by_priority[i:] |
| |
| if len(rest_of_the_files) == 1: |
| file_string = 'File %s is changed in this cl ' |
| else: |
| file_string = 'Files %s are changed in this cl ' |
| |
| # Create a list of file names, and prettify the list. |
| file_names = [ |
| file_name for (_, _, file_name, _, _) in rest_of_the_files] |
| pretty_file_names = crash_utils.PrettifyList(file_names) |
| |
| # Add the reason, break because we took care of the rest of the files. |
| file_string += ('(and is part of stack %s)' % |
| crash_utils.PrettifyFrameInfo(stack_frame_indices, function_list)) |
| reason.append(file_string % pretty_file_names) |
| break |
| |
| # Set the reason as string. |
| match.reason = '\n'.join(reason) |
| |
| |
| def CombineMatches(matches): |
| """Combine possible duplicates in matches. |
| |
| Args: |
| matches: A list of matches object, along with its callstack priority and |
| CL it is from. |
| Returns: |
| A combined list of matches. |
| """ |
| combined_matches = [] |
| |
| for stack_index, cl, match in matches: |
| found_match = None |
| |
| # Iterate through the list of combined matches. |
| for _, cl_combined, match_combined in combined_matches: |
| # Check for if current CL is already higher up in the result. |
| if cl == cl_combined: |
| found_match = match_combined |
| break |
| |
| # If current match is not already in, add it to the list of matches. |
| if not found_match: |
| combined_matches.append((stack_index, cl, match)) |
| continue |
| |
| # Combine the reason if the current match is already in there. |
| found_match.reason += '\n' + match.reason |
| if match.min_distance < found_match.min_distance: |
| found_match.min_distance = match.min_distance |
| found_match.min_distance_info = match.min_distance_info |
| |
| for stack_index, cl, match in combined_matches: |
| if match.min_distance_info: |
| file_name, min_crashed_line, min_changed_line = match.min_distance_info |
| match.reason = match.reason.strip() |
| match.reason += ( |
| '\nMinimum distance from crash line to modified line: %d. ' |
| '(file: %s, crashed on: %d, modified: %d).' % |
| (match.min_distance, file_name, min_crashed_line, min_changed_line)) |
| |
| return combined_matches |
| |
| |
| def FilterAndGenerateReasonForMatches(result): |
| """A wrapper function. |
| |
| It generates reasons for the matches and returns string representation |
| of filtered results. |
| |
| Args: |
| result: A list of match objects. |
| |
| Returns: |
| A string representation of filtered results. |
| """ |
| new_result = SortAndFilterMatches(result) |
| GenerateReasonForMatches(new_result) |
| combined_matches = CombineMatches(new_result) |
| return crash_utils.MatchListToResultList(combined_matches) |
| |
| |
| def ParseCrashComponents(main_stack): |
| """Parses the crashing component. |
| |
| Crashing components is a component that top_n_frames of the stacktrace is |
| from. |
| |
| Args: |
| main_stack: Main stack from the stacktrace. |
| |
| Returns: |
| A set of components. |
| """ |
| components = set() |
| |
| for frame in main_stack.frame_list: |
| components.add(frame.component_path) |
| |
| return components |
| |
| |
| def GenerateAndFilterBlameList(callstack, component_to_crash_revision_dict, |
| component_to_regression_dict): |
| """A wrapper function. |
| |
| Finds blame information for stack and returns string representation. |
| |
| Args: |
| callstack: A callstack to find the blame information. |
| component_to_crash_revision_dict: A dictionary mapping component to its |
| crash revision. |
| component_to_regression_dict: A dictionary mapping component to its |
| regression. |
| |
| Returns: |
| A list of blame results. |
| """ |
| if component_to_regression_dict: |
| parsed_deps = component_to_regression_dict |
| else: |
| parsed_deps = component_to_crash_revision_dict |
| |
| # Setup parser objects to use for parsing blame information. |
| svn_parser = svn_repository_parser.SVNParser(CONFIG['svn']) |
| git_parser = git_repository_parser.GitParser(parsed_deps, CONFIG['git']) |
| parsers = {} |
| parsers['svn'] = svn_parser |
| parsers['git'] = git_parser |
| |
| # Create and generate the blame objects from the callstack. |
| blame_list = blame.BlameList() |
| blame_list.FindBlame(callstack, component_to_crash_revision_dict, |
| component_to_regression_dict, |
| parsers) |
| |
| blame_list.FilterAndSortBlameList() |
| return crash_utils.BlameListToResultList(blame_list) |
| |
| |
| def FindItForCrash(stacktrace_list, |
| callstack, |
| component_to_regression_dict, |
| component_to_crash_revision_dict): |
| """Finds the culprit CL from the list of stacktrace. |
| |
| Args: |
| stacktrace_list: A list of stacktraces to look for, in the order of |
| decreasing significance. |
| callstack: A callstack object to show blame information for, if there are |
| no results for all stacktraces in the stacktrace_list. |
| component_to_regression_dict: A parsed regression information as a |
| result of parsing DEPS file. |
| component_to_crash_revision_dict: A parsed crash revision information. |
| |
| Returns: |
| A list of result objects, with the message how the result is created. |
| """ |
| # If regression information is not available, return blame information. |
| if not component_to_regression_dict: |
| result = GenerateAndFilterBlameList(callstack, |
| component_to_crash_revision_dict, |
| component_to_regression_dict) |
| if result: |
| return_message = ( |
| 'Regression information is not available. The result is ' |
| 'the blame information.') |
| else: |
| return_message = ('Findit could not find any suspected CLs.') |
| |
| return (return_message, result) |
| |
| for stacktrace in stacktrace_list: |
| # Check the next stacktrace if current one is empty. |
| if not stacktrace.stack_list: |
| continue |
| |
| # Get the crash stack for this stacktrace, and extract crashing components |
| # from it. |
| main_stack = stacktrace.GetCrashStack() |
| components = ParseCrashComponents(main_stack) |
| |
| result_for_stacktrace = FindMatchForStacktrace( |
| stacktrace, components, component_to_regression_dict) |
| filtered_result = FilterAndGenerateReasonForMatches(result_for_stacktrace) |
| |
| # If the result is empty, check the next stacktrace. Else, return the |
| # filtered result. |
| if not filtered_result: |
| continue |
| |
| return_message = ( |
| 'The result is a list of CLs that change the crashed files.') |
| return (return_message, filtered_result) |
| |
| # If no match is found, return the blame information for the input |
| # callstack. |
| result = GenerateAndFilterBlameList( |
| callstack, component_to_crash_revision_dict, |
| component_to_regression_dict) |
| |
| if result: |
| return_message = ( |
| 'No CL in the regression range changes the crashed files. ' |
| 'The result is the blame information.') |
| |
| # When findit could not find any CL that changes file in stacktrace or if |
| # if cannot get any blame information, return a message saying that no |
| # results are available. |
| else: |
| return_message = ('Findit could not find any suspected CLs.') |
| |
| return (return_message, result) |
| |