| # Copyright 2014 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| from collections import defaultdict |
| import copy |
| from datetime import datetime |
| |
| from common import acl |
| from common import constants |
| from common.waterfall import failure_type |
| from gae_libs import token |
| from gae_libs.http import auth_util |
| from gae_libs.handlers.base_handler import BaseHandler, Permission |
| from handlers import handlers_util |
| from handlers import result_status |
| from handlers.result_status import NO_TRY_JOB_REASON_MAP |
| from libs import analysis_status |
| from model import result_status as analysis_result_status |
| from model import suspected_cl_status |
| from model.base_build_model import BaseBuildModel |
| from model.result_status import RESULT_STATUS_TO_DESCRIPTION |
| from model.suspected_cl_confidence import SuspectedCLConfidence |
| from model.suspected_cl_status import CL_STATUS_TO_DESCRIPTION |
| from model.wf_analysis import WfAnalysis |
| from model.wf_suspected_cl import WfSuspectedCL |
| from model.wf_try_job import WfTryJob |
| from waterfall import build_failure_analysis_pipelines |
| from waterfall import build_util |
| from waterfall import buildbot |
| from waterfall import suspected_cl_util |
| from waterfall import waterfall_config |
| |
# Sentinel used in place of a test name for results that carry no per-test
# breakdown (compile or non-swarming steps). A unique object can never collide
# with a real test name.
NON_SWARMING = object()

# Translates an analysis-level triage verdict into the status shown for each
# suspected CL. Verdicts that are not listed have no CL-level counterpart and
# are looked up with a None fallback.
_ANALYSIS_CL_STATUS_MAP = {
    analysis_result_status.FOUND_CORRECT: suspected_cl_status.CORRECT,
    analysis_result_status.FOUND_INCORRECT: suspected_cl_status.INCORRECT,
}
| |
| |
| def _FormatDatetime(dt): |
| if not dt: |
| return None |
| else: |
| return dt.strftime('%Y-%m-%d %H:%M:%S UTC') |
| |
| |
| def _GetCLDict(analysis, cl_info): |
| if not cl_info: |
| return {} |
| |
| cl_keys = suspected_cl_util.GetCLInfo(cl_info) |
| repo_name = cl_keys[0] |
| revision = cl_keys[1] |
| for cl in analysis.suspected_cls: |
| if cl['repo_name'] == repo_name and cl['revision'] == revision: |
| return cl |
| return {} |
| |
| |
def _GetTriageHistory(analysis):
  """Builds the display form of an analysis' triage records.

  Args:
    analysis: Object exposing completed, triage_history and suspected_cls.

  Returns:
    None unless the current user is an admin, the analysis is completed and it
    has at least one triage record. Otherwise a list with one dict per record,
    in stored order, carrying 'triage_time', 'user_name', 'triaged_cl',
    'result_status' and 'version'.
  """
  if not auth_util.IsCurrentUserAdmin():
    return None
  if not analysis.completed or not analysis.triage_history:
    return None

  return [
      {
          'triage_time':
              _FormatDatetime(
                  datetime.utcfromtimestamp(record['triage_timestamp'])),
          'user_name':
              record['user_name'],
          'triaged_cl':
              _GetCLDict(analysis, record.get('triaged_cl')),
          # A record describes either an analysis-level verdict or a CL-level
          # one; fall back to the CL description when the former is missing.
          'result_status': (
              RESULT_STATUS_TO_DESCRIPTION.get(record.get('result_status')) or
              CL_STATUS_TO_DESCRIPTION.get(record.get('cl_status'))),
          'version':
              record.get('version'),
      }
      for record in analysis.triage_history
  ]
| |
| |
| def _GetOrganizedAnalysisResultBySuspectedCL(master_name, builder_name, |
| analysis_result): |
| """Group tests it they have the same suspected CLs.""" |
| organized_results = defaultdict(list) |
| |
| if not analysis_result: |
| return organized_results |
| |
| for step_failure in analysis_result.get('failures', []): |
| step_name = step_failure['step_name'] |
| supported = step_failure.get('supported', True) |
| step_revisions_index = {} |
| organized_suspected_cls = organized_results[step_name] |
| is_flaky = step_failure.get('flaky', False) |
| |
| if not step_failure.get('tests'): |
| # Non swarming, just group the whole step together. |
| shared_result = { |
| 'first_failure': |
| step_failure['first_failure'], |
| 'last_pass': |
| step_failure.get('last_pass'), |
| 'first_failure_build_id': |
| buildbot.GetBuildId(master_name, builder_name, |
| step_failure['first_failure']), |
| 'last_pass_build_id': |
| buildbot.GetBuildId(master_name, builder_name, |
| step_failure['last_pass']), |
| 'supported': |
| supported, |
| 'tests': [], |
| 'suspected_cls': |
| step_failure['suspected_cls'], |
| 'flaky': |
| is_flaky, |
| } |
| organized_suspected_cls.append(shared_result) |
| continue |
| |
| # Swarming tests. |
| for index, cl in enumerate(step_failure['suspected_cls']): |
| step_revisions_index[cl['revision']] = index |
| |
| # Groups tests by suspected CLs' revision. |
| # Keys are the indices of each test in the test list. |
| # Format is as below: |
| # { |
| # 1: { |
| # 'tests': ['test1', 'test2'], |
| # 'revisions': ['rev1'], |
| # 'suspected_cls': [ |
| # # suspected cl info for rev1 at step level. |
| # ] |
| # }, |
| # 3: { |
| # 'tests': ['test3'], |
| # 'revisions': ['rev3', 'rev2'], |
| # 'suspected_cls': [ |
| # # suspected cl info for rev2, rev3 at step level. |
| # ] |
| # } |
| # } |
| tests_group = defaultdict(list) |
| for index, test in enumerate(step_failure['tests']): |
| # Get all revisions for this test and check if there is |
| # any other test has the same culprit(represented by revision) set and |
| # are flaky or reliable at the same time. |
| test_name = test['test_name'] |
| is_flaky = test.get('flaky', False) |
| found_group = False |
| revisions = set() |
| for cl in test['suspected_cls']: |
| revisions.add(cl['revision']) |
| for group in tests_group.values(): |
| # Found tests that have the same culprit(represented by revision), |
| # add current test to group. |
| if revisions == set(group['revisions']) and is_flaky == group['flaky']: |
| group['tests'].append(test_name) |
| found_group = True |
| break |
| if not found_group: |
| # First test with that revision set, add a new group. |
| group_suspected_cls = [] |
| for revision in revisions: |
| group_suspected_cls.append( |
| step_failure['suspected_cls'][step_revisions_index[revision]]) |
| tests_group[index] = { |
| 'tests': [test_name], |
| 'revisions': list(revisions), |
| 'suspected_cls': group_suspected_cls, |
| 'flaky': is_flaky, |
| } |
| |
| for index, group in tests_group.iteritems(): |
| # Reorganize heuristic results by culprits. |
| test_result = step_failure['tests'][index] |
| shared_result = { |
| 'first_failure': |
| test_result['first_failure'], |
| 'last_pass': |
| test_result.get('last_pass'), |
| 'first_failure_build_id': |
| buildbot.GetBuildId(master_name, builder_name, |
| test_result['first_failure']), |
| 'last_pass_build_id': |
| buildbot.GetBuildId(master_name, builder_name, |
| test_result['last_pass']), |
| 'supported': |
| supported, |
| 'tests': |
| group['tests'], |
| 'suspected_cls': |
| group['suspected_cls'], |
| 'flaky': |
| group['flaky'], |
| } |
| organized_suspected_cls.append(shared_result) |
| |
| return organized_results |
| |
| |
| def _AttachBuildIdToSuspectedCLs(master_name, builder_name, suspected_cls): |
| for cl in suspected_cls: |
| cl['build_id'] = buildbot.GetBuildId(master_name, builder_name, |
| cl.get('build_number')) |
| return suspected_cls |
| |
| |
| def _GetAnalysisResultWithTryJobInfo(show_debug_info, organized_results, |
| master_name, builder_name, build_number): |
| """Reorganizes analysis result and try job result by step_name and culprit. |
| |
| Returns: |
| update_result (dict): A dict of classified results. |
| |
| The format for those dicts are as below: |
| { |
| # A dict of results that contains both |
| # heuristic analysis results and try job results. |
| 'reliable_failures': { |
| 'step1': { |
| 'results': [ |
| { |
| 'try_job':{ |
| 'ref_name': 'step1', |
| 'try_job_key': 'm/b/119', |
| 'status': analysis_status.COMPLETED, |
| 'try_job_url': 'url/121', |
| 'try_job_build_number': 121, |
| }, |
| 'heuristic_analysis': { |
| 'suspected_cls': [ |
| { |
| 'build_number': 98, |
| 'repo_name': 'chromium', |
| 'revision': 'r98_1', |
| 'commit_position': None, |
| 'url': None, |
| 'score': 5, |
| 'hints': { |
| 'added f98.cc (and it was in log)': 5, |
| }, |
| } |
| ] |
| } |
| 'tests': ['test1', 'test2'], |
| 'first_failure': 98, |
| 'last_pass': 97, |
| 'supported': True |
| } |
| ] |
| } |
| }, |
| # A dict of result for flaky tests. |
| 'flaky_failures': {...}, |
| # A dict of results for all the other conditions, |
| # such as non-swarming tests or swarming rerun failed. |
| 'unclassified_failures': {...} |
| } |
| """ |
| updated_results = defaultdict(lambda: defaultdict(lambda: defaultdict(list))) |
| |
| if not organized_results: |
| return updated_results |
| |
| try_job_info = handlers_util.GetAllTryJobResults( |
| master_name, builder_name, build_number, show_debug_info) |
| |
| if not try_job_info: |
| return updated_results |
| |
| for step_name, try_jobs in try_job_info.iteritems(): |
| try_jobs = try_jobs['try_jobs'] |
| step_heuristic_results = organized_results[step_name] |
| step_updated_results = updated_results[step_name]['results'] |
| |
| # Finds out try job result index and heuristic result index for each test. |
| test_result_map = defaultdict(lambda: defaultdict(int)) |
| |
| for index, try_job in enumerate(try_jobs): |
| if not try_job.get('tests'): # Compile or non-swarming. |
| test_result_map[NON_SWARMING]['try_job_index'] = index |
| continue |
| for test_name in try_job['tests']: |
| test_result_map[test_name]['try_job_index'] = index |
| |
| for index, heuristic_result in enumerate(step_heuristic_results): |
| if not heuristic_result.get('tests'): # Compile or non-swarming. |
| test_result_map[NON_SWARMING]['heuristic_index'] = index |
| continue |
| for test_name in heuristic_result['tests']: |
| test_result_map[test_name]['heuristic_index'] = index |
| |
| # Group tests based on indices. |
| indices_test_map = defaultdict(list) |
| for test_name, indices in test_result_map.iteritems(): |
| indices_test_map[(indices['try_job_index'], |
| indices['heuristic_index'])].append(test_name) |
| |
| for (try_job_index, heuristic_index), tests in indices_test_map.iteritems(): |
| try_job_result = try_jobs[try_job_index] |
| heuristic_result = step_heuristic_results[heuristic_index] |
| |
| final_result = { |
| 'try_job': |
| try_job_result, |
| 'heuristic_analysis': { |
| 'suspected_cls': |
| _AttachBuildIdToSuspectedCLs( |
| master_name, builder_name, |
| heuristic_result['suspected_cls']), |
| }, |
| 'tests': |
| tests if tests != [NON_SWARMING] else [], |
| 'first_failure': |
| heuristic_result['first_failure'], |
| 'last_pass': |
| heuristic_result['last_pass'], |
| 'first_failure_build_id': |
| buildbot.GetBuildId(master_name, builder_name, |
| heuristic_result['first_failure']), |
| 'last_pass_build_id': |
| buildbot.GetBuildId(master_name, builder_name, |
| heuristic_result['last_pass']), |
| 'supported': |
| heuristic_result['supported'] |
| } |
| |
| if (('status' not in try_job_result or |
| try_job_result['status'] in NO_TRY_JOB_REASON_MAP.values()) or |
| try_job_result.get('can_force')): |
| # There is no try job info but only heuristic result. |
| try_job_result['status'] = try_job_result.get('status', |
| result_status.UNKNOWN) |
| step_updated_results['unclassified_failures'].append(final_result) |
| elif (try_job_result['status'] == result_status.FLAKY or |
| bool(heuristic_result.get('flaky'))): |
| step_updated_results['flaky_failures'].append(final_result) |
| else: |
| step_updated_results['reliable_failures'].append(final_result) |
| |
| return updated_results |
| |
| |
| def _PrepareTryJobDataForCompileFailure(analysis): |
| try_job_data = {} |
| if not (analysis.failure_result_map and # pragma: no branch. |
| constants.COMPILE_STEP_NAME in analysis.failure_result_map): |
| return try_job_data # pragma: no cover. |
| |
| referred_build_keys = BaseBuildModel.GetBuildInfoFromBuildKey( |
| analysis.failure_result_map[constants.COMPILE_STEP_NAME]) |
| try_job = WfTryJob.Get(*referred_build_keys) |
| if not try_job or not try_job.compile_results: |
| return try_job_data # pragma: no cover. |
| result = try_job.compile_results[-1] |
| |
| try_job_data['status'] = analysis_status.STATUS_TO_DESCRIPTION.get( |
| try_job.status, 'unknown').lower() |
| try_job_data['url'] = result.get('url') |
| try_job_data['completed'] = try_job.completed |
| try_job_data['failed'] = try_job.failed |
| try_job_data['culprit'] = result.get('culprit', |
| {}).get(constants.COMPILE_STEP_NAME) |
| |
| return try_job_data |
| |
| |
| def _PopulateHeuristicDataForCompileFailure(analysis, data): |
| if analysis.result: # pragma: no branch. |
| compile_failure = None |
| for failure in analysis.result.get('failures', []): |
| if failure['step_name'] == constants.COMPILE_STEP_NAME: |
| compile_failure = failure |
| if compile_failure: # pragma: no branch. |
| data['first_failure'] = compile_failure['first_failure'] |
| data['last_pass'] = compile_failure['last_pass'] |
| data['first_failure_build_id'] = buildbot.GetBuildId( |
| analysis.master_name, analysis.builder_name, |
| compile_failure['first_failure']) |
| data['last_pass_build_id'] = buildbot.GetBuildId( |
| analysis.master_name, analysis.builder_name, |
| compile_failure['last_pass']) |
| data['suspected_cls_by_heuristic'] = _AttachBuildIdToSuspectedCLs( |
| analysis.master_name, analysis.builder_name, |
| compile_failure['suspected_cls']) |
| |
| |
| def _GetAllSuspectedCLsAndCheckStatus(master_name, builder_name, build_number, |
| analysis): |
| if not analysis or not analysis.suspected_cls: |
| return [] |
| build_key = BaseBuildModel.CreateBuildKey(master_name, builder_name, |
| build_number) |
| suspected_cls = copy.deepcopy(analysis.suspected_cls) |
| |
| confidences = SuspectedCLConfidence.Get() |
| |
| for cl in suspected_cls: |
| cl['status'] = _ANALYSIS_CL_STATUS_MAP.get(analysis.result_status, None) |
| cl['confidence'] = None |
| |
| suspected_cl = WfSuspectedCL.Get(cl['repo_name'], cl['revision']) |
| if not suspected_cl: |
| continue |
| |
| build = suspected_cl.builds.get(build_key) |
| if build: |
| cl['status'] = build['status'] |
| cl['confidence'] = '%d%%' % ( |
| suspected_cl_util.GetSuspectedCLConfidenceScore(confidences, build) or |
| 0) |
| |
| return suspected_cls |
| |
| |
class BuildFailure(BaseHandler):
  """Shows the analysis of a build failure and triggers analyses on demand."""
  PERMISSION_LEVEL = Permission.ANYONE

  def _PrepareCommonDataForFailure(self, analysis):
    """Returns the page data shared by compile and test failure pages."""
    master_name = analysis.master_name
    builder_name = analysis.builder_name
    build_number = analysis.build_number

    data = {
        'master_name': master_name,
        'builder_name': builder_name,
        'build_number': build_number,
        'build_id': buildbot.GetBuildId(master_name, builder_name,
                                        build_number),
        'pipeline_status_path': analysis.pipeline_status_path,
        'show_debug_info': self._ShowDebugInfo(),
        'analysis_request_time': _FormatDatetime(analysis.request_time),
        'analysis_start_time': _FormatDatetime(analysis.start_time),
        'analysis_end_time': _FormatDatetime(analysis.end_time),
        'analysis_duration': analysis.duration,
        'analysis_update_time': _FormatDatetime(analysis.updated_time),
        'analysis_completed': analysis.completed,
        'analysis_failed': analysis.failed,
        'analysis_correct': analysis.correct,
        'analysis_is_duplicate': analysis.is_duplicate,
        'triage_history': _GetTriageHistory(analysis),
        'show_admin_controls': auth_util.IsCurrentUserAdmin(),
    }
    data.update({
        'triage_reference_analysis_master_name':
            analysis.triage_reference_analysis_master_name,
        'triage_reference_analysis_builder_name':
            analysis.triage_reference_analysis_builder_name,
        'triage_reference_analysis_build_number':
            analysis.triage_reference_analysis_build_number,
    })
    return data

  def _PrepareDataForCompileFailure(self, analysis, data):
    """Adds heuristic and try job results of a compile failure to data."""
    data['infra_exception'] = analysis.failure_type == failure_type.INFRA
    # Check result from heuristic analysis.
    _PopulateHeuristicDataForCompileFailure(analysis, data)
    # Check result from try job.
    data['try_job'] = _PrepareTryJobDataForCompileFailure(analysis)

  def _PrepareDataForTestFailures(self,
                                  analysis,
                                  build_info,
                                  data,
                                  show_debug_info=False):
    """Adds the combined heuristic and try job results of test failures.

    Args:
      analysis: The analysis whose result is rendered.
      build_info: (master_name, builder_name, build_number) of the build.
      data (dict): Page data, updated in place.
      show_debug_info (bool): Forwarded to the try job result lookup.

    Returns:
      The same data dict.
    """
    data['status_message_map'] = result_status.STATUS_MESSAGE_MAP

    heuristic_results = _GetOrganizedAnalysisResultBySuspectedCL(
        analysis.master_name, analysis.builder_name, analysis.result)
    data['analysis_result'] = _GetAnalysisResultWithTryJobInfo(
        show_debug_info, heuristic_results, *build_info)
    return data

  def HandleGet(self):
    """Renders the analysis result of the build given by the 'url' parameter.

    Returns:
      An error when the url does not point to a build (404) or when the build
      has no analysis: 401 if the request carries redirect=1 (which HandlePost
      puts on its redirects), 400 otherwise. Else a dict with the 'template'
      to render and its 'data'.
    """
    url = self.request.get('url').strip()
    build_info = buildbot.ParseBuildUrl(url)
    if not build_info:
      return BaseHandler.CreateError(
          'Url "%s" is not pointing to a build.' % url, 404)
    master_name, builder_name, build_number = build_info

    analysis = WfAnalysis.Get(master_name, builder_name, build_number)
    if not analysis:
      if self.request.get('redirect') == '1':
        message, status_code = 'No permission to schedule a new analysis.', 401
      else:
        message, status_code = (
            'Please schedule analyses on home page instead.', 400)
      return BaseHandler.CreateError(message, status_code)

    data = self._PrepareCommonDataForFailure(analysis)
    data['suspected_cls'] = _GetAllSuspectedCLsAndCheckStatus(
        master_name, builder_name, build_number, analysis)

    # TODO(crbug.com/702444): Do not assume failure_type.INFRA implies a
    # compile failure. Either use a special template, or choose the appropriate
    # one based on the type of job (compile/test).
    if analysis.failure_type in (failure_type.COMPILE, failure_type.INFRA):
      self._PrepareDataForCompileFailure(analysis, data)
      template = 'waterfall/compile_failure.html'
    else:
      self._PrepareDataForTestFailures(analysis, build_info, data,
                                       self._ShowDebugInfo())
      template = 'waterfall/test_failure.html'
    return {'template': template, 'data': data}

  @token.VerifyXSRFToken()
  def HandlePost(self):
    """Triggers an analysis on demand and redirects to the result page.

    Returns:
      A redirect to the result page, or an error when the url does not point
      to a build (404), the master is unsupported and was never analyzed
      (501), the build info cannot be fetched (501), or a forced rerun is
      requested for an incomplete build (501).
    """
    url = self.request.get('url').strip()
    # NOTE(review): url is inserted into the query string without
    # percent-encoding, so a url containing '&' or '#' would not round-trip
    # through the redirect. Confirm whether it should be quoted.
    result_page = '/waterfall/failure?redirect=1&url=%s' % url

    is_admin = auth_util.IsCurrentUserAdmin()
    user_email = auth_util.GetUserEmail()
    if not acl.CanTriggerNewAnalysis(user_email, is_admin):
      # No permission to schedule a new analysis.
      return self.CreateRedirect(result_page)

    build_info = buildbot.ParseBuildUrl(url)
    if not build_info:
      return BaseHandler.CreateError(
          'Url "%s" is not pointing to a build.' % url, 404)
    master_name, builder_name, build_number = build_info

    analysis = None
    if (not waterfall_config.MasterIsSupported(master_name) and
        not auth_util.IsCurrentUserAdmin()):
      # If the build failure was already analyzed, just show it to the user.
      analysis = WfAnalysis.Get(master_name, builder_name, build_number)
      if not analysis:
        return BaseHandler.CreateError(
            'Master "%s" is not supported yet.' % master_name, 501)

    if analysis:
      return self.CreateRedirect(result_page)

    # Only allow admin to force a re-run and set the build_completed.
    force = is_admin and self.request.get('force') == '1'

    build = build_util.GetBuildInfo(master_name, builder_name, build_number)
    if not build:
      return BaseHandler.CreateError(
          'Can\'t get information about build "%s/%s/%s".' %
          (master_name, builder_name, build_number), 501)

    if force and not build.completed:
      return BaseHandler.CreateError(
          'Can\'t force a rerun for an incomplete build "%s/%s/%s".' %
          (master_name, builder_name, build_number), 501)

    build_failure_analysis_pipelines.ScheduleAnalysisIfNeeded(
        master_name,
        builder_name,
        build_number,
        build_completed=build.completed,
        force=force,
        queue_name=constants.WATERFALL_ANALYSIS_QUEUE)

    return self.CreateRedirect(result_page)