blob: 41a82aed3982ef17c872910fd27afd5bebababbe [file] [log] [blame]
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Script to extract stage features from raw protobufs.
Function structure outline:
ExtractStageFeatures():
---> _GetInOutDir()
---> _GetFilenames()
---> _ExtractStageFeaturesHelper()
--->_ExtractStageFeaturesForSingleCQ()
---> _UpdateStageFeaturesDictForBuild()
---> _ExtractStageInfoFeatures()
How to use:
just call ExtractStageFeatures(for_training) to generate csv from protobufs.
"""
from __future__ import print_function
import os
from chromite.lib import cros_logging as logging
import pandas
from lib import constants
from lib import feature_extractor_utils
from lib.protos import StagesForCQ_pb2
# Stage names that get their own failure counter in a per-build feature
# dictionary. A failed stage whose name is not listed here is counted under
# 'UnknownStage' instead (see _UpdateStageFeaturesDictForBuild).
STAGES = [
    'MasterSlaveSyncCompletion',
    'Paygen',
    'CPEExport',
    'HWTest [bvt-inline]',
    'UnitTest',
    'Archive',
    'SimpleChromeWorkflow',
    'SlaveFailureSummary',
    'VMTest (attempt 1)',
    'BuildPackages',
    'AFDODataGenerate',
    'Signing',
    'DetectRelevantChanges',
    'UploadPrebuilts',
    'SetupBoard',
    'TestSimpleChromeWorkflow',
    'AndroidMetadata',
    'Uprev',
    'SignerTest',
    'BranchUtilTest',
    'SetupBoard (pre-Patch)',
    'InitSDK',
    'InitSDK (pre-Patch)',
    'AFDOUpdateEbuild',
    'PatchChrome',
    'GCETest (attempt 2)',
    'CleanUp',
    'DevInstallerPrebuilts',
    'HWTest [moblab_quick]',
    'BinhostTest',
    'Report',
    'HWTest [jetstream_cq]',
    'SyncChrome',
    'ConfigDump',
    'DetectIrrelevantChanges',
    'DownloadAndroidDebugSymbols',
    'UprevAndroid',
    'MasterSlaveLKGMSync',
    'VMTest (attempt 2)',
    'HWTest [bvt-arc]',
    'BuildReexecutionFinished',
    'BuildStart',
    'ChromeSDK',
    'UploadTestArtifacts',
    'HWTest [AFDO_record]',
    'BuildImage',
    'ImageTest',
    'HWTest [sanity]',
    'PublishUprevChanges',
    'ManifestVersionedSync',
    'CommitQueueCompletion',
    'DebugSymbols',
    'MasterUploadPrebuilts',
    'SimpleChromeArtifacts',
    'CommitQueueSync',
    'GCETest (attempt 1)',
    'HWTest [bvt-cq]',
    'HWTest [arc-bvt-cq]',
    'ScheduleSlaves',
    'RegenPortageCache',
    'BuildPackages (pre-Patch)',
]

# Keys every per-build feature dictionary is initialised with: one counter per
# known stage, plus the aggregate fields and the catch-all stage counter.
STAGES_FOR_BUILD_KEYS = STAGES + [
    'duration',
    'important',
    'final',
    'UnknownStage',
]

# Keys of the dictionary returned by _ExtractStageInfoFeatures.
STAGE_INFO_KEYS = [
    'build_id',
    'status',
    'duration',
    'board',
    'stage_id',
    'builder_config',
    'stage_name',
    'important',
    'final',
]
def ExtractStageFeatures(for_training, bucket=None):
  """Turn persisted stage records into stage-feature csv file(s).

  Resolves the input/output directories for stage features, lists the input
  files, and hands them to _ExtractStageFeaturesHelper.

  Args:
    for_training: boolean, True for training data, False for prediction.
    bucket: Google Cloud Storage bucket. If not None, it means running on app
      engine. Defaults to None.
  """
  directories = feature_extractor_utils.GetInOutDir(
      for_training, constants.FEATURE_TYPE_STAGE)
  input_dir, output_dir = directories
  proto_paths = feature_extractor_utils.GetFilenames(input_dir, bucket)
  _ExtractStageFeaturesHelper(proto_paths, output_dir, for_training, bucket)
def _ExtractStageFeaturesHelper(stage_filenames, output_dir, for_training,
                                bucket):
  """Extract stage features from every file in stage_filenames and save them.

  For training, the rows of all files are combined and written once to
  constants.STAGE_TRAIN_FILE inside output_dir.
  For prediction, each input file gets its own csv in output_dir, named after
  the input file's basename plus constants.CSV_SUFFIX.

  Args:
    stage_filenames: a list of filenames with path info to load.
    output_dir: output directory to store the resultant csv file(s).
    for_training: boolean, True for training data, False for prediction.
    bucket: Google Cloud Storage bucket. If not None, it means running on app
      engine.
  """
  training_rows = []
  for proto_path in stage_filenames:
    cq_message = _GetStagesForCQMessage(proto_path, bucket)
    build_rows = _ExtractStageFeaturesForSingleCQ(cq_message)
    if for_training:
      # Training rows are accumulated and written once after the loop.
      training_rows.extend(build_rows)
      continue
    # Prediction keeps each master build's result in a separate csv.
    frame = pandas.DataFrame(build_rows)
    csv_name = os.path.basename(proto_path) + constants.CSV_SUFFIX
    frame.to_csv(os.path.join(output_dir, csv_name), encoding='utf-8')

  if not for_training:
    return
  frame = pandas.DataFrame(training_rows)
  train_path = os.path.join(output_dir, constants.STAGE_TRAIN_FILE)
  frame.to_csv(train_path, encoding='utf-8')
def _ExtractStageFeaturesForSingleCQ(stages_for_cq_msg):
  """Extract Stage features for one CQ run.

  Only stages whose status equals constants.STATUS_FAIL contribute; a build
  with no failed stage produces no entry in the result.

  Args:
    stages_for_cq_msg: a StagesForCQ message defined in StagesForCQ.proto. It
      contains all stages info in a master build and all its slave builds.

  Returns:
    A list of dictionaries containing the information in the stages_for_cq_msg.
    Each dictionary item represents one build, which could be master or slave.
    Keys are those in STAGES_FOR_BUILD_KEYS plus 'failure_category', which is
    copied from stages_for_cq_msg.
  """
  logging.info('Processing CQ %d' % stages_for_cq_msg.master_build_id)
  # Maps build id (master or slave) to that build's feature dictionary.
  stages_for_cq_dict = {}
  for stage_info_msg in stages_for_cq_msg.stage_msgs:
    # Only failed stages are recorded.
    if stage_info_msg.status != constants.STATUS_FAIL:
      continue
    build_id = stage_info_msg.build_id
    if build_id not in stages_for_cq_dict:
      # First failed stage seen for this build: start all counters at zero.
      build_features = dict.fromkeys(STAGES_FOR_BUILD_KEYS, 0)
      build_features['failure_category'] = stages_for_cq_msg.failure_category
      stages_for_cq_dict[build_id] = build_features
    _UpdateStageFeaturesDictForBuild(stage_info_msg,
                                     stages_for_cq_dict[build_id])
  # values() works on both Python 2 and 3, unlike the Python-2-only
  # iteritems(); list() keeps the documented list return type.
  return list(stages_for_cq_dict.values())
def _UpdateStageFeaturesDictForBuild(stage_info_msg, current_build_dict):
  """Process a stage message. Update current build result dictionary in place.

  The stage's name increments the matching stage counter (or 'UnknownStage'
  when the name has no entry), its duration is added to the running total, and
  'important' / 'final' are set to constants.TRUE if the stage has them set.
  All other fields of the message are ignored.

  Args:
    stage_info_msg: a StageInfo message as defined in StagesForCQ.proto.
    current_build_dict: a dictionary to store stage info in current build.
      Valid keys are defined in STAGES_FOR_BUILD_KEYS.
  """
  stage_result = _ExtractStageInfoFeatures(stage_info_msg)
  # items() works on both Python 2 and 3, unlike the Python-2-only iteritems().
  for key, value in stage_result.items():
    if key == 'stage_name':
      try:
        current_build_dict[value] += 1  # value is actual failed stage name
      except KeyError:
        # This is ok. It may happen if new test stages are added or a rare
        # test is being executed. Update 'UnknownStage' field in this case.
        logging.info('unknown stage encountered: %s' % value)
        current_build_dict['UnknownStage'] += 1
    elif key == 'duration':
      current_build_dict[key] += value
    elif key in ('important', 'final'):
      # If any failed stage has 'important' or 'final' equal to True, then we
      # set the corresponding result in current build dict to constants.TRUE.
      if value:
        current_build_dict[key] = constants.TRUE
def _GetStagesForCQMessage(filename, bucket):
  """Load a persisted protobuf record and return it as a StagesForCQ object.

  The serialized bytes are downloaded from the bucket's blob when bucket is
  truthy, and read from the local file otherwise.

  Args:
    filename: full file name with path information. It takes the form:
      path/to/id_occurrence. The file is the persisted protobuf record.
    bucket: Google Cloud Storage bucket. If not None, it means running on app
      engine.

  Returns:
    a StagesForCQ message object.
  """
  message = StagesForCQ_pb2.StagesForCQ()
  if not bucket:
    with open(filename, 'rb') as proto_file:
      serialized = proto_file.read()
  else:
    serialized = bucket.get_blob(filename).download_as_string()
  message.ParseFromString(serialized)
  return message
def _ExtractStageInfoFeatures(stage_info_msg):
"""Extract CL stage features from a StageInfo message.
Args:
stage_info_msg: a StageInfo message as defined in StagesForCQ.proto.
Returns:
A dictionary containing features from stage_info_msg, with valid keys of
STAGE_INFO_KEYS.
"""
stage_info_dict = {}
for descriptor in stage_info_msg.DESCRIPTOR.fields:
stage_info_dict[descriptor.name] = getattr(stage_info_msg, descriptor.name)
return stage_info_dict