| import json |
| import time |
| import six |
| |
| from collections import defaultdict |
| from mozlog.formatters import base |
| |
| |
| class ChromiumFormatter(base.BaseFormatter): |
| """Formatter to produce results matching the Chromium JSON Test Results format. |
| https://chromium.googlesource.com/chromium/src/+/master/docs/testing/json_test_results_format.md |
| |
| Notably, each test has an "artifacts" field that is a dict consisting of |
| "log": a list of strings (one per subtest + one for harness status, see |
| _append_test_message for the format) |
| "screenshots": a list of strings in the format of "url: base64" |
| |
| """ |
| |
| def __init__(self): |
| # Whether the run was interrupted, either by the test runner or user. |
| self.interrupted = False |
| |
| # A map of test status to the number of tests that had that status. |
| self.num_failures_by_status = defaultdict(int) |
| |
| # Start time, expressed as offset since UNIX epoch in seconds. |
| self.start_timestamp_seconds = None |
| |
| # Trie of test results. Each directory in the test name is a node in |
| # the trie and the leaf contains the dict of per-test data. |
| self.tests = {} |
| |
| # Message dictionary, keyed by test name. Value is a list of strings: |
| # see _append_test_message for the format. |
| self.messages = defaultdict(list) |
| |
| # List of tests that have failing subtests. |
| self.tests_with_subtest_fails = set() |
| |
| # Browser log for the current test under execution. |
| self.test_log = [] |
| |
| def _append_test_message(self, test, subtest, wpt_actual_status, message): |
| """ |
| Appends the message data for a test or subtest. |
| :param str test: the name of the test |
| :param str subtest: the name of the subtest with the message. Will be |
| None if this is called for a test. |
| :param str wpt_actual_status: the test status as reported by WPT |
| :param str message: the string to append to the message for this test |
| |
| Example: |
| [test_or_subtest_name] |
| expected: FAIL |
| message: some test message eg assert failure |
| """ |
| # NOTE: throughout this function we output a key called "expected" but |
| # fill it in with the actual status. This is by design. The goal of this |
| # output is to look exactly like WPT's expectation metadata so that it |
| # can be easily diff-ed. |
| if subtest: |
| result = " [%s]\n expected: %s\n" % (subtest, wpt_actual_status) |
| if message: |
| result += " message: %s\n" % message |
| self.messages[test].append(result) |
| else: |
| # No subtest, so this is the top-level test. The result must be |
| # prepended to the list of messages, so that the message for the |
| # test comes before any subtest messages. |
| test_name_last_part = test.split("/")[-1] |
| result = "[%s]\n expected: %s\n" % (test_name_last_part, wpt_actual_status) |
| if message: |
| result += " message: %s\n" % message |
| self.messages[test].insert(0, result) |
| |
| def _append_artifact(self, cur_dict, artifact_name, artifact_value): |
| """ |
| Appends artifacts to the specified dictionary. |
| :param dict cur_dict: the test leaf dictionary to append to |
| :param str artifact_name: the name of the artifact |
| :param str artifact_value: the value of the artifact |
| """ |
| assert isinstance(artifact_value, six.string_types), "artifact_value must be a str" |
| if "artifacts" not in cur_dict.keys(): |
| cur_dict["artifacts"] = defaultdict(list) |
| cur_dict["artifacts"][artifact_name].append(artifact_value) |
| |
| def _store_test_result(self, name, actual, expected, messages, wpt_actual, |
| subtest_failure, reftest_screenshots): |
| """ |
| Stores the result of a single test in |self.tests| |
| :param str name: name of the test. |
| :param str actual: actual status of the test. |
| :param str expected: expected statuses of the test. |
| :param list messages: a list of test messages. |
| :param str wpt_actual: actual status reported by wpt, may differ from |actual|. |
| :param bool subtest_failure: whether this test failed because of subtests. |
| :param Optional[list] reftest_screenshots: see executors/base.py for definition. |
| """ |
| # The test name can contain a leading / which will produce an empty |
| # string in the first position of the list returned by split. We use |
| # filter(None) to remove such entries. |
| name_parts = filter(None, name.split("/")) |
| cur_dict = self.tests |
| for name_part in name_parts: |
| cur_dict = cur_dict.setdefault(name_part, {}) |
| cur_dict["actual"] = actual |
| cur_dict["expected"] = expected |
| if subtest_failure: |
| self._append_artifact(cur_dict, "wpt_subtest_failure", "true") |
| if wpt_actual != actual: |
| self._append_artifact(cur_dict, "wpt_actual_status", wpt_actual) |
| if wpt_actual == 'CRASH': |
| for line in self.test_log: |
| self._append_artifact(cur_dict, "wpt_crash_log", line) |
| for message in messages: |
| self._append_artifact(cur_dict, "log", message) |
| |
| # Store screenshots (if any). |
| for item in reftest_screenshots or []: |
| if not isinstance(item, dict): |
| # Skip the relation string. |
| continue |
| data = "%s: %s" % (item["url"], item["screenshot"]) |
| self._append_artifact(cur_dict, "screenshots", data) |
| |
| # Figure out if there was a regression or unexpected status. This only |
| # happens for tests that were run |
| if actual != "SKIP": |
| if actual not in expected: |
| cur_dict["is_unexpected"] = True |
| if actual != "PASS": |
| cur_dict["is_regression"] = True |
| |
| def _map_status_name(self, status): |
| """ |
| Maps a WPT status to a Chromium status. |
| |
| Chromium has five main statuses that we have to map to: |
| CRASH: the test harness crashed |
| FAIL: the test did not run as expected |
| PASS: the test ran as expected |
| SKIP: the test was not run |
| TIMEOUT: the did not finish in time and was aborted |
| |
| :param str status: the string status of a test from WPT |
| :return: a corresponding string status for Chromium |
| """ |
| if status == "OK": |
| return "PASS" |
| if status == "NOTRUN": |
| return "SKIP" |
| if status == "EXTERNAL-TIMEOUT": |
| return "TIMEOUT" |
| if status in ("ERROR", "PRECONDITION_FAILED"): |
| return "FAIL" |
| if status == "INTERNAL-ERROR": |
| return "CRASH" |
| # Any other status just gets returned as-is. |
| return status |
| |
| def _get_expected_status_from_data(self, actual_status, data): |
| """ |
| Gets the expected statuses from a |data| dictionary. |
| |
| If there is no expected status in data, the actual status is returned. |
| This is because mozlog will delete "expected" from |data| if it is the |
| same as "status". So the presence of "expected" implies that "status" is |
| unexpected. Conversely, the absence of "expected" implies the "status" |
| is expected. So we use the "expected" status if it's there or fall back |
| to the actual status if it's not. |
| |
| If the test has multiple statuses, it will have other statuses listed as |
| "known_intermittent" in |data|. If these exist, they will be appended to |
| the returned status with spaced in between. |
| |
| :param str actual_status: the actual status of the test |
| :param data: a data dictionary to extract expected status from |
| :return str: the expected statuses as a string |
| """ |
| expected_statuses = self._map_status_name(data["expected"]) if "expected" in data else actual_status |
| if data.get("known_intermittent"): |
| expected_statuses += " " + " ".join( |
| [self._map_status_name(other_status) for other_status in data["known_intermittent"]]) |
| return expected_statuses |
| |
| def suite_start(self, data): |
| # |data| contains a timestamp in microseconds, while time.time() gives |
| # it in seconds. |
| self.start_timestamp_seconds = (float(data["time"]) / 1000 if "time" in data |
| else time.time()) |
| |
| def test_status(self, data): |
| test_name = data["test"] |
| wpt_actual_status = data["status"] |
| actual_status = self._map_status_name(wpt_actual_status) |
| expected_statuses = self._get_expected_status_from_data(actual_status, data) |
| |
| is_unexpected = actual_status not in expected_statuses |
| if is_unexpected and test_name not in self.tests_with_subtest_fails: |
| self.tests_with_subtest_fails.add(test_name) |
| # We should always get a subtest in the data dict, but it's technically |
| # possible that it's missing. Be resilient here. |
| subtest_name = data.get("subtest", "UNKNOWN SUBTEST") |
| self._append_test_message(test_name, subtest_name, |
| wpt_actual_status, data.get("message", "")) |
| |
| def test_end(self, data): |
| test_name = data["test"] |
| # Save the status reported by WPT since we might change it when |
| # reporting to Chromium. |
| wpt_actual_status = data["status"] |
| actual_status = self._map_status_name(wpt_actual_status) |
| expected_statuses = self._get_expected_status_from_data(actual_status, data) |
| subtest_failure = False |
| if test_name in self.tests_with_subtest_fails: |
| subtest_failure = True |
| # Clean up the test list to avoid accumulating too many. |
| self.tests_with_subtest_fails.remove(test_name) |
| # This test passed but it has failing subtests. Since we can only |
| # report a single status to Chromium, we choose FAIL to indicate |
| # that something about this test did not run correctly. |
| if actual_status == "PASS": |
| actual_status = "FAIL" |
| |
| self._append_test_message(test_name, None, wpt_actual_status, |
| data.get("message", "")) |
| self._store_test_result(test_name, |
| actual_status, |
| expected_statuses, |
| self.messages[test_name], |
| wpt_actual_status, |
| subtest_failure, |
| data.get("extra", {}).get("reftest_screenshots")) |
| |
| # Remove the test from messages dict to avoid accumulating too many. |
| self.messages.pop(test_name) |
| |
| # Update the count of how many tests ran with each status. |
| self.num_failures_by_status[actual_status] += 1 |
| |
| # New test, new browser logs. |
| self.test_log = [] |
| |
| def suite_end(self, data): |
| # Create the final result dictionary |
| final_result = { |
| # There are some required fields that we just hard-code. |
| "interrupted": False, |
| "path_delimiter": "/", |
| "version": 3, |
| "seconds_since_epoch": self.start_timestamp_seconds, |
| "num_failures_by_type": self.num_failures_by_status, |
| "tests": self.tests |
| } |
| return json.dumps(final_result) |
| |
| def process_output(self, data): |
| if 'command' in data and 'chromedriver' in data['command']: |
| self.test_log.append(data['data']) |