Improve wptrunner tests' compatibility with Python 3 (#20900)
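
Replace the Python-2-only constructs in wptrunner with six-based
equivalents so the test suite also runs under Python 3:

- iterate dictionaries via six.iteritems/itervalues/iterkeys instead of
  the removed dict.iter* methods
- type-check text against six.text_type / string_types rather than
  (str, unicode)
- open expectation and include manifests in binary mode, and feed bytes
  into the in-memory manifests used by the tests
- intern native strings with six.ensure_str (and decode with
  ensure_text) instead of encode("utf8") / decode("utf8")
- use next(iter(...)) and super().__new__ in place of .next() and
  object.__new__(cls, logger)
- drop the Python 3 xfail markers that these fixes make unnecessary

A minimal sketch of the idioms the patch leans on (illustrative only,
not part of the patch; the dict contents and id value are made up):

    from six import ensure_str, iteritems, text_type
    from six.moves import intern

    results = {u"/a/test.html": u"PASS"}
    for test_id, status in iteritems(results):  # dict.iteritems() is gone on py3
        assert isinstance(status, text_type)    # unicode on py2, str on py3
        key = intern(ensure_str(test_id))       # intern() needs the native str type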

diff --git a/tools/wptrunner/wptrunner/browsers/base.py b/tools/wptrunner/wptrunner/browsers/base.py
index 6553445..fef052d 100644
--- a/tools/wptrunner/wptrunner/browsers/base.py
+++ b/tools/wptrunner/wptrunner/browsers/base.py
@@ -3,6 +3,7 @@
 import socket
 from abc import ABCMeta, abstractmethod
 from copy import deepcopy
+from six import iteritems
 
 from ..wptcommandline import require_arg  # noqa: F401
 
@@ -199,5 +200,5 @@
     up the browser from the runner process.
     """
     def __init__(self, **kwargs):
-        for k, v in kwargs.iteritems():
+        for k, v in iteritems(kwargs):
             setattr(self, k, v)
diff --git a/tools/wptrunner/wptrunner/environment.py b/tools/wptrunner/wptrunner/environment.py
index e79ae37..7dcea4a 100644
--- a/tools/wptrunner/wptrunner/environment.py
+++ b/tools/wptrunner/wptrunner/environment.py
@@ -5,6 +5,7 @@
 import socket
 import sys
 import time
+from six import iteritems
 
 from mozlog import get_default_logger, handlers, proxy
 
@@ -98,7 +99,7 @@
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.process_interrupts()
 
-        for scheme, servers in self.servers.iteritems():
+        for scheme, servers in iteritems(self.servers):
             for port, server in servers:
                 server.kill()
         for cm in self.env_extras_cms:
@@ -194,7 +195,7 @@
         route_builder.add_handler(b"GET", b"/resources/testdriver.js",
                                   StringHandler(data, "text/javascript"))
 
-        for url_base, paths in self.test_paths.iteritems():
+        for url_base, paths in iteritems(self.test_paths):
             if url_base == "/":
                 continue
             route_builder.add_mount_point(url_base, paths["tests_path"])
@@ -220,7 +221,7 @@
     def test_servers(self):
         failed = []
         host = self.config["server_host"]
-        for scheme, servers in self.servers.iteritems():
+        for scheme, servers in iteritems(self.servers):
             for port, server in servers:
                 if self.test_server_port:
                     s = socket.socket()
diff --git a/tools/wptrunner/wptrunner/executors/executorservo.py b/tools/wptrunner/wptrunner/executors/executorservo.py
index 9cef1fb..9eebfa5 100644
--- a/tools/wptrunner/wptrunner/executors/executorservo.py
+++ b/tools/wptrunner/wptrunner/executors/executorservo.py
@@ -7,6 +7,7 @@
 import threading
 import traceback
 import uuid
+from six import iteritems
 
 from mozprocess import ProcessHandler
 
@@ -45,7 +46,7 @@
         args += ["-Z", debug_opts]
     for stylesheet in browser.user_stylesheets:
         args += ["--user-stylesheet", stylesheet]
-    for pref, value in test.environment.get('prefs', {}).iteritems():
+    for pref, value in iteritems(test.environment.get('prefs', {})):
         args += ["--pref", "%s=%s" % (pref, value)]
     if browser.ca_certificate_path:
         args += ["--certificate-path", browser.ca_certificate_path]
diff --git a/tools/wptrunner/wptrunner/expectedtree.py b/tools/wptrunner/wptrunner/expectedtree.py
index 4d50508..7521f25 100644
--- a/tools/wptrunner/wptrunner/expectedtree.py
+++ b/tools/wptrunner/wptrunner/expectedtree.py
@@ -1,5 +1,6 @@
 from math import log
 from collections import defaultdict
+from six import iteritems, itervalues
 
 class Node(object):
     def __init__(self, prop, value):
@@ -33,7 +34,7 @@
 
     result_counts = defaultdict(int)
     total = float(len(results))
-    for values in results.itervalues():
+    for values in itervalues(results):
         # Not sure this is right, possibly want to treat multiple values as
         # distinct from multiple of the same value?
         for value in values:
@@ -41,7 +42,7 @@
 
     entropy_sum = 0
 
-    for count in result_counts.itervalues():
+    for count in itervalues(result_counts):
         prop = float(count) / total
         entropy_sum -= prop * log(prop, 2)
 
@@ -52,7 +53,7 @@
     """Split a dictionary of results into a dictionary of dictionaries where
     each sub-dictionary has a specific value of the given property"""
     by_prop = defaultdict(dict)
-    for run_info, value in results.iteritems():
+    for run_info, value in iteritems(results):
         by_prop[run_info[prop]][run_info] = value
 
     return by_prop
@@ -77,13 +78,13 @@
     prop_index = {prop: i for i, prop in enumerate(properties)}
 
     all_results = defaultdict(int)
-    for result_values in results.itervalues():
-        for result_value, count in result_values.iteritems():
+    for result_values in itervalues(results):
+        for result_value, count in iteritems(result_values):
             all_results[result_value] += count
 
     # If there is only one result we are done
     if not properties or len(all_results) == 1:
-        for value, count in all_results.iteritems():
+        for value, count in iteritems(all_results):
             tree.result_values[value] += count
         tree.run_info |= set(results.keys())
         return tree
@@ -99,7 +100,7 @@
             continue
         new_entropy = 0.
         results_sets_entropy = []
-        for prop_value, result_set in result_sets.iteritems():
+        for prop_value, result_set in iteritems(result_sets):
             results_sets_entropy.append((entropy(result_set), prop_value, result_set))
             new_entropy += (float(len(result_set)) / len(results)) * results_sets_entropy[-1][0]
 
@@ -109,7 +110,7 @@
 
     # In the case that no properties partition the space
     if not results_partitions:
-        for value, count in all_results.iteritems():
+        for value, count in iteritems(all_results):
             tree.result_values[value] += count
         tree.run_info |= set(results.keys())
         return tree
diff --git a/tools/wptrunner/wptrunner/manifestexpected.py b/tools/wptrunner/wptrunner/manifestexpected.py
index eae85b1..65b53f0 100644
--- a/tools/wptrunner/wptrunner/manifestexpected.py
+++ b/tools/wptrunner/wptrunner/manifestexpected.py
@@ -1,6 +1,7 @@
 import os
 from six.moves.urllib.parse import urljoin
 from collections import deque
+from six import text_type
 
 from .wptmanifest.backends import static
 from .wptmanifest.backends.base import ManifestItem
@@ -57,7 +58,7 @@
     """Set of tags that have been applied to the test"""
     try:
         value = node.get("tags")
-        if isinstance(value, (str, unicode)):
+        if isinstance(value, text_type):
             return {value}
         return set(value)
     except KeyError:
@@ -66,7 +67,7 @@
 
 def prefs(node):
     def value(ini_value):
-        if isinstance(ini_value, (str, unicode)):
+        if isinstance(ini_value, text_type):
             return tuple(pref_piece.strip() for pref_piece in ini_value.split(':', 1))
         else:
             # this should be things like @Reset, which are apparently type 'object'
@@ -74,7 +75,7 @@
 
     try:
         node_prefs = node.get("prefs")
-        if type(node_prefs) in (str, unicode):
+        if isinstance(node_prefs, text_type):
             rv = dict(value(node_prefs))
         else:
             rv = dict(value(item) for item in node_prefs)
@@ -86,7 +87,7 @@
 def set_prop(name, node):
     try:
         node_items = node.get(name)
-        if isinstance(node_items, (str, unicode)):
+        if isinstance(node_items, text_type):
             rv = {node_items}
         else:
             rv = set(node_items)
@@ -99,7 +100,7 @@
     rv = {}
     try:
         node_items = node.get("leak-threshold")
-        if isinstance(node_items, (str, unicode)):
+        if isinstance(node_items, text_type):
             node_items = [node_items]
         for item in node_items:
             process, value = item.rsplit(":", 1)
@@ -156,7 +157,7 @@
     if not isinstance(value, list):
         value = [value]
     for item in value:
-        if not isinstance(item, (str, unicode)):
+        if not isinstance(item, text_type):
             rv.append(item)
             continue
         parts = item.rsplit(":", 1)
@@ -478,7 +479,7 @@
     """
     manifest_path = expected.expected_path(metadata_root, test_path)
     try:
-        with open(manifest_path) as f:
+        with open(manifest_path, "rb") as f:
             return static.compile(f,
                                   run_info,
                                   data_cls_getter=data_cls_getter,
@@ -497,7 +498,7 @@
                      values should be computed.
     """
     try:
-        with open(path) as f:
+        with open(path, "rb") as f:
             return static.compile(f,
                                   run_info,
                                   data_cls_getter=lambda x,y: DirectoryManifest)
diff --git a/tools/wptrunner/wptrunner/manifestinclude.py b/tools/wptrunner/wptrunner/manifestinclude.py
index d302831..b3ab2c0 100644
--- a/tools/wptrunner/wptrunner/manifestinclude.py
+++ b/tools/wptrunner/wptrunner/manifestinclude.py
@@ -150,5 +150,5 @@
 
 
 def get_manifest(manifest_path):
-    with open(manifest_path) as f:
+    with open(manifest_path, "rb") as f:
         return conditional.compile(f, data_cls_getter=lambda x, y: IncludeManifest)
diff --git a/tools/wptrunner/wptrunner/manifestupdate.py b/tools/wptrunner/wptrunner/manifestupdate.py
index 2f2a8d5..3cb1b51 100644
--- a/tools/wptrunner/wptrunner/manifestupdate.py
+++ b/tools/wptrunner/wptrunner/manifestupdate.py
@@ -3,7 +3,7 @@
 from six.moves.urllib.parse import urljoin, urlsplit
 from collections import namedtuple, defaultdict, deque
 from math import ceil
-from six import iterkeys, itervalues, iteritems
+from six import integer_types, iterkeys, itervalues, iteritems, string_types, text_type
 
 from .wptmanifest import serialize
 from .wptmanifest.node import (DataNode, ConditionalNode, BinaryExpressionNode,
@@ -411,7 +411,7 @@
         for e in errors:
             if disable_intermittent:
                 condition = e.cond.children[0] if e.cond else None
-                msg = disable_intermittent if isinstance(disable_intermittent, (str, unicode)) else "unstable"
+                msg = disable_intermittent if isinstance(disable_intermittent, string_types) else "unstable"
                 self.node.set("disabled", msg, condition)
                 self.node.new_disabled = True
             else:
@@ -774,7 +774,7 @@
         for item in new:
             if item is None:
                 continue
-            elif isinstance(item, (str, unicode)):
+            elif isinstance(item, text_type):
                 rv.add(item)
             else:
                 rv |= item
@@ -897,10 +897,10 @@
 
 
 def make_node(value):
-    if type(value) in (int, float, long):
+    if isinstance(value, integer_types+(float,)):
         node = NumberNode(value)
-    elif type(value) in (str, unicode):
-        node = StringNode(unicode(value))
+    elif isinstance(value, text_type):
+        node = StringNode(text_type(value))
     elif hasattr(value, "__iter__"):
         node = ListNode()
         for item in value:
@@ -909,10 +909,10 @@
 
 
 def make_value_node(value):
-    if type(value) in (int, float, long):
+    if isinstance(value, integer_types+(float,)):
         node = ValueNode(value)
-    elif type(value) in (str, unicode):
-        node = ValueNode(unicode(value))
+    elif isinstance(value, text_type):
+        node = ValueNode(text_type(value))
     elif hasattr(value, "__iter__"):
         node = ListNode()
         for item in value:
diff --git a/tools/wptrunner/wptrunner/metadata.py b/tools/wptrunner/wptrunner/metadata.py
index bf4d7a5..aafc7d5 100644
--- a/tools/wptrunner/wptrunner/metadata.py
+++ b/tools/wptrunner/wptrunner/metadata.py
@@ -4,7 +4,7 @@
 from collections import defaultdict, namedtuple
 
 from mozlog import structuredlog
-from six import itervalues
+from six import ensure_str, ensure_text, iteritems, iterkeys, itervalues, text_type
 from six.moves import intern
 
 from . import manifestupdate
@@ -44,11 +44,11 @@
         return self.canonical_repr == other.canonical_repr
 
     def iteritems(self):
-        for key, value in self.data.iteritems():
+        for key, value in iteritems(self.data):
             yield key, value
 
     def items(self):
-        return list(self.iteritems())
+        return list(iteritems(self.data))
 
 
 def update_expected(test_paths, serve_root, log_file_names,
@@ -239,7 +239,7 @@
 def unpack_result(data):
     if isinstance(data, int):
         return (status_intern.get(data), None)
-    if isinstance(data, unicode):
+    if isinstance(data, text_type):
         return (data, None)
     # Unpack multiple statuses into a tuple to be used in the Results named tuple below,
     # separating `status` and `known_intermittent`.
@@ -289,7 +289,7 @@
     test_file_items = set(itervalues(id_test_map))
 
     default_expected_by_type = {}
-    for test_type, test_cls in wpttest.manifest_test_cls.iteritems():
+    for test_type, test_cls in iteritems(wpttest.manifest_test_cls):
         if test_cls.result_cls:
             default_expected_by_type[(test_type, False)] = test_cls.result_cls.default_expected
         if test_cls.subtest_result_cls:
@@ -427,7 +427,7 @@
             action_map["lsan_leak"](item)
 
         mozleak_data = data.get("mozleak", {})
-        for scope, scope_data in mozleak_data.iteritems():
+        for scope, scope_data in iteritems(mozleak_data):
             for key, action in [("objects", "mozleak_object"),
                                 ("total", "mozleak_total")]:
                 for item in scope_data.get(key, []):
@@ -439,7 +439,7 @@
         self.run_info = run_info_intern.store(RunInfo(data["run_info"]))
 
     def test_start(self, data):
-        test_id = intern(data["test"].encode("utf8"))
+        test_id = intern(ensure_str(data["test"]))
         try:
             self.id_test_map[test_id]
         except KeyError:
@@ -449,8 +449,8 @@
         self.tests_visited[test_id] = set()
 
     def test_status(self, data):
-        test_id = intern(data["test"].encode("utf8"))
-        subtest = intern(data["subtest"].encode("utf8"))
+        test_id = intern(ensure_str(data["test"]))
+        subtest = intern(ensure_str(data["subtest"]))
         test_data = self.id_test_map.get(test_id)
         if test_data is None:
             return
@@ -467,7 +467,7 @@
         if data["status"] == "SKIP":
             return
 
-        test_id = intern(data["test"].encode("utf8"))
+        test_id = intern(ensure_str(data["test"]))
         test_data = self.id_test_map.get(test_id)
         if test_data is None:
             return
@@ -480,7 +480,7 @@
         del self.tests_visited[test_id]
 
     def assertion_count(self, data):
-        test_id = intern(data["test"].encode("utf8"))
+        test_id = intern(ensure_str(data["test"]))
         test_data = self.id_test_map.get(test_id)
         if test_data is None:
             return
@@ -491,7 +491,7 @@
 
     def test_for_scope(self, data):
         dir_path = data.get("scope", "/")
-        dir_id = intern(os.path.join(dir_path, "__dir__").replace(os.path.sep, "/").encode("utf8"))
+        dir_id = intern(ensure_str(os.path.join(dir_path, "__dir__").replace(os.path.sep, "/")))
         if dir_id.startswith("/"):
             dir_id = dir_id[1:]
         return dir_id, self.id_test_map[dir_id]
@@ -530,13 +530,13 @@
     assert all_types > exclude_types
     include_types = all_types - exclude_types
     for item_type, test_path, tests in test_manifest.itertypes(*include_types):
-        test_file_data = TestFileData(intern(test_manifest.url_base.encode("utf8")),
-                                      intern(item_type.encode("utf8")),
+        test_file_data = TestFileData(intern(ensure_str(test_manifest.url_base)),
+                                      intern(ensure_str(item_type)),
                                       metadata_path,
                                       test_path,
                                       tests)
         for test in tests:
-            id_test_map[intern(test.id.encode("utf8"))] = test_file_data
+            id_test_map[intern(ensure_str(test.id))] = test_file_data
 
         dir_path = os.path.split(test_path)[0].replace(os.path.sep, "/")
         while True:
@@ -544,9 +544,9 @@
                 dir_id = dir_path + "/__dir__"
             else:
                 dir_id = "__dir__"
-            dir_id = intern((test_manifest.url_base + dir_id).lstrip("/").encode("utf8"))
+            dir_id = intern(ensure_str((test_manifest.url_base + dir_id).lstrip("/")))
             if dir_id not in id_test_map:
-                test_file_data = TestFileData(intern(test_manifest.url_base.encode("utf8")),
+                test_file_data = TestFileData(intern(ensure_str(test_manifest.url_base)),
                                               None,
                                               metadata_path,
                                               dir_id,
@@ -615,7 +615,7 @@
         self.item_type = item_type
         self.test_path = test_path
         self.metadata_path = metadata_path
-        self.tests = {intern(item.id.encode("utf8")) for item in tests}
+        self.tests = {intern(ensure_str(item.id)) for item in tests}
         self._requires_update = False
         self.data = defaultdict(lambda: defaultdict(PackedResultList))
 
@@ -656,11 +656,11 @@
         # Return subtest nodes present in the expected file, but missing from the data
         rv = []
 
-        for test_id, subtests in self.data.iteritems():
-            test = expected.get_test(test_id.decode("utf8"))
+        for test_id, subtests in iteritems(self.data):
+            test = expected.get_test(ensure_text(test_id))
             if not test:
                 continue
-            seen_subtests = set(item.decode("utf8") for item in subtests.iterkeys() if item is not None)
+            seen_subtests = set(ensure_text(item) for item in iterkeys(subtests) if item is not None)
             missing_subtests = set(test.subtests.keys()) - seen_subtests
             for item in missing_subtests:
                 expected_subtest = test.get_subtest(item)
@@ -729,9 +729,9 @@
             test_expected = expected.get_test(test_id)
             expected_by_test[test_id] = test_expected
 
-        for test_id, test_data in self.data.iteritems():
-            test_id = test_id.decode("utf8")
-            for subtest_id, results_list in test_data.iteritems():
+        for test_id, test_data in iteritems(self.data):
+            test_id = ensure_str(test_id)
+            for subtest_id, results_list in iteritems(test_data):
                 for prop, run_info, value in results_list:
                     # Special case directory metadata
                     if subtest_id is None and test_id.endswith("__dir__"):
@@ -747,8 +747,7 @@
                     if subtest_id is None:
                         item_expected = test_expected
                     else:
-                        if isinstance(subtest_id, str):
-                            subtest_id = subtest_id.decode("utf8")
+                        subtest_id = ensure_text(subtest_id)
                         item_expected = test_expected.get_subtest(subtest_id)
 
                     if prop == "status":
diff --git a/tools/wptrunner/wptrunner/products.py b/tools/wptrunner/wptrunner/products.py
index e311704..abd8409 100644
--- a/tools/wptrunner/wptrunner/products.py
+++ b/tools/wptrunner/wptrunner/products.py
@@ -1,5 +1,6 @@
 import importlib
 import imp
+from six import iteritems
 
 from .browsers import product_list
 
@@ -44,7 +45,7 @@
         self.get_timeout_multiplier = getattr(module, data["timeout_multiplier"])
 
         self.executor_classes = {}
-        for test_type, cls_name in data["executor"].iteritems():
+        for test_type, cls_name in iteritems(data["executor"]):
             cls = getattr(module, cls_name)
             self.executor_classes[test_type] = cls
 
diff --git a/tools/wptrunner/wptrunner/testloader.py b/tools/wptrunner/wptrunner/testloader.py
index fa54ca3..f16cc14 100644
--- a/tools/wptrunner/wptrunner/testloader.py
+++ b/tools/wptrunner/wptrunner/testloader.py
@@ -5,6 +5,8 @@
 from six.moves.queue import Empty
 from collections import defaultdict, deque
 from multiprocessing import Queue
+from six import iteritems
+from six.moves import xrange
 
 from . import manifestinclude
 from . import manifestexpected
@@ -124,7 +126,7 @@
 
     def load(self):
         rv = {}
-        for url_base, paths in self.test_paths.iteritems():
+        for url_base, paths in iteritems(self.test_paths):
             manifest_file = self.load_manifest(url_base=url_base,
                                                **paths)
             path_data = {"url_base": url_base}
@@ -236,7 +238,7 @@
             manifest_items = self.chunker(manifest_items)
 
         for test_type, test_path, tests in manifest_items:
-            manifest_file = manifests_by_url_base[iter(tests).next().url_base]
+            manifest_file = manifests_by_url_base[next(iter(tests)).url_base]
             metadata_path = self.manifests[manifest_file]["metadata_path"]
 
             inherit_metadata, test_metadata = self.load_metadata(manifest_file, metadata_path, test_path)
diff --git a/tools/wptrunner/wptrunner/tests/test_expectedtree.py b/tools/wptrunner/wptrunner/tests/test_expectedtree.py
index 2308be9..d71237a 100644
--- a/tools/wptrunner/wptrunner/tests/test_expectedtree.py
+++ b/tools/wptrunner/wptrunner/tests/test_expectedtree.py
@@ -1,7 +1,3 @@
-import sys
-
-import pytest
-
 from .. import expectedtree, metadata
 from collections import defaultdict
 
@@ -32,8 +28,6 @@
     return results_obj
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_build_tree_0():
     # Pass if debug
     results = [({"os": "linux", "version": "18.04", "debug": True}, "FAIL"),
@@ -53,8 +47,6 @@
     assert dump_tree(tree) == expected
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_build_tree_1():
     # Pass if linux or windows 10
     results = [({"os": "linux", "version": "18.04", "debug": True}, "PASS"),
@@ -77,8 +69,6 @@
     assert dump_tree(tree) == expected
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_build_tree_2():
     # Fails in a specific configuration
     results = [({"os": "linux", "version": "18.04", "debug": True}, "PASS"),
@@ -104,8 +94,6 @@
     assert dump_tree(tree) == expected
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_build_tree_3():
 
     results = [({"os": "linux", "version": "18.04", "debug": True, "unused": False}, "PASS"),
@@ -118,8 +106,6 @@
     assert dump_tree(tree) == expected
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_build_tree_4():
     # Check counts for multiple statuses
     results = [({"os": "linux", "version": "18.04", "debug": False}, "FAIL"),
diff --git a/tools/wptrunner/wptrunner/tests/test_manifestexpected.py b/tools/wptrunner/wptrunner/tests/test_manifestexpected.py
index 525915d..f3e4ce7 100644
--- a/tools/wptrunner/wptrunner/tests/test_manifestexpected.py
+++ b/tools/wptrunner/wptrunner/tests/test_manifestexpected.py
@@ -1,4 +1,3 @@
-import sys
 from io import BytesIO
 
 import pytest
@@ -6,8 +5,6 @@
 from .. import manifestexpected
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="bytes/text confusion in py3")
 @pytest.mark.parametrize("fuzzy, expected", [
     (b"ref.html:1;200", [("ref.html", ((1, 1), (200, 200)))]),
     (b"ref.html:0-1;100-200", [("ref.html", ((0, 1), (100, 200)))]),
@@ -25,7 +22,7 @@
       ((u"test.html", u"ref1.html", "=="), ((5,10), (100, 100)))]),
 ])
 def test_fuzzy(fuzzy, expected):
-    data = """
+    data = b"""
 [test.html]
   fuzzy: %s""" % fuzzy
     f = BytesIO(data)
diff --git a/tools/wptrunner/wptrunner/tests/test_testloader.py b/tools/wptrunner/wptrunner/tests/test_testloader.py
index e857cd4..836003d 100644
--- a/tools/wptrunner/wptrunner/tests/test_testloader.py
+++ b/tools/wptrunner/wptrunner/tests/test_testloader.py
@@ -20,8 +20,6 @@
 
 @pytest.mark.xfail(sys.platform == "win32",
                    reason="NamedTemporaryFile cannot be reopened on Win32")
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="wptmanifest.parser doesn't support py3")
 def test_filter_unicode():
     tests = make_mock_manifest(("test", "a", 10), ("test", "a/b", 10),
                                ("test", "c", 10))
diff --git a/tools/wptrunner/wptrunner/tests/test_update.py b/tools/wptrunner/wptrunner/tests/test_update.py
index 5ed3667..a24e4a7 100644
--- a/tools/wptrunner/wptrunner/tests/test_update.py
+++ b/tools/wptrunner/wptrunner/tests/test_update.py
@@ -108,7 +108,7 @@
             getattr(logger, action)(**kwargs)
         logger.remove_handler(handler)
     else:
-        json.dump(entries, data)
+        data.write(json.dumps(entries).encode())
     data.seek(0)
     return data
 
@@ -132,11 +132,9 @@
     return m
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_0():
     tests = [("path/to/test.htm", [test_id], "testharness",
-              """[test.htm]
+              b"""[test.htm]
   [test1]
     expected: FAIL""")]
 
@@ -154,11 +152,9 @@
     assert updated[0][1].is_empty
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_1():
     tests = [("path/to/test.htm", [test_id], "testharness",
-              """[test.htm]
+              b"""[test.htm]
   [test1]
     expected: ERROR""")]
 
@@ -177,11 +173,9 @@
     assert new_manifest.get_test(test_id).children[0].get("expected", default_run_info) == "FAIL"
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_known_intermittent_1():
     tests = [("path/to/test.htm", [test_id], "testharness",
-              """[test.htm]
+              b"""[test.htm]
   [test1]
     expected: PASS""")]
 
@@ -218,11 +212,9 @@
         "expected", default_run_info) == ["PASS", "FAIL"]
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_known_intermittent_2():
     tests = [("path/to/test.htm", [test_id], "testharness",
-              """[test.htm]
+              b"""[test.htm]
   [test1]
     expected: PASS""")]
 
@@ -243,11 +235,9 @@
         "expected", default_run_info) == "FAIL"
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_existing_known_intermittent():
     tests = [("path/to/test.htm", [test_id], "testharness",
-              """[test.htm]
+              b"""[test.htm]
   [test1]
     expected: [PASS, FAIL]""")]
 
@@ -286,11 +276,9 @@
         "expected", default_run_info) == ["PASS", "ERROR", "FAIL"]
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_remove_previous_intermittent():
     tests = [("path/to/test.htm", [test_id], "testharness",
-              """[test.htm]
+              b"""[test.htm]
   [test1]
     expected: [PASS, FAIL]""")]
 
@@ -334,8 +322,6 @@
         "expected", default_run_info) == ["PASS", "ERROR"]
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_new_test_with_intermittent():
     tests = [("path/to/test.htm", [test_id], "testharness", None)]
 
@@ -373,8 +359,6 @@
         "expected", default_run_info) == ["PASS", "FAIL"]
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_expected_tie_resolution():
     tests = [("path/to/test.htm", [test_id], "testharness", None)]
 
@@ -402,11 +386,9 @@
         "expected", default_run_info) == ["PASS", "FAIL"]
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_reorder_expected():
     tests = [("path/to/test.htm", [test_id], "testharness",
-              """[test.htm]
+              b"""[test.htm]
   [test1]
     expected: [PASS, FAIL]""")]
 
@@ -445,10 +427,8 @@
         "expected", default_run_info) == ["FAIL", "PASS"]
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_and_preserve_unchanged_expected_intermittent():
-    tests = [("path/to/test.htm", [test_id], "testharness", """
+    tests = [("path/to/test.htm", [test_id], "testharness", b"""
 [test.htm]
   expected:
     if os == "android": [PASS, FAIL]
@@ -488,11 +468,9 @@
         "expected", default_run_info) == "PASS"
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_test_with_intermittent_to_one_expected_status():
     tests = [("path/to/test.htm", [test_id], "testharness",
-              """[test.htm]
+              b"""[test.htm]
   [test1]
     expected: [PASS, FAIL]""")]
 
@@ -513,10 +491,8 @@
         "expected", default_run_info) == "ERROR"
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_intermittent_with_conditions():
-    tests = [("path/to/test.htm", [test_id], "testharness", """
+    tests = [("path/to/test.htm", [test_id], "testharness", b"""
 [test.htm]
   expected:
     if os == "android": [PASS, FAIL]""")]
@@ -548,10 +524,8 @@
         "expected", run_info_1) == ["PASS", "TIMEOUT", "FAIL"]
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_and_remove_intermittent_with_conditions():
-    tests = [("path/to/test.htm", [test_id], "testharness", """
+    tests = [("path/to/test.htm", [test_id], "testharness", b"""
 [test.htm]
   expected:
     if os == "android": [PASS, FAIL]""")]
@@ -583,11 +557,9 @@
         "expected", run_info_1) == ["PASS", "TIMEOUT"]
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_intermittent_full():
     tests = [("path/to/test.htm", [test_id], "testharness",
-              """[test.htm]
+              b"""[test.htm]
   [test1]
     expected:
       if os == "mac": [FAIL, TIMEOUT]
@@ -623,11 +595,9 @@
         "expected", default_run_info) == "FAIL"
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_intermittent_full_remove():
     tests = [("path/to/test.htm", [test_id], "testharness",
-              """[test.htm]
+              b"""[test.htm]
   [test1]
     expected:
       if os == "mac": [FAIL, TIMEOUT, PASS]
@@ -674,11 +644,9 @@
         "expected", default_run_info) == "FAIL"
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_full_update():
     tests = [("path/to/test.htm", [test_id], "testharness",
-              """[test.htm]
+              b"""[test.htm]
   [test1]
     expected:
       if os == "mac": [FAIL, TIMEOUT]
@@ -714,11 +682,9 @@
         "expected", default_run_info) == "FAIL"
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_full_orphan():
     tests = [("path/to/test.htm", [test_id], "testharness",
-              """[test.htm]
+              b"""[test.htm]
   [test1]
     expected: FAIL
     [subsub test]
@@ -747,11 +713,9 @@
     assert len(new_manifest.get_test(test_id).children) == 1
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_reorder_expected_full_conditions():
     tests = [("path/to/test.htm", [test_id], "testharness",
-              """[test.htm]
+              b"""[test.htm]
   [test1]
     expected:
       if os == "mac": [FAIL, TIMEOUT]
@@ -808,11 +772,9 @@
         "expected", default_run_info) == ["PASS", "FAIL"]
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_skip_0():
     tests = [("path/to/test.htm", [test_id], "testharness",
-              """[test.htm]
+              b"""[test.htm]
   [test1]
     expected: FAIL""")]
 
@@ -828,10 +790,8 @@
     assert not updated
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_new_subtest():
-    tests = [("path/to/test.htm", [test_id], "testharness", """[test.htm]
+    tests = [("path/to/test.htm", [test_id], "testharness", b"""[test.htm]
   [test1]
     expected: FAIL""")]
 
@@ -853,10 +813,8 @@
     assert new_manifest.get_test(test_id).children[1].get("expected", default_run_info) == "FAIL"
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_multiple_0():
-    tests = [("path/to/test.htm", [test_id], "testharness", """[test.htm]
+    tests = [("path/to/test.htm", [test_id], "testharness", b"""[test.htm]
   [test1]
     expected: FAIL""")]
 
@@ -892,10 +850,8 @@
         "expected", {"debug": False, "os": "linux"}) == "TIMEOUT"
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_multiple_1():
-    tests = [("path/to/test.htm", [test_id], "testharness", """[test.htm]
+    tests = [("path/to/test.htm", [test_id], "testharness", b"""[test.htm]
   [test1]
     expected: FAIL""")]
 
@@ -936,10 +892,8 @@
         "expected", run_info_3) == "FAIL"
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_multiple_2():
-    tests = [("path/to/test.htm", [test_id], "testharness", """[test.htm]
+    tests = [("path/to/test.htm", [test_id], "testharness", b"""[test.htm]
   [test1]
     expected: FAIL""")]
 
@@ -976,10 +930,8 @@
         "expected", run_info_2) == "TIMEOUT"
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_multiple_3():
-    tests = [("path/to/test.htm", [test_id], "testharness", """[test.htm]
+    tests = [("path/to/test.htm", [test_id], "testharness", b"""[test.htm]
   [test1]
     expected:
       if debug: FAIL
@@ -1018,10 +970,8 @@
         "expected", run_info_2) == "TIMEOUT"
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_ignore_existing():
-    tests = [("path/to/test.htm", [test_id], "testharness", """[test.htm]
+    tests = [("path/to/test.htm", [test_id], "testharness", b"""[test.htm]
   [test1]
     expected:
       if debug: TIMEOUT
@@ -1060,8 +1010,6 @@
         "expected", run_info_2) == "NOTRUN"
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_new_test():
     tests = [("path/to/test.htm", [test_id], "testharness", None)]
 
@@ -1084,10 +1032,8 @@
         "expected", run_info_1) == "FAIL"
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_duplicate():
-    tests = [("path/to/test.htm", [test_id], "testharness", """
+    tests = [("path/to/test.htm", [test_id], "testharness", b"""
 [test.htm]
   expected: ERROR""")]
 
@@ -1106,10 +1052,8 @@
         "expected", run_info_1) == "ERROR"
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_disable_intermittent():
-    tests = [("path/to/test.htm", [test_id], "testharness", """
+    tests = [("path/to/test.htm", [test_id], "testharness", b"""
 [test.htm]
   expected: ERROR""")]
 
@@ -1128,10 +1072,8 @@
         "disabled", run_info_1) == "Some message"
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_stability_conditional_instability():
-    tests = [("path/to/test.htm", [test_id], "testharness", """
+    tests = [("path/to/test.htm", [test_id], "testharness", b"""
 [test.htm]
   expected: ERROR""")]
 
@@ -1164,10 +1106,8 @@
         "expected", run_info_2) == "FAIL"
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_full():
-    tests = [("path/to/test.htm", [test_id], "testharness", """[test.htm]
+    tests = [("path/to/test.htm", [test_id], "testharness", b"""[test.htm]
   [test1]
     expected:
       if debug: TIMEOUT
@@ -1216,10 +1156,8 @@
         "expected", run_info_2) == "ERROR"
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_full_unknown():
-    tests = [("path/to/test.htm", [test_id], "testharness", """[test.htm]
+    tests = [("path/to/test.htm", [test_id], "testharness", b"""[test.htm]
   [test1]
     expected:
       if release_or_beta: ERROR
@@ -1259,10 +1197,8 @@
         "expected", run_info_2) == "ERROR"
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_full_unknown_missing():
-    tests = [("path/to/test.htm", [test_id], "testharness", """[test.htm]
+    tests = [("path/to/test.htm", [test_id], "testharness", b"""[test.htm]
   [subtest_deleted]
     expected:
       if release_or_beta: ERROR
@@ -1282,10 +1218,8 @@
     assert len(updated) == 0
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_default():
-    tests = [("path/to/test.htm", [test_id], "testharness", """[test.htm]
+    tests = [("path/to/test.htm", [test_id], "testharness", b"""[test.htm]
   [test1]
     expected:
       if os == "mac": FAIL
@@ -1315,10 +1249,8 @@
     assert new_manifest.is_empty
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_default_1():
-    tests = [("path/to/test.htm", [test_id], "testharness", """
+    tests = [("path/to/test.htm", [test_id], "testharness", b"""
 [test.htm]
   expected:
     if os == "mac": TIMEOUT
@@ -1347,10 +1279,8 @@
         "expected", run_info_2) == "FAIL"
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_default_2():
-    tests = [("path/to/test.htm", [test_id], "testharness", """
+    tests = [("path/to/test.htm", [test_id], "testharness", b"""
 [test.htm]
   expected:
     if os == "mac": TIMEOUT
@@ -1379,10 +1309,8 @@
         "expected", run_info_2) == "TIMEOUT"
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_assertion_count_0():
-    tests = [("path/to/test.htm", [test_id], "testharness", """[test.htm]
+    tests = [("path/to/test.htm", [test_id], "testharness", b"""[test.htm]
   max-asserts: 4
   min-asserts: 2
 """)]
@@ -1403,10 +1331,8 @@
     assert new_manifest.get_test(test_id).get("min-asserts") == "2"
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_assertion_count_1():
-    tests = [("path/to/test.htm", [test_id], "testharness", """[test.htm]
+    tests = [("path/to/test.htm", [test_id], "testharness", b"""[test.htm]
   max-asserts: 4
   min-asserts: 2
 """)]
@@ -1427,10 +1353,8 @@
     assert new_manifest.get_test(test_id).has_key("min-asserts") is False
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_assertion_count_2():
-    tests = [("path/to/test.htm", [test_id], "testharness", """[test.htm]
+    tests = [("path/to/test.htm", [test_id], "testharness", b"""[test.htm]
   max-asserts: 4
   min-asserts: 2
 """)]
@@ -1447,10 +1371,8 @@
     assert not updated
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_assertion_count_3():
-    tests = [("path/to/test.htm", [test_id], "testharness", """[test.htm]
+    tests = [("path/to/test.htm", [test_id], "testharness", b"""[test.htm]
   max-asserts: 4
   min-asserts: 2
 """)]
@@ -1481,10 +1403,8 @@
     assert new_manifest.get_test(test_id).get("min-asserts") == "2"
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_assertion_count_4():
-    tests = [("path/to/test.htm", [test_id], "testharness", """[test.htm]""")]
+    tests = [("path/to/test.htm", [test_id], "testharness", b"""[test.htm]""")]
 
     log_0 = suite_log([("test_start", {"test": test_id}),
                        ("assertion_count", {"test": test_id,
@@ -1512,11 +1432,9 @@
     assert new_manifest.get_test(test_id).has_key("min-asserts") is False
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_lsan_0():
-    tests = [("path/to/test.htm", [test_id], "testharness", ""),
-             ("path/to/__dir__", [dir_id], None, "")]
+    tests = [("path/to/test.htm", [test_id], "testharness", b""),
+             ("path/to/__dir__", [dir_id], None, b"")]
 
     log_0 = suite_log([("lsan_leak", {"scope": "path/to/",
                                       "frames": ["foo", "bar"]})])
@@ -1529,11 +1447,9 @@
     assert new_manifest.get("lsan-allowed") == ["foo"]
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_lsan_1():
-    tests = [("path/to/test.htm", [test_id], "testharness", ""),
-             ("path/to/__dir__", [dir_id], None, """
+    tests = [("path/to/test.htm", [test_id], "testharness", b""),
+             ("path/to/__dir__", [dir_id], None, b"""
 lsan-allowed: [foo]""")]
 
     log_0 = suite_log([("lsan_leak", {"scope": "path/to/",
@@ -1549,13 +1465,11 @@
     assert new_manifest.get("lsan-allowed") == ["baz", "foo"]
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_lsan_2():
-    tests = [("path/to/test.htm", [test_id], "testharness", ""),
-             ("path/__dir__", ["path/__dir__"], None, """
+    tests = [("path/to/test.htm", [test_id], "testharness", b""),
+             ("path/__dir__", ["path/__dir__"], None, b"""
 lsan-allowed: [foo]"""),
-             ("path/to/__dir__", [dir_id], None, "")]
+             ("path/to/__dir__", [dir_id], None, b"")]
 
     log_0 = suite_log([("lsan_leak", {"scope": "path/to/",
                                       "frames": ["foo", "bar"],
@@ -1571,11 +1485,9 @@
     assert new_manifest.get("lsan-allowed") == ["baz"]
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_lsan_3():
-    tests = [("path/to/test.htm", [test_id], "testharness", ""),
-             ("path/to/__dir__", [dir_id], None, "")]
+    tests = [("path/to/test.htm", [test_id], "testharness", b""),
+             ("path/to/__dir__", [dir_id], None, b"")]
 
     log_0 = suite_log([("lsan_leak", {"scope": "path/to/",
                                       "frames": ["foo", "bar"]})],
@@ -1593,11 +1505,9 @@
     assert new_manifest.get("lsan-allowed") == ["baz", "foo"]
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_wptreport_0():
     tests = [("path/to/test.htm", [test_id], "testharness",
-              """[test.htm]
+              b"""[test.htm]
   [test1]
     expected: FAIL""")]
 
@@ -1615,11 +1525,9 @@
     assert updated[0][1].is_empty
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_wptreport_1():
-    tests = [("path/to/test.htm", [test_id], "testharness", ""),
-             ("path/to/__dir__", [dir_id], None, "")]
+    tests = [("path/to/test.htm", [test_id], "testharness", b""),
+             ("path/to/__dir__", [dir_id], None, b"")]
 
     log = {"run_info": default_run_info.copy(),
            "results": [],
@@ -1632,11 +1540,9 @@
     assert updated[0][1].get("lsan-allowed") == ["baz"]
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_leak_total_0():
-    tests = [("path/to/test.htm", [test_id], "testharness", ""),
-             ("path/to/__dir__", [dir_id], None, "")]
+    tests = [("path/to/test.htm", [test_id], "testharness", b""),
+             ("path/to/__dir__", [dir_id], None, b"")]
 
     log_0 = suite_log([("mozleak_total", {"scope": "path/to/",
                                           "process": "default",
@@ -1651,11 +1557,9 @@
     assert new_manifest.get("leak-threshold") == ['default:51200']
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_leak_total_1():
-    tests = [("path/to/test.htm", [test_id], "testharness", ""),
-             ("path/to/__dir__", [dir_id], None, "")]
+    tests = [("path/to/test.htm", [test_id], "testharness", b""),
+             ("path/to/__dir__", [dir_id], None, b"")]
 
     log_0 = suite_log([("mozleak_total", {"scope": "path/to/",
                                           "process": "default",
@@ -1667,11 +1571,9 @@
     assert not updated
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_leak_total_2():
-    tests = [("path/to/test.htm", [test_id], "testharness", ""),
-             ("path/to/__dir__", [dir_id], None, """
+    tests = [("path/to/test.htm", [test_id], "testharness", b""),
+             ("path/to/__dir__", [dir_id], None, b"""
 leak-total: 110""")]
 
     log_0 = suite_log([("mozleak_total", {"scope": "path/to/",
@@ -1684,11 +1586,9 @@
     assert not updated
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_leak_total_3():
-    tests = [("path/to/test.htm", [test_id], "testharness", ""),
-             ("path/to/__dir__", [dir_id], None, """
+    tests = [("path/to/test.htm", [test_id], "testharness", b""),
+             ("path/to/__dir__", [dir_id], None, b"""
 leak-total: 100""")]
 
     log_0 = suite_log([("mozleak_total", {"scope": "path/to/",
@@ -1704,11 +1604,9 @@
     assert new_manifest.get("leak-threshold") == ['default:51200']
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="metadata doesn't support py3")
 def test_update_leak_total_4():
-    tests = [("path/to/test.htm", [test_id], "testharness", ""),
-             ("path/to/__dir__", [dir_id], None, """
+    tests = [("path/to/test.htm", [test_id], "testharness", b""),
+             ("path/to/__dir__", [dir_id], None, b"""
 leak-total: 110""")]
 
     log_0 = suite_log([
@@ -1737,8 +1635,6 @@
     steps = [TestStep]
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="update.state doesn't support py3")
 def test_update_pickle():
     logger = structuredlog.StructuredLogger("expected_test")
     args = {
diff --git a/tools/wptrunner/wptrunner/tests/test_wpttest.py b/tools/wptrunner/wptrunner/tests/test_wpttest.py
index 9bb3e1f..1a94a2f 100644
--- a/tools/wptrunner/wptrunner/tests/test_wpttest.py
+++ b/tools/wptrunner/wptrunner/tests/test_wpttest.py
@@ -1,5 +1,3 @@
-import pytest
-import sys
 from io import BytesIO
 from mock import Mock
 
@@ -7,29 +5,29 @@
 from manifest.item import TestharnessTest
 from .. import manifestexpected, wpttest
 
-dir_ini_0 = """\
+dir_ini_0 = b"""\
 prefs: [a:b]
 """
 
-dir_ini_1 = """\
+dir_ini_1 = b"""\
 prefs: [@Reset, b:c]
 max-asserts: 2
 min-asserts: 1
 tags: [b, c]
 """
 
-dir_ini_2 = """\
+dir_ini_2 = b"""\
 lsan-max-stack-depth: 42
 """
 
-test_0 = """\
+test_0 = b"""\
 [0.html]
   prefs: [c:d]
   max-asserts: 3
   tags: [a, @Reset]
 """
 
-test_1 = """\
+test_1 = b"""\
 [1.html]
   prefs:
     if os == 'win': [a:b, c:d]
@@ -37,12 +35,12 @@
     if os == 'win': FAIL
 """
 
-test_2 = """\
+test_2 = b"""\
 [2.html]
   lsan-max-stack-depth: 42
 """
 
-test_3 = """\
+test_3 = b"""\
 [3.html]
   [subtest1]
     expected: [PASS, FAIL]
@@ -54,32 +52,32 @@
     expected: FAIL
 """
 
-test_4 = """\
+test_4 = b"""\
 [4.html]
   expected: FAIL
 """
 
-test_5 = """\
+test_5 = b"""\
 [5.html]
 """
 
-test_6 = """\
+test_6 = b"""\
 [6.html]
   expected: [OK, FAIL]
 """
 
-test_7 = """\
+test_7 = b"""\
 [7.html]
   blink_expect_any_subtest_status: yep
 """
 
-test_fuzzy = """\
+test_fuzzy = b"""\
 [fuzzy.html]
   fuzzy: fuzzy-ref.html:1;200
 """
 
 
-testharness_test = """<script src="/resources/testharness.js"></script>
+testharness_test = b"""<script src="/resources/testharness.js"></script>
 <script src="/resources/testharnessreport.js"></script>"""
 
 
@@ -117,8 +115,6 @@
     return wpttest.from_manifest(tests, test, inherit_metadata, test_metadata.get_test(test.id))
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="bytes/text confusion in py3")
 def test_metadata_inherit():
     items = [("test", "a", 10), ("test", "a/b", 10), ("test", "c", 10)]
     inherit_metadata = [
@@ -136,8 +132,6 @@
     assert test_obj.tags == {"a", "dir:a"}
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="bytes/text confusion in py3")
 def test_conditional():
     items = [("test", "a", 10), ("test", "a/b", 10), ("test", "c", 10)]
 
@@ -147,8 +141,6 @@
     assert test_obj.expected() == "FAIL"
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="bytes/text confusion in py3")
 def test_metadata_lsan_stack_depth():
     items = [("test", "a", 10), ("test", "a/b", 10)]
 
@@ -172,8 +164,6 @@
     assert test_obj.lsan_max_stack_depth == 42
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="bytes/text confusion in py3")
 def test_subtests():
     test_obj = make_test_object(test_3, "a/3.html", 3, ("test", "a", 4), None, False)
     assert test_obj.expected("subtest1") == "PASS"
@@ -184,40 +174,30 @@
     assert test_obj.known_intermittent("subtest3") == []
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="bytes/text confusion in py3")
 def test_expected_fail():
     test_obj = make_test_object(test_4, "a/4.html", 4, ("test", "a", 5), None, False)
     assert test_obj.expected() == "FAIL"
     assert test_obj.known_intermittent() == []
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="bytes/text confusion in py3")
 def test_no_expected():
     test_obj = make_test_object(test_5, "a/5.html", 5, ("test", "a", 6), None, False)
     assert test_obj.expected() == "OK"
     assert test_obj.known_intermittent() == []
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="bytes/text confusion in py3")
 def test_known_intermittent():
     test_obj = make_test_object(test_6, "a/6.html", 6, ("test", "a", 7), None, False)
     assert test_obj.expected() == "OK"
     assert test_obj.known_intermittent() == ["FAIL"]
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="bytes/text confusion in py3")
 def test_expect_any_subtest_status():
     test_obj = make_test_object(test_7, "a/7.html", 7, ("test", "a", 8), None, False)
     assert test_obj.expected() == "OK"
     assert test_obj.expect_any_subtest_status() is True
 
 
-@pytest.mark.xfail(sys.version[0] == "3",
-                   reason="bytes/text confusion in py3")
 def test_metadata_fuzzy():
     manifest_data = {
         "items": {"reftest": {"a/fuzzy.html": [["a/fuzzy.html",
@@ -234,7 +214,7 @@
                                                     test_path="a/fuzzy.html",
                                                     url_base="/")
 
-    test = manifest.iterpath("a/fuzzy.html").next()
+    test = next(manifest.iterpath("a/fuzzy.html"))
     test_obj = wpttest.from_manifest(manifest, test, [], test_metadata.get_test(test.id))
 
     assert test_obj.fuzzy == {('/a/fuzzy.html', '/a/fuzzy-ref.html', '=='): [[2, 3], [10, 15]]}
diff --git a/tools/wptrunner/wptrunner/update/state.py b/tools/wptrunner/wptrunner/update/state.py
index 64dbf11..f8a8352 100644
--- a/tools/wptrunner/wptrunner/update/state.py
+++ b/tools/wptrunner/wptrunner/update/state.py
@@ -11,7 +11,7 @@
             return rv
 
         logger.debug("No existing state found")
-        return object.__new__(cls, logger)
+        return super(BaseState, cls).__new__(cls)
 
     def __init__(self, logger):
         """Object containing state variables created when running Steps.
diff --git a/tools/wptrunner/wptrunner/wptrunner.py b/tools/wptrunner/wptrunner/wptrunner.py
index cac172a..f40dcff 100644
--- a/tools/wptrunner/wptrunner/wptrunner.py
+++ b/tools/wptrunner/wptrunner/wptrunner.py
@@ -3,6 +3,7 @@
 import json
 import os
 import sys
+from six import iteritems, itervalues
 
 from wptserve import sslutils
 
@@ -102,7 +103,7 @@
     run_info, test_loader = get_loader(test_paths, product,
                                        run_info_extras=run_info_extras, **kwargs)
 
-    for test_type, tests in test_loader.disabled_tests.iteritems():
+    for test_type, tests in iteritems(test_loader.disabled_tests):
         for test in tests:
             rv.append({"test": test.id, "reason": test.disabled()})
     print(json.dumps(rv, indent=2))
@@ -127,7 +128,7 @@
         if kwargs["headless"]:
             return False
         tests = test_loader.tests
-        is_single_testharness = (sum(len(item) for item in tests.itervalues()) == 1 and
+        is_single_testharness = (sum(len(item) for item in itervalues(tests)) == 1 and
                                  len(tests.get("testharness", [])) == 1)
         if kwargs["repeat"] == 1 and kwargs["rerun"] == 1 and is_single_testharness:
             return True
diff --git a/tools/wptrunner/wptrunner/wpttest.py b/tools/wptrunner/wptrunner/wpttest.py
index 67b5758..4cbaedc 100644
--- a/tools/wptrunner/wptrunner/wpttest.py
+++ b/tools/wptrunner/wptrunner/wpttest.py
@@ -2,6 +2,7 @@
 import subprocess
 from six.moves.urllib.parse import urljoin
 from collections import defaultdict
+from six import string_types
 
 from .wptmanifest.parser import atoms
 
@@ -337,7 +338,7 @@
 
         try:
             expected = metadata.get("expected")
-            if isinstance(expected, (basestring)):
+            if isinstance(expected, string_types):
                 return expected
             elif isinstance(expected, list):
                 return expected[0]