tools/android/loading/sandwich_prefetch.py - chromium/src.git - Git at Google

 # Copyright 2016 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """
 Implements a task builder for benchmarking effects of NoState Prefetch.
 Noticeable steps of the task pipeline:
   * Save a WPR archive
   * Process the WPR archive to make all resources cacheable
   * Process cache archive to patch response headers back to their original
       values.
   * Find out which resources are discoverable by NoState Prefetch
       (HTMLPreloadScanner)
   * Load pages with empty/full/prefetched cache
   * Extract most important metrics to a CSV
 """

 import csv
 import logging
 import json
 import os
 import re
 import shutil
 import urlparse

 import chrome_cache
 import common_util
 import loading_trace
 from prefetch_view import PrefetchSimulationView
 from request_dependencies_lens import RequestDependencyLens
 import sandwich_metrics
 import sandwich_runner
 import sandwich_utils
 import task_manager
 import wpr_backend


 class Discoverer(object):
   # Do not prefetch anything.
   EmptyCache = 'empty-cache'

   # Prefetches everything to load fully from cache (impossible in practice).
   FullCache = 'full-cache'

   # Prefetches the first resource following the redirection chain.
   MainDocument = 'main-document'

   # All resources which are fetched from the main document and their
   # redirections.
   Parser = 'parser'

   # Simulation of HTMLPreloadScanner on the main document and their
   # redirections and subsets:
   #   Store: only resources that don't have Cache-Control: No-Store.
   HTMLPreloadScanner = 'html-scanner'
   HTMLPreloadScannerStore = 'html-scanner-store'


 # List of all available sub-resource discoverers.
 SUBRESOURCE_DISCOVERERS = set([
   Discoverer.EmptyCache,
   Discoverer.FullCache,
   Discoverer.MainDocument,
   Discoverer.Parser,
   Discoverer.HTMLPreloadScanner,
   Discoverer.HTMLPreloadScannerStore,
 ])


 _UPLOAD_DATA_STREAM_REQUESTS_REGEX = re.compile(r'^\d+/(?P<url>.*)$')


 def _NormalizeUrl(url):
   """Returns normalized URL such as removing trailing slashes."""
   parsed_url = list(urlparse.urlparse(url))
   parsed_url[2] = re.sub(r'/{2,}', r'/', parsed_url[2])
   return urlparse.urlunparse(parsed_url)


 def _PatchCacheArchive(cache_archive_path, loading_trace_path,
                        cache_archive_dest_path):
   """Patch the cache archive.

   Note: This method update the raw response headers of cache entries' to store
     the ones such as Set-Cookie that were pruned by the
     net::HttpCacheTransaction, and remove the stream index 2 holding resource's
     compile meta data.

   Args:
     cache_archive_path: Input archive's path to patch.
     loading_trace_path: Path of the loading trace that have recorded the cache
         archive <cache_archive_path>.
     cache_archive_dest_path: Archive destination's path.
   """
   trace = loading_trace.LoadingTrace.FromJsonFile(loading_trace_path)
   with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path:
     cache_path = os.path.join(tmp_path, 'cache')
     chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_path)
     cache_backend = chrome_cache.CacheBackend(cache_path, 'simple')
     cache_entries = set(cache_backend.ListKeys())
     logging.info('Original cache size: %d bytes' % cache_backend.GetSize())
     for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
         trace.request_track.GetEvents()):
       # On requests having an upload data stream such as POST requests,
       # net::HttpCache::GenerateCacheKey() prefixes the cache entry's key with
       # the upload data stream's session unique identifier.
       #
       # It is fine to not patch these requests since when reopening Chrome,
       # there is no way the entry can be reused since the upload data stream's
       # identifier will be different.
       #
       # The fact that these entries are kept in the cache after closing Chrome
       # properly by closing the Chrome tab as the ChromeControler.SetSlowDeath()
       # do is known chrome bug (crbug.com/610725).
       if request.url not in cache_entries:
         continue
       # Chrome prunes Set-Cookie from response headers before storing them in
       # disk cache. Also, it adds implicit "Vary: cookie" header to all redirect
       # response headers. Sandwich manages the cache, but between recording the
       # cache and benchmarking the cookie jar is invalidated. This leads to
       # invalidation of all cacheable redirects.
       raw_headers = request.GetRawResponseHeaders()
       cache_backend.UpdateRawResponseHeaders(request.url, raw_headers)
       # NoState-Prefetch would only fetch the resources, but not parse them.
       cache_backend.DeleteStreamForKey(request.url, 2)
     chrome_cache.ZipDirectoryContent(cache_path, cache_archive_dest_path)
     logging.info('Patched cache size: %d bytes' % cache_backend.GetSize())


 def _DiscoverRequests(dependencies_lens, subresource_discoverer):
   trace = dependencies_lens.loading_trace
   first_resource_request = trace.request_track.GetFirstResourceRequest()

   if subresource_discoverer == Discoverer.EmptyCache:
     requests = []
   elif subresource_discoverer == Discoverer.FullCache:
     requests = dependencies_lens.loading_trace.request_track.GetEvents()
   elif subresource_discoverer == Discoverer.MainDocument:
     requests = [dependencies_lens.GetRedirectChain(first_resource_request)[-1]]
   elif subresource_discoverer == Discoverer.Parser:
     requests = PrefetchSimulationView.ParserDiscoverableRequests(
         first_resource_request, dependencies_lens)
   elif subresource_discoverer == Discoverer.HTMLPreloadScanner:
     requests = PrefetchSimulationView.PreloadedRequests(
         first_resource_request, dependencies_lens, trace)
   else:
     assert False
   logging.info('number of requests discovered by %s: %d',
       subresource_discoverer, len(requests))
   return requests


 def _PruneOutOriginalNoStoreRequests(original_headers_path, requests):
   with open(original_headers_path) as file_input:
     original_headers = json.load(file_input)
   pruned_requests = set()
   for request in requests:
     url = _NormalizeUrl(request.url)
     if url not in original_headers:
       # TODO(gabadie): Investigate why these requests were not in WPR.
       assert request.failed
       logging.warning(
           'could not find original headers for: %s (failure: %s)',
           url, request.error_text)
       continue
     request_original_headers = original_headers[url]
     if ('cache-control' in request_original_headers and
         'no-store' in request_original_headers['cache-control'].lower()):
       pruned_requests.add(request)
   return [r for r in requests if r not in pruned_requests]


 def _ExtractDiscoverableUrls(
     original_headers_path, loading_trace_path, subresource_discoverer):
   """Extracts discoverable resource urls from a loading trace according to a
   sub-resource discoverer.

   Args:
     original_headers_path: Path of JSON containing the original headers.
     loading_trace_path: Path of the loading trace recorded at original cache
       creation.
     subresource_discoverer: The sub-resources discoverer that should white-list
       the resources to keep in cache for the NoState-Prefetch benchmarks.

   Returns:
     A set of urls.
   """
   assert subresource_discoverer in SUBRESOURCE_DISCOVERERS, \
       'unknown prefetch simulation {}'.format(subresource_discoverer)
   logging.info('loading %s', loading_trace_path)
   trace = loading_trace.LoadingTrace.FromJsonFile(loading_trace_path)
   dependencies_lens = RequestDependencyLens(trace)

   # Build the list of discovered requests according to the desired simulation.
   discovered_requests = []
   if subresource_discoverer == Discoverer.HTMLPreloadScannerStore:
     requests = _DiscoverRequests(
         dependencies_lens, Discoverer.HTMLPreloadScanner)
     discovered_requests = _PruneOutOriginalNoStoreRequests(
         original_headers_path, requests)
   else:
     discovered_requests = _DiscoverRequests(
         dependencies_lens, subresource_discoverer)

   whitelisted_urls = set()
   for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
       discovered_requests):
     logging.debug('white-listing %s', request.url)
     whitelisted_urls.add(request.url)
   logging.info('number of white-listed resources: %d', len(whitelisted_urls))
   return whitelisted_urls


 def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name):
   """Compare URL sets and log the diffs.

   Args:
     ref_url_set: Set of reference urls.
     url_set: Set of urls to compare to the reference.
     url_set_name: The set name for logging purposes.
   """
   assert type(ref_url_set) == set
   assert type(url_set) == set
   if ref_url_set == url_set:
     logging.info('  %d %s are matching.' % (len(ref_url_set), url_set_name))
     return
   missing_urls = ref_url_set.difference(url_set)
   unexpected_urls = url_set.difference(ref_url_set)
   logging.error('  %s are not matching (expected %d, had %d)' % \
       (url_set_name, len(ref_url_set), len(url_set)))
   logging.error('    List of %d missing resources:' % len(missing_urls))
   for url in sorted(missing_urls):
     logging.error('-     ' + url)
   logging.error('    List of %d unexpected resources:' % len(unexpected_urls))
   for url in sorted(unexpected_urls):
     logging.error('+     ' + url)


 class _RunOutputVerifier(object):
   """Object to verify benchmark run from traces and WPR log stored in the
   runner output directory.
   """

   def __init__(self, cache_validation_result, benchmark_setup):
     """Constructor.

     Args:
       cache_validation_result: JSON of the cache validation task.
       benchmark_setup: JSON of the benchmark setup.
     """
     self._cache_whitelist = set(benchmark_setup['cache_whitelist'])
     self._original_requests = set(
         cache_validation_result['effective_encoded_data_lengths'].keys())
     self._original_post_requests = set(
         cache_validation_result['effective_post_requests'])
     self._original_cached_requests = self._original_requests.intersection(
         self._cache_whitelist)
     self._original_uncached_requests = self._original_requests.difference(
         self._cache_whitelist)
     self._all_sent_url_requests = set()

   def VerifyTrace(self, trace):
     """Verifies a trace with the cache validation result and the benchmark
     setup.
     """
     effective_requests = sandwich_utils.ListUrlRequests(
         trace, sandwich_utils.RequestOutcome.All)
     effective_post_requests = sandwich_utils.ListUrlRequests(
         trace, sandwich_utils.RequestOutcome.Post)
     effective_cached_requests = sandwich_utils.ListUrlRequests(
         trace, sandwich_utils.RequestOutcome.ServedFromCache)
     effective_uncached_requests = sandwich_utils.ListUrlRequests(
         trace, sandwich_utils.RequestOutcome.NotServedFromCache)

     missing_requests = self._original_requests.difference(effective_requests)
     unexpected_requests = effective_requests.difference(self._original_requests)
     expected_cached_requests = \
         self._original_cached_requests.difference(missing_requests)
     expected_uncached_requests = self._original_uncached_requests.union(
         unexpected_requests).difference(missing_requests)

     # POST requests are known to be unable to use the cache.
     expected_cached_requests.difference_update(effective_post_requests)
     expected_uncached_requests.update(effective_post_requests)

     _PrintUrlSetComparison(self._original_requests, effective_requests,
                            'All resources')
     _PrintUrlSetComparison(set(), effective_post_requests, 'POST resources')
     _PrintUrlSetComparison(expected_cached_requests, effective_cached_requests,
                            'Cached resources')
     _PrintUrlSetComparison(expected_uncached_requests,
                            effective_uncached_requests, 'Non cached resources')

     self._all_sent_url_requests.update(effective_uncached_requests)

   def VerifyWprLog(self, wpr_log_path):
     """Verifies WPR log with previously verified traces."""
     all_wpr_requests = wpr_backend.ExtractRequestsFromLog(wpr_log_path)
     all_wpr_urls = set()
     unserved_wpr_urls = set()
     wpr_command_colliding_urls = set()

     for request in all_wpr_requests:
       if request.is_wpr_host:
         continue
       if urlparse.urlparse(request.url).path.startswith('/web-page-replay'):
         wpr_command_colliding_urls.add(request.url)
       elif request.is_served is False:
         unserved_wpr_urls.add(request.url)
       all_wpr_urls.add(request.url)

     _PrintUrlSetComparison(set(), unserved_wpr_urls,
                            'Distinct unserved resources from WPR')
     _PrintUrlSetComparison(set(), wpr_command_colliding_urls,
                            'Distinct resources colliding to WPR commands')
     _PrintUrlSetComparison(all_wpr_urls, self._all_sent_url_requests,
                            'Distinct resource requests to WPR')


 def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
   """Validates a cache archive content.

   Args:
     cache_build_trace_path: Path of the generated trace at the cache build time.
     cache_archive_path: Cache archive's path to validate.

   Returns:
     {
       'effective_encoded_data_lengths':
         {URL of all requests: encoded_data_length},
       'effective_post_requests': [URLs of POST requests],
       'expected_cached_resources': [URLs of resources expected to be cached],
       'successfully_cached': [URLs of cached sub-resources]
     }
   """
   # TODO(gabadie): What's the best way of propagating errors happening in here?
   logging.info('lists cached urls from %s' % cache_archive_path)
   with common_util.TemporaryDirectory() as cache_directory:
     chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
     cache_keys = set(
         chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys())
   trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path)
   effective_requests = sandwich_utils.ListUrlRequests(
       trace, sandwich_utils.RequestOutcome.All)
   effective_post_requests = sandwich_utils.ListUrlRequests(
       trace, sandwich_utils.RequestOutcome.Post)
   effective_encoded_data_lengths = {}
   for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
       trace.request_track.GetEvents()):
     if request.from_disk_cache or request.served_from_cache:
       # At cache archive creation time, a request might be loaded several times,
       # but avoid the request.encoded_data_length == 0 if loaded from cache.
       continue
     if request.url in effective_encoded_data_lengths:
       effective_encoded_data_lengths[request.url] = max(
           effective_encoded_data_lengths[request.url],
           request.GetResponseTransportLength())
     else:
       effective_encoded_data_lengths[request.url] = (
           request.GetResponseTransportLength())

   upload_data_stream_cache_entry_keys = set()
   upload_data_stream_requests = set()
   for cache_entry_key in cache_keys:
     match = _UPLOAD_DATA_STREAM_REQUESTS_REGEX.match(cache_entry_key)
     if not match:
       continue
     upload_data_stream_cache_entry_keys.add(cache_entry_key)
     upload_data_stream_requests.add(match.group('url'))

   expected_cached_requests = effective_requests.difference(
       effective_post_requests)
   effective_cache_keys = cache_keys.difference(
       upload_data_stream_cache_entry_keys)

   _PrintUrlSetComparison(effective_post_requests, upload_data_stream_requests,
                          'POST resources')
   _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys,
                          'Cached resources')

   return {
       'effective_encoded_data_lengths': effective_encoded_data_lengths,
       'effective_post_requests': [url for url in effective_post_requests],
       'expected_cached_resources': [url for url in expected_cached_requests],
       'successfully_cached_resources': [url for url in effective_cache_keys]
   }


 def _ProcessRunOutputDir(
     cache_validation_result, benchmark_setup, runner_output_dir):
   """Process benchmark's run output directory.

   Args:
     cache_validation_result: Same as for _RunOutputVerifier
     benchmark_setup: Same as for _RunOutputVerifier
     runner_output_dir: Same as for SandwichRunner.output_dir

   Returns:
     List of dictionary.
   """
   run_metrics_list = []
   run_output_verifier = _RunOutputVerifier(
       cache_validation_result, benchmark_setup)
   cached_encoded_data_lengths = (
       cache_validation_result['effective_encoded_data_lengths'])
   for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns(
       runner_output_dir):
     trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME)

     logging.info('loading trace: %s', trace_path)
     trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)

     logging.info('verifying trace: %s', trace_path)
     run_output_verifier.VerifyTrace(trace)

     logging.info('extracting metrics from trace: %s', trace_path)

     # Gather response size per URLs.
     response_sizes = {}
     for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
         trace.request_track.GetEvents()):
       # Ignore requests served from the blink's cache.
       if request.served_from_cache:
         continue
       if request.from_disk_cache:
         if request.url in cached_encoded_data_lengths:
           response_size = cached_encoded_data_lengths[request.url]
         else:
           # Some fat webpages may overflow the Memory cache, and so some
           # requests might be served from disk cache couple of times per page
           # load.
           logging.warning('Looks like could be served from memory cache: %s',
               request.url)
           if request.url in response_sizes:
             response_size = response_sizes[request.url]
       else:
         response_size = request.GetResponseTransportLength()
       response_sizes[request.url] = response_size

     # Sums the served from cache/network bytes.
     served_from_network_bytes = 0
     served_from_cache_bytes = 0
     urls_hitting_network = set()
     for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
         trace.request_track.GetEvents()):
       # Ignore requests served from the blink's cache.
       if request.served_from_cache:
         continue
       urls_hitting_network.add(request.url)
       if request.from_disk_cache:
         served_from_cache_bytes += response_sizes[request.url]
       else:
         served_from_network_bytes += response_sizes[request.url]

     # Make sure the served from blink's cache requests have at least one
     # corresponding request that was not served from the blink's cache.
     for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
         trace.request_track.GetEvents()):
       assert (request.url in urls_hitting_network or
               not request.served_from_cache)

     run_metrics = {
         'url': trace.url,
         'repeat_id': repeat_id,
         'subresource_discoverer': benchmark_setup['subresource_discoverer'],
         'cache_recording.subresource_count':
             len(cache_validation_result['effective_encoded_data_lengths']),
         'cache_recording.cached_subresource_count_theoretic':
             len(cache_validation_result['successfully_cached_resources']),
         'cache_recording.cached_subresource_count':
             len(cache_validation_result['expected_cached_resources']),
         'benchmark.subresource_count': len(sandwich_utils.ListUrlRequests(
             trace, sandwich_utils.RequestOutcome.All)),
         'benchmark.served_from_cache_count_theoretic':
             len(benchmark_setup['cache_whitelist']),
         'benchmark.served_from_cache_count': len(sandwich_utils.ListUrlRequests(
             trace, sandwich_utils.RequestOutcome.ServedFromCache)),
         'benchmark.served_from_network_bytes': served_from_network_bytes,
         'benchmark.served_from_cache_bytes': served_from_cache_bytes
     }
     run_metrics.update(
         sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
             repeat_dir, trace))
     run_metrics_list.append(run_metrics)
   run_metrics_list.sort(key=lambda e: e['repeat_id'])

   wpr_log_path = os.path.join(
       runner_output_dir, sandwich_runner.WPR_LOG_FILENAME)
   logging.info('verifying wpr log: %s', wpr_log_path)
   run_output_verifier.VerifyWprLog(wpr_log_path)
   return run_metrics_list


 class PrefetchBenchmarkBuilder(task_manager.Builder):
   """A builder for a graph of tasks for NoState-Prefetch emulated benchmarks."""

   def __init__(self, common_builder):
     task_manager.Builder.__init__(self,
                                   common_builder.output_directory,
                                   common_builder.output_subdirectory)
     self._common_builder = common_builder

     self._original_headers_path = None
     self._wpr_archive_path = None
     self._cache_path = None
     self._trace_from_grabbing_reference_cache = None
     self._cache_validation_task = None
     self._PopulateCommonPipelines()

   def _PopulateCommonPipelines(self):
     """Creates necessary tasks to produce initial cache archive.

     Also creates a task for producing a json file with a mapping of URLs to
     subresources (urls-resources.json).

     Here is the full dependency tree for the returned task:
     common/patched-cache-validation.json
       depends on: common/patched-cache.zip
         depends on: common/original-cache.zip
           depends on: common/webpages-patched.wpr
             depends on: common/webpages.wpr
     """
     self._original_headers_path = self.RebaseOutputPath(
         'common/response-headers.json')

     @self.RegisterTask('common/webpages-patched.wpr',
                        dependencies=[self._common_builder.original_wpr_task])
     def BuildPatchedWpr():
       shutil.copyfile(
           self._common_builder.original_wpr_task.path, BuildPatchedWpr.path)
       wpr_archive = wpr_backend.WprArchiveBackend(BuildPatchedWpr.path)

       # Save up original response headers.
       original_response_headers = {e.url: e.GetResponseHeadersDict() \
           for e in wpr_archive.ListUrlEntries()}
       logging.info('save up response headers for %d resources',
                    len(original_response_headers))
       if not original_response_headers:
         # TODO(gabadie): How is it possible to not even have the main resource
         # in the WPR archive? Example URL can be found in:
         # http://crbug.com/623966#c5
         raise Exception(
             'Looks like no resources were recorded in WPR during: {}'.format(
                 self._common_builder.original_wpr_task.name))
       with open(self._original_headers_path, 'w') as file_output:
         json.dump(original_response_headers, file_output)

       # Patch WPR.
       wpr_url_entries = wpr_archive.ListUrlEntries()
       for wpr_url_entry in wpr_url_entries:
         sandwich_utils.PatchWprEntryToBeCached(wpr_url_entry)
       logging.info('number of patched entries: %d', len(wpr_url_entries))
       wpr_archive.Persist()

     @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr])
     def BuildOriginalCache():
       runner = self._common_builder.CreateSandwichRunner()
       runner.wpr_archive_path = BuildPatchedWpr.path
       runner.cache_archive_path = BuildOriginalCache.path
       runner.cache_operation = sandwich_runner.CacheOperation.SAVE
       runner.output_dir = BuildOriginalCache.run_path
       runner.Run()
     BuildOriginalCache.run_path = BuildOriginalCache.path[:-4] + '-run'
     original_cache_trace_path = os.path.join(
         BuildOriginalCache.run_path, '0', sandwich_runner.TRACE_FILENAME)

     @self.RegisterTask('common/patched-cache.zip', [BuildOriginalCache])
     def BuildPatchedCache():
       _PatchCacheArchive(BuildOriginalCache.path,
           original_cache_trace_path, BuildPatchedCache.path)

     @self.RegisterTask('common/patched-cache-validation.json',
                        [BuildPatchedCache])
     def ValidatePatchedCache():
       cache_validation_result = _ValidateCacheArchiveContent(
           original_cache_trace_path, BuildPatchedCache.path)
       with open(ValidatePatchedCache.path, 'w') as output:
         json.dump(cache_validation_result, output)

     self._wpr_archive_path = BuildPatchedWpr.path
     self._trace_from_grabbing_reference_cache = original_cache_trace_path
     self._cache_path = BuildPatchedCache.path
     self._cache_validation_task = ValidatePatchedCache

     self._common_builder.default_final_tasks.append(ValidatePatchedCache)

   def PopulateLoadBenchmark(self, subresource_discoverer,
                             transformer_list_name, transformer_list):
     """Populate benchmarking tasks from its setup tasks.

     Args:
       subresource_discoverer: Name of a subresources discoverer.
       transformer_list_name: A string describing the transformers, will be used
           in Task names (prefer names without spaces and special characters).
       transformer_list: An ordered list of function that takes an instance of
           SandwichRunner as parameter, would be applied immediately before
           SandwichRunner.Run() in the given order.

     Here is the full dependency of the added tree for the returned task:
     <transformer_list_name>/<subresource_discoverer>-metrics.csv
       depends on: <transformer_list_name>/<subresource_discoverer>-run/
         depends on: common/<subresource_discoverer>-cache.zip
           depends on: common/<subresource_discoverer>-setup.json
             depends on: common/patched-cache-validation.json
     """
     additional_column_names = [
         'url',
         'repeat_id',
         'subresource_discoverer',
         'cache_recording.subresource_count',
         'cache_recording.cached_subresource_count_theoretic',
         'cache_recording.cached_subresource_count',
         'benchmark.subresource_count',
         'benchmark.served_from_cache_count_theoretic',
         'benchmark.served_from_cache_count',
         'benchmark.served_from_network_bytes',
         'benchmark.served_from_cache_bytes']

     assert subresource_discoverer in SUBRESOURCE_DISCOVERERS
     assert 'common' not in SUBRESOURCE_DISCOVERERS
     shared_task_prefix = os.path.join('common', subresource_discoverer)
     task_prefix = os.path.join(transformer_list_name, subresource_discoverer)

     @self.RegisterTask(shared_task_prefix + '-setup.json', merge=True,
                        dependencies=[self._cache_validation_task])
     def SetupBenchmark():
       whitelisted_urls = _ExtractDiscoverableUrls(
           original_headers_path=self._original_headers_path,
           loading_trace_path=self._trace_from_grabbing_reference_cache,
           subresource_discoverer=subresource_discoverer)

       common_util.EnsureParentDirectoryExists(SetupBenchmark.path)
       with open(SetupBenchmark.path, 'w') as output:
         json.dump({
             'cache_whitelist': [url for url in whitelisted_urls],
             'subresource_discoverer': subresource_discoverer,
           }, output)

     @self.RegisterTask(shared_task_prefix + '-cache.zip', merge=True,
                        dependencies=[SetupBenchmark])
     def BuildBenchmarkCacheArchive():
       benchmark_setup = json.load(open(SetupBenchmark.path))
       chrome_cache.ApplyUrlWhitelistToCacheArchive(
           cache_archive_path=self._cache_path,
           whitelisted_urls=benchmark_setup['cache_whitelist'],
           output_cache_archive_path=BuildBenchmarkCacheArchive.path)

     @self.RegisterTask(task_prefix + '-run/',
                        dependencies=[BuildBenchmarkCacheArchive])
     def RunBenchmark():
       runner = self._common_builder.CreateSandwichRunner()
       for transformer in transformer_list:
         transformer(runner)
       runner.wpr_archive_path = self._common_builder.original_wpr_task.path
       runner.wpr_out_log_path = os.path.join(
           RunBenchmark.path, sandwich_runner.WPR_LOG_FILENAME)
       runner.cache_archive_path = BuildBenchmarkCacheArchive.path
       runner.cache_operation = sandwich_runner.CacheOperation.PUSH
       runner.output_dir = RunBenchmark.path
       runner.Run()

     @self.RegisterTask(task_prefix + '-metrics.csv',
                        dependencies=[RunBenchmark])
     def ProcessRunOutputDir():
       benchmark_setup = json.load(open(SetupBenchmark.path))
       cache_validation_result = json.load(
           open(self._cache_validation_task.path))

       run_metrics_list = _ProcessRunOutputDir(
           cache_validation_result, benchmark_setup, RunBenchmark.path)
       with open(ProcessRunOutputDir.path, 'w') as csv_file:
         writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names +
                                     sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
         writer.writeheader()
         for trace_metrics in run_metrics_list:
           writer.writerow(trace_metrics)

     self._common_builder.default_final_tasks.append(ProcessRunOutputDir)