| # Copyright 2020 The Chromium Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| import logging |
| import os |
| |
| from py_utils import cloud_storage |
| |
# Directory that contains this module.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# Default value of the |download_dir| argument of the Download* functions.
DEFAULT_TRACE_DIR = os.path.join(_SCRIPT_DIR, 'traces')

# Prefix that every html trace url handled by this module must start with.
HTML_URL_PREFIX = 'https://storage.cloud.google.com/chrome-telemetry-output/'
| |
| |
def _GetSubpathInBucket(html_url):
  """Returns the part of |html_url| that follows HTML_URL_PREFIX.

  Given https://storage.../chrome-telemetry-output/foo/bar/trace.html,
  it returns foo/bar/trace.html.

  Args:
    html_url: Url of an html trace. Must start with HTML_URL_PREFIX.

  Returns:
    |html_url| with the leading HTML_URL_PREFIX removed.

  Raises:
    Exception: If |html_url| does not start with HTML_URL_PREFIX.
  """
  if not html_url.startswith(HTML_URL_PREFIX):
    raise Exception('Html trace url must start with %s' % HTML_URL_PREFIX)
  # Slice the prefix off rather than calling str.replace(): replace() would
  # also delete any later occurrence of the prefix inside the path.
  return html_url[len(HTML_URL_PREFIX):]
| |
| |
def _GetProtoTraceLinkFromTraceEventsDir(link_prefix):
  """Returns the first proto trace in |link_prefix|/trace/traceEvents/.

  Lists the TELEMETRY_OUTPUT bucket with the pattern
  |link_prefix|/trace/traceEvents/** and picks the first entry whose name
  ends with '.pb.gz' or '.pb'.

  Args:
    link_prefix: Path inside the bucket under which to look for the
      trace/traceEvents/ directory.

  Returns:
    The first matching link, with its first character removed (the listing
    is expected to return links starting with '/').

  Raises:
    cloud_storage.NotFoundError: If the listing itself raises NotFoundError,
      or if it completes without yielding any proto trace.
  """
  proto_link_prefix = '/'.join((link_prefix, 'trace/traceEvents/**'))
  proto_suffixes = ('.pb.gz', '.pb')
  try:
    listing = cloud_storage.List(cloud_storage.TELEMETRY_OUTPUT,
                                 proto_link_prefix)
    for candidate in listing:
      if candidate.endswith(proto_suffixes):
        # Strip the initial '/'.
        return candidate[1:]
  except cloud_storage.NotFoundError as e:
    # This directory doesn't exist at all. Re-raise with the searched prefix
    # included in the message.
    message = 'No URLs match the prefix %s: %s' % (proto_link_prefix, str(e))
    raise cloud_storage.NotFoundError(message)
  # The directory exists, but no proto trace found.
  raise cloud_storage.NotFoundError(
      'Proto trace not found in cloud storage. Path: %s.' % proto_link_prefix)
| |
| |
def GetFileExtension(file_path):
  """Returns the extension of the file name in |file_path|.

  The extension is everything from the first '.' of the file name onwards,
  so given foo/bar/baz.pb.gz it returns '.pb.gz'.

  Args:
    file_path: A '/'-separated path.

  Returns:
    The extension including the leading '.', or '' if the file name (the
    part after the last '/') contains no '.'.
  """
  # Only inspect the last path component, because directory names can
  # contain "." too, like "v8.browsing".
  _, _, filename = file_path.rpartition('/')
  # When there is no '.', partition() yields an empty separator and tail, so
  # the concatenation below is the empty string.
  _, dot, remainder = filename.partition('.')
  return dot + remainder
| |
| |
def GetLocalTraceFileName(html_url):
  """Returns a local filename derived from the html trace url.

  Given https://storage.../chrome-telemetry-output/foo/bar/trace.html, it
  returns foo_bar_trace as the local filename. The filename does not contain
  extensions. It's up to the caller to add .html or .pb etc.

  Args:
    html_url: Url of an html trace. Must start with HTML_URL_PREFIX.

  Returns:
    The path of the trace inside the bucket, with the file extension removed
    and every '/' replaced by '_'.

  Raises:
    Exception: If |html_url| does not start with HTML_URL_PREFIX.
  """
  subpath = _GetSubpathInBucket(html_url)
  extension = GetFileExtension(subpath)
  # The extension is always a suffix of |subpath|. Compute the end index
  # explicitly: subpath[:-len(extension)] would become subpath[:0], i.e. an
  # empty string, for a url whose file name has no extension.
  no_extension_subpath = subpath[:len(subpath) - len(extension)]
  return no_extension_subpath.replace('/', '_')
| |
| |
def FindProtoTracePath(html_url):
  """Finds the proto trace path given a html trace url.

  In the simple case foo/bar/trace.pb is the proto trace for foo/bar/trace.html.
  But sometimes that's not available so we have to look for a .pb.gz file in a
  special directory.

  Args:
    html_url: Url of an html trace. Must start with HTML_URL_PREFIX.

  Returns:
    The path of the proto trace inside the bucket. For a url ending in
    trace.html this is the sibling trace.pb or trace.pb.gz if one exists;
    otherwise it is the first proto trace found under trace/traceEvents/ in
    the directory of the html trace.

  Raises:
    Exception: If |html_url| does not start with HTML_URL_PREFIX.
    cloud_storage.NotFoundError: If no proto trace can be found.
  """
  subpath = _GetSubpathInBucket(html_url)
  if subpath.endswith('trace.html'):
    # Swap only the trailing '.html' for '.pb'. str.replace() would rewrite
    # every 'trace.html' in the path, including one in a directory name.
    proto_path = subpath[:-len('.html')] + '.pb'
    # Prefer the uncompressed trace, then fall back to the gzipped one.
    for candidate in (proto_path, proto_path + '.gz'):
      if cloud_storage.Exists(cloud_storage.TELEMETRY_OUTPUT, candidate):
        return candidate

  # Drop the file name to get the directory that holds the html trace.
  directory_path = '/'.join(subpath.split('/')[:-1])
  return _GetProtoTraceLinkFromTraceEventsDir(directory_path)
| |
| |
def DownloadHtmlTrace(html_url, download_dir=DEFAULT_TRACE_DIR):
  """Downloads the html trace at |html_url| and returns its local path.

  The download is skipped if the local file already exists.

  Args:
    html_url: Url of the html trace. Must start with HTML_URL_PREFIX.
    download_dir: Directory in which the local file is placed.

  Returns:
    Path of the local .html file.

  Raises:
    cloud_storage.NotFoundError: If the trace does not exist in the
      TELEMETRY_OUTPUT bucket.
  """
  local_path = os.path.join(
      download_dir, GetLocalTraceFileName(html_url)) + '.html'
  if os.path.exists(local_path):
    logging.info('%s already downloaded. Skipping.' % local_path)
    return local_path

  bucket = cloud_storage.TELEMETRY_OUTPUT
  remote_path = _GetSubpathInBucket(html_url)
  if cloud_storage.Exists(bucket, remote_path):
    cloud_storage.Get(bucket, remote_path, local_path)
    return local_path

  raise cloud_storage.NotFoundError(
      'HTML trace %s not found in cloud storage.' % html_url)
| |
| |
def DownloadProtoTrace(html_url, download_dir=DEFAULT_TRACE_DIR):
  """Downloads the proto trace associated with |html_url|. Returns its path.

  The download is skipped if a local .pb or .pb.gz file for this trace
  already exists.

  Args:
    html_url: Url of the html trace. Must start with HTML_URL_PREFIX.
    download_dir: Directory in which the local file is placed.

  Returns:
    Path of the local proto trace. Its extension is that of the remote proto
    trace, or of the previously downloaded file.
  """
  local_filename = os.path.join(download_dir, GetLocalTraceFileName(html_url))
  # Reuse an earlier download; the uncompressed name is checked first.
  for suffix in ('.pb', '.pb.gz'):
    cached_path = local_filename + suffix
    if os.path.exists(cached_path):
      logging.info('%s already downloaded. Skipping.' % cached_path)
      return cached_path

  remote_path = FindProtoTracePath(html_url)
  # Keep the remote extension so the local file is named .pb or .pb.gz to
  # match what was actually downloaded.
  local_path = local_filename + GetFileExtension(remote_path)
  cloud_storage.Get(cloud_storage.TELEMETRY_OUTPUT, remote_path, local_path)
  return local_path