# Copyright 2022 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""A small subset of commands for interaction with Google Cloud Storage.
It's used when py_utils from the catapult repo are not available (as is the case
on autorollers).
"""
import hashlib
import os
import subprocess
PUBLIC_BUCKET = 'chromium-telemetry'
INTERNAL_BUCKET = 'chrome-telemetry'
def _RunCommand(args):
  """Runs gsutil with |args| and returns its stdout as a string."""
  # Locate the gsutil executable on PATH rather than assuming an absolute
  # install location.
  gsutil_command = 'gsutil'
  for path in os.getenv('PATH', '').split(os.path.pathsep):
    gsutil_path = os.path.join(path, gsutil_command)
    if os.path.exists(gsutil_path):
      break
  else:
    # The loop fell through without finding the executable.
    raise RuntimeError('gsutil was not found on PATH.')
  gsutil = subprocess.Popen([gsutil_path] + args,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
  stdout, stderr = gsutil.communicate()
  if gsutil.returncode:
    raise RuntimeError(stderr.decode('utf-8'))
  return stdout.decode('utf-8')

def Get(bucket, remote_path, local_path):
  """Downloads gs://|bucket|/|remote_path| to the file at |local_path|."""
  url = 'gs://%s/%s' % (bucket, remote_path)
  _RunCommand(['cp', url, local_path])

def Exists(bucket, remote_path):
  """Returns True if gs://|bucket|/|remote_path| exists."""
  try:
    _RunCommand(['ls', 'gs://%s/%s' % (bucket, remote_path)])
    return True
  except RuntimeError:
    return False

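# A minimal usage sketch for the two helpers above. The object path and the
# local destination are hypothetical; it assumes gsutil is on PATH and the
# caller has read access to the bucket:
#
#   if Exists(PUBLIC_BUCKET, 'binaries/abc123'):
#     Get(PUBLIC_BUCKET, 'binaries/abc123', '/tmp/abc123')
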
def ListFiles(bucket, path='', sort_by='name'):
  """Returns files matching the given path in bucket.

  Args:
    bucket: Name of cloud storage bucket to look at.
    path: Path within the bucket to filter to. Path can include wildcards.
    sort_by: 'name' (default), 'time' or 'size'.

  Returns:
    A sorted list of files.
  """
  bucket_prefix = 'gs://%s' % bucket
  full_path = '%s/%s' % (bucket_prefix, path)
  stdout = _RunCommand(['ls', '-l', '-d', full_path])
  # Filter out directories and the summary line.
  file_infos = [
      line.split(None, 2) for line in stdout.splitlines()
      if len(line) > 0 and not line.startswith('TOTAL')
      and not line.endswith('/')
  ]
  # The first field in the info is size, the second is time, the third is
  # name.
  if sort_by == 'size':
    file_infos.sort(key=lambda info: int(info[0]))
  elif sort_by == 'time':
    file_infos.sort(key=lambda info: info[1])
  elif sort_by == 'name':
    file_infos.sort(key=lambda info: info[2])
  else:
    raise ValueError('Wrong sort_by value: %s' % sort_by)
  return [url[len(bucket_prefix):] for _, _, url in file_infos]

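# Example call (hypothetical wildcard path; assumes list access to the
# bucket). Returned paths have the 'gs://<bucket>' prefix stripped, so they
# keep a leading '/':
#
#   names = ListFiles(PUBLIC_BUCKET, 'traces/*.html', sort_by='time')
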
def Insert(bucket, remote_path, local_path, publicly_readable):
  """Uploads the file in |local_path| to cloud storage.

  Unlike the newer py_utils version of Insert(), which returns a
  CloudFilepath object, this subset returns nothing.

  Args:
    bucket: the google cloud storage bucket name.
    remote_path: the remote file path in |bucket|.
    local_path: path of the local file to be uploaded.
    publicly_readable: whether the uploaded file has publicly readable
      permission.
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  command_and_args = ['cp']
  if publicly_readable:
    command_and_args += ['-a', 'public-read']
  command_and_args += [local_path, url]
  _RunCommand(command_and_args)

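# Example upload (hypothetical paths; `-a public-read` is the gsutil cp
# canned-ACL flag applied above, and the caller needs write access to the
# bucket):
#
#   Insert(PUBLIC_BUCKET, 'binaries/tool', '/tmp/tool',
#          publicly_readable=True)
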
def CalculateHash(file_path):
  """Calculates and returns the SHA-1 hash of the file at |file_path|."""
  sha1 = hashlib.sha1()
  with open(file_path, 'rb') as f:
    while True:
      # Read in 1 MiB chunks, so the whole file doesn't have to be loaded
      # into memory at once.
      chunk = f.read(1024 * 1024)
      if not chunk:
        break
      sha1.update(chunk)
  return sha1.hexdigest()
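
# A sketch of the download-by-hash flow this helper supports. The
# '<name>.sha1' pointer-file layout mirrors catapult's cloud_storage
# convention, where objects are stored under their SHA-1; the file names
# here are hypothetical:
#
#   with open('bin/tool.sha1') as f:
#     expected = f.read().strip()
#   if not (os.path.exists('bin/tool') and
#           CalculateHash('bin/tool') == expected):
#     Get(PUBLIC_BUCKET, expected, 'bin/tool')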