| # Copyright 2013 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| import contextlib |
| import re |
| |
| from recipe_engine import recipe_api |
| |
| class GSUtilApi(recipe_api.RecipeApi): |
| |
def __init__(self, env_properties, *args, **kwargs):
    """Initializes the module and remembers the ambient Boto settings.

    Args:
      * env_properties - Object exposing `BOTO_CONFIG` and `BOTO_PATH`
        attributes; both values are stored on the instance.
      * args, kwargs - Forwarded unchanged to the base class constructor.
    """
    super(GSUtilApi, self).__init__(*args, **kwargs)
    boto_config = env_properties.BOTO_CONFIG
    boto_path = env_properties.BOTO_PATH
    self._boto_config_path, self._boto_path = boto_config, boto_path
| |
@property
def gsutil_py_path(self):
    """Location of `gsutil.py`, as resolved by `repo_resource`."""
    launcher = self.repo_resource('gsutil.py')
    return launcher
| |
| def __call__(self, |
| cmd, |
| name=None, |
| use_retry_wrapper=True, |
| version=None, |
| parallel_upload=False, |
| multithreaded=False, |
| infra_step=True, |
| dry_run=False, |
| **kwargs): |
| """A step to run arbitrary gsutil commands. |
| |
| On LUCI this should automatically use the ambient task account credentials. |
| On Buildbot, this assumes that gsutil authentication environment variables |
| (AWS_CREDENTIAL_FILE and BOTO_CONFIG) are already set, though if you want to |
| set them to something else you can always do so using the env={} kwarg. |
| |
| Note also that gsutil does its own wildcard processing, so wildcards are |
| valid in file-like portions of the cmd. See 'gsutil help wildcards'. |
| |
| Args: |
| * cmd (List[str|Path]) - Arguments to pass to gsutil. Include gsutil-level |
| options first (see 'gsutil help options'). |
| * name (str) - Name of the step to use. Defaults to the first non-flag |
| token in the cmd. |
| * dry_run (bool): If True, don't actually run the step; just log what |
| the step would have been. |
| """ |
| if name: |
| full_name = 'gsutil ' + name |
| else: |
| full_name = 'gsutil' # our fall-through name |
| # Find first cmd token not starting with '-' |
| for itm in cmd: |
| token = str(itm) # it could be a Path |
| if not token.startswith('-'): |
| full_name = 'gsutil ' + token |
| break |
| |
| gsutil_path = self.gsutil_py_path |
| cmd_prefix = [] |
| |
| if use_retry_wrapper: |
| # We pass the real gsutil_path to the wrapper so it doesn't have to do |
| # brittle path logic. |
| cmd_prefix = ['--', gsutil_path] |
| gsutil_path = self.resource('gsutil_smart_retry.py') |
| |
| if version: |
| cmd_prefix.extend(['--force-version', version]) |
| |
| if parallel_upload: |
| cmd_prefix.extend([ |
| '-o', |
| 'GSUtil:parallel_composite_upload_threshold=50M' |
| ]) |
| |
| if multithreaded: |
| cmd_prefix.extend(['-m']) |
| |
| if use_retry_wrapper: |
| # The -- argument for the wrapped gsutil.py is escaped as ---- as python |
| # 2.7.3 removes all occurrences of --, not only the first. It is unescaped |
| # in gsutil_wrapper.py and then passed as -- to gsutil.py. |
| # Note, that 2.7.6 doesn't have this problem, but it doesn't hurt. |
| cmd_prefix.append('----') |
| else: |
| cmd_prefix.append('--') |
| |
| exec_cmd = ['python3', '-u', gsutil_path] + cmd_prefix + cmd |
| if dry_run: |
| return self.m.step.empty(full_name, |
| step_text='Pretending to run gsutil command', |
| log_text=' '.join((str(i) for i in exec_cmd)), |
| log_name='command') |
| return self.m.step(full_name, exec_cmd, infra_step=infra_step, **kwargs) |
| |
def upload(self, source, bucket, dest, args=None, link_name='gsutil.upload',
           metadata=None, unauthenticated_url=False, **kwargs):
    """Uploads `source` to `gs://<bucket>/<dest>` with `gsutil cp`.

    Args:
      * source (str|Path) - Local path to upload.
      * bucket (str) - Name of the destination bucket.
      * dest (str) - Object path inside the bucket.
      * args (List[str]) - Extra options for `cp`. The list is copied, the
        caller's list is never modified.
      * link_name (str) - Name of the link added to the step presentation;
        pass a falsy value to add no link.
      * metadata (dict) - Object metadata, turned into gsutil-level `-h`
        options; a value of None emits the bare field name.
      * unauthenticated_url (bool) - If True, the link for a single object
        uses the anonymous viewer instead of the authenticated one.
      * kwargs - Forwarded to the gsutil step; `name` overrides the default
        step name 'upload'.

    Returns:
      The step result of the gsutil invocation.
    """
    args = [] if args is None else args[:]
    # Note that metadata arguments have to be passed before the command cp.
    metadata_args = self._generate_metadata_args(metadata)
    full_dest = 'gs://%s/%s' % (bucket, dest)
    cmd = metadata_args + ['cp'] + args + [source, full_dest]
    name = kwargs.pop('name', 'upload')

    result = self(cmd, name, **kwargs)

    if link_name:
        # gsutil treats -r and -R as synonyms, so either one means the
        # destination is a directory and should link to the bucket browser.
        is_dir = any(flag in args for flag in ('-r', '-R', '--recursive'))
        result.presentation.links[link_name] = self._http_url(
            bucket, dest, is_directory=is_dir,
            is_anonymous=unauthenticated_url)
    return result
| |
def download(self, bucket, source, dest, args=None, **kwargs):
    """Copies `gs://<bucket>/<source>` to the local path `dest` via `cp`.

    Args:
      * bucket (str) - Name of the source bucket.
      * source (str) - Object path inside the bucket.
      * dest (str|Path) - Local destination.
      * args (List[str]) - Extra options for `cp`; the list is copied.
      * kwargs - Forwarded to the gsutil step; `name` overrides the default
        step name 'download'.

    Returns:
      The step result of the gsutil invocation.
    """
    step_name = kwargs.pop('name', 'download')
    cp_options = args[:] if args is not None else []
    remote = 'gs://%s/%s' % (bucket, source)
    return self(['cp'] + cp_options + [remote, dest], step_name, **kwargs)
| |
def download_url(self, url, dest, args=None, **kwargs):
    """Copies the object at `url` to the local path `dest` via `cp`.

    Args:
      * url (str) - Object URL; it is passed through `_normalize_url` first.
      * dest (str|Path) - Local destination.
      * args (List[str]) - Extra options for `cp`.
      * kwargs - Forwarded to the gsutil step; `name` overrides the default
        step name 'download_url'.

    Returns:
      The step result of the gsutil invocation.
    """
    cp_options = args if args else []
    gs_url = self._normalize_url(url)
    step_name = kwargs.pop('name', 'download_url')
    command = ['cp'] + cp_options + [gs_url, dest]
    return self(command, step_name, **kwargs)
| |
def cat(self, url, args=None, **kwargs):
    """Runs `gsutil cat` on the object at `url`.

    Args:
      * url (str) - Object URL; it is passed through `_normalize_url` first.
      * args (List[str]) - Extra options for `cat`.
      * kwargs - Forwarded to the gsutil step; `name` overrides the default
        step name 'cat'.

    Returns:
      The step result of the gsutil invocation.
    """
    cat_options = args if args else []
    target = self._normalize_url(url)
    step_name = kwargs.pop('name', 'cat')
    return self(['cat'] + cat_options + [target], step_name, **kwargs)
| |
def stat(self, url, args=None, **kwargs):
    """Runs `gsutil stat` on the object at `url`.

    Args:
      * url (str) - Object URL; it is passed through `_normalize_url` first.
      * args (List[str]) - Extra options for `stat`.
      * kwargs - Forwarded to the gsutil step; `name` overrides the default
        step name 'stat'.

    Returns:
      The step result of the gsutil invocation.
    """
    if not args:
        args = []
    step_name = kwargs.pop('name', 'stat')
    command = ['stat'] + args + [self._normalize_url(url)]
    return self(command, step_name, **kwargs)
| |
def copy(self, source_bucket, source, dest_bucket, dest, args=None,
         link_name='gsutil.copy', metadata=None, unauthenticated_url=False,
         **kwargs):
    """Copies `gs://<source_bucket>/<source>` to `gs://<dest_bucket>/<dest>`.

    Args:
      * source_bucket (str) - Name of the source bucket.
      * source (str) - Object path inside the source bucket.
      * dest_bucket (str) - Name of the destination bucket.
      * dest (str) - Object path inside the destination bucket.
      * args (List[str]) - Extra options for `cp`. The list is copied, the
        caller's list is never modified.
      * link_name (str) - Name of the link added to the step presentation;
        pass a falsy value to add no link.
      * metadata (dict) - Object metadata, turned into gsutil-level `-h`
        options; a value of None emits the bare field name.
      * unauthenticated_url (bool) - If True, the link for a single object
        uses the anonymous viewer instead of the authenticated one.
      * kwargs - Forwarded to the gsutil step; `name` overrides the default
        step name 'copy'.

    Returns:
      The step result of the gsutil invocation.
    """
    # Work on a copy so a list supplied by the caller is left untouched.
    args = [] if args is None else args[:]
    # -h is a gsutil-level option, so the metadata arguments have to be
    # passed before the command cp rather than among its own options.
    metadata_args = self._generate_metadata_args(metadata)
    full_source = 'gs://%s/%s' % (source_bucket, source)
    full_dest = 'gs://%s/%s' % (dest_bucket, dest)
    cmd = metadata_args + ['cp'] + args + [full_source, full_dest]
    name = kwargs.pop('name', 'copy')

    result = self(cmd, name, **kwargs)

    if link_name:
        # gsutil treats -r and -R as synonyms, so either one means the
        # destination is a directory and should link to the bucket browser.
        is_dir = any(flag in args for flag in ('-r', '-R', '--recursive'))
        result.presentation.links[link_name] = self._http_url(
            dest_bucket, dest, is_directory=is_dir,
            is_anonymous=unauthenticated_url)
    return result
| |
def list(self, url, args=None, **kwargs):
    """Runs `gsutil ls` on `url`.

    Args:
      * url (str) - URL to list; it is passed through `_normalize_url` first.
      * args (List[str]) - Extra options for `ls`.
      * kwargs - Forwarded to the gsutil step; `name` overrides the default
        step name 'list'.

    Returns:
      The step result of the gsutil invocation.
    """
    ls_options = args if args else []
    target = self._normalize_url(url)
    step_name = kwargs.pop('name', 'list')
    command = ['ls'] + ls_options + [target]
    return self(command, step_name, **kwargs)
| |
def signurl(self, private_key_file, bucket, dest, args=None, **kwargs):
    """Runs `gsutil signurl` for `gs://<bucket>/<dest>`.

    Args:
      * private_key_file (str|Path) - Key file handed to `signurl`.
      * bucket (str) - Name of the bucket holding the object.
      * dest (str) - Object path inside the bucket.
      * args (List[str]) - Extra options for `signurl`.
      * kwargs - Forwarded to the gsutil step; `name` overrides the default
        step name 'signurl'.

    Returns:
      The step result of the gsutil invocation.
    """
    if not args:
        args = []
    object_url = 'gs://%s/%s' % (bucket, dest)
    step_name = kwargs.pop('name', 'signurl')
    command = ['signurl'] + args + [private_key_file, object_url]
    return self(command, step_name, **kwargs)
| |
def remove_url(self, url, args=None, **kwargs):
    """Runs `gsutil rm` on `url`.

    Args:
      * url (str) - URL to remove; it is passed through `_normalize_url`
        first.
      * args (List[str]) - Extra options for `rm`.
      * kwargs - Forwarded to the gsutil step; `name` overrides the default
        step name 'remove'.

    Returns:
      The step result of the gsutil invocation.
    """
    rm_options = args if args else []
    target = self._normalize_url(url)
    step_name = kwargs.pop('name', 'remove')
    return self(['rm'] + rm_options + [target], step_name, **kwargs)
| |
@contextlib.contextmanager
def configure_gsutil(self, **kwargs):
    """Temporarily configures the behavior of gsutil.

    While the context is active, a freshly written Boto file is appended to
    the existing Boto location and exported through the BOTO_PATH env var,
    with BOTO_CONFIG unset for the duration. See
    https://cloud.google.com/storage/docs/boto-gsutil for possible
    configurations.

    The context is a no-op when there is nothing to configure, or when
    neither BOTO_CONFIG nor BOTO_PATH was recorded at construction time.

    Args:
      kwargs: Every keyword arg becomes one `key = value` line in the
        `[GSUtil]` section of the temp Boto file.
    """
    if self.m.platform.is_mac:
        # Due to https://bugs.python.org/issue33725, using gsutil to download
        # sufficiently large files on MacOS has been seen to hang
        # indefinitely, and disabling multi-processing avoids that hang.
        kwargs.setdefault('parallel_process_count', '1')

    # Nothing to write, or no known Boto location to extend. In the latter
    # case gsutil falls back to its default locations (/etc/boto.cfg and
    # ~/.boto); rather than incorporating all the defaults, give up. ~All
    # LUCI builds should at least be setting BOTO_CONFIG.
    if not kwargs or not (self._boto_config_path or self._boto_path):
        yield
        return

    boto_file = self.m.path.mkstemp(prefix='custom_boto_')
    # https://cloud.google.com/storage/docs/boto-gsutil seems to indicate that
    # the section headers are important, so every line goes under [GSUtil].
    lines = [
        '# Generated by $depot_tools.recipe_modules.gsutil',
        '[GSUtil]',
    ]
    lines.extend('%s = %s' % (key, value) for key, value in kwargs.items())
    self.m.file.write_text(
        'write temp Boto file', boto_file, '\n'.join(lines))

    # BOTO_CONFIG can only point to one file; BOTO_PATH can point to multiple,
    # each joined by ':'. If BOTO_CONFIG is set, BOTO_PATH is ignored, so the
    # BOTO_CONFIG file (when known) is the one carried over into BOTO_PATH.
    # NOTE(review): the ':' separator is hard-coded - confirm it is what
    # gsutil expects on Windows.
    existing = self._boto_config_path or self._boto_path
    combined = existing + ':' + self.m.path.abspath(boto_file)
    with self.m.context(env={'BOTO_PATH': combined, 'BOTO_CONFIG': None}):
        yield
| |
| def _generate_metadata_args(self, metadata): |
| result = [] |
| if metadata: |
| for k, v in sorted(metadata.items(), key=lambda k: k[0]): |
| field = self._get_metadata_field(k) |
| param = (field) if v is None else ('%s:%s' % (field, v)) |
| result += ['-h', param] |
| return result |
| |
| def _normalize_url(self, url): |
| gs_prefix = 'gs://' |
| # Defines the regex that matches a normalized URL. |
| for prefix in ( |
| gs_prefix, |
| 'https://storage.cloud.google.com/', |
| 'https://storage.googleapis.com/', |
| ): |
| if url.startswith(prefix): |
| return gs_prefix + url[len(prefix):] |
| raise AssertionError("%s cannot be normalized" % url) |
| |
| @classmethod |
| def _http_url(cls, bucket, dest, is_directory=False, is_anonymous=False): |
| if is_directory: |
| # Use GCP console. |
| url_template = 'https://console.cloud.google.com/storage/browser/%s/%s' |
| elif is_anonymous: |
| # Use unauthenticated object viewer. |
| url_template = 'https://storage.googleapis.com/%s/%s' |
| else: |
| # Use authenticated object viewer. |
| url_template = 'https://storage.cloud.google.com/%s/%s' |
| return url_template % (bucket, dest) |
| |
| @staticmethod |
| def _get_metadata_field(name, provider_prefix=None): |
| """Returns: (str) the metadata field to use with Google Storage |
| |
| The Google Storage specification for metadata can be found at: |
| https://developers.google.com/storage/docs/gsutil/addlhelp/WorkingWithObjectMetadata |
| """ |
| # Already contains custom provider prefix |
| if name.lower().startswith('x-'): |
| return name |
| |
| # See if it's innately supported by Google Storage |
| if name in ( |
| 'Cache-Control', |
| 'Content-Disposition', |
| 'Content-Encoding', |
| 'Content-Language', |
| 'Content-MD5', |
| 'Content-Type', |
| ): |
| return name |
| |
| # Add provider prefix |
| if not provider_prefix: |
| provider_prefix = 'x-goog-meta' |
| return '%s-%s' % (provider_prefix, name) |