blob: 79990cc17c636940f928f240701e1fe5a91ef706 [file] [log] [blame]
#!/usr/bin/python
# Copyright (c) 2012 The Native Client Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Provide Google Storage access.
Provide an high-level interface to Google Storage.
Operations are provided to read/write whole files and to
read/write strings. This allows google storage to be treated
more or less like a key+value data-store.
"""
import logging
import os
import posixpath
import random
import re
import shutil
import subprocess
import string
import sys
import tempfile
import pynacl.file_tools
import pynacl.http_download
import pynacl.platform
GS_PATTERN = 'gs://%s'
GS_HTTPS_PATTERN = 'https://storage.googleapis.com/%s'
def LegalizeName(name):
""" Return a file name suitable for uploading to Google Storage.
The names of such files cannot contain dashes or other non-identifier
characters.
"""
return re.sub(r'[^A-Za-z0-9_/.]', '_', name)
def HttpDownload(url, target):
"""Default download route."""
pynacl.http_download.HttpDownload(
url, os.path.abspath(target), verbose=False, logger=logging.debug)
class GSDStorageError(Exception):
"""Error indicating writing to storage failed."""
pass
class GSDStorage(object):
"""A wrapper for reading and writing to GSD buckets.
Multiple read buckets may be specified, and the wrapper will sequentially try
each and fall back to the next if the previous fails.
Writing is to a single bucket.
"""
def __init__(self,
write_bucket,
read_buckets,
gsutil=None,
call=subprocess.call,
download=HttpDownload):
"""Init for this class.
Args:
write_bucket: Google storage location to write to.
read_buckets: Google storage locations to read from in preferred order.
gsutil: List of cmd components needed to call gsutil.
call: Testing hook to intercept command invocation.
download: Testing hook to intercept download.
"""
if gsutil is None:
gsutil_script = pynacl.platform.CygPath(
os.environ.get('GSUTIL', 'gsutil'))
try:
# Require that gsutil be Python if it is specified in the environment.
gsutil = [
sys.executable,
pynacl.file_tools.Which(gsutil_script, require_executable=False)
]
except pynacl.file_tools.ExecutableNotFound:
gsutil = [gsutil_script]
assert isinstance(gsutil, list)
assert isinstance(read_buckets, list)
self._gsutil = gsutil
self._write_bucket = write_bucket
self._read_buckets = read_buckets
self._call = call
self._download = download
def Exists(self, key):
"""Queries whether or not a key exists.
Args:
key: Key file is stored under.
Returns:
URL of existing key, or False if file does not exist.
"""
for bucket in set(self._read_buckets + [self._write_bucket]):
obj = posixpath.join(bucket, key)
cmd = self._gsutil + ['ls', GS_PATTERN % obj]
logging.info('Running: %s', str(cmd))
if self._call(cmd) == 0:
return GS_HTTPS_PATTERN % obj
return False
def PutFile(self, path, key, clobber=True):
"""Write a file to global storage.
Args:
path: Path of the file to write.
key: Key to store file under.
Raises:
GSDStorageError if the underlying storage fails.
Returns:
URL written to.
"""
if self._write_bucket is None:
raise GSDStorageError('no bucket when storing %s to %s' % (path, key))
obj = self._write_bucket + '/' + key
cp_cmd = ['cp', '-a', 'public-read']
if not clobber:
cp_cmd.append('-n')
gs_path = GS_PATTERN % obj
gs_tmp_path = gs_path + '.tmp.' + ''.join(
random.choice(string.lowercase) for x in range(10))
# Using file://c:/foo/bar form of path as gsutil does not like drive
# letters without it.
file_path ='file://' + os.path.abspath(path).replace(os.sep, '/')
# Store to temporary location.
cmd = self._gsutil + cp_cmd + [file_path, gs_tmp_path]
logging.info('Running: %s' % str(cmd))
if self._call(cmd) != 0:
raise GSDStorageError('failed when storing %s to %s (%s)' % (
path, key, cmd))
# Copy to final location (so the window of time is short).
cmd = self._gsutil + cp_cmd + [gs_tmp_path, gs_path]
logging.info('Running: %s' % str(cmd))
if self._call(cmd) != 0:
raise GSDStorageError('failed when storing %s to %s (%s)' % (
path, key, cmd))
# Cleanup.
cmd = self._gsutil + ['rm', gs_tmp_path]
logging.info('Running: %s' % str(cmd))
if self._call(cmd) != 0:
raise GSDStorageError('failed when storing %s to %s (%s)' % (
path, key, cmd))
return GS_HTTPS_PATTERN % obj
def PutData(self, data, key, clobber=True):
"""Write data to global storage.
Args:
data: Data to store.
key: Key to store file under.
Raises:
GSDStorageError if the underlying storage fails.
Returns:
URL written to.
"""
handle, path = tempfile.mkstemp(prefix='gdstore', suffix='.tmp')
try:
os.close(handle)
pynacl.file_tools.WriteFile(data, path)
return self.PutFile(path, key, clobber=clobber)
finally:
os.remove(path)
def GetFile(self, key, path):
"""Read a file from global storage.
Args:
key: Key to store file under.
path: Destination filename.
Returns:
URL used on success or None for failure.
"""
for bucket in self._read_buckets:
try:
obj = bucket + '/' + key
uri = GS_HTTPS_PATTERN % obj
logging.debug('Downloading: %s to %s' % (uri, path))
self._download(uri, path)
return uri
except:
logging.debug('Failed downloading: %s to %s' % (uri, path))
return None
def GetSecureFile(self, key, path):
""" Read a non-publicly-accessible file from global storage.
Args:
key: Key file is stored under.
path: Destination filename
Returns:
command used on success or None on failure.
"""
for bucket in self._read_buckets:
try:
obj = bucket + '/' + key
cmd = self._gsutil + [
'cp', GS_PATTERN % obj,
'file://' + os.path.abspath(path).replace(os.sep, '/')]
logging.info('Running: %s' % str(cmd))
if self._call(cmd) == 0:
return cmd
except:
logging.debug('Failed to fetch %s from %s (%s)' % (key, path, cmd))
return None
def GetData(self, key):
"""Read data from global storage.
Args:
key: Key to store file under.
Returns:
Data from storage, or None for failure.
"""
work_dir = tempfile.mkdtemp(prefix='gdstore', suffix='.tmp')
try:
path = os.path.join(work_dir, 'data')
if self.GetFile(key, path) is not None:
return pynacl.file_tools.ReadFile(path)
return None
finally:
shutil.rmtree(work_dir)