| # Copyright 2012 Google Inc. All Rights Reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, |
| # either express or implied. See the License for the specific |
| # language governing permissions and limitations under the License. |
| |
| """Helpers shared by cloudstorage_stub and cloudstorage_api.""" |
| |
| |
| |
| |
| |
| __all__ = ['CS_XML_NS', |
| 'CSFileStat', |
| 'dt_str_to_posix', |
| 'local_api_url', |
| 'LOCAL_GCS_ENDPOINT', |
| 'local_run', |
| 'get_access_token', |
| 'get_metadata', |
| 'GCSFileStat', |
| 'http_time_to_posix', |
| 'memory_usage', |
| 'posix_time_to_http', |
| 'posix_to_dt_str', |
| 'set_access_token', |
| 'validate_options', |
| 'validate_bucket_name', |
| 'validate_bucket_path', |
| 'validate_file_path', |
| ] |
| |
| |
| import calendar |
| import datetime |
| from email import utils as email_utils |
| import logging |
| import os |
| import re |
| |
| try: |
| from google.appengine.api import runtime |
| except ImportError: |
| from google.appengine.api import runtime |
| |
| |
| _GCS_BUCKET_REGEX_BASE = r'[a-z0-9\.\-_]{3,63}' |
| _GCS_BUCKET_REGEX = re.compile(_GCS_BUCKET_REGEX_BASE + r'$') |
| _GCS_BUCKET_PATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'$') |
| _GCS_PATH_PREFIX_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'.*') |
| _GCS_FULLPATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'/.*') |
| _GCS_METADATA = ['x-goog-meta-', |
| 'content-disposition', |
| 'cache-control', |
| 'content-encoding'] |
| _GCS_OPTIONS = _GCS_METADATA + ['x-goog-acl'] |
| CS_XML_NS = 'http://doc.s3.amazonaws.com/2006-03-01' |
| LOCAL_GCS_ENDPOINT = '/_ah/gcs' |
| _access_token = '' |
| |
| |
| _MAX_GET_BUCKET_RESULT = 1000 |
| |
| |
| def set_access_token(access_token): |
| """Set the shared access token to authenticate with Google Cloud Storage. |
| |
| When set, the library will always attempt to communicate with the |
| real Google Cloud Storage with this token even when running on dev appserver. |
| Note the token could expire so it's up to you to renew it. |
| |
| When absent, the library will automatically request and refresh a token |
| on appserver, or when on dev appserver, talk to a Google Cloud Storage |
| stub. |
| |
| Args: |
| access_token: you can get one by run 'gsutil -d ls' and copy the |
| str after 'Bearer'. |
| """ |
| global _access_token |
| _access_token = access_token |
| |
| |
| def get_access_token(): |
| """Returns the shared access token.""" |
| return _access_token |
| |
| |
| class GCSFileStat(object): |
| """Container for GCS file stat.""" |
| |
| def __init__(self, |
| filename, |
| st_size, |
| etag, |
| st_ctime, |
| content_type=None, |
| metadata=None, |
| is_dir=False): |
| """Initialize. |
| |
| For files, the non optional arguments are always set. |
| For directories, only filename and is_dir is set. |
| |
| Args: |
| filename: a Google Cloud Storage filename of form '/bucket/filename'. |
| st_size: file size in bytes. long compatible. |
| etag: hex digest of the md5 hash of the file's content. str. |
| st_ctime: posix file creation time. float compatible. |
| content_type: content type. str. |
| metadata: a str->str dict of user specified options when creating |
| the file. Possible keys are x-goog-meta-, content-disposition, |
| content-encoding, and cache-control. |
| is_dir: True if this represents a directory. False if this is a real file. |
| """ |
| self.filename = filename |
| self.is_dir = is_dir |
| self.st_size = None |
| self.st_ctime = None |
| self.etag = None |
| self.content_type = content_type |
| self.metadata = metadata |
| |
| if not is_dir: |
| self.st_size = long(st_size) |
| self.st_ctime = float(st_ctime) |
| if etag[0] == '"' and etag[-1] == '"': |
| etag = etag[1:-1] |
| self.etag = etag |
| |
| def __repr__(self): |
| if self.is_dir: |
| return '(directory: %s)' % self.filename |
| |
| return ( |
| '(filename: %(filename)s, st_size: %(st_size)s, ' |
| 'st_ctime: %(st_ctime)s, etag: %(etag)s, ' |
| 'content_type: %(content_type)s, ' |
| 'metadata: %(metadata)s)' % |
| dict(filename=self.filename, |
| st_size=self.st_size, |
| st_ctime=self.st_ctime, |
| etag=self.etag, |
| content_type=self.content_type, |
| metadata=self.metadata)) |
| |
| def __cmp__(self, other): |
| if not isinstance(other, self.__class__): |
| raise ValueError('Argument to cmp must have the same type. ' |
| 'Expect %s, got %s', self.__class__.__name__, |
| other.__class__.__name__) |
| if self.filename > other.filename: |
| return 1 |
| elif self.filename < other.filename: |
| return -1 |
| return 0 |
| |
| def __hash__(self): |
| if self.etag: |
| return hash(self.etag) |
| return hash(self.filename) |
| |
| |
| CSFileStat = GCSFileStat |
| |
| |
| def get_metadata(headers): |
| """Get user defined options from HTTP response headers.""" |
| return dict((k, v) for k, v in headers.iteritems() |
| if any(k.lower().startswith(valid) for valid in _GCS_METADATA)) |
| |
| |
| def validate_bucket_name(name): |
| """Validate a Google Storage bucket name. |
| |
| Args: |
| name: a Google Storage bucket name with no prefix or suffix. |
| |
| Raises: |
| ValueError: if name is invalid. |
| """ |
| _validate_path(name) |
| if not _GCS_BUCKET_REGEX.match(name): |
| raise ValueError('Bucket should be 3-63 characters long using only a-z,' |
| '0-9, underscore, dash or dot but got %s' % name) |
| |
| |
| def validate_bucket_path(path): |
| """Validate a Google Cloud Storage bucket path. |
| |
| Args: |
| path: a Google Storage bucket path. It should have form '/bucket'. |
| |
| Raises: |
| ValueError: if path is invalid. |
| """ |
| _validate_path(path) |
| if not _GCS_BUCKET_PATH_REGEX.match(path): |
| raise ValueError('Bucket should have format /bucket ' |
| 'but got %s' % path) |
| |
| |
| def validate_file_path(path): |
| """Validate a Google Cloud Storage file path. |
| |
| Args: |
| path: a Google Storage file path. It should have form '/bucket/filename'. |
| |
| Raises: |
| ValueError: if path is invalid. |
| """ |
| _validate_path(path) |
| if not _GCS_FULLPATH_REGEX.match(path): |
| raise ValueError('Path should have format /bucket/filename ' |
| 'but got %s' % path) |
| |
| |
| def _process_path_prefix(path_prefix): |
| """Validate and process a Google Cloud Stoarge path prefix. |
| |
| Args: |
| path_prefix: a Google Cloud Storage path prefix of format '/bucket/prefix' |
| or '/bucket/' or '/bucket'. |
| |
| Raises: |
| ValueError: if path is invalid. |
| |
| Returns: |
| a tuple of /bucket and prefix. prefix can be None. |
| """ |
| _validate_path(path_prefix) |
| if not _GCS_PATH_PREFIX_REGEX.match(path_prefix): |
| raise ValueError('Path prefix should have format /bucket, /bucket/, ' |
| 'or /bucket/prefix but got %s.' % path_prefix) |
| bucket_name_end = path_prefix.find('/', 1) |
| bucket = path_prefix |
| prefix = None |
| if bucket_name_end != -1: |
| bucket = path_prefix[:bucket_name_end] |
| prefix = path_prefix[bucket_name_end + 1:] or None |
| return bucket, prefix |
| |
| |
| def _validate_path(path): |
| """Basic validation of Google Storage paths. |
| |
| Args: |
| path: a Google Storage path. It should have form '/bucket/filename' |
| or '/bucket'. |
| |
| Raises: |
| ValueError: if path is invalid. |
| TypeError: if path is not of type basestring. |
| """ |
| if not path: |
| raise ValueError('Path is empty') |
| if not isinstance(path, basestring): |
| raise TypeError('Path should be a string but is %s (%s).' % |
| (path.__class__, path)) |
| |
| |
| def validate_options(options): |
| """Validate Google Cloud Storage options. |
| |
| Args: |
| options: a str->basestring dict of options to pass to Google Cloud Storage. |
| |
| Raises: |
| ValueError: if option is not supported. |
| TypeError: if option is not of type str or value of an option |
| is not of type basestring. |
| """ |
| if not options: |
| return |
| |
| for k, v in options.iteritems(): |
| if not isinstance(k, str): |
| raise TypeError('option %r should be a str.' % k) |
| if not any(k.lower().startswith(valid) for valid in _GCS_OPTIONS): |
| raise ValueError('option %s is not supported.' % k) |
| if not isinstance(v, basestring): |
| raise TypeError('value %r for option %s should be of type basestring.' % |
| (v, k)) |
| |
| |
| def http_time_to_posix(http_time): |
| """Convert HTTP time format to posix time. |
| |
| See http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3.1 |
| for http time format. |
| |
| Args: |
| http_time: time in RFC 2616 format. e.g. |
| "Mon, 20 Nov 1995 19:12:08 GMT". |
| |
| Returns: |
| A float of secs from unix epoch. |
| """ |
| if http_time is not None: |
| return email_utils.mktime_tz(email_utils.parsedate_tz(http_time)) |
| |
| |
| def posix_time_to_http(posix_time): |
| """Convert posix time to HTML header time format. |
| |
| Args: |
| posix_time: unix time. |
| |
| Returns: |
| A datatime str in RFC 2616 format. |
| """ |
| if posix_time: |
| return email_utils.formatdate(posix_time, usegmt=True) |
| |
| |
| _DT_FORMAT = '%Y-%m-%dT%H:%M:%S' |
| |
| |
| def dt_str_to_posix(dt_str): |
| """format str to posix. |
| |
| datetime str is of format %Y-%m-%dT%H:%M:%S.%fZ, |
| e.g. 2013-04-12T00:22:27.978Z. According to ISO 8601, T is a separator |
| between date and time when they are on the same line. |
| Z indicates UTC (zero meridian). |
| |
| A pointer: http://www.cl.cam.ac.uk/~mgk25/iso-time.html |
| |
| This is used to parse LastModified node from GCS's GET bucket XML response. |
| |
| Args: |
| dt_str: A datetime str. |
| |
| Returns: |
| A float of secs from unix epoch. By posix definition, epoch is midnight |
| 1970/1/1 UTC. |
| """ |
| parsable, _ = dt_str.split('.') |
| dt = datetime.datetime.strptime(parsable, _DT_FORMAT) |
| return calendar.timegm(dt.utctimetuple()) |
| |
| |
| def posix_to_dt_str(posix): |
| """Reverse of str_to_datetime. |
| |
| This is used by GCS stub to generate GET bucket XML response. |
| |
| Args: |
| posix: A float of secs from unix epoch. |
| |
| Returns: |
| A datetime str. |
| """ |
| dt = datetime.datetime.utcfromtimestamp(posix) |
| dt_str = dt.strftime(_DT_FORMAT) |
| return dt_str + '.000Z' |
| |
| |
| def local_run(): |
| """Whether we should hit GCS dev appserver stub.""" |
| server_software = os.environ.get('SERVER_SOFTWARE') |
| if server_software is None: |
| return True |
| if 'remote_api' in server_software: |
| return False |
| if server_software.startswith(('Development', 'testutil')): |
| return True |
| return False |
| |
| |
| def local_api_url(): |
| """Return URL for GCS emulation on dev appserver.""" |
| return 'http://%s%s' % (os.environ.get('HTTP_HOST'), LOCAL_GCS_ENDPOINT) |
| |
| |
| def memory_usage(method): |
| """Log memory usage before and after a method.""" |
| def wrapper(*args, **kwargs): |
| logging.info('Memory before method %s is %s.', |
| method.__name__, runtime.memory_usage().current()) |
| result = method(*args, **kwargs) |
| logging.info('Memory after method %s is %s', |
| method.__name__, runtime.memory_usage().current()) |
| return result |
| return wrapper |
| |
| |
| def _add_ns(tagname): |
| return '{%(ns)s}%(tag)s' % {'ns': CS_XML_NS, |
| 'tag': tagname} |
| |
| |
| _T_CONTENTS = _add_ns('Contents') |
| _T_LAST_MODIFIED = _add_ns('LastModified') |
| _T_ETAG = _add_ns('ETag') |
| _T_KEY = _add_ns('Key') |
| _T_SIZE = _add_ns('Size') |
| _T_PREFIX = _add_ns('Prefix') |
| _T_COMMON_PREFIXES = _add_ns('CommonPrefixes') |
| _T_NEXT_MARKER = _add_ns('NextMarker') |
| _T_IS_TRUNCATED = _add_ns('IsTruncated') |