| # Copyright 2014 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| import json |
| import logging |
| import posixpath |
| import traceback |
| import urllib |
| |
| from docs_server_utils import StringIdentity |
| from environment_wrappers import CreateUrlFetcher |
| from file_system import FileSystem, FileNotFoundError, StatInfo |
| from future import Future |
| from path_util import ( |
| AssertIsDirectory, AssertIsFile, AssertIsValid, IsDirectory, Join) |
| |
| |
| # See gcs_file_system_provider.py for documentation on using Google Cloud |
| # Storage as a filesystem. |
| # |
| # Note that the path requirements for GCS are different for the docserver; |
| # GCS requires that paths start with a /, we require that they don't. |
| |
| |
# Name of the file containing the Git hash of the latest commit synced to
# Cloud Storage. This file is generated by the GitHub->GCS sync script.
| _LAST_COMMIT_HASH_FILENAME = '.__lastcommit.txt' |
| |
| |
| # Base URL for GCS requests. |
| _STORAGE_API_BASE = 'https://www.googleapis.com/storage/v1' |
| |
| |
class CloudStorageFileSystem(FileSystem):
  '''FileSystem implementation which fetches resources from Google Cloud
  Storage.
  '''
  def __init__(self, bucket, debug_bucket_prefix=None):
    # |debug_bucket_prefix| is accepted for signature compatibility with the
    # provider but is not used by this implementation.
    self._bucket = bucket
    self._access_token = None
    self._last_commit_hash = None
    AssertIsValid(self._bucket)

  def Read(self, paths, skip_not_found=False):
    '''Returns a Future resolving to a dict mapping each path in |paths| to
    its content: a file's data for file paths, a directory listing for
    directory paths (those ending in '/').

    If |skip_not_found| is True, paths which do not exist in the bucket are
    silently omitted from the result; otherwise FileNotFoundError is raised.
    '''
    def resolve():
      result = {}
      for path in paths:
        try:
          if IsDirectory(path):
            result[path] = self._ListDir(path)
          else:
            result[path] = self._ReadFile(path)
        except FileNotFoundError:
          # Honour the caller's request to tolerate missing paths; otherwise
          # propagate the error as usual.
          if not skip_not_found:
            raise
      return result

    return Future(callback=resolve)

  def Refresh(self):
    # GCS content is kept fresh by the external sync script; nothing to do.
    return Future(value=())

  def Stat(self, path):
    '''Returns a StatInfo whose version is the Git hash of the last commit
    synced to the bucket. Raises FileNotFoundError if |path| does not exist.
    '''
    AssertIsValid(path)
    return self._CreateStatInfo(path)

  def GetIdentity(self):
    return '@'.join((self.__class__.__name__, StringIdentity(self._bucket)))

  def _CreateStatInfo(self, path):
    # Every path in the bucket shares a single version: the hash of the last
    # commit the sync script pushed. Cache it for the lifetime of this object.
    if not self._last_commit_hash:
      self._last_commit_hash = self._ReadFile(_LAST_COMMIT_HASH_FILENAME)
    if IsDirectory(path):
      child_versions = dict((filename, self._last_commit_hash)
                            for filename in self._ListDir(path))
    else:
      child_versions = None
    return StatInfo(self._last_commit_hash, child_versions)

  def _ReadFile(self, path):
    AssertIsFile(path)
    return self._FetchObjectData(path)

  def _ListDir(self, path, recursive=False):
    AssertIsDirectory(path)
    # The listbucket method uses a prefix approach to simulate hierarchy.
    # Calling it with the "delimiter" argument set to '/' gets only files
    # directly inside the directory, not all recursive content.

    # Subdirectories are returned in the 'prefixes' property, but they are
    # full paths from the root. This plucks off the name of the leaf with a
    # trailing slash.
    def path_from_prefix(prefix):
      return posixpath.split(posixpath.split(prefix)[0])[1] + '/'

    query = { 'prefix': path }
    if not recursive:
      query['delimiter'] = '/'
    root_object = json.loads(self._FetchObject('', query=query))
    files = [posixpath.basename(o['name'])
             for o in root_object.get('items', [])]
    dirs = [path_from_prefix(prefix)
            for prefix in root_object.get('prefixes', [])]
    return files + dirs

  def _FetchObject(self, path, query=None):
    '''Fetches the raw API response for |path| from the bucket. |query| is an
    optional dict of URL query parameters. Raises FileNotFoundError on any
    non-200 response.
    '''
    # Escape the path, including slashes, since the object name is a single
    # URL path segment in the storage API.
    url_path = urllib.quote(path.lstrip('/'), safe='')
    fetcher = CreateUrlFetcher()
    object_url = '%s/b/%s/o/%s' % (_STORAGE_API_BASE, self._bucket, url_path)
    # Use a fresh dict rather than a mutable default argument.
    response = fetcher.Fetch(object_url, query=query if query else {})
    if response.status_code != 200:
      raise FileNotFoundError(
          'Path %s not found in GCS bucket %s' % (path, self._bucket))
    return response.content

  def _FetchObjectData(self, path, query=None):
    '''Fetches the content (media) of the object at |path|, passing through
    any extra |query| parameters.
    '''
    # Copy before mutating so the caller's dict is never modified.
    q = dict(query) if query else {}
    q['alt'] = 'media'
    return self._FetchObject(path, query=q)

  def __repr__(self):
    return 'CloudStorageFileSystem(%s)' % self._bucket