* Copyright 2007 Google Inc.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
* CloudStorageClient is the base class for classes that are used to communicate
* with Google Cloud Storage via the PHP streams interface.
namespace google\appengine\ext\cloud_storage_streams;
use google\appengine\api\app_identity\AppIdentityService;
use google\appengine\api\app_identity\AppIdentityException;
use google\appengine\api\cloud_storage\CloudStorageTools;
use google\appengine\runtime\ApiProxy;
use google\appengine\runtime\ApplicationError;
use google\appengine\URLFetchRequest\RequestMethod;
use google\appengine\util\ArrayUtil;
* CloudStorageClient provides default fail implementations for all of the
* methods that the stream wrapper might potentially call. Derived classes then
* only implement the methods that are relevant to the operations that they
* perform.
abstract class CloudStorageClient {
// The default chunk size that we will read from the file. This value should
// remain smaller than the maximum object size valid for memcache writes so
// we can cache the reads.
const DEFAULT_READ_SIZE = 524288;
// The default amount of time that reads will be held in the cache.
const DEFAULT_READ_CACHE_EXPIRY_SECONDS = 3600; // one hour
// The default maximum number of times that certain (see retryable_statuses)
// failed Google Cloud Storage requests will be retried before returning
// failure.
// The default time the writable state of a bucket will be cached for.
// NOTE(review): the two comments above have no constant declared beneath them
// in this section - confirm the corresponding declarations were not lost.
// Token scopes for accessing objects in Google Cloud Storage.
// NOTE(review): all three scope values are empty strings here - confirm the
// intended scope strings.
const READ_SCOPE = "";
const WRITE_SCOPE = "";
const FULL_SCOPE = "";
// sprintf() format for the OAuth token header; %s is the access token.
const OAUTH_TOKEN_FORMAT = "OAuth %s";
// Content Range Header format when the total length is unknown.
const PARTIAL_CONTENT_RANGE_FORMAT = "bytes %d-%d/*";
// Content Range Header format when the length is known.
const FINAL_CONTENT_RANGE_FORMAT = "bytes %d-%d/%d";
// Content Range Header for the final chunk when it carries no new data.
const FINAL_CONTENT_RANGE_NO_DATA = "bytes */%d";
// A character or multiple characters that can be used to simplify a list of
// objects that use a directory-like naming scheme. Can be used in conjunction
// with a prefix.
const DELIMITER = '/';
// Cloud storage can append _$folder$ to an object name and have it behave
// like a regular file system folder.
const FOLDER_SUFFIX = '_$folder$';
// Temporary file name we create when checking if a bucket is writable.
const WRITABLE_TEMP_FILENAME = "/_ah_is_writable_temp_file";
// Bit fields for the stat mode field. All values are written as octal
// literals (leading zero), so they can be OR-ed together to build a mode.
const S_IFREG = 0100000;
const S_IFDIR = 0040000;
const S_IRWXU = 00700; // mask for owner permissions
const S_IRUSR = 00400; // read for owner
const S_IWUSR = 00200; // write for owner
const S_IXUSR = 00100; // execute for owner
const S_IRWXG = 00070; // mask for group permissions
const S_IRGRP = 00040; // read for group
const S_IWGRP = 00020; // write for group
const S_IXGRP = 00010; // execute for group
const S_IRWXO = 00007; // mask for other permissions
const S_IROTH = 00004; // read for other
const S_IWOTH = 00002; // write for other
const S_IXOTH = 00001; // execute for other
// The API version header. makeHttpRequest() merges it into the headers of
// every request it sends.
private static $api_version_header = ["x-goog-api-version" => 2];
// Regex pattern for retrieving the length of the content being served.
// Case-insensitive; captures three numeric groups from a value shaped like
// "bytes <first>-<second>/<third>".
const CONTENT_RANGE_REGEX = "/bytes\s+(\d+)-(\d+)\/(\d+)/i";
* Memcache key format for caching the results of reads from GCS. The
* parameters are the object url (as a string) and the read range, as a
* string (e.g. bytes=0-512000).
* Example key for a cloud storage file gs://bucket/object.png
* _ah_gs_read_cache_
// sprintf() template with two %s placeholders.
const MEMCACHE_KEY_FORMAT = "_ah_gs_read_cache_%s_%s";
* Memcache key format for caching the results of checking if a bucket is
* writable. The only way to check if an app can write to a bucket is by
* actually writing a file. As the ACL on a bucket is unlikely to change
* then we can cache the result.
// sprintf() template with a single %s placeholder.
const WRITABLE_MEMCACHE_KEY_FORMAT = "_ah_gs_write_bucket_cache_%s";
// HTTP status codes that should be retried if they are returned by a request
// to GCS. Retry should occur with a random exponential back-off.
// NOTE(review): this array literal, and several of the ones below, are not
// terminated in this section - confirm their full contents in the complete
// file.
protected static $retry_error_codes = [HttpResponse::REQUEST_TIMEOUT,
// Values that are allowed to be supplied as ACLs when writing objects.
protected static $valid_acl_values = ["private",
// Header carrying "x-goog-resumable: start".
// NOTE(review): presumably sent to begin a resumable upload - verify at the
// call sites, which are not visible here.
protected static $upload_start_header = ["x-goog-resumable" => "start"];
// Map HTTP request types to URLFetch method enum.
private static $request_map = [
"GET" => RequestMethod::GET,
"POST" => RequestMethod::POST,
"HEAD" => RequestMethod::HEAD,
"PUT" => RequestMethod::PUT,
"DELETE" => RequestMethod::DELETE,
"PATCH" => RequestMethod::PATCH
// HTTP status codes that doHttpRequest() treats as retryable.
private static $retryable_statuses = [
408, // Request Timeout
500, // Internal Server Error
502, // Bad Gateway
503, // Service Unavailable
504, // Gateway Timeout
// Defaults for the "gs" stream context options; the constructor uses these
// when no "gs" options are supplied and merges them with caller options
// otherwise.
private static $default_gs_context_options = [
"enable_cache" => true,
"enable_optimistic_cache" => false,
"read_cache_expiry_seconds" => self::DEFAULT_READ_CACHE_EXPIRY_SECONDS,
"writable_cache_expiry_seconds" =>
protected $bucket_name; // Name of the bucket for this object.
protected $object_name; // The name of the object.
protected $context_options = []; // Any context arguments supplied on open.
protected $url; // GCS URL of the object.
protected $anonymous; // Use anonymous access when contacting GCS.
* Construct an object of CloudStorageClient.
* @param string $bucket The name of the bucket.
* @param string $object The name of the object, or null if there is no
* object.
* @param resource $context The stream context to use.
// Records the bucket and object names, resolves the "gs" stream context
// options against the class defaults, and derives the URL of the object.
// NOTE(review): some statements below are cut off in this section (their
// continuation lines and closing braces are not visible) - confirm against
// the complete file.
public function __construct($bucket, $object = null, $context = null) {
$this->bucket_name = $bucket;
$this->object_name = $object;
// Fall back to the default stream context when the caller supplied none.
if (!isset($context)) {
$context = stream_context_get_default();
$context_array = stream_context_get_options($context);
// When the context carries "gs" options they are combined with the defaults
// via array_merge() (second argument not visible here); otherwise the
// defaults are used unchanged.
if (array_key_exists("gs", $context_array)) {
$this->context_options = array_merge(self::$default_gs_context_options,
} else {
$this->context_options = self::$default_gs_context_options;
// The anonymous flag is looked up in the resolved context options.
$this->anonymous = ArrayUtil::findByKeyOrNull($this->context_options,
$this->url = $this->createObjectUrl($bucket, $object);
/**
 * Destructor. The base class holds nothing that needs releasing.
 */
public function __destruct() {
}

/**
 * Default initialisation hook.
 *
 * @return bool Always false in the base class.
 */
public function initialize() {
  return false;
}

/**
 * Default directory read.
 *
 * @return bool Always false in the base class.
 */
public function dir_readdir() {
  return false;
}

/**
 * Default directory rewind.
 *
 * @return bool Always false in the base class.
 */
public function dir_rewinddir() {
  return false;
}

/**
 * Default close. Does nothing and returns nothing.
 */
public function close() {
}

/**
 * Default delete.
 *
 * @return bool Always false in the base class.
 */
public function delete() {
  return false;
}

/**
 * Default end-of-file check.
 *
 * @return bool Always true in the base class.
 */
public function eof() {
  return true;
}

/**
 * Default flush.
 *
 * @return bool Always true in the base class.
 */
public function flush() {
  return true;
}

/**
 * Default read.
 *
 * @param mixed $count_bytes Ignored by the base class.
 * @return bool Always false in the base class.
 */
public function read($count_bytes) {
  return false;
}

/**
 * Default seek.
 *
 * @param mixed $offset Ignored by the base class.
 * @param mixed $whence Ignored by the base class.
 * @return bool Always false in the base class.
 */
public function seek($offset, $whence) {
  return false;
}

/**
 * Default stat.
 *
 * @return bool Always false in the base class.
 */
public function stat() {
  return false;
}

/**
 * Default tell.
 *
 * @return bool Always false in the base class.
 */
public function tell() {
  return false;
}

/**
 * Default write.
 *
 * @param mixed $data Ignored by the base class.
 * @return bool Always false in the base class.
 */
public function write($data) {
  return false;
}
* Subclass can override this method to return the metadata of the underlying
* GCS object.
/**
 * Base implementation of the metadata accessor: raises a notice naming the
 * concrete class and reports failure. Subclasses with metadata override it.
 *
 * @return bool Always false in the base class.
 */
public function getMetaData() {
  $notice = sprintf("%s does not have metadata", get_class($this));
  trigger_error($notice);
  return false;
}
* Subclass can override this method to return the MIME content type of the
* underlying GCS object.
/**
 * Base implementation of the content type accessor: raises a notice naming
 * the concrete class and reports failure. Subclasses that know the MIME type
 * override it.
 *
 * @return bool Always false in the base class.
 */
public function getContentType() {
  $notice = sprintf("%s does not have content type", get_class($this));
  trigger_error($notice);
  return false;
}
* Get the OAuth Token HTTP header for the supplied scope.
* @param $scopes mixed The scopes to acquire the token for.
* @return array The HTTP authorization header for the scopes, using the
* applications service account. False if the call failed.
// Builds the Authorization header for the supplied scopes.
// Returns an empty array when anonymous access is enabled, and false when
// AppIdentityService::getAccessToken() throws an AppIdentityException.
protected function getOAuthTokenHeader($scopes) {
// Anonymous access sends no Authorization header at all.
if ($this->anonymous) {
return [];
try {
$token = AppIdentityService::getAccessToken($scopes);
// NOTE(review): the sprintf() argument list continues past what is visible
// in this section.
return ["Authorization" => sprintf(self::OAUTH_TOKEN_FORMAT,
} catch (AppIdentityException $e) {
return false;
* Create a URL for a target bucket and optional object.
* @visibleForTesting
/**
 * Create a URL for a target bucket and optional object.
 *
 * A single leading "/" is stripped from $object before the name is handed to
 * CloudStorageTools. A null, empty, or non-slash-prefixed $object yields an
 * empty object name.
 *
 * @param string $bucket The name of the bucket.
 * @param string|null $object The object name, or null for the bucket itself.
 * @return mixed Whatever CloudStorageTools::getPublicUrl() returns for the
 *     resulting filename.
 *
 * @visibleForTesting
 */
public static function createObjectUrl($bucket, $object = null) {
  $object_name = "";
  // isset() on the offset is false for both null and the empty string, so an
  // empty $object no longer triggers an "Uninitialized string offset"
  // diagnostic when its first character is inspected.
  if (isset($object[0]) && $object[0] == "/") {
    // Strip leading "/" for $object
    $object_name = substr($object, 1);
  }
  // NOTE(review): an object name without a leading "/" is discarded here, as
  // in the original code - confirm that is intended.

  $gs_filename = CloudStorageTools::getFilename($bucket, $object_name);
  return CloudStorageTools::getPublicUrl($gs_filename, true);
}
* Return a Range HTTP header.
* @param $start_byte int The offset of the first byte in the range.
* @param $end_byte int The offset of the last byte in the range.
* @return array The HTTP Range header for the supplied offsets.
/**
 * Build the HTTP Range header covering an inclusive byte span.
 *
 * @param int $start_byte Offset of the first byte in the range.
 * @param int $end_byte Offset of the last byte in the range; asserted to be
 *     no smaller than $start_byte.
 * @return array Single-entry array mapping "Range" to "bytes=<start>-<end>".
 */
protected function getRangeHeader($start_byte, $end_byte) {
  assert($start_byte <= $end_byte);
  $range_value = sprintf("bytes=%d-%d", $start_byte, $end_byte);
  return ["Range" => $range_value];
}
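* Make a request to GCS through doHttpRequest(), adding the API version header.
* Returns false if the request failed, otherwise an array with the keys:
* 'status_code', 'headers' (array of response headers) and
* 'body' (the response body).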
// Adds the API version header to the caller's headers and delegates to
// doHttpRequest(). Returns false when that call fails, otherwise an array
// holding the 'status_code', 'headers' and 'body' of the response.
protected function makeHttpRequest($url, $method, $headers, $body = null) {
// array_merge(): if the caller already set the same header key, the value
// from $api_version_header wins.
$request_headers = array_merge($headers, self::$api_version_header);
// NOTE(review): the remaining doHttpRequest() arguments are not visible in
// this section.
$result = $this->doHttpRequest($url,
if ($result === false) {
return false;
return [
'status_code' => $result['status_code'],
'headers' => $result['headers'],
'body' => $result['body'],
* Return the value of a header stored in an associative array, using a case
* insensitive comparison on the header name.
* @param $header_name string The name of the header to lookup.
* @param $headers array Associative array of headers.
* @return The value of the header if found, null otherwise.
/**
 * Look up a header in an associative array, comparing header names without
 * regard to case.
 *
 * @param string $header_name The name of the header to look up.
 * @param array $headers Associative array of header name => value.
 * @return mixed The value stored under the first matching name, or null when
 *     no name matches.
 */
protected function getHeaderValue($header_name, $headers) {
  foreach ($headers as $name => $header_value) {
    if (strcasecmp($name, $header_name) !== 0) {
      continue;
    }
    // First match wins; later entries with the same name are not inspected.
    return $header_value;
  }
  return null;
}
// Performs the URLFetch call for makeHttpRequest(), retrying retryable HTTP
// statuses with a randomised exponential back-off. Returns false when the
// call raises an ApplicationError, otherwise an array holding the
// 'status_code', 'headers' and 'body' of the response.
// NOTE(review): many statements in this method are cut off in this section
// (request population, closing braces, loop exits) - confirm against the
// complete file before changing the control flow.
private function doHttpRequest($url, $method, $headers, $body) {
$req = new \google\appengine\URLFetchRequest();
if (isset($body)) {
foreach($headers as $key => $value) {
$h = $req->addHeader();
$resp = new \google\appengine\URLFetchResponse();
// No loop condition: the loop is left from inside its body.
for ($num_retries = 0; ; $num_retries++) {
try {
ApiProxy::makeSyncCall('urlfetch', 'Fetch', $req, $resp);
} catch (ApplicationError $e) {
// NOTE(review): the call that consumes this sprintf() result is not visible
// here.
sprintf("Call to URLFetch failed with application error %d.",
return false;
$status_code = $resp->getStatusCode();
// Retry only while attempts remain ('max_retries' context option), the
// status is in $retryable_statuses, and the script has not timed out.
if ($num_retries < $this->context_options['max_retries'] &&
in_array($status_code, self::$retryable_statuses) &&
(connection_status() & CONNECTION_TIMEOUT) == 0) {
// Sleep a random number of microseconds, up to 2^$num_retries seconds.
usleep(rand(0, 1000000 * pow(2, $num_retries)));
// Re-check for a script timeout that happened during the sleep.
if ((connection_status() & CONNECTION_TIMEOUT) == CONNECTION_TIMEOUT) {
} else {
$response_headers = [];
foreach($resp->getHeaderList() as $header) {
// TODO: Do we need to support multiple headers with the same key?
// As written, a repeated header key overwrites the earlier value.
$response_headers[trim($header->getKey())] = trim($header->getValue());
return [
'status_code' => $resp->getStatusCode(),
'headers' => $response_headers,
'body' => $resp->getContent(),
* Generate the default stat() array, which is both associative and index
* based.
* @access private
/**
 * Build a stat() style array that exposes every field twice: once under its
 * name and once under its numeric position.
 *
 * Fields absent from $stat_args default to 0. Entries are emitted in the
 * order name, position, name, position, ... so the result has 26 elements.
 *
 * @param array $stat_args Field name => value overrides.
 * @return array The combined associative and indexed stat array.
 *
 * @access private
 */
protected function createStatArray($stat_args) {
  $field_names = ["dev", "ino", "mode", "nlink", "uid", "gid", "rdev", "size",
                  "atime", "mtime", "ctime", "blksize", "blocks"];
  $stat = [];
  foreach ($field_names as $position => $field) {
    // array_key_exists() rather than isset(): an explicit null is kept.
    $field_value = array_key_exists($field, $stat_args) ? $stat_args[$field] : 0;
    $stat[$field] = $field_value;
    $stat[$position] = $field_value;
  }
  return $stat;
}
* Given an xml based error response from Cloud Storage, try and extract the
* error code and error message according to the schema described at
* @param string $gcs_result The response body of the last call to Google
* Cloud Storage.
* @param string $code Reference variable where the error code for the last
* message will be returned.
* @param string $message Reference variable where the error detail for the
* last message will be returned.
* @return bool True if the error code and message could be extracted, false
* otherwise.
// Parses an XML error body from Cloud Storage and extracts its <Code> and
// <Message> elements as strings.
// NOTE(review): the documentation describes a $code reference parameter, but
// the signature visible in this section only shows $gcs_result and &$message -
// confirm the full parameter list.
protected function tryParseCloudStorageErrorMessage($gcs_result,
&$message) {
$code = null;
$message = null;
// Switch libxml to internal error handling so that malformed XML does not
// emit PHP warnings while parsing.
// NOTE(review): $old_errors is not passed back to
// libxml_use_internal_errors() anywhere in the visible code - confirm the
// previous setting is restored.
$old_errors = libxml_use_internal_errors(true);
$xml = simplexml_load_string($gcs_result);
// Loose comparison: skip extraction when parsing failed.
if (false != $xml) {
$code = (string) $xml->Code;
$message = (string) $xml->Message;
// Both values stay null, and the result is false, when parsing failed.
return (isset($code) && isset($message));
* Return a formatted error message for the http response.
* @param int $http_status_code The HTTP status code returned from the last
* http request.
* @param string $http_result The response body from the last http request.
* @param string $msg_prefix The prefix to add to the error message that will
* be generated.
* @return string The error message for the last HTTP response.
protected function getErrorMessage($http_status_code,
$msg_prefix = "Cloud Storage Error:") {
if ($this->tryParseCloudStorageErrorMessage($http_result,
$message)) {
return sprintf("%s %s (%s)", $msg_prefix, $message, $code);
} else {
return sprintf("%s %s",