blob: 453598777cfd6d7e66246992c457884972d67e19 [file] [log] [blame]
<?php
/**
* Copyright 2007 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Google Cloud Storage Read Client - Implements only the methods required to
* read bytes from GCS using stream wrappers. For a fully fledged client
* to access Google Cloud Storage you should consult the Google API client.
*
*/
namespace google\appengine\ext\cloud_storage_streams;
use google\appengine\util\StringUtil;
/**
* Google Cloud Storage Client for reading objects.
*/
final class CloudStorageReadClient extends CloudStorageClient {
// Prefix of the response headers that carry user-defined object metadata.
const METADATA_HEADER_PREFIX = 'x-goog-meta-';
// Buffer holding the body of the most recently fetched block of the object.
private $read_buffer;
// Offset within $read_buffer of the next byte that read() will hand out.
private $buffer_read_position = 0;
// Offset within the object at which the block held in $read_buffer starts.
private $object_block_start_position = 0;
// Offset within the object to fetch from once this buffer is exhausted. It is
// set to null when a response shows there is nothing further to fetch.
private $next_read_position = 0;
// Overall size of the object in GCS, taken from the response headers.
private $object_total_length;
// ETag of the object taken from the response headers. Once set it is sent as
// an If-Match precondition so later blocks come from the same object version.
private $object_etag;
// Set once a read has run off the end of the object; cleared again by seek().
private $eof = false;
// When we first read the file we partially complete the stat_result that
// we then return in calls to stat(). Empty until the first successful fetch.
private $stat_result = [];
// Metadata for the object, extracted from the 'x-goog-meta-' response headers.
private $metadata = [];
// Content-Type for the object, taken from the response headers.
private $content_type;
// HTTP status codes that indicate that there is an object to read, and we
// need to process the response. Responses with these codes are also the only
// ones stored in the read cache.
private static $valid_status_codes = [HttpResponse::OK,
HttpResponse::PARTIAL_CONTENT,
HttpResponse::RANGE_NOT_SATISFIABLE];
// Client for caching the results of GCS reads.
private $memcache_client;
/**
 * Creates a read client for one object.
 *
 * All three arguments are handed unchanged to the parent client's
 * constructor. A Memcache client is then created for caching the results of
 * GCS reads.
 */
public function __construct($bucket, $object, $context) {
  parent::__construct($bucket, $object, $context);

  $cache_client = new \Memcache();
  $this->memcache_client = $cache_client;
}
/**
 * Tears the client down. This class holds nothing of its own that needs
 * releasing, so the work is delegated entirely to the parent destructor.
 */
public function __destruct() {
  parent::__destruct();
}
/**
 * Called when the stream is opened.
 *
 * Fetches the first block of the object (starting at offset 0) straight
 * away, which validates that
 * - the object exists
 * - the app has the ACL to access it.
 *
 * @return bool Whatever fillReadBuffer() reports for the first block.
 */
public function initialize() {
  $first_block_loaded = $this->fillReadBuffer(0);
  return $first_block_loaded;
}
/**
 * Read at most $count_bytes from the object.
 *
 * Bytes are served from the in-memory buffer. When the buffer has been used
 * up and the end of the object has not been reached, the next block is
 * retrieved from storage before serving.
 *
 * @param int $count_bytes Upper bound on the number of bytes to return.
 * @return string|bool The bytes read, or false when nothing could be read
 *     (end of object reached or the fetch of the next block failed).
 */
public function read($count_bytes) {
  $remaining = strlen($this->read_buffer) - $this->buffer_read_position;

  // Buffer exhausted: decide between end-of-file and fetching another block.
  if (0 === $remaining && !$this->eof) {
    $absolute_position = $this->object_block_start_position +
                         $this->buffer_read_position;
    $at_object_end = ($absolute_position === $this->object_total_length);
    if ($at_object_end || !isset($this->next_read_position)) {
      $this->eof = true;
      return false;
    }

    $refilled = $this->fillReadBuffer($this->next_read_position);
    if (!$refilled) {
      return false;
    }

    // The buffer was replaced, so work out afresh what can be served.
    $remaining = strlen($this->read_buffer) - $this->buffer_read_position;
  }

  if ($remaining <= 0) {
    return false;
  }

  $length = min($remaining, $count_bytes);
  $start = $this->buffer_read_position;
  $this->buffer_read_position += $length;
  return substr($this->read_buffer, $start, $length);
}
/**
 * Reports whether reading has run off the end of the object.
 *
 * @return bool true once the end of the file has been reached, false
 *     otherwise.
 */
public function eof() {
  $reached_end = $this->eof;
  return $reached_end;
}
/**
 * Seek within the current file. We only deal with SEEK_SET, which we expect
 * the upper layers of PHP to convert any SEEK_CUR or SEEK_END calls to.
 *
 * A target inside the currently buffered block just moves the position within
 * the buffer. Any other target discards the buffer and records the target as
 * both the current position and the place to fetch from on the next read().
 *
 * @param int $offset Absolute position in the object to move to.
 * @param int $whence Seek mode; anything other than SEEK_SET is rejected with
 *     an E_USER_WARNING.
 * @return bool true if the position was changed, false if the mode is
 *     unsupported, the offset is negative, or the offset lies beyond the
 *     known size of the object.
 */
public function seek($offset, $whence) {
  if ($whence != SEEK_SET) {
    trigger_error(sprintf("Unsupported seek mode: %d", $whence),
                  E_USER_WARNING);
    return false;
  }

  if ($offset < 0) {
    return false;
  }

  // If we know the size, then make sure they are only seeking within it.
  if (isset($this->object_total_length) &&
      $offset > $this->object_total_length) {
    return false;
  }

  // Clear EOF and work it out next time they read.
  $this->eof = false;

  // Check if we can seek inside the current buffer.
  $block_start = $this->object_block_start_position;
  $block_end = $block_start + strlen($this->read_buffer);
  if ($block_start <= $offset && $offset < $block_end) {
    $this->buffer_read_position = $offset - $block_start;
    return true;
  }

  // The target is outside the buffered block: drop the buffer and fetch from
  // the target on the next read. The block start must move with it, because
  // tell() and the end-of-object check in read() are both computed as
  // block start + buffer position. Leaving the old block start in place
  // made tell() report the previous block's offset and could make read()
  // declare EOF straight after seeking back from the end of the object.
  $this->read_buffer = "";
  $this->buffer_read_position = 0;
  $this->object_block_start_position = $offset;
  $this->next_read_position = $offset;
  return true;
}
/**
 * Return the stat array built when the object was first fetched.
 *
 * @return array|bool The stored stat result, or false if none has been built
 *     yet.
 */
public function stat() {
  return empty($this->stat_result) ? false : $this->stat_result;
}
/**
 * Returns the current position in the object, computed as the offset at
 * which the buffered block starts plus the position reached inside that
 * buffer.
 *
 * (Having tell() at this level in the stack seems bonkers.)
 */
public function tell() {
  $block_start = $this->object_block_start_position;
  return $this->buffer_read_position + $block_start;
}
/**
 * Returns the metadata array that was extracted from the response headers,
 * or an empty array if nothing has been fetched yet.
 */
public function getMetaData() {
  $object_metadata = $this->metadata;
  return $object_metadata;
}
/**
 * Returns the Content-Type value taken from the response headers; it is
 * unset (null) until a fetch has stored one.
 */
public function getContentType() {
  $type = $this->content_type;
  return $type;
}
/**
 * Override the makeHttpRequest function so we can implement caching.
 * If caching is enabled then we try and retrieve a matching request for the
 * object name and range from memcache.
 * If we find a result in memcache, and optimistic caching is enabled then
 * we return that result immediately without checking if the object has
 * changed in GCS. Otherwise, we will issue an 'If-None-Match' request with
 * the ETag of the cached result to ensure it is still current.
 *
 * Optimistic caching is best suited when the application is solely updating
 * objects in cloud storage, as the cache can be invalidated when the object
 * is updated by the application.
 *
 * When the caller supplied an 'If-Match' header that equals the cached ETag,
 * that header is replaced by 'If-None-Match' for the revalidation. Should the
 * server then answer with a different version of the object, the fresh
 * response is still cached but is reported to the caller with status
 * PRECONDITION_FAILED, which is what the caller's 'If-Match' would have
 * produced.
 *
 * @param string $url The URL to request; also part of the cache key.
 * @param string $method Passed through to the parent implementation.
 * @param array $headers Request headers; 'Range' (if any) is part of the
 *     cache key and 'If-Match' is honoured as described above.
 * @param mixed $body Passed through to the parent implementation.
 * @return array|bool The response array (from the server or the cache), or
 *     false if the parent request failed.
 */
protected function makeHttpRequest($url, $method, $headers, $body = null) {
  if (!$this->context_options['enable_cache']) {
    return parent::makeHttpRequest($url, $method, $headers, $body);
  }

  // A request without a Range header is keyed with an empty range instead of
  // reading an undefined array index.
  $range = isset($headers['Range']) ? $headers['Range'] : '';
  $cache_key = sprintf(parent::MEMCACHE_KEY_FORMAT, $url, $range);
  $cache_obj = $this->memcache_client->get($cache_key);

  // ETag used to revalidate the cached entry, if any.
  $cache_etag = null;
  // ETag the caller insisted on via If-Match when that header was replaced.
  $required_etag = null;

  if (false !== $cache_obj) {
    if ($this->context_options['enable_optimistic_cache']) {
      return $cache_obj;
    }

    $cache_etag = $this->getHeaderValue('ETag', $cache_obj['headers']);
    if (array_key_exists('If-Match', $headers)) {
      // We will perform a If-None-Match to validate the cache object, only
      // if it has the same ETag value as what we are asking for.
      if ($headers['If-Match'] === $cache_etag) {
        $required_etag = $cache_etag;
        unset($headers['If-Match']);
      } else {
        // We are asking for a different object than what is in the cache.
        $cache_etag = null;
      }
    }

    if (isset($cache_etag)) {
      $headers['If-None-Match'] = $cache_etag;
    }
  }

  $result = parent::makeHttpRequest($url, $method, $headers, $body);
  if (false === $result) {
    return false;
  }

  $status_code = $result['status_code'];
  if (HttpResponse::NOT_MODIFIED === $status_code) {
    return $cache_obj;
  }

  if (in_array($status_code, self::$valid_status_codes)) {
    $this->memcache_client->set($cache_key, $result, 0,
        $this->context_options['read_cache_expiry_seconds']);
  }

  // The caller's If-Match was traded for If-None-Match above. If the server
  // returned object data carrying another ETag, the precondition the caller
  // asked for has failed, so say so rather than handing back bytes from a
  // different version of the object.
  $has_object_data = in_array($status_code, [HttpResponse::OK,
                                             HttpResponse::PARTIAL_CONTENT]);
  if (isset($required_etag) && $has_object_data) {
    $result_etag = $this->getHeaderValue('ETag', $result['headers']);
    if ($result_etag !== $required_etag) {
      $result['status_code'] = HttpResponse::PRECONDITION_FAILED;
    }
  }

  return $result;
}
/**
 * Fill our internal buffer with data, by making a http request to Google
 * Cloud Storage.
 *
 * A ranged GET for DEFAULT_READ_SIZE bytes starting at $read_position is
 * issued, guarded by If-Match once an ETag is known. On success the buffer,
 * the position bookkeeping, the object size and (on the first call) the stat
 * result are updated from the response.
 *
 * @param int $read_position Offset in the object to start fetching from.
 * @return bool true if the response was processed (including a
 *     RANGE_NOT_SATISFIABLE response, which marks EOF); false if no OAuth
 *     token could be acquired, the request failed, the object was not found,
 *     the object changed, or an unexpected status was returned. All failures
 *     except "not found" raise an E_USER_WARNING.
 */
private function fillReadBuffer($read_position) {
  $headers = $this->getOAuthTokenHeader(parent::READ_SCOPE);
  if ($headers === false) {
    trigger_error("Unable to acquire OAuth token.", E_USER_WARNING);
    return false;
  }

  $end_range = $read_position + parent::DEFAULT_READ_SIZE - 1;
  $range = $this->getRangeHeader($read_position, $end_range);
  $headers = array_merge($headers, $range);

  // If we have an ETag from an earlier read then use it to ensure we are
  // retrieving the same object.
  if (isset($this->object_etag)) {
    $headers["If-Match"] = $this->object_etag;
  }

  $http_response = $this->makeHttpRequest($this->url, "GET", $headers);
  if ($http_response === false) {
    trigger_error("Unable to connect to Google Cloud Storage Service.",
                  E_USER_WARNING);
    return false;
  }

  $status_code = $http_response['status_code'];
  if ($status_code === HttpResponse::NOT_FOUND) {
    return false;
  }
  if ($status_code === HttpResponse::PRECONDITION_FAILED) {
    trigger_error("Object content has changed.", E_USER_WARNING);
    return false;
  }
  if (!in_array($status_code, self::$valid_status_codes)) {
    trigger_error($this->getErrorMessage($status_code,
                                         $http_response['body']),
                  E_USER_WARNING);
    return false;
  }

  $response_headers = $http_response['headers'];
  // The stat result is only ever built by the first processed response.
  $first_fill = empty($this->stat_result);

  $this->read_buffer = $http_response['body'];
  $this->buffer_read_position = 0;
  $this->object_block_start_position = $read_position;

  if ($status_code == HttpResponse::OK) {
    // We got the complete object in the response, so use the Content-Length
    // as the object size; fall back to the body size if the header is absent
    // rather than recording a size of zero.
    $body_length = strlen($this->read_buffer);
    $content_length = $this->getHeaderValue('Content-Length',
                                            $response_headers);
    $this->object_total_length = isset($content_length)
        ? intval($content_length)
        : $body_length;
    // The buffer holds the object from its first byte, whatever range was
    // asked for, so the block starts at 0 and the requested position is an
    // offset into it.
    $this->object_block_start_position = 0;
    $this->buffer_read_position = min($read_position, $body_length);
    $this->next_read_position = null;
  } else if ($status_code == HttpResponse::RANGE_NOT_SATISFIABLE) {
    // We've read past the end of the object ... no more data.
    $this->read_buffer = "";
    $this->eof = true;
    $this->next_read_position = null;
    if (!isset($this->object_total_length)) {
      $this->object_total_length = 0;
    }
  } else {
    $content_range = $this->getHeaderValue('Content-Range',
                                           $response_headers);
    if (isset($content_range) &&
        preg_match(parent::CONTENT_RANGE_REGEX, $content_range, $m) === 1) {
      $this->next_read_position = intval($m[2]) + 1;
      $this->object_total_length = intval($m[3]);
    } else {
      // No usable Content-Range: carry on directly after the bytes that
      // actually arrived. Keeping the previous next_read_position would
      // request this same block again on the next refill.
      $received = strlen($this->read_buffer);
      $this->next_read_position =
          $received > 0 ? $read_position + $received : null;
    }
  }

  // A RANGE_NOT_SATISFIABLE response describes an error, not the object, so
  // it must not overwrite metadata, Content-Type or ETag that an earlier
  // response already supplied (losing the ETag would also drop the If-Match
  // guard on later reads). On the very first response the values are taken
  // regardless.
  if ($first_fill || $status_code != HttpResponse::RANGE_NOT_SATISFIABLE) {
    $this->metadata = self::extractMetaData($response_headers);
    $this->content_type = $this->getHeaderValue('Content-Type',
                                                $response_headers);
    $this->object_etag = $this->getHeaderValue('ETag', $response_headers);
  }

  if ($first_fill) {
    $stat_args = ['size' => $this->object_total_length,
                  'mode' => parent::S_IFREG];

    $last_modified = $this->getHeaderValue('Last-Modified',
                                           $response_headers);
    if (isset($last_modified)) {
      $unix_time = strtotime($last_modified);
      if ($unix_time !== false) {
        $stat_args["mtime"] = $unix_time;
      }
    }
    $this->stat_result = $this->createStatArray($stat_args);
  }

  return true;
}
/**
 * Extract metadata from HTTP response headers.
 *
 * Every header whose lower-cased name starts with the 'x-goog-meta-' prefix
 * contributes one entry: the key is the header name with the prefix removed
 * (original letter case kept) and the value is the header value.
 *
 * @param array $headers Response headers, name => value.
 * @return array The metadata entries found; empty if there were none.
 */
private static function extractMetaData($headers) {
  $prefix_length = strlen(self::METADATA_HEADER_PREFIX);
  $found = [];

  foreach ($headers as $name => $header_value) {
    $is_metadata = StringUtil::startsWith(strtolower($name),
                                          self::METADATA_HEADER_PREFIX);
    if (!$is_metadata) {
      continue;
    }
    $found[substr($name, $prefix_length)] = $header_value;
  }

  return $found;
}
}