blob: 0a2f9447c0ef4f157595e41730ec41bd54fb71d7 [file] [log] [blame]
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "pdf/document_loader_impl.h"
#include <stddef.h>
#include <stdint.h>
#include <algorithm>
#include <utility>
#include "base/logging.h"
#include "base/numerics/safe_math.h"
#include "base/strings/string_util.h"
#include "pdf/url_loader_wrapper.h"
#include "ppapi/c/pp_errors.h"
#include "ui/gfx/range/range.h"
namespace chrome_pdf {
namespace {
// The distance from last received chunk, when we wait requesting data, using
// current connection (like playing a cassette tape) and do not send new range
// request (like rewind a cassette tape, and continue playing after).
// Experimentally chosen value.
constexpr int kChunkCloseDistance = 10;
// Return true if the HTTP response of |loader| is a successful one and loading
// should continue. 4xx error indicate subsequent requests will fail too.
// e.g. resource has been removed from the server while loading it. 301
// indicates a redirect was returned which won't be successful because we
// disable following redirects for PDF loading (we assume they are already
// resolved by the browser.
bool ResponseStatusSuccess(const URLLoaderWrapper* loader) {
int32_t http_code = loader->GetStatusCode();
return (http_code < 400 && http_code != 301) || http_code >= 500;
}
bool IsValidContentType(const std::string& type) {
return (
base::EndsWith(type, "/pdf", base::CompareCase::INSENSITIVE_ASCII) ||
base::EndsWith(type, ".pdf", base::CompareCase::INSENSITIVE_ASCII) ||
base::EndsWith(type, "/x-pdf", base::CompareCase::INSENSITIVE_ASCII) ||
base::EndsWith(type, "/*", base::CompareCase::INSENSITIVE_ASCII) ||
base::EndsWith(type, "/octet-stream",
base::CompareCase::INSENSITIVE_ASCII) ||
base::EndsWith(type, "/acrobat", base::CompareCase::INSENSITIVE_ASCII) ||
base::EndsWith(type, "/unknown", base::CompareCase::INSENSITIVE_ASCII));
}
} // namespace
DocumentLoaderImpl::Chunk::Chunk() = default;
DocumentLoaderImpl::Chunk::~Chunk() = default;
void DocumentLoaderImpl::Chunk::Clear() {
chunk_index = 0;
data_size = 0;
chunk_data.reset();
}
DocumentLoaderImpl::DocumentLoaderImpl(Client* client)
: client_(client), loader_factory_(this) {}
DocumentLoaderImpl::~DocumentLoaderImpl() = default;
bool DocumentLoaderImpl::Init(std::unique_ptr<URLLoaderWrapper> loader,
const std::string& url) {
DCHECK(url_.empty());
DCHECK(!loader_);
// Check that the initial response status is a valid one.
if (!ResponseStatusSuccess(loader.get()))
return false;
std::string type = loader->GetContentType();
// This happens for PDFs not loaded from http(s) sources.
if (type == "text/plain") {
if (!base::StartsWith(url, "http://",
base::CompareCase::INSENSITIVE_ASCII) &&
!base::StartsWith(url, "https://",
base::CompareCase::INSENSITIVE_ASCII)) {
type = "application/pdf";
}
}
if (!type.empty() && !IsValidContentType(type))
return false;
if (base::StartsWith(loader->GetContentDisposition(), "attachment",
base::CompareCase::INSENSITIVE_ASCII))
return false;
url_ = url;
loader_ = std::move(loader);
if (!loader_->IsContentEncoded())
SetDocumentSize(std::max(0, loader_->GetContentLength()));
int64_t bytes_received = 0;
int64_t total_bytes_to_be_received = 0;
if (GetDocumentSize() == 0 &&
loader_->GetDownloadProgress(&bytes_received,
&total_bytes_to_be_received)) {
SetDocumentSize(std::max(0, static_cast<int>(total_bytes_to_be_received)));
}
SetPartialLoadingEnabled(
partial_loading_enabled_ &&
!base::StartsWith(url, "file://", base::CompareCase::INSENSITIVE_ASCII) &&
loader_->IsAcceptRangesBytes() && !loader_->IsContentEncoded() &&
GetDocumentSize());
ReadMore();
return true;
}
bool DocumentLoaderImpl::IsDocumentComplete() const {
return chunk_stream_.IsComplete();
}
void DocumentLoaderImpl::SetDocumentSize(uint32_t size) {
chunk_stream_.set_eof_pos(size);
}
uint32_t DocumentLoaderImpl::GetDocumentSize() const {
return chunk_stream_.eof_pos();
}
uint32_t DocumentLoaderImpl::BytesReceived() const {
return bytes_received_;
}
void DocumentLoaderImpl::ClearPendingRequests() {
pending_requests_.Clear();
}
bool DocumentLoaderImpl::GetBlock(uint32_t position,
uint32_t size,
void* buf) const {
base::CheckedNumeric<uint32_t> addition_result = position;
addition_result += size;
if (!addition_result.IsValid())
return false;
return chunk_stream_.ReadData(
gfx::Range(position, addition_result.ValueOrDie()), buf);
}
bool DocumentLoaderImpl::IsDataAvailable(uint32_t position,
uint32_t size) const {
base::CheckedNumeric<uint32_t> addition_result = position;
addition_result += size;
if (!addition_result.IsValid())
return false;
return chunk_stream_.IsRangeAvailable(
gfx::Range(position, addition_result.ValueOrDie()));
}
void DocumentLoaderImpl::RequestData(uint32_t position, uint32_t size) {
if (size == 0 || IsDataAvailable(position, size))
return;
const uint32_t document_size = GetDocumentSize();
if (document_size != 0) {
// Check for integer overflow.
base::CheckedNumeric<uint32_t> addition_result = position;
addition_result += size;
if (!addition_result.IsValid())
return;
if (addition_result.ValueOrDie() > document_size)
return;
}
// We have some artifact request from
// PDFiumEngine::OnDocumentComplete() -> FPDFAvail_IsPageAvail after
// document is complete.
// We need this fix in PDFIum. Adding this as a work around.
// Bug: http://code.google.com/p/chromium/issues/detail?id=79996
// Test url:
// http://www.icann.org/en/correspondence/holtzman-to-jeffrey-02mar11-en.pdf
if (!loader_)
return;
RangeSet requested_chunks(chunk_stream_.GetChunksRange(position, size));
requested_chunks.Subtract(chunk_stream_.filled_chunks());
if (requested_chunks.IsEmpty()) {
NOTREACHED();
return;
}
pending_requests_.Union(requested_chunks);
}
void DocumentLoaderImpl::SetPartialLoadingEnabled(bool enabled) {
partial_loading_enabled_ = enabled;
if (!enabled) {
is_partial_loader_active_ = false;
}
}
bool DocumentLoaderImpl::ShouldCancelLoading() const {
if (!loader_)
return true;
if (!partial_loading_enabled_)
return false;
if (pending_requests_.IsEmpty()) {
// Cancel loading if this is unepected data from server.
return !chunk_stream_.IsValidChunkIndex(chunk_.chunk_index) ||
chunk_stream_.IsChunkAvailable(chunk_.chunk_index);
}
const gfx::Range current_range(chunk_.chunk_index,
chunk_.chunk_index + kChunkCloseDistance);
return !pending_requests_.Intersects(current_range);
}
void DocumentLoaderImpl::ContinueDownload() {
if (!ShouldCancelLoading())
return ReadMore();
DCHECK(partial_loading_enabled_);
DCHECK(!IsDocumentComplete());
DCHECK_GT(GetDocumentSize(), 0U);
const uint32_t range_start =
pending_requests_.IsEmpty() ? 0 : pending_requests_.First().start();
RangeSet candidates_for_request(
gfx::Range(range_start, chunk_stream_.total_chunks_count()));
candidates_for_request.Subtract(chunk_stream_.filled_chunks());
DCHECK(!candidates_for_request.IsEmpty());
gfx::Range next_request = candidates_for_request.First();
if (candidates_for_request.Size() == 1 &&
next_request.length() < kChunkCloseDistance) {
// We have only request at the end, try to enlarge it to improve back order
// reading.
const int additional_chunks_count =
kChunkCloseDistance - next_request.length();
int new_start = std::max(
0, static_cast<int>(next_request.start()) - additional_chunks_count);
candidates_for_request =
RangeSet(gfx::Range(new_start, next_request.end()));
candidates_for_request.Subtract(chunk_stream_.filled_chunks());
next_request = candidates_for_request.Last();
}
loader_.reset();
chunk_.Clear();
is_partial_loader_active_ = true;
const uint32_t start = next_request.start() * DataStream::kChunkSize;
const uint32_t length =
std::min(GetDocumentSize() - start,
next_request.length() * DataStream::kChunkSize);
loader_ = client_->CreateURLLoader();
loader_->OpenRange(
url_, url_, start, length,
loader_factory_.NewCallback(&DocumentLoaderImpl::DidOpenPartial));
}
void DocumentLoaderImpl::DidOpenPartial(int32_t result) {
if (result != PP_OK) {
return ReadComplete();
}
if (!ResponseStatusSuccess(loader_.get()))
return ReadComplete();
// Leave position untouched for multiparted responce for now, when we read the
// data we'll get it.
if (loader_->IsMultipart()) {
// Needs more data to calc chunk index.
return ReadMore();
}
// Need to make sure that the server returned a byte-range, since it's
// possible for a server to just ignore our byte-range request and just
// return the entire document even if it supports byte-range requests.
// i.e. sniff response to
// http://www.act.org/compass/sample/pdf/geometry.pdf
int start_pos = 0;
if (loader_->GetByteRangeStart(&start_pos)) {
if (start_pos % DataStream::kChunkSize != 0)
return ReadComplete();
DCHECK(!chunk_.chunk_data);
chunk_.chunk_index = chunk_stream_.GetChunkIndex(start_pos);
} else {
SetPartialLoadingEnabled(false);
}
return ContinueDownload();
}
void DocumentLoaderImpl::ReadMore() {
loader_->ReadResponseBody(
buffer_, sizeof(buffer_),
loader_factory_.NewCallback(&DocumentLoaderImpl::DidRead));
}
void DocumentLoaderImpl::DidRead(int32_t result) {
if (result < 0) {
// An error occurred.
// The renderer will detect that we're missing data and will display a
// message.
return ReadComplete();
}
if (result == 0) {
loader_.reset();
if (!is_partial_loader_active_)
return ReadComplete();
return ContinueDownload();
}
if (loader_->IsMultipart()) {
int start_pos = 0;
if (!loader_->GetByteRangeStart(&start_pos))
return ReadComplete();
DCHECK(!chunk_.chunk_data);
chunk_.chunk_index = chunk_stream_.GetChunkIndex(start_pos);
}
if (!SaveBuffer(buffer_, result))
return ReadMore();
if (IsDocumentComplete())
return ReadComplete();
return ContinueDownload();
}
bool DocumentLoaderImpl::SaveBuffer(char* input, uint32_t input_size) {
const uint32_t document_size = GetDocumentSize();
bytes_received_ += input_size;
bool chunk_saved = false;
bool loading_pending_request = pending_requests_.Contains(chunk_.chunk_index);
while (input_size > 0) {
if (chunk_.data_size == 0)
chunk_.chunk_data = std::make_unique<DataStream::ChunkData>();
const uint32_t new_chunk_data_len =
std::min(DataStream::kChunkSize - chunk_.data_size, input_size);
memcpy(chunk_.chunk_data->data() + chunk_.data_size, input,
new_chunk_data_len);
chunk_.data_size += new_chunk_data_len;
if (chunk_.data_size == DataStream::kChunkSize ||
(document_size > 0 && document_size <= EndOfCurrentChunk())) {
pending_requests_.Subtract(
gfx::Range(chunk_.chunk_index, chunk_.chunk_index + 1));
SaveChunkData();
chunk_saved = true;
}
input += new_chunk_data_len;
input_size -= new_chunk_data_len;
}
client_->OnNewDataReceived();
if (IsDocumentComplete())
return true;
if (!chunk_saved)
return false;
if (loading_pending_request &&
!pending_requests_.Contains(chunk_.chunk_index)) {
client_->OnPendingRequestComplete();
}
return true;
}
void DocumentLoaderImpl::SaveChunkData() {
chunk_stream_.SetChunkData(chunk_.chunk_index, std::move(chunk_.chunk_data));
chunk_.data_size = 0;
++chunk_.chunk_index;
}
uint32_t DocumentLoaderImpl::EndOfCurrentChunk() const {
return chunk_.chunk_index * DataStream::kChunkSize + chunk_.data_size;
}
void DocumentLoaderImpl::ReadComplete() {
if (GetDocumentSize() != 0) {
// If there is remaining data in |chunk_|, then save whatever can be saved.
// e.g. In the underrun case.
if (chunk_.data_size != 0)
SaveChunkData();
} else {
uint32_t eof = EndOfCurrentChunk();
if (!chunk_stream_.filled_chunks().IsEmpty()) {
eof = std::max(
chunk_stream_.filled_chunks().Last().end() * DataStream::kChunkSize,
eof);
}
SetDocumentSize(eof);
if (eof == EndOfCurrentChunk())
SaveChunkData();
}
loader_.reset();
if (IsDocumentComplete()) {
client_->OnDocumentComplete();
} else {
client_->OnDocumentCanceled();
}
}
} // namespace chrome_pdf