blob: ff612a007af346d584c6048d69b44bdd562f9405 [file] [log] [blame]
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "net/filter/sdch_filter.h"
#include <ctype.h>
#include <limits.h>
#include <algorithm>
#include "base/logging.h"
#include "base/metrics/histogram.h"
#include "base/values.h"
#include "net/base/sdch_manager.h"
#include "net/base/sdch_net_log_params.h"
#include "net/url_request/url_request_context.h"
#include "sdch/open-vcdiff/src/google/vcdecoder.h"
namespace net {
namespace {
// Disambiguate various types of responses that trigger a meta-refresh,
// failure, or fallback to pass-through.
//
// These values are recorded with UMA_HISTOGRAM_ENUMERATION elsewhere in this
// file, using RESPONSE_MAX as the boundary value, so RESPONSE_MAX must remain
// the last enumerator.
enum ResponseCorruptionDetectionCause {
  RESPONSE_NONE,

  // 404 Http Response Code
  RESPONSE_404 = 1,

  // Not a 200 Http Response Code
  RESPONSE_NOT_200 = 2,

  // Cached before dictionary retrieved.
  RESPONSE_OLD_UNENCODED = 3,

  // Speculative but incorrect SDCH filtering was added.
  RESPONSE_TENTATIVE_SDCH = 4,

  // Missing correct dict for decoding.
  RESPONSE_NO_DICTIONARY = 5,

  // Not an SDCH response but should be.
  RESPONSE_CORRUPT_SDCH = 6,

  // No dictionary was advertised with the request, the server claims
  // to have encoded with SDCH anyway, but it isn't an SDCH response.
  RESPONSE_ENCODING_LIE = 7,

  RESPONSE_MAX,
};

// Returns a short, human-readable name for |cause|.
//
// Every enumerator maps to its own string; RESPONSE_MAX is not a real cause
// and maps to an error marker. A value that matches no enumerator yields
// "<unknown>".
//
// Each case returns directly. The switch previously had no |break|
// statements, so every valid value fell through to the RESPONSE_MAX string.
const char* ResponseCorruptionDetectionCauseToString(
    ResponseCorruptionDetectionCause cause) {
  switch (cause) {
    case RESPONSE_NONE:
      return "NONE";
    case RESPONSE_404:
      return "404";
    case RESPONSE_NOT_200:
      return "NOT_200";
    case RESPONSE_OLD_UNENCODED:
      return "OLD_UNENCODED";
    case RESPONSE_TENTATIVE_SDCH:
      return "TENTATIVE_SDCH";
    case RESPONSE_NO_DICTIONARY:
      return "NO_DICTIONARY";
    case RESPONSE_CORRUPT_SDCH:
      return "CORRUPT_SDCH";
    case RESPONSE_ENCODING_LIE:
      return "ENCODING_LIE";
    case RESPONSE_MAX:
      return "<Error: max enum value>";
  }
  // No default label above, so the compiler can still warn when a new
  // enumerator is added without a matching case.
  return "<unknown>";
}
// Builds the NetLog parameters describing a response-corruption detection.
//
// The result is a dictionary with three entries: "cause" (the textual name of
// |cause|), "cached" (the |cached| flag) and "net_error" (always ERR_FAILED).
// |log_level| is accepted but not consulted. The dictionary is allocated here
// and returned as a raw pointer; presumably the NetLog caller takes ownership
// (confirm against NetLog's callback contract).
base::Value* NetLogSdchResponseCorruptionDetectionCallback(
    ResponseCorruptionDetectionCause cause,
    bool cached,
    NetLog::LogLevel log_level) {
  base::DictionaryValue* event_params = new base::DictionaryValue();

  const char* const cause_name =
      ResponseCorruptionDetectionCauseToString(cause);
  event_params->SetString("cause", cause_name);
  event_params->SetBoolean("cached", cached);
  event_params->SetInteger("net_error", ERR_FAILED);

  return event_params;
}
} // namespace
// Constructs a filter bound to |filter_context|.
//
// The filter starts out uninitialized, with no dictionary and empty buffers.
// The MIME type and URL of the response are copied out of the context here;
// debug builds assert that both are available and that the request context
// has an SdchManager.
SdchFilter::SdchFilter(const FilterContext& filter_context)
    : filter_context_(filter_context),
      decoding_status_(DECODING_UNINITIALIZED),
      dictionary_hash_(),
      dictionary_hash_is_plausible_(false),
      dictionary_(NULL),
      url_request_context_(filter_context.GetURLRequestContext()),
      dest_buffer_excess_(),
      dest_buffer_excess_index_(0),
      source_bytes_(0),
      output_bytes_(0),
      possible_pass_through_(false) {
  const bool has_mime_type = filter_context.GetMimeType(&mime_type_);
  DCHECK(has_mime_type);

  const bool has_url = filter_context.GetURL(&url_);
  DCHECK(has_url);

  DCHECK(url_request_context_->sdch_manager());
}
// Tears down the filter and records statistics about how it was used.
//
// Nearly everything here is instrumentation that can be deleted once SDCH is
// considered stable. The exceptions are the two SdchManager calls: an
// unfinished decode blacklists the domain, and a successful network decode
// re-enables latency experiments for the URL.
SdchFilter::~SdchFilter() {
  // Running count of filters destroyed so far; reported when a filter ends in
  // the meta-refresh recovery state.
  static int filter_use_count = 0;
  ++filter_use_count;
  if (decoding_status_ == META_REFRESH_RECOVERY) {
    UMA_HISTOGRAM_COUNTS("Sdch3.FilterUseBeforeDisabling", filter_use_count);
  }

  // A decoder that cannot finish cleanly means the SDCH stream was truncated.
  if (vcdiff_streaming_decoder_.get() &&
      !vcdiff_streaming_decoder_->FinishDecoding()) {
    decoding_status_ = DECODING_ERROR;
    LogSdchProblem(SDCH_INCOMPLETE_SDCH_CONTENT);
    // Blacklist so that a reload fetches non-SDCH content. The blacklisting
    // "wears off" quickly; it only exists so that the user cannot get stuck.
    url_request_context_->sdch_manager()->BlacklistDomain(
        url_, SDCH_INCOMPLETE_SDCH_CONTENT);
    UMA_HISTOGRAM_COUNTS("Sdch3.PartialBytesIn",
                         static_cast<int>(filter_context_.GetByteReadCount()));
    UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffIn", source_bytes_);
    UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffOut", output_bytes_);
  }

  // Output still queued at destruction: filter chaining error, or premature
  // teardown.
  if (!dest_buffer_excess_.empty()) {
    LogSdchProblem(SDCH_UNFLUSHED_CONTENT);
    UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBytesIn",
                         static_cast<int>(filter_context_.GetByteReadCount()));
    UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBufferSize",
                         dest_buffer_excess_.size());
    UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffIn", source_bytes_);
    UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffOut", output_bytes_);
  }

  if (filter_context_.IsCachedContent()) {
    // Not a real error, but it is useful to have this tally.
    // TODO(jar): Remove this stat after SDCH stability is validated.
    LogSdchProblem(SDCH_CACHE_DECODED);
    // Cached content needs no timing stats, and ratios were already recorded.
    return;
  }

  // Final per-state accounting for content that came from the network.
  switch (decoding_status_) {
    case DECODING_IN_PROGRESS: {
      if (output_bytes_ != 0) {
        UMA_HISTOGRAM_PERCENTAGE(
            "Sdch3.Network_Decode_Ratio_a",
            static_cast<int>((filter_context_.GetByteReadCount() * 100) /
                             output_bytes_));
      }
      UMA_HISTOGRAM_COUNTS("Sdch3.Network_Decode_Bytes_VcdiffOut_a",
                           output_bytes_);
      filter_context_.RecordPacketStats(FilterContext::SDCH_DECODE);
      // Allow latency experiments to proceed.
      url_request_context_->sdch_manager()->SetAllowLatencyExperiment(url_,
                                                                      true);
      break;
    }
    case PASS_THROUGH: {
      filter_context_.RecordPacketStats(FilterContext::SDCH_PASSTHROUGH);
      break;
    }
    case DECODING_UNINITIALIZED: {
      LogSdchProblem(SDCH_UNINITIALIZED);
      break;
    }
    case WAITING_FOR_DICTIONARY_SELECTION: {
      LogSdchProblem(SDCH_PRIOR_TO_DICTIONARY);
      break;
    }
    case DECODING_ERROR: {
      LogSdchProblem(SDCH_DECODE_ERROR);
      break;
    }
    case META_REFRESH_RECOVERY: {
      // Already accounted for when the state was set.
      break;
    }
  }  // end of switch.
}
// Moves the filter from the uninitialized state to waiting for a dictionary.
//
// Returns false, changing nothing, if the filter has already been
// initialized; returns true otherwise. When |filter_type| is
// FILTER_TYPE_SDCH_POSSIBLE the filter also remembers that it was only added
// speculatively.
bool SdchFilter::InitDecoding(Filter::FilterType filter_type) {
  const bool already_initialized =
      (DECODING_UNINITIALIZED != decoding_status_);
  if (already_initialized) {
    return false;
  }

  // The SDCH filter was guessed at rather than required by the response.
  if (filter_type == FILTER_TYPE_SDCH_POSSIBLE) {
    possible_pass_through_ = true;
  }

  // The decoder itself is only created once a dictionary is in hand.
  decoding_status_ = WAITING_FOR_DICTIONARY_SELECTION;
  return true;
}
// HTML that ReadFilteredData() emits in place of the response body when it
// falls back to meta-refresh recovery. Both variants begin with a
// zero-delay meta-refresh tag. When NDEBUG is not defined, the string also
// contains a visible banner telling the user that the page will reload; when
// NDEBUG is defined, only the refresh tag is emitted.
#ifndef NDEBUG
static const char* kDecompressionErrorHtml =
"<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>"
"<div style=\"position:fixed;top:0;left:0;width:100%;border-width:thin;"
"border-color:black;border-style:solid;text-align:left;font-family:arial;"
"font-size:10pt;foreground-color:black;background-color:white\">"
"An error occurred. This page will be reloaded shortly. "
"Or press the \"reload\" button now to reload it immediately."
"</div>";
#else
static const char* kDecompressionErrorHtml =
"<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>";
#endif
// Writes filtered output into |dest_buffer|.
//
// On entry |*dest_len| is the capacity of |dest_buffer|; on return it holds
// the number of bytes actually written (0 on the early-exit paths).
//
// The first call(s) resolve the dictionary via InitializeDictionary(). If
// that fails, this function picks a recovery strategy: pass the raw bytes
// through, or replace the body with a meta-refresh page (HTML only). After
// that, each call first drains |dest_buffer_excess_| and then, depending on
// |decoding_status_|, decodes more VCDIFF input, copies input through
// unchanged, or swallows input.
//
// Returns:
//   FILTER_ERROR          - |dest_buffer| is null or has no space, the body
//                           failed to decode, or recovery was needed for
//                           content whose MIME type is not text/html.
//   FILTER_NEED_MORE_DATA - more input is required before further output can
//                           be produced.
//   FILTER_OK             - the output buffer was filled.
//   In PASS_THROUGH mode the status is whatever CopyOut() returns.
Filter::FilterStatus SdchFilter::ReadFilteredData(char* dest_buffer,
int* dest_len) {
int available_space = *dest_len;
*dest_len = 0; // Nothing output yet.
if (!dest_buffer || available_space <= 0)
return FILTER_ERROR;
if (WAITING_FOR_DICTIONARY_SELECTION == decoding_status_) {
FilterStatus status = InitializeDictionary();
if (FILTER_NEED_MORE_DATA == status)
return FILTER_NEED_MORE_DATA;
if (FILTER_ERROR == status) {
DCHECK_EQ(DECODING_ERROR, decoding_status_);
DCHECK_EQ(0u, dest_buffer_excess_index_);
DCHECK(dest_buffer_excess_.empty());
// This is where we try very hard to do error recovery, and make this
// protocol robust in the face of proxies that do many different things.
// If we decide that things are looking very bad (too hard to recover),
// we may even issue a "meta-refresh" to reload the page without an SDCH
// advertisement (so that we are sure we're not hurting anything).
//
// Watch out for an error page inserted by the proxy as part of a 40x
// error response. When we see such content tampering, we certainly
// need to fall into the meta-refresh case.
//
// Branches below that set |decoding_status_| to PASS_THROUGH forward the
// raw bytes; branches that leave it at DECODING_ERROR end up in the
// meta-refresh path further down.
ResponseCorruptionDetectionCause cause = RESPONSE_NONE;
if (filter_context_.GetResponseCode() == 404) {
// We could be more generous, but for now, only a "NOT FOUND" code will
// cause a pass through. All other bad codes will fall into a
// meta-refresh.
LogSdchProblem(SDCH_PASS_THROUGH_404_CODE);
cause = RESPONSE_404;
decoding_status_ = PASS_THROUGH;
} else if (filter_context_.GetResponseCode() != 200) {
// We need to meta-refresh, with SDCH disabled.
cause = RESPONSE_NOT_200;
} else if (filter_context_.IsCachedContent()
&& !dictionary_hash_is_plausible_) {
// We must have hit the back button, and gotten content that was fetched
// before we *really* advertised SDCH and a dictionary.
LogSdchProblem(SDCH_PASS_THROUGH_OLD_CACHED);
decoding_status_ = PASS_THROUGH;
cause = RESPONSE_OLD_UNENCODED;
} else if (possible_pass_through_) {
// This is the potentially most graceful response. There really was no
// error. We were just overly cautious when we added a TENTATIVE_SDCH.
// We added the sdch coding tag, and it should not have been added.
// This can happen in server experiments, where the server decides
// not to use sdch, even though there is a dictionary. To be
// conservative, we locally added the tentative sdch (fearing that a
// proxy stripped it!) and we must now recant (pass through).
//
// However.... just to be sure we don't get burned by proxies that
// re-compress with gzip or other system, we can sniff to see if this
// is compressed data etc. For now, we do nothing, which gets us into
// the meta-refresh result.
// TODO(jar): Improve robustness by sniffing for valid text that we can
// actually use re: decoding_status_ = PASS_THROUGH;
cause = RESPONSE_TENTATIVE_SDCH;
} else if (dictionary_hash_is_plausible_) {
// We need a meta-refresh since we don't have the dictionary.
// The common cause is a restart of the browser, where we try to render
// cached content that was saved when we had a dictionary.
cause = RESPONSE_NO_DICTIONARY;
} else if (filter_context_.SdchResponseExpected()) {
// This is a very corrupt SDCH request response. We can't decode it.
// We'll use a meta-refresh, and get content without asking for SDCH.
// This will also progressively disable SDCH for this domain.
cause = RESPONSE_CORRUPT_SDCH;
} else {
// One of the first 9 bytes precluded consideration as a hash.
// This can't be an SDCH payload, even though the server said it was.
// This is a major error, as the server or proxy tagged this SDCH even
// though it is not!
// Meta-refresh won't help, as we didn't advertise an SDCH dictionary!!
// Worse yet, meta-refresh could lead to an infinite refresh loop.
LogSdchProblem(SDCH_PASSING_THROUGH_NON_SDCH);
decoding_status_ = PASS_THROUGH;
// ... but further back-off on advertising SDCH support.
url_request_context_->sdch_manager()->BlacklistDomain(
url_, SDCH_PASSING_THROUGH_NON_SDCH);
cause = RESPONSE_ENCODING_LIE;
}
DCHECK_NE(RESPONSE_NONE, cause);
// Use if statement rather than ?: because UMA_HISTOGRAM_ENUMERATION
// caches the histogram name based on the call site.
if (filter_context_.IsCachedContent()) {
UMA_HISTOGRAM_ENUMERATION(
"Sdch3.ResponseCorruptionDetection.Cached", cause, RESPONSE_MAX);
} else {
UMA_HISTOGRAM_ENUMERATION(
"Sdch3.ResponseCorruptionDetection.Uncached", cause, RESPONSE_MAX);
}
filter_context_.GetNetLog().AddEvent(
NetLog::TYPE_SDCH_RESPONSE_CORRUPTION_DETECTION,
base::Bind(&NetLogSdchResponseCorruptionDetectionCallback,
cause,
filter_context_.IsCachedContent()));
if (decoding_status_ == PASS_THROUGH) {
// The bytes consumed while looking for a dictionary hash are really
// content, so queue them ahead of the rest of the stream.
dest_buffer_excess_ = dictionary_hash_; // Send what we scanned.
} else {
// This is where we try to do the expensive meta-refresh.
if (std::string::npos == mime_type_.find("text/html")) {
// Since we can't do a meta-refresh (along with an exponential
// backoff), we'll just make sure this NEVER happens again.
SdchProblemCode problem = (filter_context_.IsCachedContent()
? SDCH_CACHED_META_REFRESH_UNSUPPORTED
: SDCH_META_REFRESH_UNSUPPORTED);
url_request_context_->sdch_manager()->BlacklistDomainForever(
url_, problem);
LogSdchProblem(problem);
return FILTER_ERROR;
}
// HTML content means we can issue a meta-refresh, and get the content
// again, perhaps without SDCH (to be safe).
if (filter_context_.IsCachedContent()) {
// Cached content is probably a startup tab, so we'll just get fresh
// content and try again, without disabling sdch.
LogSdchProblem(SDCH_META_REFRESH_CACHED_RECOVERY);
} else {
// Since it wasn't in the cache, we definitely need at least some
// period of blacklisting to get the correct content.
url_request_context_->sdch_manager()->BlacklistDomain(
url_, SDCH_META_REFRESH_RECOVERY);
LogSdchProblem(SDCH_META_REFRESH_RECOVERY);
}
decoding_status_ = META_REFRESH_RECOVERY;
// Issue a meta redirect with SDCH disabled.
dest_buffer_excess_ = kDecompressionErrorHtml;
}
} else {
DCHECK_EQ(DECODING_IN_PROGRESS, decoding_status_);
}
}
// Drain any output queued by a previous call (or by the recovery code above)
// before producing anything new.
int amount = OutputBufferExcess(dest_buffer, available_space);
*dest_len += amount;
dest_buffer += amount;
available_space -= amount;
DCHECK_GE(available_space, 0);
if (available_space <= 0)
return FILTER_OK;
// Space remains, so the queued output must have been fully flushed.
DCHECK(dest_buffer_excess_.empty());
DCHECK_EQ(0u, dest_buffer_excess_index_);
if (decoding_status_ != DECODING_IN_PROGRESS) {
if (META_REFRESH_RECOVERY == decoding_status_) {
// Absorb all input data. We've already output page reload HTML.
next_stream_data_ = NULL;
stream_data_len_ = 0;
return FILTER_NEED_MORE_DATA;
}
if (PASS_THROUGH == decoding_status_) {
// We must pass in available_space, but it will be changed to bytes_used.
FilterStatus result = CopyOut(dest_buffer, &available_space);
// Accumulate the returned count of bytes_used (a.k.a., available_space).
*dest_len += available_space;
return result;
}
// No other state is expected to reach this point.
DCHECK(false);
decoding_status_ = DECODING_ERROR;
return FILTER_ERROR;
}
if (!next_stream_data_ || stream_data_len_ <= 0)
return FILTER_NEED_MORE_DATA;
// Decode the whole pending input chunk; the decoder's output goes into
// |dest_buffer_excess_|, and is copied out of there below.
bool ret = vcdiff_streaming_decoder_->DecodeChunk(
next_stream_data_, stream_data_len_, &dest_buffer_excess_);
// Assume all data was used in decoding.
next_stream_data_ = NULL;
source_bytes_ += stream_data_len_;
stream_data_len_ = 0;
output_bytes_ += dest_buffer_excess_.size();
if (!ret) {
vcdiff_streaming_decoder_.reset(NULL); // Don't call it again.
decoding_status_ = DECODING_ERROR;
LogSdchProblem(SDCH_DECODE_BODY_ERROR);
return FILTER_ERROR;
}
amount = OutputBufferExcess(dest_buffer, available_space);
*dest_len += amount;
dest_buffer += amount;
available_space -= amount;
// FILTER_OK signals that the buffer is full while decoded output is still
// queued; otherwise all input has been consumed and more is needed.
if (0 == available_space && !dest_buffer_excess_.empty())
return FILTER_OK;
return FILTER_NEED_MORE_DATA;
}
// Reads the dictionary identifier from the start of the stream and, if the
// matching dictionary is available, starts the VCDIFF decoder.
//
// The identifier is 9 bytes: an 8-character hash followed by a null byte.
// Bytes are accumulated in |dictionary_hash_| across calls until all 9 have
// arrived; the consumed bytes are removed from the pending input.
//
// Returns:
//   FILTER_NEED_MORE_DATA - fewer than 9 identifier bytes have arrived so far.
//   FILTER_ERROR          - the identifier is not null-terminated, or the
//                           dictionary lookup failed. |decoding_status_| is
//                           set to DECODING_ERROR, and
//                           |dictionary_hash_is_plausible_| records whether
//                           the bytes at least looked like a hash.
//   FILTER_OK             - the decoder was started and |decoding_status_| is
//                           DECODING_IN_PROGRESS.
Filter::FilterStatus SdchFilter::InitializeDictionary() {
  const size_t kServerIdLength = 9;  // Dictionary hash plus null from server.
  size_t bytes_needed = kServerIdLength - dictionary_hash_.size();
  DCHECK_GT(bytes_needed, 0u);
  if (!next_stream_data_)
    return FILTER_NEED_MORE_DATA;

  if (static_cast<size_t>(stream_data_len_) < bytes_needed) {
    // Not enough input yet: keep what is there and wait for the rest.
    dictionary_hash_.append(next_stream_data_, stream_data_len_);
    next_stream_data_ = NULL;
    stream_data_len_ = 0;
    return FILTER_NEED_MORE_DATA;
  }

  dictionary_hash_.append(next_stream_data_, bytes_needed);
  DCHECK(kServerIdLength == dictionary_hash_.size());
  stream_data_len_ -= bytes_needed;
  DCHECK_LE(0, stream_data_len_);
  if (stream_data_len_ > 0)
    next_stream_data_ += bytes_needed;
  else
    next_stream_data_ = NULL;

  DCHECK(!dictionary_.get());
  dictionary_hash_is_plausible_ = true;  // Assume plausible, but check.

  SdchProblemCode rv = SDCH_OK;
  if ('\0' == dictionary_hash_[kServerIdLength - 1]) {
    SdchManager* manager(url_request_context_->sdch_manager());
    rv = manager->GetVcdiffDictionary(
        std::string(dictionary_hash_, 0, kServerIdLength - 1),
        url_,
        &dictionary_);
    if (rv == SDCH_DICTIONARY_HASH_NOT_FOUND) {
      DCHECK(dictionary_hash_.size() == kServerIdLength);
      // Since dictionary was not found, check to see if hash was even
      // plausible.
      for (size_t i = 0; i < kServerIdLength - 1; ++i) {
        // These bytes come straight off the network. Passing a negative
        // value other than EOF to isalnum() is undefined behavior, and plain
        // char may be signed, so convert through unsigned char first.
        const unsigned char base64_char =
            static_cast<unsigned char>(dictionary_hash_[i]);
        if (!isalnum(base64_char) && '-' != base64_char &&
            '_' != base64_char) {
          rv = SDCH_DICTIONARY_HASH_MALFORMED;
          dictionary_hash_is_plausible_ = false;
          break;
        }
      }
    }
  } else {
    // A missing terminator means this cannot be a dictionary identifier.
    dictionary_hash_is_plausible_ = false;
    rv = SDCH_DICTIONARY_HASH_MALFORMED;
  }

  if (rv != SDCH_OK) {
    LogSdchProblem(rv);
    decoding_status_ = DECODING_ERROR;
    return FILTER_ERROR;
  }

  DCHECK(dictionary_.get());
  vcdiff_streaming_decoder_.reset(new open_vcdiff::VCDiffStreamingDecoder);
  vcdiff_streaming_decoder_->SetAllowVcdTarget(false);
  vcdiff_streaming_decoder_->StartDecoding(dictionary_->text().data(),
                                           dictionary_->text().size());
  decoding_status_ = DECODING_IN_PROGRESS;
  return FILTER_OK;
}
// Copies queued output from |dest_buffer_excess_| into |dest_buffer|.
//
// At most |available_space| bytes are copied, starting at
// |dest_buffer_excess_index_|. Once everything queued has been handed out,
// the queue and its read index are reset. Returns the number of bytes
// copied, which is 0 when nothing is queued.
int SdchFilter::OutputBufferExcess(char* const dest_buffer,
                                   size_t available_space) {
  if (dest_buffer_excess_.empty())
    return 0;

  DCHECK(dest_buffer_excess_.size() > dest_buffer_excess_index_);
  const size_t pending =
      dest_buffer_excess_.size() - dest_buffer_excess_index_;
  const size_t copied = std::min(available_space, pending);
  const char* const read_from =
      dest_buffer_excess_.data() + dest_buffer_excess_index_;
  memcpy(dest_buffer, read_from, copied);
  dest_buffer_excess_index_ += copied;

  // Everything queued has been delivered; start afresh for the next batch.
  if (dest_buffer_excess_index_ >= dest_buffer_excess_.size()) {
    DCHECK(dest_buffer_excess_.size() == dest_buffer_excess_index_);
    dest_buffer_excess_.clear();
    dest_buffer_excess_index_ = 0;
  }
  return copied;
}
// Reports |problem| through both reporting paths used by this filter: it is
// passed to SdchManager::SdchErrorRecovery(), and a
// TYPE_SDCH_DECODING_ERROR event carrying |problem| is added to the
// filter context's NetLog.
void SdchFilter::LogSdchProblem(SdchProblemCode problem) {
SdchManager::SdchErrorRecovery(problem);
filter_context_.GetNetLog().AddEvent(
NetLog::TYPE_SDCH_DECODING_ERROR,
base::Bind(&NetLogSdchResourceProblemCallback, problem));
}
} // namespace net