blob: 5945acbbb47196ac3802a41d5d43c87d85b4e943 [file] [log] [blame]
// Copyright 2018 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "services/network/public/cpp/corb/corb_impl.h"
#include <stddef.h>
#include <set>
#include <string>
#include <unordered_set>
#include <vector>
#include "base/check_op.h"
#include "base/command_line.h"
#include "base/containers/contains.h"
#include "base/containers/fixed_flat_set.h"
#include "base/feature_list.h"
#include "base/lazy_instance.h"
#include "base/metrics/histogram_macros.h"
#include "base/notreached.h"
#include "base/ranges/algorithm.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "net/base/mime_sniffer.h"
#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
#include "net/http/http_response_headers.h"
#include "services/network/public/cpp/cross_origin_embedder_policy.h"
#include "services/network/public/cpp/features.h"
#include "services/network/public/cpp/initiator_lock_compatibility.h"
#include "services/network/public/mojom/network_context.mojom.h"
#include "services/network/public/mojom/network_service.mojom.h"
#include "services/network/public/mojom/url_response_head.mojom.h"
using base::StringPiece;
using Decision = network::corb::ResponseAnalyzer::Decision;
using MimeType = network::corb::CrossOriginReadBlocking::MimeType;
using SniffingResult = network::corb::CrossOriginReadBlocking::SniffingResult;
namespace network::corb {
namespace {
// Content-Type values that get special treatment when CORB canonicalizes the
// MIME type of a response (see GetCanonicalMimeType).
constexpr char kTextHtml[] = "text/html";
constexpr char kTextXml[] = "text/xml";
constexpr char kAppXml[] = "application/xml";
constexpr char kAppJson[] = "application/json";
constexpr char kImageSvg[] = "image/svg+xml";
constexpr char kDashVideo[] =
    "application/dash+xml";  // https://crbug.com/947498
constexpr char kTextJson[] = "text/json";
constexpr char kTextPlain[] = "text/plain";

// Javascript MIME type suffixes for use in CORB protection logging. See also
// https://mimesniff.spec.whatwg.org/#javascript-mime-type.
//
// The table is constexpr so that neither the array nor the pointers it holds
// can be modified at runtime.
constexpr const char* kJavaScriptSuffixes[] = {
    "ecmascript",    "javascript",    "x-ecmascript",  "x-javascript",
    "javascript1.0", "javascript1.1", "javascript1.2", "javascript1.3",
    "javascript1.4", "javascript1.5", "jscript",       "livescript",
    "js",            "x-js"};

// TODO(lukasza): Remove kJsonProtobuf once this MIME type is not used in
// practice. See also https://crbug.com/826756#c3
constexpr char kJsonProtobuf[] = "application/json+protobuf";

// MIME type suffixes (matched case-insensitively against the end of the
// Content-Type by GetCanonicalMimeType).
constexpr char kJsonSuffix[] = "+json";
constexpr char kXmlSuffix[] = "+xml";
// Drops leading whitespace (space, tab, CR, LF) from |data| in place. When
// |data| holds nothing but whitespace (or is already empty) it is reset to a
// default-constructed StringPiece.
void AdvancePastWhitespace(StringPiece* data) {
  const size_t first_non_space = data->find_first_not_of(" \t\r\n");
  if (first_non_space != base::StringPiece::npos) {
    data->remove_prefix(first_non_space);
    return;
  }

  // Nothing other than whitespace is left.
  *data = StringPiece();
}
// Compares the beginning of |data| against the |arr_size| patterns stored in
// |signatures|, using |compare_case| for the character comparison. Patterns
// are tried in array order and the first conclusive one wins:
//  - kYes:   |data| begins with the pattern. The matched prefix is removed
//            from |data|.
//  - kMaybe: |data| is shorter than the pattern and is a prefix of it, so
//            additional bytes could still complete the match.
//  - kNo:    no pattern produced either of the results above.
SniffingResult MatchesSignature(StringPiece* data,
                                const StringPiece signatures[],
                                size_t arr_size,
                                base::CompareCase compare_case) {
  const StringPiece* const end = signatures + arr_size;
  for (const StringPiece* candidate = signatures; candidate != end;
       ++candidate) {
    const bool data_is_shorter = data->length() < candidate->length();
    if (data_is_shorter) {
      // Not enough bytes for a full match yet; check whether what is there is
      // at least consistent with this pattern.
      if (base::StartsWith(*candidate, *data, compare_case))
        return CrossOriginReadBlocking::kMaybe;
      continue;
    }

    if (base::StartsWith(*data, *candidate, compare_case)) {
      // Full match: consume the pattern.
      data->remove_prefix(candidate->length());
      return CrossOriginReadBlocking::kYes;
    }
  }
  return CrossOriginReadBlocking::kNo;
}
// Returns the offset of the first Javascript line terminator found in |hay| at
// or after |pos|, or base::StringPiece::npos when there is none.
//
// https://www.ecma-international.org/ecma-262/8.0/index.html#prod-LineTerminator
// defines LineTerminator ::= <LF> | <CR> | <LS> | <PS>, and
// https://www.ecma-international.org/ecma-262/8.0/index.html#sec-line-terminators
// defines <LS> ::= "\u2028" and <PS> ::= "\u2029". In UTF8 encoding <LS> is
// 0xE2 0x80 0xA8 and <PS> is 0xE2 0x80 0xA9, which is why 0xE2 is part of the
// byte set searched for below.
size_t FindFirstJavascriptLineTerminator(const base::StringPiece& hay,
                                         size_t pos) {
  // Each pass examines one candidate byte. The loop increment only runs after
  // a 0xE2 byte that turned out not to start <LS>/<PS>, and steps over it.
  for (;; ++pos) {
    pos = hay.find_first_of("\n\r\xe2", pos);
    if (pos == base::StringPiece::npos)
      return pos;

    const char found = hay[pos];
    if (found != '\xe2') {
      DCHECK(found == '\r' || found == '\n');
      return pos;
    }

    // TODO(lukasza): Prevent matching 3 bytes that span/straddle 2 UTF8
    // characters.
    const base::StringPiece tail = hay.substr(pos);
    if (base::StartsWith(tail, "\u2028") || base::StartsWith(tail, "\u2029"))
      return pos;
  }
}
// Tests whether |data| begins with an HTML comment ("<!-- ... -->").
//  - kYes:   a complete comment was found, *and* a Javascript line terminator
//            follows it somewhere. |data| is advanced up to (not past) that
//            line terminator.
//  - kMaybe: |data| could still turn into such a comment with more bytes: it
//            is a strict prefix of "<!--" (e.g. "<!-"), the comment has not
//            been closed yet, or no line terminator follows the "-->".
//  - kNo:    |data| does not start with a comment.
//
// The rest of the line after "-->" is skipped because of the
// SingleLineHTMLCloseComment ECMAscript rule; see https://crbug.com/839945 for
// the motivation.
SniffingResult MaybeSkipHtmlComment(StringPiece* data) {
  constexpr StringPiece kOpener = "<!--";
  constexpr StringPiece kCloser = "-->";

  if (!base::StartsWith(*data, kOpener)) {
    // Either a truncated opener, or something that is not a comment at all.
    return base::StartsWith(kOpener, *data) ? CrossOriginReadBlocking::kMaybe
                                            : CrossOriginReadBlocking::kNo;
  }

  const size_t closer_pos = data->find(kCloser, kOpener.length());
  if (closer_pos == StringPiece::npos)
    return CrossOriginReadBlocking::kMaybe;

  const size_t line_end = FindFirstJavascriptLineTerminator(
      *data, closer_pos + kCloser.length());
  if (line_end == base::StringPiece::npos)
    return CrossOriginReadBlocking::kMaybe;

  // |line_end| is the real end of the combined HTML/JS comment.
  data->remove_prefix(line_end);
  return CrossOriginReadBlocking::kYes;
}
// Returns the set of MIME types that may be blocked by CORB without any
// confirmation sniffing (in contrast to HTML/JSON/XML, which require
// confirmation sniffing because images, scripts, etc. are frequently
// mislabelled by http servers as HTML/JSON/XML).
//
// CORB cannot block images, scripts, stylesheets and other resources that the
// web standards allow to be fetched in `no-cors` mode. CORB cannot block
// these resources even if they are not explicitly labeled with their type - in
// practice http servers may serve images as application/octet-stream or even
// as text/html. OTOH, CORB *can* block all Content-Types that are very
// unlikely to represent images, scripts, stylesheets, etc. - those are the
// Content-Types returned here.
//
// Some of the entries might look like a layering violation (e.g. why would
// //services/network care about application/zip, application/pdf or
// application/msword?). The decision to list a Content-Type is not driven by
// whether the type is handled above or below the //services/network layer, but
// by whether the Content-Type is unlikely to be attached to an image, script,
// stylesheet or other subresource type that web standards require to be
// fetched in `no-cors` mode. In particular, CORB would still want to prevent
// cross-site disclosure of "application/msword" even if Chrome did not support
// this type (AFAIK today this support is only present on ChromeOS) in one of
// Chrome's many layers. Similarly, CORB wants to prevent disclosure of
// "application/zip" even though Chrome doesn't have built-in support for this
// resource type, and wants to protect "application/pdf" even though Chrome
// happens to support this resource type.
const auto& GetNeverSniffedMimeTypes() {
  static constexpr auto kBlockedWithoutSniffing =
      base::MakeFixedFlatSet<base::StringPiece>({
          // clang-format off

          // Commonly used content types (zip, protobuf, etc.) according to
          // HTTP Archive - see:
          // https://github.com/whatwg/fetch/issues/860#issuecomment-457330454
          "application/gzip",
          "application/x-gzip",
          "application/x-protobuf",
          "application/zip",
          "text/event-stream",

          // Initially taken from the list of types handled by MimeHandlerView
          // (although we would want to protect them even if Chrome didn't
          // support rendering these content types and/or if there was no such
          // thing as MimeHandlerView).
          "application/msexcel",
          "application/mspowerpoint",
          "application/msword",
          "application/msword-template",
          "application/pdf",
          "application/vnd.ces-quickpoint",
          "application/vnd.ces-quicksheet",
          "application/vnd.ces-quickword",
          "application/vnd.ms-excel",
          "application/vnd.ms-excel.sheet.macroenabled.12",
          "application/vnd.ms-powerpoint",
          "application/vnd.ms-powerpoint.presentation.macroenabled.12",
          "application/vnd.ms-word",
          "application/vnd.ms-word.document.12",
          "application/vnd.ms-word.document.macroenabled.12",
          "application/vnd.msword",
          "application/vnd.openxmlformats-officedocument.presentationml.presentation",
          "application/vnd.openxmlformats-officedocument.presentationml.template",
          "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
          "application/vnd.openxmlformats-officedocument.spreadsheetml.template",
          "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
          "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
          "application/vnd.presentation-openxml",
          "application/vnd.presentation-openxmlm",
          "application/vnd.spreadsheet-openxml",
          "application/vnd.wordprocessing-openxml",
          "text/csv",

          // Signed documents are blocked to protect the (potentially
          // sensitive) unencrypted body of the signed document. There should
          // be no need to block encrypted documents (e.g.
          // `multipart/encrypted` nor `application/pgp-encrypted`) and no need
          // to block the signatures (e.g. `application/pgp-signature`).
          "multipart/signed",

          // Multipart responses are blocked because a protected type (e.g.
          // JSON) can become multipart if returned in a range request with
          // multiple parts. This is compatible with the web because the
          // renderer can only see into the result of a fetch for a multipart
          // file when the request is made with CORS. Media tags only make
          // single-range requests which will not have the multipart type.
          "multipart/byteranges",

          // TODO(lukasza): https://crbug.com/802836#c11: Add
          // application/signed-exchange.

          // clang-format on
      });

  // Lookups lower-case the queried MIME type first, so every entry above has
  // to be lower-case already.
  DCHECK(base::ranges::all_of(kBlockedWithoutSniffing, [](const auto& entry) {
    return entry == base::ToLowerASCII(entry);
  }));

  return kBlockedWithoutSniffing;
}
// Records |action| as one sample of the "SiteIsolation.XSD.Browser.Action"
// enumeration histogram.
void LogAction(CrossOriginReadBlocking::Action action) {
UMA_HISTOGRAM_ENUMERATION("SiteIsolation.XSD.Browser.Action", action);
}
} // namespace
// static
// Returns true when |mime_type| ends (ASCII case-insensitively) with one of
// the entries of kJavaScriptSuffixes. Note that this is a suffix comparison,
// not an exact comparison against complete MIME types.
bool CrossOriginReadBlocking::IsJavascriptMimeType(
    base::StringPiece mime_type) {
  constexpr auto kCaseInsensitive = base::CompareCase::INSENSITIVE_ASCII;
  // Iterate using StringPiece views: binding each table entry to a
  // `const std::string&` would materialize a temporary std::string for every
  // suffix on every call.
  for (const base::StringPiece suffix : kJavaScriptSuffixes) {
    if (base::EndsWith(mime_type, suffix, kCaseInsensitive))
      return true;
  }
  return false;
}
// static
// Maps |mime_type| onto one of the coarse MimeType categories used by CORB.
// All comparisons are ASCII case-insensitive. The order of the checks matters:
// earlier checks take precedence over later ones.
MimeType CrossOriginReadBlocking::GetCanonicalMimeType(
    base::StringPiece mime_type) {
  const auto is_type = [mime_type](base::StringPiece candidate) {
    return base::EqualsCaseInsensitiveASCII(mime_type, candidate);
  };
  const auto has_suffix = [mime_type](base::StringPiece suffix) {
    return base::EndsWith(mime_type, suffix,
                          base::CompareCase::INSENSITIVE_ASCII);
  };

  // image/svg+xml and application/dash+xml have to be recognized before the
  // XML check below, otherwise their "+xml" suffix would classify them as
  // MimeType::kXml.
  if (is_type(kImageSvg) || is_type(kDashVideo))
    return MimeType::kOthers;

  // See also https://mimesniff.spec.whatwg.org/#html-mime-type
  if (is_type(kTextHtml))
    return MimeType::kHtml;

  // See also https://mimesniff.spec.whatwg.org/#json-mime-type
  if (is_type(kAppJson) || is_type(kTextJson) || is_type(kJsonProtobuf) ||
      has_suffix(kJsonSuffix)) {
    return MimeType::kJson;
  }

  // See also https://mimesniff.spec.whatwg.org/#xml-mime-type
  if (is_type(kAppXml) || is_type(kTextXml) || has_suffix(kXmlSuffix))
    return MimeType::kXml;

  if (is_type(kTextPlain))
    return MimeType::kPlain;

  // The never-sniffed set only holds lower-case entries, hence the
  // lower-casing of the key.
  const bool never_sniffed = base::Contains(GetNeverSniffedMimeTypes(),
                                            base::ToLowerASCII(mime_type));
  return never_sniffed ? MimeType::kNeverSniffed : MimeType::kOthers;
}
// static
// This function is a slight modification of |net::SniffForHTML|.
//
// Returns kYes once |data| - after skipping leading whitespace and any number
// of leading HTML comments - starts with one of the HTML signatures below,
// kMaybe when more bytes are needed to decide, and kNo otherwise.
SniffingResult CrossOriginReadBlocking::SniffForHTML(StringPiece data) {
  // The content sniffers used by Chrome and Firefox treat "<!--" as one of the
  // HTML signatures, but "<!--" also appears in valid JavaScript that the
  // browser considers well-formed. Since we do not want to block any JS, it is
  // left out of the signatures here (comments are skipped over instead). This
  // can weaken our CORB policy, but breaks fewer websites.
  //
  // <body> and <br> are not listed, since <b is a prefix of them.
  //
  // TODO(dsjang): parameterize |net::SniffForHTML| with an option that decides
  // whether to include <!-- or not, so that we can remove this function.
  // TODO(dsjang): Once CrossOriginReadBlocking is moved into the browser
  // process, we should do single-thread checking here for the static
  // initializer.
  static constexpr StringPiece kHtmlSignatures[] = {
      "<!doctype html",  // HTML5 spec
      "<script",         // HTML5 spec, Mozilla
      "<html",           // HTML5 spec, Mozilla
      "<head",           // HTML5 spec, Mozilla
      "<iframe",         // Mozilla
      "<h1",             // Mozilla
      "<div",            // Mozilla
      "<font",           // Mozilla
      "<table",          // Mozilla
      "<a",              // Mozilla
      "<style",          // Mozilla
      "<title",          // Mozilla
      "<b",              // Mozilla (note: subsumes <body>, <br>)
      "<p",              // Mozilla
  };

  while (!data.empty()) {
    AdvancePastWhitespace(&data);

    // A definite or potential tag match settles the question.
    const SniffingResult tag_result =
        MatchesSignature(&data, kHtmlSignatures, std::size(kHtmlSignatures),
                         base::CompareCase::INSENSITIVE_ASCII);
    if (tag_result != kNo)
      return tag_result;

    // Not a tag. Only a fully skipped comment lets the scan continue; anything
    // else (kNo / kMaybe) is the final answer.
    const SniffingResult comment_result = MaybeSkipHtmlComment(&data);
    if (comment_result != kYes)
      return comment_result;
  }

  // Ran out of |data| without reaching a clear determination.
  return kMaybe;
}
// static
// Checks whether |data|, ignoring leading whitespace, starts with "<?xml"
// (compared case-sensitively). Returns kMaybe when the remaining |data| is
// only a prefix of that signature.
SniffingResult CrossOriginReadBlocking::SniffForXML(base::StringPiece data) {
  // TODO(dsjang): Once CrossOriginReadBlocking is moved into the browser
  // process, we should do single-thread checking here for the static
  // initializer.
  static constexpr StringPiece kXmlSignatures[] = {"<?xml"};

  AdvancePastWhitespace(&data);
  return MatchesSignature(&data, kXmlSignatures, std::size(kXmlSignatures),
                          base::CompareCase::SENSITIVE);
}
// static
// Looks for an opening brace ('{'), followed by a double-quoted string
// literal, followed by a colon - with optional whitespace between those
// tokens. Importantly, such a sequence is a Javascript syntax error: although
// the JSON object syntax is exactly Javascript's object-initializer syntax, a
// Javascript object-initializer expression is not valid as a standalone
// Javascript statement.
//
// Returns kYes when the sequence is found, kNo at the first byte that breaks
// it, and kMaybe when |data| ends before a decision is reached.
//
// TODO(nick): We have to come up with a better way to sniff JSON. The
// following are known limitations of this function:
// https://crbug.com/795470/ Support non-dictionary values (e.g. lists)
SniffingResult CrossOriginReadBlocking::SniffForJSON(base::StringPiece data) {
  enum {
    kStartState,       // Nothing but whitespace seen so far.
    kLeftBraceState,   // Saw '{'.
    kLeftQuoteState,   // Inside the string literal.
    kEscapeState,      // Inside the string literal, right after a backslash.
    kRightQuoteState,  // The string literal has been closed.
  } state = kStartState;

  for (const char c : data) {
    // Whitespace is only significant inside of the string literal.
    const bool inside_string =
        state == kLeftQuoteState || state == kEscapeState;
    const bool is_whitespace =
        c == ' ' || c == '\t' || c == '\r' || c == '\n';
    if (!inside_string && is_whitespace)
      continue;

    switch (state) {
      case kStartState:
        if (c != '{')
          return kNo;
        state = kLeftBraceState;
        break;
      case kLeftBraceState:
        if (c != '"')
          return kNo;
        state = kLeftQuoteState;
        break;
      case kLeftQuoteState:
        if (c == '"')
          state = kRightQuoteState;
        else if (c == '\\')
          state = kEscapeState;
        break;
      case kEscapeState:
        // Simplification: the escaped character is accepted whatever it is
        // (hex escapes are not validated).
        state = kLeftQuoteState;
        break;
      case kRightQuoteState:
        return c == ':' ? kYes : kNo;
    }
  }

  return kMaybe;
}
// static
// Detects responses that are evidently meant to be consumed only through the
// fetch API or XMLHttpRequest: bodies that start with a well-known
// script-breaking prefix, or that SniffForJSON() recognizes.
SniffingResult CrossOriginReadBlocking::SniffForFetchOnlyResource(
    base::StringPiece data) {
  // kScriptBreakingPrefixes contains prefixes that are conventionally used to
  // prevent a JSON response from becoming a valid Javascript program (an
  // attack vector known as XSSI). The presence of such a prefix is a strong
  // signal that the resource is meant to be protected from use in non-CORS,
  // cross-origin contexts like <script>, <img>, etc.
  //
  // These prefixes work either by inducing a syntax error, or inducing an
  // infinite loop. In either case, the prefix must create a guarantee that no
  // matter what bytes follow it, the entire response would be worthless to
  // execute as a <script>.
  static constexpr StringPiece kScriptBreakingPrefixes[] = {
      // Parser breaker prefix.
      //
      // Built into angular.js (followed by a comma and a newline):
      //   https://docs.angularjs.org/api/ng/service/$http
      //
      // Built into the Java Spring framework (followed by a comma and a
      // space):
      //   https://goo.gl/xP7FWn
      //
      // Observed on google.com (without a comma, followed by a newline).
      ")]}'",

      // Apache struts: https://struts.apache.org/plugins/json/#prefix
      "{}&&",

      // Spring framework (historically): https://goo.gl/JYPFAv
      "{} &&",

      // Infinite loops.
      "for(;;);",  // observed on facebook.com
      "while(1);",
      "for (;;);",
      "while (1);",
  };

  const SniffingResult prefix_result = MatchesSignature(
      &data, kScriptBreakingPrefixes, std::size(kScriptBreakingPrefixes),
      base::CompareCase::SENSITIVE);

  // Without a (possible) parser breaker, fall back to JSON detection: a
  // non-empty JSON object also effectively introduces a JS syntax error.
  return prefix_result == kNo ? SniffForJSON(data) : prefix_result;
}
// An interface to enable incremental content sniffing. These are instantiated
// for each request; thus they can be stateful.
//
// Usage, as implied by the methods below: feed the body received so far to
// OnDataAvailable(), then query WantsMoreData() / IsConfirmedContentType().
class CrossOriginReadBlocking::CorbResponseAnalyzer::ConfirmationSniffer {
public:
// Virtual, so that implementations can be destroyed through a pointer to this
// interface.
virtual ~ConfirmationSniffer() = default;
// Called after data is read from the network. |sniffing_buffer| contains the
// entire response body delivered thus far (i.e. not only the newest chunk).
virtual void OnDataAvailable(base::StringPiece sniffing_buffer) = 0;
// Returns true if the return value of IsConfirmedContentType() might change
// with the addition of more data. Returns false if a final decision is
// available.
virtual bool WantsMoreData() const = 0;
// Returns true if the data has been confirmed to be of the CORB-protected
// content type that this sniffer is intended to detect.
virtual bool IsConfirmedContentType() const = 0;
};
// A ConfirmationSniffer that wraps one of the sniffing functions from
// CrossOriginReadBlocking. The wrapped function is re-run over the whole
// buffer on every OnDataAvailable() call and only its latest verdict is kept.
class CrossOriginReadBlocking::CorbResponseAnalyzer::SimpleConfirmationSniffer
    : public CrossOriginReadBlocking::CorbResponseAnalyzer::
          ConfirmationSniffer {
 public:
  // The function pointer type corresponding to one of the available sniffing
  // functions from CrossOriginReadBlocking.
  using SnifferFunction = decltype(&CrossOriginReadBlocking::SniffForHTML);

  explicit SimpleConfirmationSniffer(SnifferFunction sniffer_function)
      : sniffer_function_(sniffer_function) {}
  ~SimpleConfirmationSniffer() override = default;

  // Not copyable. These must take a reference: overloads taking a `const
  // SimpleConfirmationSniffer*` would only delete conversions from a pointer
  // and leave the implicitly generated copy operations in place.
  SimpleConfirmationSniffer(const SimpleConfirmationSniffer&) = delete;
  SimpleConfirmationSniffer& operator=(const SimpleConfirmationSniffer&) =
      delete;

  // ConfirmationSniffer:
  void OnDataAvailable(base::StringPiece sniffing_buffer) final {
    // The sniffing functions don't support streaming, so with each new chunk
    // of data, call the sniffer on the whole buffer.
    last_sniff_result_ = (*sniffer_function_)(sniffing_buffer);
  }

  bool WantsMoreData() const final {
    // kNo and kYes results are final, meaning that sniffing can stop once they
    // occur. A kMaybe result corresponds to an indeterminate state, that could
    // change to kYes or kNo with more data.
    return last_sniff_result_ == SniffingResult::kMaybe;
  }

  bool IsConfirmedContentType() const final {
    // Only confirm the mime type if an affirmative pattern (e.g. an HTML tag,
    // if using the HTML sniffer) was detected.
    //
    // Note that if the stream ends (or net::kMaxBytesToSniff has been reached)
    // and |last_sniff_result_| is kMaybe, the response is allowed to go
    // through.
    return last_sniff_result_ == SniffingResult::kYes;
  }

 private:
  // The function that actually knows how to sniff for a content type.
  SnifferFunction sniffer_function_;

  // Result of sniffing the data available thus far. Starts out as kMaybe, so
  // that WantsMoreData() is true before any data has been seen.
  SniffingResult last_sniff_result_ = SniffingResult::kMaybe;
};
// Analyzes the response headers of a request and returns the resulting CORB
// decision (the value of GetCorbDecision()).
//
// In order, this:
//  1. caches header-derived facts about |response| in member fields,
//  2. logs Action::kResponseStarted,
//  3. canonicalizes the Content-Type and computes the header-based decision
//     for the actual |request_mode| / |request_initiator|,
//  4. for responses that look sensitive, additionally computes what CORB would
//     decide for a hypothetical cross-origin `no-cors` request and records the
//     CORB protection logging state / UMA,
//  5. creates the confirmation sniffers if needs_sniffing() says so.
Decision CrossOriginReadBlocking::CorbResponseAnalyzer::Init(
const GURL& request_url,
const absl::optional<url::Origin>& request_initiator,
mojom::RequestMode request_mode,
const mojom::URLResponseHead& response) {
seems_sensitive_from_cors_heuristic_ =
SeemsSensitiveFromCORSHeuristic(response);
seems_sensitive_from_cache_heuristic_ =
SeemsSensitiveFromCacheHeuristic(response);
supports_range_requests_ = SupportsRangeRequests(response);
has_nosniff_header_ = HasNoSniff(response);
content_length_ = response.content_length;
// A response without headers is recorded with response code 0.
http_response_code_ =
response.headers ? response.headers->response_code() : 0;
LogAction(Action::kResponseStarted);
// CORB should look directly at the Content-Type header if one has been
// received from the network. Ignoring |response.mime_type| helps avoid
// breaking legitimate websites (which might happen more often when blocking
// would be based on the mime type sniffed by MimeSniffingResourceHandler).
//
// This value could be computed later in ShouldBlockBasedOnHeaders after
// has_nosniff_header, but we compute it here to keep
// ShouldBlockBasedOnHeaders (which is called twice) const.
//
// TODO(nick): What if the mime type is omitted? Should that be treated the
// same as text/plain? https://crbug.com/795971
std::string mime_type;
if (response.headers)
response.headers->GetMimeType(&mime_type);
// Canonicalize the MIME type. Note that even if it doesn't claim to be a
// blockable type (i.e., HTML, XML, JSON, or plain text), it may still fail
// the checks during the SniffForFetchOnlyResource() phase.
canonical_mime_type_ = GetCanonicalMimeType(mime_type);
// First call to ShouldBlockBasedOnHeaders: the decision for the real request.
should_block_based_on_headers_ =
ShouldBlockBasedOnHeaders(request_mode, request_url, request_initiator,
response, canonical_mime_type_);
// Check if the response seems sensitive and if so include in our CORB
// protection logging. We have not sniffed yet, so the answer might be
// kSniffMore.
if (seems_sensitive_from_cors_heuristic_ ||
seems_sensitive_from_cache_heuristic_) {
// Create a new Origin with a unique internal identifier so we can pretend
// the request is cross-origin.
url::Origin cross_origin_request_initiator = url::Origin();
// kNoCors is used (instead of passing `request_mode`) to also cover CORS
// requests with the CORB Protection heuristics and UMAs. Using kNoCors
// simulates an attacker requesting "seems-sensitive" subresources from a
// script tag.
//
// Second call to ShouldBlockBasedOnHeaders: the hypothetical decision.
Decision would_protect_based_on_headers = ShouldBlockBasedOnHeaders(
mojom::RequestMode::kNoCors, request_url,
cross_origin_request_initiator, response, canonical_mime_type_);
// Sniffing for logging purposes additionally requires the
// kCORBProtectionSniffing feature to be enabled.
corb_protection_logging_needs_sniffing_ =
(would_protect_based_on_headers == Decision::kSniffMore) &&
base::FeatureList::IsEnabled(features::kCORBProtectionSniffing);
// "Hypothetical" sniffing: the logging wants sniffing although the decision
// for the real request did not ask for it.
hypothetical_sniffing_mode_ =
corb_protection_logging_needs_sniffing_ &&
should_block_based_on_headers_ != Decision::kSniffMore;
mime_type_bucket_ = GetMimeTypeBucket(response);
UMA_HISTOGRAM_BOOLEAN("SiteIsolation.CORBProtection.SensitiveResource",
true);
if (!corb_protection_logging_needs_sniffing_) {
// If we are not going to sniff, then we can and must log everything now.
LogSensitiveResponseProtection(
BlockingDecisionToProtectionDecision(would_protect_based_on_headers));
}
} else {
UMA_HISTOGRAM_BOOLEAN("SiteIsolation.CORBProtection.SensitiveResource",
false);
}
if (needs_sniffing())
CreateSniffers();
return GetCorbDecision();
}
CrossOriginReadBlocking::CorbResponseAnalyzer::CorbResponseAnalyzer() = default;

// Logs the final outcome (blocked vs. allowed) of the analyzed response when
// the analyzer goes away.
CrossOriginReadBlocking::CorbResponseAnalyzer::~CorbResponseAnalyzer() {
  if (ShouldBlock()) {
    LogBlockedResponse();
    return;
  }

  // A response ends up allowed either 1) explicitly, or 2) because sniffing
  // didn't reach a conclusion after sniffing 1024 (or all) bytes.
  DCHECK(ShouldAllow() || needs_sniffing());
  LogAllowedResponse();
}
// static
// Decides, from the request parameters and the response headers alone, whether
// the response should be allowed (kAllow), blocked (kBlock), or whether the
// body needs to be sniffed before deciding (kSniffMore).
//
// |canonical_mime_type| is the result of GetCanonicalMimeType() for the
// Content-Type of |response|. |response.headers| may be null; a response
// without headers is treated as having neither a Content-Range header nor any
// other header.
Decision
CrossOriginReadBlocking::CorbResponseAnalyzer::ShouldBlockBasedOnHeaders(
    mojom::RequestMode request_mode,
    const GURL& request_url,
    const absl::optional<url::Origin>& request_initiator,
    const mojom::URLResponseHead& response,
    MimeType canonical_mime_type) {
  // The checks in this method are ordered to rule out blocking in most cases
  // as quickly as possible. Checks that are likely to lead to returning false
  // or that are inexpensive should be near the top.

  // Extract the `initiator` of the request, allowing requests with no
  // initiator. (Such requests are browser-initiated and therefore trustworthy;
  // CorsURLLoaderFactory::IsValidRequest enforces that renderer-initiated
  // requests specify a non-null `request_initiator`.)
  if (!request_initiator.has_value())
    return Decision::kAllow;
  const url::Origin& initiator = request_initiator.value();

  // Don't block same-origin documents.
  if (initiator.IsSameOriginWith(request_url))
    return Decision::kAllow;

  // Only apply CORB to `no-cors` requests.
  //
  // CORB doesn't need to block kNavigate requests because results of these are
  // OOPIF-isolated (note that CorsURLLoaderFactory::IsValidRequest
  // validates that only the Browser process can initiate requests in kNavigate
  // mode).
  //
  // CORB doesn't need to work with kSameOrigin, kCors, nor
  // kCorsWithForcedPreflight modes, because these are covered by OOR-CORS.
  if (request_mode != mojom::RequestMode::kNoCors)
    return Decision::kAllow;

  // Requests from foo.example.com will consult foo.example.com's service
  // worker first (if one has been registered). The service worker can handle
  // requests initiated by foo.example.com even if they are cross-origin (e.g.
  // requests for bar.example.com). This is okay, because there is no security
  // boundary between foo.example.com and the service worker of
  // foo.example.com + because the response data is "conjured" within the
  // service worker of foo.example.com (rather than being fetched from
  // bar.example.com). Therefore such responses should not be blocked by CORB,
  // unless the initiator opted out of CORS / opted into receiving an opaque
  // response. See also https://crbug.com/803672.
  if (response.was_fetched_via_service_worker) {
    switch (response.response_type) {
      case mojom::FetchResponseType::kBasic:
      case mojom::FetchResponseType::kCors:
      case mojom::FetchResponseType::kDefault:
      case mojom::FetchResponseType::kError:
        // Non-opaque responses shouldn't be blocked.
        return Decision::kAllow;
      case mojom::FetchResponseType::kOpaque:
      case mojom::FetchResponseType::kOpaqueRedirect:
        // Opaque responses are eligible for blocking. Continue on...
        break;
    }
  }

  // Some types (e.g. ZIP) are protected without any confirmation sniffing.
  if (canonical_mime_type == MimeType::kNeverSniffed)
    return Decision::kBlock;

  // If this is a partial response, sniffing is not possible, so allow the
  // response if it's not a protected mime type.
  //
  // |response.headers| is checked before use, consistently with the other
  // helpers of this class: without headers there is no Content-Range header.
  std::string range_header;
  if (response.headers)
    response.headers->GetNormalizedHeader("content-range", &range_header);
  const bool has_range_header = !range_header.empty();
  if (has_range_header) {
    switch (canonical_mime_type) {
      case MimeType::kOthers:
      case MimeType::kPlain:  // See also https://crbug.com/801709
        return Decision::kAllow;
      case MimeType::kHtml:
      case MimeType::kJson:
      case MimeType::kXml:
        return Decision::kBlock;
      case MimeType::kInvalidMimeType:
      case MimeType::kNeverSniffed:  // Handled much earlier.
        NOTREACHED();
        return Decision::kBlock;
    }
  }

  // We intend to block the response at this point. However, we will usually
  // sniff the contents to confirm the MIME type, to avoid blocking incorrectly
  // labeled JavaScript, JSONP, etc files.
  //
  // Note: if there is a nosniff header, it means we should honor the response
  // mime type without trying to confirm it.
  //
  // Decide whether to block based on the MIME type.
  switch (canonical_mime_type) {
    case MimeType::kHtml:
    case MimeType::kXml:
    case MimeType::kJson:
    case MimeType::kPlain:
      if (HasNoSniff(response))
        return Decision::kBlock;
      return Decision::kSniffMore;
    case MimeType::kOthers:
      // Stylesheets shouldn't be sniffed for JSON parser breakers - see
      // https://crbug.com/809259.
      if (base::EqualsCaseInsensitiveASCII(response.mime_type, "text/css"))
        return Decision::kAllow;
      return Decision::kSniffMore;
    case MimeType::kInvalidMimeType:
    case MimeType::kNeverSniffed:  // Handled much earlier.
      NOTREACHED();
      return Decision::kBlock;
  }
  NOTREACHED();
  return Decision::kBlock;
}
// static
// Returns true when |response| carries an Access-Control-Allow-Origin header
// whose value is something other than "*" or "null" ("null" offers no more
// protection than "*" because it matches any unique origin). A response
// without headers, or with a missing/empty header value, is not considered
// sensitive.
bool CrossOriginReadBlocking::CorbResponseAnalyzer::
    SeemsSensitiveFromCORSHeuristic(const mojom::URLResponseHead& response) {
  if (!response.headers)
    return false;

  std::string allowed_origin;
  response.headers->GetNormalizedHeader("access-control-allow-origin",
                                        &allowed_origin);

  const bool is_unrestricted = allowed_origin.empty() ||
                               allowed_origin == "*" ||
                               allowed_origin == "null";
  return !is_unrestricted;
}
// static
// Returns true when |response| has both `Vary: Origin` and
// `Cache-Control: Private`, which we take as a signal that it may be a
// sensitive resource. Both are required to reduce the number of false
// positives (each header is sometimes used on non-sensitive resources).
// `Cache-Control: no-store` appears on non-sensitive resources that change
// frequently, so it is ignored here.
bool CrossOriginReadBlocking::CorbResponseAnalyzer::
    SeemsSensitiveFromCacheHeuristic(const mojom::URLResponseHead& response) {
  // Without headers neither signal can be present.
  return response.headers &&
         response.headers->HasHeaderValue("vary", "origin") &&
         response.headers->HasHeaderValue("cache-control", "private");
}
// static
// Returns true when |response| has a non-empty Accept-Ranges header whose
// value is not "none" (compared ASCII case-insensitively). Responses without
// headers do not support range requests.
bool CrossOriginReadBlocking::CorbResponseAnalyzer::SupportsRangeRequests(
    const mojom::URLResponseHead& response) {
  if (!response.headers)
    return false;

  std::string accept_ranges;
  response.headers->GetNormalizedHeader("accept-ranges", &accept_ranges);
  return !accept_ranges.empty() &&
         !base::EqualsCaseInsensitiveASCII(accept_ranges, "none");
}
// static
// Sorts the Content-Type of |response| into a bucket: kProtected for the types
// CORB protects, kPublic for types that are assumed to be public (Javascript,
// images, audio/video, CSS), and kOther for everything else. A response
// without headers is bucketed like an empty MIME type.
CrossOriginReadBlocking::CorbResponseAnalyzer::MimeTypeBucket
CrossOriginReadBlocking::CorbResponseAnalyzer::GetMimeTypeBucket(
    const mojom::URLResponseHead& response) {
  std::string mime_type;
  if (response.headers)
    response.headers->GetMimeType(&mime_type);

  switch (GetCanonicalMimeType(mime_type)) {
    case MimeType::kHtml:
    case MimeType::kXml:
    case MimeType::kJson:
    case MimeType::kNeverSniffed:
    case MimeType::kPlain:
      return kProtected;
    case MimeType::kOthers:
      break;
    case MimeType::kInvalidMimeType:
      NOTREACHED();
      break;
  }

  constexpr auto kCaseInsensitive = base::CompareCase::INSENSITIVE_ASCII;
  const auto is_type = [&mime_type](base::StringPiece candidate) {
    return base::EqualsCaseInsensitiveASCII(mime_type, candidate);
  };
  const auto has_prefix = [&mime_type, kCaseInsensitive](
                              base::StringPiece prefix) {
    return base::StartsWith(mime_type, prefix, kCaseInsensitive);
  };

  const bool is_public =
      // Javascript is assumed public. See also
      // https://mimesniff.spec.whatwg.org/#javascript-mime-type.
      IsJavascriptMimeType(mime_type) ||
      // Images are assumed public. See also
      // https://mimesniff.spec.whatwg.org/#image-mime-type.
      has_prefix("image") ||
      // Audio and video are assumed public. See also
      // https://mimesniff.spec.whatwg.org/#audio-or-video-mime-type.
      has_prefix("audio") || has_prefix("video") ||
      is_type("application/ogg") || is_type("application/dash+xml") ||
      // CSS files are assumed public and must be sent with text/css.
      is_type("text/css");

  return is_public ? kPublic : kOther;
}
void CrossOriginReadBlocking::CorbResponseAnalyzer::CreateSniffers() {
// Create one or more |sniffers_| to confirm that the body is actually the
// MIME type advertised in the Content-Type header.
DCHECK(needs_sniffing());
DCHECK(sniffers_.empty());
// When the MIME type is "text/plain", create sniffers for HTML, XML and
// JSON. If any of these sniffers match, the response will be blocked.
const bool use_all = canonical_mime_type_ == MimeType::kPlain;
// HTML sniffer.
if (use_all || canonical_mime_type_ == MimeType::kHtml) {
sniffers_.push_back(std::make_unique<SimpleConfirmationSniffer>(
&CrossOriginReadBlocking::SniffForHTML));
}
// XML sniffer.
if (use_all || canonical_mime_type_ == MimeType::kXml) {
sniffers_.push_back(std::make_unique<SimpleConfirmationSniffer>(
&CrossOriginReadBlocking::SniffForXML));
}
// JSON sniffer.
if (use_all || canonical_mime_type_ == MimeType::kJson) {
sniffers_.push_back(std::make_unique<SimpleConfirmationSniffer>(
&CrossOriginReadBlocking::SniffForJSON));
}
// Parser-breaker sniffer.
//
// Because these prefixes are an XSSI-defeating mechanism, CORB considers
// them distinctive enough to be worth blocking no matter the Content-Type
// header. So this sniffer is created unconditionally.
//
// For MimeType::kOthers, this will be the only sniffer that's active.
sniffers_.push_back(std::make_unique<SimpleConfirmationSniffer>(
&CrossOriginReadBlocking::SniffForFetchOnlyResource));
}
Decision CrossOriginReadBlocking::CorbResponseAnalyzer::Sniff(
    base::StringPiece data) {
  DCHECK(needs_sniffing());
  DCHECK(!sniffers_.empty());
  DCHECK(!found_blockable_content_);
  DCHECK_LE(data.size(), static_cast<size_t>(net::kMaxBytesToSniff));

  // Feed |data| to every remaining sniffer. Undecided sniffers stay in the
  // list; a sniffer that rules out its content type is dropped; the first
  // sniffer that confirms its content type ends sniffing altogether.
  auto it = sniffers_.begin();
  while (it != sniffers_.end()) {
    auto& sniffer = **it;
    sniffer.OnDataAvailable(data);

    if (sniffer.WantsMoreData()) {
      ++it;
    } else if (sniffer.IsConfirmedContentType()) {
      found_blockable_content_ = true;
      sniffers_.clear();
      break;
    } else {
      // The response is CORB-exempt as far as this sniffer is concerned, so
      // it no longer needs to be consulted.
      it = sniffers_.erase(it);
    }
  }

  return GetCorbDecision();
}
Decision CrossOriginReadBlocking::CorbResponseAnalyzer::
    HandleEndOfSniffableResponseBody() {
  // Reaching the end of the sniffable body means none of the HTML, XML or JSON
  // confirmation sniffers could confirm their content type. CORB fails open
  // here: the body might be an allowed resource (e.g. an image or a script).
  constexpr Decision kFailOpenDecision = Decision::kAllow;
  return kFailOpenDecision;
}
bool CrossOriginReadBlocking::CorbResponseAnalyzer::ShouldAllow() const {
  const bool all_sniffers_done = sniffers_.empty();

  // In hypothetical mode the header-based CORB decision must already be
  // kAllow (see the comment in ShouldBlock), so the only thing left to wait
  // for is the sniffers finishing.
  if (hypothetical_sniffing_mode_) {
    DCHECK_EQ(should_block_based_on_headers_, Decision::kAllow);
    return all_sniffers_done;
  }

  switch (should_block_based_on_headers_) {
    case Decision::kBlock:
      return false;
    case Decision::kAllow:
      return true;
    case Decision::kSniffMore:
      break;
  }

  // kSniffMore: allow once every sniffer has finished without finding
  // blockable content.
  return all_sniffers_done && !found_blockable_content_;
}
bool CrossOriginReadBlocking::CorbResponseAnalyzer::ShouldBlock() const {
  // *Hypothetical* sniffing mode implies both of the following:
  // (1) Sniffing happens only to learn whether CORB would have blocked the
  //     request had it been cross-origin; CORB itself did *not* need to sniff.
  // (2) The header-based decision was kAllow (had it been kBlock, the
  //     protection decision would have been kBlock too and no hypothetical
  //     mode would be needed).
  if (hypothetical_sniffing_mode_) {
    DCHECK_EQ(should_block_based_on_headers_, Decision::kAllow);
    return false;
  }

  switch (should_block_based_on_headers_) {
    case Decision::kBlock:
      return true;
    case Decision::kAllow:
      return false;
    case Decision::kSniffMore:
      break;
  }

  // kSniffMore: block once sniffing has finished and found blockable content.
  return sniffers_.empty() && found_blockable_content_;
}
bool CrossOriginReadBlocking::CorbResponseAnalyzer::
    ShouldReportBlockedResponse() const {
  // Only blocked responses are ever reported.
  if (!ShouldBlock())
    return false;

  // No warning is shown for responses that are already empty.
  const bool is_empty_response =
      content_length_ == 0 || http_response_code_ == 204;

  // No warning is shown for error responses either (e.g. it is quite common
  // to serve a text/html 404 error page for an <img> tag pointing to a wrong
  // URL).
  const bool is_error_response =
      400 <= http_response_code_ && http_response_code_ <= 599;

  return !is_empty_response && !is_error_response;
}
ResponseAnalyzer::BlockedResponseHandling
CrossOriginReadBlocking::CorbResponseAnalyzer::ShouldHandleBlockedResponseAs()
    const {
  // A response blocked by CORB is surfaced as an empty response.
  using Handling = ResponseAnalyzer::BlockedResponseHandling;
  return Handling::kEmptyResponse;
}
Decision CrossOriginReadBlocking::CorbResponseAnalyzer::GetCorbDecision() {
  // Blocking is checked first; when neither blocking nor allowing has been
  // settled yet, more sniffing is needed.
  if (ShouldBlock())
    return Decision::kBlock;
  return ShouldAllow() ? Decision::kAllow : Decision::kSniffMore;
}
void CrossOriginReadBlocking::CorbResponseAnalyzer::LogAllowedResponse() {
  // The final decision must not have been logged already.
  DCHECK(!has_logged_final_decision_);
  has_logged_final_decision_ = true;

  if (corb_protection_logging_needs_sniffing_) {
    const auto protection_decision =
        SniffingDecisionToProtectionDecision(found_blockable_content_);
    LogSensitiveResponseProtection(protection_decision);
  }

  // Deliberately absent assertions:
  //
  // - DCHECK(ShouldAllow()) / DCHECK(sniffers_.empty()): when a response is
  //   allowed because of hitting EOF or kMaxBytesToSniff, |sniffers_| are not
  //   emptied and ShouldAllow does not start returning true. The decision to
  //   allow could have been made in the CrossSiteDocumentResourceHandler layer
  //   without CrossOriginReadBlocking realizing that EOF or kMaxBytesToSniff
  //   was hit.
  //
  // - DCHECK(!ShouldBlock()): the response might be allowed even if
  //   ShouldBlock() returns true - for example to allow responses to requests
  //   initiated by content scripts.
  const auto action = needs_sniffing() ? Action::kAllowedAfterSniffing
                                       : Action::kAllowedWithoutSniffing;
  LogAction(action);
}
void CrossOriginReadBlocking::CorbResponseAnalyzer::LogBlockedResponse() {
  // The final decision must not have been logged already.
  DCHECK(!has_logged_final_decision_);
  has_logged_final_decision_ = true;

  // A blocked response must be in a settled "block" state with no sniffers
  // still pending.
  DCHECK(!ShouldAllow());
  DCHECK(ShouldBlock());
  DCHECK(sniffers_.empty());

  if (corb_protection_logging_needs_sniffing_) {
    const auto protection_decision =
        SniffingDecisionToProtectionDecision(found_blockable_content_);
    LogSensitiveResponseProtection(protection_decision);
  }

  const auto action = needs_sniffing() ? Action::kBlockedAfterSniffing
                                       : Action::kBlockedWithoutSniffing;
  LogAction(action);

  // Record which canonical MIME type the blocked response had.
  UMA_HISTOGRAM_ENUMERATION(
      "SiteIsolation.XSD.Browser.Blocked.CanonicalMimeType",
      canonical_mime_type_);
}
// static
bool CrossOriginReadBlocking::CorbResponseAnalyzer::HasNoSniff(
    const mojom::URLResponseHead& response) {
  // Returns true when the response opts out of MIME sniffing through
  // "X-Content-Type-Options: nosniff". A response without headers never does.
  if (!response.headers)
    return false;

  // Per the Fetch Standard's "determine nosniff" algorithm, only the FIRST
  // value of the (comma-separated, possibly repeated) header is significant.
  // Comparing the whole normalized header would miss responses that send the
  // header more than once, because the values get joined into
  // "nosniff, nosniff". A null |iter| asks EnumerateHeader for the first
  // value only.
  std::string first_value;
  if (!response.headers->EnumerateHeader(/*iter=*/nullptr,
                                         "x-content-type-options",
                                         &first_value)) {
    return false;
  }
  return base::EqualsCaseInsensitiveASCII(first_value, "nosniff");
}
// static
CrossOriginReadBlocking::CorbResponseAnalyzer::CrossOriginProtectionDecision
CrossOriginReadBlocking::CorbResponseAnalyzer::
    BlockingDecisionToProtectionDecision(Decision blocking_decision) {
  // One-to-one translation of a header-based blocking decision into the
  // protection-decision enum used for logging.
  switch (blocking_decision) {
    case Decision::kBlock:
      return CrossOriginProtectionDecision::kBlock;
    case Decision::kSniffMore:
      return CrossOriginProtectionDecision::kNeedToSniffMore;
    case Decision::kAllow:
      break;
  }
  return CrossOriginProtectionDecision::kAllow;
}
// static
CrossOriginReadBlocking::CorbResponseAnalyzer::CrossOriginProtectionDecision
CrossOriginReadBlocking::CorbResponseAnalyzer::
    SniffingDecisionToProtectionDecision(bool found_blockable_content) {
  // Translates the outcome of sniffing into the matching "after sniffing"
  // protection decision.
  return found_blockable_content
             ? CrossOriginProtectionDecision::kBlockedAfterSniffing
             : CrossOriginProtectionDecision::kAllowedAfterSniffing;
}
// Records UMA histograms describing how CORB treated a response that was
// flagged as sensitive by the CORS heuristic and/or the cache heuristic.
//
// |protection_decision| is the decision to record. For each heuristic that
// flagged the response, it is logged to the histogram matching
// |mime_type_bucket_| (protected / public / other). For the protected bucket
// two blocked outcomes get extra boolean histograms:
//  - kBlock: range-request support and presence of the nosniff header;
//  - kBlockedAfterSniffing: range-request support.
// Range-request support is also logged once per call, whichever heuristic
// fired.
//
// NOTE(review): the CORS and cache branches are intentionally parallel and
// differ only in histogram names. Presumably they are kept unrolled because
// the histogram macros take literal names - confirm before refactoring.
void CrossOriginReadBlocking::CorbResponseAnalyzer::
LogSensitiveResponseProtection(
CrossOriginProtectionDecision protection_decision) const {
// Callers must only log responses that at least one heuristic flagged.
DCHECK(seems_sensitive_from_cors_heuristic_ ||
seems_sensitive_from_cache_heuristic_);
// Histograms for responses flagged by the CORS heuristic.
if (seems_sensitive_from_cors_heuristic_) {
switch (mime_type_bucket_) {
case kProtected:
UMA_HISTOGRAM_ENUMERATION(
"SiteIsolation.CORBProtection.CORSHeuristic.ProtectedMimeType",
protection_decision);
// We report if a response with a protected MIME type supports range
// requests since we want to measure how often making a multipart range
// requests would have allowed bypassing CORB.
if (protection_decision == CrossOriginProtectionDecision::kBlock) {
UMA_HISTOGRAM_BOOLEAN(
"SiteIsolation.CORBProtection.CORSHeuristic.ProtectedMimeType."
"BlockedWithRangeSupport",
supports_range_requests_);
UMA_HISTOGRAM_BOOLEAN(
"SiteIsolation.CORBProtection.CORSHeuristic.ProtectedMimeType."
"BlockedWithoutSniffing.HasNoSniff",
has_nosniff_header_);
} else if (protection_decision ==
CrossOriginProtectionDecision::kBlockedAfterSniffing) {
UMA_HISTOGRAM_BOOLEAN(
"SiteIsolation.CORBProtection.CORSHeuristic.ProtectedMimeType."
"BlockedAfterSniffingWithRangeSupport",
supports_range_requests_);
}
break;
case kPublic:
UMA_HISTOGRAM_ENUMERATION(
"SiteIsolation.CORBProtection.CORSHeuristic.PublicMimeType",
protection_decision);
break;
case kOther:
UMA_HISTOGRAM_ENUMERATION(
"SiteIsolation.CORBProtection.CORSHeuristic.OtherMimeType",
protection_decision);
}
}
// Histograms for responses flagged by the cache heuristic; mirrors the CORS
// branch above with "CacheHeuristic" histogram names.
if (seems_sensitive_from_cache_heuristic_) {
switch (mime_type_bucket_) {
case kProtected:
UMA_HISTOGRAM_ENUMERATION(
"SiteIsolation.CORBProtection.CacheHeuristic.ProtectedMimeType",
protection_decision);
// Same extra range-support / nosniff reporting as for the CORS heuristic.
if (protection_decision == CrossOriginProtectionDecision::kBlock) {
UMA_HISTOGRAM_BOOLEAN(
"SiteIsolation.CORBProtection.CacheHeuristic.ProtectedMimeType."
"BlockedWithRangeSupport",
supports_range_requests_);
UMA_HISTOGRAM_BOOLEAN(
"SiteIsolation.CORBProtection.CacheHeuristic.ProtectedMimeType."
"BlockedWithoutSniffing.HasNoSniff",
has_nosniff_header_);
} else if (protection_decision ==
CrossOriginProtectionDecision::kBlockedAfterSniffing) {
UMA_HISTOGRAM_BOOLEAN(
"SiteIsolation.CORBProtection.CacheHeuristic.ProtectedMimeType."
"BlockedAfterSniffingWithRangeSupport",
supports_range_requests_);
}
break;
case kPublic:
UMA_HISTOGRAM_ENUMERATION(
"SiteIsolation.CORBProtection.CacheHeuristic.PublicMimeType",
protection_decision);
break;
case kOther:
UMA_HISTOGRAM_ENUMERATION(
"SiteIsolation.CORBProtection.CacheHeuristic.OtherMimeType",
protection_decision);
}
}
// Also log if the server supports range requests, since these may allow
// bypassing CORB.
UMA_HISTOGRAM_BOOLEAN(
"SiteIsolation.CORBProtection.SensitiveWithRangeSupport",
supports_range_requests_);
}
} // namespace network::corb