blob: 45a8d690291051a5141df2de4b88ca900b41ce4f [file] [log] [blame]
// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "services/network/orb/orb_impl.h"

#include "base/check.h"
#include "base/check_deref.h"
#include "base/containers/contains.h"
#include "base/metrics/histogram_functions.h"
#include "base/rand_util.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "net/base/mime_sniffer.h"
#include "net/http/http_util.h"
#include "net/url_request/url_request.h"
#include "services/network/orb/orb_mimetypes.h"
#include "services/network/orb/orb_sniffers.h"
#include "services/network/public/cpp/features.h"
#include "services/network/public/cpp/resource_request.h"
#include "services/network/public/mojom/url_response_head.mojom.h"
using Decision = network::orb::ResponseAnalyzer::Decision;
namespace network::orb {
namespace {
// Returns true when `mime_type` names an image format that is not recognized
// by net::SniffMimeTypeFromLocalData, so that such images can still be
// allowed. At the moment the only such type is "image/svg+xml" (compared
// ASCII case-insensitively).
//
// TODO(lukasza): Once full Javascript sniffing is implemented, we may start
// to undesirably block future (=unsniffable) image formats. We should
// explicitly recognize MIME types of such image formats here. See also
// https://github.com/annevk/orb/issues/3#issuecomment-974334651
bool IsNonSniffableImageMimeType(std::string_view mime_type) {
  const bool is_svg =
      base::EqualsCaseInsensitiveASCII(mime_type, "image/svg+xml");
  return is_svg;
}
// Returns true when `mime_type` should be treated as an audio/video type:
// anything starting with "audio/" or "video/", plus a short list of exact
// MIME types. All comparisons are ASCII case-insensitive.
//
// TODO(lukasza): Restrict this to only known, non-sniffable audio/video types
// (hopefully we can reach agreement on this approach + document this in ORB
// spec). See also https://github.com/annevk/orb/issues/3. Notes:
// - In the long-term (once Javascript sniffing is implemented) this will
//   prevent non-webby images (e.g. image/vnd.adobe.photoshop) from being
//   unnecessarily allowed by ORB.
// - In the short-term this shouldn't matter for security of 200 responses
//   (with only HTML/XML/JSON sniffing current implementation wouldn't block
//   such non-webby images anyway).
// - The current implementation reduces risk of blocking range requests for
//   A) non-sniffable types and B) range responses for middle-of-resource
//   when first-bytes-response wasn't seen earlier.
bool IsAudioOrVideoMimeType(std::string_view mime_type) {
  constexpr std::string_view kMediaPrefixes[] = {"audio/", "video/"};
  for (const std::string_view media_prefix : kMediaPrefixes) {
    if (base::StartsWith(mime_type, media_prefix,
                         base::CompareCase::INSENSITIVE_ASCII)) {
      return true;
    }
  }

  constexpr std::string_view kExactMediaTypes[] = {
      // Special-casing "application/ogg" is a minor departure from the spec
      // when IsAudioOrVideoMimeType is called from
      // IsOpaqueSafelistedMimeType. OTOH, covering "application/ogg" here
      // helps implement step 7 from ORB (sniffing audio/video in the
      // OpaqueResponseBlockingAnalyzer::Sniff method) because
      // net::SniffMimeTypeFromLocalData may return "application/ogg".
      "application/ogg",
      // TODO(lukasza): Address this departure from the spec (which doesn't
      // explicitly mention DASH and other MIME types here). The current
      // implementation enforces strict MIME types for DASH/HLS resources - if
      // this can ship without too much of web-compatibility issues, then we
      // should modify ORB spec to match this implementation. If there is too
      // much web-compatibility risk, then ORB might need to fully parse
      // DASH/HLS manifests.
      "application/dash+xml",
      "application/vnd.apple.mpegurl",
      "text/vtt",
  };
  for (const std::string_view exact_type : kExactMediaTypes) {
    if (base::EqualsCaseInsensitiveASCII(mime_type, exact_type)) {
      return true;
    }
  }

  return false;
}
// Returns true when `mime_type` is "text/css" (ASCII case-insensitive).
bool IsTextCssMimeType(std::string_view mime_type) {
  const bool is_css = base::EqualsCaseInsensitiveASCII(mime_type, "text/css");
  return is_css;
}
// The ORB spec defines "an opaque-safelisted MIME type" as a JavaScript MIME
// type or a MIME type whose essence is "text/css" or "image/svg+xml".
//
// This implementation differs from that definition in two ways:
// - JavaScript MIME types are NOT handled here. See the comments at
//   IsOpaqueSafelistedMimeTypeThatWeSniffAnyway and at the
//   IsOpaqueSafelistedMimeType call site for details.
// - Audio/video MIME types ARE treated as safelisted.
//   TODO(vogelheim): Departure from the spec - see the comment in
//   IsAudioOrVideoMimeType for more details.
bool IsOpaqueSafelistedMimeType(std::string_view mime_type) {
  return IsTextCssMimeType(mime_type) ||            // Spec: text/css.
         IsNonSniffableImageMimeType(mime_type) ||  // Spec: image/svg+xml.
         IsAudioOrVideoMimeType(mime_type);         // Departure from spec.
}
// The ORB spec defines "an opaque-safelisted MIME type". Until we have full
// ORB compliance, some of those MIME types need to be handled differently: the
// JavaScript-parser-breaker sniffer from CORB is run on these resources.
//
// Currently this covers JavaScript MIME types, which the spec safelists but
// which are handled in HandleEndOfSniffableResponseBody instead.
bool IsOpaqueSafelistedMimeTypeThatWeSniffAnyway(std::string_view mime_type) {
  return IsJavascriptMimeType(mime_type);
}
// Implements https://fetch.spec.whatwg.org/#ok-status: true when the response
// carries headers and its status code lies in the range 200..299 (inclusive).
bool IsOkayHttpStatus(const mojom::URLResponseHead& response) {
  if (!response.headers) {
    return false;
  }
  const int status = response.headers->response_code();
  return status >= 200 && status < 300;
}
// Returns true when `response` carries headers whose status code equals
// `expected_status_code`. A response without headers never matches.
bool IsHttpStatus(const mojom::URLResponseHead& response,
                  int expected_status_code) {
  return response.headers &&
         response.headers->response_code() == expected_status_code;
}
// Returns true when `response` is a 206 response whose "content-range" header
// parses successfully and starts at a byte position greater than zero (i.e.
// the response does not begin at the first byte of the resource).
bool IsRangeResponseWithMiddleOfResource(
    const mojom::URLResponseHead& response) {
  // IsHttpStatus() also rejects responses without headers, so
  // `response.headers` is safe to dereference past this point.
  if (!IsHttpStatus(response, 206)) {
    return false;
  }

  const std::optional<std::string> content_range =
      response.headers->GetNormalizedHeader("content-range");
  if (!content_range.has_value()) {
    return false;
  }

  int64_t range_start = -1;
  int64_t range_end = -1;
  int64_t total_length = -1;
  const bool parsed_ok = net::HttpUtil::ParseContentRangeHeaderFor206(
      *content_range, &range_start, &range_end, &total_length);
  return parsed_ok && range_start > 0;
}
// Returns true when `response` should be treated as an opaque response, i.e.
// one that ORB applies to.
bool IsOpaqueResponse(const std::optional<url::Origin>& request_initiator,
                      mojom::RequestMode request_mode,
                      const mojom::URLResponseHead& response) {
  // ORB only applies to "no-cors" requests, and browser-initiated requests
  // (those without an initiator) are never opaque.
  if (request_mode != mojom::RequestMode::kNoCors ||
      !request_initiator.has_value()) {
    return false;
  }

  // Responses that did not come through a service worker need no further
  // checks.
  if (!response.was_fetched_via_service_worker) {
    return true;
  }

  // Requests from foo.example.com will consult foo.example.com's service
  // worker first (if one has been registered). The service worker can handle
  // requests initiated by foo.example.com even if they are cross-origin (e.g.
  // requests for bar.example.com). This is okay, because there is no security
  // boundary between foo.example.com and the service worker of
  // foo.example.com + because the response data is "conjured" within the
  // service worker of foo.example.com (rather than being fetched from
  // bar.example.com). Therefore such responses should not be blocked by CORB,
  // unless the initiator opted out of CORS / opted into receiving an opaque
  // response. See also https://crbug.com/803672.
  switch (response.response_type) {
    case network::mojom::FetchResponseType::kOpaque:
    case network::mojom::FetchResponseType::kOpaqueRedirect:
      // Opaque responses are eligible for blocking.
      return true;
    case network::mojom::FetchResponseType::kBasic:
    case network::mojom::FetchResponseType::kCors:
    case network::mojom::FetchResponseType::kDefault:
    case network::mojom::FetchResponseType::kError:
      // Non-opaque responses shouldn't be blocked.
      return false;
  }
  return true;
}
// Returns true when `response` has an "x-content-type-options" header whose
// normalized value equals "nosniff" (ASCII case-insensitive). Responses
// without headers, or without that header, yield false.
//
// TODO(vogelheim): Check for compatibility with spec &
// ParseContentTypeOptionsHeader. Maybe move this to parsed_headers.
bool HasNoSniff(const mojom::URLResponseHead& response) {
  if (!response.headers) {
    return false;
  }
  const std::optional<std::string> options_value =
      response.headers->GetNormalizedHeader("x-content-type-options");
  return options_value.has_value() &&
         base::EqualsCaseInsensitiveASCII(*options_value, "nosniff");
}
} // namespace
// Constructs an analyzer bound to `state`, the per-factory ORB state that
// StoreAllowedAudioVideoRequest() inserts into and
// IsAllowedAudioVideoRequest() queries. `state` must not be null.
OpaqueResponseBlockingAnalyzer::OpaqueResponseBlockingAnalyzer(
    PerFactoryState* state)
    // CHECK_DEREF validates `state` *before* dereferencing it. A CHECK() in
    // the constructor body would only run after `*state` had already been
    // evaluated by the member initializer, which is too late to guard it.
    : per_factory_state_(CHECK_DEREF(state)) {}
// Nothing to clean up explicitly.
//
// TODO(crbug.com/40169301): Add UMA tracking the size of ORB state
// from `per_factory_state_`.
OpaqueResponseBlockingAnalyzer::~OpaqueResponseBlockingAnalyzer() = default;
// Makes the first ORB decision for a response, using only request properties
// and response headers (no body bytes are inspected here).
//
// Returns:
// - Decision::kAllow when ORB does not apply (IsOpaqueResponse() is false, or
//   the initiator is same-origin with `request_url`), when the MIME type is
//   treated as opaque-safelisted, or for a middle-of-resource 206 response
//   whose URL IsAllowedAudioVideoRequest() recognizes.
// - Decision::kBlock (after setting `blocking_decision_reason_`) for
//   never-sniffed MIME types, for html/json/plain/xml MIME types accompanied
//   by a nosniff header, and for any other middle-of-resource 206 response.
// - Decision::kSniffMore in all remaining cases.
//
// Side effects: unless one of the first two early returns is taken, records
// `is_http_status_okay_`, `is_empty_response_`, `is_attribution_response_`,
// `final_request_url_`, `request_destination_from_renderer_`, `mime_type_`
// and `is_no_sniff_header_present_` for use by the later analysis steps.
Decision OpaqueResponseBlockingAnalyzer::Init(
const GURL& request_url,
const std::optional<url::Origin>& request_initiator,
mojom::RequestMode request_mode,
mojom::RequestDestination request_destination_from_renderer,
const network::mojom::URLResponseHead& response) {
// Exclude responses that ORB doesn't apply to.
if (!IsOpaqueResponse(request_initiator, request_mode, response))
return Decision::kAllow;
// IsOpaqueResponse() returns false for requests without an initiator, so the
// initiator is known to be present from here on.
DCHECK(request_initiator.has_value());
// Same-origin requests are allowed (the ORB spec doesn't explicitly deal with
// this, because it assumes that the Fetch spec has already determined that
// the request is cross-origin, before handing off to ORB).
if (request_initiator->IsSameOriginWith(request_url))
return Decision::kAllow;
// Remember request properties that will be needed later.
// (`is_http_status_okay_`, `is_empty_response_` and
// `is_attribution_response_` are read by ShouldReportBlockedResponse().)
is_http_status_okay_ = IsOkayHttpStatus(response);
// A response counts as empty if its content length is zero or its status is
// 204.
if (response.content_length == 0)
is_empty_response_ = true;
if (response.headers && response.headers->response_code() == 204)
is_empty_response_ = true;
// A response counts as an attribution response if it carries any of the
// Attribution-Reporting-Register-* headers listed below.
if (response.headers &&
(response.headers->HasHeader("Attribution-Reporting-Register-Source") ||
response.headers->HasHeader("Attribution-Reporting-Register-Trigger") ||
response.headers->HasHeader(
"Attribution-Reporting-Register-OS-Source") ||
response.headers->HasHeader(
"Attribution-Reporting-Register-OS-Trigger"))) {
is_attribution_response_ = true;
}
// TODO(lukasza): Consider tweaking how `final_request_url_` is used to
// properly handle interactions between redirects and range requests. For
// example, ORB might sniff an initial a.com/a1 -> a.com/a2 redirect as media
// which should allow future range requests to the "same" resource. But what
// if in the future something like load-balancing kicks-in and a.com/a1 ->
// a.com/a3 redirect happens instead? This might require remembering that not
// just a2, but also a1 is safe. Similar considerations (checking all
// consecutive, same-origin redirect hops) apply both to the initial request
// (deciding which URLs from the redirect chain to store as validated as
// media) and to the subsequent range requests (deciding which URLs from the
// chain to validate against the ones in the store of validated URLs).
final_request_url_ = request_url;
// Consulted by ShouldHandleBlockedResponseAs() when choosing between a
// network error and an empty response.
request_destination_from_renderer_ = request_destination_from_renderer;
// 1. Let mimeType be the result of extracting a MIME type from response's
// header list.
// (Without headers nothing is written to `mime_type_` here; an empty
// `mime_type_` is what the code below treats as "failure".)
if (response.headers)
response.headers->GetMimeType(&mime_type_);
// 2. Let nosniff be the result of determining nosniff given response's header
// list.
is_no_sniff_header_present_ =
HasNoSniff(response);
// 3. If mimeType is not failure, then:
if (!mime_type_.empty()) {
// 3.i. If mimeType is an opaque-safelisted MIME type, then return true.
//
// Because "ORB v0.1" does not have a JSON/JS parser step, we will not
// consider JS resources here and instead employ JSON-or-JS-parser-breaker
// sniffer on these resources. This means that for JS resources, step 3.i.
// from ORB is postponed until HandleEndOfSniffableResponseBody, instead of
// being handled here.
//
// Whether ORB spec can adopt this behavior is being discussed in
// https://github.com/annevk/orb/issues/30.
//
// TODO(vogelheim/lukasza): Resolve this difference from the ORB spec.
// TODO(vogelheim/lukasza): Consider other early-allow mechanisms (e.g. CORP
// - see https://github.com/annevk/orb/issues/30#issuecomment-971373842).
if (IsOpaqueSafelistedMimeType(mime_type_))
return Decision::kAllow;
// ii. If mimeType is an opaque-blocklisted-never-sniffed MIME type, then
// return false.
// iv. If nosniff is true and mimeType is an opaque-blocklisted MIME type or
// its essence is "text/plain", then return false.
//
// Step iii. is missing - this is departure from how full ORB handles 206
// responses labeled as html/json/xml. This seems okay given that we
// tighten our implementation of step 4 below (handling of range requests).
switch (GetCanonicalMimeType(mime_type_)) {
case MimeType::kNeverSniffed:
blocking_decision_reason_ =
BlockingDecisionReason::kNeverSniffedMimeType;
return Decision::kBlock; // Step ii.
case MimeType::kHtml:
case MimeType::kJson:
case MimeType::kPlain:
case MimeType::kXml:
// Without a nosniff header these types fall through to the range check
// and sniffing below.
if (is_no_sniff_header_present_) {
blocking_decision_reason_ = BlockingDecisionReason::kNoSniffHeader;
return Decision::kBlock; // Step iv.
}
break;
case MimeType::kOthers:
// TODO(vogelheim/lukasza): Departure from the spec: We currently
// handle audio/video MIME types as "opaque safelisted", to prevent
// sniffing on them and on XML-based media types in particular.
// (Audio/video types already returned kAllow via
// IsOpaqueSafelistedMimeType above, hence the CHECK.)
CHECK(!IsAudioOrVideoMimeType(mime_type_));
break;
case MimeType::kInvalidMimeType:
break;
}
}
// 4. If request's no-cors media request state is "subsequent", then return
// true.
//
// TODO(lukasza): Departure from the spec:
// Diff from the (blocking) step 3.iii.:
// - Moved slightly later
// - No extra conditions like "and mimeType is an opaque-blocklisted MIME
// type" (e.g. html, xml, or json).
// Diff from the (allowing) step 4.:
// - Only applying this step to IsRangeResponseWithMiddleOfResource cases
if (IsRangeResponseWithMiddleOfResource(response)) {
// A middle-of-resource range response is only allowed for a URL that
// StoreAllowedAudioVideoRequest() recorded earlier (Sniff() does so when
// it detects audio/video content).
if (IsAllowedAudioVideoRequest(request_url)) {
return Decision::kAllow;
} else {
blocking_decision_reason_ =
BlockingDecisionReason::kUnexpectedRangeResponse;
return Decision::kBlock;
}
}
// 5. Wait for 1024 bytes of response or end-of-file, whichever comes first
// and let bytes be those bytes.
return Decision::kSniffMore;
}
// Makes an ORB decision from `data`, the response body bytes available so
// far. Returns kAllow for content sniffed as audio/video or image and for
// responses without a MIME type, kBlock (after recording
// `blocking_decision_reason_`) for content sniffed as HTML, XML, or a
// JSON / JS-parser-breaker resource, and kSniffMore otherwise.
Decision OpaqueResponseBlockingAnalyzer::Sniff(std::string_view data) {
  // Records why the response is being blocked and yields the blocking
  // decision.
  const auto block_because = [this](BlockingDecisionReason reason) {
    blocking_decision_reason_ = reason;
    return Decision::kBlock;
  };

  std::string detected_mime_type;
  net::SniffMimeTypeFromLocalData(data, &detected_mime_type);

  // 7. If the audio or video type pattern matching algorithm given bytes does
  //    not return undefined, then:
  //    i. Append (request's opaque media identifier, request's current URL)
  //       to the user agent's opaque-safelisted requesters set.
  //    ii. Return true.
  if (IsAudioOrVideoMimeType(detected_mime_type)) {
    StoreAllowedAudioVideoRequest(final_request_url_);
    return Decision::kAllow;
  }

  // Spec-divergence: no step 8:
  //   8. If request's no-cors media request state is not "N/A", then return
  //      false.
  // This implementation doesn't know if the request came from a media element
  // or not. Making the decision based on earlier sniffing should be okay.

  // 9. If the image type pattern matching algorithm given bytes does not
  //    return undefined, then return true.
  const bool sniffed_as_image = base::StartsWith(
      detected_mime_type, "image/", base::CompareCase::INSENSITIVE_ASCII);
  if (sniffed_as_image) {
    return Decision::kAllow;
  }

  // At this point, a number of MIME types should be out of the running:
  // opaque-safelisted MIME types are never sniffed.
  CHECK(!IsTextCssMimeType(mime_type_));
  CHECK(!IsAudioOrVideoMimeType(mime_type_));
  CHECK(!IsNonSniffableImageMimeType(mime_type_));

  // 12. If mimeType is failure, then return true.
  //
  // The spec proposal handles this step before checking for JS and JSON. To
  // be compatible, we handle this before our 'sniffing' steps that handle
  // those formats.
  //
  // TODO(lukasza): This is not fully accurate - it doesn't capture all the
  // possible failure modes of
  // https://fetch.spec.whatwg.org/#concept-header-extract-mime-type
  if (mime_type_.empty()) {
    return Decision::kAllow;
  }

  // Check if the response is HTML, XML, or JSON, in which case it is surely
  // not JavaScript. (The sniffers account for HTML/JS polyglot cases - see
  // https://crbug.com/839945 and https://crbug.com/839425. OTOH, the sniffers
  // do not account for CSS/HTML or CSS/JS-parser-breakers polyglots so CSS is
  // explicitly excluded from the sniffing below.)
  //
  // TODO(lukasza): Departure from the spec. This avoids having to sniff
  // Javascript in the full response as described in the "Gradual CORB -> ORB
  // transition" doc at
  // https://docs.google.com/document/d/1qUbE2ySi6av3arUEw5DNdFJIKKBbWGRGsXz_ew3S7HQ/edit?usp=sharing
  // Diff: This is a new sniffing step for the 1st 1024 bytes.
  // Diff: This doesn't sniff for JavaScript, but for non-Html/Xml/Json.
  if (SniffForHTML(data) == SniffingResult::kYes) {
    return block_because(BlockingDecisionReason::kSniffedAsHtml);
  }
  if (SniffForXML(data) == SniffingResult::kYes) {
    return block_because(BlockingDecisionReason::kSniffedAsXml);
  }
  // Check for JSON and JS parser breakers.
  if (SniffForFetchOnlyResource(data) == SniffingResult::kYes) {
    return block_because(BlockingDecisionReason::kSniffedAsJson);
  }

  return Decision::kSniffMore;
}
// Makes the final ORB decision once no more sniffable body is available.
// In the current implementation every path through this method allows the
// response (see the spec departures listed below).
Decision OpaqueResponseBlockingAnalyzer::HandleEndOfSniffableResponseBody() {
  // Deviation from spec: We run JSON-or-JS-parser-breaker sniffer on some
  // MIME types. To do so, we have taken them out of
  // IsOpaqueSafelistedMimeType and instead handle them here. So this
  // effectively handles some cases the spec handles in step 3.i.
  //
  // TODO(vogelheim/lukasza): Resolve this difference from the ORB spec.
  // TODO(vogelheim/lukasza): Consider other early-allow mechanisms (e.g. CORP
  // - see https://github.com/annevk/orb/issues/30#issuecomment-971373842).
  const bool is_late_safelisted_type =
      IsOpaqueSafelistedMimeTypeThatWeSniffAnyway(mime_type_);
  if (is_late_safelisted_type) {
    return Decision::kAllow;
  }

  // TODO(lukasza): Implement the following steps from ORB spec:
  //   10. If nosniff is true, then return false.
  //   11. If response's status is not an ok status, then return false.
  // (Skipping these steps minimizes the risk of shipping the initial ORB
  // implementation.)

  // TODO(lukasza): Departure from the spec discussed in
  // https://github.com/annevk/orb/issues/3.
  // Diff: Removing step 13:
  //   13. If mimeType's essence starts with "audio/", "image/", or "video/",
  //       then return false.

  // TODO(lukasza): Departure from the spec, because the current
  // implementation avoids full Javascript parsing as described in the
  // "Gradual CORB -> ORB transition" doc at
  // https://docs.google.com/document/d/1qUbE2ySi6av3arUEw5DNdFJIKKBbWGRGsXz_ew3S7HQ/edit?usp=sharing
  // Diff: Skipping/ignoring step 15:
  //   15. If response's body parses as JavaScript and does not parse as JSON,
  //       then return true.
  // Diff: Changing step 16 to fail open (e.g. return true / kAllow):
  //   16. Return false.
  return Decision::kAllow;
}
// Returns whether a blocked response should be reported: always for
// attribution responses, otherwise only for non-empty responses with an ok
// HTTP status.
bool OpaqueResponseBlockingAnalyzer::ShouldReportBlockedResponse() const {
  // Empty attribution responses may still result in changes to web-visible
  // behavior when blocked, so they should always be reported. See
  // https://crbug.com/1369637.
  if (is_attribution_response_) {
    return true;
  }
  return is_http_status_okay_ && !is_empty_response_;
}
// Chooses how a blocked response is surfaced:
// - "ORB v0.1" uses CORB-style error handling (injecting an empty response).
// - "ORB v0.2" uses ORB-specified error handling (injecting a network error)
//   for non-script fetches.
// - "ORB errors-for-all-fetches" uses ORB-specified error handling
//   everywhere.
ResponseAnalyzer::BlockedResponseHandling
OpaqueResponseBlockingAnalyzer::ShouldHandleBlockedResponseAs() const {
  const bool errors_for_all_fetches = base::FeatureList::IsEnabled(
      features::kOpaqueResponseBlockingErrorsForAllFetches);
  const bool has_non_empty_destination =
      request_destination_from_renderer_ != mojom::RequestDestination::kEmpty;

  return (errors_for_all_fetches || has_non_empty_destination)
             ? BlockedResponseHandling::kNetworkError
             : BlockedResponseHandling::kEmptyResponse;
}
// Records `media_url` in the per-factory state so that
// IsAllowedAudioVideoRequest() recognizes it later.
void OpaqueResponseBlockingAnalyzer::StoreAllowedAudioVideoRequest(
    const GURL& media_url) {
  auto& allowed_media_urls = *per_factory_state_;
  allowed_media_urls.insert(media_url);
}
// Returns true when `media_url` is present in the per-factory state, i.e. it
// was recorded earlier via StoreAllowedAudioVideoRequest().
bool OpaqueResponseBlockingAnalyzer::IsAllowedAudioVideoRequest(
    const GURL& media_url) {
  const auto& allowed_media_urls = *per_factory_state_;
  return base::Contains(allowed_media_urls, media_url);
}
} // namespace network::orb