| // Copyright 2021 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "services/network/orb/orb_impl.h" |
| |
| #include "base/check.h" |
| #include "base/containers/contains.h" |
| #include "base/metrics/histogram_functions.h" |
| #include "base/rand_util.h" |
| #include "base/strings/string_number_conversions.h" |
| #include "base/strings/string_util.h" |
| #include "net/base/mime_sniffer.h" |
| #include "net/http/http_util.h" |
| #include "net/url_request/url_request.h" |
| #include "services/network/orb/orb_mimetypes.h" |
| #include "services/network/orb/orb_sniffers.h" |
| #include "services/network/public/cpp/features.h" |
| #include "services/network/public/cpp/resource_request.h" |
| #include "services/network/public/mojom/url_response_head.mojom.h" |
| |
| using Decision = network::orb::ResponseAnalyzer::Decision; |
| |
| namespace network::orb { |
| |
| namespace { |
| |
| bool IsNonSniffableImageMimeType(std::string_view mime_type) { |
| // TODO(lukasza): Once full Javascript sniffing is implemented, we may start |
| // to undesirably block future (=unsniffable) image formats. We should |
| // explicitly recognize MIME types of such image formats below. See also |
| // https://github.com/annevk/orb/issues/3#issuecomment-974334651 |
| |
| // This function returns true for image formats that are not recognized by |
| // net::SniffMimeTypeFromLocalData. This helps to allow such images. |
| return base::EqualsCaseInsensitiveASCII(mime_type, "image/svg+xml"); |
| } |
| |
| bool IsAudioOrVideoMimeType(std::string_view mime_type) { |
| // TODO(lukasza): Restrict this to only known, non-sniffable audio/video types |
| // (hopefully we can reach agreement on this approach + document this in ORB |
| // spec). See also https://github.com/annevk/orb/issues/3. Notes: |
| // - In the long-term (once Javascript sniffing is implemented) this will |
| // prevent non-webby images (e.g. image/vnd.adobe.photoshop) from being |
| // unnecessarily allowed by ORB. |
| // - In the short-term this shouldn't matter for security of 200 responses |
| // (with only HTML/XML/JSON sniffing current implementation wouldn't block |
| // such non-webby images anyway). |
| // - The current implementation reduces risk of blocking range requests for |
| // A) non-sniffable types and B) range responses for middle-of-resource |
| // when first-bytes-response wasn't seen earlier. |
| constexpr auto kCaseInsensitive = base::CompareCase::INSENSITIVE_ASCII; |
| if (base::StartsWith(mime_type, "audio/", kCaseInsensitive) || |
| base::StartsWith(mime_type, "video/", kCaseInsensitive)) { |
| return true; |
| } |
| |
| // Special-casing "application/ogg" here is a minor departure from the spec |
| // when IsAudioOrVideoMimeType is called from IsOpaqueSafelistedMimeType. |
| // OTOH, covering "application/ogg" here helps helps implement step 7 from ORB |
| // (sniffing audio/video in the OpaqueResponseBlockingAnalyzer::Sniff method |
| // below) because net::SniffMimeTypeFromLocalData may return |
| // "application/ogg". |
| if (base::EqualsCaseInsensitiveASCII(mime_type, "application/ogg")) |
| return true; |
| |
| // TODO(lukasza): Address this departure from the spec (which doesn't |
| // explicitly mention DASH and other MIME types here). The current |
| // implementation enforces strict MIME types for DASH/HLS resources - if this |
| // can ship without too much of web-compatibility issues, then we should |
| // modify ORB spec to match this implementation. If there is too much |
| // web-compatibility risk, then ORB might need to fully parse DASH/HLS |
| // manifests. |
| if (base::EqualsCaseInsensitiveASCII(mime_type, "application/dash+xml")) |
| return true; |
| if (base::EqualsCaseInsensitiveASCII(mime_type, |
| "application/vnd.apple.mpegurl")) |
| return true; |
| if (base::EqualsCaseInsensitiveASCII(mime_type, "text/vtt")) |
| return true; |
| |
| return false; |
| } |
| |
| bool IsTextCssMimeType(std::string_view mime_type) { |
| return base::EqualsCaseInsensitiveASCII(mime_type, "text/css"); |
| } |
| |
| // ORB spec says that "An opaque-safelisted MIME type" is a JavaScript MIME type |
| // or a MIME type whose essence is "text/css" or "image/svg+xml". |
| bool IsOpaqueSafelistedMimeType(std::string_view mime_type) { |
| // Based on the spec: Is it a MIME type whose essence is text/css [...] ? |
| if (IsTextCssMimeType(mime_type)) |
| return true; |
| |
| // Based on the spec: Is it a MIME type whose essence is [...] image/svg+xml? |
| if (IsNonSniffableImageMimeType(mime_type)) |
| return true; |
| |
| // Deviation from spec: We do not handle JavaScript MIME types here. See |
| // comments at IsOpaqueSafelistedMimeTypeThatWeSniffAnyway and the |
| // IsOpaqueSafelistedMimeType call site for details. |
| |
| // TODO(vogelheim): Departure from the spec - see the comment in |
| // IsAudioOrVideoMimeType for more details. |
| if (IsAudioOrVideoMimeType(mime_type)) |
| return true; |
| |
| return false; |
| } |
| |
| // ORB spec defines "an opaque-safelisted MIME type". Until we have full ORB |
| // compliance, we'll need to handle some MIME types differently and run the |
| // JavaScript-parser-breaker sniffer from CORB on these resources. |
| bool IsOpaqueSafelistedMimeTypeThatWeSniffAnyway(std::string_view mime_type) { |
| // Based on the spec, but handled in HandleEndOfSniffableResponseBody: |
| // Is it a JavaScript MIME type? |
| if (IsJavascriptMimeType(mime_type)) { |
| return true; |
| } |
| |
| return false; |
| } |
| |
| // This corresponds to https://fetch.spec.whatwg.org/#ok-status |
| bool IsOkayHttpStatus(const mojom::URLResponseHead& response) { |
| if (!response.headers) |
| return false; |
| |
| int code = response.headers->response_code(); |
| return (200 <= code) && (code <= 299); |
| } |
| |
| bool IsHttpStatus(const mojom::URLResponseHead& response, |
| int expected_status_code) { |
| if (!response.headers) |
| return false; |
| |
| int code = response.headers->response_code(); |
| return code == expected_status_code; |
| } |
| |
| bool IsRangeResponseWithMiddleOfResource( |
| const mojom::URLResponseHead& response) { |
| if (!response.headers) |
| return false; |
| |
| if (!IsHttpStatus(response, 206)) |
| return false; |
| |
| std::optional<std::string> range = |
| response.headers->GetNormalizedHeader("content-range"); |
| if (!range) { |
| return false; |
| } |
| |
| int64_t first_byte_position = -1; |
| int64_t last_byte_position = -1; |
| int64_t instance_length = -1; |
| if (!net::HttpUtil::ParseContentRangeHeaderFor206( |
| *range, &first_byte_position, &last_byte_position, |
| &instance_length)) { |
| return false; |
| } |
| |
| return first_byte_position > 0; |
| } |
| |
| bool IsOpaqueResponse(const std::optional<url::Origin>& request_initiator, |
| mojom::RequestMode request_mode, |
| const mojom::URLResponseHead& response) { |
| // ORB only applies to "no-cors" requests. |
| if (request_mode != mojom::RequestMode::kNoCors) |
| return false; |
| |
| // Browser-initiated requests are never opaque. |
| if (!request_initiator.has_value()) |
| return false; |
| |
| // Requests from foo.example.com will consult foo.example.com's service worker |
| // first (if one has been registered). The service worker can handle requests |
| // initiated by foo.example.com even if they are cross-origin (e.g. requests |
| // for bar.example.com). This is okay, because there is no security boundary |
| // between foo.example.com and the service worker of foo.example.com + because |
| // the response data is "conjured" within the service worker of |
| // foo.example.com (rather than being fetched from bar.example.com). |
| // Therefore such responses should not be blocked by CORB, unless the |
| // initiator opted out of CORS / opted into receiving an opaque response. See |
| // also https://crbug.com/803672. |
| if (response.was_fetched_via_service_worker) { |
| switch (response.response_type) { |
| case network::mojom::FetchResponseType::kBasic: |
| case network::mojom::FetchResponseType::kCors: |
| case network::mojom::FetchResponseType::kDefault: |
| case network::mojom::FetchResponseType::kError: |
| // Non-opaque responses shouldn't be blocked. |
| return false; |
| case network::mojom::FetchResponseType::kOpaque: |
| case network::mojom::FetchResponseType::kOpaqueRedirect: |
| // Opaque responses are eligible for blocking. Continue on... |
| break; |
| } |
| } |
| |
| return true; |
| } |
| |
| bool HasNoSniff( |
| const mojom::URLResponseHead& response) { |
| // TODO(vogelheim): Check for compatibility with spec & |
| // ParseContentTypeOptionsHeader. Maybe move this to parsed_headers. |
| if (!response.headers) { |
| return false; |
| } |
| std::string nosniff_header = |
| response.headers->GetNormalizedHeader("x-content-type-options") |
| .value_or(std::string()); |
| return base::EqualsCaseInsensitiveASCII(nosniff_header, "nosniff"); |
| } |
| |
| } // namespace |
| |
| OpaqueResponseBlockingAnalyzer::OpaqueResponseBlockingAnalyzer( |
| PerFactoryState* state) |
| : per_factory_state_(*state) { |
| CHECK(state); |
| } |
| |
| OpaqueResponseBlockingAnalyzer::~OpaqueResponseBlockingAnalyzer() { |
| // TODO(crbug.com/40169301): Add UMA tracking the size of ORB state |
| // from `per_factory_state_`. |
| } |
| |
| Decision OpaqueResponseBlockingAnalyzer::Init( |
| const GURL& request_url, |
| const std::optional<url::Origin>& request_initiator, |
| mojom::RequestMode request_mode, |
| mojom::RequestDestination request_destination_from_renderer, |
| const network::mojom::URLResponseHead& response) { |
| // Exclude responses that ORB doesn't apply to. |
| if (!IsOpaqueResponse(request_initiator, request_mode, response)) |
| return Decision::kAllow; |
| DCHECK(request_initiator.has_value()); |
| |
| // Same-origin requests are allowed (the ORB spec doesn't explicitly deal with |
| // this, because it assumes that the Fetch spec has already determined that |
| // the request is cross-origin, before handing off to ORB). |
| if (request_initiator->IsSameOriginWith(request_url)) |
| return Decision::kAllow; |
| |
| // Remember request properties that will be needed later. |
| is_http_status_okay_ = IsOkayHttpStatus(response); |
| if (response.content_length == 0) |
| is_empty_response_ = true; |
| if (response.headers && response.headers->response_code() == 204) |
| is_empty_response_ = true; |
| if (response.headers && |
| (response.headers->HasHeader("Attribution-Reporting-Register-Source") || |
| response.headers->HasHeader("Attribution-Reporting-Register-Trigger") || |
| response.headers->HasHeader( |
| "Attribution-Reporting-Register-OS-Source") || |
| response.headers->HasHeader( |
| "Attribution-Reporting-Register-OS-Trigger"))) { |
| is_attribution_response_ = true; |
| } |
| // TODO(lukasza): Consider tweaking how `final_request_url_` is used to |
| // properly handle interactions between redirects and range requests. For |
| // example, ORB might sniff an initial a.com/a1 -> a.com/a2 redirect as media |
| // which should allow future range requests to the "same" resource. But what |
| // if in the future something like load-balancing kicks-in and a.com/a1 -> |
| // a.com/a3 redirect happens instead? This might require remembering that not |
| // just a2, but also a1 is safe. Similar considerations (checking all |
| // consecutive, same-origin redirect hops) apply both to the initial request |
| // (deciding which URLs from the redirect chain to store as validated as |
| // media) and to the subsequent range requests (deciding which URLs from the |
| // chain to validate against the ones in the store of validated URLs). |
| final_request_url_ = request_url; |
| |
| request_destination_from_renderer_ = request_destination_from_renderer; |
| |
| // 1. Let mimeType be the result of extracting a MIME type from response's |
| // header list. |
| if (response.headers) |
| response.headers->GetMimeType(&mime_type_); |
| |
| // 2. Let nosniff be the result of determining nosniff given response's header |
| // list. |
| is_no_sniff_header_present_ = |
| HasNoSniff(response); |
| |
| // 3. If mimeType is not failure, then: |
| if (!mime_type_.empty()) { |
| // 3.i. If mimeType is an opaque-safelisted MIME type, then return true. |
| // |
| // Because "ORB v0.1" does not have a JSON/JS parser step, we will not |
| // consider JS resources here and instead employ JSON-or-JS-parser-breaker |
| // sniffer on these resources. This means that for JS resources, step 3.i. |
| // from ORB is postponed until HandleEndOfSniffableResponseBody, instead of |
| // being handled here. |
| // |
| // Whether ORB spec can adopt this behavior is being discussed in |
| // https://github.com/annevk/orb/issues/30. |
| // |
| // TODO(vogelheim/lukasza): Resolve this difference from the ORB spec. |
| // TODO(vogelheim/lukasza): Consider other early-allow mechanisms (e.g. CORP |
| // - see https://github.com/annevk/orb/issues/30#issuecomment-971373842). |
| if (IsOpaqueSafelistedMimeType(mime_type_)) |
| return Decision::kAllow; |
| |
| // ii. If mimeType is an opaque-blocklisted-never-sniffed MIME type, then |
| // return false. |
| // iv. If nosniff is true and mimeType is an opaque-blocklisted MIME type or |
| // its essence is "text/plain", then return false. |
| // |
| // Step iii. is missing - this is departure from how full ORB handles 206 |
| // responses labeled as html/json/xml. This seems okay given that we |
| // tighten our implementation of step 4 below (handling of range requests). |
| switch (GetCanonicalMimeType(mime_type_)) { |
| case MimeType::kNeverSniffed: |
| blocking_decision_reason_ = |
| BlockingDecisionReason::kNeverSniffedMimeType; |
| return Decision::kBlock; // Step ii. |
| |
| case MimeType::kHtml: |
| case MimeType::kJson: |
| case MimeType::kPlain: |
| case MimeType::kXml: |
| if (is_no_sniff_header_present_) { |
| blocking_decision_reason_ = BlockingDecisionReason::kNoSniffHeader; |
| return Decision::kBlock; // Step iv. |
| } |
| break; |
| |
| case MimeType::kOthers: |
| // TODO(vogelheim/lukasza): Departure from the spec: We currently |
| // handle audio/video MIME types as "opaque safelisted", to prevent |
| // sniffing on them and on XML-based media types in particular. |
| CHECK(!IsAudioOrVideoMimeType(mime_type_)); |
| break; |
| |
| case MimeType::kInvalidMimeType: |
| break; |
| } |
| } |
| |
| // 4. If request's no-cors media request state is "subsequent", then return |
| // true. |
| // |
| // TODO(lukasza): Departure from the spec: |
| // Diff from the (blocking) step 3.iii.: |
| // - Moved slightly later |
| // - No extra conditions like "and mimeType is an opaque-blocklisted MIME |
| // type" (e.g. html, xml, or json). |
| // Diff from the (allowing) step 4.: |
| // - Only applying this step to IsRangeResponseWithMiddleOfResource cases |
| if (IsRangeResponseWithMiddleOfResource(response)) { |
| if (IsAllowedAudioVideoRequest(request_url)) { |
| return Decision::kAllow; |
| } else { |
| blocking_decision_reason_ = |
| BlockingDecisionReason::kUnexpectedRangeResponse; |
| return Decision::kBlock; |
| } |
| } |
| |
| // 5. Wait for 1024 bytes of response or end-of-file, whichever comes first |
| // and let bytes be those bytes. |
| return Decision::kSniffMore; |
| } |
| |
| Decision OpaqueResponseBlockingAnalyzer::Sniff(std::string_view data) { |
| std::string sniffed_mime_type; |
| net::SniffMimeTypeFromLocalData(data, &sniffed_mime_type); |
| |
| // 7. If the audio or video type pattern matching algorithm given bytes does |
| // not return undefined, then: |
| if (IsAudioOrVideoMimeType(sniffed_mime_type)) { |
| // i. Append (request's opaque media identifier, request's current URL) to |
| // the user agent's opaque-safelisted requesters set. |
| StoreAllowedAudioVideoRequest(final_request_url_); |
| |
| // ii. Return true. |
| return Decision::kAllow; |
| } |
| |
| // Spec-divergence: no step 8: |
| // 8. If requests's no-cors media request state is not "N/A", then return |
| // false. |
| // This implementation doesn't know if the request came from a media element |
| // or not. Making the decision based on earlier sniffing should be okay. |
| |
| // 9. If the image type pattern matching algorithm given bytes does not |
| // return undefined, then return true. |
| constexpr auto kCaseInsensitive = base::CompareCase::INSENSITIVE_ASCII; |
| if (base::StartsWith(sniffed_mime_type, "image/", kCaseInsensitive)) |
| return Decision::kAllow; |
| |
| // At this point, a number of MIME types should be out of the running. |
| CHECK(!IsTextCssMimeType(mime_type_)); // OpaqueSafelistedMimeType are not |
| // sniffed. |
| CHECK(!IsAudioOrVideoMimeType(mime_type_)); // Ditto. |
| CHECK(!IsNonSniffableImageMimeType(mime_type_)); // Ditto. |
| |
| // 12. If mimeType is failure, then return true. |
| // |
| // The spec proposal handles this step before checking for JS and JSON. To |
| // be compatible, we handle this before our 'sniffing' steps that handle |
| // those formats. |
| // |
| // TODO(lukasza): This is not fully accurate - it doesn't capture all the |
| // possible failure modes of |
| // https://fetch.spec.whatwg.org/#concept-header-extract-mime-type |
| if (mime_type_.empty()) { |
| return Decision::kAllow; |
| } |
| |
| // Check if the response is HTML, XML, or JSON, in which case it is surely not |
| // JavaScript. (The sniffers account for HTML/JS polyglot cases - see |
| // https://crbug.com/839945 and https://crbug.com/839425. OTOH, the sniffers |
| // do not account for CSS/HTML or CSS/JS-parser-breakers polyglots so CSS is |
| // explicitly excluded from the sniffing below.) |
| // |
| // TODO(lukasza): Departure from the spec. This avoids having to sniff |
| // Javascript in the full response as described in the "Gradual CORB -> ORB |
| // transition" doc at |
| // https://docs.google.com/document/d/1qUbE2ySi6av3arUEw5DNdFJIKKBbWGRGsXz_ew3S7HQ/edit?usp=sharing |
| // Diff: This is a new sniffing step for the 1st 1024 bytes. |
| // Diff: This doesn't sniff for JavaScript, but for non-Html/Xml/Json. |
| if (SniffForHTML(data) == SniffingResult::kYes) { |
| blocking_decision_reason_ = BlockingDecisionReason::kSniffedAsHtml; |
| return Decision::kBlock; |
| } |
| |
| if (SniffForXML(data) == SniffingResult::kYes) { |
| blocking_decision_reason_ = BlockingDecisionReason::kSniffedAsXml; |
| return Decision::kBlock; |
| } |
| |
| // Check for JSON and JS parser breakers. |
| if (SniffForFetchOnlyResource(data) == SniffingResult::kYes) { |
| blocking_decision_reason_ = BlockingDecisionReason::kSniffedAsJson; |
| return Decision::kBlock; |
| } |
| |
| return Decision::kSniffMore; |
| } |
| |
| Decision OpaqueResponseBlockingAnalyzer::HandleEndOfSniffableResponseBody() { |
| // Deviation from spec: We run JSON-or-JS-parser-breaker sniffer on some |
| // MIME types. To do so, we have taken them out of IsOpaqueSafelistedMimeType |
| // and instead handle them here. So this effectively handles some cases |
| // the spec handles in step 3.i. |
| // |
| // TODO(vogelheim/lukasza): Resolve this difference from the ORB spec. |
| // TODO(vogelheim/lukasza): Consider other early-allow mechanisms (e.g. CORP - |
| // see https://github.com/annevk/orb/issues/30#issuecomment-971373842). |
| if (IsOpaqueSafelistedMimeTypeThatWeSniffAnyway(mime_type_)) |
| return Decision::kAllow; |
| |
| // TODO(lukasza): Implement the following steps from ORB spec: |
| // 10. If nosniff is true, then return false. |
| // 11. If response's status is not an ok status, then return false. |
| // (Skipping these steps minimizes the risk of shipping the initial ORB |
| // implementation.) |
| |
| // TODO(lukasza): Departure from the spec discussed in |
| // https://github.com/annevk/orb/issues/3. |
| // Diff: Removing step 13: |
| // 13. If mimeType's essence starts with "audio/", "image/", or "video/", |
| // then return false. |
| |
| // TODO(lukasza): Departure from the spec, because the current implementation |
| // avoids full Javascript parsing as described in the "Gradual CORB -> ORB |
| // transition" doc at |
| // https://docs.google.com/document/d/1qUbE2ySi6av3arUEw5DNdFJIKKBbWGRGsXz_ew3S7HQ/edit?usp=sharing |
| // Diff: Skipping/ignoring step 15: |
| // 15. If response's body parses as JavaScript and does not parse as JSON, |
| // then return true. |
| // Diff: Changing step 16 to fail open (e.g. return true / kAllow): |
| // 16. Return false. |
| return Decision::kAllow; |
| } |
| |
| bool OpaqueResponseBlockingAnalyzer::ShouldReportBlockedResponse() const { |
| // Empty attribution responses may still result in changes to web-visible |
| // behavior when blocked, so they should always be reported. See |
| // https://crbug.com/1369637. |
| return (!is_empty_response_ && is_http_status_okay_) || |
| is_attribution_response_; |
| } |
| |
| ResponseAnalyzer::BlockedResponseHandling |
| OpaqueResponseBlockingAnalyzer::ShouldHandleBlockedResponseAs() const { |
| // "ORB v0.1" uses CORB-style error handling with injecting an empty response. |
| // "ORB v0.2" uses ORB-specified error handling (injecting a network error) |
| // for non-script fetches, by injecting a network error. |
| // "ORB errors-for-all-fetches" uses ORB-specified error handling everywhere. |
| |
| if (base::FeatureList::IsEnabled( |
| features::kOpaqueResponseBlockingErrorsForAllFetches)) { |
| return BlockedResponseHandling::kNetworkError; |
| } |
| |
| if (request_destination_from_renderer_ != mojom::RequestDestination::kEmpty) { |
| return BlockedResponseHandling::kNetworkError; |
| } |
| |
| return BlockedResponseHandling::kEmptyResponse; |
| } |
| |
| void OpaqueResponseBlockingAnalyzer::StoreAllowedAudioVideoRequest( |
| const GURL& media_url) { |
| per_factory_state_->insert(media_url); |
| } |
| |
| bool OpaqueResponseBlockingAnalyzer::IsAllowedAudioVideoRequest( |
| const GURL& media_url) { |
| return base::Contains(*per_factory_state_, media_url); |
| } |
| |
| } // namespace network::orb |