|  | // Copyright 2015 The Chromium Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style license that can be | 
|  | // found in the LICENSE file. | 
|  |  | 
|  | #include "content/common/cross_site_document_classifier.h" | 
|  |  | 
|  | #include <stddef.h> | 
|  | #include <string> | 
|  |  | 
|  | #include "base/command_line.h" | 
|  | #include "base/lazy_instance.h" | 
|  | #include "base/logging.h" | 
|  | #include "base/macros.h" | 
|  | #include "base/metrics/histogram_macros.h" | 
|  | #include "base/strings/string_piece.h" | 
|  | #include "base/strings/string_util.h" | 
|  | #include "content/public/common/content_switches.h" | 
|  | #include "net/base/registry_controlled_domains/registry_controlled_domain.h" | 
|  | #include "net/http/http_response_headers.h" | 
|  | #include "services/network/public/cpp/resource_response_info.h" | 
|  |  | 
|  | using base::StringPiece; | 
|  |  | 
|  | namespace content { | 
|  |  | 
|  | namespace { | 
|  |  | 
// Exact MIME type strings that the classifier compares against.
constexpr char kTextHtml[] = "text/html";
constexpr char kTextXml[] = "text/xml";
constexpr char kAppXml[] = "application/xml";
constexpr char kAppJson[] = "application/json";
constexpr char kImageSvg[] = "image/svg+xml";
constexpr char kTextJson[] = "text/json";
constexpr char kTextXjson[] = "text/x-json";
constexpr char kTextPlain[] = "text/plain";

// Suffixes that are looked for at the end of a MIME type.
constexpr char kJsonSuffix[] = "+json";
constexpr char kXmlSuffix[] = "+xml";
|  |  | 
|  | void AdvancePastWhitespace(StringPiece* data) { | 
|  | size_t offset = data->find_first_not_of(" \t\r\n"); | 
|  | if (offset == base::StringPiece::npos) { | 
|  | // |data| was entirely whitespace. | 
|  | data->clear(); | 
|  | } else { | 
|  | data->remove_prefix(offset); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Returns kYes if |data| starts with one of the string patterns in | 
|  | // |signatures|, kMaybe if |data| is a prefix of one of the patterns in | 
|  | // |signatures|, and kNo otherwise. | 
|  | // | 
|  | // When kYes is returned, the matching prefix is erased from |data|. | 
|  | CrossSiteDocumentClassifier::Result MatchesSignature( | 
|  | StringPiece* data, | 
|  | const StringPiece signatures[], | 
|  | size_t arr_size, | 
|  | base::CompareCase compare_case) { | 
|  | for (size_t i = 0; i < arr_size; ++i) { | 
|  | if (signatures[i].length() <= data->length()) { | 
|  | if (base::StartsWith(*data, signatures[i], compare_case)) { | 
|  | // When |signatures[i]| is a prefix of |data|, it constitutes a match. | 
|  | // Strip the matching characters, and return. | 
|  | data->remove_prefix(signatures[i].length()); | 
|  | return CrossSiteDocumentClassifier::kYes; | 
|  | } | 
|  | } else { | 
|  | if (base::StartsWith(signatures[i], *data, compare_case)) { | 
|  | // When |data| is a prefix of |signatures[i]|, that means that | 
|  | // subsequent bytes in the stream could cause a match to occur. | 
|  | return CrossSiteDocumentClassifier::kMaybe; | 
|  | } | 
|  | } | 
|  | } | 
|  | return CrossSiteDocumentClassifier::kNo; | 
|  | } | 
|  |  | 
|  | // Returns true if |mime_type == prefix| or if |mime_type| starts with | 
|  | // |prefix + '+'|.  Returns false otherwise. | 
|  | // | 
|  | // For example: | 
|  | // - MatchesMimeTypePrefix("application/json", "application/json") -> true | 
|  | // - MatchesMimeTypePrefix("application/json+foo", "application/json") -> true | 
|  | // - MatchesMimeTypePrefix("application/jsonp", "application/json") -> false | 
|  | // - MatchesMimeTypePrefix("application/foo", "application/json") -> false | 
|  | bool MatchesMimeTypePrefix(base::StringPiece mime_type, | 
|  | base::StringPiece prefix) { | 
|  | constexpr auto kCaseInsensitive = base::CompareCase::INSENSITIVE_ASCII; | 
|  | if (!base::StartsWith(mime_type, prefix, kCaseInsensitive)) | 
|  | return false; | 
|  | DCHECK_GE(mime_type.length(), prefix.length()); | 
|  |  | 
|  | if (mime_type.length() == prefix.length()) { | 
|  | // Given StartsWith results above, the above condition is our O(1) check if | 
|  | // |base::LowerCaseEqualsASCII(mime_type, prefix)|. | 
|  | DCHECK(base::LowerCaseEqualsASCII(mime_type, prefix)); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | if (mime_type[prefix.length()] == '+') { | 
|  | // Given StartsWith results above, the above condition is our O(1) check if | 
|  | // |base::StartsWith(mime_type, prefix + '+', kCaseInsensitive)|. | 
|  | DCHECK(base::StartsWith(mime_type, prefix.as_string() + '+', | 
|  | kCaseInsensitive)); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | return false; | 
|  | } | 
|  |  | 
|  | }  // namespace | 
|  |  | 
|  | CrossSiteDocumentMimeType CrossSiteDocumentClassifier::GetCanonicalMimeType( | 
|  | base::StringPiece mime_type) { | 
|  | // Checking for image/svg+xml early ensures that it won't get classified as | 
|  | // CROSS_SITE_DOCUMENT_MIME_TYPE_XML by the presence of the "+xml" suffix. | 
|  | if (base::LowerCaseEqualsASCII(mime_type, kImageSvg)) | 
|  | return CROSS_SITE_DOCUMENT_MIME_TYPE_OTHERS; | 
|  |  | 
|  | if (base::LowerCaseEqualsASCII(mime_type, kTextHtml)) | 
|  | return CROSS_SITE_DOCUMENT_MIME_TYPE_HTML; | 
|  |  | 
|  | if (base::LowerCaseEqualsASCII(mime_type, kTextPlain)) | 
|  | return CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN; | 
|  |  | 
|  | // StartsWith rather than LowerCaseEqualsASCII is used to account both for | 
|  | // mime types similar to 1) application/json and to 2) | 
|  | // application/json+protobuf. | 
|  | constexpr auto kCaseInsensitive = base::CompareCase::INSENSITIVE_ASCII; | 
|  | if (MatchesMimeTypePrefix(mime_type, kAppJson) || | 
|  | MatchesMimeTypePrefix(mime_type, kTextJson) || | 
|  | MatchesMimeTypePrefix(mime_type, kTextXjson) || | 
|  | base::EndsWith(mime_type, kJsonSuffix, kCaseInsensitive)) { | 
|  | return CROSS_SITE_DOCUMENT_MIME_TYPE_JSON; | 
|  | } | 
|  |  | 
|  | if (MatchesMimeTypePrefix(mime_type, kAppXml) || | 
|  | MatchesMimeTypePrefix(mime_type, kTextXml) || | 
|  | base::EndsWith(mime_type, kXmlSuffix, kCaseInsensitive)) { | 
|  | return CROSS_SITE_DOCUMENT_MIME_TYPE_XML; | 
|  | } | 
|  |  | 
|  | return CROSS_SITE_DOCUMENT_MIME_TYPE_OTHERS; | 
|  | } | 
|  |  | 
|  | bool CrossSiteDocumentClassifier::IsBlockableScheme(const GURL& url) { | 
|  | // We exclude ftp:// from here. FTP doesn't provide a Content-Type | 
|  | // header which our policy depends on, so we cannot protect any | 
|  | // document from FTP servers. | 
|  | return url.SchemeIs(url::kHttpScheme) || url.SchemeIs(url::kHttpsScheme); | 
|  | } | 
|  |  | 
|  | // We don't use Webkit's existing CORS policy implementation since | 
|  | // their policy works in terms of origins, not sites. For example, | 
|  | // when frame is sub.a.com and it is not allowed to access a document | 
|  | // with sub1.a.com. But under Site Isolation, it's allowed. | 
|  | bool CrossSiteDocumentClassifier::IsValidCorsHeaderSet( | 
|  | const url::Origin& frame_origin, | 
|  | const std::string& access_control_origin) { | 
|  | // Many websites are sending back "\"*\"" instead of "*". This is | 
|  | // non-standard practice, and not supported by Chrome. Refer to | 
|  | // CrossOriginAccessControl::passesAccessControlCheck(). | 
|  |  | 
|  | // Note that "null" offers no more protection than "*" because it matches any | 
|  | // unique origin, such as data URLs. Any origin can thus access it, so don't | 
|  | // bother trying to block this case. | 
|  |  | 
|  | // TODO(dsjang): * is not allowed for the response from a request | 
|  | // with cookies. This allows for more than what the renderer will | 
|  | // eventually be able to receive, so we won't see illegal cross-site | 
|  | // documents allowed by this. We have to find a way to see if this | 
|  | // response is from a cookie-tagged request or not in the future. | 
|  | if (access_control_origin == "*" || access_control_origin == "null") | 
|  | return true; | 
|  |  | 
|  | return frame_origin.IsSameOriginWith( | 
|  | url::Origin::Create(GURL(access_control_origin))); | 
|  | } | 
|  |  | 
|  | // This function is a slight modification of |net::SniffForHTML|. | 
|  | CrossSiteDocumentClassifier::Result CrossSiteDocumentClassifier::SniffForHTML( | 
|  | StringPiece data) { | 
|  | // The content sniffers used by Chrome and Firefox are using "<!--" as one of | 
|  | // the HTML signatures, but it also appears in valid JavaScript, considered as | 
|  | // well-formed JS by the browser.  Since we do not want to block any JS, we | 
|  | // exclude it from our HTML signatures. This can weaken our document block | 
|  | // policy, but we can break less websites. | 
|  | // | 
|  | // Note that <body> and <br> are not included below, since <b is a prefix of | 
|  | // them. | 
|  | // | 
|  | // TODO(dsjang): parameterize |net::SniffForHTML| with an option that decides | 
|  | // whether to include <!-- or not, so that we can remove this function. | 
|  | // TODO(dsjang): Once CrossSiteDocumentClassifier is moved into the browser | 
|  | // process, we should do single-thread checking here for the static | 
|  | // initializer. | 
|  | static const StringPiece kHtmlSignatures[] = { | 
|  | StringPiece("<!doctype html"),  // HTML5 spec | 
|  | StringPiece("<script"),         // HTML5 spec, Mozilla | 
|  | StringPiece("<html"),           // HTML5 spec, Mozilla | 
|  | StringPiece("<head"),           // HTML5 spec, Mozilla | 
|  | StringPiece("<iframe"),         // Mozilla | 
|  | StringPiece("<h1"),             // Mozilla | 
|  | StringPiece("<div"),            // Mozilla | 
|  | StringPiece("<font"),           // Mozilla | 
|  | StringPiece("<table"),          // Mozilla | 
|  | StringPiece("<a"),              // Mozilla | 
|  | StringPiece("<style"),          // Mozilla | 
|  | StringPiece("<title"),          // Mozilla | 
|  | StringPiece("<b"),              // Mozilla (note: subsumes <body>, <br>) | 
|  | StringPiece("<p")               // Mozilla | 
|  | }; | 
|  |  | 
|  | while (data.length() > 0) { | 
|  | AdvancePastWhitespace(&data); | 
|  |  | 
|  | Result signature_match = | 
|  | MatchesSignature(&data, kHtmlSignatures, arraysize(kHtmlSignatures), | 
|  | base::CompareCase::INSENSITIVE_ASCII); | 
|  | if (signature_match != kNo) | 
|  | return signature_match; | 
|  |  | 
|  | // "<!--" (the HTML comment syntax) is a special case, since it's valid JS | 
|  | // as well. Skip over them. | 
|  | static const StringPiece kBeginCommentSignature[] = {"<!--"}; | 
|  | Result comment_match = MatchesSignature(&data, kBeginCommentSignature, | 
|  | arraysize(kBeginCommentSignature), | 
|  | base::CompareCase::SENSITIVE); | 
|  | if (comment_match != kYes) | 
|  | return comment_match; | 
|  |  | 
|  | // Look for an end comment. | 
|  | static const StringPiece kEndComment = "-->"; | 
|  | size_t comment_end = data.find(kEndComment); | 
|  | if (comment_end == base::StringPiece::npos) | 
|  | return kMaybe;  // Hit end of data with open comment. | 
|  | data.remove_prefix(comment_end + kEndComment.length()); | 
|  | } | 
|  |  | 
|  | // All of |data| was consumed, without a clear determination. | 
|  | return kMaybe; | 
|  | } | 
|  |  | 
|  | CrossSiteDocumentClassifier::Result CrossSiteDocumentClassifier::SniffForXML( | 
|  | base::StringPiece data) { | 
|  | // TODO(dsjang): Once CrossSiteDocumentClassifier is moved into the browser | 
|  | // process, we should do single-thread checking here for the static | 
|  | // initializer. | 
|  | AdvancePastWhitespace(&data); | 
|  | static const StringPiece kXmlSignatures[] = {StringPiece("<?xml")}; | 
|  | return MatchesSignature(&data, kXmlSignatures, arraysize(kXmlSignatures), | 
|  | base::CompareCase::SENSITIVE); | 
|  | } | 
|  |  | 
|  | CrossSiteDocumentClassifier::Result CrossSiteDocumentClassifier::SniffForJSON( | 
|  | base::StringPiece data) { | 
|  | // Currently this function looks for an opening brace ('{'), followed by a | 
|  | // double-quoted string literal, followed by a colon. Importantly, such a | 
|  | // sequence is a Javascript syntax error: although the JSON object syntax is | 
|  | // exactly Javascript's object-initializer syntax, a Javascript object- | 
|  | // initializer expression is not valid as a standalone Javascript statement. | 
|  | // | 
|  | // TODO(nick): We have to come up with a better way to sniff JSON. The | 
|  | // following are known limitations of this function: | 
|  | // https://crbug.com/795470/ Support non-dictionary values (e.g. lists) | 
|  | enum { | 
|  | kStartState, | 
|  | kLeftBraceState, | 
|  | kLeftQuoteState, | 
|  | kEscapeState, | 
|  | kRightQuoteState, | 
|  | } state = kStartState; | 
|  |  | 
|  | for (size_t i = 0; i < data.length(); ++i) { | 
|  | const char c = data[i]; | 
|  | if (state != kLeftQuoteState && state != kEscapeState) { | 
|  | // Whitespace is ignored (outside of string literals) | 
|  | if (c == ' ' || c == '\t' || c == '\r' || c == '\n') | 
|  | continue; | 
|  | } else { | 
|  | // Inside string literals, control characters should result in rejection. | 
|  | if ((c >= 0 && c < 32) || c == 127) | 
|  | return kNo; | 
|  | } | 
|  |  | 
|  | switch (state) { | 
|  | case kStartState: | 
|  | if (c == '{') | 
|  | state = kLeftBraceState; | 
|  | else | 
|  | return kNo; | 
|  | break; | 
|  | case kLeftBraceState: | 
|  | if (c == '"') | 
|  | state = kLeftQuoteState; | 
|  | else | 
|  | return kNo; | 
|  | break; | 
|  | case kLeftQuoteState: | 
|  | if (c == '"') | 
|  | state = kRightQuoteState; | 
|  | else if (c == '\\') | 
|  | state = kEscapeState; | 
|  | break; | 
|  | case kEscapeState: | 
|  | // Simplification: don't bother rejecting hex escapes. | 
|  | state = kLeftQuoteState; | 
|  | break; | 
|  | case kRightQuoteState: | 
|  | if (c == ':') | 
|  | return kYes; | 
|  | else | 
|  | return kNo; | 
|  | break; | 
|  | } | 
|  | } | 
|  | return kMaybe; | 
|  | } | 
|  |  | 
|  | CrossSiteDocumentClassifier::Result | 
|  | CrossSiteDocumentClassifier::SniffForFetchOnlyResource(base::StringPiece data) { | 
|  | // kScriptBreakingPrefixes contains prefixes that are conventionally used to | 
|  | // prevent a JSON response from becoming a valid Javascript program (an attack | 
|  | // vector known as XSSI). The presence of such a prefix is a strong signal | 
|  | // that the resource is meant to be consumed only by the fetch API or | 
|  | // XMLHttpRequest, and is meant to be protected from use in non-CORS, cross- | 
|  | // origin contexts like <script>, <img>, etc. | 
|  | // | 
|  | // These prefixes work either by inducing a syntax error, or inducing an | 
|  | // infinite loop. In either case, the prefix must create a guarantee that no | 
|  | // matter what bytes follow it, the entire response would be worthless to | 
|  | // execute as a <script>. | 
|  | static const StringPiece kScriptBreakingPrefixes[] = { | 
|  | // Parser breaker prefix. | 
|  | // | 
|  | // Built into angular.js (followed by a comma and a newline): | 
|  | //   https://docs.angularjs.org/api/ng/service/$http | 
|  | // | 
|  | // Built into the Java Spring framework (followed by a comma and a space): | 
|  | //   https://goo.gl/xP7FWn | 
|  | // | 
|  | // Observed on google.com (without a comma, followed by a newline). | 
|  | StringPiece(")]}'"), | 
|  |  | 
|  | // Apache struts: https://struts.apache.org/plugins/json/#prefix | 
|  | StringPiece("{}&&"), | 
|  |  | 
|  | // Spring framework (historically): https://goo.gl/JYPFAv | 
|  | StringPiece("{} &&"), | 
|  |  | 
|  | // Infinite loops. | 
|  | StringPiece("for(;;);"),  // observed on facebook.com | 
|  | StringPiece("while(1);"), StringPiece("for (;;);"), | 
|  | StringPiece("while (1);"), | 
|  | }; | 
|  | Result has_parser_breaker = MatchesSignature( | 
|  | &data, kScriptBreakingPrefixes, arraysize(kScriptBreakingPrefixes), | 
|  | base::CompareCase::SENSITIVE); | 
|  | if (has_parser_breaker != kNo) | 
|  | return has_parser_breaker; | 
|  |  | 
|  | // A non-empty JSON object also effectively introduces a JS syntax error. | 
|  | return SniffForJSON(data); | 
|  | } | 
|  |  | 
|  | }  // namespace content |