blob: 660328eb50adfa93ea2442de99b65b7a69166396 [file] [log] [blame]
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This is a port of ManifestParser.cc from WebKit/WebCore/loader/appcache.
/*
* Copyright (C) 2008 Apple Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "content/browser/appcache/appcache_manifest_parser.h"

#include <stddef.h>

#include <algorithm>
#include <tuple>
#include <utility>

#include "base/check_op.h"
#include "base/metrics/histogram_functions.h"
#include "base/notreached.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "content/browser/appcache/appcache.h"
#include "third_party/blink/public/common/origin_trials/trial_token.h"
#include "third_party/blink/public/common/origin_trials/trial_token_result.h"
#include "third_party/blink/public/common/origin_trials/trial_token_validator.h"
#include "url/gurl.h"
namespace content {
namespace {
// Values for the mode in the AppCache manifest parsing algorithm specification.
//
// The mode identifies which manifest section the parser is currently reading.
// ParseManifest() starts in kExplicit and switches mode whenever it encounters
// a line ending in ':' (see ParseModeSettingLine()). Entries found while in
// kUnknown are skipped.
enum class Mode {
kExplicit, // In the CACHE: section.
kIntercept, // In the CHROMIUM-INTERCEPT: section. (non-standard)
kFallback, // In the FALLBACK: section.
kOnlineSafelist, // In the NETWORK: section.
kOriginTrial, // In the ORIGIN-TRIAL: section. (non-standard)
kUnknown, // Sections that are not covered by the spec.
};
// True for the characters AppCache treats as whitespace: space (0x20),
// tab (0x09), LF and CR. No other character qualifies.
constexpr bool IsWhiteSpace(char character) {
  switch (character) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      return true;
    default:
      return false;
  }
}
// True for the characters AppCache treats as line terminators: CR and LF.
constexpr bool IsNewLine(char character) {
  switch (character) {
    case '\r':
    case '\n':
      return true;
    default:
      return false;
  }
}
// True for the characters that separate tokens within a manifest line:
// tab (0x09) and space (0x20). Newline characters are not token separators.
constexpr bool IsTokenSeparator(char character) {
  return character == '\t' || character == ' ';
}
// Drops everything that precedes the first newline character.
//
// The result starts at the first CR or LF in `data`. If `data` contains no
// newline, the result is empty.
base::StringPiece TrimToFirstNewLine(base::StringPiece data) {
  const size_t size = data.length();
  size_t newline_index = 0;
  for (; newline_index < size; ++newline_index) {
    if (IsNewLine(data[newline_index]))
      break;
  }
  return data.substr(newline_index);
}
// Drops the leading run of whitespace characters (as defined by
// IsWhiteSpace()). The result is empty if `data` is entirely whitespace.
base::StringPiece TrimStartingWhiteSpace(base::StringPiece data) {
  const size_t size = data.length();
  size_t first_kept = 0;
  for (; first_kept < size; ++first_kept) {
    if (!IsWhiteSpace(data[first_kept]))
      break;
  }
  return data.substr(first_kept);
}
// Drops the trailing run of whitespace characters (as defined by
// IsWhiteSpace()). The result is empty if `data` is entirely whitespace.
base::StringPiece TrimTrailingWhiteSpace(base::StringPiece data) {
  // `kept_length` is one past the last character that survives trimming.
  size_t kept_length = data.size();
  while (kept_length > 0 && IsWhiteSpace(data[kept_length - 1]))
    --kept_length;
  return data.substr(0, kept_length);
}
// Cuts `data` in two at its first newline character.
//
// The first element of the result is the leading line, which never contains a
// newline. The second element is the remainder; it begins with the newline
// character that ended the line, or is empty when `data` has no newline.
std::pair<base::StringPiece, base::StringPiece> SplitOnNewLine(
    base::StringPiece data) {
  const size_t size = data.length();
  size_t line_length = 0;
  for (; line_length < size; ++line_length) {
    if (IsNewLine(data[line_length]))
      break;
  }
  return std::make_pair(data.substr(0, line_length), data.substr(line_length));
}
// True if the string does not contain any newline character (CR or LF).
//
// An empty string counts as a single line.
bool IsSingleLine(base::StringPiece maybe_line) {
  // std::none_of lives in <algorithm>, which this file must include directly
  // rather than relying on a transitive include.
  return std::none_of(maybe_line.begin(), maybe_line.end(), IsNewLine);
}
// Extracts the first token of a manifest line.
//
// A token is a maximal run of characters that are not token separators
// (space 0x20 or tab 0x09).
//
// Preconditions (DCHECKed): `line` contains no newline and does not begin with
// a whitespace character.
//
// Returns {token, rest}. The separators that follow the token are consumed, so
// `rest` never begins with a token separator. Both pieces are empty when `line`
// is empty.
std::pair<base::StringPiece, base::StringPiece> SplitLineToken(
    base::StringPiece line) {
  DCHECK(IsSingleLine(line));
  DCHECK(line.empty() || !IsWhiteSpace(line[0]));

  const size_t line_length = line.length();

  // Find where the token stops.
  size_t token_length = 0;
  for (; token_length < line_length; ++token_length) {
    if (IsTokenSeparator(line[token_length]))
      break;
  }

  // Skip over the separators that follow the token.
  size_t rest_start = token_length;
  for (; rest_start < line_length; ++rest_start) {
    if (!IsTokenSeparator(line[rest_start]))
      break;
  }

  return std::make_pair(line.substr(0, token_length), line.substr(rest_start));
}
// Reports whether `line` switches the parser to a different section.
//
// The AppCache parsing algorithm only changes mode on lines whose last
// character, after whitespace removal, is ':' (colon).
//
// Preconditions (DCHECKed): `line` contains no newline and has already had
// whitespace stripped from both ends. An empty line is never mode-setting.
bool IsModeSettingLine(base::StringPiece line) {
  DCHECK(IsSingleLine(line));

  const size_t length = line.length();
  if (length == 0)
    return false;

  DCHECK(!IsWhiteSpace(line[0])) << "line starts with whitespace";
  const char final_character = line[length - 1];
  DCHECK(!IsWhiteSpace(final_character)) << "line ends with whitespace";

  const bool ends_with_colon = (final_character == ':');
  return ends_with_colon;
}
// Maps a mode-setting line to the parser mode it selects.
//
// `line` must satisfy IsModeSettingLine() (DCHECKed). The comparison is exact
// and case-sensitive; any mode-setting line that is not one of the recognized
// section headers selects Mode::kUnknown.
Mode ParseModeSettingLine(base::StringPiece line) {
  DCHECK(IsModeSettingLine(line));

  struct SectionHeader {
    base::StringPiece text;
    Mode mode;
  };
  static constexpr SectionHeader kSectionHeaders[] = {
      {"CACHE:", Mode::kExplicit},
      {"FALLBACK:", Mode::kFallback},
      {"NETWORK:", Mode::kOnlineSafelist},
      {"CHROMIUM-INTERCEPT:", Mode::kIntercept},
      {"ORIGIN-TRIAL:", Mode::kOriginTrial},
  };

  for (const SectionHeader& header : kSectionHeaders) {
    if (line == header.text)
      return header.mode;
  }
  return Mode::kUnknown;
}
// Turns a URL token from an AppCache manifest into a GURL.
//
// Per the AppCache specification, the token is resolved relative to
// `manifest_url` and any fragment (ref) is removed from the result.
//
// The returned GURL is invalid when the token does not resolve to a valid URL;
// callers must check is_valid().
GURL ParseUrlToken(base::StringPiece url_token, const GURL& manifest_url) {
  GURL resolved = manifest_url.Resolve(url_token);

  // Nothing to strip from an invalid URL or from one without a fragment.
  if (!resolved.is_valid() || !resolved.has_ref())
    return resolved;

  GURL::Replacements strip_ref;
  strip_ref.ClearRef();
  return resolved.ReplaceComponents(strip_ref);
}
// True if `url` falls inside `scope`.
//
// This is a plain string-prefix test on the two URLs' specs: `url` is in scope
// when its spec begins with the spec of `scope`.
bool IsUrlWithinScope(const GURL& url, const GURL& scope) {
  const auto& candidate_spec = url.spec();
  const auto& scope_spec = scope.spec();
  return base::StartsWith(candidate_spec, scope_spec);
}
// Records UMA metrics for parsing one AppCache manifest.
//
// The manifest parser accumulates metrics data in an instance of this class by
// calling the Record*() methods. When the manifest is successfully parsed, the
// accumulated metrics are logged by calling RecordParseSuccess(). If
// RecordParseSuccess() is never called (e.g. the parser exits early), the
// accumulated data is discarded and nothing is logged.
class ParseMetricsRecorder {
 public:
  ParseMetricsRecorder() = default;
  ~ParseMetricsRecorder() = default;

  // Manifest starts with Chrome-specific header, not standard header.
  void RecordChromeHeader() {
#if DCHECK_IS_ON()
    DCHECK(!finalized_) << "Metrics already recorded";
#endif  // DCHECK_IS_ON()
    has_chrome_header_ = true;
  }

  // Manifest served with the MIME type that enables dangerous features.
  void RecordDangerousMode() {
    // Same contract as the other Record*() methods: recording after
    // RecordParseSuccess() would silently lose the data, so flag it.
#if DCHECK_IS_ON()
    DCHECK(!finalized_) << "Metrics already recorded";
#endif  // DCHECK_IS_ON()
    used_dangerous_mode_ = true;
  }

  // Manifest contains a valid Chrome-specific CHROMIUM-INTERCEPT: entry.
  void RecordInterceptEntry() {
#if DCHECK_IS_ON()
    DCHECK(!finalized_) << "Metrics already recorded";
#endif  // DCHECK_IS_ON()
    has_intercept_entry_ = true;
  }

  // Called after the parser has successfully consumed the entire manifest.
  //
  // Logs the accumulated data to the appcache.Manifest.* histograms.
  //
  // Must be called exactly once. No other Record*() method may be called after
  // this method is called.
  void RecordParseSuccess() {
#if DCHECK_IS_ON()
    DCHECK(!finalized_) << "Metrics already recorded";
    finalized_ = true;
#endif  // DCHECK_IS_ON()
    base::UmaHistogramBoolean("appcache.Manifest.ChromeHeader",
                              has_chrome_header_);
    base::UmaHistogramBoolean("appcache.Manifest.DangerousMode",
                              used_dangerous_mode_);
    base::UmaHistogramEnumeration(
        "appcache.Manifest.InterceptUsage",
        has_intercept_entry_ ? InterceptUsage::kExact : InterceptUsage::kNone);
  }

 private:
  // These values are persisted to logs. Entries should not be renumbered and
  // numeric values should never be reused.
  enum class InterceptUsage {
    // The manifest contains no intercept entry.
    kNone = 0,
    // The manifest contains at least one intercept entry. All entries use exact
    // URLs.
    kExact = 1,
    // The manifest contains at least one intercept entry. At least one
    // intercept entry uses a pattern URL.
    //
    // This class never logs this value; it is kept because the numeric values
    // are persisted.
    kPattern = 2,
    // Required by base::UmaHistogramEnumeration(). Must be last in the enum.
    kMaxValue = kPattern,
  };

  // Set by RecordChromeHeader().
  bool has_chrome_header_ = false;
  // Set by RecordDangerousMode().
  bool used_dangerous_mode_ = false;
  // Set by RecordInterceptEntry().
  bool has_intercept_entry_ = false;

#if DCHECK_IS_ON()
  // True after RecordParseSuccess() was called.
  bool finalized_ = false;
#endif  // DCHECK_IS_ON()
};
// Feature name that an origin trial token in the ORIGIN-TRIAL: section must
// carry for ParseManifest() to accept the token's expiry time.
constexpr char kAppCacheOriginTrialName[] = "AppCache";
} // namespace
// Out-of-line defaulted constructor and destructor for AppCacheManifest.
AppCacheManifest::AppCacheManifest() = default;
AppCacheManifest::~AppCacheManifest() = default;
// Parses an AppCache manifest and stores the result in `manifest`.
//
// `manifest_url` is the URL of the manifest; URL tokens are resolved against
// it and origin / scheme checks are made against it.
// `manifest_scope` is validated with AppCache::CheckValidManifestScope() and
// limits which FALLBACK and CHROMIUM-INTERCEPT namespaces are accepted.
// `manifest_bytes` / `manifest_size` are the raw manifest contents;
// `manifest_size` must be non-negative (DCHECKed).
// `parse_mode` decides whether CHROMIUM-INTERCEPT entries are honored and
// whether FALLBACK namespaces must live under the manifest URL with its
// filename removed.
// `manifest` must be in its freshly constructed state (DCHECKed below).
//
// Returns false if the manifest does not start with a recognized signature
// followed by whitespace (or end of input), if `manifest_url` is invalid, or if
// the scope check fails. All of these exits happen before `manifest` is first
// written. Returns true otherwise; individual malformed or disallowed entries
// are skipped rather than failing the parse.
bool ParseManifest(const GURL& manifest_url,
const std::string& manifest_scope,
const char* manifest_bytes,
int manifest_size,
ParseMode parse_mode,
AppCacheManifest& manifest) {
// The parsing algorithm is specified at
// https://html.spec.whatwg.org/multipage/offline.html
DCHECK(manifest.explicit_urls.empty());
DCHECK(manifest.fallback_namespaces.empty());
DCHECK(manifest.online_safelist_namespaces.empty());
DCHECK_EQ(manifest.parser_version, -1);
DCHECK_EQ(manifest.scope, "");
DCHECK(!manifest.online_safelist_all);
DCHECK(!manifest.did_ignore_intercept_namespaces);
DCHECK(!manifest.did_ignore_fallback_namespaces);
// Metrics are only accumulated here; they are logged by RecordParseSuccess()
// at the end of this function, so every early `return false` discards them.
ParseMetricsRecorder parse_metrics;
if (parse_mode == PARSE_MANIFEST_ALLOWING_DANGEROUS_FEATURES)
parse_metrics.RecordDangerousMode();
// Entries that appear before any section header are treated as CACHE: entries.
Mode mode = Mode::kExplicit;
// The specification requires UTF-8-decoding the manifest, which replaces
// invalid UTF-8 characters with placeholders. It would be nice if
// utf_string_conversions included a UTF-8 to UTF-8 conversion for this
// purpose, but AppCache isn't important enough to add conversion code just
// to accelerate manifest decoding.
DCHECK_GE(manifest_size, 0);
std::u16string wide_manifest_bytes =
base::UTF8ToUTF16(base::StringPiece(manifest_bytes, manifest_size));
std::string decoded_manifest_bytes = base::UTF16ToUTF8(wide_manifest_bytes);
// The bytes of the manifest that haven't been consumed yet.
base::StringPiece data(decoded_manifest_bytes);
// Discard a leading UTF-8 Byte-Order-Mark (BOM) (0xEF, 0xBB, 0xBF);
static constexpr base::StringPiece kUtf8Bom("\xEF\xBB\xBF");
if (base::StartsWith(data, kUtf8Bom))
data = data.substr(kUtf8Bom.length());
// The manifest has to start with a well-defined signature.
static constexpr base::StringPiece kSignature("CACHE MANIFEST");
static constexpr base::StringPiece kChromiumSignature(
"CHROMIUM CACHE MANIFEST");
if (base::StartsWith(data, kSignature)) {
data = data.substr(kSignature.length());
} else if (base::StartsWith(data, kChromiumSignature)) {
// Chrome recognizes a separate signature, CHROMIUM CACHE MANIFEST. This was
// built so that manifests that use the Chrome-only feature
// CHROMIUM-INTERCEPT will be ignored by other browsers.
// See https://crbug.com/101565
data = data.substr(kChromiumSignature.length());
parse_metrics.RecordChromeHeader();
} else {
return false;
}
// The character after "CACHE MANIFEST" must be a whitespace character.
// A manifest that ends right after the signature is accepted.
if (!data.empty() && !IsWhiteSpace(data[0]))
return false;
if (!manifest_url.is_valid()) {
return false;
}
if (!AppCache::CheckValidManifestScope(manifest_url, manifest_scope))
return false;
// Manifest parser version handling.
//
// Version 0: Pre-manifest scope, a manifest's scope for resources listed in
// the FALLBACK and CHROMIUM-INTERCEPT sections can span the entire origin.
//
// Version 1: Manifests have a scope, resources listed in the FALLBACK and
// CHROMIUM-INTERCEPT sections must exist within that scope or be ignored.
// Changing the manifest, the scope, or the version of the manifest will
// trigger a refetch of the manifest.
//
// Version 2: Manifests can have an ORIGIN-TRIAL section. This is a
// separate version so that a new version of Chrome will force a refetch.
//
// This code generates manifests with parser version 2.
manifest.parser_version = 2;
manifest.scope = manifest_scope;
const GURL manifest_scope_url = manifest_url.Resolve(manifest_scope);
// The spec requires ignoring any characters on the first line after the
// signature and its following whitespace.
data = TrimToFirstNewLine(data);
// Each iteration consumes one line that contains at least one non-whitespace
// character; blank lines and leading whitespace are skipped by the trim below.
while (true) {
data = TrimStartingWhiteSpace(data);
if (data.empty())
break;
base::StringPiece line;
std::tie(line, data) = SplitOnNewLine(data);
// The checks above guarantee that the input to SplitOnNewLine() starts with
// a non-whitespace character.
DCHECK(!line.empty());
if (line[0] == '#') // Lines starting with # are comments.
continue;
line = TrimTrailingWhiteSpace(line);
// Handle all the steps checking for lines that end with ":".
if (IsModeSettingLine(line)) {
mode = ParseModeSettingLine(line);
continue;
}
// Entries in unrecognized sections are skipped.
if (mode == Mode::kUnknown)
continue;
static constexpr base::StringPiece kOnlineSafelistWildcard("*");
if (mode == Mode::kOnlineSafelist && line == kOnlineSafelistWildcard) {
manifest.online_safelist_all = true;
continue;
}
if (mode == Mode::kOriginTrial) {
// Only accept the first valid token.
// A default-constructed base::Time means no token has been accepted yet.
if (manifest.token_expires != base::Time())
continue;
base::StringPiece origin_trial_token;
std::tie(origin_trial_token, line) = SplitLineToken(line);
if (!blink::TrialTokenValidator::IsTrialPossibleOnOrigin(manifest_url))
continue;
blink::TrialTokenValidator validator;
url::Origin origin = url::Origin::Create(manifest_url);
blink::TrialTokenResult result = validator.ValidateToken(
origin_trial_token, origin, base::Time::Now());
// Tokens that validate but name a different feature are ignored.
if (result.Status() == blink::OriginTrialTokenStatus::kSuccess) {
if (result.ParsedToken()->feature_name() == kAppCacheOriginTrialName)
manifest.token_expires = result.ParsedToken()->expiry_time();
}
continue;
}
// Chrome does not implement the SETTINGS: section. If we ever decided to do
// so, the implementation would go here.
// Common code for the following sections: explicit (CACHE:),
// fallback (FALLBACK:), online safelist (NETWORK:) and intercept
// (CHROMIUM-INTERCEPT:). All these sections start by parsing a URL token.
base::StringPiece namespace_url_token;
std::tie(namespace_url_token, line) = SplitLineToken(line);
GURL namespace_url = ParseUrlToken(namespace_url_token, manifest_url);
if (!namespace_url.is_valid())
continue;
if (mode == Mode::kExplicit || mode == Mode::kOnlineSafelist) {
// Scheme component must be the same as the manifest URL's.
if (namespace_url.scheme() != manifest_url.scheme()) {
continue;
}
// Deviate from the HTML5 spec by supporting the caching of cross-origin
// HTTPS resources. See https://crbug.com/69594
//
// Per the spec, explicit (CACHE:) cross-origin HTTPS resources should be
// ignored here. We've opted for a milder constraint and allow caching
// unless the resource has a "no-store" header. That condition is enforced
// in AppCacheUpdateJob.
if (mode == Mode::kExplicit) {
manifest.explicit_urls.insert(namespace_url.spec());
continue;
}
// NETWORK: entries have no target URL, hence the empty GURL.
manifest.online_safelist_namespaces.emplace_back(
AppCacheNamespace(APPCACHE_NETWORK_NAMESPACE, namespace_url, GURL()));
continue;
}
if (mode == Mode::kIntercept) {
// Chrome supports a CHROMIUM-INTERCEPT section. https://crbug.com/101565
//
// This section consists of entries of the form:
// namespace_url verb url_target
// Outside dangerous mode the entry is dropped and the drop is flagged on the
// manifest; this happens before the rest of the entry is validated.
if (parse_mode != PARSE_MANIFEST_ALLOWING_DANGEROUS_FEATURES) {
manifest.did_ignore_intercept_namespaces = true;
continue;
}
if (namespace_url.GetOrigin() != manifest_url.GetOrigin())
continue;
if (!IsUrlWithinScope(namespace_url, manifest_scope_url))
continue;
// The only supported verb is "return".
base::StringPiece verb_token;
std::tie(verb_token, line) = SplitLineToken(line);
static constexpr base::StringPiece kReturnVerb("return");
if (verb_token != kReturnVerb)
continue;
base::StringPiece target_url_token;
std::tie(target_url_token, line) = SplitLineToken(line);
if (target_url_token.empty())
continue;
GURL target_url = ParseUrlToken(target_url_token, manifest_url);
if (!target_url.is_valid())
continue;
if (manifest_url.GetOrigin() != target_url.GetOrigin())
continue;
manifest.intercept_namespaces.emplace_back(APPCACHE_INTERCEPT_NAMESPACE,
namespace_url, target_url);
parse_metrics.RecordInterceptEntry();
continue;
}
if (mode == Mode::kFallback) {
if (namespace_url.GetOrigin() != manifest_url.GetOrigin())
continue;
// Outside dangerous mode, a same-origin namespace that is not under the
// manifest URL with its filename removed is dropped and the drop is flagged.
if (parse_mode != PARSE_MANIFEST_ALLOWING_DANGEROUS_FEATURES) {
if (!IsUrlWithinScope(namespace_url,
manifest_url.GetWithoutFilename())) {
manifest.did_ignore_fallback_namespaces = true;
continue;
}
}
// Namespaces outside the manifest scope are dropped without setting a flag.
if (!IsUrlWithinScope(namespace_url, manifest_scope_url))
continue;
base::StringPiece fallback_url_token;
std::tie(fallback_url_token, line) = SplitLineToken(line);
if (fallback_url_token.empty())
continue;
GURL fallback_url = ParseUrlToken(fallback_url_token, manifest_url);
if (!fallback_url.is_valid())
continue;
if (manifest_url.GetOrigin() != fallback_url.GetOrigin())
continue;
// Store regardless of duplicate namespace URL. Only the first match will
// ever be used.
manifest.fallback_namespaces.emplace_back(APPCACHE_FALLBACK_NAMESPACE,
namespace_url, fallback_url);
continue;
}
// Every Mode value is handled by a branch above that ends in `continue`.
NOTREACHED() << "Unimplemented AppCache manifest parser mode";
}
parse_metrics.RecordParseSuccess();
return true;
}
// Exposes the origin trial feature name used by the parser, as a std::string
// copy of kAppCacheOriginTrialName, so tests can reference it.
std::string GetAppCacheOriginTrialNameForTesting() {
  return std::string(kAppCacheOriginTrialName);
}
} // namespace content