| // Copyright 2025 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "net/http/no_vary_search_cache.h" |
| |
| #include <compare> |
| #include <iostream> |
| #include <tuple> |
| #include <type_traits> |
| #include <utility> |
| |
| #include "base/check.h" |
| #include "base/check_op.h" |
| #include "base/memory/weak_ptr.h" |
| #include "base/metrics/histogram_macros.h" |
| #include "base/notreached.h" |
| #include "base/time/time.h" |
| #include "net/http/http_no_vary_search_data.h" |
| |
| namespace net { |
| |
| namespace { |
| |
| // We need to use a separate enum for the |
| // HttpCache.NoVarySearch.HeaderParseResult than |
| // HttpNoVarySearchData::ParseErrorEnum, as that enum does not have a value for |
| // a successful parse. |
| // |
| // These values are persisted to logs. Entries should not be renumbered and |
| // numeric values should never be reused. |
| // |
| // LINT.IfChange(NoVarySearchHeaderParseResult) |
| enum class NoVarySearchHeaderParseResult { |
| kSuccess = 0, |
| kNoHeader = 1, |
| kDefaultValue = 2, |
| kNotDictionary = 3, |
| kNonBooleanKeyOrder = 4, |
| kParamsNotStringList = 5, |
| kExceptNotStringList = 6, |
| kExceptWithoutTrueParams = 7, |
| kMaxValue = kExceptWithoutTrueParams, |
| }; |
| // LINT.ThenChange(//tools/metrics/histograms/metadata/net/enums.xml:NoVarySearchHeaderParseResult) |
| |
| NoVarySearchHeaderParseResult MapParseErrorEnum( |
| HttpNoVarySearchData::ParseErrorEnum error) { |
| using enum HttpNoVarySearchData::ParseErrorEnum; |
| switch (error) { |
| case kOk: |
| return NoVarySearchHeaderParseResult::kNoHeader; |
| |
| case kDefaultValue: |
| return NoVarySearchHeaderParseResult::kDefaultValue; |
| |
| case kNotDictionary: |
| return NoVarySearchHeaderParseResult::kNotDictionary; |
| |
| case kUnknownDictionaryKey: |
| NOTREACHED(); // No longer used. |
| |
| case kNonBooleanKeyOrder: |
| return NoVarySearchHeaderParseResult::kNonBooleanKeyOrder; |
| |
| case kParamsNotStringList: |
| return NoVarySearchHeaderParseResult::kParamsNotStringList; |
| |
| case kExceptNotStringList: |
| return NoVarySearchHeaderParseResult::kExceptNotStringList; |
| |
| case kExceptWithoutTrueParams: |
| return NoVarySearchHeaderParseResult::kExceptWithoutTrueParams; |
| } |
| NOTREACHED(); |
| } |
| |
| void EmitNoVarySearchHeaderParseResultHistogram( |
| const base::expected<HttpNoVarySearchData, |
| HttpNoVarySearchData::ParseErrorEnum>& result) { |
| auto value = NoVarySearchHeaderParseResult::kSuccess; |
| if (!result.has_value()) { |
| value = MapParseErrorEnum(result.error()); |
| } |
| UMA_HISTOGRAM_ENUMERATION("HttpCache.NoVarySearch.HeaderParseResult", value); |
| } |
| |
| // Stripping the URL of its query and fragment (ref) needs to be done for every |
| // request, so we want to avoid allocating memory for a GURL in the case of a |
| // cache miss. The return value points at memory owned by `url`, so should not |
| // outlive it. |
| std::string_view ExtractBaseURLView(const GURL& url) { |
| CHECK(url.is_valid()); |
| CHECK(url.has_path()); |
| const std::string& spec = url.possibly_invalid_spec(); |
| const url::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); |
| const url::Component& path = parsed.path; |
| CHECK(path.is_valid()); |
| const size_t path_end_offset = base::checked_cast<size_t>(path.end()); |
| |
| auto base_url = std::string_view(spec).substr(0, path_end_offset); |
| |
| if constexpr (DCHECK_IS_ON()) { |
| // This correctness check is very expensive, so is only done when DCHECKs |
| // are enabled. |
| GURL generated_base_url(base_url); |
| DCHECK(generated_base_url.is_valid()); |
| GURL::Replacements replacements; |
| replacements.ClearQuery(); |
| replacements.ClearRef(); |
| GURL replaced_url = url.ReplaceComponents(replacements); |
| DCHECK_EQ(generated_base_url, replaced_url); |
| } |
| |
| return base_url; |
| } |
| |
| bool URLIsAcceptable(const GURL& url) { |
| // HTTP(S) URLs always have a path starting with "/" after canonicalization. |
| return url.is_valid() && url.has_path() && !url.has_username() && |
| !url.has_password(); |
| } |
| |
| } // namespace |
| |
| // base::LinkedList only supports having an object in a single linked list at a |
| // time. However, we need QueryString objects to be in two separate lists: |
| // * the least-recently-used (LRU) list, which has the most recently added or |
| // used entry at its head and the next entry to be evicted at its tail. This |
| // list contains every QueryString in the cache. |
| // * the list of cached QueryString objects for a particular base URL and |
| // No-Vary-Search parameter. These lists have the most recently inserted |
| // entry for this {base URL, NVS} pair at their heads. |
| // |
| // In order to work-around the limitation of base::LinkedList, QueryString has |
| // two base classes. LruNode represents its role as a member of the LRU list, |
| // and QueryStringListNode represents its role as a member of the |
| // QueryStringList list. By calling base::LinkNode methods via the appropriate |
| // base class it can control which list it is manipulating. |
| class NoVarySearchCache::LruNode : public base::LinkNode<LruNode> { |
| public: |
| // Not copyable or movable. |
| LruNode(const LruNode&) = delete; |
| LruNode& operator=(const LruNode&) = delete; |
| |
| // Every instance of LruNode is actually a QueryString. |
| QueryString* ToQueryString(); |
| |
| private: |
| friend class QueryString; |
| |
| LruNode() = default; |
| ~LruNode() = default; |
| }; |
| |
| class NoVarySearchCache::QueryStringListNode |
| : public base::LinkNode<QueryStringListNode> { |
| public: |
| // Not copyable or movable. |
| QueryStringListNode(const QueryStringListNode&) = delete; |
| QueryStringListNode& operator=(const QueryStringListNode&) = delete; |
| |
| // Every instance of QueryStringListNode is actually a QueryString. |
| QueryString* ToQueryString(); |
| |
| private: |
| friend class QueryString; |
| |
| QueryStringListNode() = default; |
| ~QueryStringListNode() = default; |
| }; |
| |
| // QueryString is the entry type for the cache. Its main purpose is to hold the |
| // query string, ie. everything between the "?" and the "#" in the original URL. |
| // Together with the `base_url` that forms part of the Key, this can be used to |
| // reconstruct the original URL that was used to store the original request in |
| // the disk cache. |
| class NoVarySearchCache::QueryString final |
| : public NoVarySearchCache::LruNode, |
| public NoVarySearchCache::QueryStringListNode { |
| public: |
| // Creates a QueryString and adds it to `list` and `lru_list`. |
| static void CreateAndInsert(std::optional<std::string_view> query, |
| QueryStringList& query_string_list, |
| base::LinkedList<LruNode>& lru_list) { |
| DCHECK(!query || query->find('#') == std::string_view::npos) |
| << "Query contained a '#' character, meaning that the URL reassembly " |
| "will not work correctly because the '#' will be re-interpreted as " |
| "the start of a fragment. This should not happen. Query was '" |
| << query.value() << "'"; |
| // This use of bare new is needed because base::LinkedList does not have |
| // ownership semantics. |
| auto* query_string = new QueryString(query, query_string_list); |
| query_string->LruNode::InsertBefore(lru_list.head()); |
| query_string->QueryStringListNode::InsertBefore( |
| query_string_list.list.head()); |
| } |
| |
| // Not copyable or movable. |
| QueryString(const QueryString&) = delete; |
| QueryString& operator=(const QueryString&) = delete; |
| |
| // Removes this object from both lists and deletes it. |
| void RemoveAndDelete() { |
| LruNode::RemoveFromList(); |
| QueryStringListNode::RemoveFromList(); |
| delete this; |
| } |
| |
| // Moves this object to the head of `list`. |
| template <typename List> |
| void MoveToHead(List& linked_list) { |
| auto* head = linked_list.head(); |
| if (head != this) { |
| MoveBeforeNode(linked_list.head()->value()); |
| } |
| } |
| |
| const std::optional<std::string>& query() const { return query_; } |
| |
| QueryStringList& query_string_list_ref() { |
| return query_string_list_ref_.get(); |
| } |
| |
| base::Time insertion_time() const { return insertion_time_; } |
| |
| void UpdateInsertionTime() { insertion_time_ = base::Time::Now(); } |
| |
| // Return the original GURL that this entry was constructed from (not |
| // including any fragment). It's important to use this method to correctly |
| // reconstruct URLs that have an empty query (end in '?'). |
| GURL ReconstructOriginalURL(std::string_view base_url) { |
| if (query_.has_value()) { |
| return GURL(base::StrCat({base_url, "?", query_.value()})); |
| } else { |
| return GURL(base_url); |
| } |
| } |
| |
| EraseHandle CreateEraseHandle() { |
| return EraseHandle(weak_factory_.GetWeakPtr()); |
| } |
| |
| private: |
| QueryString(std::optional<std::string_view> query, |
| QueryStringList& query_string_list) |
| : query_(query), |
| query_string_list_ref_(query_string_list), |
| insertion_time_(base::Time::Now()) {} |
| |
| // Must only be called from RemoveAndDelete(). |
| ~QueryString() = default; |
| |
| // Moves this object in front of `node`. |
| template <typename NodeType> |
| void MoveBeforeNode(NodeType* node) { |
| static_assert(std::same_as<NodeType, LruNode> || |
| std::same_as<NodeType, QueryStringListNode>, |
| "This assert is just documentation"); |
| CHECK_NE(node, this); |
| NodeType::RemoveFromList(); |
| NodeType::InsertBefore(node); |
| } |
| |
| // No-Vary-Search treats "http://www.example.com/" and |
| // "http://www.example.com/?" as the same URL, but the disk cache key treats |
| // them as different URLs, so we need to be able to distinguish them to |
| // correctly reconstruct the original URL. `query_ == std::nullopt` means that |
| // there was no `?` in the original URL, and `query_ == ""` means there was. |
| const std::optional<std::string> query_; |
| |
| // `query_string_list_ref_` allows the keys for this entry to be located in |
| // the cache so that it can be erased efficiently. |
| const raw_ref<QueryStringList> query_string_list_ref_; |
| |
| // `insertion_time_` breaks ties when there are multiple possible matches. The |
| // most recent entry will be used as it is most likely to still exist in the |
| // disk cache. |
| base::Time insertion_time_; |
| |
| // EraseHandle uses weak pointers to QueryString objects to enable an entry to |
| // be deleted from the cache if it is found not to be readable from the disk |
| // cache. |
| base::WeakPtrFactory<QueryString> weak_factory_{this}; |
| }; |
| |
| // The two implementations of ToQueryString() are defined out-of-line so that |
| // the compiler has seen the definition of QueryString and so knows the correct |
| // offsets to apply to the `this` pointer. |
| NoVarySearchCache::QueryString* NoVarySearchCache::LruNode::ToQueryString() { |
| return static_cast<QueryString*>(this); |
| } |
| |
| NoVarySearchCache::QueryString* |
| NoVarySearchCache::QueryStringListNode::ToQueryString() { |
| return static_cast<QueryString*>(this); |
| } |
| |
| NoVarySearchCache::EraseHandle::EraseHandle( |
| base::WeakPtr<QueryString> query_string) |
| : query_string_(std::move(query_string)) {} |
| |
| NoVarySearchCache::EraseHandle::~EraseHandle() = default; |
| |
| NoVarySearchCache::EraseHandle::EraseHandle(EraseHandle&& rhs) = default; |
| NoVarySearchCache::EraseHandle& NoVarySearchCache::EraseHandle::operator=( |
| EraseHandle&& rhs) = default; |
| |
| bool NoVarySearchCache::EraseHandle::EqualsForTesting( |
| const EraseHandle& rhs) const { |
| return query_string_.get() == rhs.query_string_.get(); |
| } |
| |
| bool NoVarySearchCache::EraseHandle::IsGoneForTesting() const { |
| return !query_string_; |
| } |
| |
| NoVarySearchCache::NoVarySearchCache(size_t max_size) : max_size_(max_size) { |
| CHECK_GT(max_size_, 0u); |
| } |
| |
| NoVarySearchCache::~NoVarySearchCache() { |
| map_.clear(); |
| // Clearing the map should have freed all the QueryString objects. |
| CHECK(lru_.empty()); |
| } |
| |
| std::optional<NoVarySearchCache::LookupResult> NoVarySearchCache::Lookup( |
| const NetworkIsolationKey& nik, |
| const GURL& url) { |
| if (nik.IsTransient() || !URLIsAcceptable(url)) { |
| return std::nullopt; |
| } |
| std::string_view base_url = ExtractBaseURLView(url); |
| const auto it = map_.find(KeyReference(nik, base_url)); |
| if (it == map_.end()) { |
| return std::nullopt; |
| } |
| // We have a match, so we need to create a real URL now. |
| QueryString* best_match = nullptr; |
| GURL original_url; |
| for (auto& [nvs_data, query_strings] : it->second) { |
| auto result = FindQueryStringInList(query_strings, base_url, url, nvs_data); |
| if (result && (!best_match || best_match->insertion_time() < |
| result->match->insertion_time())) { |
| best_match = result->match; |
| original_url = result->original_url; |
| } |
| } |
| if (!best_match) { |
| return std::nullopt; |
| } |
| // Move to head of `lru_` list. |
| best_match->MoveToHead(lru_); |
| |
| return LookupResult(original_url, best_match->CreateEraseHandle()); |
| } |
| |
| void NoVarySearchCache::MaybeInsert(const NetworkIsolationKey& nik, |
| const GURL& url, |
| const HttpResponseHeaders& headers) { |
| if (nik.IsTransient() || !URLIsAcceptable(url)) { |
| return; |
| } |
| auto maybe_nvs_data = HttpNoVarySearchData::ParseFromHeaders(headers); |
| EmitNoVarySearchHeaderParseResultHistogram(maybe_nvs_data); |
| if (!maybe_nvs_data.has_value()) { |
| return; |
| } |
| const std::string_view base_url = ExtractBaseURLView(url); |
| |
| std::optional<std::string_view> query; |
| if (url.has_query()) { |
| query = url.query_piece(); |
| } |
| |
| // Using lower_bound() followed by emplace_hint() allows us to avoid |
| // constructing a Key object if there is already a matching key in the map, |
| // and do only a single logN lookup. |
| const KeyReference key(nik, base_url); |
| auto it = map_.lower_bound(key); |
| if (it == map_.end() || it->first != key) { |
| it = map_.emplace_hint(it, Key(nik, GURL(base_url)), DataMapType()); |
| } |
| DataMapType& data_map = it->second; |
| auto [data_it, inserted] = |
| data_map.emplace(std::move(*maybe_nvs_data), it->first); |
| const HttpNoVarySearchData& nvs_data = data_it->first; |
| QueryStringList& query_strings = data_it->second; |
| if (inserted) { |
| query_strings.nvs_data_ref = &nvs_data; |
| } else { |
| // There was already an entry for this `nvs_data`. We need to check if it |
| // has a match for the URL we're trying to insert. If it does, we should |
| // update or replace the existing QueryString. |
| if (auto result = |
| FindQueryStringInList(query_strings, base_url, url, nvs_data)) { |
| QueryString* match = result->match; |
| if (match->query() == query) { |
| // In the exact match case we can use the existing QueryString object. |
| match->UpdateInsertionTime(); |
| match->MoveToHead(query_strings.list); |
| match->MoveToHead(lru_); |
| return; |
| } |
| |
| // No-Vary-Search matches are transitive. Any future requests that might |
| // be a match for `match` are also a match for `url`. Since `url` is newer |
| // we will prefer it, and so `match` will never be used again and we can |
| // safely remove it from the cache. |
| --size_; |
| match->RemoveAndDelete(); |
| } |
| } |
| EvictIfFull(); |
| CHECK_LT(size_, max_size_); |
| ++size_; |
| QueryString::CreateAndInsert(query, query_strings, lru_); |
| } |
| |
| void NoVarySearchCache::Erase(EraseHandle handle) { |
| if (QueryString* query_string = handle.query_string_.get()) { |
| EraseQuery(query_string); |
| } |
| } |
| |
| // This is out-of-line to discourage inlining so the bots can detect if it is |
| // accidentally linked into the binary. |
| size_t NoVarySearchCache::GetSizeForTesting() const { |
| return size_; |
| } |
| |
| bool NoVarySearchCache::IsTopLevelMapEmptyForTesting() const { |
| return map_.empty(); |
| } |
| |
| bool NoVarySearchCache::Key::operator==(const Key& rhs) const = default; |
| |
| bool NoVarySearchCache::KeyComparator::operator()(const Key& lhs, |
| const Key& rhs) const { |
| return std::tie(lhs.nik, lhs.base_url) < std::tie(rhs.nik, rhs.base_url); |
| } |
| |
| bool NoVarySearchCache::KeyComparator::operator()( |
| const Key& lhs, |
| const KeyReference& rhs) const { |
| const auto lhs_ref = |
| KeyReference(lhs.nik, lhs.base_url.possibly_invalid_spec()); |
| return lhs_ref < rhs; |
| } |
| |
| bool NoVarySearchCache::KeyComparator::operator()(const KeyReference& lhs, |
| const Key& rhs) const { |
| const auto rhs_ref = |
| KeyReference(rhs.nik, rhs.base_url.possibly_invalid_spec()); |
| return lhs < rhs_ref; |
| } |
| |
| NoVarySearchCache::QueryStringList::QueryStringList(const Key& key) |
| : key_ref(key) {} |
| |
| NoVarySearchCache::QueryStringList::~QueryStringList() { |
| while (!list.empty()) { |
| list.head()->value()->ToQueryString()->RemoveAndDelete(); |
| } |
| } |
| |
| void NoVarySearchCache::EvictIfFull() { |
| CHECK_LE(size_, max_size_); |
| if (size_ == max_size_) { |
| EraseQuery(lru_.tail()->value()->ToQueryString()); |
| } |
| } |
| |
| void NoVarySearchCache::EraseQuery(QueryString* query_string) { |
| CHECK_GT(size_, 0u); |
| --size_; |
| const QueryStringList& query_strings = query_string->query_string_list_ref(); |
| query_string->RemoveAndDelete(); |
| if (query_strings.list.empty()) { |
| const HttpNoVarySearchData* nvs_data_ref = query_strings.nvs_data_ref.get(); |
| const Key& key_ref = query_strings.key_ref.get(); |
| const auto map_it = map_.find(key_ref); |
| CHECK(map_it != map_.end()); |
| size_t removed_count = map_it->second.erase(*nvs_data_ref); |
| CHECK_EQ(removed_count, 1u); |
| if (map_it->second.empty()) { |
| map_.erase(map_it); |
| } |
| } |
| } |
| |
| // static |
| std::optional<NoVarySearchCache::FindQueryStringResult> |
| NoVarySearchCache::FindQueryStringInList(QueryStringList& query_strings, |
| std::string_view base_url, |
| const GURL& url, |
| const HttpNoVarySearchData& nvs_data) { |
| for (auto* node = query_strings.list.head(); node != query_strings.list.end(); |
| node = node->next()) { |
| QueryString* query_string = node->value()->ToQueryString(); |
| // TODO(crbug.com/382394774): Stop allocating GURLs in a tight loop. |
| GURL node_url = query_string->ReconstructOriginalURL(base_url); |
| CHECK(node_url.is_valid()); |
| if (nvs_data.AreEquivalent(url, node_url)) { |
| return FindQueryStringResult(query_string, std::move(node_url)); |
| } |
| } |
| return std::nullopt; |
| } |
| |
| } // namespace net |