blob: 189c654ed83d01c27c7c44036057bed862502550 [file] [log] [blame]
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef NET_HTTP_NO_VARY_SEARCH_CACHE_H_
#define NET_HTTP_NO_VARY_SEARCH_CACHE_H_
#include <stddef.h>
#include <concepts>
#include <optional>
#include <string>
#include <string_view>
#include <tuple>
#include <utility>
#include <vector>
#include "base/containers/linked_list.h"
#include "base/functional/function_ref.h"
#include "base/memory/raw_ptr.h"
#include "base/memory/stack_allocated.h"
#include "base/memory/weak_ptr.h"
#include "net/base/does_url_match_filter.h"
#include "net/base/net_export.h"
#include "net/base/pickle_traits.h"
#include "net/http/http_no_vary_search_data.h"
#include "net/http/http_request_info.h"
#include "third_party/abseil-cpp/absl/container/node_hash_map.h"
#include "url/gurl.h"
namespace base {
class Time;
}
namespace net {
class HttpResponseHeaders;
// An in-memory cache that permits looking up a URL and seeing if it matches a
// previous response according to the rules of the No-Vary-Search header (see
// https://httpwg.org/http-extensions/draft-ietf-httpbis-no-vary-search.html).
// See also the design doc at
// https://docs.google.com/document/d/1RS3q6qZ7-k9CvZsDYseGOXzcdQ9fGZ6YYnaW7fTPu7A/edit
//
// Owned by net::HttpCache.
//
// Ignoring eviction, the data structure is approximately equivalent to
// std::map<std::tuple<CachePartitionKey, BaseURL, HttpNoVarySearchData,
// CanonicalizedQuery>,
// std::unique_ptr<Query>>.
class NET_EXPORT_PRIVATE NoVarySearchCache {
private:
// Declared here so that it can be mentioned in the definition of EraseHandle.
class Query;
public:
// Opaque object that permits erasure of an item from the cache.
// See comments on the Lookup() and Erase() methods for usage.
class NET_EXPORT_PRIVATE EraseHandle {
public:
~EraseHandle();
// Not copyable.
EraseHandle(const EraseHandle&) = delete;
EraseHandle& operator=(const EraseHandle&) = delete;
// Movable.
EraseHandle(EraseHandle&& rhs);
EraseHandle& operator=(EraseHandle&& rhs);
// For unit tests it is useful to be able to inspect this.
bool EqualsForTesting(const EraseHandle& rhs) const;
bool IsGoneForTesting() const;
private:
friend class NoVarySearchCache;
friend class Query;
explicit EraseHandle(base::WeakPtr<Query> query_string);
base::WeakPtr<Query> query_;
};
// An interface for receiving notifications about changes to the
// NoVarySearchCache. Only insertions and refreshes via MaybeInsert() and
// erasures via Erase() are reported to this interface. Evictions are
// implicit, and modifications via ClearData() are expected to be followed by
// persisting a fresh copy of the database.
class NET_EXPORT_PRIVATE Journal {
public:
Journal() = default;
Journal(const Journal&) = delete;
Journal& operator=(const Journal&) = delete;
// Called when an entry is inserted or refreshed by the MaybeInsert()
// method. Not called when MaybeInsert() results in no changes to the
// database. Also called by MergeFrom() for each merged entry.
virtual void OnInsert(const std::string& partition_key,
const std::string& base_url,
const HttpNoVarySearchData& nvs_data,
const std::optional<std::string>& query,
base::Time update_time) = 0;
// Called when an entry is erased by the Erase() method.
virtual void OnErase(const std::string& partition_key,
const std::string& base_url,
const HttpNoVarySearchData& nvs_data,
const std::optional<std::string>& query) = 0;
protected:
// Journal objects are never deleted via a base class pointer.
virtual ~Journal();
};
struct LookupResult {
GURL original_url;
EraseHandle erase_handle;
};
// The cache will hold at most `max_size` entries. Each entry stores the query
// parameter from a previous response, which will typically be 100 to 200
// bytes.
explicit NoVarySearchCache(size_t max_size);
// Move-constructible to permit deserialization and passing between threads.
NoVarySearchCache(NoVarySearchCache&&);
// Not copyable or assignable.
NoVarySearchCache(const NoVarySearchCache&) = delete;
NoVarySearchCache& operator=(const NoVarySearchCache&) = delete;
NoVarySearchCache& operator=(NoVarySearchCache&&) = delete;
~NoVarySearchCache();
// Finds an entry in the cache equivalent to `request.url` and in the same
// cache partition. If a result is returned, then `original_url` can be used
// to find a disk cache entry. `erase_handle` can be used to remove the entry
// from this cache if it was not in the disk cache. Not const because it
// updates the LRU linked list to mark the entry as recently used.
std::optional<LookupResult> Lookup(const HttpRequestInfo& request);
std::optional<LookupResult> Lookup(const HttpRequestInfo& request,
bool& out_base_url_matched);
// Inserts `url` into the cache if a non-default "No-Vary-Search" header was
// found in `headers`. On insertion, will remove any existing matching entry
// with the same No-Vary-Search header, as the older entry would never be
// returned by Lookup() anyway. May evict the oldest entry in the cache to
// avoid the size exceeding `max_size_`.
void MaybeInsert(const HttpRequestInfo& request,
const HttpResponseHeaders& headers);
// Synchronously deletes entries that match `origins` or `domains` with update
// times equal or greater than `delete_begin` and less than `delete_end`.
// Setting `filter_type` to UrlFilterType::kFalseIfMatching inverts the
// meaning of `origins` and `domains` as with DoesURlMatchFilter(), but
// doesn't affect the interpretation of `delete_begin` and `delete_end`.
// In particular, ClearData(URLFilterType::kFalseIfMatching, {}, {},
// base::Time(), base::Time::Max()) will delete everything. Returns `true` if
// anything was removed.
bool ClearData(UrlFilterType filter_type,
const base::flat_set<url::Origin>& origins,
const base::flat_set<std::string>& domains,
base::Time delete_begin,
base::Time delete_end);
// Erases the entry referenced by `erase_handle` from the cache. Does
// nothing if the entry no longer exists.
void Erase(EraseHandle handle);
// Set a Journal to be notified about subsequent changes to the cache. This
// object does not take ownership of the Journal. Calling the method again
// will replace the journal. The method can be called with nullptr to stop
// being notified.
void SetJournal(Journal* journal);
// Adds the specified entry to the cache as if by MaybeInsert(), evicting an
// older entry if the cache is full. The entry is treated as if newly used for
// the purposes of eviction. For use when replaying journalled entries. The
// arguments are expected to match a previous call to Journal::OnInsert() from
// a different instance of NoVarySearchCache, but with the same settings for
// cache partitioning. It can also be called with other valid arguments for
// testing. If`base_url` is not a valid base URL, or `query` contains an
// invalid character, the call is ignored. This will never happen if the
// arguments are unchanged from a call to Journal::OnInsert() with the same
// partitioning. A valid base URL does not contain a query or a fragment.
// Journal methods are not called.
void ReplayInsert(std::string partition_key,
std::string base_url,
HttpNoVarySearchData nvs_data,
std::optional<std::string> query,
base::Time update_time);
// Removes the specified entry from the cache as if by Erase(). For use when
// replaying journalled entries. The arguments are expected to match a
// previous call to Journal::OnErase from a different instance of
// NoVarySearchCache, with the same settings for cache partitioning
// base::Features. If `query` is not found the call silently
// does nothing. Journal methods are not called.
void ReplayErase(const std::string& partition_key,
const std::string& base_url,
const HttpNoVarySearchData& nvs_data,
const std::optional<std::string>& query);
// Merge entries from `newer` in order from the least-recently-used to the
// most-recently-used, treating them as newly used. Less recently-used entries
// will be evicted if necessary to avoid exceeding the maximum size.
// Journal::OnInsert() is called as if the entries were newly inserted (but
// with the original update_time).
void MergeFrom(const NoVarySearchCache& newer);
// Changes the maximum number of entries stored in the cache to the supplied
// value, evicting entries if necessary to fit within the new limit. Optimized
// for the case when `max_size_` doesn't change.
void SetMaxSize(size_t max_size);
// Returns the size (number of stored original query strings) of the cache.
size_t size() const { return size_; }
// Return the maximum size for the cache. Attempting to add more than this
// many entries will result in older entries being evicted.
size_t max_size() const { return max_size_; }
// Returns true if the top-level map is empty. This should be equivalent to
// size() == 0 in the absence of bugs.
bool IsTopLevelMapEmptyForTesting() const;
private:
friend struct PickleTraits<NoVarySearchCache>;
friend struct PickleTraits<std::unique_ptr<NoVarySearchCache::Query>>;
// All the maps use absl::node_hash_map rather than absl::flat_hash_map
// because pointer stability for the keys is needed in order to be able to
// erase a Query object by its pointer.
using CanonicalizedQueryToQueryMap =
absl::node_hash_map<std::string, std::unique_ptr<Query>>;
friend struct PickleTraits<NoVarySearchCache::CanonicalizedQueryToQueryMap>;
struct Queries {
CanonicalizedQueryToQueryMap query_map;
// In order to erase a Query from the cache, we need to be able to
// find the map entries that contain it. To do this we have pointers back to
// the keys.
raw_ptr<const HttpNoVarySearchData> nvs_data_ptr = nullptr;
raw_ptr<const std::string> base_url_ptr = nullptr;
raw_ptr<const std::string> cache_partition_key_ptr = nullptr;
Queries();
~Queries();
// `query_map` is not copyable because the values are unique_ptrs.
Queries(const Queries&) = delete;
Queries& operator=(const Queries&) = delete;
// It is movable.
Queries(Queries&&);
Queries& operator=(Queries&&) = default;
};
friend struct PickleTraits<NoVarySearchCache::Queries>;
using NVSDataToQueriesMap =
absl::node_hash_map<HttpNoVarySearchData, Queries>;
using BaseUrlToNVSDataMap =
absl::node_hash_map<std::string, NVSDataToQueriesMap>;
using CachePartitionKeyToBaseUrlMap =
absl::node_hash_map<std::string, BaseUrlToNVSDataMap>;
// Erases an entry from the cache if `size_ > max_size_`.
void EvictIfOverfull();
// Erases `query_string` from the cache.
void EraseQuery(Query* query_string);
// Inserts `query` or marks it as used in the cache, evicting an older entry
// if necessary to make space. `journal` is notified if set.
void DoInsert(const GURL& url,
std::string cache_partition_key,
std::string base_url,
HttpNoVarySearchData nvs_data,
std::optional<std::string> query,
base::Time update_time,
Journal* journal);
// A convenience method for callers that do not have the original URL handy.
// Reconstructs the original URL and then calls DoInsert().
void ReconstructURLAndDoInsert(std::string partition_key,
std::string base_url,
HttpNoVarySearchData nvs_data,
std::optional<std::string> query,
base::Time update_time,
Journal* journal);
// Scans all the Query objects in `data_map` to find ones in the range
// [delete_begin, delete_end) and appends them to `matches`. `data_map` is
// mutable to reflect that it is returning mutable pointers to Query objects
// that it owns. The returned Query objects are mutable so the caller can
// erase them.
static void FindQuerysInTimeRange(NVSDataToQueriesMap& data_map,
base::Time delete_begin,
base::Time delete_end,
std::vector<Query*>& matches);
// Find a Query matching the query part of `url` based on the rules in
// `nvs_data` if one exists, or returns nullptr.
static Query* FindQuery(CanonicalizedQueryToQueryMap& query_map,
const GURL& url,
const HttpNoVarySearchData& nvs_data);
// Calls f(query_string_ptr) for every Query in `queries`.
static void ForEachQuery(Queries& queries, base::FunctionRef<void(Query*)> f);
// The main cache data structure. The key is the cache partition key as
// generated by HttpCache::GenerateCachePartitionKeyForRequest(). The value is
// a map with base URL stringified as keys. The value of that map is another
// map with NoVarySearchData objects as keys, and the value of that map is
// another map with canonicalized query strings as keys. That map has Query
// objects as keys.
// CachePartitionKey (std::string) ->
// Base URL (std::string) ->
// NVS data (NoVarySearchData) ->
// Canonicalized query (std::string) -> Query object
CachePartitionKeyToBaseUrlMap partitions_;
// lru_.tail() is the least-recently-used Query.
base::LinkedList<Query> lru_;
// The number of Query objects in the cache.
size_t size_ = 0u;
// Query objects will be evicted to avoid exceeding `max_size_`.
size_t max_size_;
// An object to be notified about changes to this cache.
raw_ptr<Journal> journal_ = nullptr;
};
// Specialization of PickleTraits needed for serializing and deserializing
// NoVarySearchCache objects.
template <>
struct NET_EXPORT_PRIVATE PickleTraits<NoVarySearchCache> {
static void Serialize(base::Pickle& pickle, const NoVarySearchCache& cache);
static std::optional<NoVarySearchCache> Deserialize(
base::PickleIterator& iter);
static size_t PickleSize(const NoVarySearchCache& cache);
};
} // namespace net
#endif // NET_HTTP_NO_VARY_SEARCH_CACHE_H_