blob: 904a4dd57ff74205c109362eb3e0eafd3d967008 [file] [log] [blame]
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef NET_HTTP_NO_VARY_SEARCH_CACHE_H_
#define NET_HTTP_NO_VARY_SEARCH_CACHE_H_
#include <stddef.h>
#include <compare>
#include <map>
#include <optional>
#include <string>
#include <string_view>
#include <tuple>
#include <utility>
#include <vector>
#include "base/containers/linked_list.h"
#include "base/functional/function_ref.h"
#include "base/memory/raw_ptr.h"
#include "base/memory/stack_allocated.h"
#include "base/memory/weak_ptr.h"
#include "base/types/strong_alias.h"
#include "net/base/does_url_match_filter.h"
#include "net/base/net_export.h"
#include "net/base/pickle_traits.h"
#include "net/http/http_no_vary_search_data.h"
#include "net/http/http_request_info.h"
#include "url/gurl.h"
namespace base {
class Time;
}
namespace net {
class HttpResponseHeaders;
// An in-memory cache that permits looking up a URL and seeing if it matches a
// previous response according to the rules of the No-Vary-Search header (see
// https://httpwg.org/http-extensions/draft-ietf-httpbis-no-vary-search.html).
// See also the design doc at
// https://docs.google.com/document/d/1RS3q6qZ7-k9CvZsDYseGOXzcdQ9fGZ6YYnaW7fTPu7A/edit
//
// Owned by net::HttpCache.
//
// Ignoring eviction, the data structure is approximately equivalent to
// std::map<std::pair<BaseURLCacheKey, HttpNoVarySearchData>,
// std::list<QueryString>>.
//
// BaseURLCacheKey is the output of the HttpCache key algorithm run on the base
// URL (everything before the "?"). So it incorporates the NetworkIsolationKey
// when split cache is enabled.
class NET_EXPORT_PRIVATE NoVarySearchCache {
private:
// Declared here so that it can be mentioned in the definition of EraseHandle.
class QueryString;
public:
// Opaque object that permits erasure of an item from the cache.
// See comments on the Lookup() and Erase() methods for usage.
class NET_EXPORT_PRIVATE EraseHandle {
public:
~EraseHandle();
// Not copyable.
EraseHandle(const EraseHandle&) = delete;
EraseHandle& operator=(const EraseHandle&) = delete;
// Movable.
EraseHandle(EraseHandle&& rhs);
EraseHandle& operator=(EraseHandle&& rhs);
// For unit tests it is useful to be able to inspect this.
bool EqualsForTesting(const EraseHandle& rhs) const;
bool IsGoneForTesting() const;
private:
friend class NoVarySearchCache;
friend class QueryString;
explicit EraseHandle(base::WeakPtr<QueryString> query_string);
base::WeakPtr<QueryString> query_string_;
};
// An interface for receiving notifications about changes to the
// NoVarySearchCache. Only insertions and refreshes via MaybeInsert() and
// erasures via Erase() are reported to this interface. Evictions are
// implicit, and modifications via ClearData() are expected to be followed by
// persisting a fresh copy of the database.
class NET_EXPORT_PRIVATE Journal {
public:
Journal() = default;
Journal(const Journal&) = delete;
Journal& operator=(const Journal&) = delete;
// Called when an entry is inserted or refreshed by the MaybeInsert()
// method. Not called when MaybeInsert() results in no changes to the
// database. Also called by MergeFrom() for each merged entry.
virtual void OnInsert(const std::string& base_url_cache_key,
const HttpNoVarySearchData& nvs_data,
const std::optional<std::string>& query,
base::Time update_time) = 0;
// Called when an entry is erased by the Erase() method.
virtual void OnErase(const std::string& base_url_cache_key,
const HttpNoVarySearchData& nvs_data,
const std::optional<std::string>& query) = 0;
protected:
// Journal objects are never deleted via a base class pointer.
virtual ~Journal();
};
struct LookupResult {
GURL original_url;
EraseHandle erase_handle;
};
// The cache will hold at most `max_size` entries. Each entry stores the query
// parameter from a previous response, which will typically be 100 to 200
// bytes.
explicit NoVarySearchCache(size_t max_size);
// Move-constructible to permit deserialization and passing between threads.
NoVarySearchCache(NoVarySearchCache&&);
// Not copyable or assignable.
NoVarySearchCache(const NoVarySearchCache&) = delete;
NoVarySearchCache& operator=(const NoVarySearchCache&) = delete;
NoVarySearchCache& operator=(NoVarySearchCache&&) = delete;
~NoVarySearchCache();
// Finds an entry in the cache equivalent to `request.url` and in the same
// cache partition. If a result is returned, then `original_url` can be used
// to find a disk cache entry. `erase_handle` can be used to remove the entry
// from this cache if it was not in the disk cache. Not const because it
// updates the LRU linked list to mark the entry as recently used.
std::optional<LookupResult> Lookup(const HttpRequestInfo& request);
// Inserts `url` into the cache if a non-default "No-Vary-Search" header was
// found in `headers`. On insertion, will remove any existing matching entry
// with the same No-Vary-Search header, as the older entry would never be
// returned by Lookup() anyway. May evict the oldest entry in the cache to
// avoid the size exceeding `max_size_`.
void MaybeInsert(const HttpRequestInfo& request,
const HttpResponseHeaders& headers);
// Synchronously deletes entries that match `origins` or `domains` with update
// times equal or greater than `delete_begin` and less than `delete_end`.
// Setting `filter_type` to UrlFilterType::kFalseIfMatching inverts the
// meaning of `origins` and `domains` as with DoesURlMatchFilter(), but
// doesn't affect the interpretation of `delete_begin` and `delete_end`.
// In particular, ClearData(URLFilterType::kFalseIfMatching, {}, {},
// base::Time(), base::Time::Max()) will delete everything. Returns `true` if
// anything was removed.
bool ClearData(UrlFilterType filter_type,
const base::flat_set<url::Origin>& origins,
const base::flat_set<std::string>& domains,
base::Time delete_begin,
base::Time delete_end);
// Erases the entry referenced by `erase_handle` from the cache. Does
// nothing if the entry no longer exists.
void Erase(EraseHandle handle);
// Set a Journal to be notified about subsequent changes to the cache. This
// object does not take ownership of the Journal. Calling the method again
// will replace the journal. The method can be called with nullptr to stop
// being notified.
void SetJournal(Journal* journal);
// Adds the specified entry to the cache as if by MaybeInsert(), evicting an
// older entry if the cache is full. The entry is treated as if newly used for
// the purposes of eviction. For use when replaying journalled entries. The
// arguments are expected to match a previous call to Journal::OnInsert()
// from a different instance of NoVarySearchCache, but with the same settings
// for cache partitioning. It can also be called with other valid arguments
// for testing. If a valid base URL cannot be extracted from
// `base_url_cache_key`, or `query` contains an invalid character, the call is
// ignored. This will never happen if the arguments are unchanged from a call
// to Journal::OnInsert() with the same partitioning. A valid base URL does
// not contain a query or a fragment. Journal methods are not called.
void ReplayInsert(std::string base_url_cache_key,
HttpNoVarySearchData nvs_data,
std::optional<std::string> query,
base::Time update_time);
// Removes the specified entry from the cache as if by Erase(). For use when
// replaying journalled entries. The arguments are expected to match a
// previous call to Journal::OnErase from a different instance of
// NoVarySearchCache, with the same settings for cache partitioning
// base::Features. If `query` is not found the call silently
// does nothing. Journal methods are not called.
void ReplayErase(const std::string& base_url_cache_key,
const HttpNoVarySearchData& nvs_data,
const std::optional<std::string>& query);
// Merge entries from `newer` in order from the least-recently-used to the
// most-recently-used, treating them as newly used. Less recently-used entries
// will be evicted if necessary to avoid exceeding the maximum size.
// Journal::OnInsert() is called as if the entries were newly inserted (but
// with the original update_time).
void MergeFrom(const NoVarySearchCache& newer);
// Returns the size (number of stored original query strings) of the cache.
size_t size() const { return size_; }
// Return the maximum size for the cache. Attempting to add more than this
// many entries will result in older entries being evicted.
size_t max_size() const { return max_size_; }
// Returns true if the top-level map is empty. This should be equivalent to
// size() == 0 in the absence of bugs.
bool IsTopLevelMapEmptyForTesting() const;
private:
friend struct PickleTraits<NoVarySearchCache>;
struct QueryStringList;
friend struct PickleTraits<NoVarySearchCache::QueryStringList>;
using BaseURLCacheKey =
base::StrongAlias<struct BaseURLCacheKeyTagType, std::string>;
friend struct PickleTraits<NoVarySearchCache::BaseURLCacheKey>;
class LruNode;
class QueryStringListNode;
struct QueryStringList {
base::LinkedList<QueryStringListNode> list;
// nvs_data_ref can't be raw_ref because it needs to be lazily initialized
// after the QueryStringList has been added to the map.
raw_ptr<const HttpNoVarySearchData> nvs_data_ref = nullptr;
// key_ref can't be raw_ref because it needs to be added in a second pass
// during deserialization.
raw_ptr<const BaseURLCacheKey> key_ref = nullptr;
// The referent of this reference has to be the actual key in the map. It is
// not sufficient for the value to match, because the lifetime has to be the
// same.
explicit QueryStringList(const BaseURLCacheKey& key);
// Needed during deserialization.
QueryStringList();
// Only used during deserialization. This is O(N) in the size of `list`.
QueryStringList(QueryStringList&&);
// base::LinkedList<> does not do memory management, so make sure the
// contents of `list` are deleted on destruction.
~QueryStringList();
};
struct FindQueryStringResult {
STACK_ALLOCATED(); // `match` doesn't need to be raw_ptr.
public:
QueryString* match;
GURL original_url;
};
// TODO(crbug.com/382394774): Investigate performance of different map types.
using DataMapType = std::map<HttpNoVarySearchData, QueryStringList>;
using OuterMapType = std::map<BaseURLCacheKey, DataMapType, std::less<>>;
// Erases an entry from the cache if `size_ > max_size_`.
void EvictIfOverfull();
// Erases `query_string` from the cache.
void EraseQuery(QueryString* query_string);
// Inserts `query` or marks it as used in the cache. evicting an older entry
// if necessary to make space. `journal` is notified if set.
void DoInsert(const GURL& url,
const GURL& base_url,
std::string base_url_cache_key,
HttpNoVarySearchData nvs_data,
std::optional<std::string_view> query,
base::Time update_time,
Journal* journal);
// A convenience method for callers that do not have the original URL handy.
// Reconstructs the original URL and then calls DoInsert().
void ReconstructURLAndDoInsert(const GURL& base_url,
std::string base_url_cache_key,
HttpNoVarySearchData nvs_data,
std::optional<std::string> query,
base::Time update_time,
Journal* journal);
// Scans all the QueryStrings in `data_map` to find ones in the range
// [delete_begin, delete_end) and appends them to `matches`. `data_map` is
// mutable to reflect that it is returning mutable pointers to QueryString
// objects that it owns. The returned QueryString objects are mutable so the
// caller can erase them.
static void FindQueryStringsInTimeRange(DataMapType& data_map,
base::Time delete_begin,
base::Time delete_end,
std::vector<QueryString*>& matches);
static std::optional<FindQueryStringResult> FindQueryStringInList(
QueryStringList& query_strings,
const GURL& base,
const GURL& url,
const HttpNoVarySearchData& nvs_data);
// Calls f(query_string_ptr) for every QueryString in `list`.
static void ForEachQueryString(base::LinkedList<QueryStringListNode>& list,
base::FunctionRef<void(QueryString*)> f);
// Calls f(const_query_string_ptr) for every QueryString in `list`.
static void ForEachQueryString(
const base::LinkedList<QueryStringListNode>& list,
base::FunctionRef<void(const QueryString*)> f);
// The main cache data structure.
OuterMapType map_;
// lru_.tail() is the least-recently-used QueryString.
base::LinkedList<LruNode> lru_;
// The number of QueryString objects in the cache.
size_t size_ = 0u;
// QueryString objects will be evicted to avoid exceeding `max_size_`.
const size_t max_size_;
// An object to be notified about changes to this cache.
raw_ptr<Journal> journal_ = nullptr;
};
template <>
struct NET_EXPORT_PRIVATE PickleTraits<NoVarySearchCache> {
static void Serialize(base::Pickle& pickle, const NoVarySearchCache& cache);
static std::optional<NoVarySearchCache> Deserialize(
base::PickleIterator& iter);
static size_t PickleSize(const NoVarySearchCache& cache);
};
} // namespace net
#endif // NET_HTTP_NO_VARY_SEARCH_CACHE_H_