blob: 8ee5068d4941ffd3a2abf86dc34448bd05375343 [file] [log] [blame]
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CONTENT_BROWSER_INTEREST_GROUP_TRUSTED_SIGNALS_CACHE_IMPL_H_
#define CONTENT_BROWSER_INTEREST_GROUP_TRUSTED_SIGNALS_CACHE_IMPL_H_
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include "base/functional/callback_forward.h"
#include "base/memory/raw_ptr.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_refptr.h"
#include "base/types/optional_ref.h"
#include "base/unguessable_token.h"
#include "base/values.h"
#include "content/browser/interest_group/trusted_signals_fetcher.h"
#include "content/common/content_export.h"
#include "content/services/auction_worklet/public/mojom/trusted_signals_cache.mojom.h"
#include "mojo/public/cpp/bindings/pending_remote.h"
#include "mojo/public/cpp/bindings/receiver_set.h"
#include "services/network/public/cpp/shared_url_loader_factory.h"
#include "third_party/blink/public/mojom/interest_group/interest_group_types.mojom-forward.h"
#include "url/gurl.h"
#include "url/origin.h"
namespace content {
struct BiddingAndAuctionServerKey;
// Handles caching (not yet implemented) and dispatching of trusted bidding and
// scoring signals requests. Only handles requests to Trusted Execution
// Environments (TEEs), i.e., versions 2+ of the protocol, so does not handle
// legacy bring-your-own-server (BYOS) requests. The browser process makes a
// request and gets a Handle and a partition ID, which can then be used to fetch
// the response through the Mojo auction_worklet::mojom::TrustedSignalsCache
// API provided by the Cache. The Handle and partition ID are provided
// immediately on invocation, but the network request may not be sent out
// immediately.
//
// The values it vends are guaranteed to remain valid at least until the Handle
// they were returned with is destroyed. Having the cache in the browser process
// allows requests to be sent while the Javascript process is still starting up,
// and allows the cache to live beyond the shutdown of the often short-lived
// Javascript processes.
//
// Internally, it uses 4 maps:
//
// * `fetches_`, a multimap of pending/live Fetches, with FetchKeys consisting
// of what must be the same to share a fetch. On fetch completion, ownership of
// the response is passed to the corresponding CompressionGroupData(s) and the
// Fetch is deleted. See FetchKey for more details on why this is a multimap
// rather than a map.
//
// * `compression_group_data_map_`, a map of UnguessableTokens
// (`compression_group_tokens`) to CompressionGroupData, which contain the still
// compressed response for a single partition group within a fetch. A
// CompressionGroupData may have one or more partitions, each of which
// corresponds to a single [Bidding|Scoring]CacheEntry. The lifetime of
// CompressionGroupData is scoped to the Handle objects returned by the Cache.
//
// * `bidding_cache_entries_`, a map of BiddingCacheEntries, with
// BiddingCacheKeys consisting of what must be the same to share a Fetch, a
// compression group, and partition within the group. Fields that can be merged
// between requests to share a partition (e.g., trusted signals keys) are part
// of the entry itself, not the key. This is a map, not a multimap, so if a
// BiddingCacheEntry cannot be reused (with or without modification) to suit the
// needs of an incoming request, the BiddingCacheEntry is deleted, and removed
// from its CompressionGroupData. Destroying a BiddingCacheEntry in this way
// will not destroy the CompressionGroupData, or the CompressionGroupData's
// fetch, if it has one.
//
// * TODO(https://crbug.com/333445540): A map of ScoringCacheEntries much akin
// to the map of BiddingCacheEntries.
//
// Fetches and CacheEntries have pointers to the corresponding
// CompressionGroupData, while the CompressionGroupData owns the corresponding
// values in the other two maps. Deleting a CompressionGroupData removes the
// corresponding values in the two maps. One CompressionGroupData may own
// multiple CacheEntries, but will only own one live/pending Fetch. Ownership of
// a Fetch may be shared by multiple CompressionGroupData objects with matching
// FetchKeys.
//
// Each handed out Handle object will keep its corresponding
// CompressionGroupData alive until the handle is destroyed.
//
// TODO(https://crbug.com/333445540): Add caching support. Right now, entries
// are cached only as long as there's something that owns a Handle, but should
// instead cache for at least a short duration as long as an entry's TTL hasn't
// expired. Holding onto a CompressionGroupData reference, which is refcounted,
// is all that's needed to keep an entry alive.
//
// TODO(https://crbug.com/333445540): May need some sort of rate limit and size
// cap. Currently, this class creates an arbitrary number of downloads, and
// potentially stores an unlimited amount of data in browser process memory.
class CONTENT_EXPORT TrustedSignalsCacheImpl
: public auction_worklet::mojom::TrustedSignalsCache {
public:
// Identifies which kind of trusted signals a Mojo pipe or a Fetch is for. A
// single Fetch or CompressionGroupData is for one type, never both.
enum class SignalsType {
// Trusted bidding signals; see RequestTrustedBiddingSignals().
kBidding,
// Trusted scoring signals; see RequestTrustedScoringSignals().
kScoring,
};
// Callback used to retrieve the BiddingAndAuctionServerKey for a given
// `coordinator`. The result is delivered by running the `callback` parameter,
// which may be invoked synchronously or asynchronously. The retrieval may
// fail: on success `callback` receives the key, and on failure it receives a
// std::string instead (presumably an error message -- confirm against the
// code that supplies this callback).
using GetCoordinatorKeyCallback = base::RepeatingCallback<void(
const std::optional<url::Origin>& coordinator,
base::OnceCallback<void(
base::expected<BiddingAndAuctionServerKey, std::string>)> callback)>;
// As long as a Handle is alive, any Mojo
// auction_worklet::mojom::TrustedSignalsCache created by invoking
// CreateMojoPipe() can retrieve the response associated with the
// corresponding signals response ID, which will not change for the lifetime
// of the handle. The ID can be used to request a response from the cache at
// any point in time, but the fetch may be made asynchronously, so there's no
// guarantee of a timely response.
//
// Refcounted so that one handle can be reused for all requests with the same
// `compression_group_token`, so when the Handle is destroyed, we know there
// are no Handles that refer to the corresponding entry in the cache, and it
// may be deleted.
//
// Any pending or future requests through a handed out
// auction_worklet::mojom::TrustedSignalsCache pipe for the
// `compression_group_token` associated with a destroyed Handle will be sent
// an error message.
//
// All outstanding Handles must be released before the TrustedSignalsCacheImpl
// may be destroyed.
//
// Currently, the internal CompressionGroupData class is a subclass of this,
// so callers are hanging on to data associated with a compression group
// directly, but that's not a fundamental design requirement of the API.
class Handle : public base::RefCounted<Handle> {
 public:
  // Handles are neither copyable nor assignable; share one through
  // scoped_refptr<Handle> instead. The deleted operations take const
  // references, which is the canonical signature for copy operations and
  // matches how TrustedSignalsCacheImpl itself deletes them.
  Handle(const Handle&) = delete;
  Handle& operator=(const Handle&) = delete;

  // The token that needs to be passed to GetTrustedSignals() to retrieve the
  // response through the auction_worklet::mojom::TrustedSignalsCache API.
  // Will not change for the lifetime of the handle.
  const base::UnguessableToken& compression_group_token() const {
    return compression_group_token_;
  }

 protected:
  // Gives base::RefCounted access to the protected destructor, so that the
  // object can be destroyed when the last reference is released.
  friend class base::RefCounted<Handle>;

  // Construction and destruction are restricted to subclasses (currently the
  // internal CompressionGroupData class) and the refcounting machinery.
  Handle();
  virtual ~Handle();

  // Created once, when the Handle is constructed, and const thereafter, which
  // is what guarantees the token never changes for the Handle's lifetime.
  const base::UnguessableToken compression_group_token_{
      base::UnguessableToken::Create()};
};
// `get_coordinator_key_callback` is used to retrieve the
// BiddingAndAuctionServerKey for a coordinator; see GetCoordinatorKeyCallback.
// NOTE(review): `url_loader_factory` is presumably what network fetches are
// issued through -- confirm in the .cc file.
TrustedSignalsCacheImpl(
scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory,
GetCoordinatorKeyCallback get_coordinator_key_callback);
// All outstanding Handles must be released before the cache is destroyed.
~TrustedSignalsCacheImpl() override;
// Not copyable or assignable.
TrustedSignalsCacheImpl(const TrustedSignalsCacheImpl&) = delete;
TrustedSignalsCacheImpl& operator=(const TrustedSignalsCacheImpl&) = delete;
// Creates a TrustedSignalsCache pipe for a bidder or seller script process.
// The returned pipe may only be used for `signals_type` fetches for
// `script_origin`, where `script_origin` is the origin of the script that will
// receive those signals (i.e., the seller origin for scoring signals, or the
// interest group owner for bidding signals).
mojo::PendingRemote<auction_worklet::mojom::TrustedSignalsCache>
CreateMojoPipe(SignalsType signals_type, const url::Origin& script_origin);
// Requests bidding signals for the specified interest group. Return value is
// a Handle which must be kept alive until the response to the request is no
// longer needed, and which provides a key to identify the response. Also
// returns `partition_id`, which identifies the partition within the
// compression group identified by Handle::compression_group_token() that will
// have the relevant response. `partition_id` is an out-parameter populated by
// this call.
//
// `trusted_bidding_signals_keys` is optional and may be omitted.
//
// Never starts a network fetch synchronously. Bidder signals are requested
// over the network after a post task.
scoped_refptr<Handle> RequestTrustedBiddingSignals(
const url::Origin& main_frame_origin,
const url::Origin& interest_group_owner,
const std::string& interest_group_name,
blink::mojom::InterestGroup_ExecutionMode execution_mode,
const url::Origin& joining_origin,
const GURL& trusted_signals_url,
const url::Origin& coordinator,
base::optional_ref<const std::vector<std::string>>
trusted_bidding_signals_keys,
base::Value::Dict additional_params,
int& partition_id);
// Requests scoring signals. Return value is a Handle which must be kept alive
// until the response to the request is no longer needed, and which provides a
// key to identify the response. Also returns `partition_id`, which identifies
// the partition within the compression group identified by
// Handle::compression_group_token() that will have the relevant response.
// `partition_id` is an out-parameter populated by this call.
//
// `interest_group_owner` and `joining_origin` are never sent over the wire,
// but are instead both used to determine if different compression groups
// should be used.
//
// Never starts a network fetch synchronously. Scoring signals are requested
// over the network after a post task.
//
// TODO(mmenke): Implement some way to delay sending the fetch over the wire
// for a certain duration, with some way for an auction to flush requests once
// it knows all signals it needs have been requested. Probably need to either
// add a method to Handle to flush requests, or add an API to flush all
// Fetches matching a passed in {seller origin, joining origin, publisher
// origin} triplet.
scoped_refptr<TrustedSignalsCacheImpl::Handle> RequestTrustedScoringSignals(
const url::Origin& main_frame_origin,
const url::Origin& seller,
const GURL& trusted_signals_url,
const url::Origin& coordinator,
const url::Origin& interest_group_owner,
const url::Origin& joining_origin,
const GURL& render_url,
const std::vector<GURL>& component_render_urls,
base::Value::Dict additional_params,
int& partition_id);
// auction_worklet::mojom::TrustedSignalsCache implementation:
//
// Requests the response associated with `compression_group_token`, which is
// the value returned by Handle::compression_group_token(). The fetch may be
// made asynchronously, so there is no guarantee of a timely response. If the
// Handle associated with the token has been destroyed, an error message is
// sent instead.
void GetTrustedSignals(
const base::UnguessableToken& compression_group_token,
mojo::PendingRemote<auction_worklet::mojom::TrustedSignalsCacheClient>
client) override;
private:
// Each receiver pipe in `receiver_set_` is restricted to only receive
// scoring/bidding signals for the specific script origin identified by this
// struct. It is the per-receiver context type of `receiver_set_`.
struct ReceiverRestrictions {
bool operator==(const ReceiverRestrictions& other) const;
// Whether the pipe may be used for bidding or for scoring signals.
SignalsType signals_type;
// Origin of the script that will receive the signals; see CreateMojoPipe().
url::Origin script_origin;
};
// Key used for live or pending requests to a trusted server. Two requests with
// the same FetchKey can be merged together, but the requests themselves may
// differ in other fields. Before the network request is started, any request
// with a matching fetch key may be merged into a single request. Once the
// network request is started, however, new requests may only be merged into
// the live request if there's a matching CacheEntry that has already
// requested all information needed for the request.
//
// There may be multiple requests at once with the same FetchKey, in the case
// a network request was started before a new request came in with values that
// do not match any of those in the live fetch.
//
// Combining requests across main frame origins or owners seems potentially
// problematic in terms of cross-origin leaks, so partition on those for now,
// at least.
struct FetchKey {
// Default constructor; takes no field values from the caller.
FetchKey();
// `script_origin` is the origin of the script that will receive the
// response. For bidding signals fetches, it's the interest group owner. For
// scoring signals fetches, it's the seller origin (component or top-level,
// depending on which seller will be receiving the signals).
//
// Note that the parameter order here differs from the order in which the
// fields are declared below.
FetchKey(const url::Origin& main_frame_origin,
SignalsType signals_type,
const url::Origin& script_origin,
const GURL& trusted_signals_url,
const url::Origin& coordinator);
// Copyable and movable.
FetchKey(const FetchKey&);
FetchKey(FetchKey&&);
~FetchKey();
FetchKey& operator=(const FetchKey&);
FetchKey& operator=(FetchKey&&);
// Ordering used when a FetchKey serves as a map key (e.g., in `FetchMap`).
bool operator<(const FetchKey& other) const;
// Order here matches comparison order in operator<(), and is based on a
// guess on what order will result in the most performant comparisons.
url::Origin script_origin;
SignalsType signals_type;
// The origin of the frame running the auction that needs the signals. This
// could potentially be used to separate compression groups instead of
// fetches, but best to be safe.
url::Origin main_frame_origin;
// NOTE(review): presumably the URL of the trusted server the signals are
// fetched from -- confirm in the .cc file.
GURL trusted_signals_url;
// NOTE(review): presumably the coordinator whose key is looked up through
// `get_coordinator_key_callback_` for this fetch -- confirm in the .cc file.
url::Origin coordinator;
};
// A pending or live network request. May be for bidding signals or scoring
// signals, but not both. Only forward-declared here.
struct Fetch;
// A multimap rather than a map because there may be multiple Fetches at once
// with the same FetchKey; see the comment on FetchKey.
using FetchMap = std::multimap<FetchKey, Fetch>;
// The cached compression group of a trusted signals response, or an error
// message. May be for bidding signals or scoring signals, but not both.
// CompressionGroupData are indexed by UnguessableTokens which can be used to
// retrieve them over the auction_worklet::mojom::TrustedSignalsCache Mojo
// interface.
//
// CompressionGroupData objects are created when RequestTrusted*Signals() is
// called and can't reuse an existing one, at which point a new or existing
// Fetch in `fetches_` is also associated with the CompressionGroupData.
// Each CompressionGroupData owns all CacheEntries that refer to it, and the
// compression group of the associated fetch as well. No two
// CompressionGroupData objects represent the same compression group from a
// single Fetch.
//
// CompressionGroupData objects are refcounted, and when the last reference is
// released, all associated CacheEntries are destroyed, and the compression
// group of the associated fetch (if the fetch associated with the
// CompressionGroupData has not yet completed) is destroyed as well.
//
// Only forward-declared here. It is currently a subclass of Handle.
class CompressionGroupData;
// A key that distinguishes bidding signals entries in the cache. The key is
// used to find all potential matching entries whenever
// RequestTrustedBiddingSignals() is invoked. A response with one key cannot
// be used to satisfy a request with another. There are some cases where even
// when the BiddingCacheKey of a new request matches an existing
// BiddingCacheEntry, the entry cannot be reused, in which case a new Entry is
// used and the old one is thrown out (though the CompressionGroupData will
// remain valid). This can happen in the case of cache expiration or the Entry
// not having the necessary `trusted_bidding_signals_keys` or
// `interest_group_name` after the corresponding network request has been sent
// over the wire.
//
// Move-only: move operations are declared, but no copy operations are.
struct BiddingCacheKey {
BiddingCacheKey();
BiddingCacheKey(BiddingCacheKey&&);
// `interest_group_name` should be nullopt in the case of the
// group-by-origin execution mode, in which case all such groups will be
// pooled together, if the other values match, and the interest group names
// will be stored as a value in the BiddingCacheEntry, rather than as part
// of the key.
//
// NOTE(review): `interest_group_owner`, `trusted_signals_url`, `coordinator`
// and `main_frame_origin` have no fields of their own below; presumably they
// are used to populate `fetch_key` -- confirm in the .cc file.
BiddingCacheKey(const url::Origin& interest_group_owner,
std::optional<std::string> interest_group_name,
const GURL& trusted_signals_url,
const url::Origin& coordinator,
const url::Origin& main_frame_origin,
const url::Origin& joining_origin,
base::Value::Dict additional_params);
~BiddingCacheKey();
BiddingCacheKey& operator=(BiddingCacheKey&&);
// Ordering used when this is the key of `BiddingCacheEntryMap`.
bool operator<(const BiddingCacheKey& other) const;
// Values where mismatches are expected to be more likely are listed
// earlier.
// The interest group name, or nullopt, in the case of the group-by-origin
// execution mode, as all such interest groups can be fetched together, in a
// single partition.
std::optional<std::string> interest_group_name;
// What must match for two requests to be able to share a Fetch.
FetchKey fetch_key;
url::Origin joining_origin;
base::Value::Dict additional_params;
};
// An indexed entry in the cache for callers of
// RequestTrustedBiddingSignals(). It maps InterestGroup information and main
// frame origins to CompressionGroupData objects and partition IDs.
// BiddingCacheEntries that are sent to a TEE together in the same compressed
// partition share a CompressionGroupData, but have different partition ids.
// BiddingCacheEntries are only destroyed when the corresponding
// CompressionGroupData is destroyed, or when a new BiddingCacheEntry with the
// same key replaces them. Only forward-declared here.
struct BiddingCacheEntry;
// A map rather than a multimap: there is at most one BiddingCacheEntry per
// BiddingCacheKey, so an entry that cannot be reused for an incoming request
// is replaced rather than kept alongside the new one.
using BiddingCacheEntryMap = std::map<BiddingCacheKey, BiddingCacheEntry>;
// A key that distinguishes scoring signals entries in the cache. The key is
// used to find all potential matching entries whenever
// RequestTrustedScoringSignals() is invoked. A response with one key cannot
// be used to satisfy a request with another.
//
// Currently, all parameters of each bid appear in ScoringCacheKeys, unlike
// BiddingCacheKeys, so there's no need to check if a ScoringCacheEntry has
// other fields necessary for there to be a cache hit, other than checking the
// TTL. It may be worth experimenting with matching partitioning with that of
// BiddingCacheEntries, but it seems less likely to be useful here, since most
// requests likely have a single renderURL and no componentRenderURLs. And in
// cases where componentRenderURLs are present, it still seems unlikely that
// merging requests will make it more likely all the values for a single bid
// appear in a single ScoringCacheEntry.
//
// Move-only: move operations are declared, but no copy operations are.
struct ScoringCacheKey {
ScoringCacheKey();
ScoringCacheKey(ScoringCacheKey&&);
// NOTE(review): `seller`, `trusted_signals_url`, `coordinator` and
// `main_frame_origin` have no fields of their own below; presumably they are
// used to populate `fetch_key` -- confirm in the .cc file.
ScoringCacheKey(const url::Origin& seller,
const GURL& trusted_signals_url,
const url::Origin& coordinator,
const url::Origin& main_frame_origin,
const url::Origin& interest_group_owner,
const url::Origin& joining_origin,
const GURL& render_url,
const std::vector<GURL>& component_render_urls,
base::Value::Dict additional_params);
~ScoringCacheKey();
ScoringCacheKey& operator=(ScoringCacheKey&&);
// Ordering used when this is the key of `ScoringCacheEntryMap`.
bool operator<(const ScoringCacheKey& other) const;
// Values where mismatches are expected to be more likely are listed
// earlier.
GURL render_url;
// Passed to the constructor as a vector but stored as a set. NOTE(review):
// presumably so that ordering and duplicates in the input do not affect key
// comparison -- confirm in the .cc file.
std::set<GURL> component_render_urls;
// What must match for two requests to be able to share a Fetch.
FetchKey fetch_key;
url::Origin joining_origin;
url::Origin interest_group_owner;
base::Value::Dict additional_params;
};
// An indexed entry in the cache for callers of
// RequestTrustedScoringSignals(). It maps information about the bid being
// scored and main frame origins to CompressionGroupData objects and partition
// IDs. ScoringCacheEntries that are sent to a TEE together in the same
// compressed partition share a CompressionGroupData, but have different
// partition ids. ScoringCacheEntries are only destroyed when the corresponding
// CompressionGroupData is destroyed, or when they expire (in the latter case,
// the CompressionGroupData may still have outstanding consumers that have yet
// to fetch it, so may still be needed). Only forward-declared here.
struct ScoringCacheEntry;
using ScoringCacheEntryMap = std::map<ScoringCacheKey, ScoringCacheEntry>;
// Returns a CompressionGroupData that can be used to fetch and store data
// associated with the provided FetchKey and joining origin. The returned
// CompressionGroupData will be associated with a Fetch that has not yet
// started, either a new one or a shared one. May return a new or existing
// CompressionGroupData. Queues any newly created fetch. After calling, the
// caller must associate the returned CompressionGroupData with its
// CacheEntry.
//
// `interest_group_owner_if_scoring_signals` is only needed for scoring
// signals fetches. For bidding signals, the `script_origin` of `fetch_key` is
// the `interest_group_owner`, but for scoring signals, it's not, and
// compression groups need to be split by interest group owner, to protect
// against cross-origin size leaks due to compression.
scoped_refptr<TrustedSignalsCacheImpl::CompressionGroupData>
FindOrCreateCompressionGroupDataAndQueueFetch(
const FetchKey& fetch_key,
const url::Origin& joining_origin,
base::optional_ref<const url::Origin>
interest_group_owner_if_scoring_signals);
// Starts retrieving the coordinator key for the specified fetch. Will invoke
// StartFetch() on complete, which may happen synchronously.
//
// NOTE(review): no StartFetch() is declared in this class. Presumably the
// result is delivered to OnCoordinatorKeyReceived(), which then starts the
// fetch -- confirm in the .cc file and update this comment.
void GetCoordinatorKey(FetchMap::iterator fetch_it);
// If the key was successfully fetched, starts the corresponding Fetch.
// `bidding_and_auction_server_key` holds the key on success, or a std::string
// on failure (presumably an error message -- confirm in the .cc file).
void OnCoordinatorKeyReceived(
FetchMap::iterator fetch_it,
base::expected<BiddingAndAuctionServerKey, std::string>
bidding_and_auction_server_key);
// Called by StartFetch() to request bidding/scoring signals. `fetch_it`
// identifies the Fetch in `fetches_` to start, and `bidding_and_auction_key`
// is the coordinator key retrieved for it.
//
// NOTE(review): no StartFetch() is declared in this class; presumably these
// are reached from OnCoordinatorKeyReceived() -- confirm in the .cc file.
void StartBiddingSignalsFetch(
FetchMap::iterator fetch_it,
const BiddingAndAuctionServerKey& bidding_and_auction_key);
void StartScoringSignalsFetch(
FetchMap::iterator fetch_it,
const BiddingAndAuctionServerKey& bidding_and_auction_key);
// Receives the result of the Fetch identified by `fetch_it`. On fetch
// completion, ownership of the response is passed to the corresponding
// CompressionGroupData(s) and the Fetch is deleted.
// NOTE(review): presumably invoked by the TrustedSignalsFetcher that was
// created for the Fetch -- confirm in the .cc file.
void OnFetchComplete(
FetchMap::iterator fetch_it,
TrustedSignalsFetcher::SignalsFetchResult signals_fetch_result);
// Called when the last reference of a CompressionGroupData object has been
// released, and it's about to be destroyed. Does the following:
//
// * Removes the CompressionGroupData from `compression_group_data_map_`.
//
// * Destroys all CacheEntries associated with it.
//
// * If there is a pending Fetch associated with the CompressionGroupData,
// removes the associated compression block from the Fetch (since the
// CompressionGroupData corresponds to an entire block), cancelling the
// Fetch if it has no non-empty cache blocks. Since compression block IDs
// are not exposed by the API (only partition IDs within the block are),
// there's no need to maintain compression block IDs.
//
// * If there is a live Fetch associated with the CompressionGroupData, the
// associated compression block isn't cleared, but its pointer to the
// CompressionGroupData is, and the Fetch is cancelled if it has no remaining
// compression blocks associated with CompressionGroupData objects.
void OnCompressionGroupDataDestroyed(
CompressionGroupData& compression_group_data);
// Destroys `cache_entry_it` and removes it from the CompressionGroupData that
// owns it. This does not remove data from the compression group. Its
// CompressionGroupData must not have a pending fetch, as that would mean the
// compression group may not retrieve data that a consumer expects it to
// retrieve, since Fetches rely on cache entries to know what to retrieve when
// they're started.
//
// There is one variant per cache entry map: `bidding_cache_entries_` and
// `scoring_cache_entries_`, respectively.
void DestroyBiddingCacheEntry(BiddingCacheEntryMap::iterator cache_entry_it);
void DestroyScoringCacheEntry(ScoringCacheEntryMap::iterator cache_entry_it);
// Creates a new TrustedSignalsFetcher, which is owned by the caller. Virtual
// for testing, so that a test subclass can override how fetchers are created.
virtual std::unique_ptr<TrustedSignalsFetcher> CreateFetcher();
// Both are provided to the constructor and are const thereafter.
// `get_coordinator_key_callback_` retrieves the BiddingAndAuctionServerKey for
// a coordinator; see GetCoordinatorKeyCallback.
const scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory_;
const GetCoordinatorKeyCallback get_coordinator_key_callback_;
// Receiver pipes for the auction_worklet::mojom::TrustedSignalsCache
// interface. Each receiver carries a ReceiverRestrictions context limiting
// which signals type and script origin it may be used for.
// NOTE(review): presumably populated by CreateMojoPipe() -- confirm in the .cc
// file.
mojo::ReceiverSet<auction_worklet::mojom::TrustedSignalsCache,
ReceiverRestrictions>
receiver_set_;
// Multimap of live and pending fetches. Fetches are removed on completion and
// cancellation. When data is requested from the cache, if data needs to be
// fetched from the network and there's an unstarted pending Fetch with a
// matching FetchKey, the pending Fetch will always be used to request the
// additional data. As a result, for any FetchKey, there will be at most one
// pending Fetch, which will be the last Fetch with that FetchKey, since
// multimap entries are stored in FIFO order.
FetchMap fetches_;
// Cache entries for RequestTrustedBiddingSignals() and
// RequestTrustedScoringSignals() callers, respectively. Each entry is owned
// by the CompressionGroupData it refers to.
BiddingCacheEntryMap bidding_cache_entries_;
ScoringCacheEntryMap scoring_cache_entries_;
// Map of IDs to CompressionGroupData. CompressionGroupData objects are
// refcounted, and removed from the map whenever the last reference is
// released, at which point any associated CacheEntries are destroyed,
// and the CompressionGroupData removed from any associated Fetch, destroying
// the Fetch if no longer needed.
//
// The map holds non-owning raw_ptrs; the references that keep a
// CompressionGroupData alive are held elsewhere (e.g., by Handle owners).
std::map<base::UnguessableToken,
raw_ptr<CompressionGroupData, CtnExperimental>>
compression_group_data_map_;
};
} // namespace content
#endif // CONTENT_BROWSER_INTEREST_GROUP_TRUSTED_SIGNALS_CACHE_IMPL_H_