blob: b03570f072f407a1d2c6a58646db0f48a845429d [file] [log] [blame]
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "content/browser/first_party_sets/first_party_set_parser.h"
#include <string>
#include <utility>
#include <vector>
#include "base/containers/contains.h"
#include "base/containers/flat_map.h"
#include "base/containers/flat_set.h"
#include "base/files/file_util.h"
#include "base/json/json_reader.h"
#include "base/json/json_string_value_serializer.h"
#include "base/logging.h"
#include "base/path_service.h"
#include "base/strings/string_util.h"
#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
#include "net/base/schemeful_site.h"
#include "third_party/abseil-cpp/absl/types/optional.h"
#include "url/gurl.h"
#include "url/origin.h"
namespace content {
namespace {
// Parses `origin_string` as an origin and converts it to a net::SchemefulSite.
//
// Returns absl::nullopt if the parsed origin is opaque, if its scheme is not
// "https", or if net::SchemefulSite::CreateIfHasRegisterableDomain() yields no
// site for it. When `emit_errors` is true, each rejection is also reported via
// LOG(ERROR); otherwise the function is silent.
absl::optional<net::SchemefulSite> Canonicalize(base::StringPiece origin_string,
                                                bool emit_errors) {
  const url::Origin origin(url::Origin::Create(GURL(origin_string)));
  if (origin.opaque()) {
    if (emit_errors) {
      LOG(ERROR) << "First-Party Set origin " << origin_string
                 << " is not valid; ignoring.";
    }
    return absl::nullopt;
  }
  if (origin.scheme() != "https") {
    if (emit_errors) {
      LOG(ERROR) << "First-Party Set origin " << origin_string
                 << " is not HTTPS; ignoring.";
    }
    return absl::nullopt;
  }
  absl::optional<net::SchemefulSite> site =
      net::SchemefulSite::CreateIfHasRegisterableDomain(origin);
  if (!site.has_value()) {
    if (emit_errors) {
      // Keep the trailing space after "origin" so that the offending string is
      // not fused to the preceding word in the log output.
      LOG(ERROR) << "First-Party Set origin " << origin_string
                 << " does not have a valid registered domain; ignoring.";
    }
    return absl::nullopt;
  }
  return site;
}
// Dictionary keys read from a single serialized set.
constexpr char kFirstPartySetOwnerField[] = "owner";
constexpr char kFirstPartySetMembersField[] = "members";

// Dictionary keys read from the enterprise policy value.
constexpr char kFirstPartySetPolicyReplacementsField[] = "replacements";
constexpr char kFirstPartySetPolicyAdditionsField[] = "additions";
// Validates a single First-Party Set and parses it into a SingleSet.
// Note that this is intended for use *only* on sets that were received via the
// Component Updater or from enterprise policy, so this does not check
// assertions or versions. It rejects sets which are non-disjoint with
// previously-encountered sets (i.e. sets which have non-empty intersections
// with `elements`), and singleton sets (i.e. sets must have an owner and at
// least one valid member).
//
// Uses `elements` to check disjointness of sets; outputs the set as `out_set`;
// and augments `elements` to include the elements of the set that was parsed.
// Both `elements` and `out_set` are only written once the whole set has been
// validated, so they are left untouched when an error is returned.
//
// Returns a nullopt if parsing and validation were successful, otherwise it
// returns an optional with an appropriate FirstPartySetParser::ParseError:
//  - kInvalidType: `value` is not a dictionary, the owner is missing or not a
//    string, the members list is missing, or a member is not a string.
//  - kInvalidOrigin: the owner or a member could not be canonicalized.
//  - kNonDisjointSets: the owner or a member is already in `elements`.
//  - kSingletonSet: the members list is empty.
//  - kRepeatedDomain: a member equals the owner or an earlier member.
absl::optional<FirstPartySetParser::ParseError> ParseSet(
    const base::Value& value,
    base::flat_set<net::SchemefulSite>& elements,
    FirstPartySetParser::SingleSet& out_set) {
  if (!value.is_dict())
    return FirstPartySetParser::ParseError::kInvalidType;
  const base::Value::Dict& set_dict = value.GetDict();

  // Confirm that the set has an owner, and the owner is a string.
  const std::string* maybe_owner =
      set_dict.FindString(kFirstPartySetOwnerField);
  if (!maybe_owner)
    return FirstPartySetParser::ParseError::kInvalidType;

  absl::optional<net::SchemefulSite> canonical_owner =
      Canonicalize(*maybe_owner, false /* emit_errors */);
  if (!canonical_owner.has_value())
    return FirstPartySetParser::ParseError::kInvalidOrigin;

  // An owner may not be a member of another set.
  if (elements.contains(*canonical_owner))
    return FirstPartySetParser::ParseError::kNonDisjointSets;

  // Confirm that the members field is present, and is an array of strings.
  const base::Value::List* maybe_members_list =
      set_dict.FindList(kFirstPartySetMembersField);
  if (!maybe_members_list)
    return FirstPartySetParser::ParseError::kInvalidType;

  if (maybe_members_list->empty())
    return FirstPartySetParser::ParseError::kSingletonSet;

  std::vector<net::SchemefulSite> members;
  members.reserve(maybe_members_list->size());
  for (const auto& item : *maybe_members_list) {
    if (!item.is_string())
      return FirstPartySetParser::ParseError::kInvalidType;

    absl::optional<net::SchemefulSite> member =
        Canonicalize(item.GetString(), false /* emit_errors */);
    if (!member.has_value())
      return FirstPartySetParser::ParseError::kInvalidOrigin;

    // A site may appear only once within its own set.
    if (*member == *canonical_owner || base::Contains(members, *member))
      return FirstPartySetParser::ParseError::kRepeatedDomain;

    // Members may not be a member of another set, and may not be an owner of
    // another set.
    if (elements.contains(*member))
      return FirstPartySetParser::ParseError::kNonDisjointSets;

    members.push_back(*member);
  }

  // The set is valid: record its sites for later disjointness checks.
  elements.insert(*canonical_owner);
  for (const auto& member : members) {
    elements.insert(member);
  }
  // `members` is not needed past this point, so hand it over without copying.
  out_set = std::make_pair(*canonical_owner, std::move(members));
  return absl::nullopt;
}
// Parses each set in `policy_sets` by calling ParseSet on each one, in order.
//
// Returns a PolicyParsingError if ParseSet returns an error, which contains the
// error that ParseSet returned along with the type of policy set that was being
// parsed (`set_type`) and the index of the set that caused the error. In that
// case `out_list` is left unmodified, but `elements` keeps the sites of the
// sets that were parsed successfully before the failing one.
//
// If no call to ParseSet returns an error, `out_list` is populated with the
// list of parsed sets. A null `policy_sets` is treated as an empty list.
absl::optional<FirstPartySetParser::PolicyParsingError> GetPolicySetsFromList(
    const base::Value::List* policy_sets,
    base::flat_set<net::SchemefulSite>& elements,
    FirstPartySetParser::PolicySetType set_type,
    std::vector<FirstPartySetParser::SingleSet>& out_list) {
  if (!policy_sets) {
    out_list.clear();
    return absl::nullopt;
  }

  std::vector<FirstPartySetParser::SingleSet> parsed_sets;
  parsed_sets.reserve(policy_sets->size());
  // The index is kept as an `int` because it is stored as-is in the returned
  // PolicyParsingError.
  const int set_count = static_cast<int>(policy_sets->size());
  for (int i = 0; i < set_count; ++i) {
    FirstPartySetParser::SingleSet out_set;
    if (absl::optional<FirstPartySetParser::ParseError> error =
            ParseSet((*policy_sets)[i], elements, out_set);
        error.has_value()) {
      return FirstPartySetParser::PolicyParsingError{error.value(), set_type,
                                                     i};
    }
    // `out_set` is a per-iteration temporary; move it rather than copy it.
    parsed_sets.push_back(std::move(out_set));
  }
  out_list = std::move(parsed_sets);
  return absl::nullopt;
}
} // namespace
// Builds the pair of parsed policy lists from two vectors taken by value:
// `replacement_list` is moved into `replacements` and `addition_list` is moved
// into `additions`.
FirstPartySetParser::ParsedPolicySetLists::ParsedPolicySetLists(
std::vector<FirstPartySetParser::SingleSet> replacement_list,
std::vector<FirstPartySetParser::SingleSet> addition_list)
: replacements(std::move(replacement_list)),
additions(std::move(addition_list)) {}
// Compiler-generated default constructor, defined out of line.
FirstPartySetParser::ParsedPolicySetLists::ParsedPolicySetLists() = default;
// Compiler-generated move constructor, defined out of line.
FirstPartySetParser::ParsedPolicySetLists::ParsedPolicySetLists(
FirstPartySetParser::ParsedPolicySetLists&&) = default;
// Compiler-generated copy constructor, defined out of line.
FirstPartySetParser::ParsedPolicySetLists::ParsedPolicySetLists(
const FirstPartySetParser::ParsedPolicySetLists&) = default;
// Compiler-generated destructor, defined out of line.
FirstPartySetParser::ParsedPolicySetLists::~ParsedPolicySetLists() = default;
// Two ParsedPolicySetLists are equal when their `replacements` lists compare
// equal and their `additions` lists compare equal.
bool FirstPartySetParser::ParsedPolicySetLists::operator==(
    const FirstPartySetParser::ParsedPolicySetLists& other) const {
  if (!(replacements == other.replacements))
    return false;
  return additions == other.additions;
}
// Rebuilds a sets map from `value`, a JSON dictionary whose keys are member
// origins and whose values are the corresponding owner origins.
//
// Entries whose member and owner canonicalize to the same site are skipped;
// instead, an owner -> owner entry is added the first time an owner is seen
// with a real member, so that no singleton sets are produced.
//
// Returns an empty map if `value` is empty, is not valid JSON, is not a
// dictionary, contains a non-string value, contains an origin that cannot be
// canonicalized, or describes sets that are not disjoint (a site used both as
// an owner and as a member of some other site).
FirstPartySetParser::SetsMap FirstPartySetParser::DeserializeFirstPartySets(
    base::StringPiece value) {
  if (value.empty())
    return {};

  std::unique_ptr<base::Value> value_deserialized =
      JSONStringValueDeserializer(value).Deserialize(
          nullptr /* error_code */, nullptr /* error_message */);
  if (!value_deserialized || !value_deserialized->is_dict())
    return {};

  std::vector<std::pair<net::SchemefulSite, net::SchemefulSite>> map;
  base::flat_set<net::SchemefulSite> owner_set;
  base::flat_set<net::SchemefulSite> member_set;
  for (const auto item : value_deserialized->DictItems()) {
    if (!item.second.is_string())
      return {};

    // Deliberately non-const so that the sites can be moved into `map` below.
    absl::optional<net::SchemefulSite> maybe_member =
        Canonicalize(item.first, true /* emit_errors */);
    absl::optional<net::SchemefulSite> maybe_owner =
        Canonicalize(item.second.GetString(), true /* emit_errors */);
    if (!maybe_member.has_value() || !maybe_owner.has_value())
      return {};

    // Skip the owner entry here and add it later explicitly to prevent the
    // singleton sets.
    if (*maybe_member == *maybe_owner)
      continue;

    // Check disjointness. Note that we are relying on the JSON Parser to
    // eliminate the possibility of a site being used as a key more than once,
    // so we don't have to check for that explicitly.
    // NOTE(review): two distinct keys could still canonicalize to the same
    // site; that case is not rejected here -- confirm whether it should be.
    if (owner_set.contains(*maybe_member) ||
        member_set.contains(*maybe_owner)) {
      return {};
    }

    // First time this owner is seen: add its own owner -> owner entry.
    if (!owner_set.contains(*maybe_owner))
      map.emplace_back(*maybe_owner, *maybe_owner);

    owner_set.insert(*maybe_owner);
    member_set.insert(*maybe_member);
    map.emplace_back(std::move(*maybe_member), std::move(*maybe_owner));
  }
  return map;
}
// Serializes `sets` to a JSON dictionary string mapping each serialized site
// to its serialized owner. Entries whose site and owner serialize to the same
// string are left out of the output.
std::string FirstPartySetParser::SerializeFirstPartySets(
    const FirstPartySetParser::SetsMap& sets) {
  base::DictionaryValue serialized_sets;
  for (const auto& [site, owner_site] : sets) {
    std::string site_key = site.Serialize();
    std::string owner_value = owner_site.Serialize();
    // Self-mapped (owner) entries are not written.
    if (site_key == owner_value)
      continue;
    serialized_sets.SetKey(std::move(site_key),
                           base::Value(std::move(owner_value)));
  }

  std::string json;
  JSONStringValueSerializer(&json).Serialize(serialized_sets);
  return json;
}
// Public entry point that forwards `origin_string` and `emit_errors` unchanged
// to the file-local Canonicalize() helper and returns its result.
absl::optional<net::SchemefulSite>
FirstPartySetParser::CanonicalizeRegisteredDomain(
const base::StringPiece origin_string,
bool emit_errors) {
return Canonicalize(origin_string, emit_errors);
}
// Reads `input` line by line. Each line that is non-empty after trimming ASCII
// whitespace is parsed as one JSON value (trailing commas allowed) and handed
// to ParseSet.
//
// Returns a map containing an owner -> owner entry and a member -> owner entry
// for every successfully parsed set. Sets rejected with kInvalidOrigin are
// skipped; a line that is not valid JSON, or any other ParseSet error, makes
// the whole function return an empty map.
base::flat_map<net::SchemefulSite, net::SchemefulSite>
FirstPartySetParser::ParseSetsFromStream(std::istream& input) {
  std::vector<std::pair<net::SchemefulSite, net::SchemefulSite>> map;
  base::flat_set<net::SchemefulSite> elements;
  for (std::string line; std::getline(input, line);) {
    base::StringPiece trimmed = base::TrimWhitespaceASCII(line, base::TRIM_ALL);
    if (trimmed.empty())
      continue;

    absl::optional<base::Value> maybe_value = base::JSONReader::Read(
        trimmed, base::JSONParserOptions::JSON_ALLOW_TRAILING_COMMAS);
    if (!maybe_value.has_value())
      return {};

    FirstPartySetParser::SingleSet output;
    if (absl::optional<FirstPartySetParser::ParseError> error =
            ParseSet(*maybe_value, elements, output);
        error.has_value()) {
      if (*error == FirstPartySetParser::ParseError::kInvalidOrigin) {
        // Ignore sets that include an invalid domain (which might have been
        // caused by a PSL update), but don't let that break other sets.
        continue;
      }
      // Abort, something is wrong with the component.
      return {};
    }

    // Bind by reference: `output` is a per-line local that is not used again,
    // so its members can be moved out directly without first copying the set.
    auto& [owner, members] = output;
    map.emplace_back(owner, owner);
    for (net::SchemefulSite& member : members) {
      map.emplace_back(std::move(member), owner);
    }
  }
  return map;
}
// Parses the "replacements" and then the "additions" lists found in `policy`.
// Both lists share one set of already-seen sites, so a site may not appear in
// more than one set across the two lists.
//
// Returns the first PolicyParsingError encountered, in which case `out_sets`
// is not written. Otherwise returns nullopt and, if `out_sets` is non-null,
// stores the parsed lists in it.
absl::optional<FirstPartySetParser::PolicyParsingError>
FirstPartySetParser::ParseSetsFromEnterprisePolicy(
    const base::Value::Dict& policy,
    ParsedPolicySetLists* out_sets) {
  // Sites seen so far, shared by both lists for the disjointness check.
  base::flat_set<net::SchemefulSite> seen_sites;

  std::vector<SingleSet> replacement_sets;
  absl::optional<PolicyParsingError> replacements_error =
      GetPolicySetsFromList(
          policy.FindList(kFirstPartySetPolicyReplacementsField), seen_sites,
          PolicySetType::kReplacement, replacement_sets);
  if (replacements_error.has_value())
    return replacements_error.value();

  std::vector<SingleSet> addition_sets;
  absl::optional<PolicyParsingError> additions_error = GetPolicySetsFromList(
      policy.FindList(kFirstPartySetPolicyAdditionsField), seen_sites,
      PolicySetType::kAddition, addition_sets);
  if (additions_error.has_value())
    return additions_error.value();

  if (out_sets != nullptr) {
    *out_sets = ParsedPolicySetLists(std::move(replacement_sets),
                                     std::move(addition_sets));
  }
  return absl::nullopt;
}
} // namespace content