blob: d6d5a796bba52a9dd98e4299320328755f993174 [file] [log] [blame]
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/ntp_snippets/category_rankers/click_based_category_ranker.h"
#include <algorithm>
#include <string>
#include <utility>
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "base/values.h"
#include "components/ntp_snippets/category_rankers/constant_category_ranker.h"
#include "components/ntp_snippets/content_suggestions_metrics.h"
#include "components/ntp_snippets/features.h"
#include "components/ntp_snippets/pref_names.h"
#include "components/ntp_snippets/time_serialization.h"
#include "components/prefs/pref_registry_simple.h"
#include "components/prefs/pref_service.h"
#include "components/variations/variations_associated_data.h"
namespace ntp_snippets {
namespace {
// In order to increase stability and predictability of the order, an extra
// level of "confidence" is required before moving a category upwards. In other
// words, the category is moved not when it reaches the previous one, but rather
// when it leads by some amount. We refer to this required extra "confidence" as
// a passing margin. Each position has its own passing margin. The category is
// moved upwards (i.e. passes another category) when it has at least passing
// margin of the previous category position more clicks.
const int kDefaultPassingMargin = 5;
const char kPassingMarginParamName[] =
"click_based_category_ranker-passing_margin";
// The first categories get more attention and, therefore, here more stability
// is needed. The passing margin of such categories is increased and they are
// referred to as top categories (with extra margin). Only category position
// defines whether a category is top, but not its content.
const int kDefaultNumTopCategoriesWithExtraMargin = 3;
const char kNumTopCategoriesWithExtraMarginParamName[] =
"click_based_category_ranker-num_top_categories_with_extra_margin";
// The increase of passing margin for each top category compared to the next
// category (e.g. the first top category has passing margin larger by this value
// than the second top category, the last top category has it larger by this
// value than the first non-top category).
const int kDefaultExtraPassingMargin = 2;
const char kExtraPassingMarginParamName[] =
"click_based_category_ranker-extra_passing_margin";
// The ranker must "forget" history with time, so that changes in the user
// behavior are reflected by the order in reasonable time. This is done using
// click count decay with time. However, if there is not enough data, there is
// no need in "forgetting" it. This value defines how many total clicks (across
// categories) are considered enough to decay.
const int kDefaultMinNumClicksToDecay = 30;
const char kMinNumClicksToDecayParamName[] =
"click_based_category_ranker-min_num_clicks_to_decay";
// Time between two consecutive decays (assuming enough clicks).
const int kDefaultTimeBetweenDecaysMinutes = 24 * 60; // 24 hours = 1 day
const char kTimeBetweenDecaysParamName[] =
"click_based_category_ranker-time_between_decays_minutes";
// Decay factor as a fraction. The current value approximates the seventh root
// of 0.5. This yields a 50% decay per seven decays. Seven weak decays are used
// instead of one 50% decay in order to decrease difference of click weight in
// time.
const int kDefaultDecayFactorNumerator = 91;
const int kDefaultDecayFactorDenominator = 100; // pow(0.91, 7) = 0.517
const char kDecayFactorNumeratorParamName[] =
"click_based_category_ranker-decay_factor_numerator";
const char kDecayFactorDenominatorParamName[] =
"click_based_category_ranker-decay_factor_denominator";
// Number of positions by which a dismissed category is downgraded.
const int kDefaultDismissedCategoryPenalty = 1;
const char kDismissedCategoryPenaltyParamName[] =
"click_based_category_ranker-dismissed_category_penalty";
const char kCategoryIdKey[] = "category";
const char kClicksKey[] = "clicks";
const char kLastDismissedKey[] = "last_dismissed";
const char kContentSuggestionsPromotedCategory[] =
"click_based_category_ranker-promoted_category";
int GetExtraPassingMargin() {
return variations::GetVariationParamByFeatureAsInt(
kCategoryRanker, kExtraPassingMarginParamName,
kDefaultExtraPassingMargin);
}
int GetMinNumClicksToDecay() {
return variations::GetVariationParamByFeatureAsInt(
kCategoryRanker, kMinNumClicksToDecayParamName,
kDefaultMinNumClicksToDecay);
}
base::TimeDelta GetTimeBetweenDecays() {
return base::TimeDelta::FromMinutes(
variations::GetVariationParamByFeatureAsInt(
kCategoryRanker, kTimeBetweenDecaysParamName,
kDefaultTimeBetweenDecaysMinutes));
}
int GetDecayFactorNumerator() {
return variations::GetVariationParamByFeatureAsInt(
kCategoryRanker, kDecayFactorNumeratorParamName,
kDefaultDecayFactorNumerator);
}
int GetDecayFactorDenominator() {
return variations::GetVariationParamByFeatureAsInt(
kCategoryRanker, kDecayFactorDenominatorParamName,
kDefaultDecayFactorDenominator);
}
base::Optional<Category> GetPromotedCategoryFromVariations() {
int category_id = variations::GetVariationParamByFeatureAsInt(
kCategoryRanker, kContentSuggestionsPromotedCategory, -1);
if (category_id < 0) {
return base::nullopt;
}
if (!Category::IsValidIDValue(category_id)) {
LOG(WARNING) << "Received invalid category ID for promotion: "
<< category_id << ". Ignoring promotion.";
return base::nullopt;
}
return Category::FromIDValue(category_id);
}
std::string GetOptionalCategoryAsString(
const base::Optional<Category>& optional_category) {
if (optional_category.has_value()) {
return base::IntToString(optional_category->id());
}
return "None";
}
} // namespace
ClickBasedCategoryRanker::ClickBasedCategoryRanker(PrefService* pref_service,
base::Clock* clock)
: pref_service_(pref_service), clock_(clock) {
if (!ReadOrderFromPrefs(&ordered_categories_)) {
// TODO(crbug.com/676273): Handle adding new hardcoded KnownCategories to
// existing order from prefs. Currently such new category is completely
// ignored and may be never shown.
RestoreDefaultOrder();
}
if (ReadLastDecayTimeFromPrefs() == DeserializeTime(0)) {
StoreLastDecayTimeToPrefs(clock_->Now());
}
promoted_category_ = DeterminePromotedCategory();
}
// |ordered_categories_| needs to be properly initialized before calling
// this method.
base::Optional<Category> ClickBasedCategoryRanker::DeterminePromotedCategory() {
base::Optional<Category> promoted = GetPromotedCategoryFromVariations();
if (!promoted.has_value()) {
return base::nullopt;
}
auto promoted_it = FindCategory(promoted.value());
if (promoted_it != ordered_categories_.end() &&
promoted_it->last_dismissed >
clock_->Now() - base::TimeDelta::FromDays(14)) {
// Only promote categories to the top if they weren't dismissed within the
// last 2 weeks.
return base::nullopt;
}
return promoted;
}
ClickBasedCategoryRanker::~ClickBasedCategoryRanker() = default;
bool ClickBasedCategoryRanker::Compare(Category left, Category right) const {
if (!ContainsCategory(left)) {
LOG(DFATAL) << "The category with ID " << left.id()
<< " has not been added using AppendCategoryIfNecessary.";
}
if (!ContainsCategory(right)) {
LOG(DFATAL) << "The category with ID " << right.id()
<< " has not been added using AppendCategoryIfNecessary.";
}
if (left == right) {
return false;
}
if (promoted_category_.has_value() && left == *promoted_category_) {
return true;
}
if (promoted_category_.has_value() && right == *promoted_category_) {
return false;
}
for (const RankedCategory& ranked_category : ordered_categories_) {
if (ranked_category.category == left) {
return true;
}
if (ranked_category.category == right) {
return false;
}
}
// This fallback is provided only to satisfy "Compare" contract if by mistake
// categories are not added using AppendCategoryIfNecessary. One should not
// rely on this, instead the order must be defined explicitly using
// AppendCategoryIfNecessary.
return left.id() < right.id();
}
void ClickBasedCategoryRanker::ClearHistory(base::Time begin, base::Time end) {
// Ignore all partial removals and react only to "entire" history removal.
bool is_entire_history = (begin == base::Time() && end == base::Time::Max());
if (!is_entire_history) {
return;
}
StoreLastDecayTimeToPrefs(DeserializeTime(0));
// The categories added through |AppendCategoryIfNecessary| cannot be
// completely removed, since no one is required to reregister them. Instead
// they are preserved in the default order (sorted by id).
std::vector<RankedCategory> old_categories = ordered_categories_;
RestoreDefaultOrder();
std::vector<Category> added_categories;
for (const RankedCategory& old_category : old_categories) {
auto it =
std::find_if(ordered_categories_.begin(), ordered_categories_.end(),
[old_category](const RankedCategory& other) {
return other.category == old_category.category;
});
if (it == ordered_categories_.end()) {
added_categories.push_back(old_category.category);
}
}
// Sort added categories by id to make their order history independent.
std::sort(added_categories.begin(), added_categories.end(),
Category::CompareByID());
for (Category added_category : added_categories) {
ordered_categories_.push_back(RankedCategory(
added_category, /*clicks=*/0, /*last_dismissed=*/base::Time()));
}
StoreOrderToPrefs(ordered_categories_);
}
void ClickBasedCategoryRanker::AppendCategoryIfNecessary(Category category) {
if (!ContainsCategory(category)) {
ordered_categories_.push_back(RankedCategory(
category, /*clicks=*/0, /*last_dismissed=*/base::Time()));
StoreOrderToPrefs(ordered_categories_);
}
}
void ClickBasedCategoryRanker::InsertCategoryBeforeIfNecessary(
Category category_to_insert,
Category anchor) {
InsertCategoryRelativeToIfNecessary(category_to_insert, anchor,
/*after=*/false);
}
void ClickBasedCategoryRanker::InsertCategoryAfterIfNecessary(
Category category_to_insert,
Category anchor) {
InsertCategoryRelativeToIfNecessary(category_to_insert, anchor,
/*after=*/true);
}
std::vector<CategoryRanker::DebugDataItem>
ClickBasedCategoryRanker::GetDebugData() {
std::vector<CategoryRanker::DebugDataItem> result;
result.push_back(
CategoryRanker::DebugDataItem("Type", "ClickBasedCategoryRanker"));
std::string initial_order_type;
CategoryOrderChoice choice = GetSelectedCategoryOrder();
if (choice == CategoryOrderChoice::GENERAL) {
initial_order_type = "GENERAL";
}
if (choice == CategoryOrderChoice::EMERGING_MARKETS_ORIENTED) {
initial_order_type = "EMERGING_MARKETS_ORIENTED;";
}
result.push_back(
CategoryRanker::DebugDataItem("Initial order type", initial_order_type));
std::vector<std::string> category_strings;
for (const auto& ranked_category : ordered_categories_) {
category_strings.push_back(base::ReplaceStringPlaceholders(
"($1; $2)",
{base::IntToString(ranked_category.category.id()),
base::IntToString(ranked_category.clicks)},
/*offsets=*/nullptr));
}
result.push_back(
CategoryRanker::DebugDataItem("Current order (with click counts)",
base::JoinString(category_strings, ", ")));
result.push_back(CategoryRanker::DebugDataItem(
"Actual promoted category",
GetOptionalCategoryAsString(promoted_category_)));
result.push_back(CategoryRanker::DebugDataItem(
"Raw promoted category from variations",
GetOptionalCategoryAsString(GetPromotedCategoryFromVariations())));
return result;
}
void ClickBasedCategoryRanker::OnSuggestionOpened(Category category) {
if (!ContainsCategory(category)) {
LOG(DFATAL) << "The category with ID " << category.id()
<< " has not been added using AppendCategoryIfNecessary.";
return;
}
DecayClicksIfNeeded();
auto current = FindCategory(category);
DCHECK_GE(current->clicks, 0);
// The overflow is ignored. It is unlikely to happen, because of click count
// decay.
current->clicks++;
// Move the category up if appropriate.
if (current != ordered_categories_.begin()) {
auto previous = current - 1;
const int passing_margin = GetPositionPassingMargin(previous);
if (current->clicks >= previous->clicks + passing_margin) {
const int new_index = previous - ordered_categories_.begin();
ntp_snippets::metrics::OnCategoryMovedUp(new_index);
// It is intended to move only by one position per click in order to avoid
// dramatic changes, which could confuse the user.
std::swap(*current, *previous);
}
}
StoreOrderToPrefs(ordered_categories_);
}
void ClickBasedCategoryRanker::OnCategoryDismissed(Category category) {
if (!ContainsCategory(category)) {
LOG(DFATAL) << "The category with ID " << category.id()
<< " has not been added using AppendCategoryIfNecessary.";
return;
}
const int penalty = GetDismissedCategoryPenalty();
if (penalty != 0) { // Dismissed category penalty is turned on?
auto current = FindCategory(category);
for (int downgrade = 0; downgrade < penalty; ++downgrade) {
auto next = current + 1;
if (next == ordered_categories_.end()) {
break;
}
std::swap(*current, *next);
current = next;
}
DCHECK(current != ordered_categories_.begin());
auto previous = current - 1;
int new_clicks = std::max(previous->clicks - GetPassingMargin(), 0);
// The previous category may have more clicks (but not enough to pass the
// margin, this is possible when penalty >= 2), therefore, we ensure that
// for this category we don't increase clicks.
current->clicks = std::min(current->clicks, new_clicks);
}
FindCategory(category)->last_dismissed = clock_->Now();
if (promoted_category_.has_value() && category == *promoted_category_) {
promoted_category_.reset();
}
StoreOrderToPrefs(ordered_categories_);
}
base::Time ClickBasedCategoryRanker::GetLastDecayTime() const {
return ReadLastDecayTimeFromPrefs();
}
// static
void ClickBasedCategoryRanker::RegisterProfilePrefs(
PrefRegistrySimple* registry) {
registry->RegisterListPref(prefs::kClickBasedCategoryRankerOrderWithClicks);
registry->RegisterInt64Pref(prefs::kClickBasedCategoryRankerLastDecayTime,
/*default_value=*/0);
}
// static
int ClickBasedCategoryRanker::GetPassingMargin() {
return variations::GetVariationParamByFeatureAsInt(
kCategoryRanker, kPassingMarginParamName, kDefaultPassingMargin);
}
// static
int ClickBasedCategoryRanker::GetNumTopCategoriesWithExtraMargin() {
return variations::GetVariationParamByFeatureAsInt(
kCategoryRanker, kNumTopCategoriesWithExtraMarginParamName,
kDefaultNumTopCategoriesWithExtraMargin);
}
// static
int ClickBasedCategoryRanker::GetDismissedCategoryPenalty() {
return variations::GetVariationParamByFeatureAsInt(
kCategoryRanker, kDismissedCategoryPenaltyParamName,
kDefaultDismissedCategoryPenalty);
}
ClickBasedCategoryRanker::RankedCategory::RankedCategory(
Category category,
int clicks,
const base::Time& last_dismissed)
: category(category), clicks(clicks), last_dismissed(last_dismissed) {}
// Returns passing margin for a given position taking into account whether it is
// a top category.
int ClickBasedCategoryRanker::GetPositionPassingMargin(
std::vector<RankedCategory>::const_iterator category_position) const {
int index = category_position - ordered_categories_.cbegin();
int passing_margin_increase = 0;
const int num_top_categories_with_extra_margin =
GetNumTopCategoriesWithExtraMargin();
if (index < num_top_categories_with_extra_margin) {
passing_margin_increase = GetExtraPassingMargin() *
(num_top_categories_with_extra_margin - index);
}
return GetPassingMargin() + passing_margin_increase;
}
void ClickBasedCategoryRanker::RestoreDefaultOrder() {
ordered_categories_.clear();
std::vector<KnownCategories> ordered_known_categories =
ConstantCategoryRanker::GetKnownCategoriesDefaultOrder();
for (KnownCategories known_category : ordered_known_categories) {
AppendKnownCategory(known_category);
}
StoreOrderToPrefs(ordered_categories_);
}
void ClickBasedCategoryRanker::AppendKnownCategory(
KnownCategories known_category) {
Category category = Category::FromKnownCategory(known_category);
DCHECK(!ContainsCategory(category));
ordered_categories_.push_back(
RankedCategory(category, /*clicks=*/0, /*last_dismissed=*/base::Time()));
}
namespace {
base::Time ParseLastDismissedDate(const base::DictionaryValue& value) {
// We don't expect the last-dismissed value to be present in all cases (we
// added this after the fact).
std::string serialized_value;
int64_t parsed_value;
if (value.GetString(kLastDismissedKey, &serialized_value) &&
base::StringToInt64(serialized_value, &parsed_value)) {
return DeserializeTime(parsed_value);
}
return base::Time();
}
} // namespace
bool ClickBasedCategoryRanker::ReadOrderFromPrefs(
std::vector<RankedCategory>* result_categories) const {
result_categories->clear();
const base::ListValue* list =
pref_service_->GetList(prefs::kClickBasedCategoryRankerOrderWithClicks);
if (!list || list->GetSize() == 0) {
return false;
}
for (const base::Value& value : *list) {
const base::DictionaryValue* dictionary;
if (!value.GetAsDictionary(&dictionary)) {
LOG(DFATAL) << "Failed to parse category data from prefs param "
<< prefs::kClickBasedCategoryRankerOrderWithClicks
<< " into dictionary.";
return false;
}
int category_id, clicks;
if (!dictionary->GetInteger(kCategoryIdKey, &category_id)) {
LOG(DFATAL) << "Dictionary does not have '" << kCategoryIdKey << "' key.";
return false;
}
if (!dictionary->GetInteger(kClicksKey, &clicks)) {
LOG(DFATAL) << "Dictionary does not have '" << kClicksKey << "' key.";
return false;
}
base::Time last_dismissed = ParseLastDismissedDate(*dictionary);
Category category = Category::FromIDValue(category_id);
result_categories->push_back(
RankedCategory(category, clicks, last_dismissed));
}
return true;
}
void ClickBasedCategoryRanker::StoreOrderToPrefs(
const std::vector<RankedCategory>& ordered_categories) {
base::ListValue list;
for (const RankedCategory& category : ordered_categories) {
auto dictionary = std::make_unique<base::DictionaryValue>();
dictionary->SetInteger(kCategoryIdKey, category.category.id());
dictionary->SetInteger(kClicksKey, category.clicks);
dictionary->SetString(
kLastDismissedKey,
base::Int64ToString(SerializeTime(category.last_dismissed)));
list.Append(std::move(dictionary));
}
pref_service_->Set(prefs::kClickBasedCategoryRankerOrderWithClicks, list);
}
std::vector<ClickBasedCategoryRanker::RankedCategory>::iterator
ClickBasedCategoryRanker::FindCategory(Category category) {
return std::find_if(ordered_categories_.begin(), ordered_categories_.end(),
[category](const RankedCategory& ranked_category) {
return category == ranked_category.category;
});
}
bool ClickBasedCategoryRanker::ContainsCategory(Category category) const {
for (const auto& ranked_category : ordered_categories_) {
if (category == ranked_category.category) {
return true;
}
}
return false;
}
void ClickBasedCategoryRanker::InsertCategoryRelativeToIfNecessary(
Category category_to_insert,
Category anchor,
bool after) {
DCHECK(ContainsCategory(anchor));
if (ContainsCategory(category_to_insert)) {
return;
}
auto anchor_it = FindCategory(anchor);
ordered_categories_.insert(anchor_it + (after ? 1 : 0),
RankedCategory(category_to_insert,
/*clicks=*/anchor_it->clicks,
/*last_dismissed=*/base::Time()));
StoreOrderToPrefs(ordered_categories_);
}
base::Time ClickBasedCategoryRanker::ReadLastDecayTimeFromPrefs() const {
return DeserializeTime(
pref_service_->GetInt64(prefs::kClickBasedCategoryRankerLastDecayTime));
}
void ClickBasedCategoryRanker::StoreLastDecayTimeToPrefs(
base::Time last_decay_time) {
pref_service_->SetInt64(prefs::kClickBasedCategoryRankerLastDecayTime,
SerializeTime(last_decay_time));
}
bool ClickBasedCategoryRanker::IsEnoughClicksToDecay() const {
int64_t num_clicks = 0;
for (const RankedCategory& ranked_category : ordered_categories_) {
num_clicks += ranked_category.clicks;
}
return num_clicks >= GetMinNumClicksToDecay();
}
bool ClickBasedCategoryRanker::DecayClicksIfNeeded() {
base::Time now = clock_->Now();
base::Time last_decay = ReadLastDecayTimeFromPrefs();
if (last_decay == base::Time::FromInternalValue(0)) {
// No last decay time, start from now.
StoreLastDecayTimeToPrefs(clock_->Now());
return false;
}
DCHECK_LE(last_decay, now);
int num_pending_decays = (now - last_decay) / GetTimeBetweenDecays();
int executed_decays = 0;
while (executed_decays < num_pending_decays && IsEnoughClicksToDecay()) {
for (RankedCategory& ranked_category : ordered_categories_) {
DCHECK_GE(ranked_category.clicks, 0);
const int64_t old_clicks = static_cast<int64_t>(ranked_category.clicks);
ranked_category.clicks =
old_clicks * GetDecayFactorNumerator() / GetDecayFactorDenominator();
}
++executed_decays;
}
// No matter how many decays were actually executed, all of them are marked
// done. Even if some were ignored due to absense of clicks, they would have
// no effect anyway for the same reason.
StoreLastDecayTimeToPrefs(last_decay +
num_pending_decays * GetTimeBetweenDecays());
if (executed_decays > 0) {
StoreOrderToPrefs(ordered_categories_);
return true;
}
return false;
}
} // namespace ntp_snippets