blob: 2764226f42af375ae69b9ec7537182aee1b1fae6 [file] [log] [blame]
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/feed/core/user_classifier.h"
#include <algorithm>
#include <cfloat>
#include <string>
#include "base/metrics/histogram_macros.h"
#include "base/stl_util.h"
#include "base/strings/string_number_conversions.h"
#include "base/time/clock.h"
#include "components/feed/core/pref_names.h"
#include "components/feed/feed_feature_list.h"
#include "components/prefs/pref_registry_simple.h"
#include "components/prefs/pref_service.h"
#include "components/variations/variations_associated_data.h"
namespace feed {
namespace {
// The discount rate for computing the discounted-average rates. Must be
// strictly larger than 0 and strictly smaller than 1! A configured value
// outside of that range is replaced by this default in
// GetDiscountRatePerHour().
const double kDiscountRatePerDay = 0.25;
// Name of the variation parameter that overrides |kDiscountRatePerDay|.
const char kDiscountRatePerDayParam[] = "user_classifier_discount_rate_per_day";
// Never consider any larger interval than this (so that extreme situations such
// as losing your phone or going for a long offline vacation do not skew the
// average too much).
// When overriding via variation parameters, it is better to use smaller values
// than |kMaxHours| as this is the maximum value reported in the histograms.
const double kMaxHours = 7 * 24;
// Name of the variation parameter that overrides |kMaxHours|.
const char kMaxHoursParam[] = "user_classifier_max_hours";
// Ignore events within |kMinHours| hours since the last event (|kMinHours| is
// the length of the browsing session where subsequent events of the same type
// do not count again).
const double kMinHours = 0.5;
// Name of the variation parameter that overrides |kMinHours|.
const char kMinHoursParam[] = "user_classifier_min_hours";
// Classification constants.
// A user whose estimated interval between two uses of suggestions is at most
// this many hours is classified as an active suggestions consumer (see
// UserClassifier::GetUserClass()).
const double kActiveConsumerClicksAtLeastOncePerHours = 96;
// Name of the variation parameter that overrides
// |kActiveConsumerClicksAtLeastOncePerHours|.
const char kActiveConsumerClicksAtLeastOncePerHoursParam[] =
"user_classifier_active_consumer_clicks_at_least_once_per_hours";
// A user whose estimated interval between two NTP opens is at least this many
// hours is classified as a rare NTP user (see UserClassifier::GetUserClass()).
const double kRareUserOpensNTPAtMostOncePerHours = 96;
// Name of the variation parameter that overrides
// |kRareUserOpensNTPAtMostOncePerHours|.
const char kRareUserOpensNTPAtMostOncePerHoursParam[] =
"user_classifier_rare_user_opens_ntp_at_most_once_per_hours";
// List of all Events used for iteration.
const UserClassifier::Event kEvents[] = {
    UserClassifier::Event::NTP_OPENED, UserClassifier::Event::SUGGESTIONS_USED};

// Arrays of pref names, indexed by Event's int value. Both the strings and the
// table entries are immutable.
const char* const kRateKeys[] = {
    prefs::kUserClassifierAverageNTPOpenedPerHour,
    prefs::kUserClassifierAverageSuggestionsUsedPerHour};
const char* const kLastTimeKeys[] = {
    prefs::kUserClassifierLastTimeToOpenNTP,
    prefs::kUserClassifierLastTimeToUseSuggestions};

// Default lengths of the intervals for new users for the events, in hours,
// indexed by Event's int value.
const double kInitialHoursBetweenEvents[] = {24, 120};
// Names of the variation parameters that override the defaults above, indexed
// by Event's int value.
const char* const kInitialHoursBetweenEventsParams[] = {
    "user_classifier_default_interval_ntp_opened",
    "user_classifier_default_interval_suggestions_used"};

// Every table above is indexed by an Event value, so each of them needs
// exactly Event::COUNT entries. One assertion per table makes the compiler
// error point at the table that is out of date.
static_assert(base::size(kEvents) ==
                  static_cast<size_t>(UserClassifier::Event::COUNT),
              "Fill in kEvents for all event types.");
static_assert(base::size(kRateKeys) ==
                  static_cast<size_t>(UserClassifier::Event::COUNT),
              "Fill in kRateKeys for all event types.");
static_assert(base::size(kLastTimeKeys) ==
                  static_cast<size_t>(UserClassifier::Event::COUNT),
              "Fill in kLastTimeKeys for all event types.");
static_assert(base::size(kInitialHoursBetweenEvents) ==
                  static_cast<size_t>(UserClassifier::Event::COUNT),
              "Fill in kInitialHoursBetweenEvents for all event types.");
static_assert(base::size(kInitialHoursBetweenEventsParams) ==
                  static_cast<size_t>(UserClassifier::Event::COUNT),
              "Fill in kInitialHoursBetweenEventsParams for all event types.");
// Computes the discount rate.
double GetDiscountRatePerHour() {
double discount_rate_per_day = variations::GetVariationParamByFeatureAsDouble(
kInterestFeedContentSuggestions, kDiscountRatePerDayParam,
kDiscountRatePerDay);
// Check for illegal values.
if (discount_rate_per_day <= 0 || discount_rate_per_day >= 1) {
DLOG(WARNING) << "Illegal value " << discount_rate_per_day
<< " for the parameter " << kDiscountRatePerDayParam
<< " (must be strictly between 0 and 1; the default "
<< kDiscountRatePerDay << " is used, instead).";
discount_rate_per_day = kDiscountRatePerDay;
}
// Compute discount_rate_per_hour such that
// discount_rate_per_day = 1 - e^{-discount_rate_per_hour * 24}.
return std::log(1.0 / (1.0 - discount_rate_per_day)) / 24.0;
}
// Returns the interval (in hours) initially assumed between two |event|s: the
// value of the variation parameter for |event|, with the matching entry of
// |kInitialHoursBetweenEvents| passed as the fallback.
double GetInitialHoursBetweenEvents(UserClassifier::Event event) {
  const int index = static_cast<int>(event);
  const char* const param_name = kInitialHoursBetweenEventsParams[index];
  const double built_in_hours = kInitialHoursBetweenEvents[index];
  return variations::GetVariationParamByFeatureAsDouble(
      kInterestFeedContentSuggestions, param_name, built_in_hours);
}
// Returns the minimum interval (in hours) between two counted events, read
// from the |kMinHoursParam| variation parameter with |kMinHours| passed as the
// fallback value. The returned value is not validated here.
double GetMinHours() {
return variations::GetVariationParamByFeatureAsDouble(
kInterestFeedContentSuggestions, kMinHoursParam, kMinHours);
}
// Returns the maximum interval (in hours) that is ever considered, read from
// the |kMaxHoursParam| variation parameter with |kMaxHours| passed as the
// fallback value. The returned value is not validated here.
double GetMaxHours() {
return variations::GetVariationParamByFeatureAsDouble(
kInterestFeedContentSuggestions, kMaxHoursParam, kMaxHours);
}
// Discounts |old_value| over |hours_since_last_time| hours, i.e. returns
//   e^{-discount_rate_per_hour * hours_since_last_time} * old_value.
double DiscountRate(double old_value,
                    double hours_since_last_time,
                    double discount_rate_per_hour) {
  const double exponent = -discount_rate_per_hour * hours_since_last_time;
  const double decay_factor = std::exp(exponent);
  return decay_factor * old_value;
}
// Estimates the number of hours between two consecutive events for the given
// |rate|, assuming the events are equally distributed. Returns |max_hours| for
// |rate| <= 1; otherwise the estimate is capped at |max_hours| and then raised
// to at least |min_hours|.
double GetEstimateHoursBetweenEvents(double rate,
                                     double discount_rate_per_hour,
                                     double min_hours,
                                     double max_hours) {
  // For |rate| <= 1 the formula below is not well-defined (logarithm of a
  // negative value or division by zero). The estimate tends to infinity as
  // |rate| approaches 1, so |max_hours| is the natural result.
  if (rate <= 1) {
    return max_hours;
  }

  // Assume the last event happened right now and the system is in the
  // steady-state, i.e.
  //   rate = 1 + e^{-discount_rate * hours} * rate.
  // Solving for hours gives
  //   hours = log(rate / (rate - 1)) / discount_rate.
  const double rate_ratio = rate / (rate - 1);
  const double unclamped_hours = std::log(rate_ratio) / discount_rate_per_hour;

  const double capped_hours = std::min(max_hours, unclamped_hours);
  return std::max(min_hours, capped_hours);
}
// The inverse of GetEstimateHoursBetweenEvents(): returns the rate for which
// that function yields |estimate_hours|, after |estimate_hours| has been
// capped at |max_hours| and raised to at least |min_hours|.
double GetRateForEstimateHoursBetweenEvents(double estimate_hours,
                                            double discount_rate_per_hour,
                                            double min_hours,
                                            double max_hours) {
  const double capped_hours = std::min(max_hours, estimate_hours);
  const double clamped_hours = std::max(min_hours, capped_hours);

  // Solve the steady-state equation
  //   rate = 1 + e^{-discount_rate * hours} * rate
  // for rate:
  //   rate * (1 - e^{-discount_rate * hours}) = 1,
  //   rate = 1 / (1 - e^{-discount_rate * hours}).
  const double decay_factor = std::exp(-discount_rate_per_hour * clamped_hours);
  return 1.0 / (1.0 - decay_factor);
}
} // namespace
// Reads all tunables once (variation parameters with built-in fallbacks) and,
// when a pref service is available, makes sure every event has a stored
// last-time value.
UserClassifier::UserClassifier(PrefService* pref_service, base::Clock* clock)
    : pref_service_(pref_service),
      clock_(clock),
      discount_rate_per_hour_(GetDiscountRatePerHour()),
      min_hours_(GetMinHours()),
      max_hours_(GetMaxHours()),
      active_consumer_clicks_at_least_once_per_hours_(
          variations::GetVariationParamByFeatureAsDouble(
              kInterestFeedContentSuggestions,
              kActiveConsumerClicksAtLeastOncePerHoursParam,
              kActiveConsumerClicksAtLeastOncePerHours)),
      rare_user_opens_ntp_at_most_once_per_hours_(
          variations::GetVariationParamByFeatureAsDouble(
              kInterestFeedContentSuggestions,
              kRareUserOpensNTPAtMostOncePerHoursParam,
              kRareUserOpensNTPAtMostOncePerHours)) {
  // The pref_service_ can be null in tests; there is nothing to set up then.
  if (pref_service_) {
    // TODO(jkrcal): Store the current discount rate per hour into prefs. If it
    // differs from the previous value, rescale the rate values so that the
    // expectation does not change abruptly!

    // Events without a stored last time start counting from now.
    for (const Event event : kEvents) {
      if (HasLastTime(event)) {
        continue;
      }
      SetLastTimeToNow(event);
    }
  }
}
// Defaulted destructor; this file defines no custom teardown logic.
UserClassifier::~UserClassifier() = default;
// static
// Registers, for every event type, a double pref holding its rate and a time
// pref holding the last time it happened. The default rate is derived from the
// initial interval assumed for that event; the default time is base::Time().
void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) {
  const double rate_per_hour = GetDiscountRatePerHour();
  const double lower_bound_hours = GetMinHours();
  const double upper_bound_hours = GetMaxHours();

  for (const Event event : kEvents) {
    const int index = static_cast<int>(event);
    const double initial_hours = GetInitialHoursBetweenEvents(event);
    const double initial_rate = GetRateForEstimateHoursBetweenEvents(
        initial_hours, rate_per_hour, lower_bound_hours, upper_bound_hours);
    registry->RegisterDoublePref(kRateKeys[index], initial_rate);
    registry->RegisterTimePref(kLastTimeKeys[index], base::Time());
  }
}
// Records one occurrence of |event| by updating its stored rate. |event| must
// be an actual event type, not Event::COUNT (checked by a DCHECK). The updated
// rate returned by UpdateRateOnEvent() is currently not used.
void UserClassifier::OnEvent(Event event) {
DCHECK_NE(event, Event::COUNT);
UpdateRateOnEvent(event);
// TODO(skym): Record average hour for metric in a histogram.
}
// Returns the estimated number of hours between two |event|s, computed from
// the rate of |event| discounted up to the current time. |event| must not be
// Event::COUNT.
double UserClassifier::GetEstimatedAvgTime(Event event) const {
  DCHECK_NE(event, Event::COUNT);
  return GetEstimateHoursBetweenEvents(GetUpToDateRate(event),
                                       discount_rate_per_hour_, min_hours_,
                                       max_hours_);
}
// Classifies the user from the estimated intervals between events:
//  - RARE_NTP_USER if the NTP is opened at most once per
//    |rare_user_opens_ntp_at_most_once_per_hours_| hours,
//  - otherwise ACTIVE_SUGGESTIONS_CONSUMER if suggestions are used at least
//    once per |active_consumer_clicks_at_least_once_per_hours_| hours,
//  - otherwise ACTIVE_NTP_USER (also the result when there is no pref service).
UserClassifier::UserClass UserClassifier::GetUserClass() const {
  // The pref_service_ can be null in tests.
  if (!pref_service_) {
    return UserClass::ACTIVE_NTP_USER;
  }

  const double hours_between_ntp_opens =
      GetEstimatedAvgTime(Event::NTP_OPENED);
  if (hours_between_ntp_opens >= rare_user_opens_ntp_at_most_once_per_hours_) {
    return UserClass::RARE_NTP_USER;
  }

  // Only evaluated for users that are not rare NTP users.
  const double hours_between_suggestion_uses =
      GetEstimatedAvgTime(Event::SUGGESTIONS_USED);
  const bool is_active_consumer =
      hours_between_suggestion_uses <=
      active_consumer_clicks_at_least_once_per_hours_;
  return is_active_consumer ? UserClass::ACTIVE_SUGGESTIONS_CONSUMER
                            : UserClass::ACTIVE_NTP_USER;
}
std::string UserClassifier::GetUserClassDescriptionForDebugging() const {
switch (GetUserClass()) {
case UserClass::RARE_NTP_USER:
return "Rare user of the NTP";
case UserClass::ACTIVE_NTP_USER:
return "Active user of the NTP";
case UserClass::ACTIVE_SUGGESTIONS_CONSUMER:
return "Active consumer of NTP articles";
}
NOTREACHED();
return std::string();
}
// Resets the classification state: for every event the stored rate pref is
// cleared and the last time is set to now. Does nothing without a pref
// service.
void UserClassifier::ClearClassificationForDebugging() {
  // The pref_service_ can be null in tests.
  if (pref_service_) {
    for (const Event event : kEvents) {
      ClearRate(event);
      SetLastTimeToNow(event);
    }
  }
}
// Accounts for an |event| that happened right now and returns the resulting
// rate. Events closer than |min_hours_| to the previous one are not counted;
// the discounted current rate is returned for them instead. Returns 0 when
// there is no pref service.
double UserClassifier::UpdateRateOnEvent(Event event) {
  // The pref_service_ can be null in tests.
  if (!pref_service_) {
    return 0;
  }

  const double elapsed_hours =
      std::min(max_hours_, GetHoursSinceLastTime(event));

  // Events within the same "browsing session" are ignored.
  const bool is_same_session = elapsed_hours < min_hours_;
  if (is_same_session) {
    return GetUpToDateRate(event);
  }

  SetLastTimeToNow(event);

  // Discount the stored rate over the elapsed time, then add 1 because the
  // event has happened right now.
  const double discounted_rate =
      DiscountRate(GetRate(event), elapsed_hours, discount_rate_per_hour_);
  const double updated_rate = 1 + discounted_rate;
  SetRate(event, updated_rate);
  return updated_rate;
}
// Returns the stored rate of |event| discounted over the time elapsed since
// the event last happened (at most |max_hours_|). Nothing is written back.
// Returns 0 when there is no pref service.
double UserClassifier::GetUpToDateRate(Event event) const {
  // The pref_service_ can be null in tests.
  if (!pref_service_) {
    return 0;
  }

  const double elapsed_hours =
      std::min(max_hours_, GetHoursSinceLastTime(event));
  return DiscountRate(GetRate(event), elapsed_hours, discount_rate_per_hour_);
}
// Returns the number of hours elapsed since |event| last happened, or 0 if no
// last time is stored for it. The result is never negative.
// Requires a non-null |pref_service_|.
double UserClassifier::GetHoursSinceLastTime(Event event) const {
  if (!HasLastTime(event)) {
    return 0;
  }
  const base::Time last_time =
      pref_service_->GetTime(kLastTimeKeys[static_cast<int>(event)]);
  const base::TimeDelta since_last_time = clock_->Now() - last_time;
  // If the clock has been moved backwards, the stored time lies in the future
  // and the difference is negative. A negative value would make DiscountRate()
  // multiply the rate by a factor larger than 1 instead of discounting it, so
  // such a last time is treated as "just now".
  return std::max(0.0, since_last_time.InSecondsF() / 3600);
}
// Returns whether a value is set for the last-time pref of |event|.
// Requires a non-null |pref_service_|.
bool UserClassifier::HasLastTime(Event event) const {
  const char* const pref_name = kLastTimeKeys[static_cast<int>(event)];
  return pref_service_->HasPrefPath(pref_name);
}
// Stores the current time of |clock_| as the last time |event| happened.
// Requires a non-null |pref_service_|.
void UserClassifier::SetLastTimeToNow(Event event) {
  const char* const pref_name = kLastTimeKeys[static_cast<int>(event)];
  pref_service_->SetTime(pref_name, clock_->Now());
}
// Returns the rate currently stored in prefs for |event| (not discounted to
// the current time). Requires a non-null |pref_service_|.
double UserClassifier::GetRate(Event event) const {
  const char* const pref_name = kRateKeys[static_cast<int>(event)];
  return pref_service_->GetDouble(pref_name);
}
// Stores |rate| in prefs as the rate of |event|.
// Requires a non-null |pref_service_|.
void UserClassifier::SetRate(Event event, double rate) {
  const char* const pref_name = kRateKeys[static_cast<int>(event)];
  pref_service_->SetDouble(pref_name, rate);
}
// Clears the rate pref of |event|. Requires a non-null |pref_service_|.
void UserClassifier::ClearRate(Event event) {
  const char* const pref_name = kRateKeys[static_cast<int>(event)];
  pref_service_->ClearPref(pref_name);
}
} // namespace feed