| // Copyright 2018 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "components/feed/core/user_classifier.h" |
| |
| #include <algorithm> |
| #include <cfloat> |
| #include <string> |
| |
| #include "base/metrics/histogram_macros.h" |
| #include "base/stl_util.h" |
| #include "base/strings/string_number_conversions.h" |
| #include "base/time/clock.h" |
| #include "components/feed/core/pref_names.h" |
| #include "components/feed/feed_feature_list.h" |
| #include "components/prefs/pref_registry_simple.h" |
| #include "components/prefs/pref_service.h" |
| #include "components/variations/variations_associated_data.h" |
| |
| namespace feed { |
| |
| namespace { |
| |
| // The discount rate for computing the discounted-average rates. Must be |
| // strictly larger than 0 and strictly smaller than 1! |
| const double kDiscountRatePerDay = 0.25; |
| const char kDiscountRatePerDayParam[] = "user_classifier_discount_rate_per_day"; |
| |
| // Never consider any larger interval than this (so that extreme situations such |
| // as losing your phone or going for a long offline vacation do not skew the |
| // average too much). |
| // When overriding via variation parameters, it is better to use smaller values |
| // than |kMaxHours| as this it the maximum value reported in the histograms. |
| const double kMaxHours = 7 * 24; |
| const char kMaxHoursParam[] = "user_classifier_max_hours"; |
| |
| // Ignore events within |kMinHours| hours since the last event (|kMinHours| is |
| // the length of the browsing session where subsequent events of the same type |
| // do not count again). |
| const double kMinHours = 0.5; |
| const char kMinHoursParam[] = "user_classifier_min_hours"; |
| |
| // Classification constants. |
| const double kActiveConsumerClicksAtLeastOncePerHours = 96; |
| const char kActiveConsumerClicksAtLeastOncePerHoursParam[] = |
| "user_classifier_active_consumer_clicks_at_least_once_per_hours"; |
| const double kRareUserOpensNTPAtMostOncePerHours = 96; |
| const char kRareUserOpensNTPAtMostOncePerHoursParam[] = |
| "user_classifier_rare_user_opens_ntp_at_most_once_per_hours"; |
| |
| // List of all Events used for iteration. |
| const UserClassifier::Event kEvents[] = { |
| UserClassifier::Event::NTP_OPENED, UserClassifier::Event::SUGGESTIONS_USED}; |
| |
| // Arrays of pref names, indexed by Event's int value. |
| const char* kRateKeys[] = {prefs::kUserClassifierAverageNTPOpenedPerHour, |
| prefs::kUserClassifierAverageSuggestionsUsedPerHour}; |
| const char* kLastTimeKeys[] = {prefs::kUserClassifierLastTimeToOpenNTP, |
| prefs::kUserClassifierLastTimeToUseSuggestions}; |
| |
| // Default lengths of the intervals for new users for the events. |
| const double kInitialHoursBetweenEvents[] = {24, 120}; |
| const char* kInitialHoursBetweenEventsParams[] = { |
| "user_classifier_default_interval_ntp_opened", |
| "user_classifier_default_interval_suggestions_used"}; |
| |
| static_assert(base::size(kEvents) == |
| static_cast<int>(UserClassifier::Event::COUNT) && |
| base::size(kRateKeys) == |
| static_cast<int>(UserClassifier::Event::COUNT) && |
| base::size(kLastTimeKeys) == |
| static_cast<int>(UserClassifier::Event::COUNT) && |
| base::size(kInitialHoursBetweenEvents) == |
| static_cast<int>(UserClassifier::Event::COUNT) && |
| base::size(kInitialHoursBetweenEventsParams) == |
| static_cast<int>(UserClassifier::Event::COUNT), |
| "Fill in info for all event types."); |
| |
| // Computes the discount rate. |
| double GetDiscountRatePerHour() { |
| double discount_rate_per_day = variations::GetVariationParamByFeatureAsDouble( |
| kInterestFeedContentSuggestions, kDiscountRatePerDayParam, |
| kDiscountRatePerDay); |
| // Check for illegal values. |
| if (discount_rate_per_day <= 0 || discount_rate_per_day >= 1) { |
| DLOG(WARNING) << "Illegal value " << discount_rate_per_day |
| << " for the parameter " << kDiscountRatePerDayParam |
| << " (must be strictly between 0 and 1; the default " |
| << kDiscountRatePerDay << " is used, instead)."; |
| discount_rate_per_day = kDiscountRatePerDay; |
| } |
| // Compute discount_rate_per_hour such that |
| // discount_rate_per_day = 1 - e^{-discount_rate_per_hour * 24}. |
| return std::log(1.0 / (1.0 - discount_rate_per_day)) / 24.0; |
| } |
| |
| double GetInitialHoursBetweenEvents(UserClassifier::Event event) { |
| return variations::GetVariationParamByFeatureAsDouble( |
| kInterestFeedContentSuggestions, |
| kInitialHoursBetweenEventsParams[static_cast<int>(event)], |
| kInitialHoursBetweenEvents[static_cast<int>(event)]); |
| } |
| |
| double GetMinHours() { |
| return variations::GetVariationParamByFeatureAsDouble( |
| kInterestFeedContentSuggestions, kMinHoursParam, kMinHours); |
| } |
| |
| double GetMaxHours() { |
| return variations::GetVariationParamByFeatureAsDouble( |
| kInterestFeedContentSuggestions, kMaxHoursParam, kMaxHours); |
| } |
| |
| // Returns the new value of the rate using its |old_value|, assuming |
| // |hours_since_last_time| hours have passed since it was last discounted. |
| double DiscountRate(double old_value, |
| double hours_since_last_time, |
| double discount_rate_per_hour) { |
| // Compute the new discounted average according to the formula |
| // avg_events := e^{-discount_rate_per_hour * hours_since} * avg_events |
| return std::exp(-discount_rate_per_hour * hours_since_last_time) * old_value; |
| } |
| |
| // Compute the number of hours between two events for the given rate value |
| // assuming the events were equally distributed. |
| double GetEstimateHoursBetweenEvents(double rate, |
| double discount_rate_per_hour, |
| double min_hours, |
| double max_hours) { |
| // The computation below is well-defined only for |rate| > 1 (log of |
| // negative value or division by zero). When |rate| -> 1, the estimate |
| // below -> infinity, so max_hours is a natural result, here. |
| if (rate <= 1) { |
| return max_hours; |
| } |
| |
| // This is the estimate with the assumption that last event happened right |
| // now and the system is in the steady-state. Solve estimate_hours in the |
| // steady-state equation: |
| // rate = 1 + e^{-discount_rate * estimate_hours} * rate, |
| // i.e. |
| // -discount_rate * estimate_hours = log((rate - 1) / rate), |
| // discount_rate * estimate_hours = log(rate / (rate - 1)), |
| // estimate_hours = log(rate / (rate - 1)) / discount_rate. |
| double estimate_hours = std::log(rate / (rate - 1)) / discount_rate_per_hour; |
| return std::max(min_hours, std::min(max_hours, estimate_hours)); |
| } |
| |
| // The inverse of GetEstimateHoursBetweenEvents(). |
| double GetRateForEstimateHoursBetweenEvents(double estimate_hours, |
| double discount_rate_per_hour, |
| double min_hours, |
| double max_hours) { |
| // Keep the input value within [min_hours, max_hours]. |
| estimate_hours = std::max(min_hours, std::min(max_hours, estimate_hours)); |
| // Return |rate| such that GetEstimateHoursBetweenEvents for |rate| returns |
| // |estimate_hours|. Thus, solve |rate| in |
| // rate = 1 + e^{-discount_rate * estimate_hours} * rate, |
| // i.e. |
| // rate * (1 - e^{-discount_rate * estimate_hours}) = 1, |
| // rate = 1 / (1 - e^{-discount_rate * estimate_hours}). |
| return 1.0 / (1.0 - std::exp(-discount_rate_per_hour * estimate_hours)); |
| } |
| |
| } // namespace |
| |
| UserClassifier::UserClassifier(PrefService* pref_service, base::Clock* clock) |
| : pref_service_(pref_service), |
| clock_(clock), |
| discount_rate_per_hour_(GetDiscountRatePerHour()), |
| min_hours_(GetMinHours()), |
| max_hours_(GetMaxHours()), |
| active_consumer_clicks_at_least_once_per_hours_( |
| variations::GetVariationParamByFeatureAsDouble( |
| kInterestFeedContentSuggestions, |
| kActiveConsumerClicksAtLeastOncePerHoursParam, |
| kActiveConsumerClicksAtLeastOncePerHours)), |
| rare_user_opens_ntp_at_most_once_per_hours_( |
| variations::GetVariationParamByFeatureAsDouble( |
| kInterestFeedContentSuggestions, |
| kRareUserOpensNTPAtMostOncePerHoursParam, |
| kRareUserOpensNTPAtMostOncePerHours)) { |
| // The pref_service_ can be null in tests. |
| if (!pref_service_) { |
| return; |
| } |
| |
| // TODO(jkrcal): Store the current discount rate per hour into prefs. If it |
| // differs from the previous value, rescale the rate values so that the |
| // expectation does not change abruptly! |
| |
| // Initialize the prefs storing the last time: the counter has just started! |
| for (const Event event : kEvents) { |
| if (!HasLastTime(event)) { |
| SetLastTimeToNow(event); |
| } |
| } |
| } |
| |
| UserClassifier::~UserClassifier() = default; |
| |
| // static |
| void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) { |
| double discount_rate = GetDiscountRatePerHour(); |
| double min_hours = GetMinHours(); |
| double max_hours = GetMaxHours(); |
| |
| for (Event event : kEvents) { |
| double default_rate = GetRateForEstimateHoursBetweenEvents( |
| GetInitialHoursBetweenEvents(event), discount_rate, min_hours, |
| max_hours); |
| registry->RegisterDoublePref(kRateKeys[static_cast<int>(event)], |
| default_rate); |
| registry->RegisterTimePref(kLastTimeKeys[static_cast<int>(event)], |
| base::Time()); |
| } |
| } |
| |
| void UserClassifier::OnEvent(Event event) { |
| DCHECK_NE(event, Event::COUNT); |
| UpdateRateOnEvent(event); |
| // TODO(skym): Record average hour for metric in a histogram. |
| } |
| |
| double UserClassifier::GetEstimatedAvgTime(Event event) const { |
| DCHECK_NE(event, Event::COUNT); |
| double rate = GetUpToDateRate(event); |
| return GetEstimateHoursBetweenEvents(rate, discount_rate_per_hour_, |
| min_hours_, max_hours_); |
| } |
| |
| UserClassifier::UserClass UserClassifier::GetUserClass() const { |
| // The pref_service_ can be null in tests. |
| if (!pref_service_) { |
| return UserClass::ACTIVE_NTP_USER; |
| } |
| |
| if (GetEstimatedAvgTime(Event::NTP_OPENED) >= |
| rare_user_opens_ntp_at_most_once_per_hours_) { |
| return UserClass::RARE_NTP_USER; |
| } |
| |
| if (GetEstimatedAvgTime(Event::SUGGESTIONS_USED) <= |
| active_consumer_clicks_at_least_once_per_hours_) { |
| return UserClass::ACTIVE_SUGGESTIONS_CONSUMER; |
| } |
| |
| return UserClass::ACTIVE_NTP_USER; |
| } |
| |
| std::string UserClassifier::GetUserClassDescriptionForDebugging() const { |
| switch (GetUserClass()) { |
| case UserClass::RARE_NTP_USER: |
| return "Rare user of the NTP"; |
| case UserClass::ACTIVE_NTP_USER: |
| return "Active user of the NTP"; |
| case UserClass::ACTIVE_SUGGESTIONS_CONSUMER: |
| return "Active consumer of NTP articles"; |
| } |
| NOTREACHED(); |
| return std::string(); |
| } |
| |
| void UserClassifier::ClearClassificationForDebugging() { |
| // The pref_service_ can be null in tests. |
| if (!pref_service_) { |
| return; |
| } |
| |
| for (const Event& event : kEvents) { |
| ClearRate(event); |
| SetLastTimeToNow(event); |
| } |
| } |
| |
| double UserClassifier::UpdateRateOnEvent(Event event) { |
| // The pref_service_ can be null in tests. |
| if (!pref_service_) { |
| return 0; |
| } |
| |
| double hours_since_last_time = |
| std::min(max_hours_, GetHoursSinceLastTime(event)); |
| // Ignore events within the same "browsing session". |
| if (hours_since_last_time < min_hours_) { |
| return GetUpToDateRate(event); |
| } |
| |
| SetLastTimeToNow(event); |
| |
| double rate = GetRate(event); |
| // Add 1 to the discounted rate as the event has happened right now. |
| double new_rate = |
| 1 + DiscountRate(rate, hours_since_last_time, discount_rate_per_hour_); |
| SetRate(event, new_rate); |
| return new_rate; |
| } |
| |
| double UserClassifier::GetUpToDateRate(Event event) const { |
| // The pref_service_ can be null in tests. |
| if (!pref_service_) { |
| return 0; |
| } |
| |
| double hours_since_last_time = |
| std::min(max_hours_, GetHoursSinceLastTime(event)); |
| |
| double rate = GetRate(event); |
| return DiscountRate(rate, hours_since_last_time, discount_rate_per_hour_); |
| } |
| |
| double UserClassifier::GetHoursSinceLastTime(Event event) const { |
| if (!HasLastTime(event)) { |
| return 0; |
| } |
| |
| base::TimeDelta since_last_time = |
| clock_->Now() - |
| pref_service_->GetTime(kLastTimeKeys[static_cast<int>(event)]); |
| return since_last_time.InSecondsF() / 3600; |
| } |
| |
| bool UserClassifier::HasLastTime(Event event) const { |
| return pref_service_->HasPrefPath(kLastTimeKeys[static_cast<int>(event)]); |
| } |
| |
| void UserClassifier::SetLastTimeToNow(Event event) { |
| pref_service_->SetTime(kLastTimeKeys[static_cast<int>(event)], clock_->Now()); |
| } |
| |
| double UserClassifier::GetRate(Event event) const { |
| return pref_service_->GetDouble(kRateKeys[static_cast<int>(event)]); |
| } |
| |
| void UserClassifier::SetRate(Event event, double rate) { |
| pref_service_->SetDouble(kRateKeys[static_cast<int>(event)], rate); |
| } |
| |
| void UserClassifier::ClearRate(Event event) { |
| pref_service_->ClearPref(kRateKeys[static_cast<int>(event)]); |
| } |
| |
| } // namespace feed |