blob: 2bc6e0dd40439df2f59c7256c8a4d52c2c4de903 [file] [log] [blame]
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/translate/core/browser/translate_ranker.h"
#include <cmath>
#include "base/bind.h"
#include "base/bind_helpers.h"
#include "base/command_line.h"
#include "base/metrics/histogram_macros.h"
#include "base/profiler/scoped_tracker.h"
#include "base/strings/string_util.h"
#include "components/metrics/proto/translate_event.pb.h"
#include "components/translate/core/browser/proto/translate_ranker_model.pb.h"
#include "components/translate/core/browser/translate_download_manager.h"
#include "components/translate/core/browser/translate_prefs.h"
#include "components/translate/core/browser/translate_url_fetcher.h"
#include "components/translate/core/common/translate_switches.h"
namespace translate {
namespace {
typedef google::protobuf::Map<std::string, float> WeightMap;
const double kTranslationOfferThreshold = 0.5;
// Parameters for model fetching.
const char kTranslateRankerModelURL[] =
"https://chromium-i18n.appspot.com/ssl-translate-ranker-model";
const int kMaxRetryOn5xx = 3;
const int kDownloadRefractoryPeriodMin = 15;
const char kUnknown[] = "UNKNOWN";
// Enumeration denoting the outcome of an attempt to download the translate
// ranker model. This must be kept in sync with the TranslateRankerModelStatus
// enum in histograms.xml
enum ModelStatus {
MODEL_STATUS_OK = 0,
MODEL_STATUS_DOWNLOAD_THROTTLED = 1,
MODEL_STATUS_DOWNLOAD_FAILED = 2,
MODEL_STATUS_PARSE_FAILED = 3,
MODEL_STATUS_VALIDATION_FAILED = 4,
// Insert new values above this line.
MODEL_STATUS_MAX
};
double Sigmoid(double x) {
return 1.0 / (1.0 + exp(-x));
}
double ScoreComponent(const WeightMap& weights, const std::string& key) {
WeightMap::const_iterator i = weights.find(base::ToLowerASCII(key));
if (i == weights.end())
i = weights.find(kUnknown);
return i == weights.end() ? 0.0 : i->second;
}
GURL GetTranslateRankerURL() {
base::CommandLine* command_line = base::CommandLine::ForCurrentProcess();
return GURL(command_line->HasSwitch(switches::kTranslateRankerModelURL)
? command_line->GetSwitchValueASCII(
switches::kTranslateRankerModelURL)
: kTranslateRankerModelURL);
}
bool IsQueryEnabled() {
return base::FeatureList::IsEnabled(kTranslateRankerQuery);
}
bool IsEnforcementEnabled() {
return base::FeatureList::IsEnabled(kTranslateRankerEnforcement);
}
void ReportModelStatus(ModelStatus model_status) {
UMA_HISTOGRAM_ENUMERATION("Translate.Ranker.Model.Status", model_status,
MODEL_STATUS_MAX);
}
} // namespace
const base::Feature kTranslateRankerQuery{"TranslateRankerQuery",
base::FEATURE_DISABLED_BY_DEFAULT};
const base::Feature kTranslateRankerEnforcement{
"TranslateRankerEnforcement", base::FEATURE_DISABLED_BY_DEFAULT};
const base::Feature kTranslateRankerLogging{"TranslateRankerLogging",
base::FEATURE_DISABLED_BY_DEFAULT};
TranslateRanker::~TranslateRanker() {}
// static
bool TranslateRanker::IsEnabled() {
return IsQueryEnabled() || IsEnforcementEnabled();
}
// static
bool TranslateRanker::IsLoggingEnabled() {
return base::FeatureList::IsEnabled(kTranslateRankerLogging);
}
// static
TranslateRanker* TranslateRanker::GetInstance() {
return base::Singleton<TranslateRanker>::get();
}
std::unique_ptr<TranslateRanker> TranslateRanker::CreateForTesting(
const std::string& model_data) {
std::unique_ptr<TranslateRanker> ranker(new TranslateRanker());
CHECK(ranker != nullptr);
ranker->ParseModel(0, true, model_data);
CHECK(ranker->model_ != nullptr);
return ranker;
}
bool TranslateRanker::ShouldOfferTranslation(
const TranslatePrefs& translate_prefs,
const std::string& src_lang,
const std::string& dst_lang) {
// The ranker is a gate in the "show a translation prompt" flow. To retain
// the pre-existing functionality, it defaults to returning true in the
// absence of a model or if enforcement is disabled. As this is ranker is
// subsumed into a more general assist ranker, this default will go away
// (or become False).
const bool kDefaultResponse = true;
// If we don't have a model, request one and return the default.
if (model_ == nullptr) {
FetchModelData();
return kDefaultResponse;
}
DCHECK(model_->has_logistic_regression_model());
SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.ShouldOfferTranslation");
// TODO(rogerm): Remove ScopedTracker below once crbug.com/646711 is closed.
tracked_objects::ScopedTracker tracking_profile(
FROM_HERE_WITH_EXPLICIT_FUNCTION(
"646711 translate::TranslateRanker::ShouldOfferTranslation"));
const std::string& app_locale =
TranslateDownloadManager::GetInstance()->application_locale();
const std::string& country = translate_prefs.GetCountry();
double accept_count = translate_prefs.GetTranslationAcceptedCount(src_lang);
double denied_count = translate_prefs.GetTranslationDeniedCount(src_lang);
double ignored_count =
model_->logistic_regression_model().has_ignore_ratio_weight()
? translate_prefs.GetTranslationIgnoredCount(src_lang)
: 0.0;
double total_count = accept_count + denied_count + ignored_count;
double accept_ratio =
(total_count == 0.0) ? 0.0 : (accept_count / total_count);
double decline_ratio =
(total_count == 0.0) ? 0.0 : (denied_count / total_count);
double ignore_ratio =
(total_count == 0.0) ? 0.0 : (ignored_count / total_count);
DVLOG(3) << "TranslateRanker: features=["
<< "src_lang='" << src_lang << "', dst_lang='" << dst_lang
<< "', country='" << country << "', locale='" << app_locale
<< ", accept_count=" << accept_count
<< ", denied_count=" << denied_count
<< ", ignored_count=" << ignored_count
<< ", total_count=" << total_count
<< ", accept_ratio=" << accept_ratio
<< ", decline_ratio=" << decline_ratio
<< ", ignore_ratio=" << ignore_ratio << "]";
double score = CalculateScore(accept_ratio, decline_ratio, ignore_ratio,
src_lang, dst_lang, app_locale, country);
DVLOG(2) << "TranslateRanker Score: " << score;
bool result = (score >= kTranslationOfferThreshold);
UMA_HISTOGRAM_BOOLEAN("Translate.Ranker.QueryResult", result);
// If enforcement is enabled, return the real result; otherwise, return the
// default.
return IsEnforcementEnabled() ? result : kDefaultResponse;
}
TranslateRanker::TranslateRanker() {}
double TranslateRanker::CalculateScore(double accept_ratio,
double decline_ratio,
double ignore_ratio,
const std::string& src_lang,
const std::string& dst_lang,
const std::string& locale,
const std::string& country) {
SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.CalculateScore");
DCHECK(model_ != nullptr);
DCHECK(model_->has_logistic_regression_model());
const chrome_intelligence::TranslateRankerModel::LogisticRegressionModel&
logit = model_->logistic_regression_model();
double dot_product =
(accept_ratio * logit.accept_ratio_weight()) +
(decline_ratio * logit.decline_ratio_weight()) +
(ignore_ratio * logit.ignore_ratio_weight()) +
ScoreComponent(logit.source_language_weight(), src_lang) +
ScoreComponent(logit.dest_language_weight(), dst_lang) +
ScoreComponent(logit.country_weight(), country) +
ScoreComponent(logit.locale_weight(), locale);
return Sigmoid(dot_product + logit.bias());
}
int TranslateRanker::GetModelVersion() const {
return (model_ == nullptr) ? 0 : model_->version();
}
void TranslateRanker::FetchModelData() {
// Exit if the model has already been successfully loaded.
if (model_ != nullptr) {
return;
}
// Exit if the download has been throttled.
if (base::Time::NowFromSystemTime() < next_earliest_download_time_) {
return;
}
// Create the model fetcher if it does not exist.
if (model_fetcher_ == nullptr) {
model_fetcher_.reset(new TranslateURLFetcher(kFetcherId));
model_fetcher_->set_max_retry_on_5xx(kMaxRetryOn5xx);
}
// If a request is already in flight, do not issue a new one.
if (model_fetcher_->state() == TranslateURLFetcher::REQUESTING) {
DVLOG(2) << "TranslateRanker: Download complete or in progress.";
return;
}
DVLOG(2) << "TranslateRanker: Downloading model...";
download_start_time_ = base::Time::Now();
bool result = model_fetcher_->Request(
GetTranslateRankerURL(),
base::Bind(&TranslateRanker::ParseModel, base::Unretained(this)));
if (!result) {
ReportModelStatus(MODEL_STATUS_DOWNLOAD_THROTTLED);
next_earliest_download_time_ =
base::Time::NowFromSystemTime() +
base::TimeDelta::FromMinutes(kDownloadRefractoryPeriodMin);
}
}
void TranslateRanker::ParseModel(int /* id */,
bool success,
const std::string& data) {
UMA_HISTOGRAM_MEDIUM_TIMES("Translate.Ranker.Timer.DownloadModel",
base::Time::Now() - download_start_time_);
SCOPED_UMA_HISTOGRAM_TIMER("Translate.Ranker.Timer.ParseModel");
// We should not be here if the model has already been downloaded and parsed.
DCHECK(model_ == nullptr);
// On failure, we just abort. The TranslateRanker will retry on a subsequent
// translation opportunity. The TranslateURLFetcher enforces a limit for
// retried requests.
if (!success) {
ReportModelStatus(MODEL_STATUS_DOWNLOAD_FAILED);
return;
}
// Create a new model instance, parse and validate the data, and move it over
// to be used by the ranker.
std::unique_ptr<chrome_intelligence::TranslateRankerModel> new_model(
new chrome_intelligence::TranslateRankerModel());
bool is_parseable = new_model->ParseFromString(data);
if (!is_parseable) {
ReportModelStatus(MODEL_STATUS_PARSE_FAILED);
return;
}
bool is_valid = new_model->has_logistic_regression_model();
if (!is_valid) {
ReportModelStatus(MODEL_STATUS_VALIDATION_FAILED);
return;
}
ReportModelStatus(MODEL_STATUS_OK);
model_ = std::move(new_model);
model_fetcher_.reset();
}
void TranslateRanker::FlushTranslateEvents(
std::vector<metrics::TranslateEventProto>* translate_events) {
if (IsLoggingEnabled()) {
translate_events->swap(translate_events_cache_);
translate_events_cache_.clear();
}
}
void TranslateRanker::RecordTranslateEvent(
const metrics::TranslateEventProto& translate_event) {
if (IsLoggingEnabled())
translate_events_cache_.push_back(translate_event);
}
} // namespace translate