blob: 03195fc16230d6e6f477fddc21c6e2e862af2f95 [file] [log] [blame]
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/language_usage_metrics/language_usage_metrics.h"
#include "base/macros.h"
#include "base/metrics/histogram.h"
#include "base/metrics/histogram_samples.h"
#include "base/metrics/statistics_recorder.h"
#include "components/language/core/browser/url_language_histogram.h"
#include "components/prefs/testing_pref_service.h"
#include "testing/gtest/include/gtest/gtest.h"
using base::HistogramBase;
using base::HistogramSamples;
using base::SampleCountIterator;
using base::StatisticsRecorder;
using language::UrlLanguageHistogram;
namespace language_usage_metrics {
namespace {
class MetricsRecorder {
public:
explicit MetricsRecorder(const char* key) : key_(key) {
HistogramBase* histogram = StatisticsRecorder::FindHistogram(key_);
if (histogram)
base_samples_ = histogram->SnapshotSamples();
}
void CheckTotalCount(int count) {
Snapshot();
EXPECT_EQ(count, GetTotalCount());
}
void CheckValueCount(HistogramBase::Sample value, int count) {
Snapshot();
EXPECT_EQ(count, GetCountWithoutSnapshot(value));
}
private:
void Snapshot() {
HistogramBase* histogram = StatisticsRecorder::FindHistogram(key_);
if (!histogram)
return;
samples_ = histogram->SnapshotSamples();
}
HistogramBase::Count GetCountWithoutSnapshot(HistogramBase::Sample value) {
if (!samples_)
return 0;
HistogramBase::Count count = samples_->GetCount(value);
if (!base_samples_)
return count;
return count - base_samples_->GetCount(value);
}
HistogramBase::Count GetTotalCount() {
if (!samples_)
return 0;
HistogramBase::Count count = samples_->TotalCount();
if (!base_samples_)
return count;
return count - base_samples_->TotalCount();
}
std::string key_;
std::unique_ptr<HistogramSamples> base_samples_;
std::unique_ptr<HistogramSamples> samples_;
DISALLOW_COPY_AND_ASSIGN(MetricsRecorder);
};
void RecordPageLanguageVisits(UrlLanguageHistogram& language_histogram,
std::string language,
int count) {
for (int i = 0; i < count; i++) {
language_histogram.OnPageVisited(language);
}
}
// Pairs a language code string (e.g. "en") with the integer value the tests
// expect to find in the histograms for that language.
struct LanguageCodeHash {
  LanguageCodeHash() = default;
  LanguageCodeHash(const std::string& code, int hash)
      : code(code), hash(hash) {}

  // Language code as passed to the code under test.
  std::string code;
  // Expected histogram sample for |code|. Initialized in-class so that a
  // default-constructed instance never carries an indeterminate value.
  int hash = 0;
};
} // namespace
// Exercises LanguageUsageMetrics::RecordPageLanguages() against the
// "LanguageUsage.MostFrequentPageLanguages" histogram: no samples are expected
// before ten page visits, and languages whose share of visits is below 0.05
// are expected to be left out.
TEST(LanguageUsageMetricsTest, RecordPageLanguageCounts) {
  const LanguageCodeHash english("en", 25966);
  const LanguageCodeHash spanish("es", 25971);
  const LanguageCodeHash japanese("ja", 27233);

  TestingPrefServiceSimple pref_service;
  UrlLanguageHistogram::RegisterProfilePrefs(pref_service.registry());
  UrlLanguageHistogram page_languages(&pref_service);

  // Baseline: nothing has been recorded since the recorder was created.
  MetricsRecorder metrics("LanguageUsage.MostFrequentPageLanguages");
  metrics.CheckTotalCount(0);

  // Nine visits in total (8 en + 1 es): recording must not emit anything.
  RecordPageLanguageVisits(page_languages, english.code, 8);
  RecordPageLanguageVisits(page_languages, spanish.code, 1);
  LanguageUsageMetrics::RecordPageLanguages(page_languages);
  metrics.CheckTotalCount(0);

  // The tenth visit (9 en + 1 es): each language is recorded once.
  RecordPageLanguageVisits(page_languages, english.code, 1);
  LanguageUsageMetrics::RecordPageLanguages(page_languages);
  metrics.CheckTotalCount(2);
  metrics.CheckValueCount(english.hash, 1);
  metrics.CheckValueCount(spanish.hash, 1);

  // Forty visits in total. Spanish sits exactly on the 0.05 boundary and is
  // still recorded; Japanese falls below it and is not.
  RecordPageLanguageVisits(page_languages, english.code, 28);  // en: 37/40
  RecordPageLanguageVisits(page_languages, spanish.code, 1);   // es: 2/40
  RecordPageLanguageVisits(page_languages, japanese.code, 1);  // ja: 1/40
  LanguageUsageMetrics::RecordPageLanguages(page_languages);
  metrics.CheckTotalCount(4);
  metrics.CheckValueCount(english.hash, 2);
  metrics.CheckValueCount(spanish.hash, 2);
  metrics.CheckValueCount(japanese.hash, 0);
}
// Exercises LanguageUsageMetrics::RecordAcceptLanguages() against two
// histograms. As asserted below, the test expects:
//  - "LanguageUsage.AcceptLanguage" to gain one sample per language in the
//    comma-separated list, and
//  - "LanguageUsage.AcceptLanguage.Count" to gain one sample per call, whose
//    value is the number of languages in the list.
TEST(LanguageUsageMetricsTest, RecordAcceptLanguages) {
  const LanguageCodeHash EN("en", 25966);
  const LanguageCodeHash ES("es", 25971);
  const LanguageCodeHash JA("ja", 27233);

  // Initialize recorders; both start with nothing recorded.
  MetricsRecorder recorder("LanguageUsage.AcceptLanguage");
  MetricsRecorder recorder_count("LanguageUsage.AcceptLanguage.Count");
  recorder.CheckTotalCount(0);
  recorder_count.CheckTotalCount(0);

  // Two calls with a single language each.
  LanguageUsageMetrics::RecordAcceptLanguages("en");
  LanguageUsageMetrics::RecordAcceptLanguages("en");
  recorder.CheckTotalCount(2);
  recorder.CheckValueCount(EN.hash, 2);
  recorder_count.CheckTotalCount(2);
  recorder_count.CheckValueCount(1, 2);

  // One call with two languages.
  LanguageUsageMetrics::RecordAcceptLanguages("en,es");
  recorder.CheckTotalCount(4);
  recorder.CheckValueCount(EN.hash, 3);
  recorder.CheckValueCount(ES.hash, 1);
  recorder_count.CheckTotalCount(3);
  recorder_count.CheckValueCount(1, 2);
  recorder_count.CheckValueCount(2, 1);

  // One call with three languages; "ja-JP" is expected to count as "ja".
  LanguageUsageMetrics::RecordAcceptLanguages("en,es,ja-JP");
  recorder.CheckTotalCount(7);
  recorder.CheckValueCount(EN.hash, 4);
  recorder.CheckValueCount(ES.hash, 2);
  recorder.CheckValueCount(JA.hash, 1);
  recorder_count.CheckTotalCount(4);
  recorder_count.CheckValueCount(1, 2);
  recorder_count.CheckValueCount(2, 1);
  recorder_count.CheckValueCount(3, 1);
}
// Exercises LanguageUsageMetrics::RecordApplicationLanguage() against the
// "LanguageUsage.ApplicationLanguage" histogram: every locale variant of a
// language is expected to be counted under that language's value.
TEST(LanguageUsageMetricsTest, RecordApplicationLanguage) {
  const LanguageCodeHash english("en", 25966);
  const LanguageCodeHash spanish("es", 25971);
  const char* const english_locales[] = {"en", "en-US", "en-UK"};
  const char* const spanish_locales[] = {"es", "es-ES", "es-419"};

  MetricsRecorder metrics("LanguageUsage.ApplicationLanguage");

  // Three English locales: three samples, all under the "en" value.
  for (const char* locale : english_locales)
    LanguageUsageMetrics::RecordApplicationLanguage(locale);
  metrics.CheckTotalCount(3);
  metrics.CheckValueCount(english.hash, 3);

  // Three Spanish locales bring the total to six, three under the "es" value.
  for (const char* locale : spanish_locales)
    LanguageUsageMetrics::RecordApplicationLanguage(locale);
  metrics.CheckTotalCount(6);
  metrics.CheckValueCount(spanish.hash, 3);
}
// Exercises LanguageUsageMetrics::ParseAcceptLanguages() with a series of
// accept-language strings. The same output set is reused for every call, so
// the test also expects each call to replace the previous contents.
//
// Size checks that guard a dereference of the set's iterators use ASSERT_EQ:
// if the set has the wrong size, the test stops instead of dereferencing an
// invalid iterator.
TEST(LanguageUsageMetricsTest, ParseAcceptLanguages) {
  std::set<int> language_set;
  std::set<int>::const_iterator it;
  const int ENGLISH = 25966;
  const int SPANISH = 25971;
  const int JAPANESE = 27233;

  // Basic single language case.
  LanguageUsageMetrics::ParseAcceptLanguages("ja", &language_set);
  ASSERT_EQ(1U, language_set.size());
  EXPECT_EQ(JAPANESE, *language_set.begin());

  // Empty language.
  LanguageUsageMetrics::ParseAcceptLanguages(std::string(), &language_set);
  EXPECT_EQ(0U, language_set.size());

  // Country code is ignored.
  LanguageUsageMetrics::ParseAcceptLanguages("ja-JP", &language_set);
  ASSERT_EQ(1U, language_set.size());
  EXPECT_EQ(JAPANESE, *language_set.begin());

  // Case is ignored.
  LanguageUsageMetrics::ParseAcceptLanguages("Ja-jP", &language_set);
  ASSERT_EQ(1U, language_set.size());
  EXPECT_EQ(JAPANESE, *language_set.begin());

  // Underscore as the separator.
  LanguageUsageMetrics::ParseAcceptLanguages("ja_JP", &language_set);
  ASSERT_EQ(1U, language_set.size());
  EXPECT_EQ(JAPANESE, *language_set.begin());

  // The result contains the same language code only once.
  LanguageUsageMetrics::ParseAcceptLanguages("ja-JP,ja", &language_set);
  ASSERT_EQ(1U, language_set.size());
  EXPECT_EQ(JAPANESE, *language_set.begin());

  // Basic two languages case. std::set iterates in ascending order, so the
  // smaller value (ENGLISH) comes first.
  LanguageUsageMetrics::ParseAcceptLanguages("en,ja", &language_set);
  ASSERT_EQ(2U, language_set.size());
  it = language_set.begin();
  EXPECT_EQ(ENGLISH, *it);
  EXPECT_EQ(JAPANESE, *++it);

  // Multiple languages, with duplicates and country codes mixed in.
  LanguageUsageMetrics::ParseAcceptLanguages("ja-JP,en,es,ja,en-US",
                                             &language_set);
  ASSERT_EQ(3U, language_set.size());
  it = language_set.begin();
  EXPECT_EQ(ENGLISH, *it);
  EXPECT_EQ(SPANISH, *++it);
  EXPECT_EQ(JAPANESE, *++it);

  // Two empty languages.
  LanguageUsageMetrics::ParseAcceptLanguages(",", &language_set);
  EXPECT_EQ(0U, language_set.size());

  // Trailing comma.
  LanguageUsageMetrics::ParseAcceptLanguages("ja,", &language_set);
  ASSERT_EQ(1U, language_set.size());
  EXPECT_EQ(JAPANESE, *language_set.begin());

  // Leading comma.
  LanguageUsageMetrics::ParseAcceptLanguages(",es", &language_set);
  ASSERT_EQ(1U, language_set.size());
  EXPECT_EQ(SPANISH, *language_set.begin());

  // Combination of invalid and valid.
  LanguageUsageMetrics::ParseAcceptLanguages("1234,en", &language_set);
  ASSERT_EQ(1U, language_set.size());
  it = language_set.begin();
  EXPECT_EQ(ENGLISH, *it);
}
// Exercises LanguageUsageMetrics::ToLanguageCode(). The expected values used
// here equal (first_letter << 8) | second_letter of the lower-case language
// code, e.g. 'j' (106) and 'a' (97) give 27233.
TEST(LanguageUsageMetricsTest, ToLanguageCode) {
  const int kSpanish = 25971;
  const int kJapanese = 27233;
  const int kUnknownLanguage = 0;

  // A plain two-letter code.
  EXPECT_EQ(kJapanese, LanguageUsageMetrics::ToLanguageCode("ja"));

  // Letter case does not matter.
  EXPECT_EQ(kSpanish, LanguageUsageMetrics::ToLanguageCode("Es"));

  // The country code is ignored.
  EXPECT_EQ(kJapanese, LanguageUsageMetrics::ToLanguageCode("ja-JP"));

  // Invalid locales are treated as an unknown language.
  EXPECT_EQ(kUnknownLanguage,
            LanguageUsageMetrics::ToLanguageCode(std::string()));
  EXPECT_EQ(kUnknownLanguage, LanguageUsageMetrics::ToLanguageCode("1234"));

  // "xx" does not exist in the ISO 639-1 table, but LanguageUsageMetrics does
  // not check which codes are valid, so it is still converted to a value.
  EXPECT_EQ(30840, LanguageUsageMetrics::ToLanguageCode("xx"));
}
} // namespace language_usage_metrics