blob: 1bad31c8bed86ff3fa55452b88346afea41d5bbd [file] [log] [blame]
// Copyright (c) 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/history/core/browser/top_sites_impl.h"
#include <stdint.h>
#include <algorithm>
#include <memory>
#include <utility>
#include "base/bind.h"
#include "base/callback_helpers.h"
#include "base/check.h"
#include "base/hash/md5.h"
#include "base/location.h"
#include "base/metrics/histogram_macros.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/task/single_thread_task_runner.h"
#include "base/task/task_runner.h"
#include "base/threading/thread_task_runner_handle.h"
#include "base/values.h"
#include "build/build_config.h"
#include "components/history/core/browser/history_backend.h"
#include "components/history/core/browser/history_constants.h"
#include "components/history/core/browser/history_db_task.h"
#include "components/history/core/browser/page_usage_data.h"
#include "components/history/core/browser/top_sites_observer.h"
#include "components/history/core/browser/url_utils.h"
#include "components/prefs/pref_registry_simple.h"
#include "components/prefs/pref_service.h"
#include "components/prefs/scoped_user_pref_update.h"
#include "url/gurl.h"
namespace history {
namespace {
// Hands `urls` to `callback` on the sequence served by `task_runner`: runs it
// synchronously when the caller is already on that sequence, and posts it as a
// task otherwise.
void RunOrPostGetMostVisitedURLsCallback(
    base::SequencedTaskRunner* task_runner,
    TopSitesImpl::GetMostVisitedURLsCallback callback,
    const MostVisitedURLList& urls) {
  if (!task_runner->RunsTasksInCurrentSequence()) {
    // Different sequence: bind `urls` into the callback and hop over.
    task_runner->PostTask(FROM_HERE,
                          base::BindOnce(std::move(callback), urls));
    return;
  }
  std::move(callback).Run(urls);
}
// Checks if the titles stored in `old_list` and `new_list` have changes.
bool DoTitlesDiffer(const MostVisitedURLList& old_list,
const MostVisitedURLList& new_list) {
// If the two lists have different sizes, the most visited titles are
// considered to have changes.
if (old_list.size() != new_list.size())
return true;
return !std::equal(std::begin(old_list), std::end(old_list),
std::begin(new_list),
[](const auto& old_item_ptr, const auto& new_item_ptr) {
return old_item_ptr.title == new_item_ptr.title;
});
}
// The delay for the first HistoryService query at startup.
constexpr base::TimeDelta kFirstDelayAtStartup = base::Seconds(15);
// The delay for all HistoryService queries other than the first one.
#if defined(OS_IOS) || defined(OS_ANDROID)
// On mobile, a 60-minute delay results in the top sites database not being
// updated often enough, since the app isn't usually running for long
// stretches of time.
constexpr base::TimeDelta kDelayForUpdates = base::Minutes(5);
#else
constexpr base::TimeDelta kDelayForUpdates = base::Minutes(60);
#endif // defined(OS_IOS) || defined(OS_ANDROID)
// Key for the dictionary preference listing the URLs that should not be shown
// as most visited tiles. The dictionary is keyed by GetURLHash() of each
// blocked URL.
// TODO(sky): rename actual value to 'most_visited_blocked_urls.'
const char kBlockedUrlsPrefsKey[] = "ntp.most_visited_blacklist";
} // namespace
// Initially, the histogram is not recorded. SetTopSites() flips this to true
// the first time it records "History.FirstSetTopSitesDeltaSize", so that the
// histogram is recorded at most once.
bool TopSitesImpl::histogram_recorded_ = false;
// Constructs a TopSitesImpl in the not-yet-loaded state and without a backend;
// the backend is created later by Init().
//
// `pref_service` must be non-null and `can_add_url_to_history` must be a
// non-null callback; both are DCHECKed below. `prepopulated_pages` is copied
// into the instance. `history_service` is null-checked before use elsewhere in
// this file, so it is presumably allowed to be null.
TopSitesImpl::TopSitesImpl(PrefService* pref_service,
HistoryService* history_service,
const PrepopulatedPageList& prepopulated_pages,
const CanAddURLToHistoryFn& can_add_url_to_history)
: backend_(nullptr),
prepopulated_pages_(prepopulated_pages),
pref_service_(pref_service),
history_service_(history_service),
can_add_url_to_history_(can_add_url_to_history),
loaded_(false) {
DCHECK(pref_service_);
DCHECK(!can_add_url_to_history_.is_null());
}
// Creates and initializes the backend for the database at `db_name`, then
// asks it for the stored most visited sites. The reply is delivered to
// OnGotMostVisitedURLs().
void TopSitesImpl::Init(const base::FilePath& db_name) {
  // The backend is created here and not in the constructor so that unit tests
  // which never touch the backend can skip it entirely.
  backend_ = new TopSitesBackend();
  backend_->Init(db_name);

  auto on_sites_loaded = base::BindOnce(&TopSitesImpl::OnGotMostVisitedURLs,
                                        base::Unretained(this));
  backend_->GetMostVisitedSites(std::move(on_sites_loaded),
                                &cancelable_task_tracker_);
}
// WARNING: this function may be invoked on any thread.
//
// Runs `callback` with the current filtered list of most visited URLs. If
// loading has not finished yet, the callback is queued instead and is run by
// MoveStateToLoaded(), routed back to the calling thread's task runner.
void TopSitesImpl::GetMostVisitedURLs(GetMostVisitedURLsCallback callback) {
  MostVisitedURLList snapshot;
  {
    base::AutoLock lock(lock_);
    if (loaded_) {
      // Copy under the lock; the callback itself is run after releasing it.
      snapshot = thread_safe_cache_;
    } else {
      // Not loaded yet: remember the caller's task runner together with the
      // callback so the reply can be delivered there once loading completes.
      pending_callbacks_.push_back(base::BindOnce(
          &RunOrPostGetMostVisitedURLsCallback,
          base::RetainedRef(base::ThreadTaskRunnerHandle::Get()),
          std::move(callback)));
      return;
    }
  }
  std::move(callback).Run(snapshot);
}
static bool Contains(const MostVisitedURLList& urls, const GURL& url) {
return std::find_if(urls.begin(), urls.end(),
[&url](const MostVisitedURL& item) {
return item.url == url;
}) != urls.end();
}
// Kicks off an immediate history query for the most visited URLs. Does
// nothing until the initial load has completed.
void TopSitesImpl::SyncWithHistory() {
  DCHECK(thread_checker_.CalledOnValidThread());
  if (!loaded_)
    return;
  StartQueryForMostVisited();
}
bool TopSitesImpl::HasBlockedUrls() const {
const base::Value* blocked_urls =
pref_service_->GetDictionary(kBlockedUrlsPrefsKey);
return blocked_urls && !blocked_urls->DictEmpty();
}
void TopSitesImpl::AddBlockedUrl(const GURL& url) {
DCHECK(thread_checker_.CalledOnValidThread());
{
DictionaryPrefUpdateDeprecated update(pref_service_, kBlockedUrlsPrefsKey);
base::Value* blocked_urls = update.Get();
blocked_urls->SetKey(GetURLHash(url), base::Value());
}
ResetThreadSafeCache();
NotifyTopSitesChanged(TopSitesObserver::ChangeReason::BLOCKED_URLS);
}
void TopSitesImpl::RemoveBlockedUrl(const GURL& url) {
DCHECK(thread_checker_.CalledOnValidThread());
{
DictionaryPrefUpdateDeprecated update(pref_service_, kBlockedUrlsPrefsKey);
base::Value* blocked_urls = update.Get();
blocked_urls->RemoveKey(GetURLHash(url));
}
ResetThreadSafeCache();
NotifyTopSitesChanged(TopSitesObserver::ChangeReason::BLOCKED_URLS);
}
bool TopSitesImpl::IsBlocked(const GURL& url) {
DCHECK(thread_checker_.CalledOnValidThread());
const base::Value* blocked_urls =
pref_service_->GetDictionary(kBlockedUrlsPrefsKey);
return blocked_urls && blocked_urls->FindKey(GetURLHash(url));
}
void TopSitesImpl::ClearBlockedUrls() {
DCHECK(thread_checker_.CalledOnValidThread());
{
DictionaryPrefUpdateDeprecated update(pref_service_, kBlockedUrlsPrefsKey);
base::Value* blocked_urls = update.Get();
blocked_urls->DictClear();
}
ResetThreadSafeCache();
NotifyTopSitesChanged(TopSitesObserver::ChangeReason::BLOCKED_URLS);
}
// Returns true once loading has finished and the unfiltered list holds at
// least kTopSitesNumber entries.
bool TopSitesImpl::IsFull() {
  if (!loaded_)
    return false;
  return top_sites_.size() >= kTopSitesNumber;
}
// Returns a copy of the prepopulated page list that was supplied to the
// constructor.
PrepopulatedPageList TopSitesImpl::GetPrepopulatedPages() {
return prepopulated_pages_;
}
// Returns whether the initial load has completed, i.e. whether
// MoveStateToLoaded() has set `loaded_`. Note that this reads `loaded_`
// without taking `lock_`.
bool TopSitesImpl::loaded() const {
return loaded_;
}
// Schedules a delayed refresh from history when a navigation to `url` commits,
// provided loading has finished and `url` is one that can be added to history.
void TopSitesImpl::OnNavigationCommitted(const GURL& url) {
  DCHECK(thread_checker_.CalledOnValidThread());
  // Short-circuits: the eligibility callback is only consulted once loaded.
  if (loaded_ && can_add_url_to_history_.Run(url))
    ScheduleUpdateTimer();
}
// Detaches from the history service, cancels outstanding requests, and shuts
// the backend down if one was created.
void TopSitesImpl::ShutdownOnUIThread() {
  history_service_ = nullptr;
  history_service_observation_.Reset();

  // A request may still be pending when shutdown is requested. Cancel them
  // all so that nothing calls back into this object afterwards.
  cancelable_task_tracker_.TryCancelAll();

  if (!backend_)
    return;
  backend_->Shutdown();
}
// static
// Registers the dictionary preference (kBlockedUrlsPrefsKey) that stores the
// blocked URLs with `registry`.
void TopSitesImpl::RegisterPrefs(PrefRegistrySimple* registry) {
registry->RegisterDictionaryPref(kBlockedUrlsPrefsKey);
}
// Defaulted: no explicit teardown happens here. Request cancellation and
// backend shutdown are performed by ShutdownOnUIThread().
TopSitesImpl::~TopSitesImpl() = default;
// Stops any pending refresh timer and asks the history service for the most
// visited URLs of the last kDaysOfHistory days. The reply is delivered to
// OnTopSitesAvailableFromHistory(). Must only be called once loaded.
void TopSitesImpl::StartQueryForMostVisited() {
  constexpr int kDaysOfHistory = 90;

  DCHECK(loaded_);
  timer_.Stop();

  if (history_service_) {
    const int result_count = num_results_to_request_from_history();
    history_service_->QueryMostVisitedURLs(
        result_count, kDaysOfHistory,
        base::BindOnce(&TopSitesImpl::OnTopSitesAvailableFromHistory,
                       base::Unretained(this)),
        &cancelable_task_tracker_);
  }
}
// static
void TopSitesImpl::DiffMostVisited(const MostVisitedURLList& old_list,
const MostVisitedURLList& new_list,
TopSitesDelta* delta) {
// Add all the old URLs for quick lookup. This maps URLs to the corresponding
// index in the input.
std::map<GURL, size_t> all_old_urls;
for (size_t i = 0; i < old_list.size(); i++)
all_old_urls[old_list[i].url] = i;
// Check all the URLs in the new set to see which ones are new or just moved.
// When we find a match in the old set, we'll reset its index to our special
// marker. This allows us to quickly identify the deleted ones in a later
// pass.
constexpr size_t kAlreadyFoundMarker = static_cast<size_t>(-1);
int rank = -1;
for (const auto& new_url : new_list) {
rank++;
auto found = all_old_urls.find(new_url.url);
if (found == all_old_urls.end()) {
delta->added.emplace_back(MostVisitedURLWithRank{new_url, rank});
} else {
DCHECK(found->second != kAlreadyFoundMarker)
<< "Same URL appears twice in the new list.";
int old_rank = found->second;
if (old_rank != rank)
delta->moved.emplace_back(MostVisitedURLWithRank{new_url, rank});
found->second = kAlreadyFoundMarker;
}
}
// Any member without the special marker in the all_old_urls list means that
// there wasn't a "new" URL that mapped to it, so it was deleted.
for (const std::pair<const GURL, size_t>& old_url : all_old_urls) {
if (old_url.second != kAlreadyFoundMarker)
delta->deleted.push_back(old_list[old_url.second]);
}
}
// Appends prepopulated pages that are not already present to `urls`, stopping
// as soon as `urls` holds kTopSitesNumber entries. Returns true if at least
// one page was appended.
bool TopSitesImpl::AddPrepopulatedPages(MostVisitedURLList* urls) const {
  bool any_added = false;
  for (const auto& page : prepopulated_pages_) {
    if (urls->size() >= kTopSitesNumber)
      break;
    const auto& entry = page.most_visited;
    if (Contains(*urls, entry.url))
      continue;
    urls->push_back(entry);
    any_added = true;
  }
  return any_added;
}
// Returns the entries of `urls` that are not blocked, in their original order
// and capped at kTopSitesNumber entries.
MostVisitedURLList TopSitesImpl::ApplyBlockedUrls(
    const MostVisitedURLList& urls) {
  MostVisitedURLList visible;
  for (const auto& candidate : urls) {
    if (!IsBlocked(candidate.url)) {
      // Stop at the first unblocked entry that no longer fits.
      if (visible.size() >= kTopSitesNumber)
        break;
      visible.push_back(candidate);
    }
  }
  return visible;
}
// static
// Returns the MD5 string of `url`'s spec. This is the key used for `url` in
// the blocked-URLs preference dictionary.
std::string TopSitesImpl::GetURLHash(const GURL& url) {
  // The spec is hashed as-is rather than a canonicalized URL, so that just one
  // of two 'duplicate' sites (e.g. 'gmail.com' and 'mail.google.com') can be
  // blocked.
  const std::string& spec = url.spec();
  return base::MD5String(spec);
}
// Replaces the current top sites with `top_sites` (padded with prepopulated
// pages), persists any URL-level change through the backend, rebuilds the
// filtered cache, and notifies observers if URLs or titles changed.
// `location` identifies the caller; it only affects histogram recording.
void TopSitesImpl::SetTopSites(MostVisitedURLList top_sites,
                               const CallLocation location) {
  DCHECK(thread_checker_.CalledOnValidThread());

  AddPrepopulatedPages(&top_sites);

  TopSitesDelta delta;
  DiffMostVisited(top_sites_, top_sites, &delta);
  const bool urls_changed = !(delta.deleted.empty() && delta.added.empty() &&
                              delta.moved.empty());

  // The delta size is recorded into a histogram only for the very first call
  // that originates from OnGotMostVisitedURLs().
  TopSitesBackend::RecordHistogram record_or_not =
      TopSitesBackend::RECORD_HISTOGRAM_NO;
  const bool is_first_report =
      location == CALL_LOCATION_FROM_ON_GOT_MOST_VISITED_URLS &&
      !histogram_recorded_;
  if (is_first_report) {
    size_t delta_size =
        delta.deleted.size() + delta.added.size() + delta.moved.size();
    UMA_HISTOGRAM_COUNTS_100("History.FirstSetTopSitesDeltaSize", delta_size);
    // Tell the backend to record its histogram as well, and make sure neither
    // is recorded again.
    record_or_not = TopSitesBackend::RECORD_HISTOGRAM_YES;
    histogram_recorded_ = true;
  }

  // Only URL-level changes are written to the database.
  if (urls_changed)
    backend_->UpdateTopSites(delta, record_or_not);

  // Observers hear about URL changes and, failing that, about title changes.
  const bool should_notify_observers =
      urls_changed || DoTitlesDiffer(top_sites_, top_sites);

  // The caches are refreshed unconditionally: this method also runs during
  // startup, at which point they have not been populated yet.
  top_sites_ = std::move(top_sites);
  ResetThreadSafeCache();

  if (should_notify_observers)
    NotifyTopSitesChanged(TopSitesObserver::ChangeReason::MOST_VISITED);
}
int TopSitesImpl::num_results_to_request_from_history() const {
DCHECK(thread_checker_.CalledOnValidThread());
const base::Value* blocked_urls =
pref_service_->GetDictionary(kBlockedUrlsPrefsKey);
return kTopSitesNumber + (blocked_urls ? blocked_urls->DictSize() : 0);
}
void TopSitesImpl::MoveStateToLoaded() {
DCHECK(thread_checker_.CalledOnValidThread());
MostVisitedURLList urls;
PendingCallbacks pending_callbacks;
{
base::AutoLock lock(lock_);
if (loaded_)
return; // Don't do anything if we're already loaded.
loaded_ = true;
// Now that we're loaded we can service the queued up callbacks. Copy them
// here and service them outside the lock.
if (!pending_callbacks_.empty()) {
urls = thread_safe_cache_;
pending_callbacks.swap(pending_callbacks_);
}
}
for (auto& callback : pending_callbacks)
std::move(callback).Run(urls);
if (history_service_)
history_service_observation_.Observe(history_service_.get());
NotifyTopSitesLoaded();
}
// Rebuilds `thread_safe_cache_` from `top_sites_` with blocked URLs filtered
// out. The whole rebuild, including the filtering, runs while holding `lock_`.
void TopSitesImpl::ResetThreadSafeCache() {
base::AutoLock lock(lock_);
thread_safe_cache_ = ApplyBlockedUrls(top_sites_);
}
// Arms the refresh timer to query history after kDelayForUpdates. An already
// running timer is left untouched.
void TopSitesImpl::ScheduleUpdateTimer() {
  if (!timer_.IsRunning()) {
    timer_.Start(FROM_HERE, kDelayForUpdates, this,
                 &TopSitesImpl::StartQueryForMostVisited);
  }
}
// Receives the most visited sites requested from the backend in Init().
// Installs them, moves this object to the loaded state, and arms the timer for
// the first refresh from history.
void TopSitesImpl::OnGotMostVisitedURLs(MostVisitedURLList sites) {
DCHECK(thread_checker_.CalledOnValidThread());
// Set `top_sites_` directly so that SetTopSites() diffs correctly: the diff
// is then taken against the list that was just loaded, so only entries that
// SetTopSites() itself adds (prepopulated pages) show up in the delta.
top_sites_ = sites;
SetTopSites(std::move(sites), CALL_LOCATION_FROM_ON_GOT_MOST_VISITED_URLS);
// Runs any callbacks queued by GetMostVisitedURLs() while loading.
MoveStateToLoaded();
// Start a timer that refreshes top sites from history.
timer_.Start(FROM_HERE, kFirstDelayAtStartup, this,
&TopSitesImpl::StartQueryForMostVisited);
}
// Receives the result of the history query issued by
// StartQueryForMostVisited() and installs `pages` as the new top sites.
void TopSitesImpl::OnTopSitesAvailableFromHistory(MostVisitedURLList pages) {
SetTopSites(std::move(pages), CALL_LOCATION_FROM_OTHER_PLACES);
}
// Reacts to URLs being deleted from history. When the entire history was
// deleted, the top sites and the backend database are reset first. In every
// case a fresh history query is started afterwards. Ignored until loaded.
void TopSitesImpl::OnURLsDeleted(HistoryService* history_service,
                                 const DeletionInfo& deletion_info) {
  if (loaded_) {
    const bool all_history_deleted = deletion_info.IsAllHistory();
    if (all_history_deleted) {
      SetTopSites(MostVisitedURLList(), CALL_LOCATION_FROM_OTHER_PLACES);
      backend_->ResetDatabase();
    }
    StartQueryForMostVisited();
  }
}
} // namespace history