blob: f68b1806aa582b770cc14ef6ac6cf1c997ae984c [file] [log] [blame]
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/ntp_snippets/remote/remote_suggestion.h"
#include "base/feature_list.h"
#include "base/memory/ptr_util.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "base/values.h"
#include "components/ntp_snippets/category.h"
#include "components/ntp_snippets/features.h"
#include "components/ntp_snippets/remote/proto/ntp_snippets.pb.h"
namespace {
// One place an article is hosted: the article URL, the publisher's display
// name, and the AMP URL of the article. Plain value type; the publisher name
// and AMP URL may be empty.
struct SnippetSource {
  GURL url;
  std::string publisher_name;
  GURL amp_url;

  SnippetSource(const GURL& source_url,
                const std::string& source_publisher_name,
                const GURL& source_amp_url)
      : url(source_url),
        publisher_name(source_publisher_name),
        amp_url(source_amp_url) {}
};
// Picks the source that should represent an article.
//
// The same article can be hosted by multiple sources, e.g. nytimes.com,
// cnn.com, etc. In order of preference:
//  1) A source that has URL, publisher name, AMP URL
//  2) A source that has URL, publisher name
//  3) A source that has URL and AMP URL, or URL only (since we won't show
//     the snippet to users if the article does not have a publisher name, it
//     doesn't matter whether the snippet has the AMP URL or not)
//
// Concretely: the first source with both a publisher name and an AMP URL
// wins; failing that, the last source with a publisher name; failing that,
// the first source in the list.
//
// |sources| must not be empty (element 0 is the fallback). The returned
// reference points into |sources|.
const SnippetSource& FindBestSource(const std::vector<SnippetSource>& sources) {
  // Same type as the loop index, so assigning it never narrows.
  size_t best_source_index = 0;
  for (size_t i = 0; i < sources.size(); ++i) {
    const SnippetSource& source = sources[i];
    if (source.publisher_name.empty()) {
      continue;
    }
    best_source_index = i;
    if (!source.amp_url.is_empty()) {
      // This is the best possible source, stop looking.
      break;
    }
  }
  return sources[best_source_index];
}
// Reads the string stored under |key| in |dict| and parses it into |time|
// with base::Time::FromString(). Returns false if the string lookup fails or
// the string cannot be parsed.
bool GetTimeValue(const base::DictionaryValue& dict,
                  const std::string& key,
                  base::Time* time) {
  std::string raw_time;
  if (!dict.GetString(key, &raw_time)) {
    return false;
  }
  return base::Time::FromString(raw_time.c_str(), time);
}
// Reads the string stored under |key| in |dict| and stores it in |url| as a
// GURL. Returns true only if the string lookup succeeds and the resulting URL
// is valid. Note that |url| is overwritten whenever the lookup succeeds, even
// if the URL then turns out to be invalid; it is left untouched otherwise.
bool GetURLValue(const base::DictionaryValue& dict,
                 const std::string& key,
                 GURL* url) {
  std::string raw_url;
  const bool found = dict.GetString(key, &raw_url);
  if (found) {
    *url = GURL(raw_url);
  }
  return found && url->is_valid();
}
} // namespace
namespace ntp_snippets {
// Remote category id of the articles category. Chrome Reader snippets are
// always created with this id, and CreateFromProto() falls back to it when a
// stored proto carries no remote category id.
const int kArticlesRemoteId = 1;
// Keeps the constant above in sync with the KnownCategories enum: ARTICLES
// must sit exactly kArticlesRemoteId past REMOTE_CATEGORIES_OFFSET.
static_assert(
static_cast<int>(KnownCategories::ARTICLES) -
static_cast<int>(KnownCategories::REMOTE_CATEGORIES_OFFSET) ==
kArticlesRemoteId,
"kArticlesRemoteId has a wrong value?!");
// Default lifetime of a Chrome Reader snippet in minutes (3 days). It is added
// to the publish date when the parsed snippet has no expiry date.
const int kChromeReaderDefaultExpiryTimeMins = 3 * 24 * 60;
RemoteSuggestion::RemoteSuggestion(const std::vector<std::string>& ids,
int remote_category_id)
: ids_(ids),
score_(0),
is_dismissed_(false),
remote_category_id_(remote_category_id),
should_notify_(false) {}
// Compiler-generated destructor, defined out of line in this file.
RemoteSuggestion::~RemoteSuggestion() = default;
// Builds a suggestion from one Chrome Reader response entry.
//
// Required in |dict|:
//  - a "contentInfo" dictionary with a non-empty "url" string, which becomes
//    the primary id;
//  - inside it, a "sourceCorpusInfo" list with at least one dictionary entry
//    whose "corpusId" is a valid URL.
// Optional in "contentInfo": "title", "thumbnailUrl", "snippet",
// "creationTimestampSec" and "expiryTimestampSec". Optional at the top level
// of |dict|: "score".
//
// Each usable source contributes its raw "corpusId" string as an additional
// id. The suggestion always belongs to the articles remote category.
// Returns nullptr if any required piece is missing.
// static
std::unique_ptr<RemoteSuggestion>
RemoteSuggestion::CreateFromChromeReaderDictionary(
    const base::DictionaryValue& dict) {
  const base::DictionaryValue* content = nullptr;
  if (!dict.GetDictionary("contentInfo", &content)) {
    return nullptr;
  }

  // Need at least a primary id.
  std::string primary_id;
  if (!content->GetString("url", &primary_id) || primary_id.empty()) {
    return nullptr;
  }

  const base::ListValue* corpus_infos_list = nullptr;
  if (!content->GetList("sourceCorpusInfo", &corpus_infos_list)) {
    DLOG(WARNING) << "No sources found for article " << primary_id;
    return nullptr;
  }

  std::vector<std::string> ids(1, primary_id);
  std::vector<SnippetSource> sources;
  for (const auto& value : *corpus_infos_list) {
    const base::DictionaryValue* dict_value = nullptr;
    if (!value->GetAsDictionary(&dict_value)) {
      DLOG(WARNING) << "Invalid source info for article " << primary_id;
      continue;
    }

    std::string corpus_id_str;
    GURL corpus_id;
    if (dict_value->GetString("corpusId", &corpus_id_str)) {
      corpus_id = GURL(corpus_id_str);
    }
    if (!corpus_id.is_valid()) {
      // We must at least have a valid source URL.
      DLOG(WARNING) << "Invalid article url " << corpus_id_str;
      continue;
    }

    const base::DictionaryValue* publisher_data = nullptr;
    std::string site_title;
    if (dict_value->GetDictionary("publisherData", &publisher_data)) {
      if (!publisher_data->GetString("sourceName", &site_title)) {
        // It's possible but not desirable to have no publisher data.
        DLOG(WARNING) << "No publisher name for article " << corpus_id_str;
      }
    } else {
      DLOG(WARNING) << "No publisher data for article " << corpus_id_str;
    }

    std::string amp_url_str;
    GURL amp_url;
    // Expected to not have AMP url sometimes.
    if (dict_value->GetString("ampUrl", &amp_url_str)) {
      amp_url = GURL(amp_url_str);
      DLOG_IF(WARNING, !amp_url.is_valid()) << "Invalid AMP url "
                                            << amp_url_str;
    }
    // An invalid AMP URL is dropped rather than stored.
    sources.emplace_back(corpus_id, site_title,
                         amp_url.is_valid() ? amp_url : GURL());

    // We use the raw string so that we can compare it against other primary
    // IDs. Parsing the ID as a URL might add a trailing slash (and we don't do
    // this for the primary ID).
    ids.push_back(corpus_id_str);
  }
  if (sources.empty()) {
    DLOG(WARNING) << "No sources found for article " << primary_id;
    return nullptr;
  }

  // Go through the shared factory helper, like the other Create* functions,
  // instead of a naked new.
  std::unique_ptr<RemoteSuggestion> snippet =
      MakeUnique(ids, kArticlesRemoteId);

  std::string title;
  if (content->GetString("title", &title)) {
    snippet->title_ = title;
  }
  std::string salient_image_url;
  if (content->GetString("thumbnailUrl", &salient_image_url)) {
    snippet->salient_image_url_ = GURL(salient_image_url);
  }
  std::string snippet_str;
  if (content->GetString("snippet", &snippet_str)) {
    snippet->snippet_ = snippet_str;
  }

  // The creation and expiry timestamps are uint64s which are stored as strings.
  std::string creation_timestamp_str;
  if (content->GetString("creationTimestampSec", &creation_timestamp_str)) {
    snippet->publish_date_ = TimeFromJsonString(creation_timestamp_str);
  }
  std::string expiry_timestamp_str;
  if (content->GetString("expiryTimestampSec", &expiry_timestamp_str)) {
    snippet->expiry_date_ = TimeFromJsonString(expiry_timestamp_str);
  }

  // If publish and/or expiry date are missing, fill in reasonable defaults.
  if (snippet->publish_date_.is_null()) {
    snippet->publish_date_ = base::Time::Now();
  }
  if (snippet->expiry_date_.is_null()) {
    snippet->expiry_date_ =
        snippet->publish_date() +
        base::TimeDelta::FromMinutes(kChromeReaderDefaultExpiryTimeMins);
  }

  const SnippetSource& source = FindBestSource(sources);
  snippet->url_ = source.url;
  snippet->publisher_name_ = source.publisher_name;
  snippet->amp_url_ = source.amp_url;

  // The score lives next to "contentInfo", not inside it.
  double score;
  if (dict.GetDouble("score", &score)) {
    snippet->score_ = score;
  }

  return snippet;
}
// Builds a suggestion for |remote_category_id| from one content suggestions
// response entry.
//
// Required in |dict|: a non-empty "ids" list of strings, "title", "snippet",
// "creationTime", "expirationTime", a valid "imageUrl", "attribution" and a
// valid "fullPageUrl". Optional: "ampUrl", "score" and a "notificationInfo"
// dictionary ("shouldNotify", "deadline").
//
// Returns nullptr if any required field is missing or malformed.
// static
std::unique_ptr<RemoteSuggestion>
RemoteSuggestion::CreateFromContentSuggestionsDictionary(
    const base::DictionaryValue& dict,
    int remote_category_id) {
  const base::ListValue* ids = nullptr;
  if (!dict.GetList("ids", &ids)) {
    return nullptr;
  }

  std::vector<std::string> parsed_ids;
  for (const auto& value : *ids) {
    std::string id;
    if (!value->GetAsString(&id)) {
      return nullptr;
    }
    parsed_ids.push_back(id);
  }
  if (parsed_ids.empty()) {
    return nullptr;
  }

  auto snippet = MakeUnique(parsed_ids, remote_category_id);
  if (!(dict.GetString("title", &snippet->title_) &&
        dict.GetString("snippet", &snippet->snippet_) &&
        GetTimeValue(dict, "creationTime", &snippet->publish_date_) &&
        GetTimeValue(dict, "expirationTime", &snippet->expiry_date_) &&
        GetURLValue(dict, "imageUrl", &snippet->salient_image_url_) &&
        dict.GetString("attribution", &snippet->publisher_name_) &&
        GetURLValue(dict, "fullPageUrl", &snippet->url_))) {
    return nullptr;
  }

  // The AMP URL is optional, so failure is OK. However, GetURLValue() stores
  // the parsed URL even when it is invalid; reset it in that case so that a
  // malformed "ampUrl" cannot be mistaken for a usable (non-empty) AMP URL.
  if (!GetURLValue(dict, "ampUrl", &snippet->amp_url_)) {
    snippet->amp_url_ = GURL();
  }
  // TODO(sfiera): also favicon URL.

  double score;
  if (dict.GetDouble("score", &score)) {
    snippet->score_ = score;
  }

  const base::DictionaryValue* notification_info = nullptr;
  if (dict.GetDictionary("notificationInfo", &notification_info)) {
    if (notification_info->GetBoolean("shouldNotify",
                                      &snippet->should_notify_) &&
        snippet->should_notify_) {
      // A notification without a parsable deadline never expires.
      if (!GetTimeValue(*notification_info, "deadline",
                        &snippet->notification_deadline_)) {
        snippet->notification_deadline_ = base::Time::Max();
      }
    }
  }

  return snippet;
}
// static
std::unique_ptr<RemoteSuggestion> RemoteSuggestion::CreateFromProto(
const SnippetProto& proto) {
// Need at least the id.
if (proto.ids_size() == 0 || proto.ids(0).empty()) {
return nullptr;
}
int remote_category_id = proto.has_remote_category_id()
? proto.remote_category_id()
: kArticlesRemoteId;
std::vector<std::string> ids(proto.ids().begin(), proto.ids().end());
auto snippet = MakeUnique(ids, remote_category_id);
snippet->title_ = proto.title();
snippet->snippet_ = proto.snippet();
snippet->salient_image_url_ = GURL(proto.salient_image_url());
snippet->publish_date_ = base::Time::FromInternalValue(proto.publish_date());
snippet->expiry_date_ = base::Time::FromInternalValue(proto.expiry_date());
snippet->score_ = proto.score();
snippet->is_dismissed_ = proto.dismissed();
std::vector<SnippetSource> sources;
for (int i = 0; i < proto.sources_size(); ++i) {
const SnippetSourceProto& source_proto = proto.sources(i);
GURL url(source_proto.url());
if (!url.is_valid()) {
// We must at least have a valid source URL.
DLOG(WARNING) << "Invalid article url " << source_proto.url();
continue;
}
GURL amp_url;
if (source_proto.has_amp_url()) {
amp_url = GURL(source_proto.amp_url());
DLOG_IF(WARNING, !amp_url.is_valid()) << "Invalid AMP URL "
<< source_proto.amp_url();
}
sources.emplace_back(url, source_proto.publisher_name(), amp_url);
}
if (sources.empty()) {
DLOG(WARNING) << "No sources found for article " << snippet->id();
return nullptr;
}
const SnippetSource& source = FindBestSource(sources);
snippet->url_ = source.url;
snippet->publisher_name_ = source.publisher_name;
snippet->amp_url_ = source.amp_url;
return snippet;
}
// static
std::unique_ptr<RemoteSuggestion> RemoteSuggestion::CreateForTesting(
const std::string& id,
int remote_category_id,
const GURL& url,
const std::string& publisher_name,
const GURL& amp_url) {
auto snippet =
MakeUnique(std::vector<std::string>(1, id), remote_category_id);
snippet->url_ = url;
snippet->publisher_name_ = publisher_name;
snippet->amp_url_ = amp_url;
return snippet;
}
// Serializes this suggestion into a SnippetProto.
//
// Ids, score, dismissed state and remote category id are always written.
// Title, snippet text, image URL and the two dates are written only when they
// hold a value. Exactly one source entry is emitted, describing the URL,
// publisher name and AMP URL currently stored on this suggestion.
SnippetProto RemoteSuggestion::ToProto() const {
  SnippetProto proto;

  // Unconditional fields.
  for (const auto& id : ids_) {
    proto.add_ids(id);
  }
  proto.set_score(score_);
  proto.set_dismissed(is_dismissed_);
  proto.set_remote_category_id(remote_category_id_);

  // Optional text and image fields.
  if (!title_.empty()) {
    proto.set_title(title_);
  }
  if (!snippet_.empty()) {
    proto.set_snippet(snippet_);
  }
  if (salient_image_url_.is_valid()) {
    proto.set_salient_image_url(salient_image_url_.spec());
  }

  // Optional dates, stored as base::Time internal values.
  if (!publish_date_.is_null()) {
    proto.set_publish_date(publish_date_.ToInternalValue());
  }
  if (!expiry_date_.is_null()) {
    proto.set_expiry_date(expiry_date_.ToInternalValue());
  }

  // The single source entry.
  SnippetSourceProto* source = proto.add_sources();
  source->set_url(url_.spec());
  if (!publisher_name_.empty()) {
    source->set_publisher_name(publisher_name_);
  }
  if (amp_url_.is_valid()) {
    source->set_amp_url(amp_url_.spec());
  }

  return proto;
}
// Converts this remote suggestion into a ContentSuggestion for |category|.
//
// The suggestion points at the AMP URL when kPreferAmpUrlsFeature is enabled
// and a valid AMP URL is stored; otherwise it points at the regular URL.
// Title, snippet text and publisher name are converted from UTF-8 to UTF-16.
// A NotificationExtra carrying the notification deadline is attached only if
// this suggestion is flagged for notification.
ContentSuggestion RemoteSuggestion::ToContentSuggestion(
    Category category) const {
  GURL url = url_;
  // Check validity rather than emptiness: an invalid GURL can still be
  // non-empty, and must never replace the (valid) regular URL. This matches
  // the is_valid() check ToProto() applies to the AMP URL.
  if (base::FeatureList::IsEnabled(kPreferAmpUrlsFeature) &&
      amp_url_.is_valid()) {
    url = amp_url_;
  }

  ContentSuggestion suggestion(category, id(), url);
  suggestion.set_title(base::UTF8ToUTF16(title_));
  suggestion.set_snippet_text(base::UTF8ToUTF16(snippet_));
  suggestion.set_publish_date(publish_date_);
  suggestion.set_publisher_name(base::UTF8ToUTF16(publisher_name_));
  suggestion.set_score(score_);

  if (should_notify_) {
    NotificationExtra extra;
    extra.deadline = notification_deadline_;
    suggestion.set_notification_extra(
        base::MakeUnique<NotificationExtra>(extra));
  }

  return suggestion;
}
// Interprets |timestamp_str| as a decimal count of seconds since the Unix
// epoch and returns the matching base::Time. A string that cannot be parsed
// as an int64 is logged and treated as 0, i.e. the Unix epoch itself.
// static
base::Time RemoteSuggestion::TimeFromJsonString(
    const std::string& timestamp_str) {
  int64_t seconds_since_epoch = 0;
  const bool parsed = base::StringToInt64(timestamp_str, &seconds_since_epoch);
  if (!parsed) {
    DLOG(WARNING) << "Invalid json timestamp: " << timestamp_str;
    // A failed conversion may still have written garbage to the output
    // variable, so force it back to zero.
    seconds_since_epoch = 0;
  }
  const base::TimeDelta offset =
      base::TimeDelta::FromSeconds(seconds_since_epoch);
  return base::Time::UnixEpoch() + offset;
}
// Formats |time| as the decimal number of whole seconds between the Unix
// epoch and |time|; the counterpart of TimeFromJsonString().
// static
std::string RemoteSuggestion::TimeToJsonString(const base::Time& time) {
  const base::TimeDelta since_epoch = time - base::Time::UnixEpoch();
  return base::Int64ToString(since_epoch.InSeconds());
}
// Heap-allocates a RemoteSuggestion for |ids| and |remote_category_id| and
// returns it wrapped in a std::unique_ptr. Shared by the Create* factories.
// static
std::unique_ptr<RemoteSuggestion> RemoteSuggestion::MakeUnique(
    const std::vector<std::string>& ids,
    int remote_category_id) {
  std::unique_ptr<RemoteSuggestion> suggestion(
      new RemoteSuggestion(ids, remote_category_id));
  return suggestion;
}
} // namespace ntp_snippets