// blob: 1f1495f487410e09831ee5554f8c1706bc9eb493 [file] [log] [blame]
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "extensions/renderer/i18n_hooks_util.h"
#include <vector>
#include "base/logging.h"
#include "base/macros.h"
#include "base/metrics/histogram_macros.h"
#include "content/public/renderer/render_frame.h"
#include "extensions/common/extension_messages.h"
#include "extensions/common/message_bundle.h"
#include "gin/data_object_builder.h"
#include "third_party/cld_3/src/src/nnet_language_identifier.h"
namespace extensions {
namespace i18n_hooks {
namespace {
// Upper bound on how many languages are requested from the language
// identifier for a single piece of text.
constexpr int kCldNumLangs = 3;

// Inputs shorter than this many bytes are treated as too small for CLD3 to
// classify reliably; their predictions are reported as unreliable.
constexpr int kCld3MinimumByteThreshold = 50;
// One language prediction: a language code together with an integer
// percentage for that language.
struct DetectedLanguage {
  DetectedLanguage(const std::string& language_code, int percent)
      : language(language_code), percentage(percent) {}

  // Serializes this entry into a freshly created V8 value and returns it.
  v8::Local<v8::Value> ToV8(v8::Isolate* isolate) const;

  // Language code of the prediction.
  std::string language;

  // Integer percentage associated with |language|.
  int percentage;
};
// Outcome of a language-detection run: an overall reliability flag plus the
// list of DetectedLanguage entries. Instances cannot be copied.
struct LanguageDetectionResult {
  LanguageDetectionResult() = default;
  ~LanguageDetectionResult() = default;

  LanguageDetectionResult(const LanguageDetectionResult&) = delete;
  LanguageDetectionResult& operator=(const LanguageDetectionResult&) = delete;

  // Serializes this result into a freshly created V8 value and returns it.
  v8::Local<v8::Value> ToV8(v8::Local<v8::Context> context) const;

  // Whether the detection as a whole is considered reliable.
  bool is_reliable = false;

  // The detected languages. Left empty when no language was detected.
  std::vector<DetectedLanguage> languages;
};
// Builds a V8 object exposing this entry as the properties "language" and
// "percentage".
v8::Local<v8::Value> DetectedLanguage::ToV8(v8::Isolate* isolate) const {
  gin::DataObjectBuilder builder(isolate);
  builder.Set("language", language);
  builder.Set("percentage", percentage);
  return builder.Build();
}
// Builds a V8 object with two properties: "isReliable" (the reliability flag)
// and "languages" (an array holding the serialized form of every entry in
// |languages|, in order). |context| must be the isolate's current context.
v8::Local<v8::Value> LanguageDetectionResult::ToV8(
    v8::Local<v8::Context> context) const {
  v8::Isolate* isolate = context->GetIsolate();
  DCHECK(isolate->GetCurrentContext() == context);

  v8::Local<v8::Array> language_array =
      v8::Array::New(isolate, languages.size());
  uint32_t index = 0;
  for (const DetectedLanguage& entry : languages) {
    bool created =
        language_array
            ->CreateDataProperty(context, index, entry.ToV8(isolate))
            .ToChecked();
    DCHECK(created) << "CreateDataProperty() should never fail.";
    ++index;
  }

  gin::DataObjectBuilder builder(isolate);
  builder.Set("isReliable", is_reliable);
  builder.Set("languages", language_array.As<v8::Value>());
  return builder.Build();
}
// Fills |result| from the raw CLD3 predictions in |lang_results|.
//
// Each accepted prediction is appended to |result->languages| (which must be
// empty on entry) as a language code plus an integer percentage. The overall
// |result->is_reliable| flag ends up true only if at least one prediction was
// accepted and every accepted prediction was itself reliable.
void InitDetectedLanguages(
    const std::vector<chrome_lang_id::NNetLanguageIdentifier::Result>&
        lang_results,
    LanguageDetectionResult* result) {
  DCHECK(result->languages.empty());

  // Start optimistic and AND in the reliability of every accepted prediction.
  bool all_reliable = true;

  for (const auto& prediction : lang_results) {
    // Once an unknown language shows up, every later entry is unknown too, so
    // there is nothing more to collect.
    if (prediction.language ==
        chrome_lang_id::NNetLanguageIdentifier::kUnknown) {
      break;
    }

    // CLD3's supported languages (kLanguageNames in
    // //src/third_party/cld_3/src/src/task_context_params.cc) include
    // transliterated "xx-Latn" variants. Those are not part of ISO639-1,
    // which the previous model (CLD2) used, so they are skipped for now to
    // stay backwards compatible.
    const bool is_transliterated = base::EndsWith(
        prediction.language, "-Latn", base::CompareCase::INSENSITIVE_ASCII);
    if (is_transliterated)
      continue;

    all_reliable = all_reliable && prediction.is_reliable;
    result->languages.emplace_back(
        prediction.language, static_cast<int>(100 * prediction.proportion));
  }

  // With no accepted languages the result is never reported as reliable.
  result->is_reliable = all_reliable && !result->languages.empty();
}
} // namespace
// Looks up |message_name| for the extension |extension_id| and returns it as
// a V8 string with placeholders replaced.
//
// The extension's messages are taken from the map returned by
// GetExtensionToL10nMessagesMap(). If the extension has no entry yet, one is
// created and filled by a synchronous message sent through |render_frame|;
// when |render_frame| is null in that situation, undefined is returned.
//
// |v8_substitutions| may be an array of at most 9 values or a single string.
// An array with more entries, or one whose element cannot be read, yields
// undefined. Any other kind of value contributes no substitutions.
v8::Local<v8::Value> GetI18nMessage(const std::string& message_name,
                                    const std::string& extension_id,
                                    v8::Local<v8::Value> v8_substitutions,
                                    content::RenderFrame* render_frame,
                                    v8::Local<v8::Context> context) {
  v8::Isolate* isolate = context->GetIsolate();

  ExtensionToL10nMessagesMap& catalogs = *GetExtensionToL10nMessagesMap();
  L10nMessagesMap* catalog = nullptr;
  auto cached = catalogs.find(extension_id);
  if (cached == catalogs.end()) {
    // Nothing cached for this extension, and no frame to ask the browser
    // through: give up without touching the map.
    if (!render_frame)
      return v8::Undefined(isolate);

    catalog = &catalogs[extension_id];

    // A sync call to load message catalogs for current extension.
    // TODO(devlin): Wait, what?! A synchronous call to the browser to perform
    // potentially blocking work reading files from disk? That's Bad.
    {
      SCOPED_UMA_HISTOGRAM_TIMER("Extensions.SyncGetMessageBundle");
      render_frame->Send(
          new ExtensionHostMsg_GetMessageBundle(extension_id, catalog));
    }
  } else {
    catalog = &cached->second;
  }

  std::string localized = MessageBundle::GetL10nMessage(message_name, *catalog);

  std::vector<std::string> replacements;

  // For now, we just suppress all errors, but that's really not the best.
  // See https://crbug.com/807769.
  v8::TryCatch try_catch(isolate);

  if (v8_substitutions->IsArray()) {
    // chrome.i18n.getMessage("message_name", ["more", "params"]);
    // Largest accepted array length; presumably this mirrors the $1-$9
    // placeholder range of ReplaceStringPlaceholders() - TODO confirm.
    constexpr uint32_t kMaxPlaceholders = 9;

    v8::Local<v8::Array> values = v8_substitutions.As<v8::Array>();
    const uint32_t value_count = values->Length();
    if (value_count > kMaxPlaceholders)
      return v8::Undefined(isolate);

    for (uint32_t index = 0; index < value_count; ++index) {
      v8::Local<v8::Value> value;
      if (!values->Get(context, index).ToLocal(&value))
        return v8::Undefined(isolate);

      // Converting an entry to a JS string can fail. String::Utf8Value()
      // catches such an error instead of surfacing it to the calling script
      // (the attempt may still be observable, because it goes through the
      // object's toString() method). A failed entry is silently dropped.
      v8::String::Utf8Value utf8(isolate, value);
      const char* chars = *utf8;
      if (chars)
        replacements.push_back(chars);
    }
  } else if (v8_substitutions->IsString()) {
    // chrome.i18n.getMessage("message_name", "one param");
    replacements.push_back(gin::V8ToString(isolate, v8_substitutions));
  }
  // TODO(devlin): We currently just ignore any non-string, non-array values
  // for substitutions, but the type is documented as 'any'. We should either
  // enforce type more heavily, or throw an error here.

  // NOTE: ReplaceStringPlaceholders is called even when |replacements| is
  // empty, because it also turns $$ into $ (so a message can display a dollar
  // sign). See https://crbug.com/127243.
  localized = base::ReplaceStringPlaceholders(localized, replacements, nullptr);
  return gin::StringToV8(isolate, localized);
}
// Runs the CLD3 language identifier over |text| and returns the outcome as a
// V8 value built by LanguageDetectionResult::ToV8(): a reliability flag plus
// the detected languages with their percentages. At most kCldNumLangs
// languages are requested from the identifier.
v8::Local<v8::Value> DetectTextLanguage(v8::Local<v8::Context> context,
                                        const std::string& text) {
  chrome_lang_id::NNetLanguageIdentifier identifier(/*min_num_bytes=*/0,
                                                    /*max_num_bytes=*/512);
  std::vector<chrome_lang_id::NNetLanguageIdentifier::Result> predictions =
      identifier.FindTopNMostFreqLangs(text, kCldNumLangs);

  // Input below the byte threshold is considered too short for the current
  // model to identify accurately, so no prediction may claim reliability.
  const bool too_short = text.size() < kCld3MinimumByteThreshold;
  if (too_short) {
    for (auto& prediction : predictions)
      prediction.is_reliable = false;
  }

  // Convert the raw predictions into the reliability flag, languages and
  // percentages that get handed back to script.
  LanguageDetectionResult detection;
  InitDetectedLanguages(predictions, &detection);
  return detection.ToV8(context);
}
} // namespace i18n_hooks
} // namespace extensions