blob: 47aa5374b323aa2a6cad44ac13a8644b2e9c7fba [file] [log] [blame]
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/speech/on_device_speech_recognition_impl.h"
#include "base/rand_util.h"
#include "base/strings/string_util.h"
#include "base/task/single_thread_task_runner.h"
#include "base/task/task_runner.h"
#include "base/time/time.h"
#include "chrome/browser/browser_process.h"
#include "chrome/browser/content_settings/host_content_settings_map_factory.h"
#include "chrome/browser/profiles/profile.h"
#include "components/content_settings/core/browser/host_content_settings_map.h"
#include "components/content_settings/core/common/content_settings.h"
#include "components/content_settings/core/common/content_settings_types.h"
#include "components/prefs/pref_service.h"
#include "components/soda/constants.h"
#include "components/soda/pref_names.h"
#include "components/soda/soda_installer.h"
#include "content/public/browser/document_user_data.h"
#include "content/public/browser/render_frame_host.h"
#include "content/public/browser/render_process_host.h"
#include "media/base/media_switches.h"
#include "media/mojo/mojom/speech_recognizer.mojom.h"
#include "mojo/public/cpp/bindings/receiver_set.h"
#include "mojo/public/cpp/bindings/self_owned_receiver.h"
#if !BUILDFLAG(IS_ANDROID)
#include "components/soda/soda_util.h"
namespace {
// Dictionary key under which the per-origin website setting stores the list
// of language names that the origin has already requested to install.
const char kOnDeviceLanguagesDownloadedKey[] = "ondevice-languages-downloaded";
// Returns true when `language_code` appears in the SODA installer's list of
// Live Caption enabled languages.
//
// NOTE(review): `is_soda_binary_installed` is accepted but not consulted by
// this function; confirm whether callers expect it to affect the result.
bool IsLanguageInstallable(const std::string& language_code,
                           bool is_soda_binary_installed) {
  const auto& enabled_languages =
      speech::SodaInstaller::GetInstance()->GetLiveCaptionEnabledLanguages();
  return base::Contains(enabled_languages, language_code);
}
// Returns true when `language_code` is one of the entries of the
// `prefs::kSodaRegisteredLanguagePacks` list stored in local state.
bool IsLanguageInstalled(const std::string& language_code) {
  const auto& registered_packs = g_browser_process->local_state()->GetList(
      prefs::kSodaRegisteredLanguagePacks);
  bool found = false;
  for (const auto& pack : registered_packs) {
    if (pack.GetString() == language_code) {
      found = true;
      break;
    }
  }
  return found;
}
} // namespace
#endif // !BUILDFLAG(IS_ANDROID)
namespace speech {
// Unregisters this object from the SODA installer (non-Android builds only).
OnDeviceSpeechRecognitionImpl::~OnDeviceSpeechRecognitionImpl() {
#if !BUILDFLAG(IS_ANDROID)
  // The installer may already be gone because this object can out-live it.
  // That rules out ScopedObservation, so the observer is removed by hand and
  // only when the installer still exists.
  if (speech::SodaInstaller* installer =
          speech::SodaInstaller::GetInstance()) {
    installer->RemoveObserver(this);
  }
#endif  // !BUILDFLAG(IS_ANDROID)
}
// Binds `receiver` to this object's `receiver_`, taking ownership of the
// pending receiver.
void OnDeviceSpeechRecognitionImpl::Bind(
mojo::PendingReceiver<media::mojom::OnDeviceSpeechRecognition> receiver) {
receiver_.Bind(std::move(receiver));
}
void OnDeviceSpeechRecognitionImpl::Available(
const std::vector<std::string>& languages,
OnDeviceSpeechRecognitionImpl::AvailableCallback callback) {
#if BUILDFLAG(IS_ANDROID)
std::move(callback).Run(media::mojom::AvailabilityStatus::kUnavailable);
#else
if (!CanRenderFrameHostUseOnDeviceSpeechRecognition()) {
std::move(callback).Run(media::mojom::AvailabilityStatus::kUnavailable);
return;
}
if (languages.empty()) {
std::move(callback).Run(media::mojom::AvailabilityStatus::kUnavailable);
return;
}
media::mojom::AvailabilityStatus overall_status =
media::mojom::AvailabilityStatus::kAvailable;
for (const std::string& language : languages) {
std::optional<speech::SodaLanguagePackComponentConfig> language_config =
speech::GetLanguageComponentConfigMatchingLanguageSubtag(language);
if (!language_config.has_value()) {
std::move(callback).Run(media::mojom::AvailabilityStatus::kUnavailable);
return;
}
// According to the spec, the status returned by this API should be the
// minimum status. I.e., the API returns:
// 'available' if all languages are available
// 'downloading' if all languages are either downloading or available
// 'downloadable' if all languages are either available, downloading, or
// downloadable 'unavailable' in if one or more language is unavailable
media::mojom::AvailabilityStatus status =
GetMaskedAvailabilityStatus(language_config.value().language_name);
if (status < overall_status) {
overall_status = status;
}
}
std::move(callback).Run(overall_status);
#endif // BUILDFLAG(IS_ANDROID)
}
void OnDeviceSpeechRecognitionImpl::Install(
const std::vector<std::string>& languages,
OnDeviceSpeechRecognitionImpl::InstallCallback callback) {
#if BUILDFLAG(IS_ANDROID)
std::move(callback).Run(false);
}
#else
for (const std::string& language : languages) {
std::optional<speech::SodaLanguagePackComponentConfig> language_config =
speech::GetLanguageComponentConfigMatchingLanguageSubtag(language);
if (!language_config.has_value() ||
!IsLanguageInstallable(
language_config.value().language_name,
speech::SodaInstaller::GetInstance()->IsSodaBinaryInstalled())) {
std::move(callback).Run(false);
return;
}
}
if (!CanRenderFrameHostUseOnDeviceSpeechRecognition()) {
std::move(callback).Run(false);
return;
}
base::SingleThreadTaskRunner::GetCurrentDefault()->PostDelayedTask(
FROM_HERE,
base::BindOnce(&OnDeviceSpeechRecognitionImpl::InstallLanguageInternal,
weak_ptr_factory_.GetWeakPtr(), languages,
std::move(callback)),
GetDownloadDelay(languages));
}
// SODA installer notification: installation finished for `language_code`.
// Forwards the language's name as a successful installation update.
void OnDeviceSpeechRecognitionImpl::OnSodaInstalled(
    speech::LanguageCode language_code) {
  const auto& language_name = GetLanguageName(language_code);
  ProcessLanguageInstallationUpdate(language_name,
                                    /*installation_success=*/true);
}
// SODA installer notification: installation failed for `language_code`.
// Forwards the language's name as a failed installation update; `error_code`
// is not inspected.
void OnDeviceSpeechRecognitionImpl::OnSodaInstallError(
    speech::LanguageCode language_code,
    speech::SodaInstaller::ErrorCode error_code) {
  const auto& language_name = GetLanguageName(language_code);
  ProcessLanguageInstallationUpdate(language_name,
                                    /*installation_success=*/false);
}
#endif // !BUILDFLAG(IS_ANDROID)
// Attaches this object to the document of `frame_host` and, on non-Android
// builds, registers it as an observer of the SODA installer when one exists.
OnDeviceSpeechRecognitionImpl::OnDeviceSpeechRecognitionImpl(
    content::RenderFrameHost* frame_host)
    : content::DocumentUserData<OnDeviceSpeechRecognitionImpl>(frame_host) {
#if !BUILDFLAG(IS_ANDROID)
  if (speech::SodaInstaller* installer =
          speech::SodaInstaller::GetInstance()) {
    installer->AddObserver(this);
  }
#endif  // !BUILDFLAG(IS_ANDROID)
}
// Returns whether the owning frame may use on-device speech recognition.
//
// Frames in the browser context's default storage partition are always
// allowed. Frames in any other storage partition are allowed only when their
// last committed URL is not HTTP(S).
bool OnDeviceSpeechRecognitionImpl::
    CanRenderFrameHostUseOnDeviceSpeechRecognition() {
  auto& frame = render_frame_host();
  const bool uses_default_partition =
      frame.GetStoragePartition() ==
      frame.GetBrowserContext()->GetDefaultStoragePartition();
  if (uses_default_partition) {
    return true;
  }
  return !frame.GetLastCommittedURL().SchemeIsHTTPOrHTTPS();
}
#if !BUILDFLAG(IS_ANDROID)
void OnDeviceSpeechRecognitionImpl::InstallLanguageInternal(
const std::vector<std::string>& languages,
OnDeviceSpeechRecognitionImpl::InstallCallback callback) {
std::set<std::string> language_names_key;
for (const std::string& subtag : languages) {
std::optional<speech::SodaLanguagePackComponentConfig> lang_config =
speech::GetLanguageComponentConfigMatchingLanguageSubtag(subtag);
if (lang_config.has_value()) {
language_names_key.insert(lang_config.value().language_name);
}
}
if (language_names_key.empty()) {
std::move(callback).Run(false);
return;
}
language_installation_callbacks_[language_names_key].push_back(
std::move(callback));
// `InstallSoda` will only install the SODA binary if it is not already
// installed.
speech::SodaInstaller::GetInstance()->InstallSoda(
g_browser_process->local_state());
// `InstallLanguage` will only install languages that are not already
// installed.
for (const std::string& language : language_names_key) {
speech::SodaInstaller::GetInstance()->InstallLanguage(
language, g_browser_process->local_state());
SetOnDeviceLanguageDownloaded(language);
}
}
// Applies an installation result for `language` to every pending callback
// group that was waiting on it.
//
// Each entry of `language_installation_callbacks_` maps the set of languages a
// group still waits for to that group's callbacks. For every group containing
// `language`:
//   * on failure, all of its callbacks are run with false;
//   * on success, `language` is removed from the set; if nothing is left the
//     callbacks are run with true, otherwise the group is stored again under
//     the smaller set (merged into an existing group with the same set).
// Groups not waiting on `language` are left untouched.
void OnDeviceSpeechRecognitionImpl::ProcessLanguageInstallationUpdate(
    const std::string& language,
    bool installation_success) {
  auto it = language_installation_callbacks_.begin();
  while (it != language_installation_callbacks_.end()) {
    if (!it->first.count(language)) {
      // This group was not waiting for `language`.
      ++it;
      continue;
    }

    // Detach the group from the map before touching its callbacks.
    std::set<std::string> remaining_languages = it->first;
    std::list<InstallCallback> group_callbacks = std::move(it->second);
    it = language_installation_callbacks_.erase(it);
    remaining_languages.erase(language);

    // A failure settles the group immediately; a success settles it only once
    // no other language is outstanding.
    const bool group_settled =
        !installation_success || remaining_languages.empty();
    if (group_settled) {
      for (auto& pending_callback : group_callbacks) {
        std::move(pending_callback).Run(installation_success);
      }
      continue;
    }

    // Still waiting for other languages: re-insert under the reduced key,
    // appending to an existing group if that key is already present.
    auto [slot, inserted] = language_installation_callbacks_.emplace(
        std::move(remaining_languages), std::list<InstallCallback>());
    slot->second.splice(slot->second.end(), group_callbacks);
  }
}
// Reads the ON_DEVICE_SPEECH_RECOGNITION_LANGUAGES_DOWNLOADED website setting
// for the frame's last committed origin and returns the stored value.
base::Value
OnDeviceSpeechRecognitionImpl::GetOnDeviceLanguagesDownloadedValue() {
  const GURL origin_url = render_frame_host().GetLastCommittedOrigin().GetURL();
  auto* const settings_map = HostContentSettingsMapFactory::GetForProfile(
      render_frame_host().GetBrowserContext());
  return settings_map->GetWebsiteSetting(
      origin_url, origin_url,
      ContentSettingsType::ON_DEVICE_SPEECH_RECOGNITION_LANGUAGES_DOWNLOADED,
      /*info=*/nullptr);
}
// Stores `on_device_languages_downloaded` as the
// ON_DEVICE_SPEECH_RECOGNITION_LANGUAGES_DOWNLOADED website setting for the
// frame's last committed origin.
void OnDeviceSpeechRecognitionImpl::
    SetOnDeviceLanguagesDownloadedContentSetting(
        base::Value on_device_languages_downloaded) {
  const GURL origin_url = render_frame_host().GetLastCommittedOrigin().GetURL();
  auto* const settings_map = HostContentSettingsMapFactory::GetForProfile(
      render_frame_host().GetBrowserContext());
  settings_map->SetWebsiteSettingDefaultScope(
      origin_url, origin_url,
      ContentSettingsType::ON_DEVICE_SPEECH_RECOGNITION_LANGUAGES_DOWNLOADED,
      std::move(on_device_languages_downloaded));
}
// Returns the availability of `language` as it should be shown to the current
// origin.
//
// The real status is passed through unchanged unless it is kAvailable. In that
// case the origin only sees kAvailable if it has itself recorded a download of
// `language`; otherwise it sees kDownloadable.
media::mojom::AvailabilityStatus
OnDeviceSpeechRecognitionImpl::GetMaskedAvailabilityStatus(
    const std::string& language) {
  const media::mojom::AvailabilityStatus real_status =
      IsOnDeviceSpeechRecognitionAvailable(language);
  if (real_status != media::mojom::AvailabilityStatus::kAvailable) {
    return real_status;
  }
  return HasOnDeviceLanguageDownloaded(language)
             ? media::mojom::AvailabilityStatus::kAvailable
             : media::mojom::AvailabilityStatus::kDownloadable;
}
bool OnDeviceSpeechRecognitionImpl::HasOnDeviceLanguageDownloaded(
const std::string& language) {
const GURL url = render_frame_host().GetLastCommittedOrigin().GetURL();
if (!url.is_valid() || url.SchemeIsFile()) {
return transient_on_device_languages_downloaded_.contains(language);
}
base::Value on_device_languages_downloaded_value =
GetOnDeviceLanguagesDownloadedValue();
if (on_device_languages_downloaded_value.is_dict()) {
return on_device_languages_downloaded_value.GetDict()
.EnsureList(kOnDeviceLanguagesDownloadedKey)
->contains(language);
}
return false;
}
void OnDeviceSpeechRecognitionImpl::SetOnDeviceLanguageDownloaded(
const std::string& language) {
const GURL url = render_frame_host().GetLastCommittedOrigin().GetURL();
if (!url.is_valid() || url.SchemeIsFile()) {
transient_on_device_languages_downloaded_.insert(language);
return;
}
base::Value on_device_languages_downloaded_value =
GetOnDeviceLanguagesDownloadedValue();
// Initialize a list to store data, if none exists.
if (!on_device_languages_downloaded_value.is_dict()) {
on_device_languages_downloaded_value = base::Value(base::Value::Dict());
}
// Update or initialize the list of targets for the source language.
base::Value::List* on_device_languages_downloaded_list =
on_device_languages_downloaded_value.GetDict().EnsureList(
kOnDeviceLanguagesDownloadedKey);
if (!on_device_languages_downloaded_list->contains(language)) {
on_device_languages_downloaded_list->Append(language);
}
SetOnDeviceLanguagesDownloadedContentSetting(
std::move(on_device_languages_downloaded_value));
}
// Returns how long an install request for `languages` should be delayed.
//
// A random delay between 2 and 3 seconds is returned as soon as one requested
// language is already installed but is still presented to this origin as
// kDownloadable. Otherwise a zero delay is returned.
base::TimeDelta OnDeviceSpeechRecognitionImpl::GetDownloadDelay(
    const std::vector<std::string>& languages) {
  for (const std::string& subtag : languages) {
    const std::optional<speech::SodaLanguagePackComponentConfig> config =
        speech::GetLanguageComponentConfigMatchingLanguageSubtag(subtag);
    if (!config) {
      // Subtags that do not map to a SODA language do not influence the
      // delay.
      continue;
    }

    const std::string& language_name = config->language_name;

    // If the language is already installed but the origin isn't supposed to
    // know that, simulate the duration of a real download.
    const bool shown_as_downloadable =
        GetMaskedAvailabilityStatus(language_name) ==
        media::mojom::AvailabilityStatus::kDownloadable;
    if (shown_as_downloadable && IsLanguageInstalled(language_name)) {
      return base::RandTimeDelta(base::Seconds(2), base::Seconds(3));
    }
  }

  return base::TimeDelta();
}
#endif // !BUILDFLAG(IS_ANDROID)
// NOTE(review): presumably emits the key definition that
// content::DocumentUserData requires for this class; confirm against
// content/public/browser/document_user_data.h.
DOCUMENT_USER_DATA_KEY_IMPL(OnDeviceSpeechRecognitionImpl);
} // namespace speech