|  | // Copyright 2018 The Chromium Authors | 
|  | // Use of this source code is governed by a BSD-style license that can be | 
|  | // found in the LICENSE file. | 
|  |  | 
|  | #include "content/browser/speech/tts_controller_impl.h" | 
|  |  | 
|  | #include <stddef.h> | 
|  |  | 
|  | #include <algorithm> | 
|  | #include <string> | 
|  | #include <vector> | 
|  |  | 
|  | #include "base/containers/queue.h" | 
|  | #include "base/functional/bind.h" | 
|  | #include "base/json/json_reader.h" | 
|  | #include "base/metrics/histogram_macros.h" | 
|  | #include "base/metrics/user_metrics.h" | 
|  | #include "base/observer_list.h" | 
|  | #include "base/strings/string_util.h" | 
|  | #include "base/task/single_thread_task_runner.h" | 
|  | #include "base/values.h" | 
|  | #include "build/build_config.h" | 
|  | #include "content/browser/speech/tts_utterance_impl.h" | 
|  | #include "content/public/browser/content_browser_client.h" | 
|  | #include "content/public/browser/tts_utterance.h" | 
|  | #include "content/public/browser/visibility.h" | 
|  | #include "content/public/browser/web_contents.h" | 
|  | #include "content/public/common/content_client.h" | 
|  | #include "services/data_decoder/public/cpp/safe_xml_parser.h" | 
|  | #include "services/data_decoder/public/mojom/xml_parser.mojom.h" | 
|  | #include "third_party/blink/public/mojom/speech/speech_synthesis.mojom.h" | 
|  | #include "ui/base/l10n/l10n_util.h" | 
|  |  | 
|  | #if BUILDFLAG(IS_CHROMEOS) | 
|  | #include "content/public/browser/tts_controller_delegate.h" | 
|  | #endif | 
|  |  | 
|  | namespace content { | 
|  | namespace { | 
// Sentinel handed to OnTtsEvent() when an event has no character index.
constexpr int kInvalidCharIndex = -1;

// Sentinel handed to OnTtsEvent() when an event has no length.
constexpr int kInvalidLength = -1;
|  |  | 
|  | #if BUILDFLAG(IS_CHROMEOS) | 
|  | bool VoiceIdMatches( | 
|  | const std::optional<TtsControllerDelegate::PreferredVoiceId>& id, | 
|  | const content::VoiceData& voice) { | 
|  | if (!id.has_value() || voice.name.empty() || | 
|  | (voice.engine_id.empty() && !voice.native)) | 
|  | return false; | 
|  | if (voice.native) | 
|  | return id->name == voice.name && id->id.empty(); | 
|  | return id->name == voice.name && id->id == voice.engine_id; | 
|  | } | 
|  | #endif  // BUILDFLAG(IS_CHROMEOS) | 
|  |  | 
|  | TtsUtteranceImpl* AsUtteranceImpl(TtsUtterance* utterance) { | 
|  | return static_cast<TtsUtteranceImpl*>(utterance); | 
|  | } | 
|  |  | 
|  | bool IsUtteranceSpokenByRemoteEngine(TtsUtterance* utterance) { | 
|  | if (utterance && !utterance->GetEngineId().empty()) { | 
|  | TtsUtteranceImpl* utterance_impl = AsUtteranceImpl(utterance); | 
|  | return utterance_impl->spoken_by_remote_engine(); | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | }  // namespace | 
|  |  | 
|  | // | 
|  | // VoiceData | 
|  | // | 
|  |  | 
|  | VoiceData::VoiceData() : remote(false), native(false) {} | 
|  |  | 
|  | VoiceData::VoiceData(const VoiceData& other) = default; | 
|  |  | 
|  | VoiceData::~VoiceData() {} | 
|  |  | 
|  | // | 
|  | // TtsController | 
|  | // | 
|  |  | 
|  | TtsController* TtsController::GetInstance() { | 
|  | return TtsControllerImpl::GetInstance(); | 
|  | } | 
|  |  | 
|  | void TtsController::SkipAddNetworkChangeObserverForTests(bool enabled) { | 
|  | return TtsControllerImpl::SkipAddNetworkChangeObserverForTests(enabled); | 
|  | } | 
|  |  | 
|  | // IMPORTANT! | 
|  | // These values are written to logs.  Do not renumber or delete | 
|  | // existing items; add new entries to the end of the list. | 
|  | // LINT.IfChange(UMATextToSpeechEvent) | 
// Buckets of the "TextToSpeech.Event" histogram. OnTtsEvent() maps each
// TTS_EVENT_* type onto the enumerator of the same name (TTS_EVENT_ERROR maps
// to SPEECH_ERROR).
enum class UMATextToSpeechEvent {
  START = 0,
  END = 1,
  WORD = 2,
  SENTENCE = 3,
  MARKER = 4,
  INTERRUPTED = 5,
  CANCELLED = 6,
  SPEECH_ERROR = 7,
  PAUSE = 8,
  RESUME = 9,

  // Must remain the final enumerator. Its value may grow as entries are
  // appended, but no other enumerator's value may ever change.
  COUNT
};
|  | // LINT.ThenChange(/tools/metrics/histograms/metadata/accessibility/enums.xml:TextToSpeechEvent) | 
|  |  | 
|  | // | 
|  | // TtsControllerImpl | 
|  | // | 
|  |  | 
|  | // static | 
|  | bool TtsControllerImpl::skip_add_network_change_observer_for_tests_ = false; | 
|  |  | 
|  | // static | 
|  | TtsControllerImpl* TtsControllerImpl::GetInstance() { | 
|  | return base::Singleton<TtsControllerImpl>::get(); | 
|  | } | 
|  |  | 
|  | // static | 
|  | void TtsControllerImpl::SkipAddNetworkChangeObserverForTests(bool enabled) { | 
|  | TtsControllerImpl::skip_add_network_change_observer_for_tests_ = enabled; | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::SetStopSpeakingWhenHidden(bool value) { | 
|  | stop_speaking_when_hidden_ = value; | 
|  | } | 
|  |  | 
|  | TtsControllerImpl::TtsControllerImpl() { | 
|  | if (!skip_add_network_change_observer_for_tests_) { | 
|  | net::NetworkChangeNotifier::AddNetworkChangeObserver(this); | 
|  | } | 
|  | OnNetworkChanged(net::NetworkChangeNotifier::GetConnectionType()); | 
|  | } | 
|  |  | 
|  | TtsControllerImpl::~TtsControllerImpl() { | 
|  | if (current_utterance_) { | 
|  | current_utterance_->Finish(); | 
|  | SetCurrentUtterance(nullptr); | 
|  | } | 
|  |  | 
|  | // Clear any queued utterances too. | 
|  | ClearUtteranceQueue(false);  // Don't sent events. | 
|  |  | 
|  | net::NetworkChangeNotifier::RemoveNetworkChangeObserver(this); | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::SpeakOrEnqueue( | 
|  | std::unique_ptr<TtsUtterance> utterance) { | 
|  | if (!ShouldSpeakUtterance(utterance.get())) { | 
|  | utterance->Finish(); | 
|  | return; | 
|  | } | 
|  |  | 
|  | // If the TTS platform or tts engine delegate is still loading or | 
|  | // initializing, queue or flush the utterance. The utterances can be sent to | 
|  | // platform specific implementation or to the engine implementation. Every | 
|  | // utterances are postponed until the platform specific implementation and | 
|  | // built in tts engine are loaded to avoid races where the utterance gets | 
|  | // dropped unexpectedly. | 
|  | if (TtsPlatformLoading() || | 
|  | (engine_delegate_ && !engine_delegate_->IsBuiltInTtsEngineInitialized( | 
|  | utterance->GetBrowserContext()))) { | 
|  | GetTtsPlatform()->LoadBuiltInTtsEngine(utterance->GetBrowserContext()); | 
|  |  | 
|  | if (utterance->GetShouldClearQueue()) | 
|  | ClearUtteranceQueue(true); | 
|  |  | 
|  | utterance_list_.emplace_back(std::move(utterance)); | 
|  | return; | 
|  | } | 
|  |  | 
|  | // If we're paused and we get an utterance that can't be queued, | 
|  | // flush the queue but stay in the paused state. | 
|  | if (paused_ && utterance->GetShouldClearQueue()) { | 
|  | Stop(); | 
|  | utterance_list_.emplace_back(std::move(utterance)); | 
|  | paused_ = true; | 
|  | return; | 
|  | } | 
|  |  | 
|  | if (paused_ || (IsSpeaking() && !utterance->GetShouldClearQueue())) { | 
|  | utterance_list_.emplace_back(std::move(utterance)); | 
|  | } else { | 
|  | Stop(); | 
|  | SpeakNow(std::move(utterance)); | 
|  | } | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::Stop() { | 
|  | Stop(GURL()); | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::Stop(const GURL& source_url) { | 
|  | StopAndClearQueue(source_url); | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::StopAndClearQueue(const GURL& source_url) { | 
|  | if (StopCurrentUtteranceIfMatches(source_url)) | 
|  | ClearUtteranceQueue(true); | 
|  | } | 
|  |  | 
|  | bool TtsControllerImpl::StopCurrentUtteranceIfMatches(const GURL& source_url) { | 
|  | base::RecordAction(base::UserMetricsAction("TextToSpeech.Stop")); | 
|  |  | 
|  | paused_ = false; | 
|  |  | 
|  | if (!source_url.is_empty() && current_utterance_ && | 
|  | current_utterance_->GetSrcUrl().DeprecatedGetOriginAsURL() != | 
|  | source_url.DeprecatedGetOriginAsURL()) | 
|  | return false; | 
|  |  | 
|  | StopCurrentUtterance(); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::StopCurrentUtterance() { | 
|  | bool spoken_by_remote_engine = | 
|  | IsUtteranceSpokenByRemoteEngine(current_utterance_.get()); | 
|  | if (engine_delegate_ && current_utterance_ && | 
|  | !current_utterance_->GetEngineId().empty() && !spoken_by_remote_engine) { | 
|  | engine_delegate_->Stop(current_utterance_.get()); | 
|  | } else if (TtsPlatformReady()) { | 
|  | GetTtsPlatform()->ClearError(); | 
|  | GetTtsPlatform()->StopSpeaking(); | 
|  | } | 
|  |  | 
|  | if (current_utterance_) { | 
|  | current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex, | 
|  | kInvalidLength, std::string()); | 
|  | } | 
|  |  | 
|  | FinishCurrentUtterance(); | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::Pause() { | 
|  | base::RecordAction(base::UserMetricsAction("TextToSpeech.Pause")); | 
|  |  | 
|  | if (paused_) | 
|  | return; | 
|  |  | 
|  | paused_ = true; | 
|  | bool spoken_by_remote_engine = | 
|  | IsUtteranceSpokenByRemoteEngine(current_utterance_.get()); | 
|  | if (engine_delegate_ && current_utterance_ && | 
|  | !current_utterance_->GetEngineId().empty() && !spoken_by_remote_engine) { | 
|  | engine_delegate_->Pause(current_utterance_.get()); | 
|  | } else if (current_utterance_) { | 
|  | DCHECK(TtsPlatformReady()); | 
|  | GetTtsPlatform()->ClearError(); | 
|  | GetTtsPlatform()->Pause(); | 
|  | } | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::Resume() { | 
|  | base::RecordAction(base::UserMetricsAction("TextToSpeech.Resume")); | 
|  |  | 
|  | if (!paused_) | 
|  | return; | 
|  |  | 
|  | paused_ = false; | 
|  | bool spoken_by_remote_engine = | 
|  | IsUtteranceSpokenByRemoteEngine(current_utterance_.get()); | 
|  | if (engine_delegate_ && current_utterance_ && | 
|  | !current_utterance_->GetEngineId().empty() && !spoken_by_remote_engine) { | 
|  | engine_delegate_->Resume(current_utterance_.get()); | 
|  | } else if (current_utterance_) { | 
|  | DCHECK(TtsPlatformReady()); | 
|  | GetTtsPlatform()->ClearError(); | 
|  | GetTtsPlatform()->Resume(); | 
|  | } else { | 
|  | SpeakNextUtterance(); | 
|  | } | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::UninstallLanguageRequest( | 
|  | content::BrowserContext* browser_context, | 
|  | const std::string& lang, | 
|  | const std::string& client_id, | 
|  | int source, | 
|  | bool uninstall_immediately) { | 
|  | if (!engine_delegate_) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | engine_delegate_->UninstallLanguageRequest(browser_context, lang, client_id, | 
|  | source, uninstall_immediately); | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::InstallLanguageRequest(BrowserContext* browser_context, | 
|  | const std::string& lang, | 
|  | const std::string& client_id, | 
|  | int source) { | 
|  | if (!engine_delegate_) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | engine_delegate_->InstallLanguageRequest(browser_context, lang, client_id, | 
|  | source); | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::LanguageStatusRequest(BrowserContext* browser_context, | 
|  | const std::string& lang, | 
|  | const std::string& client_id, | 
|  | int source) { | 
|  | if (!engine_delegate_) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | engine_delegate_->LanguageStatusRequest(browser_context, lang, client_id, | 
|  | source); | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::OnTtsEvent(int utterance_id, | 
|  | TtsEventType event_type, | 
|  | int char_index, | 
|  | int length, | 
|  | const std::string& error_message) { | 
|  | // We may sometimes receive completion callbacks "late", after we've | 
|  | // already finished the utterance (for example because another utterance | 
|  | // interrupted or we got a call to Stop). This is normal and we can | 
|  | // safely just ignore these events. | 
|  | if (!current_utterance_ || utterance_id != current_utterance_->GetId()) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | UMATextToSpeechEvent metric; | 
|  | switch (event_type) { | 
|  | case TTS_EVENT_START: | 
|  | metric = UMATextToSpeechEvent::START; | 
|  | break; | 
|  | case TTS_EVENT_END: | 
|  | metric = UMATextToSpeechEvent::END; | 
|  | break; | 
|  | case TTS_EVENT_WORD: | 
|  | metric = UMATextToSpeechEvent::WORD; | 
|  | break; | 
|  | case TTS_EVENT_SENTENCE: | 
|  | metric = UMATextToSpeechEvent::SENTENCE; | 
|  | break; | 
|  | case TTS_EVENT_MARKER: | 
|  | metric = UMATextToSpeechEvent::MARKER; | 
|  | break; | 
|  | case TTS_EVENT_INTERRUPTED: | 
|  | metric = UMATextToSpeechEvent::INTERRUPTED; | 
|  | break; | 
|  | case TTS_EVENT_CANCELLED: | 
|  | metric = UMATextToSpeechEvent::CANCELLED; | 
|  | break; | 
|  | case TTS_EVENT_ERROR: | 
|  | metric = UMATextToSpeechEvent::SPEECH_ERROR; | 
|  | break; | 
|  | case TTS_EVENT_PAUSE: | 
|  | metric = UMATextToSpeechEvent::PAUSE; | 
|  | break; | 
|  | case TTS_EVENT_RESUME: | 
|  | metric = UMATextToSpeechEvent::RESUME; | 
|  | break; | 
|  | default: | 
|  | NOTREACHED(); | 
|  | } | 
|  | UMA_HISTOGRAM_ENUMERATION("TextToSpeech.Event", metric, | 
|  | UMATextToSpeechEvent::COUNT); | 
|  |  | 
|  | current_utterance_->OnTtsEvent(event_type, char_index, length, error_message); | 
|  | if (current_utterance_->IsFinished()) { | 
|  | FinishCurrentUtterance(); | 
|  | SpeakNextUtterance(); | 
|  | } | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::OnTtsUtteranceBecameInvalid(int utterance_id) { | 
|  | #if BUILDFLAG(IS_CHROMEOS) | 
|  | // This handles the case that the utterance originated from the standalone | 
|  | // browser becomes invalid, we need to stop | 
|  | RemoveUtteranceAndStopIfNeeded(utterance_id); | 
|  | #else | 
|  | NOTREACHED(); | 
|  | #endif | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::GetVoices(BrowserContext* browser_context, | 
|  | const GURL& source_url, | 
|  | std::vector<VoiceData>* out_voices) { | 
|  | // Initialize GetTtsPlatform first, so that engine_delegate_ can be set | 
|  | // if necessary. | 
|  | TtsPlatform* tts_platform = GetTtsPlatform(); | 
|  |  | 
|  | DCHECK(tts_platform); | 
|  | // Ensure we have all built-in voices loaded. This is a no-op if already | 
|  | // loaded. | 
|  | tts_platform->LoadBuiltInTtsEngine(browser_context); | 
|  | if (TtsPlatformReady()) | 
|  | tts_platform->GetVoices(out_voices); | 
|  |  | 
|  | if (browser_context && engine_delegate_ && | 
|  | engine_delegate_->IsBuiltInTtsEngineInitialized(browser_context)) { | 
|  | engine_delegate_->GetVoices(browser_context, source_url, out_voices); | 
|  | } | 
|  |  | 
|  | tts_platform->FinalizeVoiceOrdering(*out_voices); | 
|  |  | 
|  | if (!allow_remote_voices_) { | 
|  | auto it = | 
|  | std::remove_if(out_voices->begin(), out_voices->end(), | 
|  | [](const VoiceData& voice) { return voice.remote; }); | 
|  | out_voices->resize(it - out_voices->begin()); | 
|  | } | 
|  | } | 
|  |  | 
|  | bool TtsControllerImpl::IsSpeaking() { | 
|  | return current_utterance_ != nullptr || | 
|  | (TtsPlatformReady() && GetTtsPlatform()->IsSpeaking()); | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::UpdateLanguageStatus( | 
|  | BrowserContext* browser_context, | 
|  | const std::string& lang, | 
|  | LanguageInstallStatus install_status, | 
|  | const std::string& error) { | 
|  | if (update_language_status_delegates_.empty()) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | for (auto& delegate : update_language_status_delegates_) { | 
|  | delegate.OnUpdateLanguageStatus(browser_context, lang, install_status, | 
|  | error); | 
|  | } | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::AddUpdateLanguageStatusDelegate( | 
|  | UpdateLanguageStatusDelegate* delegate) { | 
|  | update_language_status_delegates_.AddObserver(delegate); | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::RemoveUpdateLanguageStatusDelegate( | 
|  | UpdateLanguageStatusDelegate* delegate) { | 
|  | update_language_status_delegates_.RemoveObserver(delegate); | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::VoicesChanged() { | 
|  | if (voices_changed_delegates_.empty() || TtsPlatformLoading()) | 
|  | return; | 
|  |  | 
|  | // Existence of platform tts indicates explicit requests to tts. Since | 
|  | // |VoicesChanged| can occur implicitly, only send if needed. | 
|  | for (auto& delegate : voices_changed_delegates_) | 
|  | delegate.OnVoicesChanged(); | 
|  |  | 
|  | if (!current_utterance_ && !utterance_list_.empty()) | 
|  | SpeakNextUtterance(); | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::AddVoicesChangedDelegate( | 
|  | VoicesChangedDelegate* delegate) { | 
|  | voices_changed_delegates_.AddObserver(delegate); | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::RemoveVoicesChangedDelegate( | 
|  | VoicesChangedDelegate* delegate) { | 
|  | voices_changed_delegates_.RemoveObserver(delegate); | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::RemoveUtteranceEventDelegate( | 
|  | UtteranceEventDelegate* delegate) { | 
|  | // First clear any pending utterances with this delegate. | 
|  | std::list<std::unique_ptr<TtsUtterance>> old_list; | 
|  | utterance_list_.swap(old_list); | 
|  | while (!old_list.empty()) { | 
|  | std::unique_ptr<TtsUtterance> utterance = std::move(old_list.front()); | 
|  | old_list.pop_front(); | 
|  | if (utterance->GetEventDelegate() != delegate) | 
|  | utterance_list_.emplace_back(std::move(utterance)); | 
|  | } | 
|  |  | 
|  | if (current_utterance_ && | 
|  | current_utterance_->GetEventDelegate() == delegate) { | 
|  | current_utterance_->SetEventDelegate(nullptr); | 
|  | if (engine_delegate_ && !current_utterance_->GetEngineId().empty()) { | 
|  | engine_delegate_->Stop(current_utterance_.get()); | 
|  | } else { | 
|  | DCHECK(TtsPlatformReady()); | 
|  | GetTtsPlatform()->ClearError(); | 
|  | GetTtsPlatform()->StopSpeaking(); | 
|  | } | 
|  |  | 
|  | FinishCurrentUtterance(); | 
|  | SpeakNextUtterance(); | 
|  | } | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::SetTtsEngineDelegate(TtsEngineDelegate* delegate) { | 
|  | engine_delegate_ = delegate; | 
|  | } | 
|  |  | 
|  | TtsEngineDelegate* TtsControllerImpl::GetTtsEngineDelegate() { | 
|  | return engine_delegate_; | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::RefreshVoices() { | 
|  | GetTtsPlatform()->RefreshVoices(); | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::Shutdown() { | 
|  | if (tts_platform_) | 
|  | tts_platform_->Shutdown(); | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::OnBrowserContextDestroyed( | 
|  | BrowserContext* browser_context) { | 
|  | bool did_clear_utterances = false; | 
|  |  | 
|  | // First clear the BrowserContext from any utterances. | 
|  | for (std::unique_ptr<TtsUtterance>& utterance : utterance_list_) { | 
|  | if (utterance->GetBrowserContext() == browser_context) { | 
|  | utterance->ClearBrowserContext(); | 
|  | did_clear_utterances = true; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (current_utterance_ && | 
|  | current_utterance_->GetBrowserContext() == browser_context) { | 
|  | current_utterance_->ClearBrowserContext(); | 
|  | did_clear_utterances = true; | 
|  | } | 
|  |  | 
|  | // If we cleared the BrowserContext from any utterances, stop speech | 
|  | // just to be safe. Do this using PostTask because calling Stop might | 
|  | // try to send notifications and that can trigger code paths that try | 
|  | // to access the BrowserContext that's being deleted. Note that it's | 
|  | // safe to use base::Unretained because this is a singleton. | 
|  | if (did_clear_utterances) { | 
|  | base::SingleThreadTaskRunner::GetCurrentDefault()->PostTask( | 
|  | FROM_HERE, base::BindOnce(&TtsControllerImpl::StopAndClearQueue, | 
|  | base::Unretained(this), GURL())); | 
|  | } | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::SetTtsPlatform(TtsPlatform* tts_platform) { | 
|  | tts_platform_ = tts_platform; | 
|  | } | 
|  |  | 
|  | int TtsControllerImpl::QueueSize() { | 
|  | return static_cast<int>(utterance_list_.size()); | 
|  | } | 
|  |  | 
|  | TtsPlatform* TtsControllerImpl::GetTtsPlatform() { | 
|  | if (!tts_platform_) | 
|  | tts_platform_ = TtsPlatform::GetInstance(); | 
|  | DCHECK(tts_platform_); | 
|  | return tts_platform_; | 
|  | } | 
|  |  | 
|  | bool TtsControllerImpl::TtsPlatformReady() { | 
|  | TtsPlatform* tts_platform = GetTtsPlatform(); | 
|  | return tts_platform->PlatformImplSupported() && | 
|  | tts_platform->PlatformImplInitialized(); | 
|  | } | 
|  |  | 
|  | bool TtsControllerImpl::TtsPlatformLoading() { | 
|  | // If the platform implementation is supported, it is considered to be in | 
|  | // loading state until the platform is inititialized. Typically, that means | 
|  | // the libraries are loaded and the voices are being loaded. | 
|  | TtsPlatform* tts_platform = GetTtsPlatform(); | 
|  | return tts_platform->PlatformImplSupported() && | 
|  | !tts_platform->PlatformImplInitialized(); | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::SpeakNow(std::unique_ptr<TtsUtterance> utterance) { | 
|  | // Get all available voices and try to find a matching voice. | 
|  | std::vector<VoiceData> voices; | 
|  | GetVoices(utterance->GetBrowserContext(), utterance->GetSrcUrl(), &voices); | 
|  |  | 
|  | // Get the best matching voice. If nothing matches, just set "native" | 
|  | // to true because that might trigger deferred loading of native voices. | 
|  | // TODO(katie): Move most of the GetMatchingVoice logic into content/ and | 
|  | // use the TTS controller delegate to get chrome-specific info as needed. | 
|  | int index = GetMatchingVoice(utterance.get(), voices); | 
|  | VoiceData voice; | 
|  | if (index >= 0) { | 
|  | voice = voices[index]; | 
|  | } else { | 
|  | voice.native = true; | 
|  | voice.engine_id = utterance->GetEngineId(); | 
|  | voice.name = utterance->GetVoiceName(); | 
|  | voice.lang = utterance->GetLang(); | 
|  | } | 
|  |  | 
|  | UpdateUtteranceDefaults(utterance.get()); | 
|  |  | 
|  | GetTtsPlatform()->WillSpeakUtteranceWithVoice(utterance.get(), voice); | 
|  |  | 
|  | base::RecordAction(base::UserMetricsAction("TextToSpeech.Speak")); | 
|  | UMA_HISTOGRAM_COUNTS_100000("TextToSpeech.Utterance.Rate", | 
|  | utterance->GetContinuousParameters().rate); | 
|  | UMA_HISTOGRAM_COUNTS_100000("TextToSpeech.Utterance.TextLength", | 
|  | utterance->GetText().size()); | 
|  | UMA_HISTOGRAM_BOOLEAN("TextToSpeech.Utterance.FromExtensionAPI", | 
|  | !utterance->GetSrcUrl().is_empty()); | 
|  | UMA_HISTOGRAM_BOOLEAN("TextToSpeech.Utterance.HasVoiceName", | 
|  | !utterance->GetVoiceName().empty()); | 
|  | UMA_HISTOGRAM_BOOLEAN("TextToSpeech.Utterance.Native", voice.native); | 
|  |  | 
|  | if (!voice.native) { | 
|  | #if !BUILDFLAG(IS_ANDROID) | 
|  | DCHECK(!voice.engine_id.empty()); | 
|  | SetCurrentUtterance(std::move(utterance)); | 
|  | current_utterance_->SetEngineId(voice.engine_id); | 
|  | if (engine_delegate_) { | 
|  | engine_delegate_->Speak(current_utterance_.get(), voice); | 
|  | } | 
|  |  | 
|  | bool sends_end_event = | 
|  | voice.events.find(TTS_EVENT_END) != voice.events.end(); | 
|  | if (!sends_end_event) { | 
|  | current_utterance_->Finish(); | 
|  | SetCurrentUtterance(nullptr); | 
|  | SpeakNextUtterance(); | 
|  | } | 
|  | #endif  // !BUILDFLAG(IS_ANDROID) | 
|  | } else { | 
|  | // It's possible for certain platforms to send start events immediately | 
|  | // during |speak|. | 
|  | SetCurrentUtterance(std::move(utterance)); | 
|  | if (TtsPlatformReady()) { | 
|  | GetTtsPlatform()->ClearError(); | 
|  | GetTtsPlatform()->Speak( | 
|  | current_utterance_->GetId(), current_utterance_->GetText(), | 
|  | current_utterance_->GetLang(), voice, | 
|  | current_utterance_->GetContinuousParameters(), | 
|  | base::BindOnce(&TtsControllerImpl::OnSpeakFinished, | 
|  | base::Unretained(this), current_utterance_->GetId())); | 
|  | } else { | 
|  | // The TTS platform is not supported. | 
|  | OnSpeakFinished(current_utterance_->GetId(), false); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::OnSpeakFinished(int utterance_id, bool success) { | 
|  | if (success) | 
|  | return; | 
|  |  | 
|  | // Since OnSpeakFinished could run asynchronously, it is possible that the | 
|  | // current utterance has changed. Ignore any such spurious callbacks. | 
|  | if (!current_utterance_ || current_utterance_->GetId() != utterance_id) | 
|  | return; | 
|  |  | 
|  | // If the native voice wasn't able to process this speech, see if the browser | 
|  | // has built-in TTS that crashed and needs re-loading or the utterance came | 
|  | // from a profile that no longer exists e.g. login. | 
|  | // The controller only ends up here if we had at some point completely | 
|  | // initialized native tts and tts engine delegate (see SpeakOrEnqueue), so | 
|  | // drop the utterance from re-processing. | 
|  | GetTtsPlatform()->LoadBuiltInTtsEngine( | 
|  | current_utterance_->GetBrowserContext()); | 
|  |  | 
|  | current_utterance_->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex, | 
|  | kInvalidLength, GetTtsPlatform()->GetError()); | 
|  | SetCurrentUtterance(nullptr); | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::ClearUtteranceQueue(bool send_events) { | 
|  | while (!utterance_list_.empty()) { | 
|  | std::unique_ptr<TtsUtterance> utterance = | 
|  | std::move(utterance_list_.front()); | 
|  | utterance_list_.pop_front(); | 
|  | if (send_events) { | 
|  | utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex, | 
|  | kInvalidLength, std::string()); | 
|  | } else { | 
|  | utterance->Finish(); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::FinishCurrentUtterance() { | 
|  | if (!current_utterance_) | 
|  | return; | 
|  |  | 
|  | if (!current_utterance_->IsFinished()) { | 
|  | current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex, | 
|  | kInvalidLength, std::string()); | 
|  | } | 
|  |  | 
|  | SetCurrentUtterance(nullptr); | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::SpeakNextUtterance() { | 
|  | if (paused_) | 
|  | return; | 
|  |  | 
|  | // Start speaking the next utterance in the queue.  Keep trying in case | 
|  | // one fails but there are still more in the queue to try. | 
|  | TtsUtterance* previous_utterance = nullptr; | 
|  | while (!utterance_list_.empty() && !current_utterance_) { | 
|  | std::unique_ptr<TtsUtterance> utterance = | 
|  | std::move(utterance_list_.front()); | 
|  | utterance_list_.pop_front(); | 
|  | DCHECK(previous_utterance != utterance.get()); | 
|  |  | 
|  | if (ShouldSpeakUtterance(utterance.get())) | 
|  | SpeakNow(std::move(utterance)); | 
|  | else | 
|  | utterance->Finish(); | 
|  |  | 
|  | previous_utterance = utterance.get(); | 
|  | } | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::UpdateUtteranceDefaults(TtsUtterance* utterance) { | 
|  | double rate = utterance->GetContinuousParameters().rate; | 
|  | double pitch = utterance->GetContinuousParameters().pitch; | 
|  | double volume = utterance->GetContinuousParameters().volume; | 
|  | #if BUILDFLAG(IS_CHROMEOS) | 
|  | if (GetTtsControllerDelegate()) | 
|  | GetTtsControllerDelegate()->UpdateUtteranceDefaultsFromPrefs( | 
|  | utterance, &rate, &pitch, &volume); | 
|  | #else | 
|  | // Update pitch, rate and volume to defaults if not explicity set on | 
|  | // this utterance. | 
|  | if (rate == blink::mojom::kSpeechSynthesisDoublePrefNotSet) | 
|  | rate = blink::mojom::kSpeechSynthesisDefaultRate; | 
|  | if (pitch == blink::mojom::kSpeechSynthesisDoublePrefNotSet) | 
|  | pitch = blink::mojom::kSpeechSynthesisDefaultPitch; | 
|  | if (volume == blink::mojom::kSpeechSynthesisDoublePrefNotSet) | 
|  | volume = blink::mojom::kSpeechSynthesisDefaultVolume; | 
|  | #endif  // BUILDFLAG(IS_CHROMEOS) | 
|  | utterance->SetContinuousParameters(rate, pitch, volume); | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::StripSSML( | 
|  | const std::string& utterance, | 
|  | base::OnceCallback<void(const std::string&)> on_ssml_parsed) { | 
|  | // Skip parsing and return if not xml. | 
|  | if (utterance.find("<?xml") == std::string::npos) { | 
|  | std::move(on_ssml_parsed).Run(utterance); | 
|  | return; | 
|  | } | 
|  |  | 
|  | // Parse using safe, out-of-process Xml Parser. | 
|  | data_decoder::DataDecoder::ParseXmlIsolated( | 
|  | utterance, | 
|  | data_decoder::mojom::XmlParser::WhitespaceBehavior::kPreserveSignificant, | 
|  | base::BindOnce(&TtsControllerImpl::StripSSMLHelper, utterance, | 
|  | std::move(on_ssml_parsed))); | 
|  | } | 
|  |  | 
|  | // Called when ParseXml finishes. | 
|  | // Uses parsed xml to build parsed utterance text. | 
|  | void TtsControllerImpl::StripSSMLHelper( | 
|  | const std::string& utterance, | 
|  | base::OnceCallback<void(const std::string&)> on_ssml_parsed, | 
|  | data_decoder::DataDecoder::ValueOrError result) { | 
|  | // Error checks. | 
|  | // If invalid xml, return original utterance text. | 
|  | if (!result.has_value()) { | 
|  | std::move(on_ssml_parsed).Run(utterance); | 
|  | return; | 
|  | } | 
|  |  | 
|  | std::string root_tag_name; | 
|  | data_decoder::GetXmlElementTagName(*result, &root_tag_name); | 
|  | // Root element must be <speak>. | 
|  | if (root_tag_name.compare("speak") != 0) { | 
|  | std::move(on_ssml_parsed).Run(utterance); | 
|  | return; | 
|  | } | 
|  |  | 
|  | std::string parsed_text; | 
|  | // Change from unique_ptr to base::Value* so recursion will work. | 
|  | PopulateParsedText(&parsed_text, &*result); | 
|  |  | 
|  | // Run with parsed_text. | 
|  | std::move(on_ssml_parsed).Run(parsed_text); | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::PopulateParsedText(std::string* parsed_text, | 
|  | const base::Value* element) { | 
|  | DCHECK(parsed_text); | 
|  | if (!element || !element->is_dict()) { | 
|  | return; | 
|  | } | 
|  | // Add element's text if present. | 
|  | // Note: We don't use data_decoder::GetXmlElementText because it gets the text | 
|  | // of element's first child, not text of current element. | 
|  | const std::string* text_value = | 
|  | element->GetDict().FindString(data_decoder::mojom::XmlParser::kTextKey); | 
|  | if (text_value) | 
|  | *parsed_text += *text_value; | 
|  |  | 
|  | const base::Value::List* children = | 
|  | data_decoder::GetXmlElementChildren(*element); | 
|  | if (!children) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | for (const auto& entry : *children) { | 
|  | // We need to iterate over all children because some text elements are | 
|  | // nested within other types of elements, such as <emphasis> tags. | 
|  | PopulateParsedText(parsed_text, &entry); | 
|  | } | 
|  | } | 
|  |  | 
|  | int TtsControllerImpl::GetMatchingVoice(TtsUtterance* utterance, | 
|  | const std::vector<VoiceData>& voices) { | 
|  | const std::string app_lang = | 
|  | GetContentClient()->browser()->GetApplicationLocale(); | 
|  | // Start with a best score of -1, that way even if none of the criteria | 
|  | // match, something will be returned if there are any voices. | 
|  | int best_score = -1; | 
|  | int best_score_index = -1; | 
|  | #if BUILDFLAG(IS_CHROMEOS) | 
|  | TtsControllerDelegate* delegate = GetTtsControllerDelegate(); | 
|  | std::unique_ptr<TtsControllerDelegate::PreferredVoiceIds> preferred_ids = | 
|  | delegate ? delegate->GetPreferredVoiceIdsForUtterance(utterance) | 
|  | : nullptr; | 
|  | #endif  // BUILDFLAG(IS_CHROMEOS) | 
|  | for (size_t i = 0; i < voices.size(); ++i) { | 
|  | const content::VoiceData& voice = voices[i]; | 
|  | int score = 0; | 
|  |  | 
|  | // If the extension ID is specified, check for an exact match. | 
|  | if (!utterance->GetEngineId().empty() && | 
|  | utterance->GetEngineId() != voice.engine_id) | 
|  | continue; | 
|  |  | 
|  | // If the voice name is specified, check for an exact match. | 
|  | if (!utterance->GetVoiceName().empty() && | 
|  | voice.name != utterance->GetVoiceName()) | 
|  | continue; | 
|  |  | 
|  | // Prefer the utterance language. | 
|  | if (!voice.lang.empty() && !utterance->GetLang().empty()) { | 
|  | std::string voice_language = | 
|  | base::ToLowerASCII(l10n_util::GetLanguage(voice.lang)); | 
|  | std::string voice_country = | 
|  | base::ToLowerASCII(l10n_util::GetCountry(voice.lang)); | 
|  | std::string utterance_language = | 
|  | base::ToLowerASCII(l10n_util::GetLanguage(utterance->GetLang())); | 
|  | std::string utterance_country = | 
|  | base::ToLowerASCII(l10n_util::GetCountry(utterance->GetLang())); | 
|  |  | 
|  | // An exact locale match is worth more than a partial match. | 
|  | // Convert locales to lowercase to handle cases like "en-us" vs. "en-US". | 
|  | // Cases where language and country match should score the same as an | 
|  | // exact match. | 
|  | if (voice_language == utterance_language && | 
|  | (voice_country == utterance_country || | 
|  | (utterance_country.empty() && voice_language == voice_country) || | 
|  | (voice_country.empty() && | 
|  | utterance_language == utterance_country))) { | 
|  | score += 128; | 
|  | } else if (voice_language == utterance_language) { | 
|  | score += 64; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Next, prefer required event types. | 
|  | if (!utterance->GetRequiredEventTypes().empty()) { | 
|  | bool has_all_required_event_types = true; | 
|  | for (TtsEventType event_type : utterance->GetRequiredEventTypes()) { | 
|  | if (voice.events.find(event_type) == voice.events.end()) { | 
|  | has_all_required_event_types = false; | 
|  | break; | 
|  | } | 
|  | } | 
|  | if (has_all_required_event_types) | 
|  | score += 32; | 
|  | } | 
|  |  | 
|  | #if BUILDFLAG(IS_CHROMEOS) | 
|  | if (preferred_ids) { | 
|  | // First prefer the user's preference voice for the utterance language, | 
|  | // if the utterance language is specified. | 
|  | if (!utterance->GetLang().empty() && | 
|  | VoiceIdMatches(preferred_ids->lang_voice_id, voice)) { | 
|  | score += 16; | 
|  | } | 
|  |  | 
|  | // Then prefer the user's preference voice for the system language. | 
|  | // This is a lower priority match than the utterance voice. | 
|  | if (VoiceIdMatches(preferred_ids->locale_voice_id, voice)) | 
|  | score += 8; | 
|  |  | 
|  | // Finally, prefer the user's preference voice for any language. This will | 
|  | // pick the default voice if there is no better match for the current | 
|  | // system language and utterance language. | 
|  | if (VoiceIdMatches(preferred_ids->any_locale_voice_id, voice)) | 
|  | score += 4; | 
|  | } | 
|  | #endif  // BUILDFLAG(IS_CHROMEOS) | 
|  |  | 
|  | // Finally, prefer system language. | 
|  | if (!voice.lang.empty()) { | 
|  | if (voice.lang == app_lang) { | 
|  | score += 2; | 
|  | } else if (base::EqualsCaseInsensitiveASCII( | 
|  | l10n_util::GetLanguage(voice.lang), | 
|  | l10n_util::GetLanguage(app_lang))) { | 
|  | score += 1; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (score > best_score) { | 
|  | best_score = score; | 
|  | best_score_index = i; | 
|  | } | 
|  | } | 
|  |  | 
|  | return best_score_index; | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::SetCurrentUtterance( | 
|  | std::unique_ptr<TtsUtterance> utterance) { | 
|  | current_utterance_ = std::move(utterance); | 
|  | Observe(current_utterance_ | 
|  | ? AsUtteranceImpl(current_utterance_.get())->GetWebContents() | 
|  | : nullptr); | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::StopCurrentUtteranceAndRemoveUtterancesMatching( | 
|  | WebContents* wc) { | 
|  | DCHECK(wc); | 
|  | // Removes any utterances that match the WebContents from the current | 
|  | // utterance (which our inherited WebContentsObserver starts observing every | 
|  | // time the utterance changes). | 
|  | // | 
|  | // This is called when the WebContents for the current utterance is destroyed | 
|  | // or hidden. In the case where it's destroyed, this is done to avoid | 
|  | // attempting to start a utterance that is very likely to be destroyed right | 
|  | // away, and there are also subtle timing issues if we didn't do this (if a | 
|  | // queued utterance has already received WebContentsDestroyed(), and we start | 
|  | // it, we won't get the corresponding WebContentsDestroyed()). | 
|  | auto eraser = [wc](const std::unique_ptr<TtsUtterance>& utterance) { | 
|  | TtsUtteranceImpl* utterance_impl = AsUtteranceImpl(utterance.get()); | 
|  | if (utterance_impl->GetWebContents() == wc) { | 
|  | utterance_impl->Finish(); | 
|  | return true; | 
|  | } | 
|  | return false; | 
|  | }; | 
|  | utterance_list_.erase( | 
|  | std::remove_if(utterance_list_.begin(), utterance_list_.end(), eraser), | 
|  | utterance_list_.end()); | 
|  | const bool stopped = StopCurrentUtteranceIfMatches(GURL()); | 
|  | DCHECK(stopped); | 
|  | SpeakNextUtterance(); | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::RemoveUtteranceAndStopIfNeeded(int utterance_id) { | 
|  | for (std::list<std::unique_ptr<TtsUtterance>>::iterator it = | 
|  | utterance_list_.begin(); | 
|  | it != utterance_list_.end(); ++it) { | 
|  | if ((*it)->GetId() == utterance_id) { | 
|  | TtsUtteranceImpl* utterance_impl = AsUtteranceImpl((*it).get()); | 
|  | utterance_impl->Finish(); | 
|  | utterance_list_.erase(it); | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | const bool stopped = StopCurrentUtteranceIfMatches(utterance_id); | 
|  | if (stopped) | 
|  | SpeakNextUtterance(); | 
|  | } | 
|  |  | 
|  | bool TtsControllerImpl::StopCurrentUtteranceIfMatches(int utterance_id) { | 
|  | paused_ = false; | 
|  |  | 
|  | if (current_utterance_->GetId() != utterance_id) | 
|  | return false; | 
|  |  | 
|  | StopCurrentUtterance(); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | bool TtsControllerImpl::ShouldSpeakUtterance(TtsUtterance* utterance) { | 
|  | TtsUtteranceImpl* utterance_impl = AsUtteranceImpl(utterance); | 
|  | if (!utterance_impl->was_created_with_web_contents() || | 
|  | utterance_impl->ShouldAlwaysBeSpoken()) { | 
|  | return true; | 
|  | } | 
|  |  | 
|  | // If the WebContents that created the utterance has been destroyed, don't | 
|  | // speak it. | 
|  | if (!utterance_impl->GetWebContents()) | 
|  | return false; | 
|  |  | 
|  | // Allow speaking if either the WebContents is visible, or the WebContents | 
|  | // isn't required to be visible before speaking. | 
|  | return !stop_speaking_when_hidden_ || | 
|  | utterance_impl->GetWebContents()->GetVisibility() != | 
|  | Visibility::HIDDEN; | 
|  | } | 
|  |  | 
|  | // | 
|  | // WebContentsObserver | 
|  | // | 
|  |  | 
|  | void TtsControllerImpl::WebContentsDestroyed() { | 
|  | StopCurrentUtteranceAndRemoveUtterancesMatching(web_contents()); | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::OnVisibilityChanged(Visibility visibility) { | 
|  | if (visibility == Visibility::HIDDEN && stop_speaking_when_hidden_) | 
|  | StopCurrentUtteranceAndRemoveUtterancesMatching(web_contents()); | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::OnNetworkChanged( | 
|  | net::NetworkChangeNotifier::ConnectionType type) { | 
|  | switch (type) { | 
|  | // Non-cellular connections. | 
|  | case net::NetworkChangeNotifier::ConnectionType::CONNECTION_UNKNOWN: | 
|  | case net::NetworkChangeNotifier::ConnectionType::CONNECTION_ETHERNET: | 
|  | case net::NetworkChangeNotifier::ConnectionType::CONNECTION_WIFI: | 
|  | case net::NetworkChangeNotifier::ConnectionType::CONNECTION_BLUETOOTH: | 
|  | allow_remote_voices_ = true; | 
|  | break; | 
|  |  | 
|  | // Cellular connections. | 
|  | case net::NetworkChangeNotifier::ConnectionType::CONNECTION_2G: | 
|  | case net::NetworkChangeNotifier::ConnectionType::CONNECTION_3G: | 
|  | case net::NetworkChangeNotifier::ConnectionType::CONNECTION_4G: | 
|  | case net::NetworkChangeNotifier::ConnectionType::CONNECTION_NONE: | 
|  | case net::NetworkChangeNotifier::ConnectionType::CONNECTION_5G: | 
|  | allow_remote_voices_ = false; | 
|  | } | 
|  | } | 
|  |  | 
|  | #if BUILDFLAG(IS_CHROMEOS) | 
|  | TtsControllerDelegate* TtsControllerImpl::GetTtsControllerDelegate() { | 
|  | if (delegate_) | 
|  | return delegate_; | 
|  | if (GetContentClient() && GetContentClient()->browser()) { | 
|  | delegate_ = GetContentClient()->browser()->GetTtsControllerDelegate(); | 
|  | return delegate_; | 
|  | } | 
|  | return nullptr; | 
|  | } | 
|  |  | 
|  | void TtsControllerImpl::SetTtsControllerDelegateForTesting( | 
|  | TtsControllerDelegate* delegate) { | 
|  | delegate_ = delegate; | 
|  | } | 
|  | #endif  // BUILDFLAG(IS_CHROMEOS) | 
|  |  | 
|  | }  // namespace content |