| // Copyright 2023 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "chrome/browser/ash/accessibility/service/tts_client_impl.h" |
| |
| #include "content/public/browser/browser_context.h" |
| #include "content/public/browser/tts_controller.h" |
| #include "services/accessibility/public/mojom/tts.mojom.h" |
| #include "ui/base/l10n/l10n_util.h" |
| |
| namespace ash { |
| |
| namespace { |
| |
// Bounds applied to Speak() requests. Each value is the limit allowed by the
// TTS extension API.

// Longest utterance text, measured with std::string::size().
constexpr int kMaxUtteranceLength = 32768;
// Lowest permitted speech rate.
constexpr double kMinRate = 0.1;
// Highest permitted speech rate.
constexpr double kMaxRate = 10.0;
// Highest permitted speech pitch.
constexpr double kMaxPitch = 2.0;
// Highest permitted speech volume.
constexpr double kMaxVolume = 1.0;
| |
| ax::mojom::TtsEventType ToMojo(content::TtsEventType event_type) { |
| switch (event_type) { |
| case content::TTS_EVENT_START: |
| return ax::mojom::TtsEventType::kStart; |
| case content::TTS_EVENT_END: |
| return ax::mojom::TtsEventType::kEnd; |
| case content::TTS_EVENT_WORD: |
| return ax::mojom::TtsEventType::kWord; |
| case content::TTS_EVENT_SENTENCE: |
| return ax::mojom::TtsEventType::kSentence; |
| case content::TTS_EVENT_MARKER: |
| return ax::mojom::TtsEventType::kMarker; |
| case content::TTS_EVENT_INTERRUPTED: |
| return ax::mojom::TtsEventType::kInterrupted; |
| case content::TTS_EVENT_CANCELLED: |
| return ax::mojom::TtsEventType::kCancelled; |
| case content::TTS_EVENT_ERROR: |
| return ax::mojom::TtsEventType::kError; |
| case content::TTS_EVENT_PAUSE: |
| return ax::mojom::TtsEventType::kPause; |
| case content::TTS_EVENT_RESUME: |
| return ax::mojom::TtsEventType::kResume; |
| } |
| } |
| |
| // Owned by TtsUtterance. |
| class AtpTtsEventHandler : public content::UtteranceEventDelegate { |
| public: |
| AtpTtsEventHandler() = default; |
| ~AtpTtsEventHandler() override = default; |
| AtpTtsEventHandler(const AtpTtsEventHandler&) = delete; |
| AtpTtsEventHandler& operator=(const AtpTtsEventHandler&) = delete; |
| |
| // content::UtteranceEventDelegate: |
| void OnTtsEvent(content::TtsUtterance* utterance, |
| content::TtsEventType event_type, |
| int char_index, |
| int length, |
| const std::string& error_message) override { |
| auto mojom_event = ax::mojom::TtsEvent::New(); |
| mojom_event->type = ToMojo(event_type); |
| mojom_event->char_index = char_index; |
| mojom_event->length = length; |
| mojom_event->is_final = utterance->IsFinished(); |
| if (event_type == content::TTS_EVENT_ERROR) { |
| mojom_event->error_message = error_message; |
| } |
| utterance_client_->OnEvent(std::move(mojom_event)); |
| } |
| mojo::PendingReceiver<ax::mojom::TtsUtteranceClient> PassReceiver() { |
| return utterance_client_.BindNewPipeAndPassReceiver(); |
| } |
| |
| private: |
| mojo::Remote<ax::mojom::TtsUtteranceClient> utterance_client_; |
| }; |
| |
| } // namespace |
| |
| TtsClientImpl::TtsClientImpl(content::BrowserContext* profile) |
| : profile_(profile) { |
| CHECK(profile_); |
| } |
| |
| TtsClientImpl::~TtsClientImpl() = default; |
| |
| void TtsClientImpl::Bind(mojo::PendingReceiver<Tts> tts_receiver) { |
| tts_receivers_.Add(this, std::move(tts_receiver)); |
| } |
| |
| void TtsClientImpl::Speak(const std::string& utterance, |
| ax::mojom::TtsOptionsPtr options, |
| SpeakCallback callback) { |
| auto result = ax::mojom::TtsSpeakResult::New(); |
| if (utterance.size() > kMaxUtteranceLength) { |
| result->error = ax::mojom::TtsError::kErrorUtteranceTooLong; |
| std::move(callback).Run(std::move(result)); |
| return; |
| } |
| |
| // Check for errors in options. |
| // TODO(crbug.com/41278287): Centralize the struct validation. |
| if (options->rate < kMinRate || options->rate > kMaxRate) { |
| result->error = ax::mojom::TtsError::kErrorInvalidRate; |
| std::move(callback).Run(std::move(result)); |
| return; |
| } |
| if (options->pitch < 0.0 || options->pitch > kMaxPitch) { |
| result->error = ax::mojom::TtsError::kErrorInvalidPitch; |
| std::move(callback).Run(std::move(result)); |
| return; |
| } |
| if (options->volume < 0.0 || options->volume > kMaxVolume) { |
| result->error = ax::mojom::TtsError::kErrorInvalidVolume; |
| std::move(callback).Run(std::move(result)); |
| return; |
| } |
| |
| // Only make the utterance once we know we aren't going to return early. |
| std::unique_ptr<content::TtsUtterance> tts_utterance = |
| content::TtsUtterance::Create(profile_); |
| tts_utterance->SetText(utterance); |
| // TODO(b:277221897): Pass a fake GURL matching the ash extension URL. |
| // This will support both UMA and using enhanced network voices in ATP |
| // select-to-speak. |
| tts_utterance->SetSrcUrl(GURL("")); |
| |
| tts_utterance->SetContinuousParameters(options->rate, options->pitch, |
| options->volume); |
| tts_utterance->SetShouldClearQueue(!options->enqueue); |
| if (options->lang) { |
| std::string lang = options->lang.value(); |
| if (!lang.empty() && !l10n_util::IsValidLocaleSyntax(lang)) { |
| result->error = ax::mojom::TtsError::kErrorInvalidLang; |
| std::move(callback).Run(std::move(result)); |
| return; |
| } |
| tts_utterance->SetLang(options->lang.value()); |
| } |
| if (options->voice_name) { |
| tts_utterance->SetVoiceName(options->voice_name.value()); |
| } |
| if (options->engine_id) { |
| tts_utterance->SetEngineId(options->engine_id.value()); |
| } |
| if (options->on_event) { |
| auto atp_tts_event_handler = std::make_unique<AtpTtsEventHandler>(); |
| result->utterance_client = atp_tts_event_handler->PassReceiver(); |
| tts_utterance->SetEventDelegate(std::move(atp_tts_event_handler)); |
| } |
| // Note: we don't need desired/required event types because they aren't |
| // passed by ChromeVox or STS. We don't need an options_dict, it's redundant, |
| // and we don't need a src_id because each ATP utterance has its own callback. |
| |
| // Send the callback back to ATP with the utterance client. |
| result->error = ax::mojom::TtsError::kNoError; |
| std::move(callback).Run(std::move(result)); |
| |
| // Start speech. |
| content::TtsController* controller = content::TtsController::GetInstance(); |
| controller->SpeakOrEnqueue(std::move(tts_utterance)); |
| } |
| |
| void TtsClientImpl::Stop() { |
| content::TtsController* controller = content::TtsController::GetInstance(); |
| // TODO(b:277221897): Pass a fake GURL matching the ash extension URL so that |
| // extensions cannot clobber other speech. |
| controller->Stop(GURL("")); |
| } |
| |
| void TtsClientImpl::Pause() { |
| content::TtsController::GetInstance()->Pause(); |
| } |
| |
| void TtsClientImpl::Resume() { |
| content::TtsController::GetInstance()->Resume(); |
| } |
| |
| void TtsClientImpl::IsSpeaking(IsSpeakingCallback callback) { |
| std::move(callback).Run(content::TtsController::GetInstance()->IsSpeaking()); |
| } |
| |
| void TtsClientImpl::GetVoices(GetVoicesCallback callback) { |
| std::vector<content::VoiceData> voices; |
| // TODO(b:277221897): Pass a fake GURL matching the extension URL so that |
| // Select to Speak can get the enhanced network voices. |
| content::TtsController::GetInstance()->GetVoices(profile_, GURL(""), &voices); |
| std::vector<ax::mojom::TtsVoicePtr> results; |
| for (auto& voice : voices) { |
| auto result = ax::mojom::TtsVoice::New(); |
| result->voice_name = voice.name; |
| result->lang = voice.lang; |
| result->remote = voice.remote; |
| result->engine_id = voice.engine_id; |
| if (!voice.events.empty()) { |
| result->event_types = std::vector<ax::mojom::TtsEventType>(); |
| for (auto type : voice.events) { |
| result->event_types->emplace_back(ToMojo(type)); |
| } |
| } |
| results.emplace_back(std::move(result)); |
| } |
| std::move(callback).Run(std::move(results)); |
| } |
| |
| } // namespace ash |