// chrome/browser/ash/accessibility/service/tts_client_impl.cc - chromium/src - Git at Google

 // Copyright 2023 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "chrome/browser/ash/accessibility/service/tts_client_impl.h"

 #include "content/public/browser/browser_context.h"
 #include "content/public/browser/tts_controller.h"
 #include "services/accessibility/public/mojom/tts.mojom.h"
 #include "ui/base/l10n/l10n_util.h"

 namespace ash {

 namespace {

 // Limits mirrored from the TTS extension API; Speak() rejects requests that
 // fall outside them.

 // Longest utterance text accepted.
 constexpr int kMaxUtteranceLength = 32768;
 // Slowest speech rate accepted.
 constexpr double kMinRate = 0.1;
 // Fastest speech rate accepted.
 constexpr double kMaxRate = 10.0;
 // Highest speech pitch accepted.
 constexpr double kMaxPitch = 2.0;
 // Loudest speech volume accepted.
 constexpr double kMaxVolume = 1.0;

 // Translates a content-layer TTS event type into the ax::mojom::TtsEventType
 // of the same name, for use on the accessibility service's Mojo interface.
 // NOTE(review): the switch has no `default:` and nothing follows it, so a
 // value outside the enumerators listed here would fall off the end of the
 // function without returning. Callers are presumably expected to pass only
 // valid enumerators - confirm.
 ax::mojom::TtsEventType ToMojo(content::TtsEventType event_type) {
   switch (event_type) {
     case content::TTS_EVENT_START:
       return ax::mojom::TtsEventType::kStart;
     case content::TTS_EVENT_END:
       return ax::mojom::TtsEventType::kEnd;
     case content::TTS_EVENT_WORD:
       return ax::mojom::TtsEventType::kWord;
     case content::TTS_EVENT_SENTENCE:
       return ax::mojom::TtsEventType::kSentence;
     case content::TTS_EVENT_MARKER:
       return ax::mojom::TtsEventType::kMarker;
     case content::TTS_EVENT_INTERRUPTED:
       return ax::mojom::TtsEventType::kInterrupted;
     case content::TTS_EVENT_CANCELLED:
       return ax::mojom::TtsEventType::kCancelled;
     case content::TTS_EVENT_ERROR:
       return ax::mojom::TtsEventType::kError;
     case content::TTS_EVENT_PAUSE:
       return ax::mojom::TtsEventType::kPause;
     case content::TTS_EVENT_RESUME:
       return ax::mojom::TtsEventType::kResume;
   }
 }

 // Relays the TTS events of a single utterance to a remote
 // ax::mojom::TtsUtteranceClient. Owned by TtsUtterance.
 class AtpTtsEventHandler : public content::UtteranceEventDelegate {
  public:
   AtpTtsEventHandler() = default;
   AtpTtsEventHandler(const AtpTtsEventHandler&) = delete;
   AtpTtsEventHandler& operator=(const AtpTtsEventHandler&) = delete;
   ~AtpTtsEventHandler() override = default;

   // Binds `utterance_client_` to a new message pipe and returns the pipe's
   // receiver end, which is what gets handed to the remote client.
   mojo::PendingReceiver<ax::mojom::TtsUtteranceClient> PassReceiver() {
     return utterance_client_.BindNewPipeAndPassReceiver();
   }

   // content::UtteranceEventDelegate:
   // Packages the event as an ax::mojom::TtsEvent and sends it to the client.
   void OnTtsEvent(content::TtsUtterance* utterance,
                   content::TtsEventType event_type,
                   int char_index,
                   int length,
                   const std::string& error_message) override {
     auto event = ax::mojom::TtsEvent::New();
     event->type = ToMojo(event_type);
     event->char_index = char_index;
     event->length = length;
     event->is_final = utterance->IsFinished();
     // The error text is only forwarded for error events.
     const bool is_error = event_type == content::TTS_EVENT_ERROR;
     if (is_error) {
       event->error_message = error_message;
     }
     utterance_client_->OnEvent(std::move(event));
   }

  private:
   // Remote end that receives every event reported to OnTtsEvent().
   mojo::Remote<ax::mojom::TtsUtteranceClient> utterance_client_;
 };

 }  // namespace

 // Stores `profile` for later use when creating utterances and listing voices.
 // `profile` must be non-null; this is enforced with a CHECK.
 TtsClientImpl::TtsClientImpl(content::BrowserContext* profile)
     : profile_(profile) {
   CHECK(profile_);
 }

 // Defaulted: no teardown beyond destroying the members.
 TtsClientImpl::~TtsClientImpl() = default;

 // Adds `tts_receiver` to `tts_receivers_`, with this object as the
 // implementation that serves it.
 void TtsClientImpl::Bind(mojo::PendingReceiver<Tts> tts_receiver) {
   tts_receivers_.Add(this, std::move(tts_receiver));
 }

 void TtsClientImpl::Speak(const std::string& utterance,
                           ax::mojom::TtsOptionsPtr options,
                           SpeakCallback callback) {
   auto result = ax::mojom::TtsSpeakResult::New();
   if (utterance.size() > kMaxUtteranceLength) {
     result->error = ax::mojom::TtsError::kErrorUtteranceTooLong;
     std::move(callback).Run(std::move(result));
     return;
   }

   // Check for errors in options.
   // TODO(crbug.com/41278287): Centralize the struct validation.
   if (options->rate < kMinRate || options->rate > kMaxRate) {
     result->error = ax::mojom::TtsError::kErrorInvalidRate;
     std::move(callback).Run(std::move(result));
     return;
   }
   if (options->pitch < 0.0 || options->pitch > kMaxPitch) {
     result->error = ax::mojom::TtsError::kErrorInvalidPitch;
     std::move(callback).Run(std::move(result));
     return;
   }
   if (options->volume < 0.0 || options->volume > kMaxVolume) {
     result->error = ax::mojom::TtsError::kErrorInvalidVolume;
     std::move(callback).Run(std::move(result));
     return;
   }

   // Only make the utterance once we know we aren't going to return early.
   std::unique_ptr<content::TtsUtterance> tts_utterance =
       content::TtsUtterance::Create(profile_);
   tts_utterance->SetText(utterance);
   // TODO(b:277221897): Pass a fake GURL matching the ash extension URL.
   // This will support both UMA and using enhanced network voices in ATP
   // select-to-speak.
   tts_utterance->SetSrcUrl(GURL(""));

   tts_utterance->SetContinuousParameters(options->rate, options->pitch,
                                          options->volume);
   tts_utterance->SetShouldClearQueue(!options->enqueue);
   if (options->lang) {
     std::string lang = options->lang.value();
     if (!lang.empty() && !l10n_util::IsValidLocaleSyntax(lang)) {
       result->error = ax::mojom::TtsError::kErrorInvalidLang;
       std::move(callback).Run(std::move(result));
       return;
     }
     tts_utterance->SetLang(options->lang.value());
   }
   if (options->voice_name) {
     tts_utterance->SetVoiceName(options->voice_name.value());
   }
   if (options->engine_id) {
     tts_utterance->SetEngineId(options->engine_id.value());
   }
   if (options->on_event) {
     auto atp_tts_event_handler = std::make_unique<AtpTtsEventHandler>();
     result->utterance_client = atp_tts_event_handler->PassReceiver();
     tts_utterance->SetEventDelegate(std::move(atp_tts_event_handler));
   }
   // Note: we don't need desired/required event types because they aren't
   // passed by ChromeVox or STS. We don't need an options_dict, it's redundant,
   // and we don't need a src_id because each ATP utterance has its own callback.

   // Send the callback back to ATP with the utterance client.
   result->error = ax::mojom::TtsError::kNoError;
   std::move(callback).Run(std::move(result));

   // Start speech.
   content::TtsController* controller = content::TtsController::GetInstance();
   controller->SpeakOrEnqueue(std::move(tts_utterance));
 }

 // Asks the TtsController to stop speech, identifying the requester with an
 // empty source URL.
 void TtsClientImpl::Stop() {
   // TODO(b:277221897): Pass a fake GURL matching the ash extension URL so that
   // extensions cannot clobber other speech.
   content::TtsController::GetInstance()->Stop(GURL(""));
 }

 // Forwards the pause request to the TtsController instance.
 void TtsClientImpl::Pause() {
   content::TtsController::GetInstance()->Pause();
 }

 // Forwards the resume request to the TtsController instance.
 void TtsClientImpl::Resume() {
   content::TtsController::GetInstance()->Resume();
 }

 // Runs `callback` with whatever the TtsController instance reports from
 // IsSpeaking().
 void TtsClientImpl::IsSpeaking(IsSpeakingCallback callback) {
   std::move(callback).Run(content::TtsController::GetInstance()->IsSpeaking());
 }

 // Asks the TtsController for the voices available to `profile_`, converts each
 // one to an ax::mojom::TtsVoice, and passes the list to `callback`.
 void TtsClientImpl::GetVoices(GetVoicesCallback callback) {
   std::vector<content::VoiceData> voices;
   // TODO(b:277221897): Pass a fake GURL matching the extension URL so that
   // Select to Speak can get the enhanced network voices.
   content::TtsController::GetInstance()->GetVoices(profile_, GURL(""), &voices);

   std::vector<ax::mojom::TtsVoicePtr> results;
   results.reserve(voices.size());
   for (auto& voice : voices) {
     auto result = ax::mojom::TtsVoice::New();
     // `voices` is a local that is discarded after this loop, so its strings
     // are moved out instead of copied.
     result->voice_name = std::move(voice.name);
     result->lang = std::move(voice.lang);
     result->remote = voice.remote;
     result->engine_id = std::move(voice.engine_id);
     // `event_types` is left unset, rather than set to an empty list, for a
     // voice that reports no events.
     if (!voice.events.empty()) {
       std::vector<ax::mojom::TtsEventType> event_types;
       event_types.reserve(voice.events.size());
       for (const auto type : voice.events) {
         event_types.push_back(ToMojo(type));
       }
       result->event_types = std::move(event_types);
     }
     results.push_back(std::move(result));
   }
   std::move(callback).Run(std::move(results));
 }

 }  // namespace ash
	// Copyright 2023 The Chromium Authors
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "chrome/browser/ash/accessibility/service/tts_client_impl.h"

	#include "content/public/browser/browser_context.h"
	#include "content/public/browser/tts_controller.h"
	#include "services/accessibility/public/mojom/tts.mojom.h"
	#include "ui/base/l10n/l10n_util.h"

	namespace ash {

	namespace {

	// Limits mirrored from the TTS extension API; Speak() rejects requests that
	// fall outside them.

	// Longest utterance text accepted.
	constexpr int kMaxUtteranceLength = 32768;
	// Slowest speech rate accepted.
	constexpr double kMinRate = 0.1;
	// Fastest speech rate accepted.
	constexpr double kMaxRate = 10.0;
	// Highest speech pitch accepted.
	constexpr double kMaxPitch = 2.0;
	// Loudest speech volume accepted.
	constexpr double kMaxVolume = 1.0;

	// Translates a content-layer TTS event type into the ax::mojom::TtsEventType
	// of the same name, for use on the accessibility service's Mojo interface.
	// NOTE(review): the switch has no `default:` and nothing follows it, so a
	// value outside the enumerators listed here would fall off the end of the
	// function without returning. Callers are presumably expected to pass only
	// valid enumerators - confirm.
	ax::mojom::TtsEventType ToMojo(content::TtsEventType event_type) {
	switch (event_type) {
	case content::TTS_EVENT_START:
	return ax::mojom::TtsEventType::kStart;
	case content::TTS_EVENT_END:
	return ax::mojom::TtsEventType::kEnd;
	case content::TTS_EVENT_WORD:
	return ax::mojom::TtsEventType::kWord;
	case content::TTS_EVENT_SENTENCE:
	return ax::mojom::TtsEventType::kSentence;
	case content::TTS_EVENT_MARKER:
	return ax::mojom::TtsEventType::kMarker;
	case content::TTS_EVENT_INTERRUPTED:
	return ax::mojom::TtsEventType::kInterrupted;
	case content::TTS_EVENT_CANCELLED:
	return ax::mojom::TtsEventType::kCancelled;
	case content::TTS_EVENT_ERROR:
	return ax::mojom::TtsEventType::kError;
	case content::TTS_EVENT_PAUSE:
	return ax::mojom::TtsEventType::kPause;
	case content::TTS_EVENT_RESUME:
	return ax::mojom::TtsEventType::kResume;
	}
	}

	// Relays the TTS events of a single utterance to a remote
	// ax::mojom::TtsUtteranceClient. Owned by TtsUtterance.
	class AtpTtsEventHandler : public content::UtteranceEventDelegate {
	 public:
	  AtpTtsEventHandler() = default;
	  AtpTtsEventHandler(const AtpTtsEventHandler&) = delete;
	  AtpTtsEventHandler& operator=(const AtpTtsEventHandler&) = delete;
	  ~AtpTtsEventHandler() override = default;

	  // Binds `utterance_client_` to a new message pipe and returns the pipe's
	  // receiver end, which is what gets handed to the remote client.
	  mojo::PendingReceiver<ax::mojom::TtsUtteranceClient> PassReceiver() {
	    return utterance_client_.BindNewPipeAndPassReceiver();
	  }

	  // content::UtteranceEventDelegate:
	  // Packages the event as an ax::mojom::TtsEvent and sends it to the client.
	  void OnTtsEvent(content::TtsUtterance* utterance,
	                  content::TtsEventType event_type,
	                  int char_index,
	                  int length,
	                  const std::string& error_message) override {
	    auto event = ax::mojom::TtsEvent::New();
	    event->type = ToMojo(event_type);
	    event->char_index = char_index;
	    event->length = length;
	    event->is_final = utterance->IsFinished();
	    // The error text is only forwarded for error events.
	    const bool is_error = event_type == content::TTS_EVENT_ERROR;
	    if (is_error) {
	      event->error_message = error_message;
	    }
	    utterance_client_->OnEvent(std::move(event));
	  }

	 private:
	  // Remote end that receives every event reported to OnTtsEvent().
	  mojo::Remote<ax::mojom::TtsUtteranceClient> utterance_client_;
	};

	} // namespace

	// Stores `profile` for later use when creating utterances and listing voices.
	// `profile` must be non-null; this is enforced with a CHECK.
	TtsClientImpl::TtsClientImpl(content::BrowserContext* profile)
	: profile_(profile) {
	CHECK(profile_);
	}

	// Defaulted: no teardown beyond destroying the members.
	TtsClientImpl::~TtsClientImpl() = default;

	// Adds `tts_receiver` to `tts_receivers_`, with this object as the
	// implementation that serves it.
	void TtsClientImpl::Bind(mojo::PendingReceiver<Tts> tts_receiver) {
	tts_receivers_.Add(this, std::move(tts_receiver));
	}

	void TtsClientImpl::Speak(const std::string& utterance,
	ax::mojom::TtsOptionsPtr options,
	SpeakCallback callback) {
	auto result = ax::mojom::TtsSpeakResult::New();
	if (utterance.size() > kMaxUtteranceLength) {
	result->error = ax::mojom::TtsError::kErrorUtteranceTooLong;
	std::move(callback).Run(std::move(result));
	return;
	}

	// Check for errors in options.
	// TODO(crbug.com/41278287): Centralize the struct validation.
	if (options->rate < kMinRate \|\| options->rate > kMaxRate) {
	result->error = ax::mojom::TtsError::kErrorInvalidRate;
	std::move(callback).Run(std::move(result));
	return;
	}
	if (options->pitch < 0.0 \|\| options->pitch > kMaxPitch) {
	result->error = ax::mojom::TtsError::kErrorInvalidPitch;
	std::move(callback).Run(std::move(result));
	return;
	}
	if (options->volume < 0.0 \|\| options->volume > kMaxVolume) {
	result->error = ax::mojom::TtsError::kErrorInvalidVolume;
	std::move(callback).Run(std::move(result));
	return;
	}

	// Only make the utterance once we know we aren't going to return early.
	std::unique_ptr<content::TtsUtterance> tts_utterance =
	content::TtsUtterance::Create(profile_);
	tts_utterance->SetText(utterance);
	// TODO(b:277221897): Pass a fake GURL matching the ash extension URL.
	// This will support both UMA and using enhanced network voices in ATP
	// select-to-speak.
	tts_utterance->SetSrcUrl(GURL(""));

	tts_utterance->SetContinuousParameters(options->rate, options->pitch,
	options->volume);
	tts_utterance->SetShouldClearQueue(!options->enqueue);
	if (options->lang) {
	std::string lang = options->lang.value();
	if (!lang.empty() && !l10n_util::IsValidLocaleSyntax(lang)) {
	result->error = ax::mojom::TtsError::kErrorInvalidLang;
	std::move(callback).Run(std::move(result));
	return;
	}
	tts_utterance->SetLang(options->lang.value());
	}
	if (options->voice_name) {
	tts_utterance->SetVoiceName(options->voice_name.value());
	}
	if (options->engine_id) {
	tts_utterance->SetEngineId(options->engine_id.value());
	}
	if (options->on_event) {
	auto atp_tts_event_handler = std::make_unique<AtpTtsEventHandler>();
	result->utterance_client = atp_tts_event_handler->PassReceiver();
	tts_utterance->SetEventDelegate(std::move(atp_tts_event_handler));
	}
	// Note: we don't need desired/required event types because they aren't
	// passed by ChromeVox or STS. We don't need an options_dict, it's redundant,
	// and we don't need a src_id because each ATP utterance has its own callback.

	// Send the callback back to ATP with the utterance client.
	result->error = ax::mojom::TtsError::kNoError;
	std::move(callback).Run(std::move(result));

	// Start speech.
	content::TtsController* controller = content::TtsController::GetInstance();
	controller->SpeakOrEnqueue(std::move(tts_utterance));
	}

	// Asks the TtsController to stop speech, identifying the requester with an
	// empty source URL.
	void TtsClientImpl::Stop() {
	  // TODO(b:277221897): Pass a fake GURL matching the ash extension URL so that
	  // extensions cannot clobber other speech.
	  content::TtsController::GetInstance()->Stop(GURL(""));
	}

	// Forwards the pause request to the TtsController instance.
	void TtsClientImpl::Pause() {
	content::TtsController::GetInstance()->Pause();
	}

	// Forwards the resume request to the TtsController instance.
	void TtsClientImpl::Resume() {
	content::TtsController::GetInstance()->Resume();
	}

	// Runs `callback` with whatever the TtsController instance reports from
	// IsSpeaking().
	void TtsClientImpl::IsSpeaking(IsSpeakingCallback callback) {
	std::move(callback).Run(content::TtsController::GetInstance()->IsSpeaking());
	}

	// Asks the TtsController for the voices available to `profile_`, converts each
	// one to an ax::mojom::TtsVoice, and passes the list to `callback`.
	void TtsClientImpl::GetVoices(GetVoicesCallback callback) {
	  std::vector<content::VoiceData> voices;
	  // TODO(b:277221897): Pass a fake GURL matching the extension URL so that
	  // Select to Speak can get the enhanced network voices.
	  content::TtsController::GetInstance()->GetVoices(profile_, GURL(""), &voices);

	  std::vector<ax::mojom::TtsVoicePtr> results;
	  results.reserve(voices.size());
	  for (auto& voice : voices) {
	    auto result = ax::mojom::TtsVoice::New();
	    // `voices` is a local that is discarded after this loop, so its strings
	    // are moved out instead of copied.
	    result->voice_name = std::move(voice.name);
	    result->lang = std::move(voice.lang);
	    result->remote = voice.remote;
	    result->engine_id = std::move(voice.engine_id);
	    // `event_types` is left unset, rather than set to an empty list, for a
	    // voice that reports no events.
	    if (!voice.events.empty()) {
	      std::vector<ax::mojom::TtsEventType> event_types;
	      event_types.reserve(voice.events.size());
	      for (const auto type : voice.events) {
	        event_types.push_back(ToMojo(type));
	      }
	      result->event_types = std::move(event_types);
	    }
	    results.push_back(std::move(result));
	  }
	  std::move(callback).Run(std::move(results));
	}

	} // namespace ash