// blob: dcfb2d419ccbaca3387672247643952c1a1708ef [file] [log] [blame]
// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <math.h>
#include <stddef.h>
#include <algorithm>
#include <map>
#include <memory>
#include "base/command_line.h"
#include "base/compiler_specific.h"
#include "base/debug/leak_annotations.h"
#include "base/functional/bind.h"
#include "base/memory/raw_ptr.h"
#include "base/no_destructor.h"
#include "base/synchronization/lock.h"
#include "base/task/sequenced_task_runner.h"
#include "base/task/task_runner.h"
#include "base/task/task_traits.h"
#include "base/task/thread_pool.h"
#include "base/threading/sequence_bound.h"
#include "content/browser/speech/tts_platform_impl.h"
#include "content/public/browser/browser_task_traits.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/tts_controller.h"
#include "content/public/common/content_switches.h"
#include "library_loaders/libspeechd.h"
namespace content {
namespace {
// Describes one Speech Dispatcher synthesis voice. Instances are filled in by
// TtsPlatformImplBackgroundWorker::InitializeVoices().
struct SPDChromeVoice {
// Voice name as reported by Speech Dispatcher (copied from SPDVoice::name).
std::string name;
// Name of the output module the voice was listed under.
std::string module;
// Language as reported by Speech Dispatcher (copied from SPDVoice::language).
std::string language;
};
// Voices keyed by "<voice name> <module>"; the key is built in
// TtsPlatformImplBackgroundWorker::InitializeVoices().
using PlatformVoices = std::map<std::string, SPDChromeVoice>;
// Sentinel stored in the |utterance_id_| members when no utterance is active.
constexpr int kInvalidUtteranceId = -1;
// Sentinel stored in |msg_uid_| when no message is queued. It matches the -1
// that spd_say() returns on failure.
constexpr int kInvalidMessageUid = -1;
} // namespace
// Owns the Speech Dispatcher (libspeechd) connection and makes every call into
// the library. TtsPlatformImplLinux holds the worker in a base::SequenceBound
// and drives it through AsyncCall(); the worker reports results by posting
// tasks to the UI thread.
class TtsPlatformImplBackgroundWorker {
public:
TtsPlatformImplBackgroundWorker() = default;
TtsPlatformImplBackgroundWorker(const TtsPlatformImplBackgroundWorker&) =
delete;
TtsPlatformImplBackgroundWorker& operator=(
const TtsPlatformImplBackgroundWorker&) = delete;
~TtsPlatformImplBackgroundWorker() = default;
// Loads libspeechd, opens the connection, enumerates the voices and posts the
// outcome to TtsPlatformImplLinux::OnInitialized() on the UI thread.
void Initialize();
// Configures the connection (module, voice, rate, pitch, language) and queues
// |parsed_utterance| with spd_say(). |on_speak_finished| is posted to the UI
// thread with true if the utterance was queued, false otherwise.
void ProcessSpeech(int utterance_id,
const std::string& parsed_utterance,
const std::string& lang,
float rate,
float pitch,
SPDChromeVoice voice,
base::OnceCallback<void(bool)> on_speak_finished);
// Pause(), Resume() and StopSpeaking() do nothing unless there is a
// connection and a queued message (|msg_uid_| is valid).
void Pause();
void Resume();
void StopSpeaking();
// Closes the connection, if any.
void Shutdown();
private:
// Loads the libspeechd shared library. Returns the loader's result.
bool InitializeSpeechd();
// Fills the given map with every voice of every output module.
void InitializeVoices(PlatformVoices*);
// Opens the connection and registers the notification callbacks. Leaves
// |conn_| null if spd_open() fails.
void OpenConnection();
// Closes the connection and resets |conn_| to null. No-op without connection.
void CloseConnection();
// Translates a Speech Dispatcher notification for the current message into a
// TTS event. Notifications for any other message id are ignored.
void OnSpeechEvent(int msg_id, SPDNotificationType type);
// Send a TTS event notification to the TTS controller.
void SendTtsEvent(int utterance_id,
TtsEventType event_type,
int char_index,
int length = -1);
// Callbacks installed on the SPDConnection. Both forward to OnSpeechEvent()
// through the worker's SequenceBound.
static void NotificationCallback(size_t msg_id,
size_t client_id,
SPDNotificationType type);
static void IndexMarkCallback(size_t msg_id,
size_t client_id,
SPDNotificationType state,
char* index_mark);
// Dynamically loaded libspeechd entry points.
LibSpeechdLoader libspeechd_loader_;
// The Speech Dispatcher connection; null when not connected.
raw_ptr<SPDConnection> conn_ = nullptr;
// Id returned by the last spd_say() call, or kInvalidMessageUid.
int msg_uid_ = kInvalidMessageUid;
// The fields below describe only the utterance that is currently being
// processed.
int utterance_id_ = kInvalidUtteranceId;
size_t utterance_length_ = 0;
size_t utterance_char_position_ = 0;
};
// Linux implementation of TtsPlatformImpl. It keeps the speaking/paused state
// and delegates all Speech Dispatcher work to a
// TtsPlatformImplBackgroundWorker bound to a thread-pool sequence.
class TtsPlatformImplLinux : public TtsPlatformImpl {
public:
TtsPlatformImplLinux(const TtsPlatformImplLinux&) = delete;
TtsPlatformImplLinux& operator=(const TtsPlatformImplLinux&) = delete;
// TtsPlatformImpl overrides.
bool PlatformImplSupported() override;
bool PlatformImplInitialized() override;
void Speak(int utterance_id,
const std::string& utterance,
const std::string& lang,
const VoiceData& voice,
const UtteranceContinuousParameters& params,
base::OnceCallback<void(bool)> on_speak_finished) override;
bool StopSpeaking() override;
void Pause() override;
void Resume() override;
bool IsSpeaking() override;
void GetVoices(std::vector<VoiceData>* out_voices) override;
void Shutdown() override;
// Called on the UI thread once the worker has finished initializing.
// |voices| is stored only when |success| is true.
void OnInitialized(bool success, PlatformVoices voices);
// Called on the UI thread with the result of queuing an utterance; forwards
// |success| to |on_speak_finished|.
void OnSpeakScheduled(base::OnceCallback<void(bool)> on_speak_finished,
bool success);
// Called on the UI thread when the worker reports the end of |utterance_id|.
void OnSpeakFinished(int utterance_id);
// Gives the worker's static libspeechd callbacks access to the SequenceBound
// so they can post to the worker.
base::SequenceBound<TtsPlatformImplBackgroundWorker>* worker() {
return &worker_;
}
// Get the single instance of this class.
static TtsPlatformImplLinux* GetInstance();
private:
friend base::NoDestructor<TtsPlatformImplLinux>;
TtsPlatformImplLinux();
// Continuation of Speak(), bound as the StripSSML() callback. Clamps rate and
// pitch, resolves the voice and hands the utterance to the worker.
void ProcessSpeech(int utterance_id,
const std::string& lang,
const VoiceData& voice,
const UtteranceContinuousParameters& params,
base::OnceCallback<void(bool)> on_speak_finished,
const std::string& parsed_utterance);
// Holds the platform state.
bool is_supported_ = false;
bool is_initialized_ = false;
bool is_speaking_ = false;
bool paused_ = false;
// The current utterance being spoken.
int utterance_id_ = kInvalidUtteranceId;
// Maps a key composed of a voice name and its module to the voice's details.
// Used to uniquely identify a voice across all available modules.
PlatformVoices voices_;
// Hold the state and the code of the background implementation.
base::SequenceBound<TtsPlatformImplBackgroundWorker> worker_;
};
//
// TtsPlatformImplBackgroundWorker
//
void TtsPlatformImplBackgroundWorker::Initialize() {
PlatformVoices voices;
if (InitializeSpeechd()) {
OpenConnection();
InitializeVoices(&voices);
}
bool success = (conn_ != nullptr);
GetUIThreadTaskRunner({})->PostTask(
FROM_HERE,
base::BindOnce(&TtsPlatformImplLinux::OnInitialized,
base::Unretained(TtsPlatformImplLinux::GetInstance()),
success, std::move(voices)));
}
// Configures the Speech Dispatcher connection for one utterance and queues
// |parsed_utterance| for synthesis.
//
// |rate| and |pitch| are multiplicative factors; they are converted to Speech
// Dispatcher's linear scale before being applied. |on_speak_finished| is
// always posted to the UI thread: with false when there is no connection or
// spd_say() fails, with true otherwise.
void TtsPlatformImplBackgroundWorker::ProcessSpeech(
    int utterance_id,
    const std::string& parsed_utterance,
    const std::string& lang,
    float rate,
    float pitch,
    SPDChromeVoice voice,
    base::OnceCallback<void(bool)> on_speak_finished) {
  if (!conn_) {
    GetUIThreadTaskRunner({})->PostTask(
        FROM_HERE, base::BindOnce(std::move(on_speak_finished), false));
    return;
  }

  // Map our multiplicative range to Speech Dispatcher's linear range:
  // .334 -> -100 and 3 -> 100, i.e. 100 * log3(factor).
  const auto to_linear_range = [](float factor) {
    return 100 * log10(factor) / log10(3);
  };

  libspeechd_loader_.spd_set_output_module(conn_, voice.module.c_str());
  libspeechd_loader_.spd_set_synthesis_voice(conn_, voice.name.c_str());
  libspeechd_loader_.spd_set_voice_rate(conn_, to_linear_range(rate));
  libspeechd_loader_.spd_set_voice_pitch(conn_, to_linear_range(pitch));

  // Only override the default language when one was requested.
  const bool has_language = !lang.empty();
  if (has_language)
    libspeechd_loader_.spd_set_language(conn_, lang.c_str());

  // Start tracking the new utterance before it is queued.
  utterance_id_ = utterance_id;
  utterance_length_ = parsed_utterance.size();
  utterance_char_position_ = 0;

  // spd_say(...) returns a fresh msg_uid on success and -1 on failure.
  msg_uid_ =
      libspeechd_loader_.spd_say(conn_, SPD_TEXT, parsed_utterance.c_str());
  const bool queued = (msg_uid_ != kInvalidMessageUid);
  GetUIThreadTaskRunner({})->PostTask(
      FROM_HERE, base::BindOnce(std::move(on_speak_finished), queued));
}
// Pauses speech on the connection. Does nothing when there is no connection
// or no queued message.
void TtsPlatformImplBackgroundWorker::Pause() {
  if (!conn_ || msg_uid_ == kInvalidMessageUid)
    return;
  libspeechd_loader_.spd_pause(conn_);
}
// Resumes speech on the connection. Does nothing when there is no connection
// or no queued message.
void TtsPlatformImplBackgroundWorker::Resume() {
  if (!conn_ || msg_uid_ == kInvalidMessageUid)
    return;
  libspeechd_loader_.spd_resume(conn_);
}
// Stops the queued message and forgets the current utterance. Does nothing
// when there is no connection or no queued message.
void TtsPlatformImplBackgroundWorker::StopSpeaking() {
  if (!conn_ || msg_uid_ == kInvalidMessageUid)
    return;

  const bool stop_failed = (libspeechd_loader_.spd_stop(conn_) == -1);
  if (stop_failed) {
    // spd_stop() reported an error: drop the connection and open a new one.
    CloseConnection();
    OpenConnection();
  }

  utterance_id_ = kInvalidUtteranceId;
  msg_uid_ = kInvalidMessageUid;
}
// Releases the Speech Dispatcher connection, if one is open.
void TtsPlatformImplBackgroundWorker::Shutdown() {
CloseConnection();
}
// Loads the libspeechd shared library through the loader and returns the
// loader's result.
bool TtsPlatformImplBackgroundWorker::InitializeSpeechd() {
  const bool library_loaded = libspeechd_loader_.Load("libspeechd.so.2");
  return library_loaded;
}
// Enumerates every synthesis voice of every Speech Dispatcher output module
// and records it in |voices|, keyed by "<voice name> <module name>". A key
// that is already present is left untouched.
//
// Side effect: the connection's output module is switched to each module in
// turn and is left set to the last module listed.
//
// Both null-terminated arrays returned by libspeechd, and their elements, are
// released with free() once they have been copied.
// NOTE(review): the strings referenced by each SPDVoice are not released
// here; libspeechd's free_spd_voices()/free_spd_modules() would be the
// complete cleanup if LibSpeechdLoader exposed them — confirm.
void TtsPlatformImplBackgroundWorker::InitializeVoices(PlatformVoices* voices) {
  if (!conn_)
    return;

  char** modules = libspeechd_loader_.spd_list_modules(conn_);
  if (!modules)
    return;

  for (int i = 0; UNSAFE_TODO(modules[i]); i++) {
    char* module = UNSAFE_TODO(modules[i]);
    libspeechd_loader_.spd_set_output_module(conn_, module);
    SPDVoice** spd_voices = libspeechd_loader_.spd_list_synthesis_voices(conn_);
    if (spd_voices) {
      for (int j = 0; UNSAFE_TODO(spd_voices[j]); j++) {
        SPDVoice* spd_voice = UNSAFE_TODO(spd_voices[j]);
        // Constructing a std::string from a null pointer is undefined
        // behavior, so skip nameless voices and tolerate a missing language.
        if (spd_voice->name) {
          SPDChromeVoice spd_data;
          spd_data.name = spd_voice->name;
          spd_data.module = module;
          if (spd_voice->language)
            spd_data.language = spd_voice->language;
          std::string key = spd_data.name + " " + spd_data.module;
          voices->emplace(std::move(key), std::move(spd_data));
        }
        free(spd_voice);
      }
      // Release the array itself; previously only its elements were freed.
      free(spd_voices);
    }
    free(module);
  }
  // Release the module array itself; previously only its elements were freed.
  free(modules);
}
// Opens the Speech Dispatcher connection in threaded mode, installs the
// notification callbacks and enables begin/end/cancel/pause/resume
// notifications. |conn_| is left null when spd_open() fails.
void TtsPlatformImplBackgroundWorker::OpenConnection() {
  {
    // spd_open has memory leaks which are hard to suppress.
    // http://crbug.com/317360
    ANNOTATE_SCOPED_MEMORY_LEAK;
    conn_ = libspeechd_loader_.spd_open("chrome", "extension_api", nullptr,
                                        SPD_MODE_THREADED);
  }
  if (!conn_)
    return;

  // Every state-change event goes through the same handler.
  const auto on_notification =
      &TtsPlatformImplBackgroundWorker::NotificationCallback;
  conn_->callback_resume = on_notification;
  conn_->callback_pause = on_notification;
  conn_->callback_cancel = on_notification;
  conn_->callback_end = on_notification;
  conn_->callback_begin = on_notification;

  // Index marks carry an extra argument and therefore need their own handler.
  conn_->callback_im = &TtsPlatformImplBackgroundWorker::IndexMarkCallback;

  libspeechd_loader_.spd_set_notification_on(conn_, SPD_BEGIN);
  libspeechd_loader_.spd_set_notification_on(conn_, SPD_END);
  libspeechd_loader_.spd_set_notification_on(conn_, SPD_CANCEL);
  libspeechd_loader_.spd_set_notification_on(conn_, SPD_PAUSE);
  libspeechd_loader_.spd_set_notification_on(conn_, SPD_RESUME);
}
// Closes the Speech Dispatcher connection and clears |conn_|. Safe to call
// when no connection is open.
void TtsPlatformImplBackgroundWorker::CloseConnection() {
  if (!conn_)
    return;
  libspeechd_loader_.spd_close(conn_);
  conn_ = nullptr;
}
void TtsPlatformImplBackgroundWorker::OnSpeechEvent(int msg_id,
SPDNotificationType type) {
if (!conn_ || msg_id != msg_uid_)
return;
switch (type) {
case SPD_EVENT_BEGIN:
utterance_char_position_ = 0;
SendTtsEvent(utterance_id_, TTS_EVENT_START, utterance_char_position_,
-1);
break;
case SPD_EVENT_RESUME:
SendTtsEvent(utterance_id_, TTS_EVENT_RESUME, utterance_char_position_,
-1);
break;
case SPD_EVENT_END:
GetUIThreadTaskRunner({})->PostTask(
FROM_HERE,
base::BindOnce(&TtsPlatformImplLinux::OnSpeakFinished,
base::Unretained(TtsPlatformImplLinux::GetInstance()),
utterance_id_));
utterance_char_position_ = utterance_length_;
SendTtsEvent(utterance_id_, TTS_EVENT_END, utterance_char_position_, 0);
break;
case SPD_EVENT_PAUSE:
SendTtsEvent(utterance_id_, TTS_EVENT_PAUSE, utterance_char_position_,
-1);
break;
case SPD_EVENT_CANCEL:
SendTtsEvent(utterance_id_, TTS_EVENT_CANCELLED, utterance_char_position_,
-1);
break;
case SPD_EVENT_INDEX_MARK:
// TODO: Can we get length from linux? If so, update
// utterance_char_position_.
SendTtsEvent(utterance_id_, TTS_EVENT_MARKER, utterance_char_position_,
-1);
break;
}
}
// Posts TtsController::OnTtsEvent() to the UI thread with the given utterance
// id, event type, character index and length, and an empty error message.
void TtsPlatformImplBackgroundWorker::SendTtsEvent(int utterance_id,
                                                   TtsEventType event_type,
                                                   int char_index,
                                                   int length) {
  auto notify_controller = base::BindOnce(
      &TtsController::OnTtsEvent,
      base::Unretained(TtsController::GetInstance()), utterance_id, event_type,
      char_index, length, std::string());
  GetUIThreadTaskRunner({})->PostTask(FROM_HERE, std::move(notify_controller));
}
// static
void TtsPlatformImplBackgroundWorker::NotificationCallback(
size_t msg_id,
size_t client_id,
SPDNotificationType type) {
TtsPlatformImplLinux::GetInstance()
->worker()
->AsyncCall(&TtsPlatformImplBackgroundWorker::OnSpeechEvent)
.WithArgs(msg_id, type);
}
// static
void TtsPlatformImplBackgroundWorker::IndexMarkCallback(
size_t msg_id,
size_t client_id,
SPDNotificationType type,
char* index_mark) {
// TODO(dtseng): index_mark appears to specify an index type supplied by a
// client. Need to explore how this is used before hooking it up with existing
// word, sentence events.
TtsPlatformImplLinux::GetInstance()
->worker()
->AsyncCall(&TtsPlatformImplBackgroundWorker::OnSpeechEvent)
.WithArgs(msg_id, type);
}
//
// TtsPlatformImplLinux
//
// Binds the worker to a blocking-capable thread-pool sequence. The platform is
// marked as supported, and worker initialization is started, only when the
// kEnableSpeechDispatcher switch is present on the command line.
TtsPlatformImplLinux::TtsPlatformImplLinux()
    : worker_(base::ThreadPool::CreateSequencedTaskRunner({base::MayBlock()})) {
  DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));

  const bool speech_dispatcher_enabled =
      base::CommandLine::ForCurrentProcess()->HasSwitch(
          switches::kEnableSpeechDispatcher);
  if (speech_dispatcher_enabled) {
    // The TTS platform is supported. Its initialization runs on the worker
    // sequence; OnInitialized() is called once it has completed.
    is_supported_ = true;
    worker_.AsyncCall(&TtsPlatformImplBackgroundWorker::Initialize);
  }
}
// Returns true when the constructor found the kEnableSpeechDispatcher switch.
bool TtsPlatformImplLinux::PlatformImplSupported() {
return is_supported_;
}
// Returns true once OnInitialized() has run, whether or not the connection to
// Speech Dispatcher could be established.
bool TtsPlatformImplLinux::PlatformImplInitialized() {
return is_initialized_;
}
// Starts speaking |utterance|. The request is rejected, by running
// |on_speak_finished| with false, while another utterance is being spoken or
// speech is paused. Otherwise SSML is stripped by the TtsController and the
// result is handed to ProcessSpeech().
void TtsPlatformImplLinux::Speak(
    int utterance_id,
    const std::string& utterance,
    const std::string& lang,
    const VoiceData& voice,
    const UtteranceContinuousParameters& params,
    base::OnceCallback<void(bool)> on_speak_finished) {
  DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
  DCHECK(PlatformImplInitialized());

  const bool busy = paused_ || is_speaking_;
  if (busy) {
    std::move(on_speak_finished).Run(false);
    return;
  }

  // Flag that an utterance is getting emitted. The |is_speaking_| flag is set
  // back to false once the utterance has been fully spoken or stopped, or if
  // the voice synthesizer was not able to emit it.
  is_speaking_ = true;
  utterance_id_ = utterance_id;

  // Runs once the worker knows whether the utterance could be queued.
  auto on_scheduled =
      base::BindOnce(&TtsPlatformImplLinux::OnSpeakScheduled,
                     base::Unretained(this), std::move(on_speak_finished));

  // Runs with the SSML-free text; StripSSML() supplies the last argument.
  auto on_ssml_stripped = base::BindOnce(
      &TtsPlatformImplLinux::ProcessSpeech, base::Unretained(this),
      utterance_id, lang, voice, params, std::move(on_scheduled));

  // Parse SSML and process speech.
  TtsController::GetInstance()->StripSSML(utterance,
                                          std::move(on_ssml_stripped));
}
// Asks the worker to stop the current utterance and resets the local speaking
// state right away. Always returns true.
bool TtsPlatformImplLinux::StopSpeaking() {
  DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
  DCHECK(PlatformImplInitialized());

  worker_.AsyncCall(&TtsPlatformImplBackgroundWorker::StopSpeaking);

  // Local state does not wait for the worker to confirm the stop.
  utterance_id_ = kInvalidUtteranceId;
  is_speaking_ = false;
  paused_ = false;
  return true;
}
// Pauses the current utterance. Ignored when nothing is being spoken or when
// speech is already paused.
void TtsPlatformImplLinux::Pause() {
  DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
  DCHECK(PlatformImplInitialized());

  const bool can_pause = is_speaking_ && !paused_;
  if (can_pause) {
    worker_.AsyncCall(&TtsPlatformImplBackgroundWorker::Pause);
    paused_ = true;
  }
}
// Resumes a paused utterance. Ignored when nothing is being spoken or when
// speech is not paused.
void TtsPlatformImplLinux::Resume() {
  DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
  DCHECK(PlatformImplInitialized());

  const bool can_resume = is_speaking_ && paused_;
  if (can_resume) {
    worker_.AsyncCall(&TtsPlatformImplBackgroundWorker::Resume);
    paused_ = false;
  }
}
// Returns true from the moment Speak() accepts an utterance until it has
// finished, been stopped, or failed to be queued.
bool TtsPlatformImplLinux::IsSpeaking() {
return is_speaking_;
}
void TtsPlatformImplLinux::GetVoices(std::vector<VoiceData>* out_voices) {
DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
DCHECK(PlatformImplInitialized());
for (auto it = voices_.begin(); it != voices_.end(); ++it) {
out_voices->push_back(VoiceData());
VoiceData& voice = out_voices->back();
voice.native = true;
voice.name = it->first;
voice.lang = it->second.language;
voice.events.insert(TTS_EVENT_START);
voice.events.insert(TTS_EVENT_END);
voice.events.insert(TTS_EVENT_CANCELLED);
voice.events.insert(TTS_EVENT_MARKER);
voice.events.insert(TTS_EVENT_PAUSE);
voice.events.insert(TTS_EVENT_RESUME);
}
}
// Asks the worker to shut down, which closes the Speech Dispatcher connection
// on the worker's sequence.
void TtsPlatformImplLinux::Shutdown() {
worker_.AsyncCall(&TtsPlatformImplBackgroundWorker::Shutdown);
}
// Completion handler for the worker's Initialize(). Stores |voices| when the
// connection was established, marks the platform as initialized in either
// case, and tells the TtsController that the voice list changed.
void TtsPlatformImplLinux::OnInitialized(bool success, PlatformVoices voices) {
  DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));

  if (success) {
    voices_ = std::move(voices);
  }

  // Initialization is complete even on failure; the voice list stays empty.
  is_initialized_ = true;
  TtsController::GetInstance()->VoicesChanged();
}
// Receives the worker's verdict on whether the utterance could be queued and
// forwards it to |on_speak_finished|.
void TtsPlatformImplLinux::OnSpeakScheduled(
    base::OnceCallback<void(bool)> on_speak_finished,
    bool success) {
  DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
  DCHECK(is_speaking_);

  const bool emission_failed = !success;
  if (emission_failed) {
    // No asynchronous TTS event will ever confirm the end of this utterance,
    // so the speaking state has to be cleared here.
    utterance_id_ = kInvalidUtteranceId;
    is_speaking_ = false;
  }

  // Pass the result on to our caller.
  std::move(on_speak_finished).Run(success);
}
// Clears the speaking state when the worker reports that |utterance_id| has
// ended. Reports for any utterance other than the current one are ignored.
void TtsPlatformImplLinux::OnSpeakFinished(int utterance_id) {
  DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));

  const bool is_current_utterance = (utterance_id == utterance_id_);
  if (is_current_utterance) {
    DCHECK(is_speaking_);
    DCHECK_NE(utterance_id_, kInvalidUtteranceId);
    utterance_id_ = kInvalidUtteranceId;
    is_speaking_ = false;
  }
}
// Second half of Speak(), run with the SSML-stripped |parsed_utterance|.
// Clamps rate and pitch to the supported range, resolves |voice| against the
// known platform voices and dispatches the utterance to the worker. If the
// voice name is unknown, a default-constructed (empty) SPDChromeVoice is sent.
void TtsPlatformImplLinux::ProcessSpeech(
    int utterance_id,
    const std::string& lang,
    const VoiceData& voice,
    const UtteranceContinuousParameters& params,
    base::OnceCallback<void(bool)> on_speak_finished,
    const std::string& parsed_utterance) {
  DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));

  // Speech dispatcher's speech params are around 3x at either limit.
  constexpr float kMinFactor = 0.334f;
  constexpr float kMaxFactor = 3.0f;
  const float rate =
      std::clamp(static_cast<float>(params.rate), kMinFactor, kMaxFactor);
  const float pitch =
      std::clamp(static_cast<float>(params.pitch), kMinFactor, kMaxFactor);

  SPDChromeVoice matched_voice;
  if (const auto found = voices_.find(voice.name); found != voices_.end()) {
    matched_voice = found->second;
  }

  worker_.AsyncCall(&TtsPlatformImplBackgroundWorker::ProcessSpeech)
      .WithArgs(utterance_id, parsed_utterance, lang, rate, pitch,
                matched_voice, std::move(on_speak_finished));
}
// static
// Returns the process-wide instance. It is created on first use and, being
// held in a base::NoDestructor, is never destroyed.
TtsPlatformImplLinux* TtsPlatformImplLinux::GetInstance() {
  static base::NoDestructor<TtsPlatformImplLinux> instance;
  return instance.get();
}
// static
// Platform hook: in this file, the TtsPlatformImpl singleton is the
// TtsPlatformImplLinux instance.
TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
  TtsPlatformImplLinux* const platform_impl =
      TtsPlatformImplLinux::GetInstance();
  return platform_impl;
}
} // namespace content