// chrome/browser/speech/tts_linux.cc - chromium/src - Git at Google

 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include <math.h>
 #include <stddef.h>

 #include <map>
 #include <memory>

 #include "base/command_line.h"
 #include "base/debug/leak_annotations.h"
 #include "base/macros.h"
 #include "base/memory/singleton.h"
 #include "base/synchronization/lock.h"
 #include "base/task_scheduler/post_task.h"
 #include "chrome/browser/speech/tts_platform.h"
 #include "content/public/browser/browser_thread.h"
 #include "content/public/common/content_switches.h"
 #include "library_loaders/libspeechd.h"

 using content::BrowserThread;

 namespace {

 // Error message stored in |error_| when Speak() is called while Speech
 // Dispatcher is not available.
 const char kNotSupportedError[] =
     "Native speech synthesis not supported on this platform.";

 // Identifies one Speech Dispatcher voice: the voice's own name together with
 // the output module that provides it.
 struct SPDChromeVoice {
   std::string name;
   std::string module;
 };

 }  // namespace

 // Linux implementation of TtsPlatformImpl that drives Speech Dispatcher
 // through the dynamically loaded libspeechd client library.
 class TtsPlatformImplLinux : public TtsPlatformImpl {
  public:
   // Returns true only when libspeechd is loaded and a connection is open.
   // Also returns false when |initialization_lock_| cannot be acquired
   // immediately.
   bool PlatformImplAvailable() override;
   // Configures voice, rate, pitch and language on the connection and queues
   // |utterance| for speaking. Returns false if the platform is unavailable or
   // if spd_say() fails.
   bool Speak(int utterance_id,
              const std::string& utterance,
              const std::string& lang,
              const VoiceData& voice,
              const UtteranceContinuousParameters& params) override;
   // Stops speech via spd_stop(); returns false if unavailable or on failure.
   bool StopSpeaking() override;
   // Pause/Resume forward to spd_pause()/spd_resume() when available.
   void Pause() override;
   void Resume() override;
   // True while the most recently recorded notification is SPD_EVENT_BEGIN.
   bool IsSpeaking() override;
   // Appends one VoiceData per known Speech Dispatcher voice to |out_voices|.
   // The voice list is queried once and cached in |all_native_voices_|.
   void GetVoices(std::vector<VoiceData>* out_voices) override;

   // Translates a Speech Dispatcher notification into the matching TTS event
   // for the current utterance and reports it to TtsController.
   void OnSpeechEvent(SPDNotificationType type);

   // Get the single instance of this class.
   static TtsPlatformImplLinux* GetInstance();

  private:
   TtsPlatformImplLinux();
   ~TtsPlatformImplLinux() override;

   // Initiate the connection with the speech dispatcher.
   void Initialize();

   // Resets the connection with speech dispatcher.
   void Reset();

   // Callback installed on the connection for begin/end/cancel/pause/resume
   // notifications. Records |type| and posts OnSpeechEvent() to the UI thread.
   static void NotificationCallback(size_t msg_id,
                                    size_t client_id,
                                    SPDNotificationType type);

   // Callback installed on the connection for index marks. Records |state| and
   // posts OnSpeechEvent() to the UI thread; |index_mark| is currently unused.
   static void IndexMarkCallback(size_t msg_id,
                                 size_t client_id,
                                 SPDNotificationType state,
                                 char* index_mark);

   // Most recent notification type recorded by the callbacks above.
   // NOTE(review): written from the callbacks without synchronization and read
   // by IsSpeaking() - confirm that this is intended.
   static SPDNotificationType current_notification_;

   // Guards |conn_| and library loading during Initialize(), Reset() and
   // destruction; PlatformImplAvailable() only try-locks it.
   base::Lock initialization_lock_;
   LibSpeechdLoader libspeechd_loader_;
   // Connection handle returned by spd_open(); null when not connected.
   SPDConnection* conn_;

   // These apply to the current utterance only.
   std::string utterance_;
   int utterance_id_;

   // Maps a key of the form "<voice name> <module>" to the voice's name and
   // module. Used to uniquely identify a voice across all available modules.
   // Null until GetVoices() has been called.
   std::unique_ptr<std::map<std::string, SPDChromeVoice>> all_native_voices_;

   friend struct base::DefaultSingletonTraits<TtsPlatformImplLinux>;

   DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplLinux);
 };

 // static
 // Starts out as SPD_EVENT_END, so IsSpeaking() (which tests for
 // SPD_EVENT_BEGIN) reports false until a begin notification is recorded.
 SPDNotificationType TtsPlatformImplLinux::current_notification_ =
     SPD_EVENT_END;

 // Constructs the platform implementation. When the command line carries
 // switches::kEnableSpeechDispatcher, Initialize() is posted to a background
 // task that may block; otherwise the instance stays unavailable.
 //
 // |conn_| is explicitly set to null: it is read by PlatformImplAvailable(),
 // Reset() and the destructor, and would otherwise hold an indeterminate value
 // whenever Initialize() is never run or returns before calling spd_open().
 TtsPlatformImplLinux::TtsPlatformImplLinux()
     : conn_(nullptr), utterance_id_(0) {
   const base::CommandLine& command_line =
       *base::CommandLine::ForCurrentProcess();
   if (!command_line.HasSwitch(switches::kEnableSpeechDispatcher))
     return;

   // base::Unretained(this) relies on the instance never being destroyed;
   // GetInstance() creates it with leaky singleton traits.
   base::PostTaskWithTraits(
       FROM_HERE, {base::MayBlock(), base::TaskPriority::BACKGROUND},
       base::Bind(&TtsPlatformImplLinux::Initialize, base::Unretained(this)));
 }

 // Loads libspeechd, opens the Speech Dispatcher connection and subscribes to
 // every notification type this class handles. Holds |initialization_lock_|
 // for the whole duration. Leaves |conn_| untouched if the library cannot be
 // loaded, and returns early if the connection cannot be opened.
 void TtsPlatformImplLinux::Initialize() {
   base::AutoLock lock(initialization_lock_);

   const bool library_loaded = libspeechd_loader_.Load("libspeechd.so.2");
   if (!library_loaded)
     return;

   {
     // spd_open has memory leaks which are hard to suppress.
     // http://crbug.com/317360
     ANNOTATE_SCOPED_MEMORY_LEAK;
     conn_ = libspeechd_loader_.spd_open(
         "chrome", "extension_api", NULL, SPD_MODE_THREADED);
   }
   if (conn_ == NULL)
     return;

   // All state-change events share a single handler; index marks get their
   // own because the callback signature differs.
   conn_->callback_resume = &NotificationCallback;
   conn_->callback_pause = &NotificationCallback;
   conn_->callback_cancel = &NotificationCallback;
   conn_->callback_end = &NotificationCallback;
   conn_->callback_begin = &NotificationCallback;
   conn_->callback_im = &IndexMarkCallback;

   // Ask the server to actually deliver those events.
   libspeechd_loader_.spd_set_notification_on(conn_, SPD_BEGIN);
   libspeechd_loader_.spd_set_notification_on(conn_, SPD_END);
   libspeechd_loader_.spd_set_notification_on(conn_, SPD_CANCEL);
   libspeechd_loader_.spd_set_notification_on(conn_, SPD_PAUSE);
   libspeechd_loader_.spd_set_notification_on(conn_, SPD_RESUME);
 }

 // Closes the Speech Dispatcher connection, if one is open, while holding
 // |initialization_lock_|.
 TtsPlatformImplLinux::~TtsPlatformImplLinux() {
   base::AutoLock lock(initialization_lock_);
   if (!conn_)
     return;

   libspeechd_loader_.spd_close(conn_);
   conn_ = NULL;
 }

 void TtsPlatformImplLinux::Reset() {
   base::AutoLock lock(initialization_lock_);
   if (conn_)
     libspeechd_loader_.spd_close(conn_);
   conn_ = libspeechd_loader_.spd_open(
       "chrome", "extension_api", NULL, SPD_MODE_THREADED);
 }

 // Reports whether speech can be used right now: libspeechd must be loaded and
 // a connection must exist. The lock is only try-acquired, so the answer is
 // false whenever another thread currently holds |initialization_lock_|.
 bool TtsPlatformImplLinux::PlatformImplAvailable() {
   bool available = false;
   if (initialization_lock_.Try()) {
     available = libspeechd_loader_.loaded() && (conn_ != NULL);
     initialization_lock_.Release();
   }
   return available;
 }

 bool TtsPlatformImplLinux::Speak(
     int utterance_id,
     const std::string& utterance,
     const std::string& lang,
     const VoiceData& voice,
     const UtteranceContinuousParameters& params) {
   if (!PlatformImplAvailable()) {
     error_ = kNotSupportedError;
     return false;
   }

   // Speech dispatcher's speech params are around 3x at either limit.
   float rate = params.rate > 3 ? 3 : params.rate;
   rate = params.rate < 0.334 ? 0.334 : rate;
   float pitch = params.pitch > 3 ? 3 : params.pitch;
   pitch = params.pitch < 0.334 ? 0.334 : pitch;

   std::map<std::string, SPDChromeVoice>::iterator it =
       all_native_voices_->find(voice.name);
   if (it != all_native_voices_->end()) {
     libspeechd_loader_.spd_set_output_module(conn_, it->second.module.c_str());
     libspeechd_loader_.spd_set_synthesis_voice(conn_, it->second.name.c_str());
   }

   // Map our multiplicative range to Speech Dispatcher's linear range.
   // .334 = -100.
   // 3 = 100.
   libspeechd_loader_.spd_set_voice_rate(conn_, 100 * log10(rate) / log10(3));
   libspeechd_loader_.spd_set_voice_pitch(conn_, 100 * log10(pitch) / log10(3));

   // Support languages other than the default
   if (!lang.empty())
     libspeechd_loader_.spd_set_language(conn_, lang.c_str());

   utterance_ = utterance;
   utterance_id_ = utterance_id;

   if (libspeechd_loader_.spd_say(conn_, SPD_TEXT, utterance.c_str()) == -1) {
     Reset();
     return false;
   }
   return true;
 }

 // Asks Speech Dispatcher to stop speaking. Returns false when the platform is
 // unavailable; if spd_stop() reports failure the connection is reset and
 // false is returned as well.
 bool TtsPlatformImplLinux::StopSpeaking() {
   if (!PlatformImplAvailable())
     return false;

   const bool stopped = libspeechd_loader_.spd_stop(conn_) != -1;
   if (!stopped)
     Reset();
   return stopped;
 }

 // Pauses speech on the current connection; does nothing when the platform is
 // unavailable.
 void TtsPlatformImplLinux::Pause() {
   if (PlatformImplAvailable())
     libspeechd_loader_.spd_pause(conn_);
 }

 // Resumes paused speech on the current connection; does nothing when the
 // platform is unavailable.
 void TtsPlatformImplLinux::Resume() {
   if (PlatformImplAvailable())
     libspeechd_loader_.spd_resume(conn_);
 }

 // Speech counts as in progress exactly while the last notification recorded
 // by the callbacks is SPD_EVENT_BEGIN.
 bool TtsPlatformImplLinux::IsSpeaking() {
   const bool began_last = (current_notification_ == SPD_EVENT_BEGIN);
   return began_last;
 }

 void TtsPlatformImplLinux::GetVoices(
     std::vector<VoiceData>* out_voices) {
   if (!all_native_voices_.get()) {
     all_native_voices_.reset(new std::map<std::string, SPDChromeVoice>());
     char** modules = libspeechd_loader_.spd_list_modules(conn_);
     if (!modules)
       return;
     for (int i = 0; modules[i]; i++) {
       char* module = modules[i];
       libspeechd_loader_.spd_set_output_module(conn_, module);
       SPDVoice** native_voices =
           libspeechd_loader_.spd_list_synthesis_voices(conn_);
       if (!native_voices) {
         free(module);
         continue;
       }
       for (int j = 0; native_voices[j]; j++) {
         SPDVoice* native_voice = native_voices[j];
         SPDChromeVoice native_data;
         native_data.name = native_voice->name;
         native_data.module = module;
         std::string key;
         key.append(native_data.name);
         key.append(" ");
         key.append(native_data.module);
         all_native_voices_->insert(
             std::pair<std::string, SPDChromeVoice>(key, native_data));
         free(native_voices[j]);
       }
       free(modules[i]);
     }
   }

   for (std::map<std::string, SPDChromeVoice>::iterator it =
            all_native_voices_->begin();
        it != all_native_voices_->end();
        it++) {
     out_voices->push_back(VoiceData());
     VoiceData& voice = out_voices->back();
     voice.native = true;
     voice.name = it->first;
     voice.events.insert(TTS_EVENT_START);
     voice.events.insert(TTS_EVENT_END);
     voice.events.insert(TTS_EVENT_CANCELLED);
     voice.events.insert(TTS_EVENT_MARKER);
     voice.events.insert(TTS_EVENT_PAUSE);
     voice.events.insert(TTS_EVENT_RESUME);
   }
 }

 // Converts a Speech Dispatcher notification into the corresponding TTS event
 // for the current utterance and hands it to TtsController. End and pause
 // events report the full utterance length as the character index; all other
 // events report 0. Unrecognized notification types are ignored.
 void TtsPlatformImplLinux::OnSpeechEvent(SPDNotificationType type) {
   TtsController* controller = TtsController::GetInstance();

   decltype(TTS_EVENT_START) tts_event = TTS_EVENT_START;
   std::string::size_type char_index = 0;

   switch (type) {
     case SPD_EVENT_BEGIN:
       tts_event = TTS_EVENT_START;
       break;
     case SPD_EVENT_RESUME:
       tts_event = TTS_EVENT_RESUME;
       break;
     case SPD_EVENT_END:
       tts_event = TTS_EVENT_END;
       char_index = utterance_.size();
       break;
     case SPD_EVENT_PAUSE:
       tts_event = TTS_EVENT_PAUSE;
       char_index = utterance_.size();
       break;
     case SPD_EVENT_CANCEL:
       tts_event = TTS_EVENT_CANCELLED;
       break;
     case SPD_EVENT_INDEX_MARK:
       tts_event = TTS_EVENT_MARKER;
       break;
     default:
       return;
   }

   controller->OnTtsEvent(utterance_id_, tts_event, char_index, std::string());
 }

 // static
 void TtsPlatformImplLinux::NotificationCallback(
     size_t msg_id, size_t client_id, SPDNotificationType type) {
   // We run Speech Dispatcher in threaded mode, so these callbacks should always
   // be in a separate thread.
   if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
     current_notification_ = type;
     BrowserThread::PostTask(
         BrowserThread::UI,
         FROM_HERE,
         base::Bind(&TtsPlatformImplLinux::OnSpeechEvent,
                    base::Unretained(TtsPlatformImplLinux::GetInstance()),
                    type));
   }
 }

 // static
 void TtsPlatformImplLinux::IndexMarkCallback(size_t msg_id,
                                                       size_t client_id,
                                                       SPDNotificationType state,
                                                       char* index_mark) {
   // TODO(dtseng): index_mark appears to specify an index type supplied by a
   // client. Need to explore how this is used before hooking it up with existing
   // word, sentence events.
   // We run Speech Dispatcher in threaded mode, so these callbacks should always
   // be in a separate thread.
   if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
     current_notification_ = state;
     BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
         base::Bind(&TtsPlatformImplLinux::OnSpeechEvent,
         base::Unretained(TtsPlatformImplLinux::GetInstance()),
         state));
   }
 }

 // static
 // Returns the process-wide instance, created on first use with leaky
 // singleton traits.
 TtsPlatformImplLinux* TtsPlatformImplLinux::GetInstance() {
   using LeakyInstance =
       base::Singleton<TtsPlatformImplLinux,
                       base::LeakySingletonTraits<TtsPlatformImplLinux>>;
   return LeakyInstance::get();
 }

 // static
 // On Linux the generic platform accessor hands out the Speech Dispatcher
 // backed implementation.
 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
   TtsPlatformImplLinux* linux_impl = TtsPlatformImplLinux::GetInstance();
   return linux_impl;
 }
	// Copyright (c) 2012 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include <math.h>
	#include <stddef.h>

	#include <map>
	#include <memory>

	#include "base/command_line.h"
	#include "base/debug/leak_annotations.h"
	#include "base/macros.h"
	#include "base/memory/singleton.h"
	#include "base/synchronization/lock.h"
	#include "base/task_scheduler/post_task.h"
	#include "chrome/browser/speech/tts_platform.h"
	#include "content/public/browser/browser_thread.h"
	#include "content/public/common/content_switches.h"
	#include "library_loaders/libspeechd.h"

	using content::BrowserThread;

	namespace {

	// Error message stored in |error_| when Speak() is called while Speech
	// Dispatcher is not available.
	const char kNotSupportedError[] =
	"Native speech synthesis not supported on this platform.";

	// Identifies one Speech Dispatcher voice: the voice's own name together with
	// the output module that provides it.
	struct SPDChromeVoice {
	std::string name;
	std::string module;
	};

	} // namespace

	// Linux implementation of TtsPlatformImpl that drives Speech Dispatcher
	// through the dynamically loaded libspeechd client library.
	class TtsPlatformImplLinux : public TtsPlatformImpl {
	public:
	// Returns true only when libspeechd is loaded and a connection is open.
	// Also returns false when |initialization_lock_| cannot be acquired
	// immediately.
	bool PlatformImplAvailable() override;
	// Configures voice, rate, pitch and language on the connection and queues
	// |utterance| for speaking. Returns false if the platform is unavailable or
	// if spd_say() fails.
	bool Speak(int utterance_id,
	const std::string& utterance,
	const std::string& lang,
	const VoiceData& voice,
	const UtteranceContinuousParameters& params) override;
	// Stops speech via spd_stop(); returns false if unavailable or on failure.
	bool StopSpeaking() override;
	// Pause/Resume forward to spd_pause()/spd_resume() when available.
	void Pause() override;
	void Resume() override;
	// True while the most recently recorded notification is SPD_EVENT_BEGIN.
	bool IsSpeaking() override;
	// Appends one VoiceData per known Speech Dispatcher voice to |out_voices|.
	// The voice list is queried once and cached in |all_native_voices_|.
	void GetVoices(std::vector<VoiceData>* out_voices) override;

	// Translates a Speech Dispatcher notification into the matching TTS event
	// for the current utterance and reports it to TtsController.
	void OnSpeechEvent(SPDNotificationType type);

	// Get the single instance of this class.
	static TtsPlatformImplLinux* GetInstance();

	private:
	TtsPlatformImplLinux();
	~TtsPlatformImplLinux() override;

	// Initiate the connection with the speech dispatcher.
	void Initialize();

	// Resets the connection with speech dispatcher.
	void Reset();

	// Callback installed on the connection for begin/end/cancel/pause/resume
	// notifications. Records |type| and posts OnSpeechEvent() to the UI thread.
	static void NotificationCallback(size_t msg_id,
	size_t client_id,
	SPDNotificationType type);

	// Callback installed on the connection for index marks. Records |state| and
	// posts OnSpeechEvent() to the UI thread; |index_mark| is currently unused.
	static void IndexMarkCallback(size_t msg_id,
	size_t client_id,
	SPDNotificationType state,
	char* index_mark);

	// Most recent notification type recorded by the callbacks above.
	// NOTE(review): written from the callbacks without synchronization and read
	// by IsSpeaking() - confirm that this is intended.
	static SPDNotificationType current_notification_;

	// Guards |conn_| and library loading during Initialize(), Reset() and
	// destruction; PlatformImplAvailable() only try-locks it.
	base::Lock initialization_lock_;
	LibSpeechdLoader libspeechd_loader_;
	// Connection handle returned by spd_open(); null when not connected.
	SPDConnection* conn_;

	// These apply to the current utterance only.
	std::string utterance_;
	int utterance_id_;

	// Maps a key of the form "<voice name> <module>" to the voice's name and
	// module. Used to uniquely identify a voice across all available modules.
	// Null until GetVoices() has been called.
	std::unique_ptr<std::map<std::string, SPDChromeVoice>> all_native_voices_;

	friend struct base::DefaultSingletonTraits<TtsPlatformImplLinux>;

	DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplLinux);
	};

	// static
	// Starts out as SPD_EVENT_END, so IsSpeaking() (which tests for
	// SPD_EVENT_BEGIN) reports false until a begin notification is recorded.
	SPDNotificationType TtsPlatformImplLinux::current_notification_ =
	SPD_EVENT_END;

	// Constructs the platform implementation. When the command line carries
	// switches::kEnableSpeechDispatcher, Initialize() is posted to a background
	// task that may block; otherwise the instance stays unavailable.
	//
	// |conn_| is explicitly set to null: it is read by PlatformImplAvailable(),
	// Reset() and the destructor, and would otherwise hold an indeterminate value
	// whenever Initialize() is never run or returns before calling spd_open().
	TtsPlatformImplLinux::TtsPlatformImplLinux()
	    : conn_(nullptr), utterance_id_(0) {
	  const base::CommandLine& command_line =
	      *base::CommandLine::ForCurrentProcess();
	  if (!command_line.HasSwitch(switches::kEnableSpeechDispatcher))
	    return;

	  // base::Unretained(this) relies on the instance never being destroyed;
	  // GetInstance() creates it with leaky singleton traits.
	  base::PostTaskWithTraits(
	      FROM_HERE, {base::MayBlock(), base::TaskPriority::BACKGROUND},
	      base::Bind(&TtsPlatformImplLinux::Initialize, base::Unretained(this)));
	}

	// Loads libspeechd, opens the Speech Dispatcher connection and subscribes to
	// every notification type this class handles. Holds |initialization_lock_|
	// for the whole duration. Leaves |conn_| untouched if the library cannot be
	// loaded, and returns early if the connection cannot be opened.
	void TtsPlatformImplLinux::Initialize() {
	  base::AutoLock lock(initialization_lock_);

	  const bool library_loaded = libspeechd_loader_.Load("libspeechd.so.2");
	  if (!library_loaded)
	    return;

	  {
	    // spd_open has memory leaks which are hard to suppress.
	    // http://crbug.com/317360
	    ANNOTATE_SCOPED_MEMORY_LEAK;
	    conn_ = libspeechd_loader_.spd_open(
	        "chrome", "extension_api", NULL, SPD_MODE_THREADED);
	  }
	  if (conn_ == NULL)
	    return;

	  // All state-change events share a single handler; index marks get their
	  // own because the callback signature differs.
	  conn_->callback_resume = &NotificationCallback;
	  conn_->callback_pause = &NotificationCallback;
	  conn_->callback_cancel = &NotificationCallback;
	  conn_->callback_end = &NotificationCallback;
	  conn_->callback_begin = &NotificationCallback;
	  conn_->callback_im = &IndexMarkCallback;

	  // Ask the server to actually deliver those events.
	  libspeechd_loader_.spd_set_notification_on(conn_, SPD_BEGIN);
	  libspeechd_loader_.spd_set_notification_on(conn_, SPD_END);
	  libspeechd_loader_.spd_set_notification_on(conn_, SPD_CANCEL);
	  libspeechd_loader_.spd_set_notification_on(conn_, SPD_PAUSE);
	  libspeechd_loader_.spd_set_notification_on(conn_, SPD_RESUME);
	}

	// Closes the Speech Dispatcher connection, if one is open, while holding
	// |initialization_lock_|.
	TtsPlatformImplLinux::~TtsPlatformImplLinux() {
	  base::AutoLock lock(initialization_lock_);
	  if (!conn_)
	    return;

	  libspeechd_loader_.spd_close(conn_);
	  conn_ = NULL;
	}

	void TtsPlatformImplLinux::Reset() {
	base::AutoLock lock(initialization_lock_);
	if (conn_)
	libspeechd_loader_.spd_close(conn_);
	conn_ = libspeechd_loader_.spd_open(
	"chrome", "extension_api", NULL, SPD_MODE_THREADED);
	}

	// Reports whether speech can be used right now: libspeechd must be loaded and
	// a connection must exist. The lock is only try-acquired, so the answer is
	// false whenever another thread currently holds |initialization_lock_|.
	bool TtsPlatformImplLinux::PlatformImplAvailable() {
	  bool available = false;
	  if (initialization_lock_.Try()) {
	    available = libspeechd_loader_.loaded() && (conn_ != NULL);
	    initialization_lock_.Release();
	  }
	  return available;
	}

	bool TtsPlatformImplLinux::Speak(
	int utterance_id,
	const std::string& utterance,
	const std::string& lang,
	const VoiceData& voice,
	const UtteranceContinuousParameters& params) {
	if (!PlatformImplAvailable()) {
	error_ = kNotSupportedError;
	return false;
	}

	// Speech dispatcher's speech params are around 3x at either limit.
	float rate = params.rate > 3 ? 3 : params.rate;
	rate = params.rate < 0.334 ? 0.334 : rate;
	float pitch = params.pitch > 3 ? 3 : params.pitch;
	pitch = params.pitch < 0.334 ? 0.334 : pitch;

	std::map<std::string, SPDChromeVoice>::iterator it =
	all_native_voices_->find(voice.name);
	if (it != all_native_voices_->end()) {
	libspeechd_loader_.spd_set_output_module(conn_, it->second.module.c_str());
	libspeechd_loader_.spd_set_synthesis_voice(conn_, it->second.name.c_str());
	}

	// Map our multiplicative range to Speech Dispatcher's linear range.
	// .334 = -100.
	// 3 = 100.
	libspeechd_loader_.spd_set_voice_rate(conn_, 100 * log10(rate) / log10(3));
	libspeechd_loader_.spd_set_voice_pitch(conn_, 100 * log10(pitch) / log10(3));

	// Support languages other than the default
	if (!lang.empty())
	libspeechd_loader_.spd_set_language(conn_, lang.c_str());

	utterance_ = utterance;
	utterance_id_ = utterance_id;

	if (libspeechd_loader_.spd_say(conn_, SPD_TEXT, utterance.c_str()) == -1) {
	Reset();
	return false;
	}
	return true;
	}

	// Asks Speech Dispatcher to stop speaking. Returns false when the platform is
	// unavailable; if spd_stop() reports failure the connection is reset and
	// false is returned as well.
	bool TtsPlatformImplLinux::StopSpeaking() {
	  if (!PlatformImplAvailable())
	    return false;

	  const bool stopped = libspeechd_loader_.spd_stop(conn_) != -1;
	  if (!stopped)
	    Reset();
	  return stopped;
	}

	// Pauses speech on the current connection; does nothing when the platform is
	// unavailable.
	void TtsPlatformImplLinux::Pause() {
	  if (PlatformImplAvailable())
	    libspeechd_loader_.spd_pause(conn_);
	}

	// Resumes paused speech on the current connection; does nothing when the
	// platform is unavailable.
	void TtsPlatformImplLinux::Resume() {
	  if (PlatformImplAvailable())
	    libspeechd_loader_.spd_resume(conn_);
	}

	// Speech counts as in progress exactly while the last notification recorded
	// by the callbacks is SPD_EVENT_BEGIN.
	bool TtsPlatformImplLinux::IsSpeaking() {
	  const bool began_last = (current_notification_ == SPD_EVENT_BEGIN);
	  return began_last;
	}

	void TtsPlatformImplLinux::GetVoices(
	std::vector<VoiceData>* out_voices) {
	if (!all_native_voices_.get()) {
	all_native_voices_.reset(new std::map<std::string, SPDChromeVoice>());
	char** modules = libspeechd_loader_.spd_list_modules(conn_);
	if (!modules)
	return;
	for (int i = 0; modules[i]; i++) {
	char* module = modules[i];
	libspeechd_loader_.spd_set_output_module(conn_, module);
	SPDVoice** native_voices =
	libspeechd_loader_.spd_list_synthesis_voices(conn_);
	if (!native_voices) {
	free(module);
	continue;
	}
	for (int j = 0; native_voices[j]; j++) {
	SPDVoice* native_voice = native_voices[j];
	SPDChromeVoice native_data;
	native_data.name = native_voice->name;
	native_data.module = module;
	std::string key;
	key.append(native_data.name);
	key.append(" ");
	key.append(native_data.module);
	all_native_voices_->insert(
	std::pair<std::string, SPDChromeVoice>(key, native_data));
	free(native_voices[j]);
	}
	free(modules[i]);
	}
	}

	for (std::map<std::string, SPDChromeVoice>::iterator it =
	all_native_voices_->begin();
	it != all_native_voices_->end();
	it++) {
	out_voices->push_back(VoiceData());
	VoiceData& voice = out_voices->back();
	voice.native = true;
	voice.name = it->first;
	voice.events.insert(TTS_EVENT_START);
	voice.events.insert(TTS_EVENT_END);
	voice.events.insert(TTS_EVENT_CANCELLED);
	voice.events.insert(TTS_EVENT_MARKER);
	voice.events.insert(TTS_EVENT_PAUSE);
	voice.events.insert(TTS_EVENT_RESUME);
	}
	}

	// Converts a Speech Dispatcher notification into the corresponding TTS event
	// for the current utterance and hands it to TtsController. End and pause
	// events report the full utterance length as the character index; all other
	// events report 0. Unrecognized notification types are ignored.
	void TtsPlatformImplLinux::OnSpeechEvent(SPDNotificationType type) {
	  TtsController* controller = TtsController::GetInstance();

	  decltype(TTS_EVENT_START) tts_event = TTS_EVENT_START;
	  std::string::size_type char_index = 0;

	  switch (type) {
	    case SPD_EVENT_BEGIN:
	      tts_event = TTS_EVENT_START;
	      break;
	    case SPD_EVENT_RESUME:
	      tts_event = TTS_EVENT_RESUME;
	      break;
	    case SPD_EVENT_END:
	      tts_event = TTS_EVENT_END;
	      char_index = utterance_.size();
	      break;
	    case SPD_EVENT_PAUSE:
	      tts_event = TTS_EVENT_PAUSE;
	      char_index = utterance_.size();
	      break;
	    case SPD_EVENT_CANCEL:
	      tts_event = TTS_EVENT_CANCELLED;
	      break;
	    case SPD_EVENT_INDEX_MARK:
	      tts_event = TTS_EVENT_MARKER;
	      break;
	    default:
	      return;
	  }

	  controller->OnTtsEvent(utterance_id_, tts_event, char_index, std::string());
	}

	// static
	void TtsPlatformImplLinux::NotificationCallback(
	size_t msg_id, size_t client_id, SPDNotificationType type) {
	// We run Speech Dispatcher in threaded mode, so these callbacks should always
	// be in a separate thread.
	if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
	current_notification_ = type;
	BrowserThread::PostTask(
	BrowserThread::UI,
	FROM_HERE,
	base::Bind(&TtsPlatformImplLinux::OnSpeechEvent,
	base::Unretained(TtsPlatformImplLinux::GetInstance()),
	type));
	}
	}

	// static
	void TtsPlatformImplLinux::IndexMarkCallback(size_t msg_id,
	size_t client_id,
	SPDNotificationType state,
	char* index_mark) {
	// TODO(dtseng): index_mark appears to specify an index type supplied by a
	// client. Need to explore how this is used before hooking it up with existing
	// word, sentence events.
	// We run Speech Dispatcher in threaded mode, so these callbacks should always
	// be in a separate thread.
	if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
	current_notification_ = state;
	BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
	base::Bind(&TtsPlatformImplLinux::OnSpeechEvent,
	base::Unretained(TtsPlatformImplLinux::GetInstance()),
	state));
	}
	}

	// static
	// Returns the process-wide instance, created on first use with leaky
	// singleton traits.
	TtsPlatformImplLinux* TtsPlatformImplLinux::GetInstance() {
	  using LeakyInstance =
	      base::Singleton<TtsPlatformImplLinux,
	                      base::LeakySingletonTraits<TtsPlatformImplLinux>>;
	  return LeakyInstance::get();
	}

	// static
	// On Linux the generic platform accessor hands out the Speech Dispatcher
	// backed implementation.
	TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
	  TtsPlatformImplLinux* linux_impl = TtsPlatformImplLinux::GetInstance();
	  return linux_impl;
	}