// tts_service.h - chromiumos/platform/speech_synthesis - Git at Google

 // Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 //
 // This header defines the interface to access a Text-To-Speech service
 // that manages background threads and presents a real-time, nonblocking
 // interface.

 #ifndef SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_SERVICE_H_
 #define SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_SERVICE_H_

 #include <stdint.h>

 #include <list>
 #include <string>
 #include <vector>

 #include "base/basictypes.h"
 #include "base/memory/scoped_ptr.h"

 #include "audio_output.h"
 #include "threading.h"
 #include "tts_engine.h"
 #include "tts_receiver.h"

 namespace speech_synthesis {

 // Status codes returned by TtsService::GetStatus(). The numeric values are
 // spelled out explicitly so they stay stable if enumerators are reordered.
 enum tts_status {
   TTS_IDLE  = 0,
   TTS_BUSY  = 1,
   TTS_ERROR = 2
 };

 // Forward declarations. Within this header RingBuffer is only referenced
 // through a scoped_ptr member of TtsService.
 template<typename T> class RingBuffer;
 class TtsEngine;

 // Per-utterance settings handed to TtsService::Speak() alongside the text.
 // Extend this struct when further speech parameters are needed.
 struct UtteranceOptions {
   // Voice settings for the utterance (TtsVoice is declared elsewhere).
   TtsVoice voice_options;

   // Speaking rate; the default is 1. Larger values speak faster, smaller
   // values slower. The default should map to roughly 100 words/min where
   // possible; engines may or may not honor this mapping.
   float rate;

   // Speaking pitch; the default is 1. Larger values raise the pitch, smaller
   // values lower it. The default should map to 200 Hz where possible;
   // engines may or may not honor this exact mapping.
   float pitch;

   // Speaking volume; the default is 1. Larger values are louder, smaller
   // values quieter.
   float volume;

   // Whether this utterance is appended to the queue, as opposed to flushing
   // the existing queue.
   bool enqueue;

   // Whether this utterance may be interrupted.
   bool interruptible;

   // Defined out of line.
   // NOTE(review): presumably applies the defaults documented above -- confirm
   // against the implementation file.
   UtteranceOptions();
 };

 // A single speech request: the text together with its voice index and
 // options. TtsService holds pointers to these in its utterance queue.
 struct Utterance {
   // Text of the utterance.
   std::string text;

   // NOTE(review): presumably an index into the engine's voice list; there is
   // no constructor here, so the creator must set it -- confirm in the .cc.
   int voice_index;

   // Rate, pitch, volume and queueing settings for this utterance.
   UtteranceOptions options;
 };

 // Text-To-Speech service that manages a background thread and presents a
 // real-time, nonblocking interface. It acts as the AudioProvider for the
 // audio output, as the Runnable for its own thread, and as the
 // TtsDataReceiver that the TtsEngine delivers synthesized audio to.
 class TtsService
     : public AudioProvider,
       public Runnable,
       public TtsDataReceiver {
  public:
   // Both arguments are held as raw pointers (engine_ / audio_output_).
   // NOTE(review): ownership and required lifetime are not visible from this
   // header -- presumably the caller keeps both alive; confirm in the .cc.
   TtsService(TtsEngine *engine, AudioOutput *audio_output);

   virtual ~TtsService();

   //
   // External interface
   //

   // Start the background service. Returns a bool.
   // NOTE(review): presumably true on success -- confirm in the .cc.
   bool StartService();

   // Stop the background service.
   void StopService();

   // Queue up this text to be spoken and return immediately. The
   // UtteranceOptions contains other settings such as language name, voice,
   // pitch, rate etc. Currently language name specified as:
   // <language>-<locale> is supported. Example: en-US, fr-FR, etc.
   void Speak(const std::string& text, const UtteranceOptions& options);

   // Interrupts the current utterance and discards other utterances
   // in the queue.
   void Stop();

   // Determine if the service is busy or speaking. Returns one of the
   // tts_status values.
   tts_status GetStatus();

   // Block until all queued utterances are done speaking.
   void WaitUntilFinished();

   //
   // Internal implementation
   //

   // Implementation of AudioProvider, called by the audio output thread.
   // NOTE(review): unlike Run(), this is not marked virtual/OVERRIDE;
   // presumably it overrides a virtual declared in AudioProvider -- confirm.
   bool FillAudioBuffer(int16_t* samples, int size);

   // Implementation of Runnable, for our background thread.
   virtual void Run() OVERRIDE;

   // Implementation of TtsDataReceiver, where the TtsEngine calls us
   // with the generated audio data.
   // NOTE(review): also not marked virtual/OVERRIDE; presumably overrides a
   // virtual declared in TtsDataReceiver -- confirm.
   tts_callback_status Receive(int rate,
                               int num_channels,
                               const int16_t* data,
                               int num_samples,
                               tts_synth_status status);

  private:
   // Engine and audio output supplied to the constructor (raw pointers).
   TtsEngine *engine_;
   AudioOutput *audio_output_;
   // Owned buffer of 16-bit samples.
   // NOTE(review): presumably filled by Receive() and drained by
   // FillAudioBuffer() -- confirm in the .cc.
   scoped_ptr<RingBuffer<int16_t> > ring_buffer_;
   // Owned thread object for the background service.
   scoped_ptr<Thread> thread_;
   // Raw pointer to an Utterance.
   // NOTE(review): presumably the one currently being spoken -- confirm.
   Utterance *current_utterance_;
   // Scratch storage for 16-bit audio samples.
   std::vector<int16_t> audio_buffer_;

   // A mutex and condition variable
   scoped_ptr<Mutex> mutex_;
   scoped_ptr<CondVar> cond_var_;

   // Variables that are protected by the mutex and signaled by the
   // condition variable.
   std::list<Utterance*> utterances_;
   bool service_running_;
   bool utterance_running_;
   bool current_utterance_interruptible_;

   DISALLOW_COPY_AND_ASSIGN(TtsService);
 };
 }  // namespace speech_synthesis

 #endif  // SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_SERVICE_H_
	// Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.
	//
	// This header defines the interface to access a Text-To-Speech service
	// that manages background threads and presents a real-time, nonblocking
	// interface.

	#ifndef SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_SERVICE_H_
	#define SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_SERVICE_H_

	#include <stdint.h>

	#include <list>
	#include <string>
	#include <vector>

	#include "base/basictypes.h"
	#include "base/memory/scoped_ptr.h"

	#include "audio_output.h"
	#include "threading.h"
	#include "tts_engine.h"
	#include "tts_receiver.h"

	namespace speech_synthesis {

	// Status codes returned by TtsService::GetStatus(). The numeric values are
	// spelled out explicitly so they stay stable if enumerators are reordered.
	enum tts_status {
	  TTS_IDLE  = 0,
	  TTS_BUSY  = 1,
	  TTS_ERROR = 2
	};

	// Forward declarations. Within this header RingBuffer is only referenced
	// through a scoped_ptr member of TtsService.
	template<typename T> class RingBuffer;
	class TtsEngine;

	// Per-utterance settings handed to TtsService::Speak() alongside the text.
	// Extend this struct when further speech parameters are needed.
	struct UtteranceOptions {
	  // Voice settings for the utterance (TtsVoice is declared elsewhere).
	  TtsVoice voice_options;

	  // Speaking rate; the default is 1. Larger values speak faster, smaller
	  // values slower. The default should map to roughly 100 words/min where
	  // possible; engines may or may not honor this mapping.
	  float rate;

	  // Speaking pitch; the default is 1. Larger values raise the pitch, smaller
	  // values lower it. The default should map to 200 Hz where possible;
	  // engines may or may not honor this exact mapping.
	  float pitch;

	  // Speaking volume; the default is 1. Larger values are louder, smaller
	  // values quieter.
	  float volume;

	  // Whether this utterance is appended to the queue, as opposed to flushing
	  // the existing queue.
	  bool enqueue;

	  // Whether this utterance may be interrupted.
	  bool interruptible;

	  // Defined out of line.
	  // NOTE(review): presumably applies the defaults documented above -- confirm
	  // against the implementation file.
	  UtteranceOptions();
	};

	// A single speech request: the text together with its voice index and
	// options. TtsService holds pointers to these in its utterance queue.
	struct Utterance {
	  // Text of the utterance.
	  std::string text;

	  // NOTE(review): presumably an index into the engine's voice list; there is
	  // no constructor here, so the creator must set it -- confirm in the .cc.
	  int voice_index;

	  // Rate, pitch, volume and queueing settings for this utterance.
	  UtteranceOptions options;
	};

	class TtsService
	: public AudioProvider,
	public Runnable,
	public TtsDataReceiver {
	public:
	TtsService(TtsEngine engine, AudioOutput audio_output);

	virtual ~TtsService();

	//
	// External interface
	//

	// Start the background service
	bool StartService();

	// Stop the background service.
	void StopService();

	// Queue up this text to be spoken and return immediately. The
	// UtteranceOptions contains other settings such as language name, voice,
	// pitch, rate etc. Currently language name specified as:
	// <language>-<locale> is supported. Example: en-US, fr-FR, etc.
	void Speak(const std::string& text, const UtteranceOptions& options);

	// Interrupts the current utterance and discards other utterances
	// in the queue.
	void Stop();

	// Determine if the service is busy or speaking.
	tts_status GetStatus();

	// Block until all queued utterances are done speaking.
	void WaitUntilFinished();

	//
	// Internal implementation
	//

	// Implementation of AudioProvider, called by the audio output thread.
	bool FillAudioBuffer(int16_t* samples, int size);

	// Implementation of Runnable, for our background thread.
	virtual void Run() OVERRIDE;

	// Implementation of TtsDataReceiver, where the TtsEngine calls us
	// with the generated audio data.
	tts_callback_status Receive(int rate,
	int num_channels,
	const int16_t* data,
	int num_samples,
	tts_synth_status status);

	private:
	TtsEngine *engine_;
	AudioOutput *audio_output_;
	scoped_ptr<RingBuffer<int16_t> > ring_buffer_;
	scoped_ptr<Thread> thread_;
	Utterance *current_utterance_;
	std::vector<int16_t> audio_buffer_;

	// A mutex and condition variable
	scoped_ptr<Mutex> mutex_;
	scoped_ptr<CondVar> cond_var_;

	// Variables that are protected by the mutex and signaled by the
	// condition variable.
	std::list<Utterance*> utterances_;
	bool service_running_;
	bool utterance_running_;
	bool current_utterance_interruptible_;

	DISALLOW_COPY_AND_ASSIGN(TtsService);
	};
	} // namespace speech_synthesis

	#endif // SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_SERVICE_H_