// tts_service.h - chromiumos/platform/speech_synthesis - Git at Google

 // Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 //
 // This header defines the interface to access a Text-To-Speech service
 // that manages background threads and presents a real-time, nonblocking
 // interface.

 #ifndef SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_SERVICE_H_
 #define SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_SERVICE_H_

 #include <stdint.h>

 #include <list>
 #include <string>

 #include "audio_output.h"
 #include "ringbuffer.h"
 #include "tts_engine.h"
 #include "tts_receiver.h"

 using std::list;
 using std::string;

 namespace speech_synthesis {

 // Status values returned by TtsService::GetStatus(). The numeric values are
 // spelled out explicitly so they stay fixed if enumerators are added later.
 enum tts_status {
   TTS_IDLE  = 0,  // NOTE(review): presumably nothing is being spoken - confirm.
   TTS_BUSY  = 1,  // NOTE(review): presumably busy or speaking - confirm.
   TTS_ERROR = 2   // NOTE(review): presumably the service failed - confirm.
 };

 class Resampler;

 // Per-utterance settings accepted by TtsService::Speak() and carried by
 // Utterance.
 struct UtteranceOptions {
  public:
   // Completion callback; NULL by default.
   // NOTE(review): presumably run once the utterance has finished - confirm
   // against the service implementation.
   Runnable *completion;
   // Voice to speak with; NULL by default (no voice requested).
   struct TtsVoice *voice_options;
   // Default is 1. Use higher or lower values to increase or decrease the
   // speaking rate. Map default to ~100 words/min if possible. Speech
   // engines may or may not support this mapping.
   float rate;
   // Default is 1. Use higher or lower values to increase or decrease the
   // speaking pitch. Map default to 200 Hz if possible. Speech
   // engines may or may not support this exact mapping.
   float pitch;
   // Default is 1. Use higher or lower values to increase or decrease the
   // speaking volume.
   float volume;
   // Whether to enqueue this utterance or flush the existing queue.
   bool enqueue;
   // Whether this utterance is interruptible.
   bool interruptible;

   // Builds the default options. Every member is initialized, including
   // voice_options, so that a default-constructed object can be copied
   // safely.
   UtteranceOptions()
       : completion(NULL),
         voice_options(NULL),
         rate(1),
         pitch(1),
         volume(1),
         enqueue(false),
         interruptible(true) { }

   // Copies all settings. The voice is deep-copied when one is set and left
   // NULL otherwise, so copying options without a voice does not dereference
   // a null pointer.
   // NOTE(review): the deep copy is allocated with new and this struct
   // declares no destructor, so whoever holds the copy is responsible for
   // deleting voice_options - confirm where that happens.
   UtteranceOptions(const UtteranceOptions& options)
       : completion(options.completion),
         voice_options(options.voice_options != NULL
                           ? new TtsVoice(*options.voice_options)
                           : NULL),
         rate(options.rate),
         pitch(options.pitch),
         volume(options.volume),
         enqueue(options.enqueue),
         interruptible(options.interruptible) { }
 };

 // A single request to speak. TtsService keeps pointers to these in its
 // utterances_ list. Plain aggregate: no member is initialized automatically
 // except text, which starts out empty.
 struct Utterance {
   // The text to be spoken.
   std::string text;
   // Index of the voice to use.
   // NOTE(review): presumably an index into the engine's voice list - confirm.
   int voice_index;
   // Settings for this utterance.
   // NOTE(review): ownership of the pointed-to options is not visible here.
   struct UtteranceOptions *options;
 };

 // Text-to-speech service that manages a background thread and presents a
 // nonblocking interface to callers (see "External interface" below).
 // It implements three callback interfaces: AudioProvider (called by the
 // audio output thread), Runnable (body of the background thread) and
 // TtsDataReceiver (receives the audio generated by the TtsEngine).
 //
 // NOTE(review): no copy constructor or assignment operator is declared
 // here, so unless a base class forbids it the class is implicitly copyable,
 // which would duplicate the raw pointer members below - confirm that
 // instances are never copied.
 class TtsService
     : public AudioProvider,
       public Runnable,
       public TtsDataReceiver {
  public:
   // Creates a service from the given engine, audio output and threading
   // objects.
   // NOTE(review): whether the service takes ownership of these pointers is
   // not visible in this header - confirm against the implementation.
   TtsService(TtsEngine *engine,
              AudioOutput *audio_output,
              Threading *threading);

   virtual ~TtsService();

   //
   // External interface
   //

   // Start the background service.
   // NOTE(review): presumably returns true on success - confirm.
   bool StartService();

   // Stop the background service.
   void StopService();

   // Queue up this text to be spoken and return immediately. The
   // UtteranceOptions contains other settings such as language name, voice,
   // pitch, rate etc. Currently language name specified as:
   // <language>-<locale> is supported. Example: en-US, fr-FR, etc.
   // |options| defaults to NULL.
   // NOTE(review): presumably NULL selects default options - confirm.
   void Speak(string text, UtteranceOptions *options = NULL);

   // Interrupts the current utterance and discards other utterances
   // in the queue.
   void Stop();

   // Determine if the service is busy or speaking. Returns one of the
   // tts_status values.
   tts_status GetStatus();

   // Block until all queued utterances are done speaking.
   void WaitUntilFinished();

   //
   // Internal implementation
   //

   // Implementation of AudioProvider, called by the audio output thread.
   // NOTE(review): |size| is presumably the number of int16_t samples to
   // write into |samples|; the meaning of the return value is defined by
   // AudioProvider - confirm in audio_output.h.
   bool FillAudioBuffer(int16_t* samples, int size);

   // Implementation of Runnable, for our background thread.
   void Run();

   // Implementation of TtsDataReceiver, where the TtsEngine calls us
   // with the generated audio data.
   // NOTE(review): |rate| is presumably the sample rate of |data| - confirm
   // in tts_receiver.h.
   tts_callback_status Receive(int rate,
                               int num_channels,
                               const int16_t* data,
                               int num_samples,
                               tts_synth_status status);

  private:
   // Collaborators and working state.
   // NOTE(review): engine_, audio_output_ and threading_ presumably hold the
   // constructor arguments; ownership of every pointer in this section is
   // not visible from this header - confirm against the implementation.
   TtsEngine *engine_;
   AudioOutput *audio_output_;
   RingBuffer<int16_t> *ring_buffer_;
   Threading *threading_;
   Thread *thread_;
   Utterance *current_utterance_;
   // Only forward-declared in this header (class Resampler).
   Resampler *resampler_;
   int16_t *audio_buffer_;
   int audio_buffer_size_;

   // A mutex and condition variable
   Mutex *mutex_;
   CondVar *cond_var_;

   // Variables that are protected by the mutex and signaled by the
   // condition variable.
   list<Utterance*> utterances_;
   bool service_running_;
   bool utterance_running_;
   bool current_utterance_interruptible_;
 };
 }  // namespace speech_synthesis

 #endif  // SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_SERVICE_H_
	// Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.
	//
	// This header defines the interface to access a Text-To-Speech service
	// that manages background threads and presents a real-time, nonblocking
	// interface.

	#ifndef SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_SERVICE_H_
	#define SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_SERVICE_H_

	#include <stdint.h>

	#include <list>
	#include <string>

	#include "audio_output.h"
	#include "ringbuffer.h"
	#include "tts_engine.h"
	#include "tts_receiver.h"

	using std::list;
	using std::string;

	namespace speech_synthesis {

	// Status values returned by TtsService::GetStatus(). The numeric values are
	// spelled out explicitly so they stay fixed if enumerators are added later.
	enum tts_status {
	  TTS_IDLE  = 0,  // NOTE(review): presumably nothing is being spoken - confirm.
	  TTS_BUSY  = 1,  // NOTE(review): presumably busy or speaking - confirm.
	  TTS_ERROR = 2   // NOTE(review): presumably the service failed - confirm.
	};

	class Resampler;

	// Per-utterance settings accepted by TtsService::Speak() and carried by
	// Utterance.
	struct UtteranceOptions {
	 public:
	  // Completion callback; NULL by default.
	  // NOTE(review): presumably run once the utterance has finished - confirm
	  // against the service implementation.
	  Runnable *completion;
	  // Voice to speak with; NULL by default (no voice requested).
	  struct TtsVoice *voice_options;
	  // Default is 1. Use higher or lower values to increase or decrease the
	  // speaking rate. Map default to ~100 words/min if possible. Speech
	  // engines may or may not support this mapping.
	  float rate;
	  // Default is 1. Use higher or lower values to increase or decrease the
	  // speaking pitch. Map default to 200 Hz if possible. Speech
	  // engines may or may not support this exact mapping.
	  float pitch;
	  // Default is 1. Use higher or lower values to increase or decrease the
	  // speaking volume.
	  float volume;
	  // Whether to enqueue this utterance or flush the existing queue.
	  bool enqueue;
	  // Whether this utterance is interruptible.
	  bool interruptible;

	  // Builds the default options. Every member is initialized, including
	  // voice_options, so that a default-constructed object can be copied
	  // safely.
	  UtteranceOptions()
	      : completion(NULL),
	        voice_options(NULL),
	        rate(1),
	        pitch(1),
	        volume(1),
	        enqueue(false),
	        interruptible(true) { }

	  // Copies all settings. The voice is deep-copied when one is set and left
	  // NULL otherwise, so copying options without a voice does not dereference
	  // a null pointer.
	  // NOTE(review): the deep copy is allocated with new and this struct
	  // declares no destructor, so whoever holds the copy is responsible for
	  // deleting voice_options - confirm where that happens.
	  UtteranceOptions(const UtteranceOptions& options)
	      : completion(options.completion),
	        voice_options(options.voice_options != NULL
	                          ? new TtsVoice(*options.voice_options)
	                          : NULL),
	        rate(options.rate),
	        pitch(options.pitch),
	        volume(options.volume),
	        enqueue(options.enqueue),
	        interruptible(options.interruptible) { }
	};

	// A single request to speak. TtsService keeps pointers to these in its
	// utterances_ list. Plain aggregate: no member is initialized automatically
	// except text, which starts out empty.
	struct Utterance {
	  // The text to be spoken.
	  std::string text;
	  // Index of the voice to use.
	  // NOTE(review): presumably an index into the engine's voice list - confirm.
	  int voice_index;
	  // Settings for this utterance.
	  // NOTE(review): ownership of the pointed-to options is not visible here.
	  struct UtteranceOptions *options;
	};

	// Text-to-speech service that manages a background thread and presents a
	// nonblocking interface to callers (see "External interface" below).
	// It implements three callback interfaces: AudioProvider (called by the
	// audio output thread), Runnable (body of the background thread) and
	// TtsDataReceiver (receives the audio generated by the TtsEngine).
	//
	// NOTE(review): no copy constructor or assignment operator is declared
	// here, so unless a base class forbids it the class is implicitly copyable,
	// which would duplicate the raw pointer members below - confirm that
	// instances are never copied.
	class TtsService
	: public AudioProvider,
	public Runnable,
	public TtsDataReceiver {
	public:
	// Creates a service from the given engine, audio output and threading
	// objects.
	// NOTE(review): whether the service takes ownership of these pointers is
	// not visible in this header - confirm against the implementation.
	TtsService(TtsEngine *engine,
	AudioOutput *audio_output,
	Threading *threading);

	virtual ~TtsService();

	//
	// External interface
	//

	// Start the background service.
	// NOTE(review): presumably returns true on success - confirm.
	bool StartService();

	// Stop the background service.
	void StopService();

	// Queue up this text to be spoken and return immediately. The
	// UtteranceOptions contains other settings such as language name, voice,
	// pitch, rate etc. Currently language name specified as:
	// <language>-<locale> is supported. Example: en-US, fr-FR, etc.
	// |options| defaults to NULL.
	// NOTE(review): presumably NULL selects default options - confirm.
	void Speak(string text, UtteranceOptions *options = NULL);

	// Interrupts the current utterance and discards other utterances
	// in the queue.
	void Stop();

	// Determine if the service is busy or speaking. Returns one of the
	// tts_status values.
	tts_status GetStatus();

	// Block until all queued utterances are done speaking.
	void WaitUntilFinished();

	//
	// Internal implementation
	//

	// Implementation of AudioProvider, called by the audio output thread.
	// NOTE(review): |size| is presumably the number of int16_t samples to
	// write into |samples|; the meaning of the return value is defined by
	// AudioProvider - confirm in audio_output.h.
	bool FillAudioBuffer(int16_t* samples, int size);

	// Implementation of Runnable, for our background thread.
	void Run();

	// Implementation of TtsDataReceiver, where the TtsEngine calls us
	// with the generated audio data.
	// NOTE(review): |rate| is presumably the sample rate of |data| - confirm
	// in tts_receiver.h.
	tts_callback_status Receive(int rate,
	int num_channels,
	const int16_t* data,
	int num_samples,
	tts_synth_status status);

	private:
	// Collaborators and working state.
	// NOTE(review): engine_, audio_output_ and threading_ presumably hold the
	// constructor arguments; ownership of every pointer in this section is
	// not visible from this header - confirm against the implementation.
	TtsEngine *engine_;
	AudioOutput *audio_output_;
	RingBuffer<int16_t> *ring_buffer_;
	Threading *threading_;
	Thread *thread_;
	Utterance *current_utterance_;
	// Only forward-declared in this header (class Resampler).
	Resampler *resampler_;
	int16_t *audio_buffer_;
	int audio_buffer_size_;

	// A mutex and condition variable
	Mutex *mutex_;
	CondVar *cond_var_;

	// Variables that are protected by the mutex and signaled by the
	// condition variable.
	list<Utterance*> utterances_;
	bool service_running_;
	bool utterance_running_;
	bool current_utterance_interruptible_;
	};
	} // namespace speech_synthesis

	#endif // SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_SERVICE_H_