| // Copyright (c) 2010 The Chromium OS Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| // |
| // This header defines the interface to access a Text-To-Speech service |
| // that manages background threads and presents a real-time, nonblocking |
| // interface. |
| |
| #ifndef SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_SERVICE_H_ |
| #define SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_SERVICE_H_ |
| |
| #include <stdint.h> |
| |
| #include <list> |
| #include <string> |
| |
| #include "audio_output.h" |
| #include "ringbuffer.h" |
| #include "tts_engine.h" |
| #include "tts_receiver.h" |
| |
| using std::list; |
| using std::string; |
| |
| namespace speech_synthesis { |
| |
// Coarse state of the TTS service, as returned by TtsService::GetStatus().
// The numeric values are spelled out explicitly and must stay stable.
enum tts_status {
  TTS_IDLE  = 0,  // NOTE(review): presumably "nothing queued or speaking".
  TTS_BUSY  = 1,  // NOTE(review): presumably "busy or speaking".
  TTS_ERROR = 2   // NOTE(review): presumably "service in a failed state".
};
| |
| class Resampler; |
| |
| // Add more such as rate, pitch etc. in the future. |
| struct UtteranceOptions { |
| public: |
| Runnable *completion; |
| struct TtsVoice *voice_options; |
| // Default is 1. Use higher or lower values to increase or decrease the |
| // speaking rate. Map default to ~100 words/min if possible. Speech |
| // engines may or may not support this mapping. |
| float rate; |
| // Default is 1. Use higher or lower values to increase or decrease the |
| // speaking pitch. Map default to 200 Hz if possible. Speech |
| // engines may or may not support this exact mapping. |
| float pitch; |
| // Default is 1. Use higher or lower values to increase or decrease the |
| // speaking volume. |
| float volume; |
| // Whether to enqueue this utterance or flush the existing queue. |
| bool enqueue; |
| // Whether this utterance is interruptible. |
| bool interruptible; |
| UtteranceOptions() |
| : completion(NULL), |
| rate(1), |
| pitch(1), |
| volume(1), |
| enqueue(false), |
| interruptible(true) { } |
| |
| UtteranceOptions(const UtteranceOptions& options) |
| : completion(options.completion), |
| voice_options(new TtsVoice(*options.voice_options)), |
| rate(options.rate), |
| pitch(options.pitch), |
| volume(options.volume), |
| enqueue(options.enqueue), |
| interruptible(options.interruptible) { } |
| }; |
| |
// A single speech request: the text plus the settings that go with it.
// Plain aggregate with no constructor, so members are NOT initialized
// automatically; the creator must set every field.
struct Utterance {
  // The text of this utterance.
  std::string text;

  // NOTE(review): presumably an index into the engine's voice list --
  // confirm against the service implementation.
  int voice_index;

  // Settings for this utterance.
  // NOTE(review): nullability and ownership are not visible from this
  // header -- confirm against the service implementation.
  struct UtteranceOptions *options;
};
| |
| class TtsService |
| : public AudioProvider, |
| public Runnable, |
| public TtsDataReceiver { |
| public: |
| TtsService(TtsEngine *engine, |
| AudioOutput *audio_output, |
| Threading *threading); |
| |
| virtual ~TtsService(); |
| |
| // |
| // External interface |
| // |
| |
| // Start the background service |
| bool StartService(); |
| |
| // Stop the background service. |
| void StopService(); |
| |
| // Queue up this text to be spoken and return immediately. The |
| // UtteranceOptions contains other settings such as language name, voice, |
| // pitch, rate etc. Currently language name specified as: |
| // <language>-<locale> is supported. Example: en-US, fr-FR, etc. |
| void Speak(string text, UtteranceOptions *options = NULL); |
| |
| // Interrupts the current utterance and discards other utterances |
| // in the queue. |
| void Stop(); |
| |
| // Determine if the service is busy or speaking. |
| tts_status GetStatus(); |
| |
| // Block until all queued utterances are done speaking. |
| void WaitUntilFinished(); |
| |
| // |
| // Internal implementation |
| // |
| |
| // Implementation of AudioProvider, called by the audio output thread. |
| bool FillAudioBuffer(int16_t* samples, int size); |
| |
| // Implementation of Runnable, for our background thread. |
| void Run(); |
| |
| // Implementation of TtsDataReceiver, where the TtsEngine calls us |
| // with the generated audio data. |
| tts_callback_status Receive(int rate, |
| int num_channels, |
| const int16_t* data, |
| int num_samples, |
| tts_synth_status status); |
| |
| private: |
| TtsEngine *engine_; |
| AudioOutput *audio_output_; |
| RingBuffer<int16_t> *ring_buffer_; |
| Threading *threading_; |
| Thread *thread_; |
| Utterance *current_utterance_; |
| Resampler *resampler_; |
| int16_t *audio_buffer_; |
| int audio_buffer_size_; |
| |
| // A mutex and condition variable |
| Mutex *mutex_; |
| CondVar *cond_var_; |
| |
| // Variables that are protected by the mutex and signaled by the |
| // condition variable. |
| list<Utterance*> utterances_; |
| bool service_running_; |
| bool utterance_running_; |
| bool current_utterance_interruptible_; |
| }; |
| } // namespace speech_synthesis |
| |
| #endif // SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_SERVICE_H_ |
| |