// blob: 703159cde8a61c59fc775c31e0af7abafbbcb86a
// Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This header defines the interface to access a Text-To-Speech service
// that manages background threads and presents a real-time, nonblocking
// interface.
#ifndef SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_SERVICE_H_
#define SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_SERVICE_H_
#include <stdint.h>
#include <list>
#include <string>
#include "audio_output.h"
#include "ringbuffer.h"
#include "tts_engine.h"
#include "tts_receiver.h"
using std::list;
using std::string;
namespace speech_synthesis {
// Status codes for the TTS service; this is the return type of
// TtsService::GetStatus(). The numeric values are written out explicitly
// so they do not shift if enumerators are added or reordered.
enum tts_status {
  TTS_IDLE  = 0,
  TTS_BUSY  = 1,
  TTS_ERROR = 2
};
class Resampler;
// Per-utterance settings accepted by TtsService::Speak(): a completion
// Runnable, a voice description, rate / pitch / volume multipliers and two
// queueing flags.
struct UtteranceOptions {
  // Caller-supplied Runnable, NULL by default.
  // NOTE(review): presumably run once the utterance has finished speaking;
  // confirm against the TtsService implementation.
  Runnable *completion;

  // Voice description for this utterance, or NULL when none was given.
  // The copy constructor below allocates a fresh TtsVoice for the copy;
  // nothing in this header releases it.
  // NOTE(review): confirm which component deletes the copied TtsVoice.
  struct TtsVoice *voice_options;

  // Default is 1. Use higher or lower values to increase or decrease the
  // speaking rate. Map default to ~100 words/min if possible. Speech
  // engines may or may not support this mapping.
  float rate;

  // Default is 1. Use higher or lower values to increase or decrease the
  // speaking pitch. Map default to 200 Hz if possible. Speech
  // engines may or may not support this exact mapping.
  float pitch;

  // Default is 1. Use higher or lower values to increase or decrease the
  // speaking volume.
  float volume;

  // Whether to enqueue this utterance or flush the existing queue.
  bool enqueue;

  // Whether this utterance is interruptible.
  bool interruptible;

  // Builds the default options: no completion callback, no voice, unit
  // rate / pitch / volume, flush-instead-of-enqueue, interruptible.
  // voice_options is explicitly set to NULL so that it never holds an
  // indeterminate value (copying such an object would dereference garbage).
  UtteranceOptions()
      : completion(NULL),
        voice_options(NULL),
        rate(1),
        pitch(1),
        volume(1),
        enqueue(false),
        interruptible(true) { }

  // Copies every field of |options|. The voice description is duplicated
  // into a new TtsVoice when present; a NULL voice stays NULL instead of
  // being dereferenced. The completion pointer is copied as-is (shared).
  // Note that the implicitly generated assignment operator copies the
  // voice_options pointer itself rather than duplicating the TtsVoice.
  UtteranceOptions(const UtteranceOptions& options)
      : completion(options.completion),
        voice_options(options.voice_options != NULL
                          ? new TtsVoice(*options.voice_options)
                          : NULL),
        rate(options.rate),
        pitch(options.pitch),
        volume(options.volume),
        enqueue(options.enqueue),
        interruptible(options.interruptible) { }
};
// A single piece of text to speak together with the settings to speak it
// with. This is a plain aggregate with no constructor, so voice_index and
// options hold indeterminate values until the creator assigns them.
struct Utterance {
  // The text of the utterance.
  std::string text;

  // Index identifying the voice to use.
  // NOTE(review): presumably an index into the engine's voice list; confirm.
  int voice_index;

  // Settings for this utterance; ownership is not expressed in this header.
  struct UtteranceOptions *options;
};
// Text-to-speech service. It derives from AudioProvider, Runnable and
// TtsDataReceiver; the methods under "Internal implementation" are the
// entry points for those three roles, while the methods under "External
// interface" are what clients of the service call.
//
// NOTE(review): the class holds several raw pointers and declares a
// destructor but no copy constructor / assignment operator; confirm that
// instances are never copied.
class TtsService
    : public AudioProvider,
      public Runnable,
      public TtsDataReceiver {
 public:
  TtsService(TtsEngine* engine,
             AudioOutput* audio_output,
             Threading* threading);
  virtual ~TtsService();

  // ------------------------------------------------------------------
  // External interface
  // ------------------------------------------------------------------

  // Starts the background service.
  bool StartService();

  // Stops the background service.
  void StopService();

  // Queues |text| to be spoken and returns immediately. |options| carries
  // the remaining settings (language name, voice, pitch, rate, ...) and may
  // be omitted. Language names of the form <language>-<locale> are
  // currently supported, for example en-US or fr-FR.
  void Speak(std::string text, UtteranceOptions* options = NULL);

  // Interrupts the utterance currently being spoken and throws away any
  // other utterances still waiting in the queue.
  void Stop();

  // Reports whether the service is busy or speaking.
  tts_status GetStatus();

  // Blocks the caller until every queued utterance has been spoken.
  void WaitUntilFinished();

  // ------------------------------------------------------------------
  // Internal implementation
  // ------------------------------------------------------------------

  // AudioProvider implementation; invoked by the audio output thread.
  bool FillAudioBuffer(int16_t* samples, int size);

  // Runnable implementation; body of our background thread.
  void Run();

  // TtsDataReceiver implementation; the TtsEngine calls this with the
  // audio data it has generated.
  tts_callback_status Receive(int rate,
                              int num_channels,
                              const int16_t* data,
                              int num_samples,
                              tts_synth_status status);

 private:
  TtsEngine* engine_;
  AudioOutput* audio_output_;
  RingBuffer<int16_t>* ring_buffer_;
  Threading* threading_;
  Thread* thread_;
  Utterance* current_utterance_;
  Resampler* resampler_;
  int16_t* audio_buffer_;
  int audio_buffer_size_;

  // Mutex and condition variable guarding / signaling the state below.
  Mutex* mutex_;
  CondVar* cond_var_;

  // State protected by mutex_ and signaled through cond_var_.
  std::list<Utterance*> utterances_;
  bool service_running_;
  bool utterance_running_;
  bool current_utterance_interruptible_;
};
} // namespace speech_synthesis
#endif // SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_SERVICE_H_