// blob: af8604f10d6b90473a49c22699ae3b65f81cd8b1 [file] [log] [blame]
// Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This header defines the interface to access a Text-To-Speech service
// that manages background threads and presents a real-time, nonblocking
// interface.
#ifndef SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_SERVICE_H_
#define SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_SERVICE_H_
#include <stdint.h>
#include <list>
#include <string>
#include <vector>
#include "base/basictypes.h"
#include "base/memory/scoped_ptr.h"
#include "audio_output.h"
#include "threading.h"
#include "tts_engine.h"
#include "tts_receiver.h"
namespace speech_synthesis {
// Status value reported by TtsService::GetStatus(). Each enumerator is
// given an explicit numeric value.
enum tts_status {
  // NOTE(review): presumably "nothing is being spoken" - confirm against
  // the implementation of GetStatus().
  TTS_IDLE  = 0,
  // NOTE(review): presumably "the service is busy or speaking" - confirm.
  TTS_BUSY  = 1,
  // NOTE(review): presumably "the service is in a failed state" - confirm.
  TTS_ERROR = 2
};
// Forward declarations for types this header only refers to through
// pointers: RingBuffer via a scoped_ptr member and TtsEngine via a raw
// pointer.
template <class T>
class RingBuffer;

class TtsEngine;
// Per-utterance settings handed to TtsService::Speak(): the voice to use,
// prosody (rate, pitch, volume) and queueing behaviour.
// NOTE(review): the defaults quoted below are presumably applied by the
// constructor, whose definition is not in this header - confirm.
struct UtteranceOptions {
  // Voice settings for the utterance.
  TtsVoice voice_options;

  // Speaking rate. Default is 1; higher values speed speech up and lower
  // values slow it down. The default should map to ~100 words/min if
  // possible, though speech engines may or may not support this mapping.
  float rate;

  // Speaking pitch. Default is 1; higher values raise the pitch and lower
  // values reduce it. The default should map to 200 Hz if possible, though
  // speech engines may or may not support this exact mapping.
  float pitch;

  // Speaking volume. Default is 1; higher values are louder and lower
  // values are quieter.
  float volume;

  // Whether this utterance is enqueued, or the existing queue is flushed.
  bool enqueue;

  // Whether this utterance can be interrupted.
  bool interruptible;

  UtteranceOptions();
};
// A piece of text to be spoken, bundled with the options to speak it with.
// No constructor is declared here, so whoever creates an Utterance is
// responsible for filling in its fields.
struct Utterance {
  // The text of the utterance.
  std::string text;

  // NOTE(review): presumably an index identifying one of the engine's
  // voices - confirm against the code that fills this in.
  int voice_index;

  // Options (voice, rate, pitch, volume, queueing) for this utterance.
  UtteranceOptions options;
};
// Text-to-speech service. Callers queue text with Speak() and return
// immediately; the class also acts as the AudioProvider for an audio
// output, the Runnable for its own background thread, and the
// TtsDataReceiver that a TtsEngine delivers synthesized audio to.
class TtsService : public AudioProvider,
                   public Runnable,
                   public TtsDataReceiver {
 public:
  // NOTE(review): both arguments are kept as raw pointers (see engine_ and
  // audio_output_ below); ownership is not visible from this header -
  // presumably the caller retains it. Confirm.
  TtsService(TtsEngine* engine, AudioOutput* audio_output);
  virtual ~TtsService();

  // ------------------------------------------------------------------
  // External interface
  // ------------------------------------------------------------------

  // Starts the background service.
  // NOTE(review): the bool presumably reports whether startup succeeded -
  // confirm against the implementation.
  bool StartService();

  // Stops the background service.
  void StopService();

  // Queues |text| to be spoken and returns immediately. |options| carries
  // the remaining settings such as language name, voice, pitch and rate.
  // Currently a language name of the form <language>-<locale> is
  // supported, for example en-US or fr-FR.
  void Speak(const std::string& text, const UtteranceOptions& options);

  // Interrupts the utterance currently being spoken and discards every
  // other utterance still in the queue.
  void Stop();

  // Reports whether the service is busy or speaking.
  tts_status GetStatus();

  // Blocks the caller until all queued utterances are done speaking.
  void WaitUntilFinished();

  // ------------------------------------------------------------------
  // Internal implementation
  // ------------------------------------------------------------------
  // NOTE(review): FillAudioBuffer() and Receive() are described as
  // implementations of base-class interfaces but, unlike Run(), carry no
  // virtual/OVERRIDE annotation here. The base declarations are not visible
  // from this header, so they are left untouched.

  // AudioProvider implementation; called by the audio output thread.
  bool FillAudioBuffer(int16_t* samples, int size);

  // Runnable implementation; the body of our background thread.
  virtual void Run() OVERRIDE;

  // TtsDataReceiver implementation; the TtsEngine calls this with the
  // audio data it has generated.
  tts_callback_status Receive(int rate,
                              int num_channels,
                              const int16_t* data,
                              int num_samples,
                              tts_synth_status status);

 private:
  TtsEngine* engine_;
  AudioOutput* audio_output_;
  scoped_ptr<RingBuffer<int16_t> > ring_buffer_;
  scoped_ptr<Thread> thread_;
  Utterance* current_utterance_;
  // NOTE(review): presumably scratch storage for audio samples - confirm.
  std::vector<int16_t> audio_buffer_;

  // Mutex and condition variable guarding the state listed below.
  scoped_ptr<Mutex> mutex_;
  scoped_ptr<CondVar> cond_var_;

  // State protected by the mutex and signaled through the condition
  // variable.
  std::list<Utterance*> utterances_;
  bool service_running_;
  bool utterance_running_;
  bool current_utterance_interruptible_;

  DISALLOW_COPY_AND_ASSIGN(TtsService);
};
} // namespace speech_synthesis
#endif // SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_SERVICE_H_