// blob: f089fe0b2cf5884d8e7ec960a704a8b0a8b43522 [file] [log] [blame]
// Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This header defines the interface to access Text-To-Speech functionality
// in shared libraries that implement speech synthesis and the management
// of resources associated with the synthesis.
// An example of the implementation of this interface can be found in
// pico/tts_engine.cc
#ifndef SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_ENGINE_H_
#define SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_ENGINE_H_
#include <stdint.h>

#include <string>

#include "tts_receiver.h"

using std::string;
namespace speech_synthesis {
// Status codes returned by the TtsEngine interface. Success is zero and
// every failure code is a distinct negative value.
enum tts_result {
  // The operation completed.
  TTS_SUCCESS              =  0,
  // The operation failed; no more specific code applies.
  TTS_FAILURE              = -1,
  // NOTE(review): not referenced elsewhere in this header; presumably
  // reported when an engine lacks an optional capability.
  TTS_FEATURE_UNSUPPORTED  = -2,
  // Listed as a possible result of the property/rate/pitch/volume setters.
  TTS_VALUE_INVALID        = -3,
  // Listed as a possible result of SetProperty() and GetProperty().
  TTS_PROPERTY_UNSUPPORTED = -4,
  // NOTE(review): not referenced elsewhere in this header; presumably
  // reported when required engine data cannot be found.
  TTS_MISSING_RESOURCES    = -5
};
// Gender attribute of a voice. The zero value means "not specified" and is
// what a default-constructed TtsVoice carries.
enum tts_gender {
  TTS_UNSPECIFIED_GENDER = 0,
  TTS_MALE               = 1,
  TTS_FEMALE             = 2
};
// Quality tier of a voice. The zero value means "not specified" and is what
// a default-constructed TtsVoice carries; the named tiers use increasing
// numeric values from experimental (1) up to high (4).
enum tts_quality {
  TTS_UNSPECIFIED_QUALITY  = 0,
  TTS_EXPERIMENTAL_QUALITY = 1,
  TTS_LOW_QUALITY          = 2,
  TTS_NORMAL_QUALITY       = 3,
  TTS_HIGH_QUALITY         = 4
};
// Plain description of one synthesis voice. The same type is also used as a
// search pattern by TtsEngine (VoiceMatches / GetVoiceIndex /
// SetVoiceByProperties), where fields left at their default values are
// documented to act as wildcards.
struct TtsVoice {
  // --- Required fields ---
  std::string name;
  std::string language;
  int sample_rate;  // presumably in Hz, like TtsEngine::GetSampleRate()
  tts_quality quality;

  // --- Optional fields ---
  std::string region;
  tts_gender gender;
  int age;

  // Produces the "all defaults" voice: the strings default-construct to
  // empty, the numeric fields are zero and both enums are UNSPECIFIED.
  TtsVoice()
      : sample_rate(0),
        quality(TTS_UNSPECIFIED_QUALITY),
        gender(TTS_UNSPECIFIED_GENDER),
        age(0) {}
};
// Abstract interface that a Text-To-Speech engine implements. Engine-specific
// behaviour lives in the pure virtual methods; the non-virtual helpers
// (GetVoiceIndex, SetVoiceByLanguage, SetVoiceByProperties) are shared by all
// engines and are built on top of the virtual ones.
class TtsEngine {
 public:
  virtual ~TtsEngine() {}

  // Initialize the TTS engine and return whether initialization succeeded.
  // @return TTS_SUCCESS, or TTS_FAILURE
  virtual tts_result Init() = 0;

  // Shut down the TTS engine and release all associated resources.
  // @return TTS_SUCCESS, or TTS_FAILURE
  virtual tts_result Shutdown() = 0;

  // Interrupt synthesis and flush any synthesized data that hasn't been
  // output yet. This will block until callbacks underway are completed.
  // @return TTS_SUCCESS, or TTS_FAILURE
  virtual tts_result Stop() = 0;

  // Return the number of loaded voices.
  virtual int GetVoiceCount() = 0;

  // Return information about the voice at position |voice_index|.
  // NOTE(review): this header does not state who owns the returned object or
  // what is returned for an out-of-range index; confirm against the engine
  // implementation before relying on either.
  virtual const TtsVoice* GetVoiceInfo(int voice_index) = 0;

  // Returns true iff the specified voice matches the pattern given in
  // voice_options. Default values in voice_options are treated as wildcards.
  static bool VoiceMatches(const TtsVoice* voice,
                           const TtsVoice* voice_options);

  // Return the voice index for the first voice matching the specified
  // voice options, or -1 if there are no matching voices.
  int GetVoiceIndex(const TtsVoice* voice_options);

  // Switch to the voice with the given index.
  // @return TTS_SUCCESS or TTS_FAILURE
  virtual tts_result SetVoice(int voice_index) = 0;

  // Switch to the first voice in the given language code.
  // @return TTS_SUCCESS or TTS_FAILURE
  tts_result SetVoiceByLanguage(const std::string& language_code);

  // Switch to the first voice with matching properties.
  // @param voice_options pattern to match; see VoiceMatches()
  // @return TTS_SUCCESS, or TTS_FAILURE (including when no voice matches)
  tts_result SetVoiceByProperties(const TtsVoice* voice_options) {
    const int voice_index = GetVoiceIndex(voice_options);
    // GetVoiceIndex() signals "no match" with -1. Report that as a failure
    // here rather than handing an invalid index to the engine-specific
    // SetVoice(), whose handling of negative indices is not guaranteed.
    if (voice_index < 0)
      return TTS_FAILURE;
    return SetVoice(voice_index);
  }

  // Set the object that will receive completed audio samples.
  virtual void SetReceiver(TtsDataReceiver* receiver) = 0;

  // Set a property for the TTS engine.
  // @param property pointer to the property name
  // @param value pointer to the new property value, null-terminated utf-8
  // @return TTS_PROPERTY_UNSUPPORTED, or TTS_SUCCESS, or
  //         TTS_FAILURE, or TTS_VALUE_INVALID
  virtual tts_result SetProperty(const char* property, const char* value) = 0;

  // Set the speaking rate/speed.
  // @param rate the speaking rate in the range of 0 to 1. Default is 0.2
  // @return TTS_SUCCESS, or TTS_FAILURE, or TTS_VALUE_INVALID
  virtual tts_result SetRate(float rate) = 0;

  // Set the speaking pitch.
  // @param pitch the speaking pitch in the range of 0 to 1. Default is 0.5
  // @return TTS_SUCCESS, or TTS_FAILURE, or TTS_VALUE_INVALID
  virtual tts_result SetPitch(float pitch) = 0;

  // Set the speaking volume.
  // @param volume the speaking volume in the range of 0 to 1. Default is 0.2
  //        NOTE(review): the stated default is identical to SetRate's;
  //        confirm it against the engine implementation.
  // @return TTS_SUCCESS, or TTS_FAILURE, or TTS_VALUE_INVALID
  virtual tts_result SetVolume(float volume) = 0;

  // Retrieve a property from the TTS engine.
  // @param property pointer to the property name
  // @param[out] value will return a const pointer to the
  //        retrieved value, or null if it doesn't exist
  // @return TTS_PROPERTY_UNSUPPORTED, or TTS_SUCCESS
  virtual tts_result GetProperty(const char* property, const char** value) = 0;

  // Get the sample rate of the currently selected voice.
  // @return the sample rate in Hz
  virtual int GetSampleRate() = 0;

  // Synthesize the text.
  // As the synthesis is performed, the engine invokes the callback to notify
  // the TTS framework that it has filled the given buffer, and indicates how
  // many bytes it wrote. The callback is called repeatedly until the engine
  // has generated all the audio data corresponding to the text.
  // Text is coded in UTF-8 and supports SSML.
  //
  // @param text null-terminated UTF-8 text to synthesize
  // @param audio_buffer buffer to write output audio samples
  // @param audio_sample_count number of samples in the audio buffer
  // @param out_total_samples receives total number of samples output
  // @return TTS_SUCCESS or TTS_FAILURE
  virtual tts_result SynthesizeText(const char* text,
                                    int16_t* audio_buffer,
                                    int audio_sample_count,
                                    int* out_total_samples) = 0;
};
} // namespace speech_synthesis
#endif // SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_ENGINE_H_