| // Copyright (c) 2010 The Chromium OS Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| // |
| // This header defines the interface to access Text-To-Speech functionality |
| // in shared libraries that implement speech synthesis and the management |
| // of resources associated with the synthesis. |
| // An example of the implementation of this interface can be found in |
| // pico/tts_engine.cc |
| |
| #ifndef SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_ENGINE_H_ |
| #define SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_ENGINE_H_ |
| |
| #include <string> |
| |
| #include "tts_receiver.h" |
| |
| using std::string; |
| |
| namespace speech_synthesis { |
| |
// Status codes returned by the TtsEngine interface.
// Success is zero; every error code is a distinct negative value.
enum tts_result {
  TTS_SUCCESS              =  0,  // The call completed normally.
  TTS_FAILURE              = -1,  // The call failed (generic error).
  TTS_FEATURE_UNSUPPORTED  = -2,  // Presumably: requested feature is not
                                  // available (no method in this header
                                  // documents returning it).
  TTS_VALUE_INVALID        = -3,  // A supplied value was rejected, e.g. by
                                  // SetProperty/SetRate/SetPitch/SetVolume.
  TTS_PROPERTY_UNSUPPORTED = -4,  // The property named in SetProperty or
                                  // GetProperty is not supported.
  TTS_MISSING_RESOURCES    = -5   // Presumably: resources needed by the
                                  // engine could not be found (no method in
                                  // this header documents returning it).
};
| |
// Gender attribute of a voice (see TtsVoice::gender).
// The zero value means "not specified" and is what TtsVoice's constructor
// assigns by default.
enum tts_gender {
  TTS_UNSPECIFIED_GENDER = 0,  // Default: no gender given.
  TTS_MALE               = 1,
  TTS_FEMALE             = 2
};
| |
// Quality attribute of a voice (see TtsVoice::quality).
// The zero value means "not specified" and is what TtsVoice's constructor
// assigns by default; the remaining values increase numerically from
// experimental to high.
enum tts_quality {
  TTS_UNSPECIFIED_QUALITY  = 0,  // Default: no quality given.
  TTS_EXPERIMENTAL_QUALITY = 1,
  TTS_LOW_QUALITY          = 2,
  TTS_NORMAL_QUALITY       = 3,
  TTS_HIGH_QUALITY         = 4
};
| |
| struct TtsVoice { |
| public: |
| // Required |
| string name; |
| string language; |
| int sample_rate; |
| tts_quality quality; |
| |
| // Optional |
| string region; |
| tts_gender gender; |
| int age; |
| |
| TtsVoice() |
| : name(), |
| language(), |
| sample_rate(0), |
| quality(TTS_UNSPECIFIED_QUALITY), |
| region(), |
| gender(TTS_UNSPECIFIED_GENDER), |
| age(0) { |
| } |
| }; |
| |
| class TtsEngine { |
| public: |
| virtual ~TtsEngine() {} |
| |
| // Initialize the TTS engine and returns whether initialization succeeded. |
| // @return TTS_SUCCESS, or TTS_FAILURE |
| virtual tts_result Init() = 0; |
| |
| // Shut down the TTS engine and releases all associated resources. |
| // @return TTS_SUCCESS, or TTS_FAILURE |
| virtual tts_result Shutdown() = 0; |
| |
| // Interrupt synthesis and flushes any synthesized data that hasn't been |
| // output yet. This will block until callbacks underway are completed. |
| // @return TTS_SUCCESS, or TTS_FAILURE |
| virtual tts_result Stop() = 0; |
| |
| // Return the number of loaded voices |
| virtual int GetVoiceCount() = 0; |
| |
| // Return information about the ith voice |
| virtual const TtsVoice * GetVoiceInfo(int voice_index) = 0; |
| |
| // Returns true iff the specified voice matches the pattern given in |
| // voice_options. Default values in voice_options are treated as wildcards. |
| static bool VoiceMatches(const TtsVoice* voice, |
| const TtsVoice *voice_options); |
| |
| // Return the voice index for the first voice matching the specified |
| // voice options, or -1 if there are no matching voices. |
| int GetVoiceIndex(const TtsVoice* voice_options); |
| |
| // Switch to the voice with the given index. |
| // @return TTS_SUCCESS or TTS_FAILURE |
| virtual tts_result SetVoice(int voice_index) = 0; |
| |
| // Switch to the first voice in the given language code. |
| // @return TTS_SUCCESS or TTS_FAILURE |
| tts_result SetVoiceByLanguage(const string& language_code); |
| |
| // Switch to the first voice with matching properties. |
| // @return TTS_SUCCESS or TTS_FAILURE |
| tts_result SetVoiceByProperties(const TtsVoice* voice_options) { |
| return SetVoice(GetVoiceIndex(voice_options)); |
| } |
| |
| // Set the object that will receive completed audio samples |
| virtual void SetReceiver(TtsDataReceiver* receiver) = 0; |
| |
| // Set a property for the the TTS engine |
| // @param property pointer to the property name |
| // @param value pointer to the new property value, null-terminated utf-8 |
| // @return TTS_PROPERTY_UNSUPPORTED, or TTS_SUCCESS, or |
| // TTS_FAILURE, or TTS_VALUE_INVALID |
| virtual tts_result SetProperty(const char *property, const char *value) = 0; |
| |
| // Set the speaking rate/speed. |
| // @param rate the speaking rate in the range of 0 to 1. Default is 0.2 |
| // @return TTS_SUCCESS, or TTS_FAILURE, or TTS_VALUE_INVALID |
| virtual tts_result SetRate(float rate) = 0; |
| |
| // Set the speaking pitch. |
| // @param pitch the speaking pitch in the range of 0 to 1. Default is 0.5 |
| // @return TTS_SUCCESS, or TTS_FAILURE, or TTS_VALUE_INVALID |
| virtual tts_result SetPitch(float pitch) = 0; |
| |
| // Set the speaking volume. |
| // @param volume the speaking volume in the range of 0 to 1. Default is 0.2 |
| // @return TTS_SUCCESS, or TTS_FAILURE, or TTS_VALUE_INVALID |
| virtual tts_result SetVolume(float volume) = 0; |
| |
| // Retrieve a property from the TTS engine |
| // @param property pointer to the property name |
| // @param[out] out_value will return a const pointer to the |
| // retrieved value, or null if it doesn't exist |
| // @return TTS_PROPERTY_UNSUPPORTED, or TTS_SUCCESS |
| virtual tts_result GetProperty(const char *property, const char **value) = 0; |
| |
| // Get the sample rate of the currently selected voice |
| // @return the sample rate in Hz |
| virtual int GetSampleRate() = 0; |
| |
| // Synthesize the text. |
| // As the synthesis is performed, the engine invokes the callback to notify |
| // the TTS framework that it has filled the given buffer, and indicates how |
| // many bytes it wrote. The callback is called repeatedly until the engine |
| // has generated all the audio data corresponding to the text. |
| // Text is coded in UTF-8 and supports SSML. |
| // |
| // @param text null-terminated UTF-8 text to synthesize |
| // @param audio_buffer buffer to write output audio samples |
| // @param audio_sample_count number of samples in the audio buffer |
| // @param out_total_samples receives total number of samples output |
| // @return TTS_SUCCESS or TTS_FAILURE |
| virtual tts_result SynthesizeText(const char *text, |
| int16_t* audio_buffer, |
| int audio_sample_count, |
| int* out_total_samples) = 0; |
| }; |
| |
| } // namespace speech_synthesis |
| |
| #endif // SPEECH_CLIENT_SYNTHESIS_SERVICE_TTS_ENGINE_H_ |
| |