// pico/pico_tts_engine.cc - chromiumos/platform/speech_synthesis - Git at Google

 // Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 //
 // Pico specific implementation of the TtsEngine interface defined in
 // tts_engine.h.

 #include "pico/pico_tts_engine.h"

 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>

 #include "base/string_number_conversions.h"

 #include "log.h"

 // Evaluates X once; if the result differs from PICO_OK, logs the line number
 // and returns TTS_FAILURE from the enclosing function. The macro ends in a
 // bare `else` so that `FAILERR(...);` expands to one complete if/else
 // statement, with the caller's semicolon acting as the empty else branch.
 #define FAILERR(X) \
   if (PICO_OK != (X)) { \
     LOG(ERROR) << "Fail line " << __LINE__; \
     return TTS_FAILURE; \
   } \
   else

 using std::string;

 namespace speech_synthesis {

 // Names of the synthesis properties; used in this file as keys into
 // properties_.
 const char* PROP_RATE = "rate";
 const char* PROP_PITCH = "pitch";
 const char* PROP_VOLUME = "volume";

 // Size, in bytes, of the memory area that Init() allocates and hands to
 // pico_initialize().
 const int PICO_MEM_SIZE = 2500000;
 // Name under which the voice definition is created in InitVoice() and
 // released in CleanResources().
 const pico_Char * PICO_VOICE_NAME =
     reinterpret_cast<const pico_Char *>("PicoVoice");

 // Constructs an engine in the "not initialized" state: all Pico handles are
 // NULL, no receiver is attached and no voice is selected. The handles are
 // created later by Init(). |base_path| is the prefix that is prepended to the
 // data file names.
 PicoTtsEngine::PicoTtsEngine(const std::string& base_path)
     : base_path_(base_path),
       mem_area_(NULL),
       system_(NULL),
       engine_(NULL),
       ta_resource_(NULL),
       sg_resource_(NULL),
       receiver_(NULL) {
   // Without this the index is indeterminate until CleanResources() first
   // runs, yet SetVoice() and GetSampleRate() read it. It is assigned in the
   // body so the fix does not depend on the member declaration order.
   current_voice_index_ = -1;
 }

 // Releases everything the engine still holds by delegating to Shutdown().
 PicoTtsEngine::~PicoTtsEngine() {
   this->Shutdown();
 }

 // Tears down the per-voice state: the engine together with its voice
 // definition, followed by the ta and sg resources. The Pico system itself is
 // left untouched. Afterwards no voice is selected.
 void PicoTtsEngine::CleanResources(void) {
   if (engine_ != NULL) {
     pico_disposeEngine(system_, &engine_);
     pico_releaseVoiceDefinition(system_, PICO_VOICE_NAME);
     engine_ = NULL;
   }

   if (ta_resource_ != NULL) {
     pico_unloadResource(system_, &ta_resource_);
     ta_resource_ = NULL;
   }

   if (sg_resource_ != NULL) {
     pico_unloadResource(system_, &sg_resource_);
     sg_resource_ = NULL;
   }

   // -1 marks "no voice loaded".
   current_voice_index_ = -1;
 }

 // Loads the lingware of voice |voice_index| and creates a Pico engine for it.
 //
 // Returns TTS_SUCCESS and records the index in current_voice_index_ when every
 // step succeeds. Returns TTS_FAILURE if the index is out of range or a Pico
 // call fails (including engine creation); in that case whatever was set up so
 // far is released again so that a later call starts from a clean state.
 tts_result PicoTtsEngine::InitVoice(int voice_index) {
   if (voice_index < 0 || voice_index >= GetVoiceCount()) {
     LOG(INFO) << "Voice index out of range: " << voice_index;
     return TTS_FAILURE;
   }
   const PicoTtsVoice * voice = &voices_[voice_index];

   pico_Char ta_resource_name[PICO_MAX_RESOURCE_NAME_SIZE];
   pico_Char sg_resource_name[PICO_MAX_RESOURCE_NAME_SIZE];

   string tafile = base_path_ + voice->ta_lingware;
   string sgfile = base_path_ + voice->sg_lingware;
   const pico_Char *ta_filename =
       reinterpret_cast<const pico_Char *>(tafile.c_str());
   const pico_Char *sg_filename =
       reinterpret_cast<const pico_Char *>(sgfile.c_str());

   // Name of the first step that failed, or NULL while everything succeeded.
   const char *failed_step = NULL;
   // True once the voice definition exists and therefore must be released.
   bool voice_defined = false;

   if (PICO_OK != pico_loadResource(system_, ta_filename, &ta_resource_)) {
     failed_step = "pico_loadResource(ta)";
   } else if (PICO_OK !=
              pico_loadResource(system_, sg_filename, &sg_resource_)) {
     failed_step = "pico_loadResource(sg)";
   } else if (PICO_OK != pico_getResourceName(
                  system_, ta_resource_,
                  reinterpret_cast<char *>(ta_resource_name))) {
     failed_step = "pico_getResourceName(ta)";
   } else if (PICO_OK != pico_getResourceName(
                  system_, sg_resource_,
                  reinterpret_cast<char *>(sg_resource_name))) {
     failed_step = "pico_getResourceName(sg)";
   } else if (PICO_OK !=
              pico_createVoiceDefinition(system_, PICO_VOICE_NAME)) {
     failed_step = "pico_createVoiceDefinition";
   } else {
     voice_defined = true;
     if (PICO_OK != pico_addResourceToVoiceDefinition(
             system_, PICO_VOICE_NAME, ta_resource_name)) {
       failed_step = "pico_addResourceToVoiceDefinition(ta)";
     } else if (PICO_OK != pico_addResourceToVoiceDefinition(
                    system_, PICO_VOICE_NAME, sg_resource_name)) {
       failed_step = "pico_addResourceToVoiceDefinition(sg)";
     } else if (PICO_OK !=
                pico_newEngine(system_, PICO_VOICE_NAME, &engine_)) {
       // Previously unchecked: a failed engine creation reported success.
       failed_step = "pico_newEngine";
     }
   }

   if (failed_step != NULL) {
     LOG(ERROR) << "Fail in " << failed_step << " for voice " << voice_index;
     // CleanResources() releases the voice definition only together with an
     // engine, so release it here when no engine was created.
     if (voice_defined && engine_ == NULL) {
       pico_releaseVoiceDefinition(system_, PICO_VOICE_NAME);
     }
     CleanResources();
     return TTS_FAILURE;
   }

   current_voice_index_ = voice_index;
   return TTS_SUCCESS;
 }

 // Initializes the TTS engine: loads the voice list from tts_support.xml under
 // base_path_, allocates and initializes the Pico system, and selects voice 0.
 //
 // Returns TTS_SUCCESS, or TTS_FAILURE if the allocation, pico_initialize() or
 // InitVoice(0) fails. On failure everything acquired so far is released.
 tts_result PicoTtsEngine::Init() {
   LOG(INFO) << "Start.";

   // A repeated Init() would otherwise overwrite, and thereby leak, the
   // previous memory area and system handle.
   if (mem_area_ != NULL || system_ != NULL) {
     Shutdown();
   }

   LoadVoices(base_path_ + "tts_support.xml");
   mem_area_ = malloc(PICO_MEM_SIZE);
   if (!mem_area_) {
     LOG(ERROR) << "Failed to allocate memory for Pico system";
     return TTS_FAILURE;
   }

   if (PICO_OK != pico_initialize(mem_area_, PICO_MEM_SIZE, &system_)) {
     LOG(ERROR) << "Failed to initialize Pico system";
     Shutdown();
     return TTS_FAILURE;
   }

   // Set the first language in the data file as the default. InitVoice()
   // returns a tts_result, so it is compared with TTS_SUCCESS, not PICO_OK.
   if (TTS_SUCCESS != InitVoice(0)) {
     LOG(ERROR) << "Failed to initialize default voice";
     Shutdown();
     return TTS_FAILURE;
   }

   LOG(INFO) << "Init done.";
   return TTS_SUCCESS;
 }

 // Releases, in this order, the per-voice resources, the Pico system and the
 // memory area that was passed to pico_initialize(). Always returns
 // TTS_SUCCESS.
 tts_result PicoTtsEngine::Shutdown() {
   CleanResources();

   if (system_ != NULL) {
     pico_terminate(&system_);
     system_ = NULL;
   }

   // free(NULL) is a no-op, so no explicit guard is needed.
   free(mem_area_);
   mem_area_ = NULL;

   return TTS_SUCCESS;
 }

 // Aborts any synthesis in progress by fully resetting the Pico engine.
 // Returns TTS_SUCCESS when there is no engine (nothing to stop) or the reset
 // succeeded, and TTS_FAILURE when Pico reports an error.
 tts_result PicoTtsEngine::Stop() {
   if (engine_ == NULL) {
     return TTS_SUCCESS;
   }
   // TODO(fergus): use PICO_RESET_SOFT here instead?
   if (PICO_OK != pico_resetEngine(engine_, PICO_RESET_FULL)) {
     LOG(ERROR) << "Failed to reset Pico engine";
     return TTS_FAILURE;
   }
   return TTS_SUCCESS;
 }

 // Returns the number of entries in voices_.
 int PicoTtsEngine::GetVoiceCount() {
   const int voice_count = static_cast<int>(voices_.size());
   return voice_count;
 }

 // Returns a pointer to the description of voice |voice_index|, or NULL when
 // the index is outside [0, GetVoiceCount()).
 const TtsVoice* PicoTtsEngine::GetVoiceInfo(int voice_index) {
   const bool out_of_range =
       voice_index < 0 || voice_index >= GetVoiceCount();
   if (out_of_range) {
     return NULL;
   }
   return &voices_[voice_index];
 }

 // Switches to voice |voice_index|. Does nothing and returns TTS_SUCCESS when
 // that voice is already current. Returns TTS_FAILURE for an out-of-range
 // index, leaving the current voice loaded; otherwise unloads the current
 // voice and returns the result of InitVoice().
 tts_result PicoTtsEngine::SetVoice(int voice_index) {
   if (current_voice_index_ == voice_index) {
     return TTS_SUCCESS;
   }
   // Validate before tearing anything down, so that a bad index does not
   // destroy a working voice.
   if (voice_index < 0 || voice_index >= GetVoiceCount()) {
     LOG(INFO) << "Voice index out of range: " << voice_index;
     return TTS_FAILURE;
   }
   CleanResources();
   return InitVoice(voice_index);
 }

 // Stores |receiver| as the object that GetAudioFromTts() hands synthesized
 // audio to. The pointer is kept as-is; no copy is made.
 void PicoTtsEngine::SetReceiver(TtsDataReceiver* receiver) {
   this->receiver_ = receiver;
 }

 // Sets an existing property to |value|.
 //
 // Returns TTS_SUCCESS when |property| is already present in properties_ and
 // was updated, TTS_PROPERTY_UNSUPPORTED when it is NULL or unknown, and
 // TTS_VALUE_INVALID when |value| is NULL. New keys are never created.
 tts_result PicoTtsEngine::SetProperty(const char *property, const char *value) {
   // Building a std::string from a NULL pointer is undefined behaviour.
   if (property == NULL) {
     return TTS_PROPERTY_UNSUPPORTED;
   }
   if (value == NULL) {
     return TTS_VALUE_INVALID;
   }

   // Single lookup instead of count() followed by operator[].
   std::map<string, string>::iterator iter = properties_.find(property);
   if (iter == properties_.end()) {
     return TTS_PROPERTY_UNSUPPORTED;
   }
   iter->second = value;
   return TTS_SUCCESS;
 }

 // Numeric overload: truncates |value| to an integer, formats it as a string
 // and forwards to the string overload. Returns that overload's result, so an
 // unsupported property is no longer reported as success.
 tts_result PicoTtsEngine::SetProperty(const char *property, float value) {
   const int truncated = static_cast<int>(value);
   return SetProperty(property, base::IntToString(truncated).c_str());
 }

 // Sets the speech rate. |rate| must lie in [0, 5]; values below 0.2 are
 // raised to 0.2. The value is stored as a percentage (rate * 100) under
 // PROP_RATE. Returns TTS_VALUE_INVALID for out-of-range or NaN input,
 // otherwise the result of SetProperty().
 tts_result PicoTtsEngine::SetRate(float rate) {
   // Negated range test: NaN fails every comparison and is rejected as well.
   if (!(rate >= 0 && rate <= 5)) {
     return TTS_VALUE_INVALID;
   }
   rate = rate < 0.2 ? 0.2 : rate;
   return SetProperty(PROP_RATE, rate * 100);
 }

 // Sets the pitch. |pitch| must lie in [0, 2]; values below 0.5 are raised to
 // 0.5. The value is stored as a percentage (pitch * 100) under PROP_PITCH.
 // Returns TTS_VALUE_INVALID for out-of-range or NaN input, otherwise the
 // result of SetProperty().
 tts_result PicoTtsEngine::SetPitch(float pitch) {
   // Negated range test: NaN fails every comparison and is rejected as well.
   if (!(pitch >= 0 && pitch <= 2)) {
     return TTS_VALUE_INVALID;
   }
   pitch = pitch < 0.5 ? 0.5 : pitch;
   return SetProperty(PROP_PITCH, pitch * 100);
 }

 // Sets the volume. |volume| must lie in [0, 5] and is stored as a percentage
 // (volume * 100) under PROP_VOLUME. Returns TTS_VALUE_INVALID for
 // out-of-range or NaN input, otherwise the result of SetProperty().
 tts_result PicoTtsEngine::SetVolume(float volume) {
   // Negated range test: NaN fails every comparison and is rejected as well.
   if (!(volume >= 0 && volume <= 5)) {
     return TTS_VALUE_INVALID;
   }
   return SetProperty(PROP_VOLUME, volume * 100);
 }

 // Looks up |property| in properties_.
 //
 // Returns TTS_PROPERTY_UNSUPPORTED when |property| is NULL or unknown. On
 // success returns TTS_SUCCESS and, if |value| is non-NULL, points *value at
 // the stored string. That pointer refers to storage owned by properties_, so
 // it may become invalid when the property is changed.
 tts_result PicoTtsEngine::GetProperty(const char *property,
     const char **value) {
   // Building a std::string key from a NULL pointer is undefined behaviour.
   if (property == NULL) {
     return TTS_PROPERTY_UNSUPPORTED;
   }

   std::map<string, string>::const_iterator iter =
       properties_.find(property);
   if (iter == properties_.end()) {
     return TTS_PROPERTY_UNSUPPORTED;
   }
   if (value != NULL) {
     (*value) = iter->second.c_str();
   }
   return TTS_SUCCESS;
 }

 int PicoTtsEngine::GetSampleRate() {
   return voices_[current_voice_index_].sample_rate;
 }

 // Synthesizes |text|, wrapped in the tags produced by AppendProperties().
 //
 // The text is fed to Pico piece by piece; after each piece the audio produced
 // so far is drained through GetAudioFromTts() using |audio_buffer|, which
 // holds |audio_buffer_size| 16-bit samples. When |out_total_samples| is
 // non-NULL it receives the total number of samples produced.
 // Returns TTS_FAILURE when |text| is NULL, no engine exists or Pico rejects
 // the text; otherwise the first non-success result of GetAudioFromTts(), or
 // TTS_SUCCESS.
 tts_result PicoTtsEngine::SynthesizeText(const char* text,
                                          int16_t* audio_buffer,
                                          int audio_buffer_size,
                                          int* out_total_samples) {
   if (out_total_samples != NULL) {
     *out_total_samples = 0;
   }

   // Appending a NULL C string to a std::string is undefined behaviour.
   if (text == NULL || engine_ == NULL) {
     LOG(ERROR) << "SynthesizeText called without text or engine";
     return TTS_FAILURE;
   }

   string synth_text;
   AppendProperties(text, &synth_text);

   // pico_putTextUtf8() takes the size as a pico_Int16, so never offer more
   // than this many bytes in one call; larger sizes would wrap around.
   const int kMaxChunkBytes = 32767;

   const pico_Char* text_ptr =
       reinterpret_cast<const pico_Char*>(synth_text.c_str());
   // +1: the terminating NUL is handed to Pico along with the text.
   const int text_buffer_len = static_cast<int>(synth_text.size()) + 1;
   int text_pos = 0;
   while (text_pos < text_buffer_len) {
     int chunk_bytes = text_buffer_len - text_pos;
     if (chunk_bytes > kMaxChunkBytes) {
       chunk_bytes = kMaxChunkBytes;
     }

     pico_Int16 text_bytes_consumed = 0;
     if (PICO_OK != pico_putTextUtf8(
             engine_, text_ptr, static_cast<pico_Int16>(chunk_bytes),
             &text_bytes_consumed)) {
       return TTS_FAILURE;
     }

     int out_samples = 0;
     tts_result result = GetAudioFromTts(
         audio_buffer, audio_buffer_size, &out_samples);
     if (out_total_samples != NULL) {
       *out_total_samples += out_samples;
     }

     if (result != TTS_SUCCESS) {
       return result;
     }

     // Pico may accept fewer bytes than offered; continue after them.
     text_pos += text_bytes_consumed;
     text_ptr += text_bytes_consumed;
   }
   return TTS_SUCCESS;
 }

 // This method appends the SSML tags for the supported properties if their
 // values are different from the default values.
 void PicoTtsEngine::AppendProperties(const char *text, string *synth_text) {
   int rate_level_ = floor(atof(properties_[PROP_RATE].c_str()));
   int pitch_level_ = floor(atof(properties_[PROP_PITCH].c_str()));
   int volume_level_ = floor(atof(properties_[PROP_VOLUME].c_str()));

   if (rate_level_ < PICO_MIN_RATE || rate_level_ > PICO_MAX_RATE) {
     LOG(WARNING) << "Rate is outside the allowed range.";
   }
   if (pitch_level_ < PICO_MIN_PITCH || pitch_level_ > PICO_MAX_PITCH) {
     LOG(WARNING) << "Pitch is outside the allowed range.";
   }
   if (volume_level_ < PICO_MIN_VOL || volume_level_ > PICO_MAX_VOL) {
     LOG(WARNING) << "Volume is outside the allowed range.";
   }

   *synth_text = "";

   // Append opening tags
   if (rate_level_ != PICO_DEF_RATE) {
     *synth_text += "<speed level='" + properties_[PROP_RATE] + "'>";
   }
   if (pitch_level_ != PICO_DEF_PITCH) {
     *synth_text += "<pitch level='" + properties_[PROP_PITCH] + "'>";
   }
   if (volume_level_ != PICO_DEF_VOL) {
     *synth_text += "<volume level='" + properties_[PROP_VOLUME] + "'>";
   }
   // Append text
   *synth_text += text;
   // Append closing tags in the reverse order
   if (volume_level_ != PICO_DEF_VOL) {
     *synth_text += "</volume>";
   }
   if (pitch_level_ != PICO_DEF_PITCH) {
     *synth_text += "</pitch>";
   }
   if (rate_level_ != PICO_DEF_RATE) {
     *synth_text += "</speed>";
   }
 }

 // max_iterations_without_apparent_progress is a hack to prevent infinite loops.
 // It bounds the number of consecutive pico_getData() calls in
 // GetAudioFromTts() that return zero bytes while the engine still reports
 // busy; exceeding the bound makes GetAudioFromTts() return TTS_FAILURE.
 // This needs to be more than 200 to pass simple tests such as hello world.
 // TODO(fergus): we should fix the underlying bug <http://b/2501315> in the
 // //third_party/pico sources, and then delete all the code relating to
 // max_iterations_without_apparent_progress.
 int PicoTtsEngine::max_iterations_without_apparent_progress = 10000;

 // Drains the audio Pico has produced for the text fed so far and passes each
 // chunk to receiver_ (when one is set) via |audio_buffer|.
 //
 // |audio_buffer| must hold |audio_buffer_size| 16-bit samples. The number of
 // samples produced is stored in |*out_total_samples| when it is non-NULL.
 // Returns TTS_FAILURE when the buffer is unusable, no voice is selected, Pico
 // reports an error, the data is not 16-bit PCM, the receiver reports an
 // error, or the no-progress limit is exceeded; TTS_SUCCESS otherwise. Unless
 // the receiver asked to halt, a final zero-length TTS_SYNTH_DONE packet is
 // sent to it.
 tts_result PicoTtsEngine::GetAudioFromTts(int16_t* audio_buffer,
                                           int audio_buffer_size,
                                           int* out_total_samples) {
   if (out_total_samples != NULL) {
     *out_total_samples = 0;
   }
   if (audio_buffer == NULL || audio_buffer_size <= 0) {
     LOG(ERROR) << "Invalid audio buffer";
     return TTS_FAILURE;
   }
   // Guards the voices_ lookup below; the index is -1 when no voice is loaded.
   if (current_voice_index_ < 0 || current_voice_index_ >= GetVoiceCount()) {
     LOG(ERROR) << "No voice selected";
     return TTS_FAILURE;
   }

   // pico_getData() takes the buffer size as a pico_Int16, so cap the request
   // at the largest whole number of samples whose byte count still fits.
   const int kMaxSamplesPerCall = 32767 / static_cast<int>(sizeof(pico_Int16));
   const int samples_per_call = audio_buffer_size < kMaxSamplesPerCall
       ? audio_buffer_size : kMaxSamplesPerCall;
   const pico_Int16 buffer_size_bytes =
       static_cast<pico_Int16>(samples_per_call * sizeof(pico_Int16));
   int8_t* const buffer_ptr = reinterpret_cast<int8_t *>(audio_buffer);

   int total_samples_output = 0;
   int status;
   tts_callback_status callback_status = TTS_CALLBACK_CONTINUE;
   pico_Int16 data_type = PICO_DATA_PCM_16BIT;
   uint32_t sample_rate = voices_[current_voice_index_].sample_rate;
   int iterations_without_apparent_progress = 0;
   while (1) {
     pico_Int16 bytes_received = 0;
     data_type = 0;

     status = pico_getData(engine_, buffer_ptr, buffer_size_bytes,
         &bytes_received, &data_type);

     if (status != PICO_STEP_ERROR && bytes_received > 0) {
       if (data_type != PICO_DATA_PCM_16BIT) {
         break;
       }

       int samples_output = bytes_received / sizeof(const pico_Int16);
       total_samples_output += samples_output;

       // Hand the chunk to the receiver. This only happens when data was
       // actually received in this iteration.
       if (receiver_) {
         callback_status =
             receiver_->Receive(sample_rate, 1, audio_buffer,
                                samples_output, TTS_SYNTH_PENDING);
         if (callback_status != TTS_CALLBACK_CONTINUE) {
           break;
         }
       }
     }
     // Anything other than "busy" means there is nothing more to fetch.
     if (status != PICO_STEP_BUSY) {
       break;
     }
     if (bytes_received == 0) {
       iterations_without_apparent_progress++;
       if (iterations_without_apparent_progress >
           max_iterations_without_apparent_progress) {
         break;
       }
     } else {
       iterations_without_apparent_progress = 0;
     }
   }

   if (out_total_samples != NULL) {
     *out_total_samples = total_samples_output;
   }

   if (status == PICO_STEP_ERROR ||
       callback_status == TTS_CALLBACK_ERROR ||
       data_type != PICO_DATA_PCM_16BIT ||
       iterations_without_apparent_progress >
       max_iterations_without_apparent_progress) {
     return TTS_FAILURE;
   }

   // receiver_ is optional everywhere else in this function; check it here
   // too instead of dereferencing a NULL pointer.
   if (callback_status != TTS_CALLBACK_HALT && receiver_ != NULL) {
     // Send a zero-length packet to tell the destination receiver that
     // we're done.
     callback_status = receiver_->Receive(sample_rate, 1, NULL, 0,
                                          TTS_SYNTH_DONE);
     if (callback_status == TTS_CALLBACK_ERROR) {
       return TTS_FAILURE;
     }
   }

   return TTS_SUCCESS;
 }

 }  // namespace speech_synthesis
	// Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.
	//
	// Pico specific implementation of the TtsEngine interface defined in
	// tts_engine.h.

	#include "pico/pico_tts_engine.h"

	#include <stdio.h>
	#include <stdlib.h>
	#include <math.h>

	#include "base/string_number_conversions.h"

	#include "log.h"

	// Evaluates X once; if the result differs from PICO_OK, logs the line number
	// and returns TTS_FAILURE from the enclosing function. The macro ends in a
	// bare `else` so that `FAILERR(...);` expands to one complete if/else
	// statement, with the caller's semicolon acting as the empty else branch.
	#define FAILERR(X) \
	if (PICO_OK != (X)) { \
	LOG(ERROR) << "Fail line " << __LINE__; \
	return TTS_FAILURE; \
	} \
	else

	using std::string;

	namespace speech_synthesis {

	// Names of the synthesis properties; used in this file as keys into
	// properties_.
	const char* PROP_RATE = "rate";
	const char* PROP_PITCH = "pitch";
	const char* PROP_VOLUME = "volume";

	// Size, in bytes, of the memory area that Init() allocates and hands to
	// pico_initialize().
	const int PICO_MEM_SIZE = 2500000;
	// Name under which the voice definition is created in InitVoice() and
	// released in CleanResources().
	const pico_Char * PICO_VOICE_NAME =
	reinterpret_cast<const pico_Char *>("PicoVoice");

	// Constructs an engine in the "not initialized" state: all Pico handles are
	// NULL, no receiver is attached and no voice is selected. The handles are
	// created later by Init(). |base_path| is the prefix that is prepended to the
	// data file names.
	PicoTtsEngine::PicoTtsEngine(const std::string& base_path)
	    : base_path_(base_path),
	      mem_area_(NULL),
	      system_(NULL),
	      engine_(NULL),
	      ta_resource_(NULL),
	      sg_resource_(NULL),
	      receiver_(NULL) {
	  // Without this the index is indeterminate until CleanResources() first
	  // runs, yet SetVoice() and GetSampleRate() read it. It is assigned in the
	  // body so the fix does not depend on the member declaration order.
	  current_voice_index_ = -1;
	}

	// Releases everything the engine still holds by delegating to Shutdown().
	PicoTtsEngine::~PicoTtsEngine() {
	  this->Shutdown();
	}

	// Tears down the per-voice state: the engine together with its voice
	// definition, followed by the ta and sg resources. The Pico system itself is
	// left untouched. Afterwards no voice is selected.
	void PicoTtsEngine::CleanResources(void) {
	  if (engine_ != NULL) {
	    pico_disposeEngine(system_, &engine_);
	    pico_releaseVoiceDefinition(system_, PICO_VOICE_NAME);
	    engine_ = NULL;
	  }

	  if (ta_resource_ != NULL) {
	    pico_unloadResource(system_, &ta_resource_);
	    ta_resource_ = NULL;
	  }

	  if (sg_resource_ != NULL) {
	    pico_unloadResource(system_, &sg_resource_);
	    sg_resource_ = NULL;
	  }

	  // -1 marks "no voice loaded".
	  current_voice_index_ = -1;
	}

	// Loads the lingware of voice |voice_index| and creates a Pico engine for it.
	//
	// Returns TTS_SUCCESS and records the index in current_voice_index_ when every
	// step succeeds. Returns TTS_FAILURE if the index is out of range or a Pico
	// call fails (including engine creation); in that case whatever was set up so
	// far is released again so that a later call starts from a clean state.
	tts_result PicoTtsEngine::InitVoice(int voice_index) {
	  // The `||` operator here had been garbled into `\|\|` by text extraction.
	  if (voice_index < 0 || voice_index >= GetVoiceCount()) {
	    LOG(INFO) << "Voice index out of range: " << voice_index;
	    return TTS_FAILURE;
	  }
	  const PicoTtsVoice * voice = &voices_[voice_index];

	  pico_Char ta_resource_name[PICO_MAX_RESOURCE_NAME_SIZE];
	  pico_Char sg_resource_name[PICO_MAX_RESOURCE_NAME_SIZE];

	  string tafile = base_path_ + voice->ta_lingware;
	  string sgfile = base_path_ + voice->sg_lingware;
	  const pico_Char *ta_filename =
	      reinterpret_cast<const pico_Char *>(tafile.c_str());
	  const pico_Char *sg_filename =
	      reinterpret_cast<const pico_Char *>(sgfile.c_str());

	  // Name of the first step that failed, or NULL while everything succeeded.
	  const char *failed_step = NULL;
	  // True once the voice definition exists and therefore must be released.
	  bool voice_defined = false;

	  if (PICO_OK != pico_loadResource(system_, ta_filename, &ta_resource_)) {
	    failed_step = "pico_loadResource(ta)";
	  } else if (PICO_OK !=
	             pico_loadResource(system_, sg_filename, &sg_resource_)) {
	    failed_step = "pico_loadResource(sg)";
	  } else if (PICO_OK != pico_getResourceName(
	                 system_, ta_resource_,
	                 reinterpret_cast<char *>(ta_resource_name))) {
	    failed_step = "pico_getResourceName(ta)";
	  } else if (PICO_OK != pico_getResourceName(
	                 system_, sg_resource_,
	                 reinterpret_cast<char *>(sg_resource_name))) {
	    failed_step = "pico_getResourceName(sg)";
	  } else if (PICO_OK !=
	             pico_createVoiceDefinition(system_, PICO_VOICE_NAME)) {
	    failed_step = "pico_createVoiceDefinition";
	  } else {
	    voice_defined = true;
	    if (PICO_OK != pico_addResourceToVoiceDefinition(
	            system_, PICO_VOICE_NAME, ta_resource_name)) {
	      failed_step = "pico_addResourceToVoiceDefinition(ta)";
	    } else if (PICO_OK != pico_addResourceToVoiceDefinition(
	                   system_, PICO_VOICE_NAME, sg_resource_name)) {
	      failed_step = "pico_addResourceToVoiceDefinition(sg)";
	    } else if (PICO_OK !=
	               pico_newEngine(system_, PICO_VOICE_NAME, &engine_)) {
	      // Previously unchecked: a failed engine creation reported success.
	      failed_step = "pico_newEngine";
	    }
	  }

	  if (failed_step != NULL) {
	    LOG(ERROR) << "Fail in " << failed_step << " for voice " << voice_index;
	    // CleanResources() releases the voice definition only together with an
	    // engine, so release it here when no engine was created.
	    if (voice_defined && engine_ == NULL) {
	      pico_releaseVoiceDefinition(system_, PICO_VOICE_NAME);
	    }
	    CleanResources();
	    return TTS_FAILURE;
	  }

	  current_voice_index_ = voice_index;
	  return TTS_SUCCESS;
	}

	// Initializes the TTS engine: loads the voice list from tts_support.xml under
	// base_path_, allocates and initializes the Pico system, and selects voice 0.
	//
	// Returns TTS_SUCCESS, or TTS_FAILURE if the allocation, pico_initialize() or
	// InitVoice(0) fails. On failure everything acquired so far is released.
	tts_result PicoTtsEngine::Init() {
	  LOG(INFO) << "Start.";

	  // A repeated Init() would otherwise overwrite, and thereby leak, the
	  // previous memory area and system handle.
	  if (mem_area_ != NULL || system_ != NULL) {
	    Shutdown();
	  }

	  LoadVoices(base_path_ + "tts_support.xml");
	  mem_area_ = malloc(PICO_MEM_SIZE);
	  if (!mem_area_) {
	    LOG(ERROR) << "Failed to allocate memory for Pico system";
	    return TTS_FAILURE;
	  }

	  if (PICO_OK != pico_initialize(mem_area_, PICO_MEM_SIZE, &system_)) {
	    LOG(ERROR) << "Failed to initialize Pico system";
	    Shutdown();
	    return TTS_FAILURE;
	  }

	  // Set the first language in the data file as the default. InitVoice()
	  // returns a tts_result, so it is compared with TTS_SUCCESS, not PICO_OK.
	  if (TTS_SUCCESS != InitVoice(0)) {
	    LOG(ERROR) << "Failed to initialize default voice";
	    Shutdown();
	    return TTS_FAILURE;
	  }

	  LOG(INFO) << "Init done.";
	  return TTS_SUCCESS;
	}

	// Releases, in this order, the per-voice resources, the Pico system and the
	// memory area that was passed to pico_initialize(). Always returns
	// TTS_SUCCESS.
	tts_result PicoTtsEngine::Shutdown() {
	  CleanResources();

	  if (system_ != NULL) {
	    pico_terminate(&system_);
	    system_ = NULL;
	  }

	  // free(NULL) is a no-op, so no explicit guard is needed.
	  free(mem_area_);
	  mem_area_ = NULL;

	  return TTS_SUCCESS;
	}

	// Aborts any synthesis in progress by fully resetting the Pico engine.
	// Returns TTS_SUCCESS when there is no engine (nothing to stop) or the reset
	// succeeded, and TTS_FAILURE when Pico reports an error.
	tts_result PicoTtsEngine::Stop() {
	  if (engine_ == NULL) {
	    return TTS_SUCCESS;
	  }
	  // TODO(fergus): use PICO_RESET_SOFT here instead?
	  if (PICO_OK != pico_resetEngine(engine_, PICO_RESET_FULL)) {
	    LOG(ERROR) << "Failed to reset Pico engine";
	    return TTS_FAILURE;
	  }
	  return TTS_SUCCESS;
	}

	// Returns the number of entries in voices_.
	int PicoTtsEngine::GetVoiceCount() {
	  const int voice_count = static_cast<int>(voices_.size());
	  return voice_count;
	}

	// Returns a pointer to the description of voice |voice_index|, or NULL when
	// the index is outside [0, GetVoiceCount()).
	const TtsVoice* PicoTtsEngine::GetVoiceInfo(int voice_index) {
	  const bool out_of_range =
	      voice_index < 0 || voice_index >= GetVoiceCount();
	  if (out_of_range) {
	    return NULL;
	  }
	  return &voices_[voice_index];
	}

	// Switches to voice |voice_index|. Does nothing and returns TTS_SUCCESS when
	// that voice is already current. Returns TTS_FAILURE for an out-of-range
	// index, leaving the current voice loaded; otherwise unloads the current
	// voice and returns the result of InitVoice().
	tts_result PicoTtsEngine::SetVoice(int voice_index) {
	  if (current_voice_index_ == voice_index) {
	    return TTS_SUCCESS;
	  }
	  // Validate before tearing anything down, so that a bad index does not
	  // destroy a working voice.
	  if (voice_index < 0 || voice_index >= GetVoiceCount()) {
	    LOG(INFO) << "Voice index out of range: " << voice_index;
	    return TTS_FAILURE;
	  }
	  CleanResources();
	  return InitVoice(voice_index);
	}

	// Stores |receiver| as the object that GetAudioFromTts() hands synthesized
	// audio to. The pointer is kept as-is; no copy is made.
	void PicoTtsEngine::SetReceiver(TtsDataReceiver* receiver) {
	  this->receiver_ = receiver;
	}

	// Sets an existing property to |value|.
	//
	// Returns TTS_SUCCESS when |property| is already present in properties_ and
	// was updated, TTS_PROPERTY_UNSUPPORTED when it is NULL or unknown, and
	// TTS_VALUE_INVALID when |value| is NULL. New keys are never created.
	//
	// The parameters are pointers; the `*` had been lost from this signature by
	// text extraction, leaving plain `const char` parameters.
	tts_result PicoTtsEngine::SetProperty(const char *property, const char *value) {
	  // Building a std::string from a NULL pointer is undefined behaviour.
	  if (property == NULL) {
	    return TTS_PROPERTY_UNSUPPORTED;
	  }
	  if (value == NULL) {
	    return TTS_VALUE_INVALID;
	  }

	  // Single lookup instead of count() followed by operator[].
	  std::map<string, string>::iterator iter = properties_.find(property);
	  if (iter == properties_.end()) {
	    return TTS_PROPERTY_UNSUPPORTED;
	  }
	  iter->second = value;
	  return TTS_SUCCESS;
	}

	// Numeric overload: truncates |value| to an integer, formats it as a string
	// and forwards to the string overload. Returns that overload's result, so an
	// unsupported property is no longer reported as success.
	tts_result PicoTtsEngine::SetProperty(const char *property, float value) {
	  const int truncated = static_cast<int>(value);
	  return SetProperty(property, base::IntToString(truncated).c_str());
	}

	// Sets the speech rate. |rate| must lie in [0, 5]; values below 0.2 are
	// raised to 0.2. The value is stored as a percentage (rate * 100) under
	// PROP_RATE. Returns TTS_VALUE_INVALID for out-of-range or NaN input,
	// otherwise the result of SetProperty().
	tts_result PicoTtsEngine::SetRate(float rate) {
	  // Replaces the garbled `\|\|` test. Written as a negated range test so
	  // that NaN, which fails every comparison, is rejected as well.
	  if (!(rate >= 0 && rate <= 5)) {
	    return TTS_VALUE_INVALID;
	  }
	  rate = rate < 0.2 ? 0.2 : rate;
	  return SetProperty(PROP_RATE, rate * 100);
	}

	// Sets the pitch. |pitch| must lie in [0, 2]; values below 0.5 are raised to
	// 0.5. The value is stored as a percentage (pitch * 100) under PROP_PITCH.
	// Returns TTS_VALUE_INVALID for out-of-range or NaN input, otherwise the
	// result of SetProperty().
	tts_result PicoTtsEngine::SetPitch(float pitch) {
	  // Replaces the garbled `\|\|` test. Written as a negated range test so
	  // that NaN, which fails every comparison, is rejected as well.
	  if (!(pitch >= 0 && pitch <= 2)) {
	    return TTS_VALUE_INVALID;
	  }
	  pitch = pitch < 0.5 ? 0.5 : pitch;
	  return SetProperty(PROP_PITCH, pitch * 100);
	}

	// Sets the volume. |volume| must lie in [0, 5] and is stored as a percentage
	// (volume * 100) under PROP_VOLUME. Returns TTS_VALUE_INVALID for
	// out-of-range or NaN input, otherwise the result of SetProperty().
	tts_result PicoTtsEngine::SetVolume(float volume) {
	  // Replaces the garbled `\|\|` test. Written as a negated range test so
	  // that NaN, which fails every comparison, is rejected as well.
	  if (!(volume >= 0 && volume <= 5)) {
	    return TTS_VALUE_INVALID;
	  }
	  return SetProperty(PROP_VOLUME, volume * 100);
	}

	// Looks up |property| in properties_.
	//
	// Returns TTS_PROPERTY_UNSUPPORTED when |property| is NULL or unknown. On
	// success returns TTS_SUCCESS and, if |value| is non-NULL, points *value at
	// the stored string. That pointer refers to storage owned by properties_, so
	// it may become invalid when the property is changed.
	tts_result PicoTtsEngine::GetProperty(const char *property,
	    const char **value) {
	  // Building a std::string key from a NULL pointer is undefined behaviour.
	  if (property == NULL) {
	    return TTS_PROPERTY_UNSUPPORTED;
	  }

	  std::map<string, string>::const_iterator iter =
	      properties_.find(property);
	  if (iter == properties_.end()) {
	    return TTS_PROPERTY_UNSUPPORTED;
	  }
	  if (value != NULL) {
	    (*value) = iter->second.c_str();
	  }
	  return TTS_SUCCESS;
	}

	int PicoTtsEngine::GetSampleRate() {
	return voices_[current_voice_index_].sample_rate;
	}

	// Synthesizes |text|, wrapped in the tags produced by AppendProperties().
	//
	// The text is fed to Pico piece by piece; after each piece the audio produced
	// so far is drained through GetAudioFromTts() using |audio_buffer|, which
	// holds |audio_buffer_size| 16-bit samples. When |out_total_samples| is
	// non-NULL it receives the total number of samples produced.
	// Returns TTS_FAILURE when |text| is NULL, no engine exists or Pico rejects
	// the text; otherwise the first non-success result of GetAudioFromTts(), or
	// TTS_SUCCESS.
	tts_result PicoTtsEngine::SynthesizeText(const char* text,
	                                         int16_t* audio_buffer,
	                                         int audio_buffer_size,
	                                         int* out_total_samples) {
	  if (out_total_samples != NULL) {
	    *out_total_samples = 0;
	  }

	  // Appending a NULL C string to a std::string is undefined behaviour.
	  if (text == NULL || engine_ == NULL) {
	    LOG(ERROR) << "SynthesizeText called without text or engine";
	    return TTS_FAILURE;
	  }

	  string synth_text;
	  AppendProperties(text, &synth_text);

	  // pico_putTextUtf8() takes the size as a pico_Int16, so never offer more
	  // than this many bytes in one call; larger sizes would wrap around.
	  const int kMaxChunkBytes = 32767;

	  const pico_Char* text_ptr =
	      reinterpret_cast<const pico_Char*>(synth_text.c_str());
	  // +1: the terminating NUL is handed to Pico along with the text.
	  const int text_buffer_len = static_cast<int>(synth_text.size()) + 1;
	  int text_pos = 0;
	  while (text_pos < text_buffer_len) {
	    int chunk_bytes = text_buffer_len - text_pos;
	    if (chunk_bytes > kMaxChunkBytes) {
	      chunk_bytes = kMaxChunkBytes;
	    }

	    pico_Int16 text_bytes_consumed = 0;
	    if (PICO_OK != pico_putTextUtf8(
	            engine_, text_ptr, static_cast<pico_Int16>(chunk_bytes),
	            &text_bytes_consumed)) {
	      return TTS_FAILURE;
	    }

	    int out_samples = 0;
	    tts_result result = GetAudioFromTts(
	        audio_buffer, audio_buffer_size, &out_samples);
	    if (out_total_samples != NULL) {
	      *out_total_samples += out_samples;
	    }

	    if (result != TTS_SUCCESS) {
	      return result;
	    }

	    // Pico may accept fewer bytes than offered; continue after them.
	    text_pos += text_bytes_consumed;
	    text_ptr += text_bytes_consumed;
	  }
	  return TTS_SUCCESS;
	}

	// This method appends the SSML tags for the supported properties if their
	// values are different from the default values.
	void PicoTtsEngine::AppendProperties(const char text, string synth_text) {
	int rate_level_ = floor(atof(properties_[PROP_RATE].c_str()));
	int pitch_level_ = floor(atof(properties_[PROP_PITCH].c_str()));
	int volume_level_ = floor(atof(properties_[PROP_VOLUME].c_str()));

	if (rate_level_ < PICO_MIN_RATE \|\| rate_level_ > PICO_MAX_RATE) {
	LOG(WARNING) << "Rate is outside the allowed range.";
	}
	if (pitch_level_ < PICO_MIN_PITCH \|\| pitch_level_ > PICO_MAX_PITCH) {
	LOG(WARNING) << "Pitch is outside the allowed range.";
	}
	if (volume_level_ < PICO_MIN_VOL \|\| volume_level_ > PICO_MAX_VOL) {
	LOG(WARNING) << "Volume is outside the allowed range.";
	}

	*synth_text = "";

	// Append opening tags
	if (rate_level_ != PICO_DEF_RATE) {
	*synth_text += "<speed level='" + properties_[PROP_RATE] + "'>";
	}
	if (pitch_level_ != PICO_DEF_PITCH) {
	*synth_text += "<pitch level='" + properties_[PROP_PITCH] + "'>";
	}
	if (volume_level_ != PICO_DEF_VOL) {
	*synth_text += "<volume level='" + properties_[PROP_VOLUME] + "'>";
	}
	// Append text
	*synth_text += text;
	// Append closing tags in the reverse order
	if (volume_level_ != PICO_DEF_VOL) {
	*synth_text += "</volume>";
	}
	if (pitch_level_ != PICO_DEF_PITCH) {
	*synth_text += "</pitch>";
	}
	if (rate_level_ != PICO_DEF_RATE) {
	*synth_text += "</speed>";
	}
	}

	// max_iterations_without_apparent_progress is a hack to prevent infinite loops.
	// It bounds the number of consecutive pico_getData() calls in
	// GetAudioFromTts() that return zero bytes while the engine still reports
	// busy; exceeding the bound makes GetAudioFromTts() return TTS_FAILURE.
	// This needs to be more than 200 to pass simple tests such as hello world.
	// TODO(fergus): we should fix the underlying bug <http://b/2501315> in the
	// //third_party/pico sources, and then delete all the code relating to
	// max_iterations_without_apparent_progress.
	int PicoTtsEngine::max_iterations_without_apparent_progress = 10000;

	// Drains the audio Pico has produced for the text fed so far and passes each
	// chunk to receiver_ (when one is set) via |audio_buffer|.
	//
	// |audio_buffer| must hold |audio_buffer_size| 16-bit samples. The number of
	// samples produced is stored in |*out_total_samples| when it is non-NULL.
	// Returns TTS_FAILURE when the buffer is unusable, no voice is selected, Pico
	// reports an error, the data is not 16-bit PCM, the receiver reports an
	// error, or the no-progress limit is exceeded; TTS_SUCCESS otherwise. Unless
	// the receiver asked to halt, a final zero-length TTS_SYNTH_DONE packet is
	// sent to it.
	tts_result PicoTtsEngine::GetAudioFromTts(int16_t* audio_buffer,
	                                          int audio_buffer_size,
	                                          int* out_total_samples) {
	  if (out_total_samples != NULL) {
	    *out_total_samples = 0;
	  }
	  if (audio_buffer == NULL || audio_buffer_size <= 0) {
	    LOG(ERROR) << "Invalid audio buffer";
	    return TTS_FAILURE;
	  }
	  // Guards the voices_ lookup below; the index is -1 when no voice is loaded.
	  if (current_voice_index_ < 0 || current_voice_index_ >= GetVoiceCount()) {
	    LOG(ERROR) << "No voice selected";
	    return TTS_FAILURE;
	  }

	  // pico_getData() takes the buffer size as a pico_Int16, so cap the request
	  // at the largest whole number of samples whose byte count still fits.
	  const int kMaxSamplesPerCall = 32767 / static_cast<int>(sizeof(pico_Int16));
	  const int samples_per_call = audio_buffer_size < kMaxSamplesPerCall
	      ? audio_buffer_size : kMaxSamplesPerCall;
	  const pico_Int16 buffer_size_bytes =
	      static_cast<pico_Int16>(samples_per_call * sizeof(pico_Int16));
	  int8_t* const buffer_ptr = reinterpret_cast<int8_t *>(audio_buffer);

	  int total_samples_output = 0;
	  int status;
	  tts_callback_status callback_status = TTS_CALLBACK_CONTINUE;
	  pico_Int16 data_type = PICO_DATA_PCM_16BIT;
	  uint32_t sample_rate = voices_[current_voice_index_].sample_rate;
	  int iterations_without_apparent_progress = 0;
	  while (1) {
	    pico_Int16 bytes_received = 0;
	    data_type = 0;

	    status = pico_getData(engine_, buffer_ptr, buffer_size_bytes,
	        &bytes_received, &data_type);

	    if (status != PICO_STEP_ERROR && bytes_received > 0) {
	      if (data_type != PICO_DATA_PCM_16BIT) {
	        break;
	      }

	      int samples_output = bytes_received / sizeof(const pico_Int16);
	      total_samples_output += samples_output;

	      // Hand the chunk to the receiver. This only happens when data was
	      // actually received in this iteration.
	      if (receiver_) {
	        callback_status =
	            receiver_->Receive(sample_rate, 1, audio_buffer,
	                               samples_output, TTS_SYNTH_PENDING);
	        if (callback_status != TTS_CALLBACK_CONTINUE) {
	          break;
	        }
	      }
	    }
	    // Anything other than "busy" means there is nothing more to fetch.
	    if (status != PICO_STEP_BUSY) {
	      break;
	    }
	    if (bytes_received == 0) {
	      iterations_without_apparent_progress++;
	      if (iterations_without_apparent_progress >
	          max_iterations_without_apparent_progress) {
	        break;
	      }
	    } else {
	      iterations_without_apparent_progress = 0;
	    }
	  }

	  if (out_total_samples != NULL) {
	    *out_total_samples = total_samples_output;
	  }

	  // The `||` operators here had been garbled into `\|\|` by text extraction.
	  if (status == PICO_STEP_ERROR ||
	      callback_status == TTS_CALLBACK_ERROR ||
	      data_type != PICO_DATA_PCM_16BIT ||
	      iterations_without_apparent_progress >
	      max_iterations_without_apparent_progress) {
	    return TTS_FAILURE;
	  }

	  // receiver_ is optional everywhere else in this function; check it here
	  // too instead of dereferencing a NULL pointer.
	  if (callback_status != TTS_CALLBACK_HALT && receiver_ != NULL) {
	    // Send a zero-length packet to tell the destination receiver that
	    // we're done.
	    callback_status = receiver_->Receive(sample_rate, 1, NULL, 0,
	                                         TTS_SYNTH_DONE);
	    if (callback_status == TTS_CALLBACK_ERROR) {
	      return TTS_FAILURE;
	    }
	  }

	  return TTS_SUCCESS;
	}

	} // namespace speech_synthesis