// tts_service.cc - chromiumos/platform/speech_synthesis - Git at Google

 // Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "tts_service.h"

 #include "log.h"
 #include "resampler.h"
 #include "ringbuffer.h"
 #include "tts_engine.h"

 namespace speech_synthesis {

 // Default utterance options: rate, pitch and volume are all 1, the
 // utterance is not enqueued behind pending ones (enqueue == false) and it
 // may be interrupted (interruptible == true).  Members that are not listed
 // here (such as voice_options, which Speak() reads) are default-initialized.
 UtteranceOptions::UtteranceOptions()
     : rate(1),
       pitch(1),
       volume(1),
       enqueue(false),
       interruptible(true) {
 }

 // Stores the engine and audio output pointers and creates the mutex and
 // the condition variable.  Nothing is initialized or started here; that
 // happens in StartService().  The service starts out not running, with no
 // current utterance, and with the "current utterance is interruptible"
 // flag set.
 // NOTE(review): this file never deletes |engine| or |audio_output|, so the
 // caller presumably keeps ownership of both -- confirm against the header.
 TtsService::TtsService(TtsEngine *engine, AudioOutput *audio_output)
     : engine_(engine),
       audio_output_(audio_output),
       current_utterance_(NULL),
       mutex_(Mutex::Create()),
       cond_var_(CondVar::Create()),
       service_running_(false),
       utterance_running_(false),
       current_utterance_interruptible_(true) {
 }

 // The destructor performs no explicit cleanup.
 // NOTE(review): it does not call StopService(), so the background thread
 // launched by StartService() is not joined here.  Callers presumably have
 // to call StopService() before destroying the object -- confirm.
 TtsService::~TtsService() {
 }

 bool TtsService::StartService() {
   if (!audio_output_->Init(this)) {
     LOG(ERROR) << "TTS Service unable to open audio output.";
     return false;
   }
   ring_buffer_.reset(
       new RingBuffer<int16_t>(audio_output_->GetTotalBufferSize()));
   audio_buffer_.resize(audio_output_->GetChunkSize());
   if (engine_->Init() != TTS_SUCCESS) {
     return false;
   }
   LOG(INFO) << "StartService";
   audio_output_->StartAudio();
   service_running_ = true;
   thread_.reset(Thread::StartJoinableThread(this));
   return true;
 }

 // Shuts the service down: drops all pending speech, stops audio output,
 // tells the background thread to exit and joins it.  Only the initial
 // Stop() call is attempted when the service is not running, and Stop()
 // itself does nothing in that case.
 void TtsService::StopService() {
   // Discard queued utterances and interrupt the one in progress.
   Stop();

   if (service_running_) {
     LOG(INFO) << "Stopping audio.";
     audio_output_->StopAudio();

     LOG(INFO) << "Stopping main service.";
     // Clear the running flag under the lock and wake the background
     // thread so that it notices the change.
     mutex_->Lock();
     service_running_ = false;
     cond_var_->Signal();
     mutex_->Unlock();

     LOG(INFO) << "Joining main thread.";
     thread_->Join();
     LOG(INFO) << "Joined";
   }
 }

 // Queues |text| for synthesis with the given options and wakes the
 // background thread.  Does nothing when the service is not running.
 //
 // When options.enqueue is false the new utterance displaces pending work:
 // interruptible utterances are dropped from the back of the queue until a
 // non-interruptible one is reached, and if the queue ends up empty while
 // the current utterance is interruptible, buffered audio is discarded and
 // the current utterance is flagged to stop.
 void TtsService::Speak(const std::string& text,
                        const UtteranceOptions& options) {
   if (!service_running_) {
     return;
   }

   Utterance *utterance = new Utterance;
   utterance->text = text;
   // An index of -1 from the engine (presumably "no matching voice") is
   // replaced by voice 0.
   utterance->voice_index = engine_->GetVoiceIndex(options.voice_options);
   if (utterance->voice_index == -1) {
     utterance->voice_index = 0;
   }
   utterance->options = options;

   mutex_->Lock();
   const bool replace_pending = !utterance->options.enqueue;
   if (replace_pending) {
     // Trim from the back until a non-interruptible utterance is found.  A
     // non-interruptible utterance therefore also protects everything that
     // was queued ahead of it.  A higher level can enforce that
     // interruptible == false implies enqueue == false.
     for (; !utterances_.empty() &&
                utterances_.back()->options.interruptible;
          utterances_.pop_back()) {
       delete utterances_.back();
     }
     const bool queue_drained = utterances_.empty();
     if (queue_drained && current_utterance_interruptible_) {
       ring_buffer_->Reset();
       utterance_running_ = false;
     }
   }
   utterances_.push_back(utterance);
   cond_var_->Signal();
   mutex_->Unlock();
 }

 // Silences the service without shutting it down: resets the ring buffer,
 // deletes every queued utterance, flags the current utterance as no
 // longer running and signals the condition variable.  Does nothing when
 // the service is not running.
 void TtsService::Stop() {
   if (service_running_) {
     mutex_->Lock();
     ring_buffer_->Reset();
     // The queue holds owning raw pointers, so free each entry as it is
     // removed.
     for (; !utterances_.empty(); utterances_.pop_front()) {
       delete utterances_.front();
     }
     utterance_running_ = false;
     cond_var_->Signal();
     mutex_->Unlock();
   }
 }

 // Reports the service state: TTS_ERROR when the service is not running,
 // TTS_IDLE when the queue is empty and no utterance is running, and
 // TTS_BUSY otherwise.
 tts_status TtsService::GetStatus() {
   if (!service_running_) {
     return TTS_ERROR;
   }
   // Sample both pieces of state under the lock, decide afterwards.
   mutex_->Lock();
   const bool idle = utterances_.empty() && !utterance_running_;
   mutex_->Unlock();
   return idle ? TTS_IDLE : TTS_BUSY;
 }

 // Blocks the caller until the utterance queue is empty and no utterance
 // is running.  Returns immediately when the service is not running.
 void TtsService::WaitUntilFinished() {
   if (!service_running_) {
     return;
   }
   mutex_->Lock();
   for (;;) {
     const bool finished = utterances_.empty() && !utterance_running_;
     if (finished) {
       break;
     }
     // Wait() is handed the mutex; the state is re-checked after every
     // wake-up.
     cond_var_->Wait(mutex_.get());
   }
   mutex_->Unlock();
 }

 void TtsService::Run() {
   if (!service_running_) {
     return;
   }
   LOG(INFO) << "Running background thread";
   for (;;) {
     mutex_->Lock();
     // If there are no utterances and there's no signal to stop,
     // wait on our condition variable, which will allow this thread to
     // sleep with no CPU usage and wake up immediately when there's
     // work for us to do.
     if (utterances_.empty() && service_running_ == true) {
       cond_var_->Wait(mutex_.get());
     }

     if (service_running_ == false) {
       LOG(INFO) << "Exiting background thread";
       while (!utterances_.empty()) {
         delete utterances_.front();
         utterances_.pop_front();
       }
       mutex_->Unlock();
       return;
     }

     if (current_utterance_ == NULL && !utterances_.empty()) {
       current_utterance_ = utterances_.front();
       current_utterance_interruptible_ =
           current_utterance_->options.interruptible;
       utterances_.pop_front();
     }

     utterance_running_ = true;

     mutex_->Unlock();

     if (!current_utterance_) {
       continue;
     }

     engine_->SetRate(current_utterance_->options.rate);
     engine_->SetPitch(current_utterance_->options.pitch);
     engine_->SetVolume(current_utterance_->options.volume);

     // Synthesize the current utterance.  The TTS engine will call our
     // callback method, Receive, repeatedly while it performs synthesis.
     // During that callback, we check if Stop was called and can cause
     // this method to exit prematurely.  Otherwise this method won't exit
     // until this utterance is done synthesizing, and then current_utterance_
     // will be set to NULL.
     int samples_output = 0;
     engine_->SetVoice(current_utterance_->voice_index);

     scoped_ptr<Resampler> resampler;
     if (audio_output_->GetSampleRate() != engine_->GetSampleRate()) {
       resampler.reset(new Resampler(this,
                                      engine_->GetSampleRate(),
                                      audio_output_->GetSampleRate(),
                                      audio_buffer_.size()));
       engine_->SetReceiver(resampler.get());
     } else {
       engine_->SetReceiver(this);
     }

     // Save the utterance text because current_utterance_ is deleted
     // by the Done() callback before the call to SynthesizeText exits.
     string utterance_text = current_utterance_->text;

     engine_->SynthesizeText(
         utterance_text.c_str(),
         &audio_buffer_[0],
         audio_buffer_.size(),
         &samples_output);

     // TODO(chaitanyag): Make the completion callback here.
     VLOG(3) << "Done: " << utterance_text.c_str();

     mutex_->Lock();
     if (utterance_running_ == false) {
       // The utterance was interrupted
       engine_->Stop();
     }
     utterance_running_ = false;
     cond_var_->Signal();
     mutex_->Unlock();

     delete current_utterance_;
     current_utterance_ = NULL;
   }
 }

 // Callback that receives synthesized audio.  Run() registers this object
 // (or a Resampler that wraps it) as the engine's receiver.
 //
 //   rate         sample rate of |data|; used to estimate how long to wait
 //                when the ring buffer is full.
 //   num_channels not used by this implementation.
 //   data         |num_samples| samples to append to the ring buffer.
 //   status       TTS_SYNTH_DONE marks the last callback of an utterance.
 //
 // Returns TTS_CALLBACK_HALT when the service or the current utterance has
 // been stopped, or when the samples could not be written; otherwise
 // TTS_CALLBACK_CONTINUE.
 //
 // current_utterance_ is deliberately left alone here: Run() releases the
 // utterance after SynthesizeText returns, and clearing the pointer in this
 // callback would make that release a no-op and leak the utterance.
 tts_callback_status TtsService::Receive(int rate,
                                         int num_channels,
                                         const int16_t* data,
                                         int num_samples,
                                         tts_synth_status status) {
   // Check if we need to exit prematurely
   mutex_->Lock();
   const bool stopped = !service_running_ || !utterance_running_;
   mutex_->Unlock();
   if (stopped) {
     return TTS_CALLBACK_HALT;
   }

   // With no audio data there is nothing to write, but the end of the
   // utterance still has to be recorded: FillAudioBuffer() only plays a
   // partial final chunk once the ring buffer is marked finished.
   if (num_samples == 0) {
     if (status == TTS_SYNTH_DONE) {
       ring_buffer_->MarkFinished();
     }
     return TTS_CALLBACK_CONTINUE;
   }

   // Estimate how long it takes to play |num_samples| samples.  Guard
   // against a non-positive rate (division by zero) and never use a zero
   // timeout, which could turn the wait loop below into a busy loop.
   const int kMinSleepMs = 1;
   int ms_to_sleep = kMinSleepMs;
   if (rate > 0) {
     const int estimated_ms = num_samples * 1000 / rate;
     if (estimated_ms > kMinSleepMs) {
       ms_to_sleep = estimated_ms;
     }
   }

   // While the ring buffer cannot take the samples, sleep for the estimated
   // time (or until signalled) and re-check whether we were stopped.
   while (ring_buffer_->WriteAvail() < num_samples) {
     mutex_->Lock();
     cond_var_->WaitWithTimeout(mutex_.get(), ms_to_sleep);
     const bool stopped_while_waiting =
         !service_running_ || !utterance_running_;
     mutex_->Unlock();
     if (stopped_while_waiting) {
       return TTS_CALLBACK_HALT;
     }
   }

   // A failed write aborts this utterance instead of terminating the whole
   // process with a success exit status.
   const bool written = ring_buffer_->Write(data, num_samples);
   if (!written) {
     LOG(ERROR) << "Unable to write to ring buffer";
   }

   if (status == TTS_SYNTH_DONE) {
     ring_buffer_->MarkFinished();
   }

   return written ? TTS_CALLBACK_CONTINUE : TTS_CALLBACK_HALT;
 }

 // Fills |samples| (|size| entries) with audio from the ring buffer and
 // pads whatever is left with zeros.  Returns false once the ring buffer
 // reports that it is finished, true otherwise.
 bool TtsService::FillAudioBuffer(int16_t* samples, int size) {
   const int available = ring_buffer_->ReadAvail();

   // A finished ring buffer is drained even when less than a full chunk is
   // left; an unfinished one only yields complete chunks.
   int to_copy = 0;
   if (ring_buffer_->IsFinished()) {
     to_copy = size;
     if (available < size) {
       to_copy = available;
     }
   } else if (available >= size) {
     to_copy = size;
   }

   ring_buffer_->Read(samples, to_copy);

   // Zero the part of the output that was not filled from the ring buffer.
   int index = to_copy;
   while (index < size) {
     samples[index] = 0;
     ++index;
   }

   return !ring_buffer_->IsFinished();
 }

 }  // namespace speech_synthesis
	// Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "tts_service.h"

	#include "log.h"
	#include "resampler.h"
	#include "ringbuffer.h"
	#include "tts_engine.h"

	namespace speech_synthesis {

	// Default utterance options: rate, pitch and volume are all 1, the
	// utterance is not enqueued behind pending ones and it may be interrupted.
	UtteranceOptions::UtteranceOptions()
	    : rate(1),
	      pitch(1),
	      volume(1),
	      enqueue(false),
	      interruptible(true) {
	}

	// Stores the engine and audio output pointers and creates the mutex and
	// the condition variable.  Nothing is initialized or started here; that
	// happens in StartService().
	//
	// Both parameters are pointers: the rest of this file dereferences
	// engine_ and audio_output_ with "->", so they cannot be initialized from
	// by-value objects.
	// NOTE(review): this file never deletes |engine| or |audio_output|, so the
	// caller presumably keeps ownership of both -- confirm against the header.
	TtsService::TtsService(TtsEngine *engine, AudioOutput *audio_output)
	    : engine_(engine),
	      audio_output_(audio_output),
	      current_utterance_(NULL),
	      mutex_(Mutex::Create()),
	      cond_var_(CondVar::Create()),
	      service_running_(false),
	      utterance_running_(false),
	      current_utterance_interruptible_(true) {
	}

	// The destructor performs no explicit cleanup.
	// NOTE(review): it does not call StopService(), so the background thread
	// launched by StartService() is not joined here.  Callers presumably have
	// to call StopService() before destroying the object -- confirm.
	TtsService::~TtsService() {
	}

	bool TtsService::StartService() {
	if (!audio_output_->Init(this)) {
	LOG(ERROR) << "TTS Service unable to open audio output.";
	return false;
	}
	ring_buffer_.reset(
	new RingBuffer<int16_t>(audio_output_->GetTotalBufferSize()));
	audio_buffer_.resize(audio_output_->GetChunkSize());
	if (engine_->Init() != TTS_SUCCESS) {
	return false;
	}
	LOG(INFO) << "StartService";
	audio_output_->StartAudio();
	service_running_ = true;
	thread_.reset(Thread::StartJoinableThread(this));
	return true;
	}

	// Shuts the service down: drops all pending speech, stops audio output,
	// tells the background thread to exit and joins it.  Only the initial
	// Stop() call is attempted when the service is not running, and Stop()
	// itself does nothing in that case.
	void TtsService::StopService() {
	  // Discard queued utterances and interrupt the one in progress.
	  Stop();

	  if (service_running_) {
	    LOG(INFO) << "Stopping audio.";
	    audio_output_->StopAudio();

	    LOG(INFO) << "Stopping main service.";
	    // Clear the running flag under the lock and wake the background
	    // thread so that it notices the change.
	    mutex_->Lock();
	    service_running_ = false;
	    cond_var_->Signal();
	    mutex_->Unlock();

	    LOG(INFO) << "Joining main thread.";
	    thread_->Join();
	    LOG(INFO) << "Joined";
	  }
	}

	// Queues |text| for synthesis with the given options and wakes the
	// background thread.  Does nothing when the service is not running.
	//
	// When options.enqueue is false the new utterance displaces pending work:
	// interruptible utterances are dropped from the back of the queue until a
	// non-interruptible one is reached, and if the queue ends up empty while
	// the current utterance is interruptible, buffered audio is discarded and
	// the current utterance is flagged to stop.
	void TtsService::Speak(const std::string& text,
	                       const UtteranceOptions& options) {
	  if (!service_running_) {
	    return;
	  }

	  Utterance *utterance = new Utterance;
	  utterance->text = text;
	  // An index of -1 from the engine (presumably "no matching voice") is
	  // replaced by voice 0.
	  utterance->voice_index = engine_->GetVoiceIndex(options.voice_options);
	  if (utterance->voice_index == -1) {
	    utterance->voice_index = 0;
	  }
	  utterance->options = options;

	  mutex_->Lock();
	  const bool replace_pending = !utterance->options.enqueue;
	  if (replace_pending) {
	    // Trim from the back until a non-interruptible utterance is found.  A
	    // non-interruptible utterance therefore also protects everything that
	    // was queued ahead of it.  A higher level can enforce that
	    // interruptible == false implies enqueue == false.
	    for (; !utterances_.empty() &&
	               utterances_.back()->options.interruptible;
	         utterances_.pop_back()) {
	      delete utterances_.back();
	    }
	    const bool queue_drained = utterances_.empty();
	    if (queue_drained && current_utterance_interruptible_) {
	      ring_buffer_->Reset();
	      utterance_running_ = false;
	    }
	  }
	  utterances_.push_back(utterance);
	  cond_var_->Signal();
	  mutex_->Unlock();
	}

	// Silences the service without shutting it down: resets the ring buffer,
	// deletes every queued utterance, flags the current utterance as no
	// longer running and signals the condition variable.  Does nothing when
	// the service is not running.
	void TtsService::Stop() {
	  if (service_running_) {
	    mutex_->Lock();
	    ring_buffer_->Reset();
	    // The queue holds owning raw pointers, so free each entry as it is
	    // removed.
	    for (; !utterances_.empty(); utterances_.pop_front()) {
	      delete utterances_.front();
	    }
	    utterance_running_ = false;
	    cond_var_->Signal();
	    mutex_->Unlock();
	  }
	}

	// Reports the service state: TTS_ERROR when the service is not running,
	// TTS_IDLE when the queue is empty and no utterance is running, and
	// TTS_BUSY otherwise.
	tts_status TtsService::GetStatus() {
	  if (!service_running_) {
	    return TTS_ERROR;
	  }
	  // Sample both pieces of state under the lock, decide afterwards.
	  mutex_->Lock();
	  const bool idle = utterances_.empty() && !utterance_running_;
	  mutex_->Unlock();
	  return idle ? TTS_IDLE : TTS_BUSY;
	}

	// Blocks the caller until the utterance queue is empty and no utterance
	// is running.  Returns immediately when the service is not running.
	void TtsService::WaitUntilFinished() {
	  if (!service_running_) {
	    return;
	  }
	  mutex_->Lock();
	  // Wait() is handed the mutex; the state is re-checked after every
	  // wake-up.
	  while (!utterances_.empty() || utterance_running_) {
	    cond_var_->Wait(mutex_.get());
	  }
	  mutex_->Unlock();
	}

	void TtsService::Run() {
	if (!service_running_) {
	return;
	}
	LOG(INFO) << "Running background thread";
	for (;;) {
	mutex_->Lock();
	// If there are no utterances and there's no signal to stop,
	// wait on our condition variable, which will allow this thread to
	// sleep with no CPU usage and wake up immediately when there's
	// work for us to do.
	if (utterances_.empty() && service_running_ == true) {
	cond_var_->Wait(mutex_.get());
	}

	if (service_running_ == false) {
	LOG(INFO) << "Exiting background thread";
	while (!utterances_.empty()) {
	delete utterances_.front();
	utterances_.pop_front();
	}
	mutex_->Unlock();
	return;
	}

	if (current_utterance_ == NULL && !utterances_.empty()) {
	current_utterance_ = utterances_.front();
	current_utterance_interruptible_ =
	current_utterance_->options.interruptible;
	utterances_.pop_front();
	}

	utterance_running_ = true;

	mutex_->Unlock();

	if (!current_utterance_) {
	continue;
	}

	engine_->SetRate(current_utterance_->options.rate);
	engine_->SetPitch(current_utterance_->options.pitch);
	engine_->SetVolume(current_utterance_->options.volume);

	// Synthesize the current utterance. The TTS engine will call our
	// callback method, Receive, repeatedly while it performs synthesis.
	// During that callback, we check if Stop was called and can cause
	// this method to exit prematurely. Otherwise this method won't exit
	// until this utterance is done synthesizing, and then current_utterance_
	// will be set to NULL.
	int samples_output = 0;
	engine_->SetVoice(current_utterance_->voice_index);

	scoped_ptr<Resampler> resampler;
	if (audio_output_->GetSampleRate() != engine_->GetSampleRate()) {
	resampler.reset(new Resampler(this,
	engine_->GetSampleRate(),
	audio_output_->GetSampleRate(),
	audio_buffer_.size()));
	engine_->SetReceiver(resampler.get());
	} else {
	engine_->SetReceiver(this);
	}

	// Save the utterance text because current_utterance_ is deleted
	// by the Done() callback before the call to SynthesizeText exits.
	string utterance_text = current_utterance_->text;

	engine_->SynthesizeText(
	utterance_text.c_str(),
	&audio_buffer_[0],
	audio_buffer_.size(),
	&samples_output);

	// TODO(chaitanyag): Make the completion callback here.
	VLOG(3) << "Done: " << utterance_text.c_str();

	mutex_->Lock();
	if (utterance_running_ == false) {
	// The utterance was interrupted
	engine_->Stop();
	}
	utterance_running_ = false;
	cond_var_->Signal();
	mutex_->Unlock();

	delete current_utterance_;
	current_utterance_ = NULL;
	}
	}

	// Callback that receives synthesized audio.  Run() registers this object
	// (or a Resampler that wraps it) as the engine's receiver.
	//
	//   rate         sample rate of |data|; used to estimate how long to wait
	//                when the ring buffer is full.
	//   num_channels not used by this implementation.
	//   data         |num_samples| samples to append to the ring buffer.
	//   status       TTS_SYNTH_DONE marks the last callback of an utterance.
	//
	// Returns TTS_CALLBACK_HALT when the service or the current utterance has
	// been stopped, or when the samples could not be written; otherwise
	// TTS_CALLBACK_CONTINUE.
	//
	// current_utterance_ is deliberately left alone here: Run() releases the
	// utterance after SynthesizeText returns, and clearing the pointer in this
	// callback would make that release a no-op and leak the utterance.
	tts_callback_status TtsService::Receive(int rate,
	                                        int num_channels,
	                                        const int16_t* data,
	                                        int num_samples,
	                                        tts_synth_status status) {
	  // Check if we need to exit prematurely
	  mutex_->Lock();
	  const bool stopped = !service_running_ || !utterance_running_;
	  mutex_->Unlock();
	  if (stopped) {
	    return TTS_CALLBACK_HALT;
	  }

	  // With no audio data there is nothing to write, but the end of the
	  // utterance still has to be recorded: FillAudioBuffer() only plays a
	  // partial final chunk once the ring buffer is marked finished.
	  if (num_samples == 0) {
	    if (status == TTS_SYNTH_DONE) {
	      ring_buffer_->MarkFinished();
	    }
	    return TTS_CALLBACK_CONTINUE;
	  }

	  // Estimate how long it takes to play |num_samples| samples.  Guard
	  // against a non-positive rate (division by zero) and never use a zero
	  // timeout, which could turn the wait loop below into a busy loop.
	  const int kMinSleepMs = 1;
	  int ms_to_sleep = kMinSleepMs;
	  if (rate > 0) {
	    const int estimated_ms = num_samples * 1000 / rate;
	    if (estimated_ms > kMinSleepMs) {
	      ms_to_sleep = estimated_ms;
	    }
	  }

	  // While the ring buffer cannot take the samples, sleep for the
	  // estimated time (or until signalled) and re-check whether we were
	  // stopped.
	  while (ring_buffer_->WriteAvail() < num_samples) {
	    mutex_->Lock();
	    cond_var_->WaitWithTimeout(mutex_.get(), ms_to_sleep);
	    const bool stopped_while_waiting =
	        !service_running_ || !utterance_running_;
	    mutex_->Unlock();
	    if (stopped_while_waiting) {
	      return TTS_CALLBACK_HALT;
	    }
	  }

	  // A failed write aborts this utterance instead of terminating the whole
	  // process with a success exit status.
	  const bool written = ring_buffer_->Write(data, num_samples);
	  if (!written) {
	    LOG(ERROR) << "Unable to write to ring buffer";
	  }

	  if (status == TTS_SYNTH_DONE) {
	    ring_buffer_->MarkFinished();
	  }

	  return written ? TTS_CALLBACK_CONTINUE : TTS_CALLBACK_HALT;
	}

	// Fills |samples| (|size| entries) with audio from the ring buffer and
	// pads whatever is left with zeros.  Returns false once the ring buffer
	// reports that it is finished, true otherwise.
	bool TtsService::FillAudioBuffer(int16_t* samples, int size) {
	  const int available = ring_buffer_->ReadAvail();

	  // A finished ring buffer is drained even when less than a full chunk is
	  // left; an unfinished one only yields complete chunks.
	  int to_copy = 0;
	  if (ring_buffer_->IsFinished()) {
	    to_copy = size;
	    if (available < size) {
	      to_copy = available;
	    }
	  } else if (available >= size) {
	    to_copy = size;
	  }

	  ring_buffer_->Read(samples, to_copy);

	  // Zero the part of the output that was not filled from the ring buffer.
	  int index = to_copy;
	  while (index < size) {
	    samples[index] = 0;
	    ++index;
	  }

	  return !ring_buffer_->IsFinished();
	}

	} // namespace speech_synthesis