// earcon_manager.cc - chromiumos/platform/speech_synthesis - Git at Google

 // Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>

 #include "base/memory/scoped_ptr.h"
 #include "earcon_manager.h"
 #include "log.h"
 #include "resampler.h"
 #include "tts_receiver.h"

 namespace speech_synthesis {

 // Fields of a WAV "fmt " chunk payload, in file order. The WAV loader in
 // this file fills this struct directly from bytes read from the file, so
 // the order and width of the fields must not change.
 struct WavFormatChunk {
   uint16_t format;           // Loader accepts only the value 1.
   uint16_t channels;         // Loader accepts only 1 or 2.
   uint32_t samplerate;       // Used as the source rate of the loaded audio.
   uint32_t byterate;         // Must equal samplerate * channels * bits / 8.
   uint16_t block_align;      // Must equal channels * bits_per_sample / 8.
   uint16_t bits_per_sample;  // Loader accepts only 16.
 };

 // WAV files mark each section with a four-character code. This helper
 // packs the first four bytes of |str| into a uint32_t in memory (native)
 // byte order, so a code can be compared against a 32-bit value that was
 // loaded from file data the same way.
 //
 // |str| must point to at least four readable bytes. The bytes are copied
 // instead of being read through a casted pointer because string literals
 // and file offsets are not guaranteed to be 4-byte aligned.
 inline uint32_t FourCharUInt32(const char *str) {
   uint32_t packed = 0;
   memcpy(&packed, str, sizeof(packed));
   return packed;
 }

 // TtsDataReceiver that appends the audio it receives to the sample buffer
 // of an existing Earcon, advancing earcon->position as it goes. The
 // receiver only stores the pointer; it never frees the Earcon.
 class EarconReceiver : public TtsDataReceiver {
  public:
   explicit EarconReceiver(Earcon* earcon) : earcon_(earcon) {}

   // Copies up to |num_samples| frames of interleaved |data| (|num_channels|
   // samples per frame) into the earcon starting at its current position,
   // never writing past earcon->frame_count frames. |rate| and |status| are
   // not used. Returns TTS_CALLBACK_HALT once the earcon is full and
   // TTS_CALLBACK_CONTINUE otherwise.
   virtual tts_callback_status Receive(int rate,
                                       int num_channels,
                                       const int16_t* data,
                                       int num_samples,
                                       tts_synth_status status) {
     // Clamp to the space that is left; a full earcon accepts nothing.
     int count = earcon_->frame_count - earcon_->position;
     if (count > num_samples)
       count = num_samples;
     if (count < 0)
       count = 0;

     for (int i = 0; i < count * num_channels; i++)
       earcon_->data[num_channels * earcon_->position + i] = data[i];

     // Advance by the frames actually stored, not by the frames offered.
     // Otherwise position can step past frame_count, the "full" test below
     // never fires, and position no longer describes the valid data.
     earcon_->position += count;
     if (earcon_->position >= earcon_->frame_count)
       return TTS_CALLBACK_HALT;
     return TTS_CALLBACK_CONTINUE;
   }

  private:
   Earcon* earcon_;  // Not owned.
 };

 // Records the output format. |output_frame_rate| is stored in rate_ and
 // |output_channels| in channels_; LoadEarcon converts incoming audio to
 // these values and FillAudioBuffer mixes using channels_.
 EarconManager::EarconManager(int output_frame_rate, int output_channels)
     : rate_(output_frame_rate), channels_(output_channels) {
 }

 // Frees the sample buffer held by each registered earcon.
 EarconManager::~EarconManager() {
   size_t index = 0;
   while (index < earcons_.size()) {
     delete[] earcons_[index].data;
     ++index;
   }
 }

 int EarconManager::LoadEarcon(int frame_count,
                               int16_t* data,
                               int source_channels,
                               int source_rate,
                               bool loop) {
   int earcon_id = earcons_.size();
   bool is_playing = false;
   int position = 0;
   Earcon *earcon = NULL;

   if (source_rate != rate_) {
     int new_size = frame_count * rate_ / source_rate;
     earcon = new Earcon(new_size, new int16_t[new_size * channels_],
         is_playing, position, loop);
     EarconReceiver receiver(earcon);
     Resampler resampler(&receiver, source_rate, rate_, new_size);
     resampler.Receive(source_rate, channels_, data, frame_count,
         TTS_SYNTH_DONE);
     earcon->frame_count = earcon->position;
     earcon->position = 0;
   } else {
     earcon = new Earcon(frame_count, new int16_t[frame_count * channels_],
         is_playing, position, loop);
     // Convert from the source channels to the destination number of
     // channels.
     if (source_channels == 1 && channels_ == 2) {
       for (int i = 0; i < frame_count; i++) {
         earcon->data[2 * i] = data[i];
         earcon->data[2 * i + 1] = data[i];
       }
     } else if (source_channels == 2 && channels_ == 1) {
       for (int i = 0; i < frame_count; i++)
         earcon->data[i] = (data[2 * i] + data[2 * i + 1]) / 2;
     } else if (source_channels == channels_) {
       for (int i = 0; i < frame_count * channels_; i++)
         earcon->data[i] = data[i];
     } else {
       LOG(ERROR) << "Fatal: unsupported number of channels";
       delete earcon;
       return -1;
     }
   }
   earcons_.push_back(*earcon);
   return earcon_id;
 }

 int EarconManager::LoadEarconFromWavFile(const char *path, bool loop) {
   FILE* fp = fopen(path, "rb");
   if (!fp || ferror(fp)) {
     return -1;
   }
   fseek(fp, 0, SEEK_END);
   unsigned int filelen = static_cast<unsigned int>(ftell(fp));
   if (filelen < 40) {
     LOG(ERROR) << "File too short to be a WAV file.";
     return -1;
   }
   fseek(fp, 0, SEEK_SET);
   scoped_array<char> data;
   data.reset(new char[filelen]);
   if (filelen != fread(data.get(), 1, filelen, fp)) {
     LOG(ERROR) << "Error reading file.";
     return -1;
   }

   if (reinterpret_cast<uint32_t*>(data.get())[0] != FourCharUInt32("RIFF") ||
       reinterpret_cast<uint32_t*>(data.get())[2] != FourCharUInt32("WAVE")) {
     LOG(ERROR) << "File is not WAV format.";
     return -1;
   }

   int channels = 0;
   int rate = 0;
   int frames = 0;
   scoped_array<int16_t> audio_data;

   int pos = 12;
   while (filelen - pos >= 8) {
     uint32_t label = reinterpret_cast<uint32_t*>(&data[pos])[0];
     uint32_t chunk_bytes = reinterpret_cast<uint32_t*>(&data[pos])[1];
     if (filelen - pos < chunk_bytes) {
       LOG(ERROR) << "Error reading WAV file. File size incorrect";
       return -1;
     }
     if (label == FourCharUInt32("fmt ")) {
       if (chunk_bytes < sizeof(WavFormatChunk) || chunk_bytes > 1024) {
         LOG(ERROR) << "Error reading WAV file";
         return -1;
       }
       WavFormatChunk* format =
           reinterpret_cast<WavFormatChunk*>(&data[pos + 8]);
       if (format->format != 1) {
         LOG(ERROR) << "Invalid WAV file format";
         return -1;
       }
       if (format->channels < 1 || format->channels > 2) {
         LOG(ERROR) << "Invalid number of channels in WAV file";
         return -1;
       }
       if (format->bits_per_sample != 16) {
         LOG(ERROR) << "Invalid sample size";
         return -1;
       }
       unsigned int expected_byterate =
           format->samplerate * format->channels *
           format->bits_per_sample / 8;
       if (format->byterate != expected_byterate) {
         LOG(ERROR) << "Invalid byte rate for WAV file";
         return -1;
       }
       if (format->block_align !=
           format->channels * format->bits_per_sample / 8) {
         LOG(ERROR) << "Error reading WAV file";
         return -1;
       }
       rate = format->samplerate;
       channels = format->channels;
     } else if (label == FourCharUInt32("data")) {
       if (rate == 0 || channels == 0) {
         LOG(ERROR) << "Error reading WAV file";
         return -1;
       }
       int new_frames = chunk_bytes / (2 * channels);
       int16_t* new_audio_data = reinterpret_cast<int16_t*>(&data[pos + 8]);
       int16_t* old_audio_data = audio_data.get();
       audio_data.reset(new int16_t[(frames + new_frames) * channels]);
       for (int i = 0; i < frames * channels; i++)
         audio_data[i] = old_audio_data[i];
       for (int i = 0; i < new_frames * channels; i++)
         audio_data[i + frames * channels] = new_audio_data[i];
       delete[] old_audio_data;
       frames += new_frames;
     }

     pos += chunk_bytes + 8;
   }

   if (frames && channels && rate) {
     return LoadEarcon(frames, audio_data.get(), channels, rate, loop);
   }
   return -1;
 }

 void EarconManager::Play(int earcon_id) {
   earcons_[earcon_id].is_playing = true;
   earcons_[earcon_id].position = 0;
 }

 void EarconManager::Stop(int earcon_id) {
   earcons_[earcon_id].is_playing = false;
 }

 // Clears the playing flag of every registered earcon.
 void EarconManager::StopAll() {
   size_t index = 0;
   while (index < earcons_.size()) {
     earcons_[index].is_playing = false;
     ++index;
   }
 }

 // Returns whether the earcon |earcon_id| is currently playing. An id that
 // does not name a loaded earcon is reported as not playing.
 bool EarconManager::IsPlaying(int earcon_id) {
   if (earcon_id < 0 ||
       static_cast<size_t>(earcon_id) >= earcons_.size()) {
     return false;
   }
   return earcons_[earcon_id].is_playing;
 }

 // Returns true when at least one registered earcon has its playing flag
 // set, and false otherwise (including when no earcon is registered).
 bool EarconManager::IsAnythingPlaying() {
   bool found_playing = false;
   size_t index = 0;
   while (!found_playing && index < earcons_.size()) {
     if (earcons_[index].is_playing)
       found_playing = true;
     ++index;
   }
   return found_playing;
 }

 void EarconManager::FillAudioBuffer(int16_t* data, int frames) {
   for (size_t i = 0; i < earcons_.size(); i++) {
     // Skip earcons that aren't playing now.
     if (!earcons_[i].is_playing)
       continue;

     // Figure out how many frames of this earcon to play.
     int count = frames;
     if (count > earcons_[i].frame_count - earcons_[i].position)
       count = earcons_[i].frame_count - earcons_[i].position;

     // Mix in this earcon with the existing audio, and handle
     // clipping properly.
     int16_t* earcon_data = &earcons_[i].data[
         channels_ * earcons_[i].position];
     for (int j = 0; j < count * channels_; j++) {
       int value = data[j] + earcon_data[j];
       value = std::max(std::min(value, 32767), -32768);
       data[j] = value;
     }

     earcons_[i].position += count;
     if (earcons_[i].position == earcons_[i].frame_count)
       earcons_[i].is_playing = false;
   }
 }

 }  // namespace speech_synthesis
	// Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include <stdio.h>
	#include <stdlib.h>

	#include "base/memory/scoped_ptr.h"
	#include "earcon_manager.h"
	#include "log.h"
	#include "resampler.h"
	#include "tts_receiver.h"

	namespace speech_synthesis {

	// Fields of a WAV "fmt " chunk payload, in file order. The WAV loader in
	// this file fills this struct directly from bytes read from the file, so
	// the order and width of the fields must not change.
	struct WavFormatChunk {
	uint16_t format;  // Loader accepts only the value 1.
	uint16_t channels;  // Loader accepts only 1 or 2.
	uint32_t samplerate;  // Used as the source rate of the loaded audio.
	uint32_t byterate;  // Must equal samplerate * channels * bits / 8.
	uint16_t block_align;  // Must equal channels * bits_per_sample / 8.
	uint16_t bits_per_sample;  // Loader accepts only 16.
	};

	// WAV files mark each section with a four-character code. This helper
	// packs the first four bytes of |str| into a uint32_t in memory (native)
	// byte order, so a code can be compared against a 32-bit value that was
	// loaded from file data the same way.
	//
	// |str| must point to at least four readable bytes. The bytes are copied
	// instead of being read through a casted pointer because string literals
	// and file offsets are not guaranteed to be 4-byte aligned.
	inline uint32_t FourCharUInt32(const char *str) {
	  uint32_t packed = 0;
	  memcpy(&packed, str, sizeof(packed));
	  return packed;
	}

	// TtsDataReceiver that appends the audio it receives to the sample buffer
	// of an existing Earcon, advancing earcon->position as it goes. The
	// receiver only stores the pointer; it never frees the Earcon.
	class EarconReceiver : public TtsDataReceiver {
	 public:
	  explicit EarconReceiver(Earcon* earcon) : earcon_(earcon) {}

	  // Copies up to |num_samples| frames of interleaved |data| (|num_channels|
	  // samples per frame) into the earcon starting at its current position,
	  // never writing past earcon->frame_count frames. |rate| and |status| are
	  // not used. Returns TTS_CALLBACK_HALT once the earcon is full and
	  // TTS_CALLBACK_CONTINUE otherwise.
	  virtual tts_callback_status Receive(int rate,
	                                      int num_channels,
	                                      const int16_t* data,
	                                      int num_samples,
	                                      tts_synth_status status) {
	    // Clamp to the space that is left; a full earcon accepts nothing.
	    int count = earcon_->frame_count - earcon_->position;
	    if (count > num_samples)
	      count = num_samples;
	    if (count < 0)
	      count = 0;

	    for (int i = 0; i < count * num_channels; i++)
	      earcon_->data[num_channels * earcon_->position + i] = data[i];

	    // Advance by the frames actually stored, not by the frames offered.
	    // Otherwise position can step past frame_count, the "full" test below
	    // never fires, and position no longer describes the valid data.
	    earcon_->position += count;
	    if (earcon_->position >= earcon_->frame_count)
	      return TTS_CALLBACK_HALT;
	    return TTS_CALLBACK_CONTINUE;
	  }

	 private:
	  Earcon* earcon_;  // Not owned.
	};

	// Records the output format. |output_frame_rate| is stored in rate_ and
	// |output_channels| in channels_; LoadEarcon converts incoming audio to
	// these values and FillAudioBuffer mixes using channels_.
	EarconManager::EarconManager(int output_frame_rate, int output_channels)
	: rate_(output_frame_rate), channels_(output_channels) {
	}

	// Frees the sample buffer held by each registered earcon.
	EarconManager::~EarconManager() {
	  size_t index = 0;
	  while (index < earcons_.size()) {
	    delete[] earcons_[index].data;
	    ++index;
	  }
	}

	int EarconManager::LoadEarcon(int frame_count,
	int16_t* data,
	int source_channels,
	int source_rate,
	bool loop) {
	int earcon_id = earcons_.size();
	bool is_playing = false;
	int position = 0;
	Earcon *earcon = NULL;

	if (source_rate != rate_) {
	int new_size = frame_count * rate_ / source_rate;
	earcon = new Earcon(new_size, new int16_t[new_size * channels_],
	is_playing, position, loop);
	EarconReceiver receiver(earcon);
	Resampler resampler(&receiver, source_rate, rate_, new_size);
	resampler.Receive(source_rate, channels_, data, frame_count,
	TTS_SYNTH_DONE);
	earcon->frame_count = earcon->position;
	earcon->position = 0;
	} else {
	earcon = new Earcon(frame_count, new int16_t[frame_count * channels_],
	is_playing, position, loop);
	// Convert from the source channels to the destination number of
	// channels.
	if (source_channels == 1 && channels_ == 2) {
	for (int i = 0; i < frame_count; i++) {
	earcon->data[2 * i] = data[i];
	earcon->data[2 * i + 1] = data[i];
	}
	} else if (source_channels == 2 && channels_ == 1) {
	for (int i = 0; i < frame_count; i++)
	earcon->data[i] = (data[2 * i] + data[2 * i + 1]) / 2;
	} else if (source_channels == channels_) {
	for (int i = 0; i < frame_count * channels_; i++)
	earcon->data[i] = data[i];
	} else {
	LOG(ERROR) << "Fatal: unsupported number of channels";
	delete earcon;
	return -1;
	}
	}
	earcons_.push_back(*earcon);
	return earcon_id;
	}

	int EarconManager::LoadEarconFromWavFile(const char *path, bool loop) {
	FILE* fp = fopen(path, "rb");
	if (!fp \|\| ferror(fp)) {
	return -1;
	}
	fseek(fp, 0, SEEK_END);
	unsigned int filelen = static_cast<unsigned int>(ftell(fp));
	if (filelen < 40) {
	LOG(ERROR) << "File too short to be a WAV file.";
	return -1;
	}
	fseek(fp, 0, SEEK_SET);
	scoped_array<char> data;
	data.reset(new char[filelen]);
	if (filelen != fread(data.get(), 1, filelen, fp)) {
	LOG(ERROR) << "Error reading file.";
	return -1;
	}

	if (reinterpret_cast<uint32_t*>(data.get())[0] != FourCharUInt32("RIFF") \|\|
	reinterpret_cast<uint32_t*>(data.get())[2] != FourCharUInt32("WAVE")) {
	LOG(ERROR) << "File is not WAV format.";
	return -1;
	}

	int channels = 0;
	int rate = 0;
	int frames = 0;
	scoped_array<int16_t> audio_data;

	int pos = 12;
	while (filelen - pos >= 8) {
	uint32_t label = reinterpret_cast<uint32_t*>(&data[pos])[0];
	uint32_t chunk_bytes = reinterpret_cast<uint32_t*>(&data[pos])[1];
	if (filelen - pos < chunk_bytes) {
	LOG(ERROR) << "Error reading WAV file. File size incorrect";
	return -1;
	}
	if (label == FourCharUInt32("fmt ")) {
	if (chunk_bytes < sizeof(WavFormatChunk) \|\| chunk_bytes > 1024) {
	LOG(ERROR) << "Error reading WAV file";
	return -1;
	}
	WavFormatChunk* format =
	reinterpret_cast<WavFormatChunk*>(&data[pos + 8]);
	if (format->format != 1) {
	LOG(ERROR) << "Invalid WAV file format";
	return -1;
	}
	if (format->channels < 1 \|\| format->channels > 2) {
	LOG(ERROR) << "Invalid number of channels in WAV file";
	return -1;
	}
	if (format->bits_per_sample != 16) {
	LOG(ERROR) << "Invalid sample size";
	return -1;
	}
	unsigned int expected_byterate =
	format->samplerate * format->channels *
	format->bits_per_sample / 8;
	if (format->byterate != expected_byterate) {
	LOG(ERROR) << "Invalid byte rate for WAV file";
	return -1;
	}
	if (format->block_align !=
	format->channels * format->bits_per_sample / 8) {
	LOG(ERROR) << "Error reading WAV file";
	return -1;
	}
	rate = format->samplerate;
	channels = format->channels;
	} else if (label == FourCharUInt32("data")) {
	if (rate == 0 \|\| channels == 0) {
	LOG(ERROR) << "Error reading WAV file";
	return -1;
	}
	int new_frames = chunk_bytes / (2 * channels);
	int16_t* new_audio_data = reinterpret_cast<int16_t*>(&data[pos + 8]);
	int16_t* old_audio_data = audio_data.get();
	audio_data.reset(new int16_t[(frames + new_frames) * channels]);
	for (int i = 0; i < frames * channels; i++)
	audio_data[i] = old_audio_data[i];
	for (int i = 0; i < new_frames * channels; i++)
	audio_data[i + frames * channels] = new_audio_data[i];
	delete[] old_audio_data;
	frames += new_frames;
	}

	pos += chunk_bytes + 8;
	}

	if (frames && channels && rate) {
	return LoadEarcon(frames, audio_data.get(), channels, rate, loop);
	}
	return -1;
	}

	void EarconManager::Play(int earcon_id) {
	earcons_[earcon_id].is_playing = true;
	earcons_[earcon_id].position = 0;
	}

	void EarconManager::Stop(int earcon_id) {
	earcons_[earcon_id].is_playing = false;
	}

	// Clears the playing flag of every registered earcon.
	void EarconManager::StopAll() {
	  size_t index = 0;
	  while (index < earcons_.size()) {
	    earcons_[index].is_playing = false;
	    ++index;
	  }
	}

	// Returns whether the earcon |earcon_id| is currently playing. An id that
	// does not name a loaded earcon is reported as not playing.
	bool EarconManager::IsPlaying(int earcon_id) {
	  if (earcon_id < 0 ||
	      static_cast<size_t>(earcon_id) >= earcons_.size()) {
	    return false;
	  }
	  return earcons_[earcon_id].is_playing;
	}

	// Returns true when at least one registered earcon has its playing flag
	// set, and false otherwise (including when no earcon is registered).
	bool EarconManager::IsAnythingPlaying() {
	  bool found_playing = false;
	  size_t index = 0;
	  while (!found_playing && index < earcons_.size()) {
	    if (earcons_[index].is_playing)
	      found_playing = true;
	    ++index;
	  }
	  return found_playing;
	}

	void EarconManager::FillAudioBuffer(int16_t* data, int frames) {
	for (size_t i = 0; i < earcons_.size(); i++) {
	// Skip earcons that aren't playing now.
	if (!earcons_[i].is_playing)
	continue;

	// Figure out how many frames of this earcon to play.
	int count = frames;
	if (count > earcons_[i].frame_count - earcons_[i].position)
	count = earcons_[i].frame_count - earcons_[i].position;

	// Mix in this earcon with the existing audio, and handle
	// clipping properly.
	int16_t* earcon_data = &earcons_[i].data[
	channels_ * earcons_[i].position];
	for (int j = 0; j < count * channels_; j++) {
	int value = data[j] + earcon_data[j];
	value = std::max(std::min(value, 32767), -32768);
	data[j] = value;
	}

	earcons_[i].position += count;
	if (earcons_[i].position == earcons_[i].frame_count)
	earcons_[i].is_playing = false;
	}
	}

	} // namespace speech_synthesis