blob: a6c318a93ae897a76aec85b698011aaffeafc147 [file] [log] [blame]
// Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>

#include "base/memory/scoped_ptr.h"
#include "earcon_manager.h"
#include "log.h"
#include "resampler.h"
#include "tts_receiver.h"
namespace speech_synthesis {
// Field layout of the payload of a WAV "fmt " chunk, as the loader in this
// file reads it directly from the raw file bytes. The order and widths of
// the members therefore must not be changed.
struct WavFormatChunk {
  uint16_t format;           // Format tag; the loader only accepts 1.
  uint16_t channels;         // Channel count; the loader accepts 1 or 2.
  uint32_t samplerate;       // Frames per second.
  uint32_t byterate;         // Checked against samplerate * channels *
                             // bits_per_sample / 8 by the loader.
  uint16_t block_align;      // Checked against channels *
                             // bits_per_sample / 8 by the loader.
  uint16_t bits_per_sample;  // The loader only accepts 16.
};
// This is just for clarity; WAV files use a sequence of four
// meaningful characters to mark different sections of the file.
// We read and write them as single 32-bit values.
//
// |str| must point to at least four readable bytes. The bytes are packed in
// memory order, so the numeric result depends on host endianness; it is only
// meaningful when compared against another value produced the same way.
inline const uint32_t FourCharUInt32(const char *str) {
  // Copy the bytes instead of dereferencing a casted pointer: |str| has no
  // alignment guarantee and reading a char array through a uint32_t lvalue
  // violates the aliasing rules.
  uint32_t packed = 0;
  memcpy(&packed, str, sizeof(packed));
  return packed;
}
// TtsDataReceiver that appends the audio it receives to an Earcon's
// preallocated sample buffer. |earcon->frame_count| is treated as the
// capacity of that buffer in frames and |earcon->position| as the number of
// frames written so far. The Earcon is not owned.
class EarconReceiver : public TtsDataReceiver {
 public:
  explicit EarconReceiver(Earcon* earcon) : earcon_(earcon) {}

  // Copies up to |num_samples| frames (each |num_channels| interleaved
  // values) from |data| into the earcon, never writing past its capacity.
  // |rate| and |status| are ignored. Returns TTS_CALLBACK_HALT once the
  // buffer is full, TTS_CALLBACK_CONTINUE otherwise.
  virtual tts_callback_status Receive(int rate,
                                      int num_channels,
                                      const int16_t* data,
                                      int num_samples,
                                      tts_synth_status status) {
    const int remaining = earcon_->frame_count - earcon_->position;
    const int count = std::max(0, std::min(num_samples, remaining));
    for (int i = 0; i < count * num_channels; i++)
      earcon_->data[num_channels * earcon_->position + i] = data[i];
    // Advance by what was actually stored, not by what was offered.
    // Otherwise |position| could run past |frame_count|, the buffer would
    // never be reported as full, and callers that read |position| back as
    // the number of valid frames would overrun the allocation.
    earcon_->position += count;
    if (earcon_->position >= earcon_->frame_count)
      return TTS_CALLBACK_HALT;
    return TTS_CALLBACK_CONTINUE;
  }

 private:
  Earcon* earcon_;
};
// Creates a manager with no earcons loaded. |output_frame_rate| and
// |output_channels| are remembered as the rate and channel count that
// loaded earcons are converted to and mixed at.
EarconManager::EarconManager(int output_frame_rate, int output_channels)
    : rate_(output_frame_rate),
      channels_(output_channels) {}
// Frees the sample buffer of every loaded earcon; the manager owns them.
EarconManager::~EarconManager() {
  size_t index = 0;
  while (index < earcons_.size()) {
    delete[] earcons_[index].data;
    ++index;
  }
}
int EarconManager::LoadEarcon(int frame_count,
int16_t* data,
int source_channels,
int source_rate,
bool loop) {
int earcon_id = earcons_.size();
bool is_playing = false;
int position = 0;
Earcon *earcon = NULL;
if (source_rate != rate_) {
int new_size = frame_count * rate_ / source_rate;
earcon = new Earcon(new_size, new int16_t[new_size * channels_],
is_playing, position, loop);
EarconReceiver receiver(earcon);
Resampler resampler(&receiver, source_rate, rate_, new_size);
resampler.Receive(source_rate, channels_, data, frame_count,
TTS_SYNTH_DONE);
earcon->frame_count = earcon->position;
earcon->position = 0;
} else {
earcon = new Earcon(frame_count, new int16_t[frame_count * channels_],
is_playing, position, loop);
// Convert from the source channels to the destination number of
// channels.
if (source_channels == 1 && channels_ == 2) {
for (int i = 0; i < frame_count; i++) {
earcon->data[2 * i] = data[i];
earcon->data[2 * i + 1] = data[i];
}
} else if (source_channels == 2 && channels_ == 1) {
for (int i = 0; i < frame_count; i++)
earcon->data[i] = (data[2 * i] + data[2 * i + 1]) / 2;
} else if (source_channels == channels_) {
for (int i = 0; i < frame_count * channels_; i++)
earcon->data[i] = data[i];
} else {
LOG(ERROR) << "Fatal: unsupported number of channels";
delete earcon;
return -1;
}
}
earcons_.push_back(*earcon);
return earcon_id;
}
int EarconManager::LoadEarconFromWavFile(const char *path, bool loop) {
FILE* fp = fopen(path, "rb");
if (!fp || ferror(fp)) {
return -1;
}
fseek(fp, 0, SEEK_END);
unsigned int filelen = static_cast<unsigned int>(ftell(fp));
if (filelen < 40) {
LOG(ERROR) << "File too short to be a WAV file.";
return -1;
}
fseek(fp, 0, SEEK_SET);
scoped_array<char> data;
data.reset(new char[filelen]);
if (filelen != fread(data.get(), 1, filelen, fp)) {
LOG(ERROR) << "Error reading file.";
return -1;
}
if (reinterpret_cast<uint32_t*>(data.get())[0] != FourCharUInt32("RIFF") ||
reinterpret_cast<uint32_t*>(data.get())[2] != FourCharUInt32("WAVE")) {
LOG(ERROR) << "File is not WAV format.";
return -1;
}
int channels = 0;
int rate = 0;
int frames = 0;
scoped_array<int16_t> audio_data;
int pos = 12;
while (filelen - pos >= 8) {
uint32_t label = reinterpret_cast<uint32_t*>(&data[pos])[0];
uint32_t chunk_bytes = reinterpret_cast<uint32_t*>(&data[pos])[1];
if (filelen - pos < chunk_bytes) {
LOG(ERROR) << "Error reading WAV file. File size incorrect";
return -1;
}
if (label == FourCharUInt32("fmt ")) {
if (chunk_bytes < sizeof(WavFormatChunk) || chunk_bytes > 1024) {
LOG(ERROR) << "Error reading WAV file";
return -1;
}
WavFormatChunk* format =
reinterpret_cast<WavFormatChunk*>(&data[pos + 8]);
if (format->format != 1) {
LOG(ERROR) << "Invalid WAV file format";
return -1;
}
if (format->channels < 1 || format->channels > 2) {
LOG(ERROR) << "Invalid number of channels in WAV file";
return -1;
}
if (format->bits_per_sample != 16) {
LOG(ERROR) << "Invalid sample size";
return -1;
}
unsigned int expected_byterate =
format->samplerate * format->channels *
format->bits_per_sample / 8;
if (format->byterate != expected_byterate) {
LOG(ERROR) << "Invalid byte rate for WAV file";
return -1;
}
if (format->block_align !=
format->channels * format->bits_per_sample / 8) {
LOG(ERROR) << "Error reading WAV file";
return -1;
}
rate = format->samplerate;
channels = format->channels;
} else if (label == FourCharUInt32("data")) {
if (rate == 0 || channels == 0) {
LOG(ERROR) << "Error reading WAV file";
return -1;
}
int new_frames = chunk_bytes / (2 * channels);
int16_t* new_audio_data = reinterpret_cast<int16_t*>(&data[pos + 8]);
int16_t* old_audio_data = audio_data.get();
audio_data.reset(new int16_t[(frames + new_frames) * channels]);
for (int i = 0; i < frames * channels; i++)
audio_data[i] = old_audio_data[i];
for (int i = 0; i < new_frames * channels; i++)
audio_data[i + frames * channels] = new_audio_data[i];
delete[] old_audio_data;
frames += new_frames;
}
pos += chunk_bytes + 8;
}
if (frames && channels && rate) {
return LoadEarcon(frames, audio_data.get(), channels, rate, loop);
}
return -1;
}
// Starts (or restarts) playback of the earcon |earcon_id| from its first
// frame. Ids that do not name a loaded earcon, including the -1 that the
// load functions return on failure, are ignored rather than used as an
// index.
void EarconManager::Play(int earcon_id) {
  if (earcon_id < 0 || static_cast<size_t>(earcon_id) >= earcons_.size())
    return;
  earcons_[earcon_id].is_playing = true;
  earcons_[earcon_id].position = 0;
}
// Stops playback of the earcon |earcon_id|; its position is left as is.
// Ids that do not name a loaded earcon, including the -1 that the load
// functions return on failure, are ignored rather than used as an index.
void EarconManager::Stop(int earcon_id) {
  if (earcon_id < 0 || static_cast<size_t>(earcon_id) >= earcons_.size())
    return;
  earcons_[earcon_id].is_playing = false;
}
// Marks every loaded earcon as not playing. Positions are left untouched.
void EarconManager::StopAll() {
  size_t index = 0;
  while (index < earcons_.size()) {
    earcons_[index].is_playing = false;
    ++index;
  }
}
// Returns whether the earcon |earcon_id| is currently playing. Ids that do
// not name a loaded earcon, including the -1 that the load functions return
// on failure, report false rather than being used as an index.
bool EarconManager::IsPlaying(int earcon_id) {
  if (earcon_id < 0 || static_cast<size_t>(earcon_id) >= earcons_.size())
    return false;
  return earcons_[earcon_id].is_playing;
}
// Returns true if at least one loaded earcon is currently playing.
bool EarconManager::IsAnythingPlaying() {
  bool any_active = false;
  // The scan ends at the first playing earcon.
  for (size_t index = 0; !any_active && index < earcons_.size(); ++index)
    any_active = earcons_[index].is_playing;
  return any_active;
}
// Adds the next |frames| frames of every playing earcon on top of the audio
// already in |data| (interleaved, |channels_| values per frame), saturating
// each sum to the 16-bit range. An earcon whose last frame has been mixed
// is marked as no longer playing.
// NOTE(review): the loop flag passed to LoadEarcon is not consulted here, so
// playback always ends at the last frame. Confirm whether that is intended.
void EarconManager::FillAudioBuffer(int16_t* data, int frames) {
  const size_t earcon_total = earcons_.size();
  for (size_t index = 0; index < earcon_total; ++index) {
    if (earcons_[index].is_playing) {
      // Mix no more than what is left of this earcon.
      const int frames_left =
          earcons_[index].frame_count - earcons_[index].position;
      const int frames_to_mix = std::min(frames, frames_left);

      // First sample of the earcon's current frame.
      int16_t* source =
          earcons_[index].data + channels_ * earcons_[index].position;
      const int sample_total = frames_to_mix * channels_;
      for (int s = 0; s < sample_total; ++s) {
        // Sum in int, then saturate so overflow clips instead of wrapping.
        int mixed = data[s] + source[s];
        if (mixed > 32767) {
          mixed = 32767;
        } else if (mixed < -32768) {
          mixed = -32768;
        }
        data[s] = mixed;
      }

      earcons_[index].position += frames_to_mix;
      if (earcons_[index].position == earcons_[index].frame_count)
        earcons_[index].is_playing = false;
    }
  }
}
} // namespace speech_synthesis