| // Copyright (c) 2010 The Chromium OS Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>

#include "base/memory/scoped_ptr.h"
#include "earcon_manager.h"
#include "log.h"
#include "resampler.h"
#include "tts_receiver.h"
| |
| namespace speech_synthesis { |
| |
// Layout of the body of a WAV "fmt " chunk. The WAV loader in this file
// fills it from the raw bytes of that chunk, so the order and width of
// the fields must match the on-disk layout exactly.
struct WavFormatChunk {
  // Format tag; the loader in this file accepts only the value 1.
  uint16_t format;
  // Number of channels; the loader accepts 1 or 2.
  uint16_t channels;
  // Frames per second.
  uint32_t samplerate;
  // The loader requires samplerate * channels * bits_per_sample / 8.
  uint32_t byterate;
  // The loader requires channels * bits_per_sample / 8.
  uint16_t block_align;
  // Bits in one sample of one channel; the loader accepts only 16.
  uint16_t bits_per_sample;
};
| |
// WAV files mark each section of the file with a sequence of four
// meaningful characters, such as "RIFF" or "fmt ". This packs the first
// four bytes at |str| into a single 32-bit value, in host byte order, so
// that tags can be compared with one integer comparison.
//
// |str| must point to at least four readable bytes. The bytes are copied
// out with memcpy instead of being read through a casted pointer, so |str|
// does not have to be 4-byte aligned.
inline const uint32_t FourCharUInt32(const char *str) {
  uint32_t packed = 0;
  memcpy(&packed, str, sizeof(packed));
  return packed;
}
| |
| class EarconReceiver : public TtsDataReceiver { |
| public: |
| explicit EarconReceiver(Earcon* earcon) : earcon_(earcon) {} |
| |
| virtual tts_callback_status Receive(int rate, |
| int num_channels, |
| const int16_t* data, |
| int num_samples, |
| tts_synth_status status) { |
| int count = num_samples; |
| count = std::min(count, earcon_->frame_count - earcon_->position); |
| |
| for (int i = 0; i < count * num_channels; i++) |
| earcon_->data[num_channels * earcon_->position + i] = data[i]; |
| |
| earcon_->position += num_samples; |
| if (earcon_->position == earcon_->frame_count) |
| return TTS_CALLBACK_HALT; |
| else |
| return TTS_CALLBACK_CONTINUE; |
| } |
| |
| private: |
| Earcon* earcon_; |
| }; |
| |
| EarconManager::EarconManager(int output_frame_rate, int output_channels) |
| : rate_(output_frame_rate), channels_(output_channels) { |
| } |
| |
| EarconManager::~EarconManager() { |
| for (size_t i = 0; i < earcons_.size(); i++) { |
| delete[] earcons_[i].data; |
| } |
| } |
| |
| int EarconManager::LoadEarcon(int frame_count, |
| int16_t* data, |
| int source_channels, |
| int source_rate, |
| bool loop) { |
| int earcon_id = earcons_.size(); |
| bool is_playing = false; |
| int position = 0; |
| Earcon *earcon = NULL; |
| |
| if (source_rate != rate_) { |
| int new_size = frame_count * rate_ / source_rate; |
| earcon = new Earcon(new_size, new int16_t[new_size * channels_], |
| is_playing, position, loop); |
| EarconReceiver receiver(earcon); |
| Resampler resampler(&receiver, source_rate, rate_, new_size); |
| resampler.Receive(source_rate, channels_, data, frame_count, |
| TTS_SYNTH_DONE); |
| earcon->frame_count = earcon->position; |
| earcon->position = 0; |
| } else { |
| earcon = new Earcon(frame_count, new int16_t[frame_count * channels_], |
| is_playing, position, loop); |
| // Convert from the source channels to the destination number of |
| // channels. |
| if (source_channels == 1 && channels_ == 2) { |
| for (int i = 0; i < frame_count; i++) { |
| earcon->data[2 * i] = data[i]; |
| earcon->data[2 * i + 1] = data[i]; |
| } |
| } else if (source_channels == 2 && channels_ == 1) { |
| for (int i = 0; i < frame_count; i++) |
| earcon->data[i] = (data[2 * i] + data[2 * i + 1]) / 2; |
| } else if (source_channels == channels_) { |
| for (int i = 0; i < frame_count * channels_; i++) |
| earcon->data[i] = data[i]; |
| } else { |
| LOG(ERROR) << "Fatal: unsupported number of channels"; |
| delete earcon; |
| return -1; |
| } |
| } |
| earcons_.push_back(*earcon); |
| return earcon_id; |
| } |
| |
| int EarconManager::LoadEarconFromWavFile(const char *path, bool loop) { |
| FILE* fp = fopen(path, "rb"); |
| if (!fp || ferror(fp)) { |
| return -1; |
| } |
| fseek(fp, 0, SEEK_END); |
| unsigned int filelen = static_cast<unsigned int>(ftell(fp)); |
| if (filelen < 40) { |
| LOG(ERROR) << "File too short to be a WAV file."; |
| return -1; |
| } |
| fseek(fp, 0, SEEK_SET); |
| scoped_array<char> data; |
| data.reset(new char[filelen]); |
| if (filelen != fread(data.get(), 1, filelen, fp)) { |
| LOG(ERROR) << "Error reading file."; |
| return -1; |
| } |
| |
| if (reinterpret_cast<uint32_t*>(data.get())[0] != FourCharUInt32("RIFF") || |
| reinterpret_cast<uint32_t*>(data.get())[2] != FourCharUInt32("WAVE")) { |
| LOG(ERROR) << "File is not WAV format."; |
| return -1; |
| } |
| |
| int channels = 0; |
| int rate = 0; |
| int frames = 0; |
| scoped_array<int16_t> audio_data; |
| |
| int pos = 12; |
| while (filelen - pos >= 8) { |
| uint32_t label = reinterpret_cast<uint32_t*>(&data[pos])[0]; |
| uint32_t chunk_bytes = reinterpret_cast<uint32_t*>(&data[pos])[1]; |
| if (filelen - pos < chunk_bytes) { |
| LOG(ERROR) << "Error reading WAV file. File size incorrect"; |
| return -1; |
| } |
| if (label == FourCharUInt32("fmt ")) { |
| if (chunk_bytes < sizeof(WavFormatChunk) || chunk_bytes > 1024) { |
| LOG(ERROR) << "Error reading WAV file"; |
| return -1; |
| } |
| WavFormatChunk* format = |
| reinterpret_cast<WavFormatChunk*>(&data[pos + 8]); |
| if (format->format != 1) { |
| LOG(ERROR) << "Invalid WAV file format"; |
| return -1; |
| } |
| if (format->channels < 1 || format->channels > 2) { |
| LOG(ERROR) << "Invalid number of channels in WAV file"; |
| return -1; |
| } |
| if (format->bits_per_sample != 16) { |
| LOG(ERROR) << "Invalid sample size"; |
| return -1; |
| } |
| unsigned int expected_byterate = |
| format->samplerate * format->channels * |
| format->bits_per_sample / 8; |
| if (format->byterate != expected_byterate) { |
| LOG(ERROR) << "Invalid byte rate for WAV file"; |
| return -1; |
| } |
| if (format->block_align != |
| format->channels * format->bits_per_sample / 8) { |
| LOG(ERROR) << "Error reading WAV file"; |
| return -1; |
| } |
| rate = format->samplerate; |
| channels = format->channels; |
| } else if (label == FourCharUInt32("data")) { |
| if (rate == 0 || channels == 0) { |
| LOG(ERROR) << "Error reading WAV file"; |
| return -1; |
| } |
| int new_frames = chunk_bytes / (2 * channels); |
| int16_t* new_audio_data = reinterpret_cast<int16_t*>(&data[pos + 8]); |
| int16_t* old_audio_data = audio_data.get(); |
| audio_data.reset(new int16_t[(frames + new_frames) * channels]); |
| for (int i = 0; i < frames * channels; i++) |
| audio_data[i] = old_audio_data[i]; |
| for (int i = 0; i < new_frames * channels; i++) |
| audio_data[i + frames * channels] = new_audio_data[i]; |
| delete[] old_audio_data; |
| frames += new_frames; |
| } |
| |
| pos += chunk_bytes + 8; |
| } |
| |
| if (frames && channels && rate) { |
| return LoadEarcon(frames, audio_data.get(), channels, rate, loop); |
| } |
| return -1; |
| } |
| |
| void EarconManager::Play(int earcon_id) { |
| earcons_[earcon_id].is_playing = true; |
| earcons_[earcon_id].position = 0; |
| } |
| |
| void EarconManager::Stop(int earcon_id) { |
| earcons_[earcon_id].is_playing = false; |
| } |
| |
| void EarconManager::StopAll() { |
| for (size_t i = 0; i < earcons_.size(); i++) |
| earcons_[i].is_playing = false; |
| } |
| |
| bool EarconManager::IsPlaying(int earcon_id) { |
| return earcons_[earcon_id].is_playing; |
| } |
| |
| bool EarconManager::IsAnythingPlaying() { |
| for (size_t i = 0; i < earcons_.size(); i++) { |
| if (earcons_[i].is_playing) |
| return true; |
| } |
| return false; |
| } |
| |
| void EarconManager::FillAudioBuffer(int16_t* data, int frames) { |
| for (size_t i = 0; i < earcons_.size(); i++) { |
| // Skip earcons that aren't playing now. |
| if (!earcons_[i].is_playing) |
| continue; |
| |
| // Figure out how many frames of this earcon to play. |
| int count = frames; |
| if (count > earcons_[i].frame_count - earcons_[i].position) |
| count = earcons_[i].frame_count - earcons_[i].position; |
| |
| // Mix in this earcon with the existing audio, and handle |
| // clipping properly. |
| int16_t* earcon_data = &earcons_[i].data[ |
| channels_ * earcons_[i].position]; |
| for (int j = 0; j < count * channels_; j++) { |
| int value = data[j] + earcon_data[j]; |
| value = std::max(std::min(value, 32767), -32768); |
| data[j] = value; |
| } |
| |
| earcons_[i].position += count; |
| if (earcons_[i].position == earcons_[i].frame_count) |
| earcons_[i].is_playing = false; |
| } |
| } |
| |
| } // namespace speech_synthesis |