blob: ec7d6ed159acf0ca0a9cb145dcec93d87fd8e217 [file] [log] [blame]
 // Copyright 2016 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "include/evaluator.h" #include namespace { // Returns the square of absolute value of complex number. inline double SquareAbs(const double real, const double imaginary) { return real * real + imaginary * imaginary; } void ToComplex(const std::vector &data, std::vector *output) { if (output->size() != data.size() * 2) output->resize(data.size() * 2); auto it = output->begin(); for (size_t i = 0; i < data.size(); ++i) { *(it++) = data[i]; *(it++) = 0.0; } } // One dimension fast fourier transform using Danielson-Lanczos lemma. void FFT(std::vector *data_ptr) { auto &data = *data_ptr; unsigned order, pos = 1, size = data.size(); // Reverses binary reindexing. for (unsigned i = 1; i < size; i += 2) { if (pos > i) { std::swap(data[pos - 1], data[i - 1]); std::swap(data[pos], data[i]); } order = size / 2; while (order >= 2 && pos > order) { pos -= order; order >>= 1; } pos += order; } // Danielson-Lanczos lemma. unsigned mmax = 2, step; double theta, wtemp; double cur[2], pre[2], temp[2]; // [0] real, [1] complex mmax = 2; while (size > mmax) { step = mmax << 1; theta = -(2 * M_PI / mmax); wtemp = sin(theta / 2); pre[0] = -2.0 * wtemp * wtemp; pre[1] = sin(theta); cur[0] = 1.0; cur[1] = 0.0; for (unsigned k = 1; k < mmax; k += 2) { for (unsigned i = k; i <= size; i += step) { pos = i + mmax; temp[0] = cur[0] * data[pos - 1] - cur[1] * data[pos]; temp[1] = cur[0] * data[pos] + cur[1] * data[pos - 1]; data[pos - 1] = data[i - 1] - temp[0]; data[pos] = data[i] - temp[1]; data[i - 1] += temp[0]; data[i] += temp[1]; } wtemp = cur[0]; cur[0] += wtemp * pre[0] - cur[1] * pre[1]; cur[1] += cur[1] * pre[0] + wtemp * pre[1]; } mmax = step; } } } // namespace Evaluator::Evaluator(const AudioFunTestConfig& config) : filter_(config.match_window_size), half_window_size_(config.match_window_size / 2), num_channels_(config.num_mic_channels), active_mic_channels_(config.active_mic_channels), format_(config.sample_format), sample_rate_(config.sample_rate), bin_(config.match_window_size), power_threshold_(config.power_threshold), confidence_threshold_(config.confidence_threshold), verbose_(config.verbose) { // Initializes original expected filter. filter_[half_window_size_] = 1; // center double mean = 1.0; double sigma = 1.0; // standard deviation // Normalization. mean /= filter_.size(); sigma = sqrt(sigma / filter_.size() - mean * mean); for (auto &x : filter_) { x = (x - mean) / sigma; } // Estimates max trial. // max_trial_ is the reverse of allowed delay plus the threshold to pass. // And +2 is for the acceptable variation. max_trial_ = config.allowed_delay_sec * sample_rate_ / config.fft_size + confidence_threshold_ + 2; buf_size_ = num_channels_ * config.fft_size * format_.bytes(); buffer_.reset(new uint8_t[buf_size_]); } void Evaluator::Evaluate(int center_bin, RecordClient *recorder, std::vector *result) { bool all_pass = false; std::vector accum_confidence(num_channels_); for (int trial = 1; trial <= max_trial_ && !all_pass; ++trial) { all_pass = true; recorder->Record(buffer_.get(), buf_size_); std::vector > data; int num_frames = Unpack( buffer_.get(), buf_size_, format_, num_channels_, &data); std::vector complex_data(num_frames * 2); // Evaluates all channels. for (int channel: active_mic_channels_) { if (accum_confidence[channel] >= confidence_threshold_) continue; ToComplex(data[channel], &complex_data); accum_confidence[channel] += std::max( EstimateChannel(&complex_data, center_bin), 0.0); if (accum_confidence[channel] < confidence_threshold_) all_pass = false; else (*result)[channel] = true; } } } double Evaluator::EstimateChannel( std::vector *data, int center_bin) { // Calculate RMS before FFT // The |data| here is already formatted as double sized to store the result // (read, imaginary) from FFT. int data_size = data->size() / 2; double rms = 0.0; for (int t = 0; t < data_size; ++t) { const double amplitude = (*data)[2 * t]; rms += amplitude * amplitude; } rms = sqrt(rms / data_size); if (verbose_) printf("rms: %0.4f\n", rms); if (rms < power_threshold_) { perror("The RMS level is too low."); return 0.0; } FFT(data); double confidence = 0.0, mean = 0.0, sigma = 0.0; for (int bin = (center_bin - half_window_size_); bin <= center_bin + half_window_size_; ++bin) { int index = bin - (center_bin - half_window_size_); bin_[index] = SquareAbs( (*data)[2 * bin], (*data)[2 * bin + 1]) / data->size(); if (verbose_) printf("%e ", bin_[index]); confidence += bin_[index] * filter_[index]; mean += bin_[index]; sigma += bin_[index] * bin_[index]; } if (verbose_) printf("\n"); // Avoids divide by zero. if (std::abs(sigma) < 1e-9) return 0.0; const double power_ratio = bin_[half_window_size_] / mean; mean /= filter_.size(); sigma = sqrt(sigma / filter_.size() - mean * mean); confidence /= (sigma * filter_.size()); if (verbose_) printf("power: %0.4f, conf: %0.4f\n", power_ratio, confidence); return power_ratio * confidence; }