| // Copyright 2022 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifdef UNSAFE_BUFFERS_BUILD |
| // TODO(crbug.com/351564777): Remove this and convert code to safer constructs. |
| #pragma allow_unsafe_buffers |
| // TODO(crbug.com/390223051): Remove C-library calls to fix the errors. |
| #pragma allow_unsafe_libc_calls |
| #endif |
| |
| #include "third_party/blink/renderer/modules/webaudio/wave_shaper_handler.h" |
| |
| #include <algorithm> |
| #include <memory> |
| |
| #include "base/memory/scoped_refptr.h" |
| #include "base/synchronization/lock.h" |
| #include "build/build_config.h" |
| #include "third_party/blink/renderer/core/typed_arrays/dom_typed_array.h" |
| #include "third_party/blink/renderer/modules/webaudio/audio_node.h" |
| #include "third_party/blink/renderer/modules/webaudio/audio_node_input.h" |
| #include "third_party/blink/renderer/modules/webaudio/audio_node_output.h" |
| #include "third_party/blink/renderer/modules/webaudio/base_audio_context.h" |
| #include "third_party/blink/renderer/platform/audio/audio_array.h" |
| #include "third_party/blink/renderer/platform/audio/audio_utilities.h" |
| #include "third_party/blink/renderer/platform/audio/down_sampler.h" |
| #include "third_party/blink/renderer/platform/audio/up_sampler.h" |
| #include "third_party/blink/renderer/platform/audio/vector_math.h" |
| #include "third_party/blink/renderer/platform/wtf/math_extras.h" |
| #include "third_party/blink/renderer/platform/wtf/threading.h" |
| |
| #if defined(ARCH_CPU_X86_FAMILY) |
| #include <xmmintrin.h> |
| #elif defined(CPU_ARM_NEON) |
| #include <arm_neon.h> |
| #endif |
| |
| namespace blink { |
| |
| namespace { |
| |
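| // The output is initially created with a single channel; |
| // CheckNumberOfChannelsForInput() later updates the output to match the |
| // input's channel count. |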
| constexpr unsigned kDefaultNumberOfOutputChannels = 1; |
| |
| // Computes the output value of the WaveShaper for the given input sample. |
| double WaveShaperCurveValue(float input, |
| const float* curve_data, |
| int curve_length) { |
| // Calculate a virtual index based on input -1 -> +1 with -1 being |
| // curve[0], +1 being curve[curveLength - 1], and 0 being at the center of |
| // the curve data. Then linearly interpolate between the two points in the |
| // curve. |
| const double virtual_index = 0.5 * (input + 1) * (curve_length - 1); |
| double output; |
| if (virtual_index < 0) { |
| // input < -1, so use curve[0] |
| output = curve_data[0]; |
| } else if (virtual_index >= curve_length - 1) { |
| // input >= 1, so use last curve value |
| output = curve_data[curve_length - 1]; |
| } else { |
| // The general case where -1 <= input < 1, where 0 <= virtualIndex < |
| // curveLength - 1, so interpolate between the nearest samples on the |
| // curve. |
| const unsigned index1 = static_cast<unsigned>(virtual_index); |
| const unsigned index2 = index1 + 1; |
| const double interpolation_factor = virtual_index - index1; |
| |
| const double value1 = curve_data[index1]; |
| const double value2 = curve_data[index2]; |
| |
| output = |
| (1.0 - interpolation_factor) * value1 + interpolation_factor * value2; |
| } |
| |
| return output; |
| } |
| |
| } // namespace |
| |
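| // Per-channel processing state for the WaveShaper. The oversampling buffers |
| // and resamplers are allocated lazily, only when 2x or 4x oversampling is |
| // actually requested. |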
| class WaveShaperKernel final { |
| public: |
| // Oversampling. |
| std::unique_ptr<AudioFloatArray> temp_buffer_; |
| std::unique_ptr<AudioFloatArray> temp_buffer2_; |
| std::unique_ptr<UpSampler> up_sampler_; |
| std::unique_ptr<DownSampler> down_sampler_; |
| std::unique_ptr<UpSampler> up_sampler2_; |
| std::unique_ptr<DownSampler> down_sampler2_; |
| |
| bool IsInitialized() { return temp_buffer_ != nullptr; } |
| |
| // Oversampling requires more resources, so let's only allocate them if |
| // needed. |
| void LazyInitializeOversampling(unsigned render_quantum_frames) { |
| if (!IsInitialized()) { |
| temp_buffer_ = |
| std::make_unique<AudioFloatArray>(render_quantum_frames * 2); |
| temp_buffer2_ = |
| std::make_unique<AudioFloatArray>(render_quantum_frames * 4); |
| up_sampler_ = std::make_unique<UpSampler>(render_quantum_frames); |
| down_sampler_ = std::make_unique<DownSampler>(render_quantum_frames * 2); |
| up_sampler2_ = std::make_unique<UpSampler>(render_quantum_frames * 2); |
| down_sampler2_ = std::make_unique<DownSampler>(render_quantum_frames * 4); |
| } |
| } |
| }; |
| |
| scoped_refptr<WaveShaperHandler> WaveShaperHandler::Create(AudioNode& node, |
| float sample_rate) { |
| return base::AdoptRef(new WaveShaperHandler(node, sample_rate)); |
| } |
| |
| WaveShaperHandler::~WaveShaperHandler() { |
| if (IsInitialized()) { |
| Uninitialize(); |
| } |
| } |
| |
| void WaveShaperHandler::SetCurve(const float* curve_data, |
| unsigned curve_length) { |
| DCHECK(IsMainThread()); |
| |
| // This synchronizes with process(). |
| base::AutoLock process_locker(process_lock_); |
| |
| if (curve_length == 0 || !curve_data) { |
| curve_ = nullptr; |
| tail_time_ = 0; |
| return; |
| } |
| |
| // Copy the curve data, if any, to our internal buffer. |
| curve_ = std::make_unique<Vector<float>>(curve_length); |
| memcpy(curve_->data(), curve_data, sizeof(float) * curve_length); |
| |
| // Compute the curve output for a zero input, and set the tail time. |
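|   // A non-zero output for a zero input means the node keeps producing sound |
|   // after its input goes silent, hence the infinite tail time below. |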
| const double output = WaveShaperCurveValue(0.0, curve_data, curve_length); |
| tail_time_ = output == 0 ? 0 : std::numeric_limits<double>::infinity(); |
| } |
| |
| const Vector<float>* WaveShaperHandler::Curve() const { |
| DCHECK(IsMainThread()); |
| return curve_.get(); |
| } |
| |
| void WaveShaperHandler::SetOversample(V8OverSampleType::Enum oversample) { |
| DCHECK(IsMainThread()); |
| |
| base::AutoLock process_locker(process_lock_); |
| oversample_ = oversample; |
| |
|   // Lazily initialize resamplers and reset any that are no longer used. |
| switch (oversample) { |
| case V8OverSampleType::Enum::kNone: |
| for (auto& kernel : kernels_) { |
| if (kernel->IsInitialized()) { |
| kernel->up_sampler_->Reset(); |
| kernel->down_sampler_->Reset(); |
| kernel->up_sampler2_->Reset(); |
| kernel->down_sampler2_->Reset(); |
| } |
| } |
| break; |
| case V8OverSampleType::Enum::k2X: |
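|       // Only the first-stage resamplers are used at 2x; reset the unused |
|       // second-stage ones. |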
| for (auto& kernel : kernels_) { |
| kernel->LazyInitializeOversampling(render_quantum_frames_); |
| DCHECK(kernel->IsInitialized()); |
| kernel->up_sampler2_->Reset(); |
| kernel->down_sampler2_->Reset(); |
| } |
| break; |
| case V8OverSampleType::Enum::k4X: { |
| for (auto& kernel : kernels_) { |
| kernel->LazyInitializeOversampling(render_quantum_frames_); |
| } |
| break; |
| } |
| } |
| |
| // Calculate and cache `latency_time_` |
| if (kernels_.empty()) { |
| latency_time_ = 0; |
| } else { |
| switch (oversample) { |
| case V8OverSampleType::Enum::kNone: |
| latency_time_ = 0; |
| break; |
| case V8OverSampleType::Enum::k2X: { |
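|         // Latency of the single up-/down-sampling stage, in frames at the |
|         // context sample rate. |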
| const size_t latency_frames = |
| kernels_.front()->up_sampler_->LatencyFrames() + |
| kernels_.front()->down_sampler_->LatencyFrames(); |
| |
| latency_time_ = static_cast<double>(latency_frames) / sample_rate_; |
| } break; |
| case V8OverSampleType::Enum::k4X: { |
| // Account for first stage upsampling. |
| const size_t latency_frames = |
| kernels_.front()->up_sampler_->LatencyFrames() + |
| kernels_.front()->down_sampler_->LatencyFrames(); |
| |
|         // Account for the second-stage upsampling, dividing by 2 to convert |
|         // its latency back to frames at the regular sample rate. |
| const size_t latency_frames2 = |
| (kernels_.front()->up_sampler2_->LatencyFrames() + |
| kernels_.front()->down_sampler2_->LatencyFrames()) / |
| 2; |
| |
| latency_time_ = static_cast<double>(latency_frames + latency_frames2) / |
| sample_rate_; |
| } break; |
| } |
| } |
| } |
| |
| V8OverSampleType::Enum WaveShaperHandler::Oversample() const { |
| DCHECK(IsMainThread()); |
| return oversample_; |
| } |
| |
| WaveShaperHandler::WaveShaperHandler(AudioNode& node, float sample_rate) |
| : AudioHandler(NodeType::kNodeTypeWaveShaper, node, sample_rate), |
| sample_rate_(sample_rate), |
| render_quantum_frames_( |
| node.context()->GetDeferredTaskHandler().RenderQuantumFrames()), |
| // 4 times render size to handle 4x oversampling. |
| virtual_index_(4 * render_quantum_frames_), |
| index_(4 * render_quantum_frames_), |
| v1_(4 * render_quantum_frames_), |
| v2_(4 * render_quantum_frames_), |
| f_(4 * render_quantum_frames_) { |
| AddInput(); |
| AddOutput(kDefaultNumberOfOutputChannels); |
| |
| Initialize(); |
| } |
| |
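| // Renders one render quantum. Depending on `oversample_`, each channel is |
| // either shaped directly, or up-sampled by 2x/4x, shaped at the higher rate, |
| // and down-sampled back to the context sample rate. |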
| void WaveShaperHandler::Process(uint32_t frames_to_process) { |
| AudioBus* destination_bus = Output(0).Bus(); |
| |
| if (!IsInitialized()) { |
| destination_bus->Zero(); |
| } else { |
| scoped_refptr<AudioBus> source_bus = Input(0).Bus(); |
| |
| // TODO(crbug.com/396149720): if we take "tail time" into account, then we |
| // can avoid calling process once the tail dies down. |
| if (!Input(0).IsConnected()) { |
| source_bus->Zero(); |
| } |
| |
| DCHECK_EQ(source_bus->NumberOfChannels(), |
| destination_bus->NumberOfChannels()); |
| // The audio thread can't block on this lock, so we call tryLock() instead. |
| base::AutoTryLock try_locker(process_lock_); |
| if (try_locker.is_acquired()) { |
| DCHECK_EQ(source_bus->NumberOfChannels(), kernels_.size()); |
| DCHECK_EQ(frames_to_process, render_quantum_frames_); |
| |
| const float* curve_data = curve_ ? curve_->data() : nullptr; |
| const int curve_length = curve_ ? curve_->size() : 0; |
| |
| // For each channel of our input, process using the corresponding |
| // WaveShaperKernel into the output channel. |
| for (unsigned i = 0; i < kernels_.size(); ++i) { |
| if (!curve_data || !curve_length) { |
| // Act as "straight wire" pass-through if no curve is set. |
| memcpy(destination_bus->Channel(i)->MutableData(), |
| source_bus->Channel(i)->Data(), |
| sizeof(float) * frames_to_process); |
| } else { |
| switch (oversample_) { |
| case V8OverSampleType::Enum::kNone: |
| WaveShaperCurveValues(destination_bus->Channel(i)->MutableData(), |
| source_bus->Channel(i)->Data(), |
| frames_to_process, curve_data, |
| curve_length); |
| break; |
| |
| case V8OverSampleType::Enum::k2X: { |
| float* temp_p = kernels_[i]->temp_buffer_->Data(); |
| kernels_[i]->up_sampler_->Process(source_bus->Channel(i)->Data(), |
| temp_p, frames_to_process); |
| |
| // Process at 2x up-sampled rate. |
| WaveShaperCurveValues(temp_p, temp_p, frames_to_process * 2, |
| curve_data, curve_length); |
| |
| kernels_[i]->down_sampler_->Process( |
| temp_p, destination_bus->Channel(i)->MutableData(), |
| frames_to_process * 2); |
| } break; |
| |
| case V8OverSampleType::Enum::k4X: { |
| float* temp_p = kernels_[i]->temp_buffer_->Data(); |
| float* temp_p2 = kernels_[i]->temp_buffer2_->Data(); |
| |
| kernels_[i]->up_sampler_->Process(source_bus->Channel(i)->Data(), |
| temp_p, frames_to_process); |
| kernels_[i]->up_sampler2_->Process(temp_p, temp_p2, |
| frames_to_process * 2); |
| |
| // Process at 4x up-sampled rate. |
| WaveShaperCurveValues(temp_p2, temp_p2, frames_to_process * 4, |
| curve_data, curve_length); |
| |
| kernels_[i]->down_sampler2_->Process(temp_p2, temp_p, |
| frames_to_process * 4); |
| kernels_[i]->down_sampler_->Process( |
| temp_p, destination_bus->Channel(i)->MutableData(), |
| frames_to_process * 2); |
| } break; |
| } |
| } |
| } |
| } else { |
| // The tryLock() failed. We must be in the middle of modifying guarded |
| // values. |
| destination_bus->Zero(); |
| } |
| } |
| } |
| |
| void WaveShaperHandler::Initialize() { |
| if (IsInitialized()) { |
| return; |
| } |
| |
| { |
| base::AutoLock locker(process_lock_); |
| DCHECK(!kernels_.size()); |
| |
| // Create processing kernels, one per channel. |
| for (unsigned i = 0; i < Output(0).NumberOfChannels(); ++i) { |
| kernels_.push_back(std::make_unique<WaveShaperKernel>()); |
| if (oversample_ != V8OverSampleType::Enum::kNone) { |
| kernels_.back()->LazyInitializeOversampling(render_quantum_frames_); |
| } |
| } |
| } |
| |
| AudioHandler::Initialize(); |
| } |
| |
| void WaveShaperHandler::Uninitialize() { |
| if (!IsInitialized()) { |
| return; |
| } |
| |
| { |
| base::AutoLock locker(process_lock_); |
| kernels_.clear(); |
| } |
| |
| AudioHandler::Uninitialize(); |
| } |
| |
| void WaveShaperHandler::CheckNumberOfChannelsForInput(AudioNodeInput* input) { |
| DCHECK(Context()->IsAudioThread()); |
| Context()->AssertGraphOwner(); |
| |
| DCHECK_EQ(input, &Input(0)); |
| |
| unsigned number_of_channels = input->NumberOfChannels(); |
| |
| if (IsInitialized() && number_of_channels != Output(0).NumberOfChannels()) { |
| // We're already initialized but the channel count has changed. |
| Uninitialize(); |
| } |
| |
| if (!IsInitialized()) { |
| // This will propagate the channel count to any nodes connected further |
| // down the chain... |
| Output(0).SetNumberOfChannels(number_of_channels); |
| |
| // Re-initialize the processor with the new channel count. |
| Initialize(); |
| } |
| |
| AudioHandler::CheckNumberOfChannelsForInput(input); |
| } |
| |
| bool WaveShaperHandler::RequiresTailProcessing() const { |
| // Always return true even if the tail time and latency might both be zero. |
| return true; |
| } |
| |
| double WaveShaperHandler::TailTime() const { |
| DCHECK(!IsMainThread()); |
| base::AutoTryLock try_locker(process_lock_); |
| if (try_locker.is_acquired()) { |
| return tail_time_; |
| } else { |
|     // Since we don't want to block the audio device thread, return a large |
|     // value when the lock can't be acquired. |
| return std::numeric_limits<double>::infinity(); |
| } |
| } |
| |
| double WaveShaperHandler::LatencyTime() const { |
| DCHECK(!IsMainThread()); |
| base::AutoTryLock try_locker(process_lock_); |
| if (try_locker.is_acquired()) { |
| return latency_time_; |
| } else { |
|     // Since we don't want to block the audio device thread, return a large |
|     // value when the lock can't be acquired. |
| return std::numeric_limits<double>::infinity(); |
| } |
| } |
| |
| void WaveShaperHandler::PullInputs(uint32_t frames_to_process) { |
|   // Render directly into the output bus for in-place processing. |
| Input(0).Pull(Output(0).Bus(), frames_to_process); |
| } |
| |
| // Like WaveShaperCurveValue, but computes the values for a vector of |
| // inputs. |
| void WaveShaperHandler::WaveShaperCurveValues(float* destination, |
| const float* source, |
| uint32_t frames_to_process, |
| const float* curve_data, |
| int curve_length) { |
| DCHECK_LE(frames_to_process, virtual_index_.size()); |
| // Index into the array computed from the source value. |
| float* virtual_index = virtual_index_.Data(); |
| |
| // virtual_index[k] = |
| // ClampTo(0.5 * (source[k] + 1) * (curve_length - 1), |
| // 0.0f, |
| // static_cast<float>(curve_length - 1)) |
| |
|   // Add 1 to source, putting the result in virtual_index. |
| vector_math::Vsadd(source, 1, 1, virtual_index, 1, frames_to_process); |
| |
|   // Scale virtual_index in place by (curve_length - 1)/2. |
| vector_math::Vsmul(virtual_index, 1, 0.5 * (curve_length - 1), virtual_index, |
| 1, frames_to_process); |
| |
| // Clip virtual_index, in place. |
| vector_math::Vclip(virtual_index, 1, 0, curve_length - 1, virtual_index, 1, |
| frames_to_process); |
| |
| // index = floor(virtual_index) |
| DCHECK_LE(frames_to_process, index_.size()); |
| float* index = index_.Data(); |
| |
|   // v1 and v2 hold the curve_data values for the two curve samples closest |
|   // to each source sample. |
| DCHECK_LE(frames_to_process, v1_.size()); |
| DCHECK_LE(frames_to_process, v2_.size()); |
| float* v1 = v1_.Data(); |
| float* v2 = v2_.Data(); |
| |
| // Interpolation factor: virtual_index - index. |
| DCHECK_LE(frames_to_process, f_.size()); |
| float* f = f_.Data(); |
| |
| int max_index = curve_length - 1; |
| unsigned k = 0; |
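|   // Use the SSE or NEON fast path to process four frames per iteration when |
|   // available; the scalar loop below handles any remaining frames. |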
| #if defined(ARCH_CPU_X86_FAMILY) |
| { |
| int loop_limit = frames_to_process / 4; |
| |
| // one = 1 |
| __m128i one = _mm_set1_epi32(1); |
| |
|     // Do 4 elements at a time. |
| for (int loop = 0; loop < loop_limit; ++loop, k += 4) { |
| // v = virtual_index[k] |
| __m128 v = _mm_loadu_ps(virtual_index + k); |
| |
| // index1 = static_cast<int>(v); |
| __m128i index1 = _mm_cvttps_epi32(v); |
| |
| // v = static_cast<float>(index1) and save result to index[k:k+3] |
| v = _mm_cvtepi32_ps(index1); |
| _mm_storeu_ps(&index[k], v); |
| |
|       // index2 = index1 + 1; |
| __m128i index2 = _mm_add_epi32(index1, one); |
| |
| // Convert index1/index2 to arrays of 32-bit int values that are our |
| // array indices to use to get the curve data. |
| int32_t* i1 = reinterpret_cast<int32_t*>(&index1); |
| int32_t* i2 = reinterpret_cast<int32_t*>(&index2); |
| |
| // Get the curve_data values and save them in v1 and v2, |
|       // carefully clamping the values. If the input is NaN, index1 could |
|       // be 0x80000000 (the value _mm_cvttps_epi32() returns for NaN). |
| v1[k] = curve_data[ClampTo(i1[0], 0, max_index)]; |
| v2[k] = curve_data[ClampTo(i2[0], 0, max_index)]; |
| v1[k + 1] = curve_data[ClampTo(i1[1], 0, max_index)]; |
| v2[k + 1] = curve_data[ClampTo(i2[1], 0, max_index)]; |
| v1[k + 2] = curve_data[ClampTo(i1[2], 0, max_index)]; |
| v2[k + 2] = curve_data[ClampTo(i2[2], 0, max_index)]; |
| v1[k + 3] = curve_data[ClampTo(i1[3], 0, max_index)]; |
| v2[k + 3] = curve_data[ClampTo(i2[3], 0, max_index)]; |
| } |
| } |
| #elif defined(CPU_ARM_NEON) |
| { |
| int loop_limit = frames_to_process / 4; |
| |
| // Neon constants: |
| // zero = 0 |
| // one = 1 |
| // max = max_index |
| int32x4_t zero = vdupq_n_s32(0); |
| int32x4_t one = vdupq_n_s32(1); |
| int32x4_t max = vdupq_n_s32(max_index); |
| |
| for (int loop = 0; loop < loop_limit; ++loop, k += 4) { |
| // v = virtual_index |
| float32x4_t v = vld1q_f32(virtual_index + k); |
| |
| // index1 = static_cast<int32_t>(v), then clamp to a valid index range |
| // for curve_data |
| int32x4_t index1 = vcvtq_s32_f32(v); |
| index1 = vmaxq_s32(vminq_s32(index1, max), zero); |
| |
|       // v = static_cast<float>(index1) and save the result to index[k:k+3]. |
| v = vcvtq_f32_s32(index1); |
| vst1q_f32(&index[k], v); |
| |
| // index2 = index1 + 1, then clamp to a valid range for curve_data. |
| int32x4_t index2 = vaddq_s32(index1, one); |
| index2 = vmaxq_s32(vminq_s32(index2, max), zero); |
| |
| // Save index1/2 so we can get the individual parts. Aligned to |
| // 16 bytes for vst1q instruction. |
| int32_t i1[4] __attribute__((aligned(16))); |
| int32_t i2[4] __attribute__((aligned(16))); |
| vst1q_s32(i1, index1); |
| vst1q_s32(i2, index2); |
| |
| // Get curve elements corresponding to the indices. |
| v1[k] = curve_data[i1[0]]; |
| v2[k] = curve_data[i2[0]]; |
| v1[k + 1] = curve_data[i1[1]]; |
| v2[k + 1] = curve_data[i2[1]]; |
| v1[k + 2] = curve_data[i1[2]]; |
| v2[k + 2] = curve_data[i2[2]]; |
| v1[k + 3] = curve_data[i1[3]]; |
| v2[k + 3] = curve_data[i2[3]]; |
| } |
| } |
| #endif |
| |
|   // Scalar path: compute index1 for the remaining frames and load the |
|   // curve_data values for the corresponding indices. |
| for (; k < frames_to_process; ++k) { |
| unsigned index1 = |
| ClampTo(static_cast<unsigned>(virtual_index[k]), 0, max_index); |
| unsigned index2 = ClampTo(index1 + 1, 0, max_index); |
| index[k] = index1; |
| v1[k] = curve_data[index1]; |
| v2[k] = curve_data[index2]; |
| } |
| |
| // f[k] = virtual_index[k] - index[k] |
| vector_math::Vsub(virtual_index, 1, index, 1, f, 1, frames_to_process); |
| |
| // Do the linear interpolation of the curve data: |
| // destination[k] = v1[k] + f[k]*(v2[k] - v1[k]) |
| // |
| // 1. v2[k] = v2[k] - v1[k] |
| // 2. v2[k] = f[k]*v2[k] = f[k]*(v2[k] - v1[k]) |
| // 3. destination[k] = destination[k] + v2[k] |
| // = v1[k] + f[k]*(v2[k] - v1[k]) |
| vector_math::Vsub(v2, 1, v1, 1, v2, 1, frames_to_process); |
| vector_math::Vmul(f, 1, v2, 1, v2, 1, frames_to_process); |
| vector_math::Vadd(v2, 1, v1, 1, destination, 1, frames_to_process); |
| } |
| |
| } // namespace blink |