third_party/blink/renderer/modules/webaudio/wave_shaper_handler.cc - chromium/src.git - Git at Google

 // Copyright 2022 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #ifdef UNSAFE_BUFFERS_BUILD
 // TODO(crbug.com/351564777): Remove this and convert code to safer constructs.
 #pragma allow_unsafe_buffers
 // TODO(crbug.com/390223051): Remove C-library calls to fix the errors.
 #pragma allow_unsafe_libc_calls
 #endif

 #include "third_party/blink/renderer/modules/webaudio/wave_shaper_handler.h"

 #include <algorithm>
 #include <memory>

 #include "base/memory/scoped_refptr.h"
 #include "base/synchronization/lock.h"
 #include "build/build_config.h"
 #include "third_party/blink/renderer/core/typed_arrays/dom_typed_array.h"
 #include "third_party/blink/renderer/modules/webaudio/audio_node.h"
 #include "third_party/blink/renderer/modules/webaudio/audio_node_input.h"
 #include "third_party/blink/renderer/modules/webaudio/audio_node_output.h"
 #include "third_party/blink/renderer/modules/webaudio/base_audio_context.h"
 #include "third_party/blink/renderer/platform/audio/audio_array.h"
 #include "third_party/blink/renderer/platform/audio/audio_utilities.h"
 #include "third_party/blink/renderer/platform/audio/down_sampler.h"
 #include "third_party/blink/renderer/platform/audio/up_sampler.h"
 #include "third_party/blink/renderer/platform/audio/vector_math.h"
 #include "third_party/blink/renderer/platform/wtf/math_extras.h"
 #include "third_party/blink/renderer/platform/wtf/threading.h"

 #if defined(ARCH_CPU_X86_FAMILY)
 #include <xmmintrin.h>
 #elif defined(CPU_ARM_NEON)
 #include <arm_neon.h>
 #endif

 namespace blink {

 namespace {

 constexpr unsigned kDefaultNumberOfOutputChannels = 1;

 // Computes value of the WaveShaper
 double WaveShaperCurveValue(float input,
                             const float* curve_data,
                             int curve_length) {
   // Calculate a virtual index based on input -1 -> +1 with -1 being
   // curve[0], +1 being curve[curveLength - 1], and 0 being at the center of
   // the curve data. Then linearly interpolate between the two points in the
   // curve.
   const double virtual_index = 0.5 * (input + 1) * (curve_length - 1);
   double output;
   if (virtual_index < 0) {
     // input < -1, so use curve[0]
     output = curve_data[0];
   } else if (virtual_index >= curve_length - 1) {
     // input >= 1, so use last curve value
     output = curve_data[curve_length - 1];
   } else {
     // The general case where -1 <= input < 1, where 0 <= virtualIndex <
     // curveLength - 1, so interpolate between the nearest samples on the
     // curve.
     const unsigned index1 = static_cast<unsigned>(virtual_index);
     const unsigned index2 = index1 + 1;
     const double interpolation_factor = virtual_index - index1;

     const double value1 = curve_data[index1];
     const double value2 = curve_data[index2];

     output =
         (1.0 - interpolation_factor) * value1 + interpolation_factor * value2;
   }

   return output;
 }

 }  // namespace

 class WaveShaperKernel final {
  public:
   // Oversampling.
   std::unique_ptr<AudioFloatArray> temp_buffer_;
   std::unique_ptr<AudioFloatArray> temp_buffer2_;
   std::unique_ptr<UpSampler> up_sampler_;
   std::unique_ptr<DownSampler> down_sampler_;
   std::unique_ptr<UpSampler> up_sampler2_;
   std::unique_ptr<DownSampler> down_sampler2_;

   bool IsInitialized() { return temp_buffer_ != nullptr; }

   // Oversampling requires more resources, so let's only allocate them if
   // needed.
   void LazyInitializeOversampling(unsigned render_quantum_frames) {
     if (!IsInitialized()) {
       temp_buffer_ =
           std::make_unique<AudioFloatArray>(render_quantum_frames * 2);
       temp_buffer2_ =
           std::make_unique<AudioFloatArray>(render_quantum_frames * 4);
       up_sampler_ = std::make_unique<UpSampler>(render_quantum_frames);
       down_sampler_ = std::make_unique<DownSampler>(render_quantum_frames * 2);
       up_sampler2_ = std::make_unique<UpSampler>(render_quantum_frames * 2);
       down_sampler2_ = std::make_unique<DownSampler>(render_quantum_frames * 4);
     }
   }
 };

 scoped_refptr<WaveShaperHandler> WaveShaperHandler::Create(AudioNode& node,
                                                            float sample_rate) {
   return base::AdoptRef(new WaveShaperHandler(node, sample_rate));
 }

 WaveShaperHandler::~WaveShaperHandler() {
   if (IsInitialized()) {
     Uninitialize();
   }
 }

 void WaveShaperHandler::SetCurve(const float* curve_data,
                                  unsigned curve_length) {
   DCHECK(IsMainThread());

   // This synchronizes with process().
   base::AutoLock process_locker(process_lock_);

   if (curve_length == 0 || !curve_data) {
     curve_ = nullptr;
     tail_time_ = 0;
     return;
   }

   // Copy the curve data, if any, to our internal buffer.
   curve_ = std::make_unique<Vector<float>>(curve_length);
   memcpy(curve_->data(), curve_data, sizeof(float) * curve_length);

   // Compute the curve output for a zero input, and set the tail time.
   const double output = WaveShaperCurveValue(0.0, curve_data, curve_length);
   tail_time_ = output == 0 ? 0 : std::numeric_limits<double>::infinity();
 }

 const Vector<float>* WaveShaperHandler::Curve() const {
   DCHECK(IsMainThread());
   return curve_.get();
 }

 void WaveShaperHandler::SetOversample(V8OverSampleType::Enum oversample) {
   DCHECK(IsMainThread());

   base::AutoLock process_locker(process_lock_);
   oversample_ = oversample;

   // Lazy initialize resamplers, and reset resamplers that are no longer used
   switch (oversample) {
     case V8OverSampleType::Enum::kNone:
       for (auto& kernel : kernels_) {
         if (kernel->IsInitialized()) {
           kernel->up_sampler_->Reset();
           kernel->down_sampler_->Reset();
           kernel->up_sampler2_->Reset();
           kernel->down_sampler2_->Reset();
         }
       }
       break;
     case V8OverSampleType::Enum::k2X:
       for (auto& kernel : kernels_) {
         kernel->LazyInitializeOversampling(render_quantum_frames_);
         DCHECK(kernel->IsInitialized());
         kernel->up_sampler2_->Reset();
         kernel->down_sampler2_->Reset();
       }
       break;
     case V8OverSampleType::Enum::k4X: {
       for (auto& kernel : kernels_) {
         kernel->LazyInitializeOversampling(render_quantum_frames_);
       }
       break;
     }
   }

   // Calculate and cache `latency_time_`
   if (kernels_.empty()) {
     latency_time_ = 0;
   } else {
     switch (oversample) {
       case V8OverSampleType::Enum::kNone:
         latency_time_ = 0;
         break;
       case V8OverSampleType::Enum::k2X: {
         const size_t latency_frames =
             kernels_.front()->up_sampler_->LatencyFrames() +
             kernels_.front()->down_sampler_->LatencyFrames();

         latency_time_ = static_cast<double>(latency_frames) / sample_rate_;
       } break;
       case V8OverSampleType::Enum::k4X: {
         // Account for first stage upsampling.
         const size_t latency_frames =
             kernels_.front()->up_sampler_->LatencyFrames() +
             kernels_.front()->down_sampler_->LatencyFrames();

         // Account for second stage upsampling.
         // and divide by 2 to get back down to the regular sample-rate.
         const size_t latency_frames2 =
             (kernels_.front()->up_sampler2_->LatencyFrames() +
              kernels_.front()->down_sampler2_->LatencyFrames()) /
             2;

         latency_time_ = static_cast<double>(latency_frames + latency_frames2) /
                         sample_rate_;
       } break;
     }
   }
 }

 V8OverSampleType::Enum WaveShaperHandler::Oversample() const {
   DCHECK(IsMainThread());
   return oversample_;
 }

 WaveShaperHandler::WaveShaperHandler(AudioNode& node, float sample_rate)
     : AudioHandler(NodeType::kNodeTypeWaveShaper, node, sample_rate),
       sample_rate_(sample_rate),
       render_quantum_frames_(
           node.context()->GetDeferredTaskHandler().RenderQuantumFrames()),
       // 4 times render size to handle 4x oversampling.
       virtual_index_(4 * render_quantum_frames_),
       index_(4 * render_quantum_frames_),
       v1_(4 * render_quantum_frames_),
       v2_(4 * render_quantum_frames_),
       f_(4 * render_quantum_frames_) {
   AddInput();
   AddOutput(kDefaultNumberOfOutputChannels);

   Initialize();
 }

 void WaveShaperHandler::Process(uint32_t frames_to_process) {
   AudioBus* destination_bus = Output(0).Bus();

   if (!IsInitialized()) {
     destination_bus->Zero();
   } else {
     scoped_refptr<AudioBus> source_bus = Input(0).Bus();

     // TODO(crbug.com/396149720): if we take "tail time" into account, then we
     // can avoid calling process once the tail dies down.
     if (!Input(0).IsConnected()) {
       source_bus->Zero();
     }

     DCHECK_EQ(source_bus->NumberOfChannels(),
               destination_bus->NumberOfChannels());
     // The audio thread can't block on this lock, so we call tryLock() instead.
     base::AutoTryLock try_locker(process_lock_);
     if (try_locker.is_acquired()) {
       DCHECK_EQ(source_bus->NumberOfChannels(), kernels_.size());
       DCHECK_EQ(frames_to_process, render_quantum_frames_);

       const float* curve_data = curve_ ? curve_->data() : nullptr;
       const int curve_length = curve_ ? curve_->size() : 0;

       // For each channel of our input, process using the corresponding
       // WaveShaperKernel into the output channel.
       for (unsigned i = 0; i < kernels_.size(); ++i) {
         if (!curve_data || !curve_length) {
           // Act as "straight wire" pass-through if no curve is set.
           memcpy(destination_bus->Channel(i)->MutableData(),
                  source_bus->Channel(i)->Data(),
                  sizeof(float) * frames_to_process);
         } else {
           switch (oversample_) {
             case V8OverSampleType::Enum::kNone:
               WaveShaperCurveValues(destination_bus->Channel(i)->MutableData(),
                                     source_bus->Channel(i)->Data(),
                                     frames_to_process, curve_data,
                                     curve_length);
               break;

             case V8OverSampleType::Enum::k2X: {
               float* temp_p = kernels_[i]->temp_buffer_->Data();
               kernels_[i]->up_sampler_->Process(source_bus->Channel(i)->Data(),
                                                 temp_p, frames_to_process);

               // Process at 2x up-sampled rate.
               WaveShaperCurveValues(temp_p, temp_p, frames_to_process * 2,
                                     curve_data, curve_length);

               kernels_[i]->down_sampler_->Process(
                   temp_p, destination_bus->Channel(i)->MutableData(),
                   frames_to_process * 2);
             } break;

             case V8OverSampleType::Enum::k4X: {
               float* temp_p = kernels_[i]->temp_buffer_->Data();
               float* temp_p2 = kernels_[i]->temp_buffer2_->Data();

               kernels_[i]->up_sampler_->Process(source_bus->Channel(i)->Data(),
                                                 temp_p, frames_to_process);
               kernels_[i]->up_sampler2_->Process(temp_p, temp_p2,
                                                  frames_to_process * 2);

               // Process at 4x up-sampled rate.
               WaveShaperCurveValues(temp_p2, temp_p2, frames_to_process * 4,
                                     curve_data, curve_length);

               kernels_[i]->down_sampler2_->Process(temp_p2, temp_p,
                                                    frames_to_process * 4);
               kernels_[i]->down_sampler_->Process(
                   temp_p, destination_bus->Channel(i)->MutableData(),
                   frames_to_process * 2);
             } break;
           }
         }
       }
     } else {
       // The tryLock() failed. We must be in the middle of modifying guarded
       // values.
       destination_bus->Zero();
     }
   }
 }

 void WaveShaperHandler::Initialize() {
   if (IsInitialized()) {
     return;
   }

   {
     base::AutoLock locker(process_lock_);
     DCHECK(!kernels_.size());

     // Create processing kernels, one per channel.
     for (unsigned i = 0; i < Output(0).NumberOfChannels(); ++i) {
       kernels_.push_back(std::make_unique<WaveShaperKernel>());
       if (oversample_ != V8OverSampleType::Enum::kNone) {
         kernels_.back()->LazyInitializeOversampling(render_quantum_frames_);
       }
     }
   }

   AudioHandler::Initialize();
 }

 void WaveShaperHandler::Uninitialize() {
   if (!IsInitialized()) {
     return;
   }

   {
     base::AutoLock locker(process_lock_);
     kernels_.clear();
   }

   AudioHandler::Uninitialize();
 }

 void WaveShaperHandler::CheckNumberOfChannelsForInput(AudioNodeInput* input) {
   DCHECK(Context()->IsAudioThread());
   Context()->AssertGraphOwner();

   DCHECK_EQ(input, &Input(0));

   unsigned number_of_channels = input->NumberOfChannels();

   if (IsInitialized() && number_of_channels != Output(0).NumberOfChannels()) {
     // We're already initialized but the channel count has changed.
     Uninitialize();
   }

   if (!IsInitialized()) {
     // This will propagate the channel count to any nodes connected further
     // down the chain...
     Output(0).SetNumberOfChannels(number_of_channels);

     // Re-initialize the processor with the new channel count.
     Initialize();
   }

   AudioHandler::CheckNumberOfChannelsForInput(input);
 }

 bool WaveShaperHandler::RequiresTailProcessing() const {
   // Always return true even if the tail time and latency might both be zero.
   return true;
 }

 double WaveShaperHandler::TailTime() const {
   DCHECK(!IsMainThread());
   base::AutoTryLock try_locker(process_lock_);
   if (try_locker.is_acquired()) {
     return tail_time_;
   } else {
     // Since we don't want to block the Audio Device thread, we return a large
     // value instead of trying to acquire the lock.
     return std::numeric_limits<double>::infinity();
   }
 }

 double WaveShaperHandler::LatencyTime() const {
   DCHECK(!IsMainThread());
   base::AutoTryLock try_locker(process_lock_);
   if (try_locker.is_acquired()) {
     return latency_time_;
   } else {
     // Since we don't want to block the Audio Device thread, we return a large
     // value instead of trying to acquire the lock.
     return std::numeric_limits<double>::infinity();
   }
 }

 void WaveShaperHandler::PullInputs(uint32_t frames_to_process) {
   // Render directly into output bus for in-place processing
   Input(0).Pull(Output(0).Bus(), frames_to_process);
 }

 // Like WaveShaperCurveValue, but computes the values for a vector of
 // inputs.
 void WaveShaperHandler::WaveShaperCurveValues(float* destination,
                                               const float* source,
                                               uint32_t frames_to_process,
                                               const float* curve_data,
                                               int curve_length) {
   DCHECK_LE(frames_to_process, virtual_index_.size());
   // Index into the array computed from the source value.
   float* virtual_index = virtual_index_.Data();

   // virtual_index[k] =
   //   ClampTo(0.5 * (source[k] + 1) * (curve_length - 1),
   //           0.0f,
   //           static_cast<float>(curve_length - 1))

   // Add 1 to source puttting result in virtual_index
   vector_math::Vsadd(source, 1, 1, virtual_index, 1, frames_to_process);

   // Scale virtual_index in place by (curve_lenth -1)/2
   vector_math::Vsmul(virtual_index, 1, 0.5 * (curve_length - 1), virtual_index,
                      1, frames_to_process);

   // Clip virtual_index, in place.
   vector_math::Vclip(virtual_index, 1, 0, curve_length - 1, virtual_index, 1,
                      frames_to_process);

   // index = floor(virtual_index)
   DCHECK_LE(frames_to_process, index_.size());
   float* index = index_.Data();

   // v1 and v2 hold the curve_data corresponding to the closest curve
   // values to the source sample.  To save memory, v1 will use the
   // destination array.
   DCHECK_LE(frames_to_process, v1_.size());
   DCHECK_LE(frames_to_process, v2_.size());
   float* v1 = v1_.Data();
   float* v2 = v2_.Data();

   // Interpolation factor: virtual_index - index.
   DCHECK_LE(frames_to_process, f_.size());
   float* f = f_.Data();

   int max_index = curve_length - 1;
   unsigned k = 0;
 #if defined(ARCH_CPU_X86_FAMILY)
   {
     int loop_limit = frames_to_process / 4;

     // one = 1
     __m128i one = _mm_set1_epi32(1);

     // Do 4 eleemnts at a time
     for (int loop = 0; loop < loop_limit; ++loop, k += 4) {
       // v = virtual_index[k]
       __m128 v = _mm_loadu_ps(virtual_index + k);

       // index1 = static_cast<int>(v);
       __m128i index1 = _mm_cvttps_epi32(v);

       // v = static_cast<float>(index1) and save result to index[k:k+3]
       v = _mm_cvtepi32_ps(index1);
       _mm_storeu_ps(&index[k], v);

       // index2 = index2 + 1;
       __m128i index2 = _mm_add_epi32(index1, one);

       // Convert index1/index2 to arrays of 32-bit int values that are our
       // array indices to use to get the curve data.
       int32_t* i1 = reinterpret_cast<int32_t*>(&index1);
       int32_t* i2 = reinterpret_cast<int32_t*>(&index2);

       // Get the curve_data values and save them in v1 and v2,
       // carefully clamping the values.  If the input is NaN, index1
       // could be 0x8000000.
       v1[k] = curve_data[ClampTo(i1[0], 0, max_index)];
       v2[k] = curve_data[ClampTo(i2[0], 0, max_index)];
       v1[k + 1] = curve_data[ClampTo(i1[1], 0, max_index)];
       v2[k + 1] = curve_data[ClampTo(i2[1], 0, max_index)];
       v1[k + 2] = curve_data[ClampTo(i1[2], 0, max_index)];
       v2[k + 2] = curve_data[ClampTo(i2[2], 0, max_index)];
       v1[k + 3] = curve_data[ClampTo(i1[3], 0, max_index)];
       v2[k + 3] = curve_data[ClampTo(i2[3], 0, max_index)];
     }
   }
 #elif defined(CPU_ARM_NEON)
   {
     int loop_limit = frames_to_process / 4;

     // Neon constants:
     //   zero = 0
     //   one  = 1
     //   max  = max_index
     int32x4_t zero = vdupq_n_s32(0);
     int32x4_t one = vdupq_n_s32(1);
     int32x4_t max = vdupq_n_s32(max_index);

     for (int loop = 0; loop < loop_limit; ++loop, k += 4) {
       // v = virtual_index
       float32x4_t v = vld1q_f32(virtual_index + k);

       // index1 = static_cast<int32_t>(v), then clamp to a valid index range
       // for curve_data
       int32x4_t index1 = vcvtq_s32_f32(v);
       index1 = vmaxq_s32(vminq_s32(index1, max), zero);

       // v = static_cast<float>(v) and save it away for later use.
       v = vcvtq_f32_s32(index1);
       vst1q_f32(&index[k], v);

       // index2 = index1 + 1, then clamp to a valid range for curve_data.
       int32x4_t index2 = vaddq_s32(index1, one);
       index2 = vmaxq_s32(vminq_s32(index2, max), zero);

       // Save index1/2 so we can get the individual parts.  Aligned to
       // 16 bytes for vst1q instruction.
       int32_t i1[4] __attribute__((aligned(16)));
       int32_t i2[4] __attribute__((aligned(16)));
       vst1q_s32(i1, index1);
       vst1q_s32(i2, index2);

       // Get curve elements corresponding to the indices.
       v1[k] = curve_data[i1[0]];
       v2[k] = curve_data[i2[0]];
       v1[k + 1] = curve_data[i1[1]];
       v2[k + 1] = curve_data[i2[1]];
       v1[k + 2] = curve_data[i1[2]];
       v2[k + 2] = curve_data[i2[2]];
       v1[k + 3] = curve_data[i1[3]];
       v2[k + 3] = curve_data[i2[3]];
     }
   }
 #endif

   // Compute values for index1 and load the curve_data corresponding to
   // indices.
   for (; k < frames_to_process; ++k) {
     unsigned index1 =
         ClampTo(static_cast<unsigned>(virtual_index[k]), 0, max_index);
     unsigned index2 = ClampTo(index1 + 1, 0, max_index);
     index[k] = index1;
     v1[k] = curve_data[index1];
     v2[k] = curve_data[index2];
   }

   // f[k] = virtual_index[k] - index[k]
   vector_math::Vsub(virtual_index, 1, index, 1, f, 1, frames_to_process);

   // Do the linear interpolation of the curve data:
   // destination[k] = v1[k] + f[k]*(v2[k] - v1[k])
   //
   // 1. v2[k] = v2[k] - v1[k]
   // 2. v2[k] = f[k]*v2[k] = f[k]*(v2[k] - v1[k])
   // 3. destination[k] = destination[k] + v2[k]
   //                   = v1[k] + f[k]*(v2[k] - v1[k])
   vector_math::Vsub(v2, 1, v1, 1, v2, 1, frames_to_process);
   vector_math::Vmul(f, 1, v2, 1, v2, 1, frames_to_process);
   vector_math::Vadd(v2, 1, v1, 1, destination, 1, frames_to_process);
 }

 }  // namespace blink
	// Copyright 2022 The Chromium Authors
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#ifdef UNSAFE_BUFFERS_BUILD
	// TODO(crbug.com/351564777): Remove this and convert code to safer constructs.
	#pragma allow_unsafe_buffers
	// TODO(crbug.com/390223051): Remove C-library calls to fix the errors.
	#pragma allow_unsafe_libc_calls
	#endif

	#include "third_party/blink/renderer/modules/webaudio/wave_shaper_handler.h"

	#include <algorithm>
	#include <memory>

	#include "base/memory/scoped_refptr.h"
	#include "base/synchronization/lock.h"
	#include "build/build_config.h"
	#include "third_party/blink/renderer/core/typed_arrays/dom_typed_array.h"
	#include "third_party/blink/renderer/modules/webaudio/audio_node.h"
	#include "third_party/blink/renderer/modules/webaudio/audio_node_input.h"
	#include "third_party/blink/renderer/modules/webaudio/audio_node_output.h"
	#include "third_party/blink/renderer/modules/webaudio/base_audio_context.h"
	#include "third_party/blink/renderer/platform/audio/audio_array.h"
	#include "third_party/blink/renderer/platform/audio/audio_utilities.h"
	#include "third_party/blink/renderer/platform/audio/down_sampler.h"
	#include "third_party/blink/renderer/platform/audio/up_sampler.h"
	#include "third_party/blink/renderer/platform/audio/vector_math.h"
	#include "third_party/blink/renderer/platform/wtf/math_extras.h"
	#include "third_party/blink/renderer/platform/wtf/threading.h"

	#if defined(ARCH_CPU_X86_FAMILY)
	#include <xmmintrin.h>
	#elif defined(CPU_ARM_NEON)
	#include <arm_neon.h>
	#endif

	namespace blink {

	namespace {

	constexpr unsigned kDefaultNumberOfOutputChannels = 1;

	// Computes value of the WaveShaper
	double WaveShaperCurveValue(float input,
	const float* curve_data,
	int curve_length) {
	// Calculate a virtual index based on input -1 -> +1 with -1 being
	// curve[0], +1 being curve[curveLength - 1], and 0 being at the center of
	// the curve data. Then linearly interpolate between the two points in the
	// curve.
	const double virtual_index = 0.5 * (input + 1) * (curve_length - 1);
	double output;
	if (virtual_index < 0) {
	// input < -1, so use curve[0]
	output = curve_data[0];
	} else if (virtual_index >= curve_length - 1) {
	// input >= 1, so use last curve value
	output = curve_data[curve_length - 1];
	} else {
	// The general case where -1 <= input < 1, where 0 <= virtualIndex <
	// curveLength - 1, so interpolate between the nearest samples on the
	// curve.
	const unsigned index1 = static_cast<unsigned>(virtual_index);
	const unsigned index2 = index1 + 1;
	const double interpolation_factor = virtual_index - index1;

	const double value1 = curve_data[index1];
	const double value2 = curve_data[index2];

	output =
	(1.0 - interpolation_factor) * value1 + interpolation_factor * value2;
	}

	return output;
	}

	} // namespace

	class WaveShaperKernel final {
	public:
	// Oversampling.
	std::unique_ptr<AudioFloatArray> temp_buffer_;
	std::unique_ptr<AudioFloatArray> temp_buffer2_;
	std::unique_ptr<UpSampler> up_sampler_;
	std::unique_ptr<DownSampler> down_sampler_;
	std::unique_ptr<UpSampler> up_sampler2_;
	std::unique_ptr<DownSampler> down_sampler2_;

	bool IsInitialized() { return temp_buffer_ != nullptr; }

	// Oversampling requires more resources, so let's only allocate them if
	// needed.
	void LazyInitializeOversampling(unsigned render_quantum_frames) {
	if (!IsInitialized()) {
	temp_buffer_ =
	std::make_unique<AudioFloatArray>(render_quantum_frames * 2);
	temp_buffer2_ =
	std::make_unique<AudioFloatArray>(render_quantum_frames * 4);
	up_sampler_ = std::make_unique<UpSampler>(render_quantum_frames);
	down_sampler_ = std::make_unique<DownSampler>(render_quantum_frames * 2);
	up_sampler2_ = std::make_unique<UpSampler>(render_quantum_frames * 2);
	down_sampler2_ = std::make_unique<DownSampler>(render_quantum_frames * 4);
	}
	}
	};

	scoped_refptr<WaveShaperHandler> WaveShaperHandler::Create(AudioNode& node,
	float sample_rate) {
	return base::AdoptRef(new WaveShaperHandler(node, sample_rate));
	}

	WaveShaperHandler::~WaveShaperHandler() {
	if (IsInitialized()) {
	Uninitialize();
	}
	}

	void WaveShaperHandler::SetCurve(const float* curve_data,
	unsigned curve_length) {
	DCHECK(IsMainThread());

	// This synchronizes with process().
	base::AutoLock process_locker(process_lock_);

	if (curve_length == 0 \|\| !curve_data) {
	curve_ = nullptr;
	tail_time_ = 0;
	return;
	}

	// Copy the curve data, if any, to our internal buffer.
	curve_ = std::make_unique<Vector<float>>(curve_length);
	memcpy(curve_->data(), curve_data, sizeof(float) * curve_length);

	// Compute the curve output for a zero input, and set the tail time.
	const double output = WaveShaperCurveValue(0.0, curve_data, curve_length);
	tail_time_ = output == 0 ? 0 : std::numeric_limits<double>::infinity();
	}

	const Vector<float>* WaveShaperHandler::Curve() const {
	DCHECK(IsMainThread());
	return curve_.get();
	}

	void WaveShaperHandler::SetOversample(V8OverSampleType::Enum oversample) {
	DCHECK(IsMainThread());

	base::AutoLock process_locker(process_lock_);
	oversample_ = oversample;

	// Lazy initialize resamplers, and reset resamplers that are no longer used
	switch (oversample) {
	case V8OverSampleType::Enum::kNone:
	for (auto& kernel : kernels_) {
	if (kernel->IsInitialized()) {
	kernel->up_sampler_->Reset();
	kernel->down_sampler_->Reset();
	kernel->up_sampler2_->Reset();
	kernel->down_sampler2_->Reset();
	}
	}
	break;
	case V8OverSampleType::Enum::k2X:
	for (auto& kernel : kernels_) {
	kernel->LazyInitializeOversampling(render_quantum_frames_);
	DCHECK(kernel->IsInitialized());
	kernel->up_sampler2_->Reset();
	kernel->down_sampler2_->Reset();
	}
	break;
	case V8OverSampleType::Enum::k4X: {
	for (auto& kernel : kernels_) {
	kernel->LazyInitializeOversampling(render_quantum_frames_);
	}
	break;
	}
	}

	// Calculate and cache `latency_time_`
	if (kernels_.empty()) {
	latency_time_ = 0;
	} else {
	switch (oversample) {
	case V8OverSampleType::Enum::kNone:
	latency_time_ = 0;
	break;
	case V8OverSampleType::Enum::k2X: {
	const size_t latency_frames =
	kernels_.front()->up_sampler_->LatencyFrames() +
	kernels_.front()->down_sampler_->LatencyFrames();

	latency_time_ = static_cast<double>(latency_frames) / sample_rate_;
	} break;
	case V8OverSampleType::Enum::k4X: {
	// Account for first stage upsampling.
	const size_t latency_frames =
	kernels_.front()->up_sampler_->LatencyFrames() +
	kernels_.front()->down_sampler_->LatencyFrames();

	// Account for second stage upsampling.
	// and divide by 2 to get back down to the regular sample-rate.
	const size_t latency_frames2 =
	(kernels_.front()->up_sampler2_->LatencyFrames() +
	kernels_.front()->down_sampler2_->LatencyFrames()) /
	2;

	latency_time_ = static_cast<double>(latency_frames + latency_frames2) /
	sample_rate_;
	} break;
	}
	}
	}

	V8OverSampleType::Enum WaveShaperHandler::Oversample() const {
	DCHECK(IsMainThread());
	return oversample_;
	}

	WaveShaperHandler::WaveShaperHandler(AudioNode& node, float sample_rate)
	: AudioHandler(NodeType::kNodeTypeWaveShaper, node, sample_rate),
	sample_rate_(sample_rate),
	render_quantum_frames_(
	node.context()->GetDeferredTaskHandler().RenderQuantumFrames()),
	// 4 times render size to handle 4x oversampling.
	virtual_index_(4 * render_quantum_frames_),
	index_(4 * render_quantum_frames_),
	v1_(4 * render_quantum_frames_),
	v2_(4 * render_quantum_frames_),
	f_(4 * render_quantum_frames_) {
	AddInput();
	AddOutput(kDefaultNumberOfOutputChannels);

	Initialize();
	}

	void WaveShaperHandler::Process(uint32_t frames_to_process) {
	AudioBus* destination_bus = Output(0).Bus();

	if (!IsInitialized()) {
	destination_bus->Zero();
	} else {
	scoped_refptr<AudioBus> source_bus = Input(0).Bus();

	// TODO(crbug.com/396149720): if we take "tail time" into account, then we
	// can avoid calling process once the tail dies down.
	if (!Input(0).IsConnected()) {
	source_bus->Zero();
	}

	DCHECK_EQ(source_bus->NumberOfChannels(),
	destination_bus->NumberOfChannels());
	// The audio thread can't block on this lock, so we call tryLock() instead.
	base::AutoTryLock try_locker(process_lock_);
	if (try_locker.is_acquired()) {
	DCHECK_EQ(source_bus->NumberOfChannels(), kernels_.size());
	DCHECK_EQ(frames_to_process, render_quantum_frames_);

	const float* curve_data = curve_ ? curve_->data() : nullptr;
	const int curve_length = curve_ ? curve_->size() : 0;

	// For each channel of our input, process using the corresponding
	// WaveShaperKernel into the output channel.
	for (unsigned i = 0; i < kernels_.size(); ++i) {
	if (!curve_data \|\| !curve_length) {
	// Act as "straight wire" pass-through if no curve is set.
	memcpy(destination_bus->Channel(i)->MutableData(),
	source_bus->Channel(i)->Data(),
	sizeof(float) * frames_to_process);
	} else {
	switch (oversample_) {
	case V8OverSampleType::Enum::kNone:
	WaveShaperCurveValues(destination_bus->Channel(i)->MutableData(),
	source_bus->Channel(i)->Data(),
	frames_to_process, curve_data,
	curve_length);
	break;

	case V8OverSampleType::Enum::k2X: {
	float* temp_p = kernels_[i]->temp_buffer_->Data();
	kernels_[i]->up_sampler_->Process(source_bus->Channel(i)->Data(),
	temp_p, frames_to_process);

	// Process at 2x up-sampled rate.
	WaveShaperCurveValues(temp_p, temp_p, frames_to_process * 2,
	curve_data, curve_length);

	kernels_[i]->down_sampler_->Process(
	temp_p, destination_bus->Channel(i)->MutableData(),
	frames_to_process * 2);
	} break;

	case V8OverSampleType::Enum::k4X: {
	float* temp_p = kernels_[i]->temp_buffer_->Data();
	float* temp_p2 = kernels_[i]->temp_buffer2_->Data();

	kernels_[i]->up_sampler_->Process(source_bus->Channel(i)->Data(),
	temp_p, frames_to_process);
	kernels_[i]->up_sampler2_->Process(temp_p, temp_p2,
	frames_to_process * 2);

	// Process at 4x up-sampled rate.
	WaveShaperCurveValues(temp_p2, temp_p2, frames_to_process * 4,
	curve_data, curve_length);

	kernels_[i]->down_sampler2_->Process(temp_p2, temp_p,
	frames_to_process * 4);
	kernels_[i]->down_sampler_->Process(
	temp_p, destination_bus->Channel(i)->MutableData(),
	frames_to_process * 2);
	} break;
	}
	}
	}
	} else {
	// The tryLock() failed. We must be in the middle of modifying guarded
	// values.
	destination_bus->Zero();
	}
	}
	}

	void WaveShaperHandler::Initialize() {
	if (IsInitialized()) {
	return;
	}

	{
	base::AutoLock locker(process_lock_);
	DCHECK(!kernels_.size());

	// Create processing kernels, one per channel.
	for (unsigned i = 0; i < Output(0).NumberOfChannels(); ++i) {
	kernels_.push_back(std::make_unique<WaveShaperKernel>());
	if (oversample_ != V8OverSampleType::Enum::kNone) {
	kernels_.back()->LazyInitializeOversampling(render_quantum_frames_);
	}
	}
	}

	AudioHandler::Initialize();
	}

	void WaveShaperHandler::Uninitialize() {
	if (!IsInitialized()) {
	return;
	}

	{
	base::AutoLock locker(process_lock_);
	kernels_.clear();
	}

	AudioHandler::Uninitialize();
	}

	void WaveShaperHandler::CheckNumberOfChannelsForInput(AudioNodeInput* input) {
	DCHECK(Context()->IsAudioThread());
	Context()->AssertGraphOwner();

	DCHECK_EQ(input, &Input(0));

	unsigned number_of_channels = input->NumberOfChannels();

	if (IsInitialized() && number_of_channels != Output(0).NumberOfChannels()) {
	// We're already initialized but the channel count has changed.
	Uninitialize();
	}

	if (!IsInitialized()) {
	// This will propagate the channel count to any nodes connected further
	// down the chain...
	Output(0).SetNumberOfChannels(number_of_channels);

	// Re-initialize the processor with the new channel count.
	Initialize();
	}

	AudioHandler::CheckNumberOfChannelsForInput(input);
	}

	bool WaveShaperHandler::RequiresTailProcessing() const {
	// Always return true even if the tail time and latency might both be zero.
	return true;
	}

	double WaveShaperHandler::TailTime() const {
	DCHECK(!IsMainThread());
	base::AutoTryLock try_locker(process_lock_);
	if (try_locker.is_acquired()) {
	return tail_time_;
	} else {
	// Since we don't want to block the Audio Device thread, we return a large
	// value instead of trying to acquire the lock.
	return std::numeric_limits<double>::infinity();
	}
	}

	double WaveShaperHandler::LatencyTime() const {
	DCHECK(!IsMainThread());
	base::AutoTryLock try_locker(process_lock_);
	if (try_locker.is_acquired()) {
	return latency_time_;
	} else {
	// Since we don't want to block the Audio Device thread, we return a large
	// value instead of trying to acquire the lock.
	return std::numeric_limits<double>::infinity();
	}
	}

	void WaveShaperHandler::PullInputs(uint32_t frames_to_process) {
	// Render directly into output bus for in-place processing
	Input(0).Pull(Output(0).Bus(), frames_to_process);
	}

	// Like WaveShaperCurveValue, but computes the values for a vector of
	// inputs.
	void WaveShaperHandler::WaveShaperCurveValues(float* destination,
	const float* source,
	uint32_t frames_to_process,
	const float* curve_data,
	int curve_length) {
	DCHECK_LE(frames_to_process, virtual_index_.size());
	// Index into the array computed from the source value.
	float* virtual_index = virtual_index_.Data();

	// virtual_index[k] =
	// ClampTo(0.5 * (source[k] + 1) * (curve_length - 1),
	// 0.0f,
	// static_cast<float>(curve_length - 1))

	// Add 1 to source puttting result in virtual_index
	vector_math::Vsadd(source, 1, 1, virtual_index, 1, frames_to_process);

	// Scale virtual_index in place by (curve_lenth -1)/2
	vector_math::Vsmul(virtual_index, 1, 0.5 * (curve_length - 1), virtual_index,
	1, frames_to_process);

	// Clip virtual_index, in place.
	vector_math::Vclip(virtual_index, 1, 0, curve_length - 1, virtual_index, 1,
	frames_to_process);

	// index = floor(virtual_index)
	DCHECK_LE(frames_to_process, index_.size());
	float* index = index_.Data();

	// v1 and v2 hold the curve_data corresponding to the closest curve
	// values to the source sample. To save memory, v1 will use the
	// destination array.
	DCHECK_LE(frames_to_process, v1_.size());
	DCHECK_LE(frames_to_process, v2_.size());
	float* v1 = v1_.Data();
	float* v2 = v2_.Data();

	// Interpolation factor: virtual_index - index.
	DCHECK_LE(frames_to_process, f_.size());
	float* f = f_.Data();

	int max_index = curve_length - 1;
	unsigned k = 0;
	#if defined(ARCH_CPU_X86_FAMILY)
	{
	int loop_limit = frames_to_process / 4;

	// one = 1
	__m128i one = _mm_set1_epi32(1);

	// Do 4 eleemnts at a time
	for (int loop = 0; loop < loop_limit; ++loop, k += 4) {
	// v = virtual_index[k]
	__m128 v = _mm_loadu_ps(virtual_index + k);

	// index1 = static_cast<int>(v);
	__m128i index1 = _mm_cvttps_epi32(v);

	// v = static_cast<float>(index1) and save result to index[k:k+3]
	v = _mm_cvtepi32_ps(index1);
	_mm_storeu_ps(&index[k], v);

	// index2 = index2 + 1;
	__m128i index2 = _mm_add_epi32(index1, one);

	// Convert index1/index2 to arrays of 32-bit int values that are our
	// array indices to use to get the curve data.
	int32_t* i1 = reinterpret_cast<int32_t*>(&index1);
	int32_t* i2 = reinterpret_cast<int32_t*>(&index2);

	// Get the curve_data values and save them in v1 and v2,
	// carefully clamping the values. If the input is NaN, index1
	// could be 0x8000000.
	v1[k] = curve_data[ClampTo(i1[0], 0, max_index)];
	v2[k] = curve_data[ClampTo(i2[0], 0, max_index)];
	v1[k + 1] = curve_data[ClampTo(i1[1], 0, max_index)];
	v2[k + 1] = curve_data[ClampTo(i2[1], 0, max_index)];
	v1[k + 2] = curve_data[ClampTo(i1[2], 0, max_index)];
	v2[k + 2] = curve_data[ClampTo(i2[2], 0, max_index)];
	v1[k + 3] = curve_data[ClampTo(i1[3], 0, max_index)];
	v2[k + 3] = curve_data[ClampTo(i2[3], 0, max_index)];
	}
	}
	#elif defined(CPU_ARM_NEON)
	{
	int loop_limit = frames_to_process / 4;

	// Neon constants:
	// zero = 0
	// one = 1
	// max = max_index
	int32x4_t zero = vdupq_n_s32(0);
	int32x4_t one = vdupq_n_s32(1);
	int32x4_t max = vdupq_n_s32(max_index);

	for (int loop = 0; loop < loop_limit; ++loop, k += 4) {
	// v = virtual_index
	float32x4_t v = vld1q_f32(virtual_index + k);

	// index1 = static_cast<int32_t>(v), then clamp to a valid index range
	// for curve_data
	int32x4_t index1 = vcvtq_s32_f32(v);
	index1 = vmaxq_s32(vminq_s32(index1, max), zero);

	// v = static_cast<float>(v) and save it away for later use.
	v = vcvtq_f32_s32(index1);
	vst1q_f32(&index[k], v);

	// index2 = index1 + 1, then clamp to a valid range for curve_data.
	int32x4_t index2 = vaddq_s32(index1, one);
	index2 = vmaxq_s32(vminq_s32(index2, max), zero);

	// Save index1/2 so we can get the individual parts. Aligned to
	// 16 bytes for vst1q instruction.
	int32_t i1[4] __attribute__((aligned(16)));
	int32_t i2[4] __attribute__((aligned(16)));
	vst1q_s32(i1, index1);
	vst1q_s32(i2, index2);

	// Get curve elements corresponding to the indices.
	v1[k] = curve_data[i1[0]];
	v2[k] = curve_data[i2[0]];
	v1[k + 1] = curve_data[i1[1]];
	v2[k + 1] = curve_data[i2[1]];
	v1[k + 2] = curve_data[i1[2]];
	v2[k + 2] = curve_data[i2[2]];
	v1[k + 3] = curve_data[i1[3]];
	v2[k + 3] = curve_data[i2[3]];
	}
	}
	#endif

	// Compute values for index1 and load the curve_data corresponding to
	// indices.
	for (; k < frames_to_process; ++k) {
	unsigned index1 =
	ClampTo(static_cast<unsigned>(virtual_index[k]), 0, max_index);
	unsigned index2 = ClampTo(index1 + 1, 0, max_index);
	index[k] = index1;
	v1[k] = curve_data[index1];
	v2[k] = curve_data[index2];
	}

	// f[k] = virtual_index[k] - index[k]
	vector_math::Vsub(virtual_index, 1, index, 1, f, 1, frames_to_process);

	// Do the linear interpolation of the curve data:
	// destination[k] = v1[k] + f[k]*(v2[k] - v1[k])
	//
	// 1. v2[k] = v2[k] - v1[k]
	// 2. v2[k] = f[k]v2[k] = f[k](v2[k] - v1[k])
	// 3. destination[k] = destination[k] + v2[k]
	// = v1[k] + f[k]*(v2[k] - v1[k])
	vector_math::Vsub(v2, 1, v1, 1, v2, 1, frames_to_process);
	vector_math::Vmul(f, 1, v2, 1, v2, 1, frames_to_process);
	vector_math::Vadd(v2, 1, v1, 1, destination, 1, frames_to_process);
	}

	} // namespace blink