/*
* Copyright (C) 2010, Google Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
#include "platform/audio/VectorMath.h"
#include <cmath>
#include "build/build_config.h"
#include "platform/wtf/Assertions.h"
#include "platform/wtf/CPU.h"
#if defined(OS_MACOSX)
#include "platform/audio/mac/VectorMathMac.h"
#elif WTF_CPU_ARM_NEON
#include "platform/audio/cpu/arm/VectorMathNEON.h"
#elif defined(ARCH_CPU_X86_FAMILY)
#include "platform/audio/cpu/x86/VectorMathX86.h"
#else
#include "platform/audio/VectorMathScalar.h"
#endif
#if HAVE_MIPS_MSA_INTRINSICS
#include "platform/cpu/mips/CommonMacrosMSA.h"
#endif
#include <algorithm>
namespace blink {
namespace VectorMath {
namespace {
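// Select the platform-specific implementation. Every Impl namespace exposes
// the same set of kernels (the naming follows Apple's vDSP routines), so the
// wrappers below can forward to whichever backend was compiled in.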
#if defined(OS_MACOSX)
namespace Impl = Mac;
#elif WTF_CPU_ARM_NEON
namespace Impl = NEON;
#elif defined(ARCH_CPU_X86_FAMILY)
namespace Impl = X86;
#else
namespace Impl = Scalar;
#endif
} // namespace
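
// Vector scalar multiply-add:
//   dest_p[k * dest_stride] += *scale * source_p[k * source_stride]
// for k in [0, frames_to_process).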
void Vsma(const float* source_p,
int source_stride,
const float* scale,
float* dest_p,
int dest_stride,
size_t frames_to_process) {
#if HAVE_MIPS_MSA_INTRINSICS
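  // MSA fast path for the common unit-stride case: each iteration handles 32
  // floats as eight 4-lane vectors. The LD_SP8/ST_SP8 macros advance their
  // pointer arguments, so the up-to-31 leftover frames are handled by the
  // Impl call below. Because Vsma both reads and writes the destination, a
  // separate copy (destPCopy) is advanced by the loads while dest_p is
  // advanced by the stores. The other MSA blocks in this file follow the
  // same pattern.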
int n = frames_to_process;
if ((source_stride == 1) && (dest_stride == 1)) {
float* destPCopy = dest_p;
v4f32 vScale;
v4f32 vSrc0, vSrc1, vSrc2, vSrc3, vSrc4, vSrc5, vSrc6, vSrc7;
v4f32 vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7;
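    // __msa_fill_w() splats an integer, so the scale's bit pattern is routed
    // through the FloatInt union to replicate the float across all lanes.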
FloatInt scaleVal;
scaleVal.floatVal = *scale;
vScale = (v4f32)__msa_fill_w(scaleVal.intVal);
for (; n >= 32; n -= 32) {
LD_SP8(source_p, 4, vSrc0, vSrc1, vSrc2, vSrc3, vSrc4, vSrc5, vSrc6,
vSrc7);
LD_SP8(destPCopy, 4, vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6,
vDst7);
VSMA4(vSrc0, vSrc1, vSrc2, vSrc3, vDst0, vDst1, vDst2, vDst3, vScale);
VSMA4(vSrc4, vSrc5, vSrc6, vSrc7, vDst4, vDst5, vDst6, vDst7, vScale);
ST_SP8(vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7, dest_p, 4);
}
}
frames_to_process = n;
#endif
Impl::Vsma(source_p, source_stride, scale, dest_p, dest_stride,
frames_to_process);
}
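
// Vector scalar multiply:
//   dest_p[k * dest_stride] = *scale * source_p[k * source_stride]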
void Vsmul(const float* source_p,
int source_stride,
const float* scale,
float* dest_p,
int dest_stride,
size_t frames_to_process) {
#if HAVE_MIPS_MSA_INTRINSICS
int n = frames_to_process;
if ((source_stride == 1) && (dest_stride == 1)) {
v4f32 vScale;
v4f32 vSrc0, vSrc1, vSrc2, vSrc3, vSrc4, vSrc5, vSrc6, vSrc7;
v4f32 vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7;
FloatInt scaleVal;
scaleVal.floatVal = *scale;
vScale = (v4f32)__msa_fill_w(scaleVal.intVal);
for (; n >= 32; n -= 32) {
LD_SP8(source_p, 4, vSrc0, vSrc1, vSrc2, vSrc3, vSrc4, vSrc5, vSrc6,
vSrc7);
VSMUL4(vSrc0, vSrc1, vSrc2, vSrc3, vDst0, vDst1, vDst2, vDst3, vScale);
VSMUL4(vSrc4, vSrc5, vSrc6, vSrc7, vDst4, vDst5, vDst6, vDst7, vScale);
ST_SP8(vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7, dest_p, 4);
}
}
frames_to_process = n;
#endif
Impl::Vsmul(source_p, source_stride, scale, dest_p, dest_stride,
frames_to_process);
}
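
// Element-wise vector addition:
//   dest_p[k * dest_stride] =
//       source1p[k * source_stride1] + source2p[k * source_stride2]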
void Vadd(const float* source1p,
int source_stride1,
const float* source2p,
int source_stride2,
float* dest_p,
int dest_stride,
size_t frames_to_process) {
#if HAVE_MIPS_MSA_INTRINSICS
int n = frames_to_process;
if ((source_stride1 == 1) && (source_stride2 == 1) && (dest_stride == 1)) {
v4f32 vSrc1P0, vSrc1P1, vSrc1P2, vSrc1P3, vSrc1P4, vSrc1P5, vSrc1P6,
vSrc1P7;
v4f32 vSrc2P0, vSrc2P1, vSrc2P2, vSrc2P3, vSrc2P4, vSrc2P5, vSrc2P6,
vSrc2P7;
v4f32 vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7;
for (; n >= 32; n -= 32) {
LD_SP8(source1p, 4, vSrc1P0, vSrc1P1, vSrc1P2, vSrc1P3, vSrc1P4, vSrc1P5,
vSrc1P6, vSrc1P7);
LD_SP8(source2p, 4, vSrc2P0, vSrc2P1, vSrc2P2, vSrc2P3, vSrc2P4, vSrc2P5,
vSrc2P6, vSrc2P7);
ADD4(vSrc1P0, vSrc2P0, vSrc1P1, vSrc2P1, vSrc1P2, vSrc2P2, vSrc1P3,
vSrc2P3, vDst0, vDst1, vDst2, vDst3);
ADD4(vSrc1P4, vSrc2P4, vSrc1P5, vSrc2P5, vSrc1P6, vSrc2P6, vSrc1P7,
vSrc2P7, vDst4, vDst5, vDst6, vDst7);
ST_SP8(vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7, dest_p, 4);
}
}
frames_to_process = n;
#endif
Impl::Vadd(source1p, source_stride1, source2p, source_stride2, dest_p,
dest_stride, frames_to_process);
}
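
// Element-wise vector multiplication:
//   dest_p[k * dest_stride] =
//       source1p[k * source_stride1] * source2p[k * source_stride2]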
void Vmul(const float* source1p,
int source_stride1,
const float* source2p,
int source_stride2,
float* dest_p,
int dest_stride,
size_t frames_to_process) {
#if HAVE_MIPS_MSA_INTRINSICS
int n = frames_to_process;
if ((source_stride1 == 1) && (source_stride2 == 1) && (dest_stride == 1)) {
v4f32 vSrc1P0, vSrc1P1, vSrc1P2, vSrc1P3, vSrc1P4, vSrc1P5, vSrc1P6,
vSrc1P7;
v4f32 vSrc2P0, vSrc2P1, vSrc2P2, vSrc2P3, vSrc2P4, vSrc2P5, vSrc2P6,
vSrc2P7;
v4f32 vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7;
for (; n >= 32; n -= 32) {
LD_SP8(source1p, 4, vSrc1P0, vSrc1P1, vSrc1P2, vSrc1P3, vSrc1P4, vSrc1P5,
vSrc1P6, vSrc1P7);
LD_SP8(source2p, 4, vSrc2P0, vSrc2P1, vSrc2P2, vSrc2P3, vSrc2P4, vSrc2P5,
vSrc2P6, vSrc2P7);
MUL4(vSrc1P0, vSrc2P0, vSrc1P1, vSrc2P1, vSrc1P2, vSrc2P2, vSrc1P3,
vSrc2P3, vDst0, vDst1, vDst2, vDst3);
MUL4(vSrc1P4, vSrc2P4, vSrc1P5, vSrc2P5, vSrc1P6, vSrc2P6, vSrc1P7,
vSrc2P7, vDst4, vDst5, vDst6, vDst7);
ST_SP8(vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7, dest_p, 4);
}
}
frames_to_process = n;
#endif
Impl::Vmul(source1p, source_stride1, source2p, source_stride2, dest_p,
dest_stride, frames_to_process);
}
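
// Complex vector multiplication on split-complex (separate real/imaginary
// array) inputs:
//   real_dest_p[k] = real1p[k] * real2p[k] - imag1p[k] * imag2p[k]
//   imag_dest_p[k] = real1p[k] * imag2p[k] + imag1p[k] * real2p[k]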
void Zvmul(const float* real1p,
const float* imag1p,
const float* real2p,
const float* imag2p,
float* real_dest_p,
float* imag_dest_p,
size_t frames_to_process) {
Impl::Zvmul(real1p, imag1p, real2p, imag2p, real_dest_p, imag_dest_p,
frames_to_process);
}
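
// Sum of squares: *sum_p = sum over k of source_p[k * source_stride]^2.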
void Vsvesq(const float* source_p,
int source_stride,
float* sum_p,
size_t frames_to_process) {
float sum = 0;
Impl::Vsvesq(source_p, source_stride, &sum, frames_to_process);
DCHECK(sum_p);
*sum_p = sum;
}
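
// Maximum magnitude: *max_p = max over k of |source_p[k * source_stride]|.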
void Vmaxmgv(const float* source_p,
int source_stride,
float* max_p,
size_t frames_to_process) {
float max = 0;
#if HAVE_MIPS_MSA_INTRINSICS
int n = frames_to_process;
if (source_stride == 1) {
    v4f32 vMax = {0, 0, 0, 0};
v4f32 vSrc0, vSrc1, vSrc2, vSrc3, vSrc4, vSrc5, vSrc6, vSrc7;
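    // ANDing with 0x7FFFFFFF clears the IEEE-754 sign bit, yielding |x| per
    // lane; the four per-lane maxima are reduced to a scalar after the loop.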
const v16i8 vSignBitMask = (v16i8)__msa_fill_w(0x7FFFFFFF);
for (; n >= 32; n -= 32) {
LD_SP8(source_p, 4, vSrc0, vSrc1, vSrc2, vSrc3, vSrc4, vSrc5, vSrc6,
vSrc7);
AND_W4_SP(vSrc0, vSrc1, vSrc2, vSrc3, vSignBitMask);
VMAX_W4_SP(vSrc0, vSrc1, vSrc2, vSrc3, vMax);
AND_W4_SP(vSrc4, vSrc5, vSrc6, vSrc7, vSignBitMask);
VMAX_W4_SP(vSrc4, vSrc5, vSrc6, vSrc7, vMax);
}
max = std::max(max, vMax[0]);
max = std::max(max, vMax[1]);
max = std::max(max, vMax[2]);
max = std::max(max, vMax[3]);
}
frames_to_process = n;
#endif
Impl::Vmaxmgv(source_p, source_stride, &max, frames_to_process);
DCHECK(max_p);
*max_p = max;
}
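
// Clamp each element to the inclusive range
// [*low_threshold_p, *high_threshold_p]:
//   dest_p[k * dest_stride] = clamp(source_p[k * source_stride])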
void Vclip(const float* source_p,
int source_stride,
const float* low_threshold_p,
const float* high_threshold_p,
float* dest_p,
int dest_stride,
size_t frames_to_process) {
float low_threshold = *low_threshold_p;
float high_threshold = *high_threshold_p;
#if DCHECK_IS_ON()
  // Perform the same DCHECKs that |clampTo| would perform, so the optimized
  // paths below do not have to repeat them. Note that the check must honor
  // the source stride.
  for (size_t i = 0u; i < frames_to_process; ++i)
    DCHECK(!std::isnan(source_p[i * source_stride]));
// This also ensures that thresholds are not NaNs.
DCHECK_LE(low_threshold, high_threshold);
#endif
#if HAVE_MIPS_MSA_INTRINSICS
int n = frames_to_process;
if ((source_stride == 1) && (dest_stride == 1)) {
v4f32 vSrc0, vSrc1, vSrc2, vSrc3, vSrc4, vSrc5, vSrc6, vSrc7;
v4f32 vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7;
v4f32 vLowThr, vHighThr;
FloatInt lowThr, highThr;
lowThr.floatVal = low_threshold;
highThr.floatVal = high_threshold;
vLowThr = (v4f32)__msa_fill_w(lowThr.intVal);
vHighThr = (v4f32)__msa_fill_w(highThr.intVal);
for (; n >= 32; n -= 32) {
LD_SP8(source_p, 4, vSrc0, vSrc1, vSrc2, vSrc3, vSrc4, vSrc5, vSrc6,
vSrc7);
VCLIP4(vSrc0, vSrc1, vSrc2, vSrc3, vLowThr, vHighThr, vDst0, vDst1, vDst2,
vDst3);
VCLIP4(vSrc4, vSrc5, vSrc6, vSrc7, vLowThr, vHighThr, vDst4, vDst5, vDst6,
vDst7);
ST_SP8(vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7, dest_p, 4);
}
}
frames_to_process = n;
#endif
Impl::Vclip(source_p, source_stride, &low_threshold, &high_threshold, dest_p,
dest_stride, frames_to_process);
}
} // namespace VectorMath
} // namespace blink