| /* |
| * Copyright (c) 2017, Intel Corporation |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * * Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * * Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * * Neither the name of the Intel Corporation nor the |
| * names of its contributors may be used to endorse or promote products |
| * derived from this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| * POSSIBILITY OF SUCH DAMAGE. |
| * |
| * Author: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com> |
| */ |
| |
| #ifndef FIR_HIFI2EP_H |
| #define FIR_HIFI2EP_H |
| |
| #include "fir_config.h" |
| |
| #if FIR_HIFIEP |
| |
| #include <xtensa/config/defs.h> |
| #include <xtensa/tie/xt_hifi2.h> |
| #include <sof/audio/format.h> |
| |
| struct fir_state_32x16 { /* State of one FIR filter with 32 bit data and 16 bit coefficients */ |
| ae_p24x2f *rwp; /* Circular read and write pointer into the delay line */ |
| ae_p24f *delay; /* Pointer to FIR delay line, used as circular buffer begin */ |
| ae_p24f *delay_end; /* Pointer to FIR delay line end, used as circular buffer end */ |
| ae_p16x2s *coef; /* Pointer to FIR coefficients, read as pairs of 16 bit values */ |
| int mute; /* Set to 1 to mute EQ output, 0 otherwise */ |
| int taps; /* Number of FIR taps, the inline kernels run taps >> 2 loops of four taps */ |
| int length; /* Number of FIR taps plus input length (even) */ |
| int in_shift; /* Amount of right shifts at input */ |
| int out_shift; /* Amount of right shifts at output */ |
| }; |
| |
| void fir_reset(struct fir_state_32x16 *fir); /* Defined outside this header. |
| * NOTE(review): presumably returns the state in fir to an unconfigured |
| * state - confirm against the implementation. |
| */ |
| |
| int fir_init_coef(struct fir_state_32x16 *fir, int16_t config[]); /* Defined |
| * outside this header. NOTE(review): presumably sets up fir from the 16 bit |
| * words in config and returns a status or size - confirm the meaning of the |
| * return value against the implementation. |
| */ |
| |
| void fir_init_delay(struct fir_state_32x16 *fir, int32_t **data); /* Defined |
| * outside this header. NOTE(review): data is a pointer to a pointer, presumably |
| * so that the caller's delay memory pointer can be updated - confirm. |
| */ |
| |
| void eq_fir_2x_s32_hifiep(struct fir_state_32x16 fir[], |
| struct comp_buffer *source, |
| struct comp_buffer *sink, |
| int frames, int nch); /* Defined outside this header. NOTE(review): |
| * presumably filters frames frames of nch channels from source to sink with |
| * one fir[] entry per channel, two samples per kernel call - confirm. |
| */ |
| |
| void eq_fir_s32_hifiep(struct fir_state_32x16 fir[], struct comp_buffer *source, |
| struct comp_buffer *sink, int frames, int nch); /* Defined outside this |
| * header. NOTE(review): presumably filters frames frames of nch channels from |
| * source to sink with one fir[] entry per channel, one sample per kernel |
| * call - confirm. |
| */ |
| |
| /* The next trivial functions are inlined */ |
| |
| /* Mark the filter as muted by setting the mute flag of the state to 1. */ |
| static inline void fir_mute(struct fir_state_32x16 *fir) |
| { |
| int *mute_flag = &fir->mute; |
| |
| *mute_flag = 1; |
| } |
| |
| /* Mark the filter as not muted by clearing the mute flag of the state to 0. */ |
| static inline void fir_unmute(struct fir_state_32x16 *fir) |
| { |
| int *mute_flag = &fir->mute; |
| |
| *mute_flag = 0; |
| } |
| |
| /* Set the circular buffer begin (AE_SETCBEGIN0) and end (AE_SETCEND0) to the |
| * bounds of the FIR input data delay line of the given filter state. |
| */ |
| static inline void fir_hifiep_setup_circular(struct fir_state_32x16 *fir) |
| { |
| ae_p24f *first = fir->delay; |
| ae_p24f *last = fir->delay_end; |
| |
| AE_SETCBEGIN0(first); |
| AE_SETCEND0(last); |
| } |
| |
| void fir_get_lrshifts(struct fir_state_32x16 *fir, int *lshift, |
| int *rshift); /* Defined outside this header. NOTE(review): presumably writes |
| * to *lshift and *rshift the shift counts that are passed as the lshift and |
| * rshift arguments of the inline FIR kernels in this header - confirm. |
| */ |
| |
| /* The next functions are inlined to optimize execution speed */ |
| |
| /* HiFi EP has the following numbers of registers that should not be exceeded |
| * 4x 56 bit registers in register file Q |
| * 8x 48 bit registers in register file P |
| */ |
| |
| static inline void fir_32x16_hifiep(struct fir_state_32x16 *fir, int32_t *x, |
| int32_t *y, int lshift, int rshift) |
| { |
| /* Compute one FIR output sample from one input sample. |
| * |
| * fir: filter state; rwp, coef and taps are read here |
| * x: pointer to the input sample, written to the delay line at fir->rwp |
| * y: pointer to where the scaled and rounded output sample is stored |
| * lshift: left shift count applied to the accumulator before rshift |
| * rshift: right shift count applied to the accumulator after lshift |
| * |
| * NOTE(review): the _C accesses presumably wrap within the circular |
| * buffer set by fir_hifiep_setup_circular() and post-update their |
| * pointer argument - confirm against the HiFi EP ISA reference. |
| * |
| * This function uses |
| * 1x 56 bit registers Q, |
| * 4x 48 bit registers P |
| * 3x integers |
| * 2x address pointers, |
| */ |
| ae_q56s a; /* Input sample temporary, then the output accumulator */ |
| ae_p24x2f data2; /* Pair of delay line samples */ |
| ae_p24x2f coef2; /* Pair of coefficients */ |
| ae_p24x2f d0; |
| ae_p24x2f d1; |
| int i; |
| ae_p24x2f *dp = fir->rwp; /* Data read pointer, taken from fir->rwp before the write below */ |
| ae_p16x2s *coefp = fir->coef; |
| const int taps_div_4 = fir->taps >> 2; /* Each loop iteration handles four taps */ |
| const int inc = sizeof(int32_t); /* Read pointer step of one sample */ |
| |
| /* Write sample to delay. The step argument is -sizeof(int32_t); sizeof |
| * is unsigned so this relies on the conversion to the step type of the |
| * intrinsic (NOTE(review): confirm that it is a signed type). |
| */ |
| a = AE_LQ32F_I((ae_q32s *)x, 0); |
| AE_SQ32F_C(a, (ae_q32s *)fir->rwp, -sizeof(int32_t)); |
| |
| /* Note: If this function is converted to handle two samples per call, |
| * as in fir_32x16_2x_hifiep(), the data load can be done with single |
| * instruction AE_LP24X2F_C(data2, dp, sizeof(ae_p24x2f)); |
| */ |
| a = AE_ZEROQ56(); |
| for (i = 0; i < taps_div_4; i++) { |
| /* Load two coefficients. coef2_h contains tap coef[n] |
| * and coef2_l contains coef[n+1]. |
| */ |
| coef2 = AE_LP16X2F_I(coefp, 0); |
| |
| /* Load two data samples and pack d0 to data2_h and |
| * d1 to data2_l. |
| */ |
| AE_LP24F_C(d0, dp, inc); |
| AE_LP24F_C(d1, dp, inc); |
| data2 = AE_SELP24_LL(d0, d1); |
| |
| /* Accumulate |
| * data2_h * coef2_h + data2_l * coef2_l. The Q1.31 |
| * data and Q1.15 coefficients are used as 24 bits as |
| * Q1.23 values. |
| */ |
| AE_MULAAFP24S_HH_LL(a, data2, coef2); |
| |
| /* Repeat the same for next two taps and increase coefp by two |
| * coefficient pairs, i.e. by the four taps handled in this iteration. |
| */ |
| coef2 = AE_LP16X2F_I(coefp, sizeof(ae_p16x2s)); |
| AE_LP24F_C(d0, dp, inc); |
| AE_LP24F_C(d1, dp, inc); |
| data2 = AE_SELP24_LL(d0, d1); |
| AE_MULAAFP24S_HH_LL(a, data2, coef2); |
| coefp += 2; |
| } |
| |
| /* Do scaling shifts, round to 32 bits and store sample. */ |
| a = AE_SRAAQ56(AE_SLLASQ56S(a, lshift), rshift); |
| AE_SQ32F_I(AE_ROUNDSQ32SYM(a), (ae_q32s *)y, 0); |
| } |
| |
| /* HiFi EP has the following numbers of registers that should not be exceeded |
| * 4x 56 bit registers in register file Q |
| * 8x 48 bit registers in register file P |
| */ |
| |
| static inline void fir_32x16_2x_hifiep(struct fir_state_32x16 *fir, int32_t *x0, |
| int32_t *x1, int32_t *y0, int32_t *y1, |
| int lshift, int rshift) |
| { |
| /* Compute two FIR output samples from two input samples in one pass |
| * over the coefficients. |
| * |
| * fir: filter state; rwp, coef and taps are read here |
| * x0: pointer to the input sample written to the delay line first |
| * x1: pointer to the input sample written to the delay line second |
| * y0: pointer to where the output from accumulator a is stored |
| * y1: pointer to where the output from accumulator b is stored |
| * lshift: left shift count applied to both accumulators before rshift |
| * rshift: right shift count applied to both accumulators after lshift |
| * |
| * NOTE(review): the _C accesses presumably wrap within the circular |
| * buffer set by fir_hifiep_setup_circular() and post-update their |
| * pointer argument - confirm against the HiFi EP ISA reference. |
| * |
| * This function uses |
| * 2x 56 bit registers Q, |
| * 4x 48 bit registers P |
| * 3x integers |
| * 2x address pointers, |
| */ |
| ae_q56s a; /* Input sample temporary, then the accumulator for *y0 */ |
| ae_q56s b; /* Accumulator for *y1 */ |
| ae_p24x2f d0; /* Current data pair */ |
| ae_p24x2f d1; /* Next data pair */ |
| ae_p24x2f d3; /* Pair packed from d0_l and d1_h */ |
| ae_p24x2f coefs; /* Pair of coefficients */ |
| int i; |
| ae_p24x2f *dp; /* Data read pointer */ |
| ae_p16x2s *coefp = fir->coef; |
| const int taps_div_4 = fir->taps >> 2; /* Each loop iteration handles four taps */ |
| const int inc = 2 * sizeof(int32_t); /* Read pointer step of one sample pair */ |
| |
| /* Write samples to delay. The read pointer dp is taken from fir->rwp |
| * after the x0 store and before the x1 store. |
| */ |
| a = AE_LQ32F_I((ae_q32s *)x0, 0); |
| AE_SQ32F_C(a, (ae_q32s *)fir->rwp, -sizeof(int32_t)); |
| a = AE_LQ32F_I((ae_q32s *)x1, 0); |
| dp = fir->rwp; |
| AE_SQ32F_C(a, (ae_q32s *)fir->rwp, -sizeof(int32_t)); |
| |
| /* Note: Since two samples are handled per call the data load is done |
| * for a sample pair with the single instruction AE_LP24X2F_C(). |
| */ |
| a = AE_ZEROQ56(); |
| b = AE_ZEROQ56(); |
| /* Preload the first data pair to d0 for the first loop iteration. */ |
| AE_LP24X2F_C(d0, dp, inc); |
| for (i = 0; i < taps_div_4; i++) { |
| /* Load two coefficients. coefs_h contains tap coef[n] |
| * and coefs_l contains coef[n+1]. |
| */ |
| coefs = AE_LP16X2F_I(coefp, 0); |
| |
| /* Load two data samples. Upper part d1_h is x[n+1] and |
| * lower part d1_l is x[n]. |
| */ |
| AE_LP24X2F_C(d1, dp, inc); |
| |
| /* Accumulate |
| * b += d0_h * coefs_h + d0_l * coefs_l. The Q1.31 data |
| * and Q1.15 coefficients are converted to 24 bits as |
| * Q1.23 values. |
| */ |
| AE_MULAAFP24S_HH_LL(b, d0, coefs); |
| |
| /* Pack d0_l and d1_h to d3. Then accumulate |
| * a += d3_h * coefs_h + d3_l * coefs_l. Pass d1 to d0 for |
| * next unrolled iteration. |
| */ |
| d3 = AE_SELP24_LH(d0, d1); |
| AE_MULAAFP24S_HH_LL(a, d3, coefs); |
| d0 = d1; |
| |
| /* Repeat the same for next two taps and increase coefp by two |
| * coefficient pairs, i.e. by the four taps handled in this iteration. |
| */ |
| coefs = AE_LP16X2F_I(coefp, sizeof(ae_p16x2s)); |
| AE_LP24X2F_C(d1, dp, inc); |
| AE_MULAAFP24S_HH_LL(b, d0, coefs); |
| d3 = AE_SELP24_LH(d0, d1); |
| AE_MULAAFP24S_HH_LL(a, d3, coefs); |
| d0 = d1; |
| coefp += 2; |
| } |
| |
| /* Do scaling shifts, round to 32 bits and store both samples. */ |
| b = AE_SRAAQ56(AE_SLLASQ56S(b, lshift), rshift); |
| a = AE_SRAAQ56(AE_SLLASQ56S(a, lshift), rshift); |
| AE_SQ32F_I(AE_ROUNDSQ32SYM(b), (ae_q32s *)y1, 0); |
| AE_SQ32F_I(AE_ROUNDSQ32SYM(a), (ae_q32s *)y0, 0); |
| } |
| |
| #endif |
| #endif |