| /* ---------------------------------------------------------------------- |
| * Project: CMSIS DSP Library |
| * Title: arm_levinson_durbin_q31.c |
| * Description: q31 version of Levinson Durbin algorithm |
| * |
| * $Date: 23 April 2021 |
| * $Revision: V1.9.0 |
| * |
| * Target Processor: Cortex-M and Cortex-A cores |
| * -------------------------------------------------------------------- */ |
| /* |
| * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. |
| * |
| * SPDX-License-Identifier: Apache-2.0 |
| * |
| * Licensed under the Apache License, Version 2.0 (the License); you may |
| * not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an AS IS BASIS, WITHOUT |
| * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "dsp/filtering_functions.h" |
| |
| #define ONE_Q31 0x7FFFFFFFL |
| #define TWO_Q30 0x7FFFFFFFL |
| |
| #define HALF_Q31 0x00008000L |
| #define ONE_Q15 0x7FFF |
| #define HALF_Q15 0x3FFF |
| #define LOWPART_MASK 0x07FFF |
| |
| __STATIC_FORCEINLINE q31_t mul32x16(q31_t a, q15_t b) |
| { |
| q31_t r = ((q63_t)a * (q63_t)b) >> 15; |
| |
| return(r); |
| |
| } |
| |
| __STATIC_FORCEINLINE q31_t mul32x32(q31_t a, q31_t b) |
| { |
| //q31_t r = __SSAT(((q63_t)a * b) >> 31,31); |
| q31_t r = ((q63_t)a * b) >> 31; |
| |
| return(r); |
| |
| } |
| |
| __STATIC_FORCEINLINE q31_t divide(q31_t n, q31_t d) |
| { |
| arm_status status; |
| int16_t shift; |
| q15_t inverse; |
| q31_t r; |
| // We are computing: |
| // n / d = n / (h + l) where h and l are the high end and low end part. |
| // 1 / (h + l) = 1 / h (1 - l / h) |
| // Our division algorithm has a shift. So it is returning a scaled value sh. |
| // So we need a << shift to convert 1/ sh to 1/h. |
| // In below code, we are organizing the computation differently. Instead of computing: |
| // 1 / h (1 - l / h) |
| // we are computing |
| // 1 / h (2 - (l + h) / h) |
| // 1 / h (2 - d / h) |
| // Also, we are not computing 1/h in Q15 but in Q14. |
| // 2 is expressed in Q30. |
| // So at the end of all computation we need a << 2 |
| |
| // Result is in Q14 because of use of HALF_Q15 instead of ONE_Q15. |
| status=arm_divide_q15(HALF_Q15,d>>16,&inverse,&shift); |
| (void)status; |
| |
| // d is used instead of l |
| // So we will need to substract to 2 instead of 1. |
| r = mul32x16(d,inverse); |
| r = TWO_Q30 - (r << shift); |
| r = mul32x16(r, inverse); |
| r = mul32x32(r,n) ; |
| r = r << (shift + 2); |
| |
| return(r); |
| |
| } |
| |
| /** |
| @ingroup groupFilters |
| */ |
| |
| /** |
| @defgroup LD Levinson Durbin Algorithm |
| |
| */ |
| |
| /** |
| @addtogroup LD |
| @{ |
| */ |
| |
| /** |
| @brief Levinson Durbin |
| @param[in] phi autocovariance vector starting with lag 0 (length is nbCoefs + 1) |
| @param[out] a autoregressive coefficients |
| @param[out] err prediction error (variance) |
| @param[in] nbCoefs number of autoregressive coefficients |
| @return none |
| */ |
| |
| #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) && defined(__CMSIS_GCC_H) |
| #pragma message "Scalar version of arm_levinson_durbin_q31 built. Helium version has build issues with gcc." |
| #endif |
| |
| #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) && !defined(__CMSIS_GCC_H) |
| |
| #define LANE23_MASK 0xFF00 |
| |
| #include "arm_helium_utils.h" |
| void arm_levinson_durbin_q31(const q31_t *phi, |
| q31_t *a, |
| q31_t *err, |
| int nbCoefs) |
| { |
| q31_t e; |
| |
| static const uint32_t revOffsetArray[4] = {3,2,1,0}; |
| |
| //a[0] = phi[1] / phi[0]; |
| a[0] = divide(phi[1], phi[0]); |
| |
| |
| //e = phi[0] - phi[1] * a[0]; |
| e = phi[0] - mul32x32(phi[1],a[0]); |
| |
| for(int p=1; p < nbCoefs; p++) |
| { |
| q63_t suma=0; |
| q63_t sumb=0; |
| q31x4_t vecA,vecRevPhi,vecPhi; |
| q31_t k; |
| uint32_t blkCnt; |
| const q31_t *pPhi,*pRevPhi,*pA; |
| uint32x4_t revOffset; |
| |
| |
| int nb,j,i; |
| |
| revOffset = vld1q(revOffsetArray); |
| |
| pRevPhi = &phi[p-3]; |
| pPhi = &phi[1]; |
| pA = a; |
| |
| i = 0; |
| blkCnt = p >> 2; |
| while(blkCnt > 0) |
| { |
| vecA = vld1q(pA); |
| pA += 4; |
| |
| vecPhi = vld1q(pPhi); |
| pPhi += 4; |
| |
| vecRevPhi = vldrwq_gather_shifted_offset_s32(pRevPhi,revOffset); |
| pRevPhi -= 4; |
| |
| suma = vmlaldavaq(suma,vecA,vecRevPhi); |
| sumb = vmlaldavaq(sumb,vecA,vecPhi); |
| |
| i += 4; |
| blkCnt--; |
| } |
| |
| |
| blkCnt = p & 3; |
| while(blkCnt > 0) |
| { |
| suma += ((q63_t)a[i] * phi[p - i]); |
| sumb += ((q63_t)a[i] * phi[i + 1]); |
| |
| i++; |
| blkCnt--; |
| } |
| |
| suma = asrl(suma, 31); |
| sumb = asrl(sumb, 31); |
| |
| |
| |
| //k = (phi[p+1]-suma)/(phi[0] - sumb); |
| k = divide(phi[p+1]-(q31_t)suma,phi[0] - (q31_t)sumb); |
| |
| q31x4_t vecRevA,tmp; |
| static uint32_t orgOffsetArray[4]={0,1,-1,-2}; |
| static const uint32_t offsetIncArray[4]={2,2,-2,-2}; |
| |
| uint32x4_t offset,offsetInc,vecTmp; |
| |
| |
| offset = vld1q(orgOffsetArray); |
| vecTmp = vdupq_n_u32(p); |
| |
| offset = vaddq_m_u32(offset,offset,vecTmp,LANE23_MASK); |
| offsetInc = vld1q(offsetIncArray); |
| |
| |
| nb = p >> 2; |
| j=0; |
| for(int i =0;i < nb ; i++) |
| { |
| /* |
| q31_t x0,x1,x2,x3; |
| |
| //x = a[j] - k * a[p-1-j]; |
| x0 = a[j] - mul32x32(k,a[p-1-j]); |
| x1 = a[j+1] - mul32x32(k,a[p-2-j]); |
| |
| //y = a[p-1-j] - k * a[j]; |
| x2 = a[p-1-j] - mul32x32(k , a[j]); |
| x3 = a[p-2-j] - mul32x32(k , a[j+1]); |
| |
| a[j] = x0; |
| a[j+1] = x1; |
| a[p-1-j] = x2; |
| a[p-2-j] = x3; |
| */ |
| |
| uint64_t tmpa,tmpb; |
| vecA = vldrwq_gather_shifted_offset_s32(a,offset); |
| |
| |
| tmpa = vgetq_lane_u64((uint64x2_t)vecA,0); |
| tmpb = vgetq_lane_u64((uint64x2_t)vecA,1); |
| vecRevA = (q31x4_t) vsetq_lane_u64(tmpb,(uint64x2_t)vecRevA,0); |
| vecRevA = (q31x4_t) vsetq_lane_u64(tmpa,(uint64x2_t)vecRevA,1); |
| |
| |
| tmp = vsubq(vecA,vqdmulhq_n_s32(vecRevA,k)); |
| vstrwq_scatter_shifted_offset_s32(a, offset, tmp); |
| |
| offset = vaddq(offset,offsetInc); |
| |
| j+=2; |
| } |
| |
| switch(p & 3) |
| { |
| case 3: |
| { |
| q31_t x,y; |
| |
| //x = a[j] - k * a[p-1-j]; |
| x = a[j] - mul32x32(k,a[p-1-j]); |
| |
| //y = a[p-1-j] - k * a[j]; |
| y = a[p-1-j] - mul32x32(k , a[j]); |
| |
| a[j] = x; |
| a[p-1-j] = y; |
| |
| //a[j] = a[j]- k * a[p-1-j]; |
| a[j+1] = a[j+1] - mul32x32(k,a[p-2-j]); |
| } |
| break; |
| |
| case 2: |
| { |
| q31_t x,y; |
| |
| //x = a[j] - k * a[p-1-j]; |
| x = a[j] - mul32x32(k,a[p-1-j]); |
| |
| //y = a[p-1-j] - k * a[j]; |
| y = a[p-1-j] - mul32x32(k , a[j]); |
| |
| a[j] = x; |
| a[p-1-j] = y; |
| } |
| break; |
| |
| case 1: |
| //a[j] = a[j]- k * a[p-1-j]; |
| a[j] = a[j] - mul32x32(k,a[p-1-j]); |
| break; |
| } |
| |
| a[p] = k; |
| |
| // e = e * (1 - k*k); |
| e = mul32x32(e,ONE_Q31 - mul32x32(k,k)); |
| |
| |
| } |
| *err = e; |
| } |
| |
| #else |
| |
| void arm_levinson_durbin_q31(const q31_t *phi, |
| q31_t *a, |
| q31_t *err, |
| int nbCoefs) |
| { |
| q31_t e; |
| |
| //a[0] = phi[1] / phi[0]; |
| a[0] = divide(phi[1], phi[0]); |
| |
| |
| //e = phi[0] - phi[1] * a[0]; |
| e = phi[0] - mul32x32(phi[1],a[0]); |
| |
| for(int p=1; p < nbCoefs; p++) |
| { |
| q63_t suma=0; |
| q63_t sumb=0; |
| q31_t k; |
| int nb,j; |
| |
| for(int i=0; i < p; i++) |
| { |
| suma += ((q63_t)a[i] * phi[p - i]); |
| sumb += ((q63_t)a[i] * phi[i + 1]); |
| } |
| |
| suma = suma >> 31; |
| sumb = sumb >> 31; |
| |
| |
| |
| //k = (phi[p+1]-suma)/(phi[0] - sumb); |
| k = divide(phi[p+1]-(q31_t)suma,phi[0] - (q31_t)sumb); |
| |
| |
| nb = p >> 1; |
| j=0; |
| for(int i =0;i < nb ; i++) |
| { |
| q31_t x,y; |
| |
| //x = a[j] - k * a[p-1-j]; |
| x = a[j] - mul32x32(k,a[p-1-j]); |
| |
| //y = a[p-1-j] - k * a[j]; |
| y = a[p-1-j] - mul32x32(k , a[j]); |
| |
| a[j] = x; |
| a[p-1-j] = y; |
| |
| j++; |
| } |
| |
| nb = p & 1; |
| if (nb) |
| { |
| //a[j] = a[j]- k * a[p-1-j]; |
| a[j] = a[j] - mul32x32(k,a[p-1-j]); |
| } |
| |
| a[p] = k; |
| |
| // e = e * (1 - k*k); |
| e = mul32x32(e,ONE_Q31 - mul32x32(k,k)); |
| |
| |
| } |
| *err = e; |
| } |
| #endif /* defined(ARM_MATH_MVEI) */ |
| |
| /** |
| @} end of LD group |
| */ |