| /*! |
| * \copy |
| * Copyright (c) 2013, Cisco Systems |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * |
| * * Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * |
| * * Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in |
| * the documentation and/or other materials provided with the |
| * distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
| * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
| * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
| * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
| * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| * POSSIBILITY OF SUCH DAMAGE. |
| * |
| */ |
| |
| #ifdef HAVE_NEON |
| #include "arm_arch_common_macro.S" |
| |
| // SQR_ADD_16BYTES src_lo, src_hi, acc |
| // Squares the 16 unsigned bytes held in the two D registers src_lo and src_hi |
| // and pairwise-accumulates the 16-bit squares into the u32 lanes of acc. |
| // Scratch: q3 and q8 are overwritten by every expansion. |
| .macro SQR_ADD_16BYTES src_lo, src_hi, acc |
| vmull.u8 q3, \src_lo, \src_lo |
| vmull.u8 q8, \src_hi, \src_hi |
| vpadal.u16 \acc, q3 |
| vpadal.u16 \acc, q8 |
| .endm |
| |
| |
| // SampleVariance16x16_neon |
| // Walks 16 rows of 16 bytes through two pointers and stores two 16-bit |
| // results of the form (sqr >> 8) - (sum >> 8)^2: one over the per-byte |
| // absolute differences of the two rows, one over the bytes read through r2. |
| // |
| // In: r0 = ref pointer, r1 = step added to r0 after each row |
| // r2 = src pointer, r3 = step added to r2 after each row |
| // word at [sp] on entry = address the two halfwords are written to |
| // Out: halfword 0 = difference result, halfword 1 = src result |
| // Clobbers: r0, r2, flags, q0-q3, q8-q15 (r4 is saved and restored) |
| WELS_ASM_FUNC_BEGIN SampleVariance16x16_neon |
| push {r4} // r4: row counter first, output address later |
| |
| // Row 0 seeds the accumulators directly, so nothing has to be zeroed. |
| vld1.8 {q15}, [r0], r1 // q15 = 16 ref bytes, r0 += r1 |
| vld1.8 {q14}, [r2], r3 // q14 = 16 src bytes, r2 += r3 |
| |
| // src statistics of row 0 |
| vaddl.u8 q10, d28, d29 // sum_cur as 8 x u16 |
| vmull.u8 q9, d29, d29 |
| vmull.u8 q8, d28, d28 |
| vaddl.u16 q9, d18, d19 // sqr_cur as 4 x u32, upper-half squares first |
| vpadal.u16 q9, q8 // then fold in the lower-half squares |
| |
| // abs(src - ref) statistics of row 0 |
| vabd.u8 q13, q14, q15 |
| vmull.u8 q12, d27, d27 |
| vmull.u8 q11, d26, d26 |
| vaddl.u16 q12, d24, d25 // sqr as 4 x u32 |
| vpadal.u16 q12, q11 |
| vaddl.u8 q13, d26, d27 // sum as 8 x u16, replaces the differences in q13 |
| |
| mov r4, #15 // rows still to process |
| sample_var_16x16_next_row: |
| |
| vld1.8 {q0}, [r0], r1 // next 16 ref bytes |
| vld1.8 {q1}, [r2], r3 // next 16 src bytes |
| |
| vabd.u8 q2, q0, q1 // q2 = abs(ref - src) per byte |
| |
| // sum_cur (q10) += src bytes |
| vpadal.u8 q10, q1 |
| |
| // sqr (q12) += squared differences; q3 and q8 are scratch |
| SQR_ADD_16BYTES d4, d5, q12 |
| |
| // sum (q13) += differences |
| vpadal.u8 q13, q2 |
| |
| // flags are consumed by the bne below; the NEON code in between keeps them |
| subs r4, r4, #1 |
| |
| // sqr_cur (q9) += squared src bytes |
| SQR_ADD_16BYTES d2, d3, q9 |
| |
| bne sample_var_16x16_next_row |
| |
| // Horizontal reduction of the four accumulators. |
| vadd.u16 d0, d26, d27 // sum: 8 lanes -> 4 |
| vadd.u16 d1, d20, d21 // sum_cur: 8 lanes -> 4 |
| vpaddl.u16 q0, q0 // widen to u32: d0 = sum pairs, d1 = sum_cur pairs |
| vadd.u32 d2, d24, d25 // sqr: 4 lanes -> 2 |
| vadd.u32 d3, d18, d19 // sqr_cur: 4 lanes -> 2 |
| vpadd.u32 d0, d0, d1 // d0 = {sum, sum_cur} |
| vpadd.u32 d1, d2, d3 // d1 = {sqr, sqr_cur} |
| |
| ldr r4, [sp, #4] // stack word just above the saved r4: output address |
| |
| vshr.u32 q0, q0, #8 // divide all four totals by 256 (16 x 16 samples) |
| vmul.u32 d0, d0, d0 // square both shifted sums |
| vsub.u32 d0, d1, d0 // (sqr >> 8) - (sum >> 8)^2 in both lanes |
| vmovl.u32 q0, d0 // separate the lanes: d0 = difference, d1 = src |
| vst2.16 {d0[0], d1[0]}, [r4] // write the low halfword of each, back to back |
| |
| pop {r4} |
| |
| WELS_ASM_FUNC_END |
| |
| #endif |