| dnl PA64 mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1). |
| |
| dnl Copyright 2003 Free Software Foundation, Inc. |
| |
| dnl This file is part of the GNU MP Library. |
| |
| dnl The GNU MP Library is free software; you can redistribute it and/or modify |
| dnl it under the terms of the GNU Lesser General Public License as published |
| dnl by the Free Software Foundation; either version 3 of the License, or (at |
| dnl your option) any later version. |
| |
| dnl The GNU MP Library is distributed in the hope that it will be useful, but |
| dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
| dnl License for more details. |
| |
| dnl You should have received a copy of the GNU Lesser General Public License |
| dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. |
| |
| include(`../config.m4') |
| |
| C cycles/limb |
| C 8000,8200: 2 |
| C 8500,8600,8700: 1.75 |
| |
| C TODO |
| C * Write special feed-in code for each (n mod 8). (See the ia64 code.) |
| C * Try to make this run at closer to 1.5 c/l. |
| C * Set up register aliases (define(`u0',`%r19')). |
| C * Explicitly align loop. |
| |
| dnl INPUT PARAMETERS |
| dnl Symbolic names for the four registers the code below treats as its |
| dnl arguments: results are stored through rp, the two source operands are |
| dnl loaded through up and vp, and n is the limb count. The operand loaded |
| dnl through vp is the one that gets shifted left by one bit. |
| define(`rp',`%r26') |
| define(`up',`%r25') |
| define(`vp',`%r24') |
| define(`n',`%r23') |
| |
| dnl Select the add or the subtract flavour of this file. Each flavour sets: |
| dnl ADCSBC - the per-limb instruction that consumes and produces the carry |
| dnl INITC - an instruction prefix that loads the initial saved-carry value; |
| dnl the destination register is supplied at the point of use |
| dnl func - the entry point name handed to PROLOGUE |
| dnl The add flavour starts from a saved carry of 0. |
| ifdef(`OPERATION_addlsh1_n',` |
| define(ADCSBC, `add,dc') |
| define(INITC, `ldi 0,') |
| define(func, mpn_addlsh1_n) |
| ') |
| dnl The subtract flavour starts from a saved carry of 1. NOTE(review): this is |
| dnl presumably because a set carry bit means no borrow for sub,db on PA-RISC; |
| dnl confirm against the architecture manual. |
| ifdef(`OPERATION_sublsh1_n',` |
| define(ADCSBC, `sub,db') |
| define(INITC, `ldi 1,') |
| define(func, mpn_sublsh1_n) |
| ') |
| |
| dnl Names both entry points this source can be built as. The macro itself is |
| dnl defined outside this file. |
| MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n) |
| |
| dnl ABI dependent settings. Each ABI sets: |
| dnl LEVEL - the assembler .level directive emitted before the function |
| dnl RETREG - the register that receives the returned carry value |
| dnl CLRRET1 - text emitted just before each return: nothing for 2.0w, and an |
| dnl instruction that zeroes %r28 for 2.0n |
| dnl NOTE(review): zeroing %r28 under 2.0n presumably clears the upper half of |
| dnl a 64-bit return value held in the %r28/%r29 pair; confirm against the ABI. |
| dnl If neither HAVE_ABI_2_0w nor HAVE_ABI_2_0n is defined, these three macros |
| dnl are left undefined by this file. |
| ifdef(`HAVE_ABI_2_0w',` |
| define(LEVEL, `.level 2.0w') |
| define(RETREG, `%r28') |
| define(CLRRET1, `dnl') |
| ') |
| ifdef(`HAVE_ABI_2_0n',` |
| define(LEVEL, `.level 2.0') |
| define(RETREG, `%r29') |
| define(CLRRET1, `ldi 0, %r28') |
| ') |
| |
| LEVEL |
| C func(rp, up, vp, n) |
| C |
| C For each of the n limbs, compute up[i] ADCSBC ((vp[i] << 1) | (vp[i-1] >> 63)) |
| C with the carry chained from limb to limb, and store it at rp[i]. The value |
| C left in RETREG combines the final carry (or borrow) with the bit shifted out |
| C of the top limb of vp. |
| C |
| C Structure: a one-limb-per-iteration loop (loop0) handles the first n mod 8 |
| C limbs, then an 8-way unrolled, software pipelined loop handles the rest. |
| C n = 0 is not handled: it would branch to the unrolled code, which always |
| C processes eight limbs. |
| C |
| C Register usage: |
| C %r1 carry saved as 0 or 1 while the PSW carry is not live |
| C %r28 most recent unshifted v limb (its top bit feeds the next shift) |
| C %r31, %r4-%r9, %r3 eight shifted v limbs, in limb order (unrolled code) |
| C %r19-%r22 u limbs (%r22 doubles as the loop0 counter) |
| C %r29 result limb about to be stored |
| C |
| C Stack: one 0x100 byte frame. %r3 is saved at its base, and the unrolled code |
| C saves %r4-%r9 in the following six doublewords. |
| PROLOGUE(func) |
| C Store %r3 at the current stack pointer, then bump %r30 by 0x100. |
| std,ma %r3, 0x100(%r30) C save reg |
| |
| INITC %r1 C init saved cy |
| |
| C Primitive code for the first (n mod 8) limbs: |
| extrd,u n, 63, 3, %r22 C count for loop0 |
| comib,= 0, %r22, L(unrolled) C skip loop0? |
| C Delay slot: start with no bit shifted in below the first v limb. |
| copy %r0, %r28 |
| LDEF(loop0) |
| ldd 0(vp), %r21 |
| ldo 8(vp), vp |
| ldd 0(up), %r19 |
| ldo 8(up), up |
| C %r31 = (v limb << 1) | (previous v limb >> 63) |
| shrpd %r21, %r28, 63, %r31 |
| C Adding -1 to the saved 0 or 1 regenerates the PSW carry; the sum is discarded. |
| addi -1, %r1, %r0 C restore cy |
| ADCSBC %r19, %r31, %r29 |
| std %r29, 0(rp) |
| C %r1 = 0 + 0 + carry. NOTE(review): the carry is presumably parked here |
| C because the loop-control addib below disturbs it; confirm in the manual. |
| add,dc %r0, %r0, %r1 C save cy |
| C Remember this v limb so its top bit can enter the next shift. |
| copy %r21, %r28 |
| addib,> -1, %r22, L(loop0) |
| C Delay slot: advance the result pointer. |
| ldo 8(rp), rp |
| |
| C n has not been touched by loop0. Take 8 off it here, and continue with the |
| C unrolled code if at least one full group of eight limbs exists. |
| addib,>= -8, n, L(unrolled) |
| addi -1, %r1, %r0 C restore cy |
| |
| C Fewer than eight limbs in total: finish up. %r28 becomes the bit shifted out |
| C of the last v limb, and ADCSBC combines it with the carry. |
| shrpd %r0, %r28, 63, %r28 |
| ADCSBC %r0, %r28, RETREG |
| C For subtraction the previous instruction computed 0 - bit - borrow, so |
| C negate it to return a non-negative count. |
| ifdef(`OPERATION_sublsh1_n', |
| ` sub %r0, RETREG, RETREG') |
| CLRRET1 |
| |
| C Return through %r2; the delay slot pops the frame and restores %r3. |
| bve (%r2) |
| ldd,mb -0x100(%r30), %r3 |
| |
| |
| C Unrolled code. Feed-in: save %r4-%r9 in the frame while loading the first |
| C eight v limbs, shift them, and load the first four u limbs. |
| LDEF(unrolled) |
| std %r4, -0xf8(%r30) C save reg |
| ldd 0(vp), %r4 |
| std %r5, -0xf0(%r30) C save reg |
| ldd 8(vp), %r5 |
| std %r6, -0xe8(%r30) C save reg |
| ldd 16(vp), %r6 |
| std %r7, -0xe0(%r30) C save reg |
| |
| C Each shrpd forms (limb << 1) | (previous limb >> 63). The result overwrites |
| C the register of the older limb of the pair, which is no longer needed. |
| C The first shrpd reads %r28 before the load at offset 56 replaces it. |
| ldd 24(vp), %r7 |
| shrpd %r4, %r28, 63, %r31 |
| std %r8, -0xd8(%r30) C save reg |
| ldd 32(vp), %r8 |
| shrpd %r5, %r4, 63, %r4 |
| std %r9, -0xd0(%r30) C save reg |
| ldd 40(vp), %r9 |
| shrpd %r6, %r5, 63, %r5 |
| ldd 48(vp), %r3 |
| shrpd %r7, %r6, 63, %r6 |
| C %r28 keeps the unshifted eighth limb for the next group. |
| ldd 56(vp), %r28 |
| shrpd %r8, %r7, 63, %r7 |
| ldd 0(up), %r19 |
| shrpd %r9, %r8, 63, %r8 |
| ldd 8(up), %r20 |
| shrpd %r3, %r9, 63, %r9 |
| ldd 16(up), %r21 |
| shrpd %r28, %r3, 63, %r3 |
| ldd 24(up), %r22 |
| |
| nop C alignment FIXME |
| C The loop body preloads the following group of eight limbs, so it is only |
| C entered while more than one group remains. Otherwise go straight to the |
| C wind-down code. |
| addib,<= -8, n, L(end) |
| addi -1, %r1, %r0 C restore cy |
| LDEF(loop) |
| C Eight chained ADCSBC operations on the already loaded and shifted limbs. |
| C Each u register is reloaded with a limb four positions further on as soon |
| C as it has been consumed. |
| ADCSBC %r19, %r31, %r29 |
| ldd 32(up), %r19 |
| std %r29, 0(rp) |
| ADCSBC %r20, %r4, %r29 |
| ldd 40(up), %r20 |
| std %r29, 8(rp) |
| ADCSBC %r21, %r5, %r29 |
| ldd 48(up), %r21 |
| std %r29, 16(rp) |
| ADCSBC %r22, %r6, %r29 |
| ldd 56(up), %r22 |
| std %r29, 24(rp) |
| C %r4-%r6 are free now; start loading v limbs of the next group. |
| ADCSBC %r19, %r7, %r29 |
| ldd 64(vp), %r4 |
| std %r29, 32(rp) |
| ADCSBC %r20, %r8, %r29 |
| ldd 72(vp), %r5 |
| std %r29, 40(rp) |
| ADCSBC %r21, %r9, %r29 |
| ldd 80(vp), %r6 |
| std %r29, 48(rp) |
| ADCSBC %r22, %r3, %r29 |
| std %r29, 56(rp) |
| |
| add,dc %r0, %r0, %r1 C save cy |
| |
| C Load and shift the rest of the next group of v limbs, same pattern as in |
| C the feed-in code. The first shrpd still sees the last v limb of the group |
| C just finished in %r28. |
| ldd 88(vp), %r7 |
| shrpd %r4, %r28, 63, %r31 |
| ldd 96(vp), %r8 |
| shrpd %r5, %r4, 63, %r4 |
| ldd 104(vp), %r9 |
| shrpd %r6, %r5, 63, %r5 |
| ldd 112(vp), %r3 |
| shrpd %r7, %r6, 63, %r6 |
| ldd 120(vp), %r28 |
| shrpd %r8, %r7, 63, %r7 |
| ldd 64(up), %r19 |
| shrpd %r9, %r8, 63, %r8 |
| ldd 72(up), %r20 |
| shrpd %r3, %r9, 63, %r9 |
| ldd 80(up), %r21 |
| shrpd %r28, %r3, 63, %r3 |
| ldd 88(up), %r22 |
| |
| C Advance all three pointers by eight limbs (64 bytes). |
| ldo 64(vp), vp |
| ldo 64(rp), rp |
| ldo 64(up), up |
| addib,> -8, n, L(loop) |
| addi -1, %r1, %r0 C restore cy |
| C Wind-down: process the final group of eight limbs, which is already loaded |
| C and shifted. Only the remaining four u limbs are still fetched. The |
| C restores of %r4-%r9 are interleaved with the arithmetic, each placed after |
| C the last use of the register it restores. |
| LDEF(end) |
| ADCSBC %r19, %r31, %r29 |
| ldd 32(up), %r19 |
| std %r29, 0(rp) |
| ADCSBC %r20, %r4, %r29 |
| ldd 40(up), %r20 |
| std %r29, 8(rp) |
| ADCSBC %r21, %r5, %r29 |
| ldd 48(up), %r21 |
| std %r29, 16(rp) |
| ADCSBC %r22, %r6, %r29 |
| ldd 56(up), %r22 |
| std %r29, 24(rp) |
| ADCSBC %r19, %r7, %r29 |
| ldd -0xf8(%r30), %r4 C restore reg |
| std %r29, 32(rp) |
| ADCSBC %r20, %r8, %r29 |
| ldd -0xf0(%r30), %r5 C restore reg |
| std %r29, 40(rp) |
| ADCSBC %r21, %r9, %r29 |
| ldd -0xe8(%r30), %r6 C restore reg |
| std %r29, 48(rp) |
| ADCSBC %r22, %r3, %r29 |
| ldd -0xe0(%r30), %r7 C restore reg |
| std %r29, 56(rp) |
| |
| C Return value, computed as in the short path above: the bit shifted out of |
| C the last v limb combined with the carry left by the final ADCSBC. |
| shrpd %r0, %r28, 63, %r28 |
| ldd -0xd8(%r30), %r8 C restore reg |
| ADCSBC %r0, %r28, RETREG |
| C Negate for the subtract flavour so the returned count is non-negative. |
| ifdef(`OPERATION_sublsh1_n', |
| ` sub %r0, RETREG, RETREG') |
| CLRRET1 |
| |
| ldd -0xd0(%r30), %r9 C restore reg |
| C Return through %r2; the delay slot pops the frame and restores %r3. |
| bve (%r2) |
| ldd,mb -0x100(%r30), %r3 C restore reg |
| EPILOGUE() |