blob: b2cca7a3568e3ccf66cab92d51544442044669c3 [file] [log] [blame]
dnl PA64 mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).
dnl Copyright 2003 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 3 of the License, or (at
dnl your option) any later version.
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
C 8000,8200: 2
C 8500,8600,8700: 1.75
C TODO
C * Write special feed-in code for each (n mod 8). (See the ia64 code.)
C * Try to make this run at closer to 1.5 c/l.
C * Set up register aliases (define(`u0',`%r19')).
C * Explicitly align loop.
dnl INPUT PARAMETERS
define(`rp',`%r26')
define(`up',`%r25')
define(`vp',`%r24')
define(`n',`%r23')
ifdef(`OPERATION_addlsh1_n',`
define(ADCSBC, `add,dc')
define(INITC, `ldi 0,')
define(func, mpn_addlsh1_n)
')
ifdef(`OPERATION_sublsh1_n',`
define(ADCSBC, `sub,db')
define(INITC, `ldi 1,')
define(func, mpn_sublsh1_n)
')
MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
ifdef(`HAVE_ABI_2_0w',`
define(LEVEL, `.level 2.0w')
define(RETREG, `%r28')
define(CLRRET1, `dnl')
')
ifdef(`HAVE_ABI_2_0n',`
define(LEVEL, `.level 2.0')
define(RETREG, `%r29')
define(CLRRET1, `ldi 0, %r28')
')
LEVEL
PROLOGUE(func)
std,ma %r3, 0x100(%r30) C save reg
INITC %r1 C init saved cy
C Primitive code for the first (n mod 8) limbs:
extrd,u n, 63, 3, %r22 C count for loop0
comib,= 0, %r22, L(unrolled) C skip loop0?
copy %r0, %r28
LDEF(loop0)
ldd 0(vp), %r21
ldo 8(vp), vp
ldd 0(up), %r19
ldo 8(up), up
shrpd %r21, %r28, 63, %r31
addi -1, %r1, %r0 C restore cy
ADCSBC %r19, %r31, %r29
std %r29, 0(rp)
add,dc %r0, %r0, %r1 C save cy
copy %r21, %r28
addib,> -1, %r22, L(loop0)
ldo 8(rp), rp
addib,>= -8, n, L(unrolled)
addi -1, %r1, %r0 C restore cy
shrpd %r0, %r28, 63, %r28
ADCSBC %r0, %r28, RETREG
ifdef(`OPERATION_sublsh1_n',
` sub %r0, RETREG, RETREG')
CLRRET1
bve (%r2)
ldd,mb -0x100(%r30), %r3
LDEF(unrolled)
std %r4, -0xf8(%r30) C save reg
ldd 0(vp), %r4
std %r5, -0xf0(%r30) C save reg
ldd 8(vp), %r5
std %r6, -0xe8(%r30) C save reg
ldd 16(vp), %r6
std %r7, -0xe0(%r30) C save reg
ldd 24(vp), %r7
shrpd %r4, %r28, 63, %r31
std %r8, -0xd8(%r30) C save reg
ldd 32(vp), %r8
shrpd %r5, %r4, 63, %r4
std %r9, -0xd0(%r30) C save reg
ldd 40(vp), %r9
shrpd %r6, %r5, 63, %r5
ldd 48(vp), %r3
shrpd %r7, %r6, 63, %r6
ldd 56(vp), %r28
shrpd %r8, %r7, 63, %r7
ldd 0(up), %r19
shrpd %r9, %r8, 63, %r8
ldd 8(up), %r20
shrpd %r3, %r9, 63, %r9
ldd 16(up), %r21
shrpd %r28, %r3, 63, %r3
ldd 24(up), %r22
nop C alignment FIXME
addib,<= -8, n, L(end)
addi -1, %r1, %r0 C restore cy
LDEF(loop)
ADCSBC %r19, %r31, %r29
ldd 32(up), %r19
std %r29, 0(rp)
ADCSBC %r20, %r4, %r29
ldd 40(up), %r20
std %r29, 8(rp)
ADCSBC %r21, %r5, %r29
ldd 48(up), %r21
std %r29, 16(rp)
ADCSBC %r22, %r6, %r29
ldd 56(up), %r22
std %r29, 24(rp)
ADCSBC %r19, %r7, %r29
ldd 64(vp), %r4
std %r29, 32(rp)
ADCSBC %r20, %r8, %r29
ldd 72(vp), %r5
std %r29, 40(rp)
ADCSBC %r21, %r9, %r29
ldd 80(vp), %r6
std %r29, 48(rp)
ADCSBC %r22, %r3, %r29
std %r29, 56(rp)
add,dc %r0, %r0, %r1 C save cy
ldd 88(vp), %r7
shrpd %r4, %r28, 63, %r31
ldd 96(vp), %r8
shrpd %r5, %r4, 63, %r4
ldd 104(vp), %r9
shrpd %r6, %r5, 63, %r5
ldd 112(vp), %r3
shrpd %r7, %r6, 63, %r6
ldd 120(vp), %r28
shrpd %r8, %r7, 63, %r7
ldd 64(up), %r19
shrpd %r9, %r8, 63, %r8
ldd 72(up), %r20
shrpd %r3, %r9, 63, %r9
ldd 80(up), %r21
shrpd %r28, %r3, 63, %r3
ldd 88(up), %r22
ldo 64(vp), vp
ldo 64(rp), rp
ldo 64(up), up
addib,> -8, n, L(loop)
addi -1, %r1, %r0 C restore cy
LDEF(end)
ADCSBC %r19, %r31, %r29
ldd 32(up), %r19
std %r29, 0(rp)
ADCSBC %r20, %r4, %r29
ldd 40(up), %r20
std %r29, 8(rp)
ADCSBC %r21, %r5, %r29
ldd 48(up), %r21
std %r29, 16(rp)
ADCSBC %r22, %r6, %r29
ldd 56(up), %r22
std %r29, 24(rp)
ADCSBC %r19, %r7, %r29
ldd -0xf8(%r30), %r4 C restore reg
std %r29, 32(rp)
ADCSBC %r20, %r8, %r29
ldd -0xf0(%r30), %r5 C restore reg
std %r29, 40(rp)
ADCSBC %r21, %r9, %r29
ldd -0xe8(%r30), %r6 C restore reg
std %r29, 48(rp)
ADCSBC %r22, %r3, %r29
ldd -0xe0(%r30), %r7 C restore reg
std %r29, 56(rp)
shrpd %r0, %r28, 63, %r28
ldd -0xd8(%r30), %r8 C restore reg
ADCSBC %r0, %r28, RETREG
ifdef(`OPERATION_sublsh1_n',
` sub %r0, RETREG, RETREG')
CLRRET1
ldd -0xd0(%r30), %r9 C restore reg
bve (%r2)
ldd,mb -0x100(%r30), %r3 C restore reg
EPILOGUE()