.file "ghash-x86.s"
.text
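# gcm_gmult_4bit_x86(Xi, Htable): multiply the 16-byte hash value Xi in
# place by H, using the caller-precomputed 16-entry Htable.  After the
# 84-byte stack frame is set up, Xi is at 104(%esp) and Htable at
# 108(%esp).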
.globl _gcm_gmult_4bit_x86
.align 4
_gcm_gmult_4bit_x86:
L_gcm_gmult_4bit_x86_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
subl $84,%esp
movl 104(%esp),%edi
movl 108(%esp),%esi
movl (%edi),%ebp
movl 4(%edi),%edx
movl 8(%edi),%ecx
movl 12(%edi),%ebx
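# Build the rem_4bit reduction table on the stack: 16 dwords at 16(%esp),
# each reduction constant pre-shifted left by 16 (e.g. 471859200 is
# 0x1C20<<16), then spill Xi itself to 0(%esp)..12(%esp).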
movl $0,16(%esp)
movl $471859200,20(%esp)
movl $943718400,24(%esp)
movl $610271232,28(%esp)
movl $1887436800,32(%esp)
movl $1822425088,36(%esp)
movl $1220542464,40(%esp)
movl $1423966208,44(%esp)
movl $3774873600,48(%esp)
movl $4246732800,52(%esp)
movl $3644850176,56(%esp)
movl $3311403008,60(%esp)
movl $2441084928,64(%esp)
movl $2376073216,68(%esp)
movl $2847932416,72(%esp)
movl $3051356160,76(%esp)
movl %ebp,(%esp)
movl %edx,4(%esp)
movl %ecx,8(%esp)
movl %ebx,12(%esp)
shrl $20,%ebx
andl $240,%ebx
movl 4(%esi,%ebx,1),%ebp
movl (%esi,%ebx,1),%edx
movl 12(%esi,%ebx,1),%ecx
movl 8(%esi,%ebx,1),%ebx
xorl %eax,%eax
movl $15,%edi
jmp L000x86_loop
.align 4,0x90
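# Inner loop: two nibbles of Xi per iteration.  Each step shifts the
# 128-bit accumulator %ebp:%edx:%ecx:%ebx (most- to least-significant
# dword) right by 4, folds the shifted-out bits back in via the rem_4bit
# table at 16(%esp), and XORs in the Htable entry selected by the next
# nibble of Xi.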
L000x86_loop:
movb %bl,%al
shrdl $4,%ecx,%ebx
andb $15,%al
shrdl $4,%edx,%ecx
shrdl $4,%ebp,%edx
shrl $4,%ebp
xorl 16(%esp,%eax,4),%ebp
movb (%esp,%edi,1),%al
andb $240,%al
xorl 8(%esi,%eax,1),%ebx
xorl 12(%esi,%eax,1),%ecx
xorl (%esi,%eax,1),%edx
xorl 4(%esi,%eax,1),%ebp
decl %edi
js L001x86_break
movb %bl,%al
shrdl $4,%ecx,%ebx
andb $15,%al
shrdl $4,%edx,%ecx
shrdl $4,%ebp,%edx
shrl $4,%ebp
xorl 16(%esp,%eax,4),%ebp
movb (%esp,%edi,1),%al
shlb $4,%al
xorl 8(%esi,%eax,1),%ebx
xorl 12(%esi,%eax,1),%ecx
xorl (%esi,%eax,1),%edx
xorl 4(%esi,%eax,1),%ebp
jmp L000x86_loop
.align 4,0x90
L001x86_break:
bswap %ebx
bswap %ecx
bswap %edx
bswap %ebp
movl 104(%esp),%edi
movl %ebx,12(%edi)
movl %ecx,8(%edi)
movl %edx,4(%edi)
movl %ebp,(%edi)
addl $84,%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
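# gcm_ghash_4bit_x86(Xi, Htable, inp, len): hash len bytes from inp into
# Xi, 16 bytes at a time.  Each block is XORed into Xi and the result is
# multiplied by H with the same 4-bit table method as above.  After the
# 84-byte frame: Xi at 104(%esp), Htable 108(%esp), inp 112(%esp),
# len 116(%esp) (replaced below by the end-of-input pointer inp+len).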
.globl _gcm_ghash_4bit_x86
.align 4
_gcm_ghash_4bit_x86:
L_gcm_ghash_4bit_x86_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
subl $84,%esp
movl 104(%esp),%ebx
movl 108(%esp),%esi
movl 112(%esp),%edi
movl 116(%esp),%ecx
addl %edi,%ecx
movl %ecx,116(%esp)
movl (%ebx),%ebp
movl 4(%ebx),%edx
movl 8(%ebx),%ecx
movl 12(%ebx),%ebx
movl $0,16(%esp)
movl $471859200,20(%esp)
movl $943718400,24(%esp)
movl $610271232,28(%esp)
movl $1887436800,32(%esp)
movl $1822425088,36(%esp)
movl $1220542464,40(%esp)
movl $1423966208,44(%esp)
movl $3774873600,48(%esp)
movl $4246732800,52(%esp)
movl $3644850176,56(%esp)
movl $3311403008,60(%esp)
movl $2441084928,64(%esp)
movl $2376073216,68(%esp)
movl $2847932416,72(%esp)
movl $3051356160,76(%esp)
.align 4,0x90
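# Outer loop: XOR the next 16-byte input block into Xi (held in
# %ebp:%edx:%ecx:%ebx), then run the 4-bit multiply-by-H inner loop on it.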
L002x86_outer_loop:
xorl 12(%edi),%ebx
xorl 8(%edi),%ecx
xorl 4(%edi),%edx
xorl (%edi),%ebp
movl %ebx,12(%esp)
movl %ecx,8(%esp)
movl %edx,4(%esp)
movl %ebp,(%esp)
shrl $20,%ebx
andl $240,%ebx
movl 4(%esi,%ebx,1),%ebp
movl (%esi,%ebx,1),%edx
movl 12(%esi,%ebx,1),%ecx
movl 8(%esi,%ebx,1),%ebx
xorl %eax,%eax
movl $15,%edi
jmp L003x86_loop
.align 4,0x90
L003x86_loop:
movb %bl,%al
shrdl $4,%ecx,%ebx
andb $15,%al
shrdl $4,%edx,%ecx
shrdl $4,%ebp,%edx
shrl $4,%ebp
xorl 16(%esp,%eax,4),%ebp
movb (%esp,%edi,1),%al
andb $240,%al
xorl 8(%esi,%eax,1),%ebx
xorl 12(%esi,%eax,1),%ecx
xorl (%esi,%eax,1),%edx
xorl 4(%esi,%eax,1),%ebp
decl %edi
js L004x86_break
movb %bl,%al
shrdl $4,%ecx,%ebx
andb $15,%al
shrdl $4,%edx,%ecx
shrdl $4,%ebp,%edx
shrl $4,%ebp
xorl 16(%esp,%eax,4),%ebp
movb (%esp,%edi,1),%al
shlb $4,%al
xorl 8(%esi,%eax,1),%ebx
xorl 12(%esi,%eax,1),%ecx
xorl (%esi,%eax,1),%edx
xorl 4(%esi,%eax,1),%ebp
jmp L003x86_loop
.align 4,0x90
L004x86_break:
bswap %ebx
bswap %ecx
bswap %edx
bswap %ebp
movl 112(%esp),%edi
leal 16(%edi),%edi
cmpl 116(%esp),%edi
movl %edi,112(%esp)
jb L002x86_outer_loop
movl 104(%esp),%edi
movl %ebx,12(%edi)
movl %ecx,8(%edi)
movl %edx,4(%edi)
movl %ebp,(%edi)
addl $84,%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
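# __mmx_gmult_4bit_inner: fully unrolled MMX multiply of Xi by H.
# Expects %edi -> Xi, %esi -> Htable, %eax -> Lrem_4bit and %bl holding
# byte 15 of Xi.  Returns the byte-swapped result in %ebp, %edx, %ecx,
# %ebx, which the callers store to Xi[0..3], Xi[4..7], Xi[8..11] and
# Xi[12..15] respectively.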
.align 4
__mmx_gmult_4bit_inner:
xorl %ecx,%ecx
movl %ebx,%edx
movb %dl,%cl
shlb $4,%cl
andl $240,%edx
movq 8(%esi,%ecx,1),%mm0
movq (%esi,%ecx,1),%mm1
movd %mm0,%ebp
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
movb 14(%edi),%cl
psllq $60,%mm2
andl $15,%ebp
pxor (%esi,%edx,1),%mm1
movl %ecx,%edx
movd %mm0,%ebx
pxor %mm2,%mm0
shlb $4,%cl
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
andl $240,%edx
pxor (%eax,%ebp,8),%mm1
andl $15,%ebx
pxor (%esi,%ecx,1),%mm1
movd %mm0,%ebp
pxor %mm2,%mm0
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
movb 13(%edi),%cl
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
andl $15,%ebp
pxor (%esi,%edx,1),%mm1
movl %ecx,%edx
movd %mm0,%ebx
pxor %mm2,%mm0
shlb $4,%cl
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
andl $240,%edx
pxor (%eax,%ebp,8),%mm1
andl $15,%ebx
pxor (%esi,%ecx,1),%mm1
movd %mm0,%ebp
pxor %mm2,%mm0
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
movb 12(%edi),%cl
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
andl $15,%ebp
pxor (%esi,%edx,1),%mm1
movl %ecx,%edx
movd %mm0,%ebx
pxor %mm2,%mm0
shlb $4,%cl
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
andl $240,%edx
pxor (%eax,%ebp,8),%mm1
andl $15,%ebx
pxor (%esi,%ecx,1),%mm1
movd %mm0,%ebp
pxor %mm2,%mm0
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
movb 11(%edi),%cl
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
andl $15,%ebp
pxor (%esi,%edx,1),%mm1
movl %ecx,%edx
movd %mm0,%ebx
pxor %mm2,%mm0
shlb $4,%cl
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
andl $240,%edx
pxor (%eax,%ebp,8),%mm1
andl $15,%ebx
pxor (%esi,%ecx,1),%mm1
movd %mm0,%ebp
pxor %mm2,%mm0
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
movb 10(%edi),%cl
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
andl $15,%ebp
pxor (%esi,%edx,1),%mm1
movl %ecx,%edx
movd %mm0,%ebx
pxor %mm2,%mm0
shlb $4,%cl
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
andl $240,%edx
pxor (%eax,%ebp,8),%mm1
andl $15,%ebx
pxor (%esi,%ecx,1),%mm1
movd %mm0,%ebp
pxor %mm2,%mm0
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
movb 9(%edi),%cl
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
andl $15,%ebp
pxor (%esi,%edx,1),%mm1
movl %ecx,%edx
movd %mm0,%ebx
pxor %mm2,%mm0
shlb $4,%cl
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
andl $240,%edx
pxor (%eax,%ebp,8),%mm1
andl $15,%ebx
pxor (%esi,%ecx,1),%mm1
movd %mm0,%ebp
pxor %mm2,%mm0
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
movb 8(%edi),%cl
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
andl $15,%ebp
pxor (%esi,%edx,1),%mm1
movl %ecx,%edx
movd %mm0,%ebx
pxor %mm2,%mm0
shlb $4,%cl
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
andl $240,%edx
pxor (%eax,%ebp,8),%mm1
andl $15,%ebx
pxor (%esi,%ecx,1),%mm1
movd %mm0,%ebp
pxor %mm2,%mm0
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
movb 7(%edi),%cl
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
andl $15,%ebp
pxor (%esi,%edx,1),%mm1
movl %ecx,%edx
movd %mm0,%ebx
pxor %mm2,%mm0
shlb $4,%cl
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
andl $240,%edx
pxor (%eax,%ebp,8),%mm1
andl $15,%ebx
pxor (%esi,%ecx,1),%mm1
movd %mm0,%ebp
pxor %mm2,%mm0
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
movb 6(%edi),%cl
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
andl $15,%ebp
pxor (%esi,%edx,1),%mm1
movl %ecx,%edx
movd %mm0,%ebx
pxor %mm2,%mm0
shlb $4,%cl
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
andl $240,%edx
pxor (%eax,%ebp,8),%mm1
andl $15,%ebx
pxor (%esi,%ecx,1),%mm1
movd %mm0,%ebp
pxor %mm2,%mm0
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
movb 5(%edi),%cl
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
andl $15,%ebp
pxor (%esi,%edx,1),%mm1
movl %ecx,%edx
movd %mm0,%ebx
pxor %mm2,%mm0
shlb $4,%cl
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
andl $240,%edx
pxor (%eax,%ebp,8),%mm1
andl $15,%ebx
pxor (%esi,%ecx,1),%mm1
movd %mm0,%ebp
pxor %mm2,%mm0
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
movb 4(%edi),%cl
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
andl $15,%ebp
pxor (%esi,%edx,1),%mm1
movl %ecx,%edx
movd %mm0,%ebx
pxor %mm2,%mm0
shlb $4,%cl
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
andl $240,%edx
pxor (%eax,%ebp,8),%mm1
andl $15,%ebx
pxor (%esi,%ecx,1),%mm1
movd %mm0,%ebp
pxor %mm2,%mm0
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
movb 3(%edi),%cl
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
andl $15,%ebp
pxor (%esi,%edx,1),%mm1
movl %ecx,%edx
movd %mm0,%ebx
pxor %mm2,%mm0
shlb $4,%cl
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
andl $240,%edx
pxor (%eax,%ebp,8),%mm1
andl $15,%ebx
pxor (%esi,%ecx,1),%mm1
movd %mm0,%ebp
pxor %mm2,%mm0
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
movb 2(%edi),%cl
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
andl $15,%ebp
pxor (%esi,%edx,1),%mm1
movl %ecx,%edx
movd %mm0,%ebx
pxor %mm2,%mm0
shlb $4,%cl
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
andl $240,%edx
pxor (%eax,%ebp,8),%mm1
andl $15,%ebx
pxor (%esi,%ecx,1),%mm1
movd %mm0,%ebp
pxor %mm2,%mm0
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
movb 1(%edi),%cl
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
andl $15,%ebp
pxor (%esi,%edx,1),%mm1
movl %ecx,%edx
movd %mm0,%ebx
pxor %mm2,%mm0
shlb $4,%cl
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
andl $240,%edx
pxor (%eax,%ebp,8),%mm1
andl $15,%ebx
pxor (%esi,%ecx,1),%mm1
movd %mm0,%ebp
pxor %mm2,%mm0
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
movb (%edi),%cl
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
andl $15,%ebp
pxor (%esi,%edx,1),%mm1
movl %ecx,%edx
movd %mm0,%ebx
pxor %mm2,%mm0
shlb $4,%cl
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
andl $240,%edx
pxor (%eax,%ebp,8),%mm1
andl $15,%ebx
pxor (%esi,%ecx,1),%mm1
movd %mm0,%ebp
pxor %mm2,%mm0
psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
andl $15,%ebp
pxor (%esi,%edx,1),%mm1
movd %mm0,%ebx
pxor %mm2,%mm0
movl 4(%eax,%ebp,8),%edi
psrlq $32,%mm0
movd %mm1,%edx
psrlq $32,%mm1
movd %mm0,%ecx
movd %mm1,%ebp
shll $4,%edi
bswap %ebx
bswap %edx
bswap %ecx
xorl %edi,%ebp
bswap %ebp
ret
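# gcm_gmult_4bit_mmx(Xi, Htable): MMX flavour of gcm_gmult_4bit_x86.
# Resolves the PIC address of Lrem_4bit, then delegates to the unrolled
# inner routine above.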
.globl _gcm_gmult_4bit_mmx
.align 4
_gcm_gmult_4bit_mmx:
L_gcm_gmult_4bit_mmx_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
call L005pic_point
L005pic_point:
popl %eax
leal Lrem_4bit-L005pic_point(%eax),%eax
movzbl 15(%edi),%ebx
call __mmx_gmult_4bit_inner
movl 20(%esp),%edi
emms
movl %ebx,12(%edi)
movl %edx,4(%edi)
movl %ecx,8(%edi)
movl %ebp,(%edi)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
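# gcm_ghash_4bit_mmx(Xi, Htable, inp, len): MMX flavour of
# gcm_ghash_4bit_x86.  XORs each 16-byte input block into Xi on the stack
# and calls the unrolled inner routine on it.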
.globl _gcm_ghash_4bit_mmx
.align 4
_gcm_ghash_4bit_mmx:
L_gcm_ghash_4bit_mmx_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%ebp
movl 24(%esp),%esi
movl 28(%esp),%edi
movl 32(%esp),%ecx
call L006pic_point
L006pic_point:
popl %eax
leal Lrem_4bit-L006pic_point(%eax),%eax
addl %edi,%ecx
movl %ecx,32(%esp)
subl $20,%esp
movl 12(%ebp),%ebx
movl 4(%ebp),%edx
movl 8(%ebp),%ecx
movl (%ebp),%ebp
jmp L007mmx_outer_loop
.align 4,0x90
L007mmx_outer_loop:
xorl 12(%edi),%ebx
xorl 4(%edi),%edx
xorl 8(%edi),%ecx
xorl (%edi),%ebp
movl %edi,48(%esp)
movl %ebx,12(%esp)
movl %edx,4(%esp)
movl %ecx,8(%esp)
movl %ebp,(%esp)
movl %esp,%edi
shrl $24,%ebx
call __mmx_gmult_4bit_inner
movl 48(%esp),%edi
leal 16(%edi),%edi
cmpl 52(%esp),%edi
jb L007mmx_outer_loop
movl 40(%esp),%edi
emms
movl %ebx,12(%edi)
movl %edx,4(%edi)
movl %ecx,8(%edi)
movl %ebp,(%edi)
addl $20,%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.align 6,0x90
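# rem_4bit: GF(2^128) reduction constants for the 4-bit method, one
# 64-bit entry per nibble value, laid out as (low dword, high dword)
# pairs with the constant in the high dword (e.g. 29491200 is 0x1C20<<12).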
Lrem_4bit:
.long 0,0,0,29491200,0,58982400,0,38141952
.long 0,117964800,0,113901568,0,76283904,0,88997888
.long 0,235929600,0,265420800,0,227803136,0,206962688
.long 0,152567808,0,148504576,0,177995776,0,190709760
.align 6,0x90
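# rem_8bit: reduction constants for an 8-bit reduction step; emitted by
# the generator but, as far as this file goes, not referenced by any of
# the routines above.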
L008rem_8bit:
.value 0,450,900,582,1800,1738,1164,1358
.value 3600,4050,3476,3158,2328,2266,2716,2910
.value 7200,7650,8100,7782,6952,6890,6316,6510
.value 4656,5106,4532,4214,5432,5370,5820,6014
.value 14400,14722,15300,14854,16200,16010,15564,15630
.value 13904,14226,13780,13334,12632,12442,13020,13086
.value 9312,9634,10212,9766,9064,8874,8428,8494
.value 10864,11186,10740,10294,11640,11450,12028,12094
.value 28800,28994,29444,29382,30600,30282,29708,30158
.value 32400,32594,32020,31958,31128,30810,31260,31710
.value 27808,28002,28452,28390,27560,27242,26668,27118
.value 25264,25458,24884,24822,26040,25722,26172,26622
.value 18624,18690,19268,19078,20424,19978,19532,19854
.value 18128,18194,17748,17558,16856,16410,16988,17310
.value 21728,21794,22372,22182,21480,21034,20588,20910
.value 23280,23346,22900,22710,24056,23610,24188,24510
.value 57600,57538,57988,58182,58888,59338,58764,58446
.value 61200,61138,60564,60758,59416,59866,60316,59998
.value 64800,64738,65188,65382,64040,64490,63916,63598
.value 62256,62194,61620,61814,62520,62970,63420,63102
.value 55616,55426,56004,56070,56904,57226,56780,56334
.value 55120,54930,54484,54550,53336,53658,54236,53790
.value 50528,50338,50916,50982,49768,50090,49644,49198
.value 52080,51890,51444,51510,52344,52666,53244,52798
.value 37248,36930,37380,37830,38536,38730,38156,38094
.value 40848,40530,39956,40406,39064,39258,39708,39646
.value 36256,35938,36388,36838,35496,35690,35116,35054
.value 33712,33394,32820,33270,33976,34170,34620,34558
.value 43456,43010,43588,43910,44744,44810,44364,44174
.value 42960,42514,42068,42390,41176,41242,41820,41630
.value 46560,46114,46692,47014,45800,45866,45420,45230
.value 48112,47666,47220,47542,48376,48442,49020,48830
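# ASCII tag: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated.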
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte 0