| dnl x86 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number. |
| |
| dnl Copyright 2007, 2008 Free Software Foundation, Inc. |
| |
| dnl This file is part of the GNU MP Library. |
| |
| dnl The GNU MP Library is free software; you can redistribute it and/or modify |
| dnl it under the terms of the GNU Lesser General Public License as published |
| dnl by the Free Software Foundation; either version 3 of the License, or (at |
| dnl your option) any later version. |
| |
| dnl The GNU MP Library is distributed in the hope that it will be useful, but |
| dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
| dnl License for more details. |
| |
| dnl You should have received a copy of the GNU Lesser General Public License |
| dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. |
| |
| include(`../config.m4') |
| |
| |
| C norm frac |
| C 486 |
| C P5 |
| C P6-13 29.2 |
| C P6-15 *26 |
| C K6 |
| C K7 22 |
| C K8 *19 |
| C P4-f1 |
| C P4-f2 *65 |
| C P4-f3 |
| C P4-f4 *72 |
| |
| C A star means numbers not updated for the latest version of the code. |
| |
| |
| C TODO |
| C * Perhaps keep ecx or esi in stack slot, freeing up a reg for q0. |
| C * The loop has not been carefully tuned. We should at the very least do |
| C some local insn swapping. |
| C * The code outside the main loop is what gcc generated. Clean up! |
| C * Clean up stack slot usage. |
| |
| C INPUT PARAMETERS |
| C qp |
| C fn |
| C up_param |
| C un_param |
| C dp |
| |
| |
| C eax ebx ecx edx esi edi ebp |
| C cnt qp |
| |
| ASM_START() |
| TEXT |
| ALIGN(16) |
| PROLOGUE(mpn_divrem_2) |
| push %ebp |
| push %edi |
| push %esi |
| push %ebx |
| sub $36, %esp |
| mov 68(%esp), %ecx C un |
| mov 72(%esp), %esi C dp |
| movl $0, 32(%esp) |
| lea 0(,%ecx,4), %edi |
| add 64(%esp), %edi C up |
| mov (%esi), %ebx |
| mov 4(%esi), %eax |
| mov %ebx, 20(%esp) |
| sub $12, %edi |
| mov %eax, 24(%esp) |
| mov %edi, 12(%esp) |
| mov 8(%edi), %ebx |
| mov 4(%edi), %ebp |
| cmp %eax, %ebx |
| jb L(8) |
| seta %dl |
| cmp 20(%esp), %ebp |
| setae %al |
| or %dl, %al |
| jne L(35) |
| L(8): |
| mov 60(%esp), %esi C fn |
| lea -3(%esi,%ecx), %edi |
| test %edi, %edi |
| js L(9) |
| mov 24(%esp), %edx |
| mov $-1, %esi |
| mov %esi, %eax |
| mov %esi, %ecx |
| not %edx |
| divl 24(%esp) |
| mov %eax, %esi |
| imul 24(%esp), %eax |
| mov %eax, (%esp) |
| mov %esi, %eax |
| mull 20(%esp) |
| mov (%esp), %eax |
| add 20(%esp), %eax |
| adc $0, %ecx |
| add %eax, %edx |
| adc $0, %ecx |
| mov %ecx, %eax |
| js L(32) |
| L(36): dec %esi |
| sub 24(%esp), %edx |
| sbb $0, %eax |
| jns L(36) |
| L(32): |
| mov %esi, 16(%esp) C di |
| mov %edi, %ecx C un |
| mov 12(%esp), %esi C up |
| mov 24(%esp), %eax |
| neg %eax |
| mov %eax, 4(%esp) C -d1 |
| ALIGN(16) |
| nop |
| |
| C eax ebx ecx edx esi edi ebp 0 4 8 12 16 20 24 28 32 56 60 |
| C n2 un up n1 q0 -d1 di d0 d1 msl qp fn |
| |
| L(loop): |
| mov 16(%esp), %eax C di |
| mul %ebx |
| add %ebp, %eax |
| mov %eax, (%esp) C q0 |
| adc %ebx, %edx |
| mov %edx, %edi C q |
| imul 4(%esp), %edx |
| mov 20(%esp), %eax |
| lea (%edx, %ebp), %ebx C n1 -= ... |
| mul %edi |
| xor %ebp, %ebp |
| cmp 60(%esp), %ecx |
| jl L(19) |
| mov (%esi), %ebp |
| sub $4, %esi |
| L(19): sub 20(%esp), %ebp |
| sbb 24(%esp), %ebx |
| sub %eax, %ebp |
| sbb %edx, %ebx |
| mov 20(%esp), %eax C d1 |
| inc %edi |
| xor %edx, %edx |
| cmp (%esp), %ebx |
| adc $-1, %edx C mask |
| add %edx, %edi C q-- |
| and %edx, %eax C d0 or 0 |
| and 24(%esp), %edx C d1 or 0 |
| add %eax, %ebp |
| adc %edx, %ebx |
| cmp 24(%esp), %ebx |
| jae L(fix) |
| L(bck): mov 56(%esp), %edx |
| mov %edi, (%edx, %ecx, 4) |
| dec %ecx |
| jns L(loop) |
| |
| L(9): mov 64(%esp), %esi C up |
| mov %ebp, (%esi) |
| mov %ebx, 4(%esi) |
| mov 32(%esp), %eax |
| add $36, %esp |
| pop %ebx |
| pop %esi |
| pop %edi |
| pop %ebp |
| ret |
| |
| L(fix): seta %dl |
| cmp 20(%esp), %ebp |
| setae %al |
| or %dl, %al |
| je L(bck) |
| inc %edi |
| sub 20(%esp), %ebp |
| sbb 24(%esp), %ebx |
| jmp L(bck) |
| |
| L(35): sub 20(%esp), %ebp |
| sbb 24(%esp), %ebx |
| movl $1, 32(%esp) |
| jmp L(8) |
| EPILOGUE() |