| dnl x86 fat binary entrypoints. |
| |
| dnl Copyright 2003 Free Software Foundation, Inc. |
| dnl |
| dnl This file is part of the GNU MP Library. |
| dnl |
| dnl The GNU MP Library is free software; you can redistribute it and/or |
| dnl modify it under the terms of the GNU Lesser General Public License as |
| dnl published by the Free Software Foundation; either version 3 of the |
| dnl License, or (at your option) any later version. |
| dnl |
| dnl The GNU MP Library is distributed in the hope that it will be useful, |
| dnl but WITHOUT ANY WARRANTY; without even the implied warranty of |
| dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| dnl Lesser General Public License for more details. |
| dnl |
| dnl You should have received a copy of the GNU Lesser General Public License |
| dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. |
| |
| include(`../config.m4') |
| |
| |
| dnl Forcibly disable profiling. |
| dnl |
| dnl The entrypoints and inits are small enough not to worry about, the real |
| dnl routines arrived at will have any profiling. Also, the way the code |
| dnl here ends with a jump means we won't work properly with the |
| dnl "instrument" profiling scheme anyway. |
| |
| define(`WANT_PROFILING',no) |
| |
| |
| TEXT |
| |
| |
| dnl Usage: FAT_ENTRY(name, offset) |
| dnl |
| dnl Emit a fat binary entrypoint function of the given name. This is the |
| dnl normal entry for applications, eg. __gmpn_add_n. |
| dnl |
| dnl The code simply jumps through the function pointer in __gmpn_cpuvec at |
| dnl the given "offset" (in bytes). |
| dnl |
| dnl For non-PIC, the jumps are 5 bytes each, aligning them to 8 should be |
| dnl fine for all x86s. |
| dnl |
| dnl For PIC, the jumps are 20 bytes each, and are best aligned to 16 to |
| dnl ensure at least the first two instructions don't cross a cache line |
| dnl boundary. |
| dnl |
| dnl Note the extra `' ahead of PROLOGUE obscures it from the HAVE_NATIVE |
| dnl grepping in configure, stopping that code trying to eval something with |
| dnl $1 in it. |
| |
| define(FAT_ENTRY, |
| m4_assert_numargs(2) |
| ` ALIGN(ifdef(`PIC',16,8)) |
| `'PROLOGUE($1) |
| ifdef(`PIC', |
| ` call L(movl_eip_edx) |
| L(entry_here$2): |
| addl $_GLOBAL_OFFSET_TABLE_+[.-L(entry_here$2)], %edx |
| movl GSYM_PREFIX`'__gmpn_cpuvec@GOT(%edx), %edx |
| jmp *m4_empty_if_zero($2)(%edx) |
| ',`dnl non-PIC |
| jmp *GSYM_PREFIX`'__gmpn_cpuvec+$2 |
| ') |
| EPILOGUE() |
| ') |
| |
| |
| dnl FAT_ENTRY for each CPUVEC_FUNCS_LIST |
| dnl |
| |
| define(`CPUVEC_offset',0) |
| foreach(i, |
| `FAT_ENTRY(MPN(i),CPUVEC_offset) |
| define(`CPUVEC_offset',eval(CPUVEC_offset + 4))', |
| CPUVEC_FUNCS_LIST) |
| |
| ifdef(`PIC',` |
| ALIGN(8) |
| L(movl_eip_edx): |
| movl (%esp), %edx |
| ret_internal |
| ') |
| |
| |
| dnl Usage: FAT_INIT(name, offset) |
| dnl |
| dnl Emit a fat binary initializer function of the given name. These |
| dnl functions are the initial values for the pointers in __gmpn_cpuvec. |
| dnl |
| dnl The code simply calls __gmpn_cpuvec_init, and then jumps back through |
| dnl the __gmpn_cpuvec pointer, at the given "offset" (in bytes). |
| dnl __gmpn_cpuvec_init will have stored the address of the selected |
| dnl implementation there. |
| dnl |
| dnl Only one of these routines will be executed, and only once, since after |
| dnl that all the __gmpn_cpuvec pointers go to real routines. So there's no |
| dnl need for anything special here, just something small and simple. To |
| dnl keep code size down, "fat_init" is a shared bit of code, arrived at |
| dnl with the offset in %al. %al is used since the movb instruction is 2 |
| dnl bytes where %eax would be 4. |
| dnl |
| dnl Note having `PROLOGUE in FAT_INIT obscures that PROLOGUE from the |
| dnl HAVE_NATIVE grepping in configure, preventing that code trying to eval |
| dnl something with $1 in it. |
| |
| define(FAT_INIT, |
| m4_assert_numargs(2) |
| `PROLOGUE($1) |
| movb $`'$2, %al |
| jmp L(fat_init) |
| EPILOGUE() |
| ') |
| |
| L(fat_init): |
| C al __gmpn_cpuvec byte offset |
| |
| movsbl %al, %eax |
| pushl %eax |
| |
| ifdef(`PIC',` |
| pushl %ebx |
| call L(movl_eip_ebx) |
| L(init_here): |
| addl $_GLOBAL_OFFSET_TABLE_+[.-L(init_here)], %ebx |
| call GSYM_PREFIX`'__gmpn_cpuvec_init@PLT |
| movl GSYM_PREFIX`'__gmpn_cpuvec@GOT(%ebx), %edx |
| popl %ebx |
| popl %eax |
| jmp *(%edx,%eax) |
| |
| L(movl_eip_ebx): |
| movl (%esp), %ebx |
| ret_internal |
| |
| ',`dnl non-PIC |
| call GSYM_PREFIX`'__gmpn_cpuvec_init |
| popl %eax |
| jmp *GSYM_PREFIX`'__gmpn_cpuvec(%eax) |
| ') |
| |
| dnl FAT_INIT for each CPUVEC_FUNCS_LIST |
| dnl |
| |
| define(`CPUVEC_offset',0) |
| foreach(i, |
| `FAT_INIT(MPN(i`'_init),CPUVEC_offset) |
| define(`CPUVEC_offset',eval(CPUVEC_offset + 4))', |
| CPUVEC_FUNCS_LIST) |
| |
| |
| |
| C long __gmpn_cpuid (char dst[12], int id); |
| C |
| C This is called only once, so just something simple and compact is fine. |
| |
| defframe(PARAM_ID, 8) |
| defframe(PARAM_DST, 4) |
| deflit(`FRAME',0) |
| |
| PROLOGUE(__gmpn_cpuid) |
| pushl %esi FRAME_pushl() |
| pushl %ebx FRAME_pushl() |
| movl PARAM_ID, %eax |
| cpuid |
| movl PARAM_DST, %esi |
| movl %ebx, (%esi) |
| movl %edx, 4(%esi) |
| movl %ecx, 8(%esi) |
| popl %ebx |
| popl %esi |
| ret |
| EPILOGUE() |
| |
| |
| C int __gmpn_cpuid_available (void); |
| C |
| C Return non-zero if the cpuid instruction is available, which means late |
| C model 80486 and higher. 80386 and early 80486 don't have cpuid. |
| C |
| C The test follows Intel AP-485 application note, namely that if bit 21 is |
| C modifiable then cpuid is supported. This test is reentrant and thread |
| C safe, since of course any interrupt or context switch will preserve the |
| C flags while we're tinkering with them. |
| C |
| C This is called only once, so just something simple and compact is fine. |
| |
| PROLOGUE(__gmpn_cpuid_available) |
| pushf |
| popl %ecx C old flags |
| |
| movl %ecx, %edx |
| xorl $0x200000, %edx |
| pushl %edx |
| popf |
| pushf |
| popl %edx C tweaked flags |
| |
| movl $1, %eax |
| cmpl %ecx, %edx |
| jne L(available) |
| xorl %eax, %eax C not changed, so cpuid not available |
| |
| L(available): |
| ret |
| EPILOGUE() |