| ;/* | 
 | ; Copyright (C) 2014 Apple Inc. All rights reserved. | 
 | ; | 
 | ; Redistribution and use in source and binary forms, with or without | 
 | ; modification, are permitted provided that the following conditions | 
 | ; are met: | 
 | ; 1. Redistributions of source code must retain the above copyright | 
 | ;    notice, this list of conditions and the following disclaimer. | 
 | ; 2. Redistributions in binary form must reproduce the above copyright | 
 | ;    notice, this list of conditions and the following disclaimer in the | 
 | ;    documentation and/or other materials provided with the distribution. | 
 | ; | 
 | ; THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY | 
 | ; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
 | ; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 
 | ; PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR | 
 | ; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | 
 | ; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | 
 | ; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | 
 | ; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | 
 | ; OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
 | ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 
 | ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
 | ;*/ | 
 |  | 
 | ; Make the trampoline's entry point visible to other modules. | 
 | PUBLIC ctiMasmProbeTrampoline | 
 |  | 
 | _TEXT   SEGMENT | 
 |  | 
 | ; Byte offsets describing the ProbeContext record that ctiMasmProbeTrampoline builds on the stack. | 
 | ; The following constants must match the x86_64 version in MacroAssemblerX86Common.cpp. | 
 | ; PTR_SIZE is the size in bytes of one pointer-sized slot. | 
 | PTR_SIZE EQU 8 | 
 |  | 
 | ; Slots 0-3: function pointers and their arguments. The trampoline stores the incoming | 
 | ; probe function (rdx) and probe arg (rbx) into the first two slots, and reads the | 
 | ; init-stack function slot after the executor returns. PROBE_INIT_STACK_ARG_OFFSET is | 
 | ; not referenced by the code visible in this file. | 
 | PROBE_PROBE_FUNCTION_OFFSET EQU (0 * PTR_SIZE) | 
 | PROBE_ARG_OFFSET EQU (1 * PTR_SIZE) | 
 | PROBE_INIT_STACK_FUNCTION_OFFSET EQU (2 * PTR_SIZE) | 
 | PROBE_INIT_STACK_ARG_OFFSET EQU (3 * PTR_SIZE) | 
 |  | 
 | ; Slots 4-19: the sixteen general-purpose registers, one PTR_SIZE slot each. | 
 | ; The names use the 32-bit register spellings (EAX, ECX, ...), but the trampoline | 
 | ; stores and reloads the full 64-bit registers (rax, rcx, ...) through them. | 
 | PROBE_FIRST_GPR_OFFSET EQU (4 * PTR_SIZE) | 
 | PROBE_CPU_EAX_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (0 * PTR_SIZE)) | 
 | PROBE_CPU_ECX_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (1 * PTR_SIZE)) | 
 | PROBE_CPU_EDX_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (2 * PTR_SIZE)) | 
 | PROBE_CPU_EBX_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (3 * PTR_SIZE)) | 
 | PROBE_CPU_ESP_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (4 * PTR_SIZE)) | 
 | PROBE_CPU_EBP_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (5 * PTR_SIZE)) | 
 | PROBE_CPU_ESI_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (6 * PTR_SIZE)) | 
 | PROBE_CPU_EDI_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (7 * PTR_SIZE)) | 
 |  | 
 | PROBE_CPU_R8_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (8 * PTR_SIZE)) | 
 | PROBE_CPU_R9_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (9 * PTR_SIZE)) | 
 | PROBE_CPU_R10_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (10 * PTR_SIZE)) | 
 | PROBE_CPU_R11_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (11 * PTR_SIZE)) | 
 | PROBE_CPU_R12_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (12 * PTR_SIZE)) | 
 | PROBE_CPU_R13_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (13 * PTR_SIZE)) | 
 | PROBE_CPU_R14_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (14 * PTR_SIZE)) | 
 | PROBE_CPU_R15_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (15 * PTR_SIZE)) | 
 | PROBE_FIRST_SPR_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (16 * PTR_SIZE)) | 
 |  | 
 | ; Slots 20-21: the special registers (instruction pointer and flags). | 
 | PROBE_CPU_EIP_OFFSET EQU (PROBE_FIRST_SPR_OFFSET + (0 * PTR_SIZE)) | 
 | PROBE_CPU_EFLAGS_OFFSET EQU (PROBE_FIRST_SPR_OFFSET + (1 * PTR_SIZE)) | 
 | PROBE_FIRST_XMM_OFFSET EQU (PROBE_FIRST_SPR_OFFSET + (2 * PTR_SIZE)) | 
 |  | 
 | ; Slots 22-37: xmm0-xmm15. Each slot is only 8 bytes: the trampoline saves and | 
 | ; restores these registers with movq, i.e. the low quadword of each register only. | 
 | XMM_SIZE EQU 8 | 
 | PROBE_CPU_XMM0_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (0 * XMM_SIZE)) | 
 | PROBE_CPU_XMM1_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (1 * XMM_SIZE)) | 
 | PROBE_CPU_XMM2_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (2 * XMM_SIZE)) | 
 | PROBE_CPU_XMM3_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (3 * XMM_SIZE)) | 
 | PROBE_CPU_XMM4_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (4 * XMM_SIZE)) | 
 | PROBE_CPU_XMM5_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (5 * XMM_SIZE)) | 
 | PROBE_CPU_XMM6_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (6 * XMM_SIZE)) | 
 | PROBE_CPU_XMM7_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (7 * XMM_SIZE)) | 
 |  | 
 | PROBE_CPU_XMM8_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (8 * XMM_SIZE)) | 
 | PROBE_CPU_XMM9_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (9 * XMM_SIZE)) | 
 | PROBE_CPU_XMM10_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (10 * XMM_SIZE)) | 
 | PROBE_CPU_XMM11_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (11 * XMM_SIZE)) | 
 | PROBE_CPU_XMM12_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (12 * XMM_SIZE)) | 
 | PROBE_CPU_XMM13_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (13 * XMM_SIZE)) | 
 | PROBE_CPU_XMM14_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (14 * XMM_SIZE)) | 
 | PROBE_CPU_XMM15_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (15 * XMM_SIZE)) | 
 | ; Total record size: 4 header + 16 GPR + 2 special + 16 XMM slots of 8 bytes each = 304 bytes. | 
 | PROBE_SIZE EQU (PROBE_CPU_XMM15_OFFSET + XMM_SIZE) | 
 |  | 
 | PROBE_EXECUTOR_OFFSET EQU PROBE_SIZE ; Stash the executeProbe function pointer at the end of the ProbeContext. | 
 |  | 
 | ; Extra bytes (5 pointer-sized slots = 40 bytes) that the trampoline keeps beyond PROBE_SIZE | 
 | ; whenever it reserves stack for the record. The executor pointer above occupies the first | 
 | ; of those slots. | 
 | OUT_SIZE EQU (5 * PTR_SIZE) | 
 |  | 
 | ; ctiMasmProbeTrampoline | 
 | ; | 
 | ; Captures the CPU state into a ProbeContext on the stack, hands that context to the | 
 | ; executor, and then resumes execution with whatever state the context holds afterwards. | 
 | ; | 
 | ; Phases: | 
 | ;   1. Reserve a 32-byte-aligned ProbeContext below the current stack pointer and fill it | 
 | ;      with rflags, rip, the 16 general-purpose registers and the low 64 bits of xmm0-xmm15. | 
 | ;   2. Call the executor (incoming rcx) with rcx = address of the ProbeContext. | 
 | ;   3. If the stack pointer value now stored in the context would leave the context (plus | 
 | ;      OUT_SIZE) overlapping or above it, copy the context to a lower address first. | 
 | ;   4. Call the init-stack function from the context if that slot is non-zero. | 
 | ;   5. Reload every register from the context and return to the context's rip with rsp | 
 | ;      equal to the context's rsp. | 
 | ; | 
 | ; Both calls pass their single argument in rcx and reserve 32 bytes of shadow space, | 
 | ; which is consistent with the Microsoft x64 calling convention. | 
 | ; | 
 | ; NOTE(review): xmm registers are saved and reloaded with movq. The memory-to-register | 
 | ; form of movq writes only the low quadword and zeroes bits 127:64 of the destination, | 
 | ; so the upper halves of xmm0-xmm15 do not survive the trampoline. Confirm that no | 
 | ; caller depends on them. | 
 | ctiMasmProbeTrampoline PROC | 
 |     ; Capture rflags first, before any instruction below modifies them. | 
 |     pushfq | 
 |  | 
 |     ; MacroAssemblerX86Common::probe() has already generated code to store some values. | 
 |     ; Together with the rflags pushed above, the top of stack now looks like this: | 
 |     ;     rsp[0 * ptrSize]: rflags | 
 |     ;     rsp[1 * ptrSize]: return address / saved rip | 
 |     ;     rsp[2 * ptrSize]: saved rbx | 
 |     ;     rsp[3 * ptrSize]: saved rdx | 
 |     ;     rsp[4 * ptrSize]: saved rcx | 
 |     ;     rsp[5 * ptrSize]: saved rax | 
 |     ; | 
 |     ; Incoming registers contain: | 
 |     ;     rcx: Probe::executeProbe | 
 |     ;     rdx: probe function | 
 |     ;     rbx: probe arg | 
 |     ;     rax: scratch (was ctiMasmProbeTrampoline) | 
 |  | 
 |     ; rax keeps the address of the six words listed above while rsp moves down to | 
 |     ; make room for the ProbeContext plus OUT_SIZE bytes. | 
 |     mov rax, rsp  | 
 |     sub rsp, PROBE_SIZE + OUT_SIZE | 
 |  | 
 |     ; The X86_64 ABI specifies that the worst case stack alignment requirement is 32 bytes. | 
 |     and rsp, not 01fh | 
 |     ; Since sp points to the ProbeContext, we've ensured that it's protected from interrupts before we initialize it. | 
 |  | 
 |     ; Save the caller's rbp before rbp is repurposed as the ProbeContext pointer. | 
 |     mov [PROBE_CPU_EBP_OFFSET + rsp], rbp | 
 |     mov rbp, rsp ; Save the ProbeContext*. | 
 |  | 
 |     ; Store the incoming executor, probe function and probe arg, plus the registers that | 
 |     ; still hold their original values (rsi, rdi). | 
 |     mov [PROBE_EXECUTOR_OFFSET + rbp], rcx | 
 |     mov [PROBE_PROBE_FUNCTION_OFFSET + rbp], rdx | 
 |     mov [PROBE_ARG_OFFSET + rbp], rbx | 
 |     mov [PROBE_CPU_ESI_OFFSET + rbp], rsi | 
 |     mov [PROBE_CPU_EDI_OFFSET + rbp], rdi | 
 |  | 
 |     ; Move the six words found at rax (layout above) into their ProbeContext slots, | 
 |     ; using rcx as the transfer register now that the executor pointer has been stored. | 
 |     mov rcx, [0 * PTR_SIZE + rax] | 
 |     mov [PROBE_CPU_EFLAGS_OFFSET + rbp], rcx | 
 |     mov rcx, [1 *  PTR_SIZE + rax] | 
 |     mov [PROBE_CPU_EIP_OFFSET + rbp], rcx | 
 |     mov rcx, [2 * PTR_SIZE + rax] | 
 |     mov [PROBE_CPU_EBX_OFFSET + rbp], rcx | 
 |     mov rcx, [3 * PTR_SIZE + rax] | 
 |     mov [PROBE_CPU_EDX_OFFSET + rbp], rcx | 
 |     mov rcx, [4 * PTR_SIZE + rax] | 
 |     mov [PROBE_CPU_ECX_OFFSET + rbp], rcx | 
 |     mov rcx, [5 * PTR_SIZE + rax] | 
 |     mov [PROBE_CPU_EAX_OFFSET + rbp], rcx | 
 |  | 
 |     ; The recorded stack pointer is the address just above those six words. | 
 |     mov rcx, rax | 
 |     add rcx, 6 * PTR_SIZE | 
 |     mov [PROBE_CPU_ESP_OFFSET + rbp], rcx | 
 |  | 
 |     ; r8-r15 have not been touched yet, so they are stored directly. | 
 |     mov [PROBE_CPU_R8_OFFSET + rbp], r8 | 
 |     mov [PROBE_CPU_R9_OFFSET + rbp], r9 | 
 |     mov [PROBE_CPU_R10_OFFSET + rbp], r10 | 
 |     mov [PROBE_CPU_R11_OFFSET + rbp], r11 | 
 |     mov [PROBE_CPU_R12_OFFSET + rbp], r12 | 
 |     mov [PROBE_CPU_R13_OFFSET + rbp], r13 | 
 |     mov [PROBE_CPU_R14_OFFSET + rbp], r14 | 
 |     mov [PROBE_CPU_R15_OFFSET + rbp], r15 | 
 |  | 
 |     ; Store the low 64 bits of each xmm register (each slot is XMM_SIZE = 8 bytes). | 
 |     movq qword ptr [PROBE_CPU_XMM0_OFFSET + rbp], xmm0 | 
 |     movq qword ptr [PROBE_CPU_XMM1_OFFSET + rbp], xmm1 | 
 |     movq qword ptr [PROBE_CPU_XMM2_OFFSET + rbp], xmm2 | 
 |     movq qword ptr [PROBE_CPU_XMM3_OFFSET + rbp], xmm3 | 
 |     movq qword ptr [PROBE_CPU_XMM4_OFFSET + rbp], xmm4 | 
 |     movq qword ptr [PROBE_CPU_XMM5_OFFSET + rbp], xmm5 | 
 |     movq qword ptr [PROBE_CPU_XMM6_OFFSET + rbp], xmm6 | 
 |     movq qword ptr [PROBE_CPU_XMM7_OFFSET + rbp], xmm7 | 
 |     movq qword ptr [PROBE_CPU_XMM8_OFFSET + rbp], xmm8 | 
 |     movq qword ptr [PROBE_CPU_XMM9_OFFSET + rbp], xmm9 | 
 |     movq qword ptr [PROBE_CPU_XMM10_OFFSET + rbp], xmm10 | 
 |     movq qword ptr [PROBE_CPU_XMM11_OFFSET + rbp], xmm11 | 
 |     movq qword ptr [PROBE_CPU_XMM12_OFFSET + rbp], xmm12 | 
 |     movq qword ptr [PROBE_CPU_XMM13_OFFSET + rbp], xmm13 | 
 |     movq qword ptr [PROBE_CPU_XMM14_OFFSET + rbp], xmm14 | 
 |     movq qword ptr [PROBE_CPU_XMM15_OFFSET + rbp], xmm15 | 
 |  | 
 |     ; Call the executor stored in the context. rsp is 32-byte aligned here, and | 
 |     ; subtracting 32 keeps it aligned at the call. | 
 |     mov rcx, rbp ; the Probe::State* arg. | 
 |     sub rsp, 32 ; shadow space | 
 |     call qword ptr[PROBE_EXECUTOR_OFFSET + rbp] | 
 |     add rsp, 32 | 
 |  | 
 |     ; Make sure the ProbeContext is entirely below the result stack pointer so | 
 |     ; that register values are still preserved when we call the initializeStack | 
 |     ; function. | 
 |     ; rax = ProbeContext address + PROBE_SIZE + OUT_SIZE; rdx = stack pointer value | 
 |     ; currently stored in the context. The context stays in place when rdx >= rax. | 
 |     ; NOTE(review): jge is a signed comparison applied to two addresses; jae would be | 
 |     ; the conventional unsigned form. Confirm before changing. | 
 |     mov rcx, PROBE_SIZE + OUT_SIZE | 
 |     mov rax, rbp | 
 |     mov rdx, [PROBE_CPU_ESP_OFFSET + rbp] | 
 |     add rax, rcx | 
 |     cmp rdx, rax  | 
 |     jge ctiMasmProbeTrampolineProbeContextIsSafe | 
 |  | 
 |     ; Allocate a safe place on the stack below the result stack pointer to stash the ProbeContext. | 
 |     ; The new location is strictly below the old one (rbp), and rax is zeroed to serve | 
 |     ; as the byte offset for the copy loop. | 
 |     sub rdx, rcx | 
 |     and rdx, not 01fh ; Keep the stack pointer 32 bytes aligned. | 
 |     xor rax, rax | 
 |     mov rsp, rdx | 
 |  | 
 |     ; rcx = number of bytes to copy. The executor slot at PROBE_EXECUTOR_OFFSET lies | 
 |     ; just past this bound and is not copied. | 
 |     mov rcx, PROBE_SIZE | 
 |  | 
 |     ; Copy the ProbeContext to the safe place. | 
 |     ; The copy runs 8 bytes at a time from offset 0 upwards (old location at rbp, new | 
 |     ; location at rsp). | 
 |     ; NOTE(review): jg is a signed comparison applied to byte counts; ja would be the | 
 |     ; conventional unsigned form. Confirm before changing. | 
 |     ctiMasmProbeTrampolineCopyLoop: | 
 |     mov rdx, [rbp + rax] | 
 |     mov [rsp + rax], rdx | 
 |     add rax, PTR_SIZE | 
 |     cmp rcx, rax | 
 |     jg ctiMasmProbeTrampolineCopyLoop | 
 |  | 
 |     ; From here on rbp refers to the relocated ProbeContext. | 
 |     mov rbp, rsp | 
 |  | 
 |     ; Call initializeStackFunction if present. | 
 |     ; The xor/add pair loads the function pointer into rcx and sets ZF when it is zero, | 
 |     ; in which case the call is skipped. | 
 |     ctiMasmProbeTrampolineProbeContextIsSafe: | 
 |     xor rcx, rcx | 
 |     add rcx, [PROBE_INIT_STACK_FUNCTION_OFFSET + rbp] | 
 |     je ctiMasmProbeTrampolineRestoreRegisters | 
 |  | 
 |     ; The function pointer moves to rdx so that rcx can carry the argument. | 
 |     mov rdx, rcx | 
 |     mov rcx, rbp ; the Probe::State* arg. | 
 |     sub rsp, 32 ; shadow space | 
 |     call rdx | 
 |     add rsp, 32 | 
 |  | 
 |     ctiMasmProbeTrampolineRestoreRegisters: | 
 |  | 
 |     ; To enable probes to modify register state, we copy all registers | 
 |     ; out of the ProbeContext before returning. | 
 |  | 
 |     ; rax and rcx are still needed as scratch registers below, and rbp still holds the | 
 |     ; ProbeContext pointer, so those three are restored last via the restore area. | 
 |     mov rdx, [PROBE_CPU_EDX_OFFSET + rbp] | 
 |     mov rbx, [PROBE_CPU_EBX_OFFSET + rbp] | 
 |     mov rsi, [PROBE_CPU_ESI_OFFSET + rbp] | 
 |     mov rdi, [PROBE_CPU_EDI_OFFSET + rbp] | 
 |  | 
 |     mov r8, [PROBE_CPU_R8_OFFSET + rbp] | 
 |     mov r9, [PROBE_CPU_R9_OFFSET + rbp] | 
 |     mov r10, [PROBE_CPU_R10_OFFSET + rbp] | 
 |     mov r11, [PROBE_CPU_R11_OFFSET + rbp] | 
 |     mov r12, [PROBE_CPU_R12_OFFSET + rbp] | 
 |     mov r13, [PROBE_CPU_R13_OFFSET + rbp] | 
 |     mov r14, [PROBE_CPU_R14_OFFSET + rbp] | 
 |     mov r15, [PROBE_CPU_R15_OFFSET + rbp] | 
 |  | 
 |     ; Reload the low 64 bits of each xmm register from its 8-byte slot. | 
 |     movq xmm0, qword ptr[PROBE_CPU_XMM0_OFFSET + rbp] | 
 |     movq xmm1, qword ptr[PROBE_CPU_XMM1_OFFSET + rbp] | 
 |     movq xmm2, qword ptr[PROBE_CPU_XMM2_OFFSET + rbp] | 
 |     movq xmm3, qword ptr[PROBE_CPU_XMM3_OFFSET + rbp] | 
 |     movq xmm4, qword ptr[PROBE_CPU_XMM4_OFFSET + rbp] | 
 |     movq xmm5, qword ptr[PROBE_CPU_XMM5_OFFSET + rbp] | 
 |     movq xmm6, qword ptr[PROBE_CPU_XMM6_OFFSET + rbp] | 
 |     movq xmm7, qword ptr[PROBE_CPU_XMM7_OFFSET + rbp] | 
 |     movq xmm8, qword ptr[PROBE_CPU_XMM8_OFFSET + rbp] | 
 |     movq xmm9, qword ptr[PROBE_CPU_XMM9_OFFSET + rbp] | 
 |     movq xmm10, qword ptr[PROBE_CPU_XMM10_OFFSET + rbp] | 
 |     movq xmm11, qword ptr[PROBE_CPU_XMM11_OFFSET + rbp] | 
 |     movq xmm12, qword ptr[PROBE_CPU_XMM12_OFFSET + rbp] | 
 |     movq xmm13, qword ptr[PROBE_CPU_XMM13_OFFSET + rbp] | 
 |     movq xmm14, qword ptr[PROBE_CPU_XMM14_OFFSET + rbp] | 
 |     movq xmm15, qword ptr[PROBE_CPU_XMM15_OFFSET + rbp] | 
 |  | 
 |     ; There are 6 more registers left to restore: | 
 |     ;     rax, rcx, rbp, rsp, rip, and rflags. | 
 |  | 
 |     ; The restoration sequence at the end of this procedure works by popping | 
 |     ; 5 words off the stack into rflags, rax, rcx, rbp, and rip. These 5 words need | 
 |     ; to be pushed on top of the final rsp value so that just by popping the 5 words, | 
 |     ; we'll get the rsp that the probe wants to set. Let's call this area (for storing | 
 |     ; these 5 words) the restore area. | 
 |     mov rcx, [PROBE_CPU_ESP_OFFSET + rbp] | 
 |     sub rcx, 5 * PTR_SIZE | 
 |  | 
 |     ; rcx now points to the restore area. | 
 |  | 
 |     ; Copy remaining restore values from the ProbeContext to the restore area. | 
 |     ; Note: We already ensured above that the ProbeContext is in a safe location before | 
 |     ; calling the initializeStackFunction. The initializeStackFunction is not allowed to | 
 |     ; change the stack pointer again. | 
 |     ; The words are written in the order the pops below consume them; rsp is switched | 
 |     ; to the restore area only after all five have been written. | 
 |     mov rax, [PROBE_CPU_EFLAGS_OFFSET + rbp] | 
 |     mov [0 * PTR_SIZE + rcx], rax | 
 |     mov rax, [PROBE_CPU_EAX_OFFSET + rbp] | 
 |     mov [1 * PTR_SIZE + rcx], rax | 
 |     mov rax, [PROBE_CPU_ECX_OFFSET + rbp]  | 
 |     mov [2 * PTR_SIZE + rcx], rax | 
 |     mov rax, [PROBE_CPU_EBP_OFFSET + rbp] | 
 |     mov [3 * PTR_SIZE + rcx], rax | 
 |     mov rax, [PROBE_CPU_EIP_OFFSET + rbp] | 
 |     mov [4 * PTR_SIZE + rcx], rax | 
 |     mov rsp, rcx | 
 |  | 
 |     ; Do the remaining restoration by popping off the restore area. | 
 |     ; The final ret pops the context's rip, leaving rsp at the context's rsp value. | 
 |     popfq | 
 |     pop rax | 
 |     pop rcx | 
 |     pop rbp | 
 |     ret | 
 | ctiMasmProbeTrampoline ENDP | 
 |  | 
 | ; Close the code segment opened by "_TEXT SEGMENT" near the top of the file. | 
 | _TEXT   ENDS | 
 |  | 
 | ; End of the assembly source. | 
 | END |