blob: e7f9bcbcb03163d65d8932a22510b10847700573 [file] [log] [blame]
//-------------------------------------------------------------------------------------------------------
// Copyright (C) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
//-------------------------------------------------------------------------------------------------------
#include "Backend.h"
#if ENABLE_NATIVE_CODEGEN
namespace {
// The definitions in this anonymous namespace must be constexpr to allow OACR to conclude that certain operations
// in InterpreterThunkEmitter::EncodeInterpreterThunk are safe. Because constexpr requires that the declaration
// and the definition appear at the same place (i.e., no forward declarations), this means that we either have
// to move all 5 definitions of InterpreterThunk into the header file, or we have to make InterpreterThunkSize
// public. The latter option seems the less objectionable, so that's what I've done here.
//
// Layout of this namespace, per target architecture:
//   * "...Offset" constants: byte indices into the InterpreterThunk template below. They name the placeholder
//     bytes (written as 0x00 in the template) that EncodeInterpreterThunk patches after the template has been
//     copied into a freshly allocated block.
//   * InterpreterThunk: the machine-code header copied once to the start of every thunk block.
//   * Call / JmpOffset: the per-thunk call stub that FillBuffer replicates after the header; JmpOffset is the
//     byte index inside Call of the branch placeholder that FillBuffer patches to reach the epilog.
//   * Epilog: the shared return sequence FillBuffer places after the call stubs.
// If an instruction is added to or removed from a template, every offset constant behind it must be recomputed.
#ifdef _M_X64
#ifdef _WIN32
// Windows x64. PrologSize and StackAllocSize are passed to PrologEncoder::EncodeSmallProlog in FillBuffer.
constexpr BYTE FunctionInfoOffset = 23;
constexpr BYTE FunctionProxyOffset = 27;
constexpr BYTE DynamicThunkAddressOffset = 31;
constexpr BYTE CallBlockStartAddrOffset = 41;
constexpr BYTE ThunkSizeOffset = 55;
constexpr BYTE ErrorOffset = 64;
constexpr BYTE ThunkAddressOffset = 81;
constexpr BYTE PrologSize = 80;
constexpr BYTE StackAllocSize = 0x28;
//
// Home the arguments onto the stack and pass a pointer to the base of the stack location to the inner thunk
//
// Calling convention requires that caller should allocate at least 0x20 bytes and the stack be 16 byte aligned.
// Hence, we allocate 0x28 bytes of stack space for the callee to use. The callee uses 8 bytes to push the first
// argument and the rest 0x20 ensures alignment is correct.
//
constexpr BYTE InterpreterThunk[InterpreterThunkEmitter::InterpreterThunkSize] = {
0x48, 0x89, 0x54, 0x24, 0x10, // mov qword ptr [rsp+10h],rdx
0x48, 0x89, 0x4C, 0x24, 0x08, // mov qword ptr [rsp+8],rcx
0x4C, 0x89, 0x44, 0x24, 0x18, // mov qword ptr [rsp+18h],r8
0x4C, 0x89, 0x4C, 0x24, 0x20, // mov qword ptr [rsp+20h],r9
0x48, 0x8B, 0x41, 0x00, // mov rax, qword ptr [rcx+FunctionInfoOffset]
0x48, 0x8B, 0x48, 0x00, // mov rcx, qword ptr [rax+FunctionProxyOffset]
0x48, 0x8B, 0x51, 0x00, // mov rdx, qword ptr [rcx+DynamicThunkAddressOffset]
// Range Check for Valid call target
0x48, 0x83, 0xE2, 0xF8, // and rdx, 0xFFFFFFFFFFFFFFF8h ;Force 8 byte alignment
0x48, 0x8b, 0xca, // mov rcx, rdx
0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov rax, CallBlockStartAddress
0x48, 0x2b, 0xc8, // sub rcx, rax
0x48, 0x81, 0xf9, 0x00, 0x00, 0x00, 0x00, // cmp rcx, ThunkSize
0x76, 0x09, // jbe $safe
0x48, 0xc7, 0xc1, 0x00, 0x00, 0x00, 0x00, // mov rcx, errorcode
0xcd, 0x29, // int 29h
// $safe:
0x48, 0x8D, 0x4C, 0x24, 0x08, // lea rcx, [rsp+8] ;Load the address to stack
0x48, 0x83, 0xEC, StackAllocSize, // sub rsp,28h
0x48, 0xB8, 0x00, 0x00, 0x00 ,0x00, 0x00, 0x00, 0x00, 0x00, // mov rax, <thunk>
0xFF, 0xE2, // jmp rdx
0xCC, 0xCC, 0xCC, 0xCC, 0xCC // int 3 ;for alignment to size of 8 we are adding this
};
// Undoes the "sub rsp,28h" performed by the header above, then returns.
constexpr BYTE Epilog[] = {
0x48, 0x83, 0xC4, StackAllocSize, // add rsp,28h
0xC3 // ret
};
#else // Sys V AMD64
// System V AMD64: same range check as the Windows variant, but with an rbp frame and no extra stack allocation.
constexpr BYTE FunctionInfoOffset = 7;
constexpr BYTE FunctionProxyOffset = 11;
constexpr BYTE DynamicThunkAddressOffset = 15;
constexpr BYTE CallBlockStartAddrOffset = 25;
constexpr BYTE ThunkSizeOffset = 39;
constexpr BYTE ErrorOffset = 48;
constexpr BYTE ThunkAddressOffset = 61;
constexpr BYTE PrologSize = 60;
constexpr BYTE StackAllocSize = 0x0;
constexpr BYTE InterpreterThunk[InterpreterThunkEmitter::InterpreterThunkSize] = {
0x55, // push rbp // Prolog - setup the stack frame
0x48, 0x89, 0xe5, // mov rbp, rsp
0x48, 0x8b, 0x47, 0x00, // mov rax, qword ptr [rdi + FunctionInfoOffset]
0x48, 0x8B, 0x48, 0x00, // mov rcx, qword ptr [rax+FunctionProxyOffset]
0x48, 0x8B, 0x51, 0x00, // mov rdx, qword ptr [rcx+DynamicThunkAddressOffset]
// Range Check for Valid call target
0x48, 0x83, 0xE2, 0xF8, // and rdx, 0xfffffffffffffff8 // Force 8 byte alignment
0x48, 0x89, 0xd1, // mov rcx, rdx
0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov rax, CallBlockStartAddress
0x48, 0x29, 0xc1, // sub rcx, rax
0x48, 0x81, 0xf9, 0x00, 0x00, 0x00, 0x00, // cmp rcx, ThunkSize
0x76, 0x09, // jbe safe
0x48, 0xc7, 0xc1, 0x00, 0x00, 0x00, 0x00, // mov rcx, errorcode
0xcd, 0x29, // int 29h <-- xplat TODO: just to exit
// safe:
0x48, 0x8d, 0x7c, 0x24, 0x10, // lea rdi, [rsp+0x10]
0x48, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov rax, <thunk> // stack already 16-byte aligned
0xff, 0xe2, // jmp rdx
0xcc // int 3 // for alignment to size of 8
};
// Restores rbp pushed by the header above, then returns.
constexpr BYTE Epilog[] = {
0x5d, // pop rbp
0xc3 // ret
};
#endif
#elif defined(_M_ARM)
// ARM32 (Thumb-2). The three "...Offset" constants for the LDR instructions index the single byte of each
// instruction that EncodeInterpreterThunk overwrites; the "...InstrOffset" constants and ThunkAddressOffset
// index the first byte of whole 4-byte MOVW/MOVT placeholders.
constexpr BYTE ThunkAddressOffset = 8;
constexpr BYTE FunctionInfoOffset = 18;
constexpr BYTE FunctionProxyOffset = 22;
constexpr BYTE DynamicThunkAddressOffset = 26;
constexpr BYTE CallBlockStartAddressInstrOffset = 42;
constexpr BYTE CallThunkSizeInstrOffset = 54;
constexpr BYTE ErrorOffset = 64;
constexpr BYTE InterpreterThunk[InterpreterThunkEmitter::InterpreterThunkSize] = {
0x0F, 0xB4, // push {r0-r3}
0x2D, 0xE9, 0x00, 0x48, // push {r11,lr}
0xEB, 0x46, // mov r11,sp
0x00, 0x00, 0x00, 0x00, // movw r1,ThunkAddress
0x00, 0x00, 0x00, 0x00, // movt r1,ThunkAddress
0xD0, 0xF8, 0x00, 0x20, // ldr.w r2,[r0,#0x00]
0xD2, 0xF8, 0x00, 0x00, // ldr.w r0,[r2,#0x00]
0xD0, 0xF8, 0x00, 0x30, // ldr.w r3,[r0,#0x00]
0x4F, 0xF6, 0xF9, 0x70, // mov r0,#0xFFF9
0xCF, 0xF6, 0xFF, 0x70, // movt r0,#0xFFFF
0x03, 0xEA, 0x00, 0x03, // and r3,r3,r0
0x18, 0x46, // mov r0, r3
0x00, 0x00, 0x00, 0x00, // movw r12, CallBlockStartAddress
0x00, 0x00, 0x00, 0x00, // movt r12, CallBlockStartAddress
0xA0, 0xEB, 0x0C, 0x00, // sub r0, r12
0x00, 0x00, 0x00, 0x00, // mov r12, ThunkSize
0x60, 0x45, // cmp r0, r12
0x02, 0xD9, // bls $safe
0x4F, 0xF0, 0x00, 0x00, // mov r0, errorcode
0xFB, 0xDE, // Equivalent to int 0x29
//$safe:
0x02, 0xA8, // add r0,sp,#8
0x18, 0x47 // bx r3
};
// Per-thunk call stub; the 4 placeholder bytes at JmpOffset are replaced by FillBuffer with an encoded branch
// to the epilog.
constexpr BYTE JmpOffset = 2;
constexpr BYTE Call[] = {
0x88, 0x47, // blx r1
0x00, 0x00, 0x00, 0x00, // b.w epilog
0xFE, 0xDE, // int 3 ;Required for alignment
};
constexpr BYTE Epilog[] = {
0x5D, 0xF8, 0x04, 0xBB, // pop {r11}
0x5D, 0xF8, 0x14, 0xFB // ldr pc,[sp],#0x14
};
#elif defined(_M_ARM64)
// ARM64. Every offset below indexes the first byte of a 4-byte instruction; EncodeInterpreterThunk ORs the
// scaled immediate into the three LDR instructions and overwrites the four MOVZ/MOVK placeholders.
constexpr BYTE FunctionInfoOffset = 24;
constexpr BYTE FunctionProxyOffset = 28;
constexpr BYTE DynamicThunkAddressOffset = 32;
constexpr BYTE ThunkAddressOffset = 36;
//TODO: saravind :Implement Range Check for ARM64
constexpr BYTE InterpreterThunk[InterpreterThunkEmitter::InterpreterThunkSize] = {
0xFD, 0x7B, 0xBB, 0xA9, //stp fp, lr, [sp, #-80]! ;Prologue
0xFD, 0x03, 0x00, 0x91, //mov fp, sp ;update frame pointer to the stack pointer
0xE0, 0x07, 0x01, 0xA9, //stp x0, x1, [sp, #16] ;Prologue again; save all registers
0xE2, 0x0F, 0x02, 0xA9, //stp x2, x3, [sp, #32]
0xE4, 0x17, 0x03, 0xA9, //stp x4, x5, [sp, #48]
0xE6, 0x1F, 0x04, 0xA9, //stp x6, x7, [sp, #64]
0x02, 0x00, 0x40, 0xF9, //ldr x2, [x0, #0x00] ;offset will be replaced with Offset of FunctionInfo
0x40, 0x00, 0x40, 0xF9, //ldr x0, [x2, #0x00] ;offset will be replaced with Offset of FunctionProxy
0x03, 0x00, 0x40, 0xF9, //ldr x3, [x0, #0x00] ;offset will be replaced with offset of DynamicInterpreterThunk
//Following 4 MOV Instrs are to move the 64-bit address of the InterpreterThunk address into register x1.
0x00, 0x00, 0x00, 0x00, //movz x1, #0x00 ;This is overwritten with the actual thunk address(16 - 0 bits) move
0x00, 0x00, 0x00, 0x00, //movk x1, #0x00, lsl #16 ;This is overwritten with the actual thunk address(32 - 16 bits) move
0x00, 0x00, 0x00, 0x00, //movk x1, #0x00, lsl #32 ;This is overwritten with the actual thunk address(48 - 32 bits) move
0x00, 0x00, 0x00, 0x00, //movk x1, #0x00, lsl #48 ;This is overwritten with the actual thunk address(64 - 48 bits) move
0xE0, 0x43, 0x00, 0x91, //add x0, sp, #16
0x60, 0x00, 0x1F, 0xD6, //br x3
0xCC, 0xCC, 0xCC, 0xCC //int 3 for 8byte alignment
};
// Per-thunk call stub; the 4 placeholder bytes at JmpOffset are replaced by FillBuffer with an encoded branch
// to the epilog.
constexpr BYTE JmpOffset = 4;
constexpr BYTE Call[] = {
0x20, 0x00, 0x3f, 0xd6, // blr x1
0x00, 0x00, 0x00, 0x00 // b epilog
};
// Releases the 80-byte frame set up by the first instruction of the header, then returns.
constexpr BYTE Epilog[] = {
0xfd, 0x7b, 0xc5, 0xa8, // ldp fp, lr, [sp], #80
0xc0, 0x03, 0x5f, 0xd6 // ret
};
#else // x86
// x86 (32-bit): the function object is read from the first stack argument ([ebp+8]).
constexpr BYTE FunctionInfoOffset = 8;
constexpr BYTE FunctionProxyOffset = 11;
constexpr BYTE DynamicThunkAddressOffset = 14;
constexpr BYTE CallBlockStartAddrOffset = 21;
constexpr BYTE ThunkSizeOffset = 26;
constexpr BYTE ErrorOffset = 33;
constexpr BYTE ThunkAddressOffset = 44;
constexpr BYTE InterpreterThunk[InterpreterThunkEmitter::InterpreterThunkSize] = {
0x55, // push ebp ;Prolog - setup the stack frame
0x8B, 0xEC, // mov ebp,esp
0x8B, 0x45, 0x08, // mov eax, dword ptr [ebp+8]
0x8B, 0x40, 0x00, // mov eax, dword ptr [eax+FunctionInfoOffset]
0x8B, 0x40, 0x00, // mov eax, dword ptr [eax+FunctionProxyOffset]
0x8B, 0x48, 0x00, // mov ecx, dword ptr [eax+DynamicThunkAddressOffset]
// Range Check for Valid call target
0x83, 0xE1, 0xF8, // and ecx, 0FFFFFFF8h
0x8b, 0xc1, // mov eax, ecx
0x2d, 0x00, 0x00, 0x00, 0x00, // sub eax, CallBlockStartAddress
0x3d, 0x00, 0x00, 0x00, 0x00, // cmp eax, ThunkSize
0x76, 0x07, // jbe SHORT $safe
0xb9, 0x00, 0x00, 0x00, 0x00, // mov ecx, errorcode
0xCD, 0x29, // int 29h
//$safe
0x8D, 0x45, 0x08, // lea eax, ebp+8
0x50, // push eax
0xB8, 0x00, 0x00, 0x00, 0x00, // mov eax, <thunk>
0xFF, 0xE1, // jmp ecx
0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC // int 3 for 8byte alignment
};
constexpr BYTE Epilog[] = {
0x5D, // pop ebp
0xC3 // ret
};
#endif
#if defined(_M_X64) || defined(_M_IX86)
// Per-thunk call stub shared by x86 and x64. JmpOffset indexes the 32-bit displacement of the jmp, which
// FillBuffer patches with the distance from the end of the jmp instruction to the epilog.
constexpr BYTE JmpOffset = 3;
constexpr BYTE Call[] = {
0xFF, 0xD0, // call rax
0xE9, 0x00, 0x00, 0x00, 0x00, // jmp [offset]
0xCC, // int 3 ;for alignment to size of 8 we are adding this
};
#endif
// Size in bytes of the header template; the first call stub of a block starts this many bytes into the block.
constexpr BYTE HeaderSize = sizeof(InterpreterThunk);
} // anonymous namespace
// Size in bytes of one per-function call stub (the Call template). Thunk addresses inside a block are
// computed as block start + HeaderSize + index * ThunkSize.
const BYTE InterpreterThunkEmitter::ThunkSize = sizeof(Call);
// Creates an emitter with no thunk block allocated yet.
//   context               - script context consulted later for configuration, thread context and the remote
//                           (out-of-process JIT) script handle.
//   allocator             - arena used for the emit buffer manager and for thunk-block list nodes.
//   codePageAllocators    - forwarded to the emit buffer manager.
//   isAsmInterpreterThunk - selects the asm.js interpreter thunk instead of the regular one.
// thunkCount starts at 0 and thunkBuffer at nullptr, so the first GetNextThunk call has to obtain a block.
InterpreterThunkEmitter::InterpreterThunkEmitter(Js::ScriptContext* context, ArenaAllocator* allocator, CustomHeap::InProcCodePageAllocators * codePageAllocators, bool isAsmInterpreterThunk) :
emitBufferManager(allocator, codePageAllocators, /*scriptContext*/ nullptr, nullptr, _u("Interpreter thunk buffer"), GetCurrentProcess()),
scriptContext(context),
allocator(allocator),
thunkCount(0),
thunkBuffer(nullptr),
isAsmInterpreterThunk(isAsmInterpreterThunk)
{
}
// Returns a pointer to this emitter's thunkBlocks list, i.e. the list that newly created
// thunk blocks are prepended to.
SListBase<ThunkBlock>*
InterpreterThunkEmitter::GetThunkBlocksList()
{
    SListBase<ThunkBlock>* const blocks = &this->thunkBlocks;
    return blocks;
}
//
// Hands out the next dynamic interpreter thunk.
//
// On return *ppDynamicInterpreterThunk holds the address of the per-function call stub and the return
// value is the entry point of the block that contains it. Thunks are taken from the current block, then
// from a free-listed block, then from a newly created block. If no block can be created, the static
// interpreter thunk is returned and *ppDynamicInterpreterThunk is left as nullptr.
//
BYTE* InterpreterThunkEmitter::GetNextThunk(PVOID* ppDynamicInterpreterThunk)
{
    Assert(ppDynamicInterpreterThunk);
    Assert(*ppDynamicInterpreterThunk == nullptr);

    if (this->thunkCount == 0)
    {
        // The current block is exhausted: recycle a released thunk if one exists.
        if (!this->freeListedThunkBlocks.Empty())
        {
            return AllocateFromFreeList(ppDynamicInterpreterThunk);
        }

        const bool createdBlock = NewThunkBlock();
        if (!createdBlock)
        {
            // No dynamic thunks available; fall back to the shared static thunk.
#ifdef ASMJS_PLAT
            if (this->isAsmInterpreterThunk)
            {
                return (BYTE*)&Js::InterpreterStackFrame::StaticInterpreterAsmThunk;
            }
            return (BYTE*)&Js::InterpreterStackFrame::StaticInterpreterThunk;
#else
            Assert(!this->isAsmInterpreterThunk);
            return (BYTE*)&Js::InterpreterStackFrame::StaticInterpreterThunk;
#endif
        }
    }

    Assert(this->thunkBuffer != nullptr);
    BYTE* entryPoint = this->thunkBuffer;
#if _M_ARM
    // Thumb-tag the address
    entryPoint = (BYTE*)((DWORD)entryPoint | 0x01);
#endif

    // Stubs are issued from the highest remaining index downwards.
    --this->thunkCount;
    *ppDynamicInterpreterThunk = entryPoint + HeaderSize + (this->thunkCount * ThunkSize);
#if _M_ARM
    AssertMsg(((uintptr_t)(*ppDynamicInterpreterThunk) & 0x6) == 0, "Not 8 byte aligned?");
#else
    AssertMsg(((uintptr_t)(*ppDynamicInterpreterThunk) & 0x7) == 0, "Not 8 byte aligned?");
#endif
    return entryPoint;
}
//
// Interpreter thunks share one entry point located at the start of their BlockSize-aligned block, while each
// function gets its own call stub inside that block. Given the address of such a stub, this returns the
// block's entry point by clearing the low bits of the address (BlockSize is treated as a power of two).
//
void* InterpreterThunkEmitter::ConvertToEntryPoint(PVOID dynamicInterpreterThunk)
{
    Assert(dynamicInterpreterThunk != nullptr);

    const size_t blockMask = ~((size_t)(BlockSize) - 1);
    void* entryPoint = (void*)((size_t)dynamicInterpreterThunk & blockMask);
#if _M_ARM
    // Thumb-tag the address
    entryPoint = (BYTE*)((DWORD)entryPoint | 0x01);
#endif
    return entryPoint;
}
bool InterpreterThunkEmitter::NewThunkBlock()
{
if (this->scriptContext->GetConfig()->IsNoDynamicThunks())
{
return false;
}
#ifdef ENABLE_OOP_NATIVE_CODEGEN
if (CONFIG_FLAG(ForceStaticInterpreterThunk))
{
return false;
}
if (JITManager::GetJITManager()->IsOOPJITEnabled())
{
return NewOOPJITThunkBlock();
}
#endif
Assert(this->thunkCount == 0);
BYTE* buffer;
EmitBufferAllocation<VirtualAllocWrapper, PreReservedVirtualAllocWrapper> * allocation = emitBufferManager.AllocateBuffer(BlockSize, &buffer);
if (allocation == nullptr)
{
Js::Throw::OutOfMemory();
}
if (!emitBufferManager.ProtectBufferWithExecuteReadWriteForInterpreter(allocation))
{
Js::Throw::OutOfMemory();
}
#if PDATA_ENABLED
PRUNTIME_FUNCTION pdataStart = nullptr;
intptr_t epilogEnd = 0;
#endif
DWORD count = this->thunkCount;
FillBuffer(
this->scriptContext->GetThreadContext(),
this->isAsmInterpreterThunk,
(intptr_t)buffer,
BlockSize,
buffer,
#if PDATA_ENABLED
&pdataStart,
&epilogEnd,
#endif
&count
);
if (!emitBufferManager.CommitBufferForInterpreter(allocation, buffer, BlockSize))
{
Js::Throw::OutOfMemory();
}
// Call to set VALID flag for CFG check
BYTE* callTarget = buffer;
#ifdef _M_ARM
// We want to allow the actual callable value, so thumb-tag the address
callTarget = (BYTE*)((uintptr_t)buffer | 0x1);
#endif
ThreadContext::GetContextForCurrentThread()->SetValidCallTargetForCFG(callTarget);
// Update object state only at the end when everything has succeeded - and no exceptions can be thrown.
auto block = this->thunkBlocks.PrependNode(allocator, buffer, count);
#if PDATA_ENABLED
void* pdataTable;
PDataManager::RegisterPdata((PRUNTIME_FUNCTION)pdataStart, (ULONG_PTR)buffer, (ULONG_PTR)epilogEnd, &pdataTable);
block->SetPdata(pdataTable);
#else
Unused(block);
#endif
this->thunkBuffer = buffer;
this->thunkCount = count;
return true;
}
#ifdef ENABLE_OOP_NATIVE_CODEGEN
bool InterpreterThunkEmitter::NewOOPJITThunkBlock()
{
PSCRIPTCONTEXT_HANDLE remoteScriptContext = this->scriptContext->GetRemoteScriptAddr();
if (!JITManager::GetJITManager()->IsConnected())
{
return false;
}
InterpreterThunkInputIDL thunkInput;
thunkInput.asmJsThunk = this->isAsmInterpreterThunk;
InterpreterThunkOutputIDL thunkOutput;
HRESULT hr = JITManager::GetJITManager()->NewInterpreterThunkBlock(remoteScriptContext, &thunkInput, &thunkOutput);
if (!JITManager::HandleServerCallResult(hr, RemoteCallType::ThunkCreation))
{
return false;
}
BYTE* buffer = (BYTE*)thunkOutput.mappedBaseAddr;
if (!CONFIG_FLAG(OOPCFGRegistration))
{
BYTE* callTarget = buffer;
#ifdef _M_ARM
// Need to register the thumb-tagged call target for CFG
callTarget = (BYTE*)((uintptr_t)callTarget | 0x1);
#endif
this->scriptContext->GetThreadContext()->SetValidCallTargetForCFG(callTarget);
}
// Update object state only at the end when everything has succeeded - and no exceptions can be thrown.
auto block = this->thunkBlocks.PrependNode(allocator, buffer, thunkOutput.thunkCount);
#if PDATA_ENABLED
void* pdataTable;
PDataManager::RegisterPdata((PRUNTIME_FUNCTION)thunkOutput.pdataTableStart, (ULONG_PTR)thunkOutput.mappedBaseAddr, (ULONG_PTR)thunkOutput.epilogEndAddr, &pdataTable);
block->SetPdata(pdataTable);
#else
Unused(block);
#endif
this->thunkBuffer = (BYTE*)thunkOutput.mappedBaseAddr;
this->thunkCount = thunkOutput.thunkCount;
return true;
}
#endif
/* static */
// Writes one complete thunk block into `buffer`:
//   [ header (InterpreterThunk, patched) | call stubs ... | debug-break fill | epilog | PDATA (if enabled) ]
//
//   threadContext   - used by ShiftAddr to translate the address of the static interpreter thunk.
//   asmJsThunk      - selects InterpreterAsmThunk instead of InterpreterThunk (only when ASMJS_PLAT).
//   finalAddr       - address used for every absolute address encoded into the block and for the values
//                     reported through the out parameters. NewThunkBlock passes the same address as `buffer`.
//   bufferSize      - NOTE(review): not read by this function; BlockSize is used for all size computations.
//   buffer          - destination memory that receives the block contents.
//   pdataTableStart - (PDATA only) receives the address, relative to finalAddr, where the PDATA was placed.
//   epilogEndAddr   - (PDATA only) see the review note at the assignment below.
//   thunkCount      - receives the number of call stubs written.
void InterpreterThunkEmitter::FillBuffer(
_In_ ThreadContextInfo * threadContext,
_In_ bool asmJsThunk,
_In_ intptr_t finalAddr,
_In_ size_t bufferSize,
_Out_writes_bytes_all_(BlockSize) BYTE* buffer,
#if PDATA_ENABLED
_Out_ PRUNTIME_FUNCTION * pdataTableStart,
_Out_ intptr_t * epilogEndAddr,
#endif
_Out_ DWORD * thunkCount
)
{
    // Determine how many bytes must be reserved at the end of the block for unwind data.
#ifdef _M_X64
PrologEncoder prologEncoder;
prologEncoder.EncodeSmallProlog(PrologSize, StackAllocSize);
DWORD pdataSize = prologEncoder.SizeOfPData();
#elif defined(_M_ARM32_OR_ARM64)
DWORD pdataSize = sizeof(RUNTIME_FUNCTION);
#else
DWORD pdataSize = 0;
#endif
DWORD bytesRemaining = BlockSize;
DWORD bytesWritten = 0;
DWORD thunks = 0;
DWORD epilogSize = sizeof(Epilog);
const BYTE *epilog = Epilog;
const BYTE *header = InterpreterThunk;
intptr_t interpreterThunk;
// the static interpreter thunk invoked by the dynamic emitted thunk
#ifdef ASMJS_PLAT
if (asmJsThunk)
{
interpreterThunk = ShiftAddr(threadContext, &Js::InterpreterStackFrame::InterpreterAsmThunk);
}
else
#endif
{
interpreterThunk = ShiftAddr(threadContext, &Js::InterpreterStackFrame::InterpreterThunk);
}
BYTE * currentBuffer = buffer;
// Ensure there is space for PDATA at the end
BYTE* pdataStart = currentBuffer + (BlockSize - Math::Align(pdataSize, EMIT_BUFFER_ALIGNMENT));
BYTE* epilogStart = pdataStart - Math::Align(epilogSize, EMIT_BUFFER_ALIGNMENT);
// The same PDATA/epilog layout, expressed relative to finalAddr instead of the destination buffer
intptr_t finalPdataStart = finalAddr + (BlockSize - Math::Align(pdataSize, EMIT_BUFFER_ALIGNMENT));
intptr_t finalEpilogStart = finalPdataStart - Math::Align(epilogSize, EMIT_BUFFER_ALIGNMENT);
// Copy the thunk buffer and modify it.
js_memcpy_s(currentBuffer, bytesRemaining, header, HeaderSize);
EncodeInterpreterThunk(currentBuffer, finalAddr, finalEpilogStart, epilogSize, interpreterThunk);
currentBuffer += HeaderSize;
bytesRemaining -= HeaderSize;
// Copy call buffer
// NOTE(review): the bound is strict, so a stub that would end exactly at epilogStart is not emitted.
DWORD callSize = sizeof(Call);
while (currentBuffer < epilogStart - callSize)
{
js_memcpy_s(currentBuffer, bytesRemaining, Call, callSize);
    // Patch the branch placeholder of this stub so that it lands on the epilog.
#if _M_ARM
int offset = (epilogStart - (currentBuffer + JmpOffset));
Assert(offset >= 0);
DWORD encodedOffset = EncoderMD::BranchOffset_T2_24(offset);
DWORD encodedBranch = /*opcode=*/ 0x9000F000 | encodedOffset;
Emit(currentBuffer, JmpOffset, encodedBranch);
#elif _M_ARM64
int64 offset = (epilogStart - (currentBuffer + JmpOffset));
Assert(offset >= 0);
DWORD encodedOffset = EncoderMD::BranchOffset_26(offset);
DWORD encodedBranch = /*opcode=*/ 0x14000000 | encodedOffset;
Emit(currentBuffer, JmpOffset, encodedBranch);
#else
// jump requires an offset from the end of the jump instruction.
int offset = (int)(epilogStart - (currentBuffer + JmpOffset + sizeof(int)));
Assert(offset >= 0);
Emit(currentBuffer, JmpOffset, offset);
#endif
currentBuffer += callSize;
bytesRemaining -= callSize;
thunks++;
}
// Fill any gap till start of epilog
bytesWritten = FillDebugBreak(currentBuffer, (DWORD)(epilogStart - currentBuffer));
bytesRemaining -= bytesWritten;
currentBuffer += bytesWritten;
// Copy epilog
bytesWritten = CopyWithAlignment(currentBuffer, bytesRemaining, epilog, epilogSize, EMIT_BUFFER_ALIGNMENT);
currentBuffer += bytesWritten;
bytesRemaining -= bytesWritten;
// Generate and register PDATA
#if PDATA_ENABLED
BYTE* epilogEnd = epilogStart + epilogSize;
DWORD functionSize = (DWORD)(epilogEnd - buffer);
Assert(pdataStart == currentBuffer);
#ifdef _M_X64
Assert(bytesRemaining >= pdataSize);
BYTE* pdata = prologEncoder.Finalize(buffer, functionSize, pdataStart);
bytesWritten = CopyWithAlignment(pdataStart, bytesRemaining, pdata, pdataSize, EMIT_BUFFER_ALIGNMENT);
#elif defined(_M_ARM32_OR_ARM64)
RUNTIME_FUNCTION pdata;
GeneratePdata(buffer, functionSize, &pdata);
bytesWritten = CopyWithAlignment(pdataStart, bytesRemaining, (const BYTE*)&pdata, pdataSize, EMIT_BUFFER_ALIGNMENT);
#endif
*pdataTableStart = (PRUNTIME_FUNCTION)finalPdataStart;
// NOTE(review): this reports the epilog *start* (relative to finalAddr); the local epilogEnd above is only
// used to compute functionSize. Confirm that callers expect the start rather than the end.
*epilogEndAddr = finalEpilogStart;
#endif
*thunkCount = thunks;
}
#if _M_ARM
// ARM32: patches the placeholders of a freshly copied header template.
//   thunkBuffer             - start of the copied header inside the block being built.
//   thunkBufferStartAddress - address the block will execute at; the call stubs start HeaderSize bytes later.
//   epilogStart             - address of the epilog; the range check accepts targets up to this address.
//   epilogSize              - not used by this variant.
//   interpreterThunk        - address of the static interpreter thunk loaded into r1.
void InterpreterThunkEmitter::EncodeInterpreterThunk(
    __in_bcount(InterpreterThunkSize) BYTE* thunkBuffer,
    __in const intptr_t thunkBufferStartAddress,
    __in const intptr_t epilogStart,
    __in const DWORD epilogSize,
    __in const intptr_t interpreterThunk)
{
    // movw/movt r1, <interpreterThunk>
    const uint32 thunkTarget = (uint32)interpreterThunk;
    DWORD thunkLowHalf = thunkTarget & 0x0000FFFF;
    DWORD movW = EncodeMove(/*Opcode*/ 0x0000F240, /*register*/1, thunkLowHalf);
    Emit(thunkBuffer,ThunkAddressOffset, movW);

    DWORD thunkHighHalf = (thunkTarget & 0xFFFF0000) >> 16;
    DWORD movT = EncodeMove(/*Opcode*/ 0x0000F2C0, /*register*/1, thunkHighHalf);
    Emit(thunkBuffer, ThunkAddressOffset + sizeof(movW), movT);

    // Immediate bytes of the three ldr.w instructions: function info, function proxy,
    // and the dynamic interpreter thunk address.
    thunkBuffer[FunctionInfoOffset] = Js::JavascriptFunction::GetOffsetOfFunctionInfo();
    thunkBuffer[FunctionProxyOffset] = Js::FunctionInfo::GetOffsetOfFunctionProxy();
    thunkBuffer[DynamicThunkAddressOffset] = Js::FunctionBody::GetOffsetOfDynamicInterpreterThunk();

    // Range-check operands: start of the call stubs and their total size.
    uintptr_t callBlockStartAddress = (uintptr_t)thunkBufferStartAddress + HeaderSize;
    uint totalThunkSize = (uint)(epilogStart - callBlockStartAddress);

    // movw/movt r12, <callBlockStartAddress>
    DWORD blockStartLowHalf = callBlockStartAddress & 0x0000FFFF;
    DWORD movWblockStart = EncodeMove(/*Opcode*/ 0x0000F240, /*register*/12, blockStartLowHalf);
    Emit(thunkBuffer,CallBlockStartAddressInstrOffset, movWblockStart);

    DWORD blockStartHighHalf = (callBlockStartAddress & 0xFFFF0000) >> 16;
    DWORD movTblockStart = EncodeMove(/*Opcode*/ 0x0000F2C0, /*register*/12, blockStartHighHalf);
    Emit(thunkBuffer, CallBlockStartAddressInstrOffset + sizeof(movWblockStart), movTblockStart);

    // mov r12, <totalThunkSize>
    DWORD movBlockSize = EncodeMove(/*Opcode*/ 0x0000F240, /*register*/12, (DWORD)totalThunkSize);
    Emit(thunkBuffer, CallThunkSizeInstrOffset, movBlockSize);

    // Error code placed in r0 before the fast-fail instruction.
    Emit(thunkBuffer, ErrorOffset, (BYTE) FAST_FAIL_INVALID_ARG);
}
// ARM32: builds the 32-bit Thumb-2 encoding of a MOVW/MOVT-style instruction.
//   opCode - instruction bits without register or immediate (e.g. 0x0000F240 for MOVW, 0x0000F2C0 for MOVT).
//   reg    - destination register number; placed at bit 24 of the result.
//   imm16  - 16-bit immediate, scattered into the instruction by EncoderMD::EncodeImmediate16.
// Returns the complete instruction word, ready to be written with Emit.
//
// This definition is only compiled for _M_ARM, so the former inner "#elif _M_ARM64" branch could never
// be selected and has been removed.
DWORD InterpreterThunkEmitter::EncodeMove(DWORD opCode, int reg, DWORD imm16)
{
    // The destination register field is 4 bits wide.
    Assert(reg >= 0 && reg < 16);
    DWORD encodedMove = reg << 24;

    DWORD encodedImm = 0;
    EncoderMD::EncodeImmediate16(imm16, &encodedImm);
    encodedMove |= encodedImm;

    // Register and immediate bits must not collide with the opcode bits.
    AssertMsg((encodedMove & opCode) == 0, "Any bits getting overwritten?");
    encodedMove |= opCode;
    return encodedMove;
}
// ARM32: fills in a RUNTIME_FUNCTION entry that uses packed unwind data to describe the thunk block.
//   entryPoint   - not read by this function.
//   functionSize - size in bytes of the code covered by the entry.
//   function     - entry to fill in; only the fields assigned below are written.
void InterpreterThunkEmitter::GeneratePdata(_In_ const BYTE* entryPoint, _In_ const DWORD functionSize, _Out_ RUNTIME_FUNCTION* function)
{
function->BeginAddress = 0x1; // Since our base address is the start of the function - this is offset from the base address
function->Flag = 1; // Packed unwind data is used
function->FunctionLength = functionSize / 2; // Length is stored in 2-byte units
function->Ret = 0; // Return via Pop
function->H = 1; // Homes parameters
function->Reg = 7; // Together with R = 1 below: no saved registers - R11 is the frame pointer - not considered here
function->R = 1; // Together with Reg = 7 above: no registers are being saved.
function->L = 1; // Save/restore LR register
function->C = 1; // Frame pointer chain in R11 established
function->StackAdjust = 0; // Stack allocation for the function
}
#elif _M_ARM64
// ARM64: patches the placeholders of a freshly copied header template.
//   thunkBuffer      - start of the copied header inside the block being built.
//   interpreterThunk - address of the static interpreter thunk loaded into x1.
// thunkBufferStartAddress, epilogStart and epilogSize are not used by this variant (the ARM64 header has
// no range check).
void InterpreterThunkEmitter::EncodeInterpreterThunk(
    __in_bcount(InterpreterThunkSize) BYTE* thunkBuffer,
    __in const intptr_t thunkBufferStartAddress,
    __in const intptr_t epilogStart,
    __in const DWORD epilogSize,
    __in const intptr_t interpreterThunk)
{
    // Four consecutive instructions load the 64-bit thunk address into x1, 16 bits at a time:
    //   movz x1, #<bits 15-0>
    //   movk x1, #<bits 31-16>, lsl #16
    //   movk x1, #<bits 47-32>, lsl #32
    //   movk x1, #<bits 63-48>, lsl #48
    static const DWORD moveOpcodes[] =
    {
        0xD2800000, // movz, hw = 00
        0xF2A00000, // movk, hw = 01
        0xF2C00000, // movk, hw = 02
        0xF2E00000  // movk, hw = 03
    };
    const uint64 thunkTarget = (uint64)interpreterThunk;
    int addrOffset = ThunkAddressOffset;
    for (int part = 0; part < 4; part++)
    {
        DWORD imm16 = (DWORD)((thunkTarget >> (16 * part)) & 0xFFFF);
        DWORD encodedMove = EncodeMove(/*Opcode*/ moveOpcodes[part], /*register x1*/1, imm16);
        static_assert(sizeof(encodedMove) == 4, "movZ has to be 32-bit encoded");
        Emit(thunkBuffer, addrOffset, encodedMove);
        addrOffset += sizeof(encodedMove);
    }

    // ORs a field offset into the immediate of the LDR instruction located at instrOffset.
    // The immediate is stored in units of 8 bytes starting at bit 10 of the instruction.
    auto patchLdrImmediate = [thunkBuffer](BYTE instrOffset, ULONG fieldOffset)
    {
        AssertMsg(fieldOffset % 8 == 0, "Immediate offset for LDR must be 8 byte aligned");
        AssertMsg(fieldOffset < 0x8000, "Immediate offset for LDR must be less than 0x8000");
        *(PULONG)&thunkBuffer[instrOffset] |= (fieldOffset / 8) << 10;
    };

    // Loads of the function info, the function proxy, and the dynamic interpreter thunk address.
    patchLdrImmediate(FunctionInfoOffset, Js::JavascriptFunction::GetOffsetOfFunctionInfo());
    patchLdrImmediate(FunctionProxyOffset, Js::FunctionInfo::GetOffsetOfFunctionProxy());
    patchLdrImmediate(DynamicThunkAddressOffset, Js::FunctionBody::GetOffsetOfDynamicInterpreterThunk());
}
// ARM64: builds the 32-bit encoding of a move-wide-immediate instruction (MOVZ/MOVK).
//   opCode - instruction bits without register or immediate (e.g. 0xD2800000 for MOVZ).
//   reg    - destination register number; placed in the lowest bits of the result.
//   imm16  - immediate; only the low 16 bits are used, placed starting at bit 5.
// Returns the complete instruction word, ready to be written with Emit.
//
// This definition is only compiled for _M_ARM64, so the former inner "#if _M_ARM" branch could never
// be selected and has been removed.
DWORD InterpreterThunkEmitter::EncodeMove(DWORD opCode, int reg, DWORD imm16)
{
    // The destination register field sits below the immediate, i.e. it is 5 bits wide.
    Assert(reg >= 0 && reg < 32);
    DWORD encodedMove = reg << 0;

    // ToDo (SaAgarwa) - From Aaron change. Validate for ARM64
    encodedMove |= (imm16 & 0xFFFF) << 5;

    // Register and immediate bits must not collide with the opcode bits.
    AssertMsg((encodedMove & opCode) == 0, "Any bits getting overwritten?");
    encodedMove |= opCode;
    return encodedMove;
}
// ARM64: fills in a RUNTIME_FUNCTION entry that uses packed unwind data to describe the thunk block.
//   entryPoint   - not read by this function.
//   functionSize - size in bytes of the code covered by the entry.
//   function     - entry to fill in; only the fields assigned below are written.
void InterpreterThunkEmitter::GeneratePdata(_In_ const BYTE* entryPoint, _In_ const DWORD functionSize, _Out_ RUNTIME_FUNCTION* function)
{
function->BeginAddress = 0x0; // Since our base address is the start of the function - this is offset from the base address
function->Flag = 1; // Packed unwind data is used
function->FunctionLength = functionSize / 4; // Length is stored in 4-byte units
function->RegF = 0; // number of non-volatile FP registers (d8-d15) saved in the canonical stack location
function->RegI = 0; // number of non-volatile INT registers (r19-r28) saved in the canonical stack location
function->H = 1; // Homes parameters
// (indicating whether the function "homes" the integer parameter registers (r0-r7) by storing them at the very start of the function)
function->CR = 3; // chained function, a store/load pair instruction is used in prolog/epilog <r29,lr>
function->FrameSize = 5; // the number of bytes of stack that is allocated for this function divided by 16 (the header allocates 80 bytes)
}
#else
// x86 / x64: patches the placeholders of a freshly copied header template.
//   thunkBuffer             - start of the copied header inside the block being built.
//   thunkBufferStartAddress - address the block will execute at; the call stubs start HeaderSize bytes later.
//   epilogStart             - address of the epilog; the range check accepts targets up to this address.
//   epilogSize              - not used by this variant.
//   interpreterThunk        - address of the static interpreter thunk loaded into (r|e)ax.
void InterpreterThunkEmitter::EncodeInterpreterThunk(
    __in_bcount(InterpreterThunkSize) BYTE* thunkBuffer,
    __in const intptr_t thunkBufferStartAddress,
    __in const intptr_t epilogStart,
    __in const DWORD epilogSize,
    __in const intptr_t interpreterThunk)
{
    // One-byte displacements of the three dependent loads:
    // function object -> function info -> function proxy -> dynamic interpreter thunk address.
    thunkBuffer[FunctionInfoOffset] = Js::JavascriptFunction::GetOffsetOfFunctionInfo();
    thunkBuffer[FunctionProxyOffset] = Js::FunctionInfo::GetOffsetOfFunctionProxy();
    thunkBuffer[DynamicThunkAddressOffset] = Js::FunctionBody::GetOffsetOfDynamicInterpreterThunk();

    // Range-check operands: where the call stubs begin and how many bytes they span.
    const intptr_t callBlockStart = thunkBufferStartAddress + HeaderSize;
    Emit(thunkBuffer, CallBlockStartAddrOffset, (uintptr_t)callBlockStart);
    uint totalThunkSize = (uint)(epilogStart - callBlockStart);
    Emit(thunkBuffer, ThunkSizeOffset, totalThunkSize);

    // Error code loaded before the fast-fail interrupt.
    Emit(thunkBuffer, ErrorOffset, (BYTE) FAST_FAIL_INVALID_ARG);

    // Address of the static interpreter thunk that the call stubs invoke.
    Emit(thunkBuffer, ThunkAddressOffset, (uintptr_t)interpreterThunk);
}
#endif
/*static*/
// Fills `count` bytes at `dest` with debug-break filler by forwarding to CustomHeap::FillDebugBreak.
// On ARM / ARM64 the count is expected to be a multiple of 2 / 4 bytes respectively.
// Returns the number of bytes filled, which is always `count`.
DWORD InterpreterThunkEmitter::FillDebugBreak(_Out_writes_bytes_all_(count) BYTE* dest, _In_ DWORD count)
{
#if defined(_M_ARM)
    Assert(count % 2 == 0);
#elif defined(_M_ARM64)
    Assert(count % 4 == 0);
#endif
    const DWORD bytesFilled = count;
    CustomHeap::FillDebugBreak(dest, bytesFilled);
    return bytesFilled;
}
/*static*/
// Copies `srcSize` bytes from `src` to `dest` and, if room allows, pads the copy with debug-break filler
// up to the next multiple of `alignment`.
//   sizeInBytes - space available at `dest`.
// Returns the number of bytes written: srcSize plus the padding when padding was added, otherwise srcSize.
DWORD InterpreterThunkEmitter::CopyWithAlignment(
    _Out_writes_bytes_all_(sizeInBytes) BYTE* dest,
    _In_ const DWORD sizeInBytes,
    _In_reads_bytes_(srcSize) const BYTE* src,
    _In_ const DWORD srcSize,
    _In_ const DWORD alignment)
{
    js_memcpy_s(dest, sizeInBytes, src, srcSize);

    const DWORD alignPad = Math::Align(srcSize, alignment) - srcSize;
    const DWORD spaceAfterCopy = sizeInBytes - srcSize;
    Assert(alignPad <= (sizeInBytes - srcSize));

    // Nothing to pad, or (in builds without asserts) not enough room for the padding.
    if (alignPad == 0 || alignPad > spaceAfterCopy)
    {
        return srcSize;
    }

    FillDebugBreak(dest + srcSize, alignPad);
    return srcSize + alignPad;
}
#if DBG
// Debug-only check of whether `address` belongs to interpreter thunk memory.
// With out-of-process JIT enabled the question is forwarded to the JIT server; when the server cannot be
// asked (no remote script context, not connected, or the call fails) the answer is `true`.
// Otherwise the local emit buffer manager is consulted.
bool
InterpreterThunkEmitter::IsInHeap(void* address)
{
#ifdef ENABLE_OOP_NATIVE_CODEGEN
    if (JITManager::GetJITManager()->IsOOPJITEnabled())
    {
        PSCRIPTCONTEXT_HANDLE remoteScript = this->scriptContext->GetRemoteScriptAddr(false);
        const bool canQueryServer = remoteScript && JITManager::GetJITManager()->IsConnected();
        if (!canQueryServer)
        {
            // this method is used in asserts to validate whether an entry point is valid
            // in case JIT process crashed, let's just say true to keep asserts from firing
            return true;
        }

        boolean isThunkAddr;
        HRESULT hr = JITManager::GetJITManager()->IsInterpreterThunkAddr(remoteScript, (intptr_t)address, this->isAsmInterpreterThunk, &isThunkAddr);
        if (!JITManager::HandleServerCallResult(hr, RemoteCallType::HeapQuery))
        {
            return true;
        }
        return isThunkAddr != FALSE;
    }
#endif
    return emitBufferManager.IsInHeap(address);
}
#endif
// Shuts the emitter down: unregisters unwind data (where PDATA is enabled), empties both thunk-block
// lists, and decommits the thunk memory - through the JIT server when out-of-process JIT is enabled,
// otherwise through the local emit buffer manager.
// We only decommit at close because there might still be some code running here.
// The destructor of emitBufferManager will cause the eventual release.
void InterpreterThunkEmitter::Close()
{
#if PDATA_ENABLED
    auto dropPdata = [&] (const ThunkBlock& thunkBlock)
    {
        PDataManager::UnregisterPdata((PRUNTIME_FUNCTION) thunkBlock.GetPdata());
    };
    this->thunkBlocks.Iterate(dropPdata);
    this->freeListedThunkBlocks.Iterate(dropPdata);
#endif
    this->thunkBlocks.Clear(allocator);
    this->freeListedThunkBlocks.Clear(allocator);

    bool decommitInProc = true;
#ifdef ENABLE_OOP_NATIVE_CODEGEN
    if (JITManager::GetJITManager()->IsOOPJITEnabled())
    {
        // The memory is owned by the JIT server; ask it to decommit if it can still be reached.
        decommitInProc = false;
        PSCRIPTCONTEXT_HANDLE remoteScript = this->scriptContext->GetRemoteScriptAddr(false);
        if (remoteScript && JITManager::GetJITManager()->IsConnected())
        {
            JITManager::GetJITManager()->DecommitInterpreterBufferManager(remoteScript, this->isAsmInterpreterThunk);
        }
    }
#endif
    if (decommitInProc)
    {
        emitBufferManager.Decommit();
    }

    this->thunkBuffer = nullptr;
    this->thunkCount = 0;
}
// Returns a thunk to its block's free list so that it can be handed out again.
//   thunkAddress  - address of the call stub being released.
//   addtoFreeList - when false the call is a no-op.
// The owning block is looked up among the free-listed blocks first; if it is found among the regular
// blocks instead, it is moved to the free-listed blocks.
void InterpreterThunkEmitter::Release(BYTE* thunkAddress, bool addtoFreeList)
{
    if (!addtoFreeList)
    {
        return;
    }

    auto ownsThunk = [thunkAddress] (const ThunkBlock& candidate)
    {
        return candidate.Contains(thunkAddress);
    };

    ThunkBlock* owner = freeListedThunkBlocks.Find(ownsThunk);
    if (owner == nullptr)
    {
        owner = thunkBlocks.MoveTo(&freeListedThunkBlocks, ownsThunk);
    }
    if (owner == nullptr)
    {
        return;
    }

    // if EnsureFreeList fails in an OOM scenario - we just leak the thunks
    if (!owner->EnsureFreeList(allocator))
    {
        return;
    }
    owner->Release(thunkAddress);
}
// Takes one released thunk from the first free-listed block.
// On return *ppDynamicInterpreterThunk holds the thunk's address and the return value is the entry point
// (start) of the block it belongs to. A block whose free list becomes empty is moved back to thunkBlocks.
BYTE* InterpreterThunkEmitter::AllocateFromFreeList(PVOID* ppDynamicInterpreterThunk )
{
    ThunkBlock& headBlock = this->freeListedThunkBlocks.Head();

    BYTE* recycledThunk = headBlock.AllocateFromFreeList();
#if _M_ARM
    // Thumb-tag the address
    recycledThunk = (BYTE*)((DWORD)recycledThunk | 0x01);
#endif

    const bool blockExhausted = headBlock.IsFreeListEmpty();
    if (blockExhausted)
    {
        this->freeListedThunkBlocks.MoveHeadTo(&this->thunkBlocks);
    }
    *ppDynamicInterpreterThunk = recycledThunk;

    BYTE* blockEntryPoint = headBlock.GetStart();
#if _M_ARM
    // Thumb-tag the address
    blockEntryPoint = (BYTE*)((DWORD)blockEntryPoint | 0x01);
#endif
    return blockEntryPoint;
}
// True when `address` lies inside this block, i.e. in [start, start + BlockSize).
bool ThunkBlock::Contains(BYTE* address) const
{
    if (address < start)
    {
        return false;
    }
    return address < (start + InterpreterThunkEmitter::BlockSize);
}
// Marks the thunk at `address` as free by setting its bit in the free list.
// The free list must already exist (see EnsureFreeList) and the address must belong to this block.
void ThunkBlock::Release(BYTE* address)
{
    Assert(Contains(address));
    Assert(this->freeList);
    const BVIndex thunkIndex = FromThunkAddress(address);
    this->freeList->Set(thunkIndex);
}
// Picks a free thunk (the bit GetNextBit(0) reports), clears its free bit, and returns its address.
BYTE* ThunkBlock::AllocateFromFreeList()
{
    Assert(this->freeList);
    const BVIndex freeIndex = this->freeList->GetNextBit(0);
    BYTE* const thunkAddress = ToThunkAddress(freeIndex);
    this->freeList->Clear(freeIndex);
    return thunkAddress;
}
// Converts the address of a call stub inside this block into its index:
// (offset from block start - HeaderSize) / ThunkSize.
BVIndex ThunkBlock::FromThunkAddress(BYTE* address)
{
    const uint offsetInBlock = (uint)(address - start);
    const uint thunkIndex = (offsetInBlock - HeaderSize) / InterpreterThunkEmitter::ThunkSize;
    Assert(thunkIndex < this->thunkCount);
    return thunkIndex;
}
// Converts a thunk index into the address of its call stub:
// block start + HeaderSize + ThunkSize * index.
BYTE* ThunkBlock::ToThunkAddress(BVIndex index)
{
    Assert(index < this->thunkCount);
    const auto stubOffset = InterpreterThunkEmitter::ThunkSize * index;
    return start + HeaderSize + stubOffset;
}
// Creates the free-list bit vector (one bit per thunk) if it does not exist yet.
// Returns false only when the bit vector could not be allocated.
bool ThunkBlock::EnsureFreeList(ArenaAllocator* allocator)
{
    if (this->freeList)
    {
        return true;
    }
    this->freeList = BVFixed::NewNoThrow(this->thunkCount, allocator);
    return this->freeList != nullptr;
}
// True when no thunk of this block is currently marked free. The free list must already exist.
bool ThunkBlock::IsFreeListEmpty() const
{
    Assert(this->freeList);
    const bool noFreeThunks = this->freeList->IsAllClear();
    return noFreeThunks;
}
#endif // ENABLE_NATIVE_CODEGEN