//-------------------------------------------------------------------------------------------------------
// Copyright (C) Microsoft Corporation and contributors. All rights reserved.
// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
//-------------------------------------------------------------------------------------------------------
#include "Backend.h"
static IR::Instr* removeInstr(IR::Instr* instr);
#ifdef ENABLE_WASM_SIMD
static IR::Instr* removeInstr(IR::Instr* instr)
{
IR::Instr* prevInstr;
prevInstr = instr->m_prev;
instr->Remove();
return prevInstr;
}
#define GET_SIMDOPCODE(irOpcode) m_simd128OpCodesMap[(uint32)(irOpcode - Js::OpCode::Simd128_Start)]
#define SET_SIMDOPCODE(irOpcode, mdOpcode) \
Assert((uint32)m_simd128OpCodesMap[(uint32)(Js::OpCode::irOpcode - Js::OpCode::Simd128_Start)] == 0);\
Assert(Js::OpCode::mdOpcode > Js::OpCode::MDStart);\
m_simd128OpCodesMap[(uint32)(Js::OpCode::irOpcode - Js::OpCode::Simd128_Start)] = Js::OpCode::mdOpcode;
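// Mapped vs. unmapped lowering (summary): m_simd128OpCodesMap is indexed by
// (IR opcode - Js::OpCode::Simd128_Start) and holds the machine-dependent opcode for SIMD IR
// opcodes that lower to a single SSE/SSE2 instruction (see SET_SIMDOPCODE/GET_SIMDOPCODE).
// A zero entry means there is no direct mapping and the opcode is lowered by
// Simd128LowerUnMappedInstruction instead.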
IR::Instr* LowererMD::Simd128Instruction(IR::Instr *instr)
{
// Currently only handles type-specialized/asm.js opcodes
if (!instr->GetDst())
{
// SIMD ops always have DST in asmjs
Assert(!instr->m_func->GetJITFunctionBody()->IsAsmJsMode());
// unused result. Do nothing.
IR::Instr * pInstr = instr->m_prev;
instr->Remove();
return pInstr;
}
if (Simd128TryLowerMappedInstruction(instr))
{
return instr->m_prev;
}
return Simd128LowerUnMappedInstruction(instr);
}
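// Lowers opcodes that map 1:1 to a machine instruction. A few cases still need operand fix-ups
// before the opcode is rewritten: abs/neg/not take a constant bit mask as src2, and the
// "greater-than" style float comparisons / signed integer less-than are emitted as the reversed
// SSE comparison with the operands swapped.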
bool LowererMD::Simd128TryLowerMappedInstruction(IR::Instr *instr)
{
bool legalize = true;
Js::OpCode opcode = GET_SIMDOPCODE(instr->m_opcode);
if ((uint32)opcode == 0)
return false;
Assert(instr->GetDst() && instr->GetDst()->IsRegOpnd() && (instr->GetDst()->IsSimd128() || instr->GetDst()->GetType() == TyInt32));
Assert(instr->GetSrc1() && instr->GetSrc1()->IsRegOpnd() && instr->GetSrc1()->IsSimd128());
Assert(!instr->GetSrc2() || (((instr->GetSrc2()->IsRegOpnd() && instr->GetSrc2()->IsSimd128()) || (instr->GetSrc2()->IsIntConstOpnd() && instr->GetSrc2()->GetType() == TyInt8))));
switch (instr->m_opcode)
{
case Js::OpCode::Simd128_Abs_F4:
Assert(opcode == Js::OpCode::ANDPS);
instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AbsMaskF4Addr(), instr->GetSrc1()->GetType(), m_func));
break;
case Js::OpCode::Simd128_Abs_D2:
Assert(opcode == Js::OpCode::ANDPD);
instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AbsMaskD2Addr(), instr->GetSrc1()->GetType(), m_func));
break;
case Js::OpCode::Simd128_Neg_F4:
Assert(opcode == Js::OpCode::XORPS);
instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegMaskF4Addr(), instr->GetSrc1()->GetType(), m_func));
break;
case Js::OpCode::Simd128_Neg_D2:
Assert(opcode == Js::OpCode::XORPS);
instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegMaskD2Addr(), instr->GetSrc1()->GetType(), m_func));
break;
case Js::OpCode::Simd128_Not_I4:
case Js::OpCode::Simd128_Not_I16:
case Js::OpCode::Simd128_Not_I8:
case Js::OpCode::Simd128_Not_U4:
case Js::OpCode::Simd128_Not_U8:
case Js::OpCode::Simd128_Not_U16:
case Js::OpCode::Simd128_Not_B4:
case Js::OpCode::Simd128_Not_B8:
case Js::OpCode::Simd128_Not_B16:
Assert(opcode == Js::OpCode::XORPS);
instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), instr->GetSrc1()->GetType(), m_func));
break;
case Js::OpCode::Simd128_Gt_F4:
case Js::OpCode::Simd128_Gt_D2:
case Js::OpCode::Simd128_GtEq_F4:
case Js::OpCode::Simd128_GtEq_D2:
case Js::OpCode::Simd128_Lt_I4:
case Js::OpCode::Simd128_Lt_I8:
case Js::OpCode::Simd128_Lt_I16:
{
Assert(opcode == Js::OpCode::CMPLTPS || opcode == Js::OpCode::CMPLTPD || opcode == Js::OpCode::CMPLEPS
|| opcode == Js::OpCode::CMPLEPD || opcode == Js::OpCode::PCMPGTD || opcode == Js::OpCode::PCMPGTB
|| opcode == Js::OpCode::PCMPGTW );
// swap operands
auto *src1 = instr->UnlinkSrc1();
auto *src2 = instr->UnlinkSrc2();
instr->SetSrc1(src2);
instr->SetSrc2(src1);
break;
}
}
instr->m_opcode = opcode;
if (legalize)
{
//MakeDstEquSrc1(instr);
Legalize(instr);
}
return true;
}
IR::MemRefOpnd *
LowererMD::LoadSimdHelperArgument(IR::Instr * instr, uint8 index)
{
// The most reliable way to pass a SIMD value on x86/x64 (Windows/Linux) across calls
// is to pass a pointer to a SIMD value in the SIMD temporary area.
// Otherwise we would have to use __m128 and compiler intrinsics, which may or may not
// behave the same across MSVC and Clang.
IR::MemRefOpnd* srcMemRef = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(index), TySimd128F4, m_func);
IR::AddrOpnd* argAddress = IR::AddrOpnd::New(m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(index), IR::AddrOpndKindDynamicMisc, m_func, true /* doesn't come from a user */);
LoadHelperArgument(instr, argAddress);
return srcMemRef;
}
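// Dispatch for SIMD opcodes with no single-instruction mapping: each case expands into a
// multi-instruction SSE sequence or, where no reasonable sequence exists (e.g. some 64x2
// conversions and shifts), into a call to a JIT helper.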
IR::Instr* LowererMD::Simd128LowerUnMappedInstruction(IR::Instr *instr)
{
switch (instr->m_opcode)
{
case Js::OpCode::Simd128_LdC:
return Simd128LoadConst(instr);
#ifdef ENABLE_SIMD
case Js::OpCode::Simd128_FloatsToF4:
case Js::OpCode::Simd128_IntsToI4:
case Js::OpCode::Simd128_IntsToU4:
case Js::OpCode::Simd128_IntsToB4:
return Simd128LowerConstructor_4(instr);
case Js::OpCode::Simd128_IntsToI8:
case Js::OpCode::Simd128_IntsToU8:
case Js::OpCode::Simd128_IntsToB8:
return Simd128LowerConstructor_8(instr);
case Js::OpCode::Simd128_IntsToI16:
case Js::OpCode::Simd128_IntsToU16:
case Js::OpCode::Simd128_IntsToB16:
return Simd128LowerConstructor_16(instr);
case Js::OpCode::Simd128_Rcp_F4:
//case Js::OpCode::Simd128_Rcp_D2:
return Simd128LowerRcp(instr);
//SQRT
case Js::OpCode::Simd128_RcpSqrt_F4:
//case Js::OpCode::Simd128_RcpSqrt_D2:
return Simd128LowerRcpSqrt(instr);
case Js::OpCode::Simd128_Select_F4:
case Js::OpCode::Simd128_Select_I4:
//case Js::OpCode::Simd128_Select_D2:
case Js::OpCode::Simd128_Select_I8:
case Js::OpCode::Simd128_Select_I16:
case Js::OpCode::Simd128_Select_U4:
case Js::OpCode::Simd128_Select_U8:
case Js::OpCode::Simd128_Select_U16:
return Simd128LowerSelect(instr);
#endif
#if 0
case Js::OpCode::Simd128_DoublesToD2:
return Simd128LowerConstructor_2(instr);
#endif // 0
case Js::OpCode::Simd128_ExtractLane_I2:
case Js::OpCode::Simd128_ExtractLane_I4:
case Js::OpCode::Simd128_ExtractLane_I8:
case Js::OpCode::Simd128_ExtractLane_I16:
case Js::OpCode::Simd128_ExtractLane_U4:
case Js::OpCode::Simd128_ExtractLane_U8:
case Js::OpCode::Simd128_ExtractLane_U16:
case Js::OpCode::Simd128_ExtractLane_B4:
case Js::OpCode::Simd128_ExtractLane_B8:
case Js::OpCode::Simd128_ExtractLane_B16:
case Js::OpCode::Simd128_ExtractLane_F4:
return Simd128LowerLdLane(instr);
case Js::OpCode::Simd128_ReplaceLane_I2:
case Js::OpCode::Simd128_ReplaceLane_D2:
return SIMD128LowerReplaceLane_2(instr);
case Js::OpCode::Simd128_ReplaceLane_I4:
case Js::OpCode::Simd128_ReplaceLane_F4:
case Js::OpCode::Simd128_ReplaceLane_U4:
case Js::OpCode::Simd128_ReplaceLane_B4:
return SIMD128LowerReplaceLane_4(instr);
case Js::OpCode::Simd128_ReplaceLane_I8:
case Js::OpCode::Simd128_ReplaceLane_U8:
case Js::OpCode::Simd128_ReplaceLane_B8:
return SIMD128LowerReplaceLane_8(instr);
case Js::OpCode::Simd128_ReplaceLane_I16:
case Js::OpCode::Simd128_ReplaceLane_U16:
case Js::OpCode::Simd128_ReplaceLane_B16:
return SIMD128LowerReplaceLane_16(instr);
case Js::OpCode::Simd128_Splat_F4:
case Js::OpCode::Simd128_Splat_I4:
case Js::OpCode::Simd128_Splat_I2:
case Js::OpCode::Simd128_Splat_D2:
case Js::OpCode::Simd128_Splat_I8:
case Js::OpCode::Simd128_Splat_I16:
case Js::OpCode::Simd128_Splat_U4:
case Js::OpCode::Simd128_Splat_U8:
case Js::OpCode::Simd128_Splat_U16:
case Js::OpCode::Simd128_Splat_B4:
case Js::OpCode::Simd128_Splat_B8:
case Js::OpCode::Simd128_Splat_B16:
return Simd128LowerSplat(instr);
case Js::OpCode::Simd128_Sqrt_F4:
//case Js::OpCode::Simd128_Sqrt_D2:
return Simd128LowerSqrt(instr);
case Js::OpCode::Simd128_Neg_I4:
case Js::OpCode::Simd128_Neg_I8:
case Js::OpCode::Simd128_Neg_I16:
case Js::OpCode::Simd128_Neg_U4:
case Js::OpCode::Simd128_Neg_U8:
case Js::OpCode::Simd128_Neg_U16:
return Simd128LowerNeg(instr);
case Js::OpCode::Simd128_Mul_I4:
case Js::OpCode::Simd128_Mul_U4:
return Simd128LowerMulI4(instr);
case Js::OpCode::Simd128_Mul_I16:
case Js::OpCode::Simd128_Mul_U16:
return Simd128LowerMulI16(instr);
case Js::OpCode::Simd128_ShRtByScalar_I4:
case Js::OpCode::Simd128_ShLtByScalar_I4:
case Js::OpCode::Simd128_ShRtByScalar_I8:
case Js::OpCode::Simd128_ShLtByScalar_I8:
case Js::OpCode::Simd128_ShLtByScalar_I16:
case Js::OpCode::Simd128_ShRtByScalar_I16:
case Js::OpCode::Simd128_ShRtByScalar_U4:
case Js::OpCode::Simd128_ShLtByScalar_U4:
case Js::OpCode::Simd128_ShRtByScalar_U8:
case Js::OpCode::Simd128_ShLtByScalar_U8:
case Js::OpCode::Simd128_ShRtByScalar_U16:
case Js::OpCode::Simd128_ShLtByScalar_U16:
case Js::OpCode::Simd128_ShLtByScalar_I2:
case Js::OpCode::Simd128_ShRtByScalar_U2:
case Js::OpCode::Simd128_ShRtByScalar_I2:
return Simd128LowerShift(instr);
case Js::OpCode::Simd128_LdArr_I4:
case Js::OpCode::Simd128_LdArr_I8:
case Js::OpCode::Simd128_LdArr_I16:
case Js::OpCode::Simd128_LdArr_U4:
case Js::OpCode::Simd128_LdArr_U8:
case Js::OpCode::Simd128_LdArr_U16:
case Js::OpCode::Simd128_LdArr_F4:
//case Js::OpCode::Simd128_LdArr_D2:
case Js::OpCode::Simd128_LdArrConst_I4:
case Js::OpCode::Simd128_LdArrConst_I8:
case Js::OpCode::Simd128_LdArrConst_I16:
case Js::OpCode::Simd128_LdArrConst_U4:
case Js::OpCode::Simd128_LdArrConst_U8:
case Js::OpCode::Simd128_LdArrConst_U16:
case Js::OpCode::Simd128_LdArrConst_F4:
//case Js::OpCode::Simd128_LdArrConst_D2:
if (m_func->GetJITFunctionBody()->IsAsmJsMode())
{
// with bounds checks
return Simd128AsmJsLowerLoadElem(instr);
}
else
{
// non-asm.js: bounds checks are extracted from the instruction
return Simd128LowerLoadElem(instr);
}
case Js::OpCode::Simd128_StArr_I4:
case Js::OpCode::Simd128_StArr_I8:
case Js::OpCode::Simd128_StArr_I16:
case Js::OpCode::Simd128_StArr_U4:
case Js::OpCode::Simd128_StArr_U8:
case Js::OpCode::Simd128_StArr_U16:
case Js::OpCode::Simd128_StArr_F4:
//case Js::OpCode::Simd128_StArr_D2:
case Js::OpCode::Simd128_StArrConst_I4:
case Js::OpCode::Simd128_StArrConst_I8:
case Js::OpCode::Simd128_StArrConst_I16:
case Js::OpCode::Simd128_StArrConst_U4:
case Js::OpCode::Simd128_StArrConst_U8:
case Js::OpCode::Simd128_StArrConst_U16:
case Js::OpCode::Simd128_StArrConst_F4:
//case Js::OpCode::Simd128_StArrConst_D2:
if (m_func->GetJITFunctionBody()->IsAsmJsMode())
{
return Simd128AsmJsLowerStoreElem(instr);
}
else
{
return Simd128LowerStoreElem(instr);
}
case Js::OpCode::Simd128_Swizzle_U4:
case Js::OpCode::Simd128_Swizzle_I4:
case Js::OpCode::Simd128_Swizzle_F4:
//case Js::OpCode::Simd128_Swizzle_D2:
return Simd128LowerSwizzle_4(instr);
case Js::OpCode::Simd128_Shuffle_U4:
case Js::OpCode::Simd128_Shuffle_I4:
case Js::OpCode::Simd128_Shuffle_F4:
//case Js::OpCode::Simd128_Shuffle_D2:
return Simd128LowerShuffle_4(instr);
case Js::OpCode::Simd128_Swizzle_I8:
case Js::OpCode::Simd128_Swizzle_I16:
case Js::OpCode::Simd128_Swizzle_U8:
case Js::OpCode::Simd128_Swizzle_U16:
case Js::OpCode::Simd128_Shuffle_I8:
case Js::OpCode::Simd128_Shuffle_I16:
case Js::OpCode::Simd128_Shuffle_U8:
case Js::OpCode::Simd128_Shuffle_U16:
return Simd128LowerShuffle(instr);
case Js::OpCode::Simd128_FromUint32x4_F4:
return Simd128LowerFloat32x4FromUint32x4(instr);
case Js::OpCode::Simd128_FromFloat32x4_I4:
return Simd128LowerInt32x4FromFloat32x4(instr);
case Js::OpCode::Simd128_FromFloat32x4_U4:
return Simd128LowerUint32x4FromFloat32x4(instr);
case Js::OpCode::Simd128_FromInt64x2_D2:
return EmitSimdConversion(instr, IR::HelperSimd128ConvertSD2);
case Js::OpCode::Simd128_FromUint64x2_D2:
return EmitSimdConversion(instr, IR::HelperSimd128ConvertUD2);
case Js::OpCode::Simd128_FromFloat64x2_I2:
return EmitSimdConversion(instr, IR::HelperSimd128TruncateI2);
case Js::OpCode::Simd128_FromFloat64x2_U2:
return EmitSimdConversion(instr, IR::HelperSimd128TruncateU2);
case Js::OpCode::Simd128_Neq_I4:
case Js::OpCode::Simd128_Neq_I8:
case Js::OpCode::Simd128_Neq_I16:
case Js::OpCode::Simd128_Neq_U4:
case Js::OpCode::Simd128_Neq_U8:
case Js::OpCode::Simd128_Neq_U16:
return Simd128LowerNotEqual(instr);
case Js::OpCode::Simd128_Lt_U4:
case Js::OpCode::Simd128_Lt_U8:
case Js::OpCode::Simd128_Lt_U16:
case Js::OpCode::Simd128_GtEq_U4:
case Js::OpCode::Simd128_GtEq_U8:
case Js::OpCode::Simd128_GtEq_U16:
return Simd128LowerLessThan(instr);
case Js::OpCode::Simd128_LtEq_I4:
case Js::OpCode::Simd128_LtEq_I8:
case Js::OpCode::Simd128_LtEq_I16:
case Js::OpCode::Simd128_LtEq_U4:
case Js::OpCode::Simd128_LtEq_U8:
case Js::OpCode::Simd128_LtEq_U16:
case Js::OpCode::Simd128_Gt_U4:
case Js::OpCode::Simd128_Gt_U8:
case Js::OpCode::Simd128_Gt_U16:
return Simd128LowerLessThanOrEqual(instr);
case Js::OpCode::Simd128_GtEq_I4:
case Js::OpCode::Simd128_GtEq_I8:
case Js::OpCode::Simd128_GtEq_I16:
return Simd128LowerGreaterThanOrEqual(instr);
case Js::OpCode::Simd128_Min_F4:
case Js::OpCode::Simd128_Max_F4:
return Simd128LowerMinMax_F4(instr);
case Js::OpCode::Simd128_AnyTrue_B2:
case Js::OpCode::Simd128_AnyTrue_B4:
case Js::OpCode::Simd128_AnyTrue_B8:
case Js::OpCode::Simd128_AnyTrue_B16:
return Simd128LowerAnyTrue(instr);
case Js::OpCode::Simd128_AllTrue_B2:
case Js::OpCode::Simd128_AllTrue_B4:
case Js::OpCode::Simd128_AllTrue_B8:
case Js::OpCode::Simd128_AllTrue_B16:
return Simd128LowerAllTrue(instr);
case Js::OpCode::Simd128_BitSelect_I4:
return LowerSimd128BitSelect(instr);
default:
AssertMsg(UNREACHED, "Unsupported Simd128 instruction");
}
return nullptr;
}
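// Bit-select is lowered with the xor/and/xor identity:
//   dst = ((src1 ^ src2) & mask) ^ src2
// which is equivalent to (src1 & mask) | (src2 & ~mask): bits come from src1 where the mask
// bit is 1 and from src2 where it is 0, using three instructions and no extra temporaries.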
IR::Instr* LowererMD::LowerSimd128BitSelect(IR::Instr* instr)
{
SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
IR::Opnd *dst = args->Pop();
IR::Opnd *src1 = args->Pop();
IR::Opnd *src2 = args->Pop();
IR::Opnd *mask = args->Pop();
IR::Instr* pInstr = IR::Instr::New(Js::OpCode::PXOR, dst, src1, src2, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
instr->InsertBefore(IR::Instr::New(Js::OpCode::PAND, dst, dst, mask, m_func));
instr->InsertBefore(IR::Instr::New(Js::OpCode::PXOR, dst, dst, src2, m_func));
return removeInstr(instr);
}
IR::Instr* LowererMD::Simd128LoadConst(IR::Instr* instr)
{
Assert(instr->GetDst() && instr->m_opcode == Js::OpCode::Simd128_LdC);
Assert(instr->GetDst()->IsSimd128());
Assert(instr->GetSrc1()->IsSimd128());
Assert(instr->GetSrc1()->IsSimd128ConstOpnd());
Assert(instr->GetSrc2() == nullptr);
AsmJsSIMDValue value = instr->GetSrc1()->AsSimd128ConstOpnd()->m_value;
// MOVUPS dst, [const]
void *pValue = NativeCodeDataNewNoFixup(this->m_func->GetNativeCodeDataAllocator(), SIMDType<DataDesc_LowererMD_Simd128LoadConst>, value);
IR::Opnd * simdRef;
if (!m_func->IsOOPJIT())
{
simdRef = IR::MemRefOpnd::New((void *)pValue, instr->GetDst()->GetType(), instr->m_func);
}
else
{
int offset = NativeCodeData::GetDataTotalOffset(pValue);
simdRef = IR::IndirOpnd::New(IR::RegOpnd::New(m_func->GetTopFunc()->GetNativeCodeDataSym(), TyVar, m_func), offset, instr->GetDst()->GetType(),
#if DBG
NativeCodeData::GetDataDescription(pValue, m_func->m_alloc),
#endif
m_func, true);
GetLowerer()->addToLiveOnBackEdgeSyms->Set(m_func->GetTopFunc()->GetNativeCodeDataSym()->m_id);
}
instr->ReplaceSrc1(simdRef);
instr->m_opcode = LowererMDArch::GetAssignOp(instr->GetDst()->GetType());
Legalize(instr);
return instr->m_prev;
}
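// Canonicalizes a bool vector so every lane is either all ones or all zeros:
//   dst = PCMPEQx(dst, all-zeros)   ; lane == 0 -> all ones, otherwise all zeros
//   dst = PANDN(dst, all-neg-ones)  ; ~dst      -> nonzero lanes become -1, zero lanes stay 0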
IR::Instr* LowererMD::Simd128CanonicalizeToBools(IR::Instr* instr, const Js::OpCode &cmpOpcode, IR::Opnd& dstOpnd)
{
Assert(instr->m_opcode == Js::OpCode::Simd128_IntsToB4 || instr->m_opcode == Js::OpCode::Simd128_IntsToB8 || instr->m_opcode == Js::OpCode::Simd128_IntsToB16 ||
instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B4 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B8 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B16 ||
instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B2 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B4 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B8 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B16 ||
instr->m_opcode == Js::OpCode::Simd128_AllTrue_B2 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B4 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B8 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B16
);
IR::Instr *pInstr;
//dst = cmpOpcode dst, X86_ALL_ZEROS
pInstr = IR::Instr::New(cmpOpcode, &dstOpnd, &dstOpnd, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllZerosAddr(), TySimd128I4, m_func), m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// dst = PANDN dst, X86_ALL_NEG_ONES
pInstr = IR::Instr::New(Js::OpCode::PANDN, &dstOpnd, &dstOpnd, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
return instr;
}
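// Conversions with no SSE2 encoding are routed through a helper: the source vector is stored
// to SIMD temp slot 0, the helper is called with pointers to the temp slots (loaded as call
// arguments by LoadSimdHelperArgument), and the result is read back from slot 1 into dst.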
IR::Instr* LowererMD::EmitSimdConversion(IR::Instr *instr, IR::JnHelperMethod helper)
{
IR::MemRefOpnd* srcMemRef = LoadSimdHelperArgument(instr, 0);
IR::MemRefOpnd* dstMemRef = LoadSimdHelperArgument(instr, 1);
m_lowerer->InsertMove(srcMemRef, instr->UnlinkSrc1(), instr);
IR::Instr * helperCall = IR::Instr::New(Js::OpCode::CALL, this->m_func);
instr->InsertBefore(helperCall);
this->ChangeToHelperCall(helperCall, helper);
m_lowerer->InsertMove(instr->UnlinkDst(), dstMemRef, instr);
return removeInstr(instr);
}
void LowererMD::EmitShiftByScalarI2(IR::Instr *instr, IR::JnHelperMethod helper)
{
IR::Opnd* src2 = instr->GetSrc2();
IR::Opnd* dst = instr->GetDst();
LoadHelperArgument(instr, src2);
IR::MemRefOpnd* srcMemRef = LoadSimdHelperArgument(instr, 0);
m_lowerer->InsertMove(srcMemRef, instr->GetSrc1(), instr);
IR::MemRefOpnd* dstMemRef = LoadSimdHelperArgument(instr, 1);
IR::Instr * helperCall = IR::Instr::New(Js::OpCode::CALL, this->m_func);
instr->InsertBefore(helperCall);
this->ChangeToHelperCall(helperCall, helper);
m_lowerer->InsertMove(dst, dstMemRef, instr);
}
IR::Instr * LowererMD::SIMD128LowerReplaceLane_2(IR::Instr *instr)
{
SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
IR::Opnd *dst = args->Pop();
IR::Opnd *src1 = args->Pop();
IR::Opnd *src2 = args->Pop();
IR::Opnd *src3 = args->Pop();
int lane = src2->AsIntConstOpnd()->AsInt32();
Assert(dst->IsSimd128() && src1->IsSimd128());
if (instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_D2)
{
AssertMsg(AutoSystemInfo::Data.SSE2Available(), "SSE2 not supported");
Assert(src3->IsFloat64());
m_lowerer->InsertMove(dst, src1, instr);
if (lane)
{
instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPD, dst, src3, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
}
else
{
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src3, m_func));
}
return removeInstr(instr);
}
Assert(src3->IsInt64());
if (AutoSystemInfo::Data.SSE4_1Available())
{
m_lowerer->InsertMove(dst, src1, instr);
instr->SetDst(dst);
EmitInsertInt64(src3, lane, instr);
}
else
{
LoadHelperArgument(instr, src2);
LoadInt64HelperArgument(instr, src3);
IR::MemRefOpnd* srcMemRef = LoadSimdHelperArgument(instr, 0);
m_lowerer->InsertMove(srcMemRef, src1, instr);
IR::MemRefOpnd* dstMemRef = LoadSimdHelperArgument(instr, 1);
IR::Instr * helperCall = IR::Instr::New(Js::OpCode::CALL, this->m_func);
instr->InsertBefore(helperCall);
this->ChangeToHelperCall(helperCall, IR::HelperSimd128ReplaceLaneI2);
m_lowerer->InsertMove(dst, dstMemRef, instr);
}
return removeInstr(instr);
}
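// Inserts a 64-bit scalar into lane `index` of the xmm dst. With SSE4.1 this is PINSRQ (or two
// PINSRDs on x86, where the value lives in a 32-bit register pair); without SSE4.1 the value is
// spilled to the SIMD temp area and loaded into the selected half of dst with MOVLPD (lane 0)
// or MOVHPD (lane 1).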
void LowererMD::EmitInsertInt64(IR::Opnd* src, uint index, IR::Instr *instr)
{
IR::Opnd* dst = instr->GetDst();
Assert(dst->IsSimd128() && src->IsInt64());
if (AutoSystemInfo::Data.SSE4_1Available())
{
#ifdef _M_IX86
index *= 2;
Int64RegPair srcPair = m_func->FindOrCreateInt64Pair(src);
instr->InsertBefore(IR::Instr::New(Js::OpCode::PINSRD, dst, srcPair.low, IR::IntConstOpnd::New(index, TyInt8, m_func, true), m_func));
instr->InsertBefore(IR::Instr::New(Js::OpCode::PINSRD, dst, srcPair.high, IR::IntConstOpnd::New(index + 1, TyInt8, m_func, true), m_func));
#else
instr->InsertBefore(IR::Instr::New(Js::OpCode::PINSRQ, dst, src, IR::IntConstOpnd::New(index, TyInt8, m_func, true), m_func));
#endif
}
else
{
intptr_t tempSIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(0);
#ifdef _M_IX86
Int64RegPair src1Pair = m_func->FindOrCreateInt64Pair(src);
IR::Opnd* lower = IR::MemRefOpnd::New(tempSIMD, TyMachPtr, m_func);
m_lowerer->InsertMove(lower, src1Pair.low, instr);
IR::Opnd* higher = IR::MemRefOpnd::New(tempSIMD + 4, TyMachPtr, m_func);
m_lowerer->InsertMove(higher, src1Pair.high, instr);
#else
IR::Opnd* mem = IR::MemRefOpnd::New(tempSIMD, TyMachPtr, m_func);
m_lowerer->InsertMove(mem, src, instr);
#endif
IR::MemRefOpnd* tmp = IR::MemRefOpnd::New(tempSIMD, TyFloat64, m_func);
Js::OpCode opcode = (index) ? Js::OpCode::MOVHPD : Js::OpCode::MOVLPD;
IR::Instr* newInstr = IR::Instr::New(opcode, dst, tmp, m_func);
instr->InsertBefore(newInstr);
newInstr->HoistMemRefAddress(tmp, Js::OpCode::MOV);
Legalize(newInstr);
}
}
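// Extracts 64-bit lane `index` from the xmm src. With SSE4.1 this is PEXTRQ (or two PEXTRDs on
// x86); otherwise the upper lane is first moved to the low position with PSHUFD, and the low
// 64 bits are then reinterpreted as the scalar result via LowerReinterpretPrimitive.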
void LowererMD::EmitExtractInt64(IR::Opnd* dst, IR::Opnd* src, uint index, IR::Instr *instr)
{
Assert(index == 0 || index == 1);
Assert(dst->IsInt64() && src->IsSimd128());
if (AutoSystemInfo::Data.SSE4_1Available())
{
#ifdef _M_IX86
index *= 2;
Int64RegPair dstPair = m_func->FindOrCreateInt64Pair(dst);
instr->InsertBefore(IR::Instr::New(Js::OpCode::PEXTRD, dstPair.low, src, IR::IntConstOpnd::New(index, TyInt8, m_func, true), m_func));
instr->InsertBefore(IR::Instr::New(Js::OpCode::PEXTRD, dstPair.high, src, IR::IntConstOpnd::New(index + 1, TyInt8, m_func, true), m_func));
#else
instr->InsertBefore(IR::Instr::New(Js::OpCode::PEXTRQ, dst, src, IR::IntConstOpnd::New(index, TyInt8, m_func, true), m_func));
#endif
}
else
{
IR::Opnd* tmp = src;
if (index)
{
tmp = IR::RegOpnd::New(TySimd128F4, m_func);
instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, tmp, src, IR::IntConstOpnd::New(2 | 3 << 2, TyInt8, m_func, true), m_func));
}
// Kludge-ish: we need a new instruction for LowerReinterpretPrimitive to transform,
// and a dummy one for the caller to remove.
IR::Instr* tmpInstr = IR::Instr::New(Js::OpCode::Simd128_ExtractLane_I2, dst, tmp->UseWithNewType(TyFloat64, m_func), m_func);
instr->InsertBefore(tmpInstr);
m_lowerer->LowerReinterpretPrimitive(tmpInstr);
}
}
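// Extract-lane strategy: 64-bit lanes go through EmitExtractInt64. Otherwise, if the lane is
// not lane 0, PSRLDQ shifts the requested lane down to the low bytes of a temp, and MOVSS/MOVD
// moves the low element into dst. For 8- and 16-lane types the containing 32-bit word is
// extracted first, then SHR plus masking or sign-extension isolates the 1- or 2-byte sub-lane.
// Bool lanes are additionally normalized to 0/1 with CMP + SETNE.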
IR::Instr* LowererMD::Simd128LowerLdLane(IR::Instr *instr)
{
IR::Opnd* dst, *src1, *src2;
Js::OpCode movOpcode = Js::OpCode::MOVSS;
uint laneWidth = 0, laneIndex = 0, shamt = 0, mask = 0;
IRType laneType = TyInt32;
dst = instr->GetDst();
src1 = instr->GetSrc1();
src2 = instr->GetSrc2();
Assert(dst && dst->IsRegOpnd() && (dst->GetType() == TyFloat32 || dst->GetType() == TyInt32 || dst->GetType() == TyUint32 || dst->GetType() == TyFloat64 || dst->IsInt64()));
Assert(src1 && src1->IsRegOpnd() && src1->IsSimd128());
Assert(src2 && src2->IsIntConstOpnd());
laneIndex = (uint)src2->AsIntConstOpnd()->AsUint32();
laneWidth = 4;
switch (instr->m_opcode)
{
case Js::OpCode::Simd128_ExtractLane_I2:
laneWidth = 8;
break;
case Js::OpCode::Simd128_ExtractLane_F4:
movOpcode = Js::OpCode::MOVSS;
Assert(laneIndex < 4);
break;
case Js::OpCode::Simd128_ExtractLane_I8:
case Js::OpCode::Simd128_ExtractLane_U8:
case Js::OpCode::Simd128_ExtractLane_B8:
movOpcode = Js::OpCode::MOVD;
Assert(laneIndex < 8);
shamt = (laneIndex % 2) * 16;
laneIndex = laneIndex / 2;
laneType = TyInt16;
mask = 0x0000ffff;
break;
case Js::OpCode::Simd128_ExtractLane_I16:
case Js::OpCode::Simd128_ExtractLane_U16:
case Js::OpCode::Simd128_ExtractLane_B16:
movOpcode = Js::OpCode::MOVD;
Assert(laneIndex < 16);
shamt = (laneIndex % 4) * 8;
laneIndex = laneIndex / 4;
laneType = TyInt8;
mask = 0x000000ff;
break;
case Js::OpCode::Simd128_ExtractLane_U4:
case Js::OpCode::Simd128_ExtractLane_I4:
case Js::OpCode::Simd128_ExtractLane_B4:
movOpcode = Js::OpCode::MOVD;
Assert(laneIndex < 4);
break;
default:
Assert(UNREACHED);
}
if (laneWidth == 8) //Simd128_ExtractLane_I2
{
EmitExtractInt64(dst, instr->GetSrc1(), laneIndex, instr);
}
else
{
IR::Opnd* tmp = src1;
if (laneIndex != 0)
{
// tmp = PSRLDQ src1, shamt
tmp = IR::RegOpnd::New(src1->GetType(), m_func);
IR::Instr *shiftInstr = IR::Instr::New(Js::OpCode::PSRLDQ, tmp, src1, IR::IntConstOpnd::New(laneWidth * laneIndex, TyInt8, m_func, true), m_func);
instr->InsertBefore(shiftInstr);
Legalize(shiftInstr);
}
// MOVSS/MOVSD/MOVD dst, tmp
instr->InsertBefore(IR::Instr::New(movOpcode, movOpcode == Js::OpCode::MOVD ? dst : dst->UseWithNewType(tmp->GetType(), m_func), tmp, m_func));
}
// dst has the 4-byte lane
if (instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I8 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_U8 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B8 ||
instr->m_opcode == Js::OpCode::Simd128_ExtractLane_U16 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I16 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B16)
{
// extract the 1- or 2-byte sub-lane
IR::Instr *newInstr = nullptr;
if (shamt != 0)
{
// SHR dst, dst, shamt
newInstr = IR::Instr::New(Js::OpCode::SHR, dst, dst, IR::IntConstOpnd::New((IntConstType)shamt, TyInt8, m_func), m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
}
Assert(laneType == TyInt8 || laneType == TyInt16);
// zero or sign-extend upper bits
if (instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I8 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I16)
{
if (laneType == TyInt8)
{
IR::RegOpnd * tmp = IR::RegOpnd::New(TyInt8, m_func);
newInstr = IR::Instr::New(Js::OpCode::MOV, tmp, dst, m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
newInstr = IR::Instr::New(Js::OpCode::MOVSX, dst, tmp, m_func);
}
else
{
newInstr = IR::Instr::New(Js::OpCode::MOVSXW, dst, dst->UseWithNewType(laneType, m_func), m_func);
}
}
else
{
newInstr = IR::Instr::New(Js::OpCode::AND, dst, dst, IR::IntConstOpnd::New(mask, TyInt32, m_func), m_func);
}
instr->InsertBefore(newInstr);
Legalize(newInstr);
}
if (instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B4 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B8 ||
instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B16)
{
IR::Instr* pInstr = nullptr;
IR::RegOpnd* tmp = IR::RegOpnd::New(TyInt8, m_func);
// cmp dst, 0
pInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
pInstr->SetSrc1(dst->UseWithNewType(laneType, m_func));
pInstr->SetSrc2(IR::IntConstOpnd::New(0, laneType, m_func, true));
instr->InsertBefore(pInstr);
Legalize(pInstr);
// mov tmp(TyInt8), dst
pInstr = IR::Instr::New(Js::OpCode::MOV, tmp, dst, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// setne tmp(TyInt8)
pInstr = IR::Instr::New(Js::OpCode::SETNE, tmp, tmp, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// movsx dst, tmp(TyInt8)
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSX, dst, tmp, m_func));
}
IR::Instr* prevInstr = instr->m_prev;
instr->Remove();
return prevInstr;
}
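// Splat strategies by lane width:
//  - 32-bit lanes: move the scalar into xmm (MOVSS/MOVD) and broadcast with SHUFPS/PSHUFD 0.
//  - 16-bit lanes: truncate to int16, MOVD, PUNPCKLWD, then PSHUFD 0.
//  - 8-bit lanes:  truncate to int8, MOVD, PUNPCKLBW, PUNPCKLWD, then PSHUFD 0.
//  - 64-bit lanes: insert the value into the low half and duplicate it with PSHUFD 0x44.
//  - bool lanes:   compare the scalar with zero and materialize either all ones or all zeros.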
IR::Instr* LowererMD::Simd128LowerSplat(IR::Instr *instr)
{
Js::OpCode shufOpCode = Js::OpCode::SHUFPS, movOpCode = Js::OpCode::MOVSS;
IR::Opnd *dst, *src1;
IR::Instr *pInstr = nullptr;
dst = instr->GetDst();
src1 = instr->GetSrc1();
Assert(dst && dst->IsRegOpnd() && dst->IsSimd128());
Assert(src1 && src1->IsRegOpnd() && (src1->GetType() == TyFloat32 || src1->GetType() == TyInt32 || src1->GetType() == TyFloat64 ||
src1->GetType() == TyInt16 || src1->GetType() == TyInt8 || src1->GetType() == TyUint16 ||
src1->GetType() == TyUint8 || src1->GetType() == TyUint32 || src1->IsInt64()));
Assert(!instr->GetSrc2());
IR::Opnd* tempTruncate = nullptr;
bool bSkip = false;
IR::LabelInstr *labelZero = IR::LabelInstr::New(Js::OpCode::Label, m_func);
IR::LabelInstr *labelDone = IR::LabelInstr::New(Js::OpCode::Label, m_func);
switch (instr->m_opcode)
{
case Js::OpCode::Simd128_Splat_F4:
shufOpCode = Js::OpCode::SHUFPS;
movOpCode = Js::OpCode::MOVSS;
break;
case Js::OpCode::Simd128_Splat_I4:
case Js::OpCode::Simd128_Splat_U4:
shufOpCode = Js::OpCode::PSHUFD;
movOpCode = Js::OpCode::MOVD;
break;
case Js::OpCode::Simd128_Splat_D2:
shufOpCode = Js::OpCode::SHUFPD;
movOpCode = Js::OpCode::MOVSD;
break;
case Js::OpCode::Simd128_Splat_I2:
{
EmitInsertInt64(src1, 0, instr);
instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, dst, dst, IR::IntConstOpnd::New(68, TyInt8, m_func, true), m_func));
bSkip = true;
break;
}
case Js::OpCode::Simd128_Splat_I8:
case Js::OpCode::Simd128_Splat_U8:
// MOV tempTruncate(bx), src1: truncate the value to a 16-bit int
// MOVD dst, tempTruncate(bx)
// PUNPCKLWD dst, dst
// PSHUFD dst, dst, 0
tempTruncate = EnregisterIntConst(instr, src1, TyInt16);
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, dst, tempTruncate, m_func));
instr->InsertBefore(IR::Instr::New(Js::OpCode::PUNPCKLWD, dst, dst, dst, m_func));
instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, dst, dst, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
bSkip = true;
break;
case Js::OpCode::Simd128_Splat_I16:
case Js::OpCode::Simd128_Splat_U16:
// MOV tempTruncate(bx), src1: truncate the value to an 8-bit int
// MOVD dst, tempTruncate(bx)
// PUNPCKLBW dst, dst
// PUNPCKLWD dst, dst
// PSHUFD dst, dst, 0
tempTruncate = EnregisterIntConst(instr, src1, TyInt8);
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, dst, tempTruncate, m_func));
instr->InsertBefore(IR::Instr::New(Js::OpCode::PUNPCKLBW, dst, dst, dst, m_func));
instr->InsertBefore(IR::Instr::New(Js::OpCode::PUNPCKLWD, dst, dst, dst, m_func));
instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, dst, dst, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
bSkip = true;
break;
case Js::OpCode::Simd128_Splat_B4:
case Js::OpCode::Simd128_Splat_B8:
case Js::OpCode::Simd128_Splat_B16:
// CMP src1, 0
// JEQ $labelZero
// MOVAPS dst, xmmword ptr[X86_ALL_NEG_ONES]
// JMP $labelDone
// $labelZero:
// XORPS dst, dst
// $labelDone:
//pInstr = IR::Instr::New(Js::OpCode::CMP, src1, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func);
//instr->InsertBefore(pInstr);
//Legalize(pInstr);
// cmp src1, 0000h
pInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
pInstr->SetSrc1(src1);
pInstr->SetSrc2(IR::IntConstOpnd::New(0x0000, TyInt32, m_func, true));
instr->InsertBefore(pInstr);
Legalize(pInstr);
//JEQ $labelZero
instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JEQ, labelZero, m_func));
// MOVAPS dst, xmmword ptr[X86_ALL_NEG_ONES]
pInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// JMP $labelDone
instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JMP, labelDone, m_func));
// $labelZero:
instr->InsertBefore(labelZero);
// XORPS dst, dst
instr->InsertBefore(IR::Instr::New(Js::OpCode::XORPS, dst, dst, dst, m_func)); // zero out dst
// $labelDone:
instr->InsertBefore(labelDone);
bSkip = true;
break;
default:
Assert(UNREACHED);
}
if (instr->m_opcode == Js::OpCode::Simd128_Splat_F4 && instr->GetSrc1()->IsFloat64())
{
IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func);
// CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32
instr->InsertBefore(IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src1, this->m_func));
src1 = regOpnd32;
}
if (!bSkip)
{
instr->InsertBefore(IR::Instr::New(movOpCode, dst, src1, m_func));
instr->InsertBefore(IR::Instr::New(shufOpCode, dst, dst, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
}
IR::Instr* prevInstr = instr->m_prev;
instr->Remove();
return prevInstr;
}
IR::Instr* LowererMD::Simd128LowerSqrt(IR::Instr *instr)
{
Js::OpCode opcode = Js::OpCode::SQRTPS;
IR::Opnd *dst, *src1;
dst = instr->GetDst();
src1 = instr->GetSrc1();
Assert(dst && dst->IsRegOpnd());
Assert(src1 && src1->IsRegOpnd());
Assert(instr->GetSrc2() == nullptr);
opcode = Js::OpCode::SQRTPS;
#if 0
{
Assert(instr->m_opcode == Js::OpCode::Simd128_Sqrt_D2);
opcode = Js::OpCode::SQRTPD;
}
#endif // 0
instr->InsertBefore(IR::Instr::New(opcode, dst, src1, m_func));
IR::Instr* prevInstr = instr->m_prev;
instr->Remove();
return prevInstr;
}
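// Integer negation is lowered as two's complement: dst = ~src + 1 per lane. The copy of src is
// inverted with PANDN against the all-neg-ones constant, then the X86AllOnes* constant for the
// lane width (a 1 in each lane, as the identity requires) is added with PADDD/PADDW/PADDB.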
IR::Instr* LowererMD::Simd128LowerNeg(IR::Instr *instr)
{
IR::Opnd* dst = instr->GetDst();
IR::Opnd* src1 = instr->GetSrc1();
Js::OpCode addOpcode = Js::OpCode::PADDD;
ThreadContextInfo* threadContextInfo = m_func->GetThreadContextInfo();
intptr_t allOnes = threadContextInfo->GetX86AllOnesI4Addr();
Assert(dst->IsRegOpnd() && dst->IsSimd128());
Assert(src1->IsRegOpnd() && src1->IsSimd128());
Assert(instr->GetSrc2() == nullptr);
switch (instr->m_opcode)
{
case Js::OpCode::Simd128_Neg_I4:
case Js::OpCode::Simd128_Neg_U4:
break;
case Js::OpCode::Simd128_Neg_I8:
case Js::OpCode::Simd128_Neg_U8:
addOpcode = Js::OpCode::PADDW;
allOnes = threadContextInfo->GetX86AllOnesI8Addr();
break;
case Js::OpCode::Simd128_Neg_I16:
case Js::OpCode::Simd128_Neg_U16:
addOpcode = Js::OpCode::PADDB;
allOnes = threadContextInfo->GetX86AllOnesI16Addr();
break;
default:
Assert(UNREACHED);
}
// MOVAPS dst, src1
IR::Instr *pInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func);
instr->InsertBefore(pInstr);
// PANDN dst, dst, 0xfff...f
pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(threadContextInfo->GetX86AllNegOnesAddr(), src1->GetType(), m_func), m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// addOpCode dst, dst, {allOnes}
pInstr = IR::Instr::New(addOpcode, dst, dst, IR::MemRefOpnd::New(allOnes, src1->GetType(), m_func), m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
pInstr = instr->m_prev;
instr->Remove();
return pInstr;
}
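// SSE2 has no packed 32-bit multiply with 32-bit results (PMULLD is SSE4.1), so the product is
// assembled from two PMULUDQs: one over the even lanes of src1/src2, and one over the odd lanes
// obtained by shifting both sources right by 4 bytes. PSHUFD 0x08 compacts the low dword of
// each 64-bit product, and PUNPCKLDQ interleaves the two halves into dst.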
IR::Instr* LowererMD::Simd128LowerMulI4(IR::Instr *instr)
{
Assert(instr->m_opcode == Js::OpCode::Simd128_Mul_I4 || instr->m_opcode == Js::OpCode::Simd128_Mul_U4);
IR::Instr *pInstr;
IR::Opnd* dst = instr->GetDst();
IR::Opnd* src1 = instr->GetSrc1();
IR::Opnd* src2 = instr->GetSrc2();
IR::Opnd* temp1, *temp2, *temp3;
Assert(dst->IsRegOpnd() && dst->IsSimd128());
Assert(src1->IsRegOpnd() && src1->IsSimd128());
Assert(src2->IsRegOpnd() && src2->IsSimd128());
temp1 = IR::RegOpnd::New(src1->GetType(), m_func);
temp2 = IR::RegOpnd::New(src1->GetType(), m_func);
temp3 = IR::RegOpnd::New(src1->GetType(), m_func);
// temp1 = PMULUDQ src1, src2
pInstr = IR::Instr::New(Js::OpCode::PMULUDQ, temp1, src1, src2, m_func);
instr->InsertBefore(pInstr);
//MakeDstEquSrc1(pInstr);
Legalize(pInstr);
// temp2 = PSRLDQ src1, 4 (bytes)
pInstr = IR::Instr::New(Js::OpCode::PSRLDQ, temp2, src1, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func);
instr->InsertBefore(pInstr);
//MakeDstEquSrc1(pInstr);
Legalize(pInstr);
// temp3 = PSRLDQ src2, 4 (bytes)
pInstr = IR::Instr::New(Js::OpCode::PSRLDQ, temp3, src2, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func);
instr->InsertBefore(pInstr);
//MakeDstEquSrc1(pInstr);
Legalize(pInstr);
// temp2 = PMULUDQ temp2, temp3
pInstr = IR::Instr::New(Js::OpCode::PMULUDQ, temp2, temp2, temp3, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
//PSHUFD temp1, temp1, 0x8
instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, temp1, temp1, IR::IntConstOpnd::New( 8 /*b00001000*/, TyInt8, m_func, true), m_func));
//PSHUFD temp2, temp2, 0x8
instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, temp2, temp2, IR::IntConstOpnd::New(8 /*b00001000*/, TyInt8, m_func, true), m_func));
// PUNPCKLDQ dst, temp1, temp2
pInstr = IR::Instr::New(Js::OpCode::PUNPCKLDQ, dst, temp1, temp2, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
pInstr = instr->m_prev;
instr->Remove();
return pInstr;
}
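// SSE2 has no packed 8-bit multiply, so the products are built from 16-bit PMULLW: multiply the
// operands as words and mask off the even-byte results; shift both sources right by 8 to line
// up the odd bytes, multiply, shift left by 8 to reposition them, and OR the two halves.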
IR::Instr* LowererMD::Simd128LowerMulI16(IR::Instr *instr)
{
Assert(instr->m_opcode == Js::OpCode::Simd128_Mul_I16 || instr->m_opcode == Js::OpCode::Simd128_Mul_U16);
IR::Instr *pInstr = nullptr;
IR::Opnd* dst = instr->GetDst();
IR::Opnd* src1 = instr->GetSrc1();
IR::Opnd* src2 = instr->GetSrc2();
IR::Opnd* temp1, *temp2, *temp3;
IRType simdType, laneType;
if (instr->m_opcode == Js::OpCode::Simd128_Mul_I16)
{
simdType = TySimd128I16;
laneType = TyInt8;
}
else
{
simdType = TySimd128U16;
laneType = TyUint8;
}
Assert(dst->IsRegOpnd() && dst->GetType() == simdType);
Assert(src1->IsRegOpnd() && src1->GetType() == simdType);
Assert(src2->IsRegOpnd() && src2->GetType() == simdType);
temp1 = IR::RegOpnd::New(simdType, m_func);
temp2 = IR::RegOpnd::New(simdType, m_func);
temp3 = IR::RegOpnd::New(simdType, m_func);
// MOVAPS temp1, src1
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, temp1, src1, m_func));
//PMULLW temp1, src2
pInstr = IR::Instr::New(Js::OpCode::PMULLW, temp1, temp1, src2, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
//PAND temp1 {0x00ff00ff00ff00ff00ff00ff00ff00ff} :To zero out bytes 1,3,5...
pInstr = IR::Instr::New(Js::OpCode::PAND, temp1, temp1, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86LowBytesMaskAddr(), simdType, m_func), m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
//PSRLW temp2, src2, 8 : upper 8 bits of each word of src2
pInstr = IR::Instr::New(Js::OpCode::PSRLW, temp2, src2, IR::IntConstOpnd::New(8, laneType, m_func, true), m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
//PSRLW temp3, src1, 8 : upper 8 bits of each word of src1
pInstr = IR::Instr::New(Js::OpCode::PSRLW, temp3, src1, IR::IntConstOpnd::New(8, laneType, m_func, true), m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
//PMULLW temp2, temp2, temp3 : multiply the odd bytes
pInstr = IR::Instr::New(Js::OpCode::PMULLW, temp2, temp2, temp3, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
//PSLLW temp2, temp2, 8 : position the results in bytes 1,3,5..
pInstr = IR::Instr::New(Js::OpCode::PSLLW, temp2, temp2, IR::IntConstOpnd::New(8, laneType, m_func, true), m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
//POR dst, temp1, temp2 : OR even and odd result bytes into the final result
pInstr = IR::Instr::New(Js::OpCode::POR, dst, temp1, temp2, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
pInstr = instr->m_prev;
instr->Remove();
return pInstr;
}
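// Shift-by-scalar lowering: the shift count is masked to the lane width (shamt % lane bits) and
// moved into an xmm register. Lane shifts that SSE2 supports directly map to a single
// PSLL*/PSRL*/PSRA*; 8-bit lane shifts have no SSE instruction and are emulated with 16-bit
// shifts plus byte masks; the 64-bit arithmetic right shift is routed to a helper.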
IR::Instr* LowererMD::Simd128LowerShift(IR::Instr *instr)
{
IR::Opnd* dst = instr->GetDst();
IR::Opnd* src1 = instr->GetSrc1();
IR::Opnd* src2 = instr->GetSrc2();
Assert(dst->IsRegOpnd() && dst->IsSimd128());
Assert(src1->IsRegOpnd() && src1->IsSimd128());
Assert(src2->IsInt32());
Js::OpCode opcode = Js::OpCode::PSLLD;
int elementSizeInBytes = 0;
switch (instr->m_opcode)
{
case Js::OpCode::Simd128_ShRtByScalar_I2:
EmitShiftByScalarI2(instr, IR::HelperSimd128ShRtByScalarI2);
return removeInstr(instr);
case Js::OpCode::Simd128_ShLtByScalar_I2:
opcode = Js::OpCode::PSLLQ;
elementSizeInBytes = 8;
break;
case Js::OpCode::Simd128_ShRtByScalar_U2:
opcode = Js::OpCode::PSRLQ;
elementSizeInBytes = 8;
break;
case Js::OpCode::Simd128_ShLtByScalar_I4:
case Js::OpCode::Simd128_ShLtByScalar_U4: // same as int32x4.ShiftLeftScalar
opcode = Js::OpCode::PSLLD;
elementSizeInBytes = 4;
break;
case Js::OpCode::Simd128_ShRtByScalar_I4:
opcode = Js::OpCode::PSRAD;
elementSizeInBytes = 4;
break;
case Js::OpCode::Simd128_ShLtByScalar_I8:
case Js::OpCode::Simd128_ShLtByScalar_U8: // same as int16x8.ShiftLeftScalar
opcode = Js::OpCode::PSLLW;
elementSizeInBytes = 2;
break;
case Js::OpCode::Simd128_ShRtByScalar_I8:
opcode = Js::OpCode::PSRAW;
elementSizeInBytes = 2;
break;
case Js::OpCode::Simd128_ShRtByScalar_U4:
opcode = Js::OpCode::PSRLD;
elementSizeInBytes = 4;
break;
case Js::OpCode::Simd128_ShRtByScalar_U8:
opcode = Js::OpCode::PSRLW;
elementSizeInBytes = 2;
break;
case Js::OpCode::Simd128_ShLtByScalar_I16: // composite, int8x16.ShiftLeftScalar
case Js::OpCode::Simd128_ShRtByScalar_I16: // composite, int8x16.ShiftRightScalar
case Js::OpCode::Simd128_ShLtByScalar_U16: // same as int8x16.ShiftLeftScalar
case Js::OpCode::Simd128_ShRtByScalar_U16: // composite, uint8x16.ShiftRightScalar
elementSizeInBytes = 1;
break;
default:
Assert(UNREACHED);
}
IR::Instr *pInstr = nullptr;
IR::RegOpnd *reg = IR::RegOpnd::New(TyInt32, m_func);
IR::RegOpnd *reg2 = IR::RegOpnd::New(TyInt32, m_func);
IR::RegOpnd *tmp0 = IR::RegOpnd::New(src1->GetType(), m_func);
IR::RegOpnd *tmp1 = IR::RegOpnd::New(src1->GetType(), m_func);
IR::RegOpnd *tmp2 = IR::RegOpnd::New(src1->GetType(), m_func);
//Shift amount: the shift amount is masked to (element size in bits) - 1, i.e. shamt % element width
//The masked shift amount is then moved to an xmm register
//AND shamt, origShamt, shmask
//MOVD tmp0, shamt
IR::RegOpnd *shamt = IR::RegOpnd::New(src2->GetType(), m_func);
// en-register
IR::Opnd *origShamt = EnregisterIntConst(instr, src2); //unnormalized shift amount
pInstr = IR::Instr::New(Js::OpCode::AND, shamt, origShamt, IR::IntConstOpnd::New(Js::SIMDUtils::SIMDGetShiftAmountMask(elementSizeInBytes), TyInt32, m_func), m_func); // normalizing by elm width (i.e. shamt % elm_width)
instr->InsertBefore(pInstr);
Legalize(pInstr);
pInstr = IR::Instr::New(Js::OpCode::MOVD, tmp0, shamt, m_func);
instr->InsertBefore(pInstr);
if (instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I4 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_I4 ||
instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_U4 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U4 ||
instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I8 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_I8 ||
instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_U8 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U8 ||
instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I2 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U2)
{
// shiftOpCode dst, src1, tmp0
pInstr = IR::Instr::New(opcode, dst, src1, tmp0, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
}
else if (instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I16 || instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_U16)
{
// MOVAPS tmp1, src1
pInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp1, src1, m_func);
instr->InsertBefore(pInstr);
// MOVAPS dst, src1
pInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func);
instr->InsertBefore(pInstr);
// PAND tmp1, [X86_HIGHBYTES_MASK]
pInstr = IR::Instr::New(Js::OpCode::PAND, tmp1, tmp1, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86HighBytesMaskAddr(), TySimd128I4, m_func), m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// PSLLW tmp1, tmp0
pInstr = IR::Instr::New(Js::OpCode::PSLLW, tmp1, tmp1, tmp0, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// PSLLW dst, tmp0
pInstr = IR::Instr::New(Js::OpCode::PSLLW, dst, dst, tmp0, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// PAND dst, [X86_LOWBYTES_MASK]
pInstr = IR::Instr::New(Js::OpCode::PAND, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86LowBytesMaskAddr(), TySimd128I4, m_func), m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// POR dst, tmp1
pInstr = IR::Instr::New(Js::OpCode::POR, dst, dst, tmp1, m_func);
instr->InsertBefore(pInstr);
}
else if (instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_I16)
{
// MOVAPS tmp1, src1
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, tmp1, src1, m_func));
// MOVAPS dst, src1
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
// PSLLW dst, 8
instr->InsertBefore(IR::Instr::New(Js::OpCode::PSLLW, dst, dst, IR::IntConstOpnd::New(8, TyInt8, m_func), m_func));
// LEA reg, [shamt + 8]
IR::IndirOpnd *indirOpnd = IR::IndirOpnd::New(shamt->AsRegOpnd(), +8, TyInt32, m_func);
instr->InsertBefore(IR::Instr::New(Js::OpCode::LEA, reg, indirOpnd, m_func));
// MOVD tmp2, reg
pInstr = IR::Instr::New(Js::OpCode::MOVD, tmp2, reg, m_func);
instr->InsertBefore(pInstr);
// PSRAW dst, tmp2
instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRAW, dst, dst, tmp2, m_func));
// PAND dst, [X86_LOWBYTES_MASK]
pInstr = IR::Instr::New(Js::OpCode::PAND, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86LowBytesMaskAddr(), TySimd128I4, m_func), m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// PSRAW tmp1, tmp0
instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRAW, tmp1, tmp1, tmp0, m_func));
// PAND tmp1, [X86_HIGHBYTES_MASK]
pInstr = IR::Instr::New(Js::OpCode::PAND, tmp1, tmp1, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86HighBytesMaskAddr(), TySimd128I4, m_func), m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// POR dst, tmp1
instr->InsertBefore(IR::Instr::New(Js::OpCode::POR, dst, dst, tmp1, m_func));
}
else if (instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U16)
{
IR::RegOpnd * shamtReg = IR::RegOpnd::New(TyInt8, m_func);
shamtReg->SetReg(LowererMDArch::GetRegShiftCount());
IR::RegOpnd * tmp = IR::RegOpnd::New(TyInt8, m_func);
// MOVAPS dst, src1
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
// MOV reg2, 0FFh
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, reg2, IR::IntConstOpnd::New(0xFF, TyInt32, m_func), m_func));
// MOV shamtReg, shamt
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, shamtReg, shamt, m_func));
// SHR reg2, shamtReg (lower 8 bit)
instr->InsertBefore(IR::Instr::New(Js::OpCode::SHR, reg2, reg2, shamtReg, m_func));
// MOV tmp, reg2
// MOVSX reg2, tmp(TyInt8)
pInstr = IR::Instr::New(Js::OpCode::MOV, tmp, reg2, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSX, reg2, tmp, m_func));
IR::RegOpnd *mask = IR::RegOpnd::New(TySimd128I4, m_func);
// PSRLW dst, tmp0 (shamt)
instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRLW, dst, dst, tmp0, m_func));
// splat (0xFF >> shamt) into mask
// MOVD mask, reg2
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, mask, reg2, m_func));
// PUNPCKLBW mask, mask
pInstr = IR::Instr::New(Js::OpCode::PUNPCKLBW, mask, mask, mask, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// PUNPCKLWD mask, mask
pInstr = IR::Instr::New(Js::OpCode::PUNPCKLWD, mask, mask, mask, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// PSHUFD mask, mask, 0
instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, mask, mask, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
// PAND dst, mask
instr->InsertBefore(IR::Instr::New(Js::OpCode::PAND, dst, dst, mask, m_func));
}
else
{
Assert(UNREACHED);
}
pInstr = instr->m_prev;
instr->Remove();
return pInstr;
}
IR::Instr* LowererMD::SIMD128LowerReplaceLane_8(IR::Instr* instr)
{
SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
int lane = 0;
IR::Opnd *dst = args->Pop();
IR::Opnd *src1 = args->Pop();
IR::Opnd *src2 = args->Pop();
IR::Opnd *src3 = args->Pop();
IR::Instr * newInstr = nullptr;
Assert(dst->IsSimd128() && src1->IsSimd128());
lane = src2->AsIntConstOpnd()->AsInt32();
IR::Opnd* laneValue = EnregisterIntConst(instr, src3, TyInt16);
Assert(instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_I8 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_U8 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B8);
// MOVAPS dst, src1
newInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
// PINSRW dst, value, index
newInstr = IR::Instr::New(Js::OpCode::PINSRW, dst, laneValue, IR::IntConstOpnd::New(lane, TyInt8, m_func), m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
if (instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B8) //canonicalizing lanes
{
instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQW, *dst);
}
IR::Instr* prevInstr = instr->m_prev;
instr->Remove();
return prevInstr;
}
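// 8-bit lane replacement (no PINSRB without SSE4.1): spill the vector to the SIMD temp area,
// overwrite the single byte at the lane offset in memory, and reload the vector into dst.
// Bool vectors are re-canonicalized afterwards so the lane is all ones or all zeros.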
IR::Instr* LowererMD::SIMD128LowerReplaceLane_16(IR::Instr* instr)
{
SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
int lane = 0;
IR::Opnd *dst = args->Pop();
IR::Opnd *src1 = args->Pop();
IR::Opnd *src2 = args->Pop();
IR::Opnd *src3 = args->Pop();
IR::Instr * newInstr = nullptr;
Assert(dst->IsSimd128() && src1->IsSimd128());
lane = src2->AsIntConstOpnd()->AsInt32();
Assert(lane >= 0 && lane < 16);
IR::Opnd* laneValue = EnregisterIntConst(instr, src3, TyInt8);
intptr_t tempSIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(0);
#if DBG
// using only one SIMD temp
intptr_t endAddrSIMD = tempSIMD + sizeof(X86SIMDValue);
#endif
Assert(instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_I16 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_U16 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B16);
// MOVUPS [temp], src1
intptr_t address = tempSIMD;
newInstr = IR::Instr::New(Js::OpCode::MOVUPS, IR::MemRefOpnd::New(address, TySimd128I16, m_func), src1, m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
// MOV [temp+offset], laneValue
address = tempSIMD + lane;
// check for buffer overrun
Assert((intptr_t)address < endAddrSIMD);
newInstr = IR::Instr::New(Js::OpCode::MOV, IR::MemRefOpnd::New(address, TyInt8, m_func), laneValue, m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
// MOVUPS dst, [temp]
address = tempSIMD;
newInstr = IR::Instr::New(Js::OpCode::MOVUPS, dst, IR::MemRefOpnd::New(address, TySimd128I16, m_func), m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
if (instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B16) //canonicalizing lanes.
{
instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQB, *dst);
}
IR::Instr* prevInstr = instr->m_prev;
instr->Remove();
return prevInstr;
}
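// 32-bit lane replacement: lane 0 is a plain MOVSS of the (xmm-resident) value; lane 2 goes
// through MOVHLPS/MOVSS/MOVLHPS on a temp; lanes 1 and 3 swap the target lane with lane 0 via
// SHUFPS, MOVSS the value into lane 0, and swap back with the same mask (the mask is its own
// inverse for a two-lane swap). Bool vectors are re-canonicalized afterwards.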
IR::Instr* LowererMD::SIMD128LowerReplaceLane_4(IR::Instr* instr)
{
SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
int lane = 0, byteWidth = 0;
IR::Opnd *dst = args->Pop();
IR::Opnd *src1 = args->Pop();
IR::Opnd *src2 = args->Pop();
IR::Opnd *src3 = args->Pop();
Assert(dst->IsSimd128() && src1->IsSimd128());
IRType type = dst->GetType();
lane = src2->AsIntConstOpnd()->AsInt32();
IR::Opnd* laneValue = EnregisterIntConst(instr, src3);
switch (instr->m_opcode)
{
case Js::OpCode::Simd128_ReplaceLane_I4:
case Js::OpCode::Simd128_ReplaceLane_U4:
case Js::OpCode::Simd128_ReplaceLane_B4:
byteWidth = TySize[TyInt32];
break;
case Js::OpCode::Simd128_ReplaceLane_F4:
byteWidth = TySize[TyFloat32];
break;
default:
Assert(UNREACHED);
}
// MOVAPS dst, src1
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
if (laneValue->GetType() == TyInt32 || laneValue->GetType() == TyUint32)
{
IR::RegOpnd *tempReg = IR::RegOpnd::New(TyFloat32, m_func);//mov intval to xmm
//MOVD
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, tempReg, laneValue, m_func));
laneValue = tempReg;
}
Assert(laneValue->GetType() == TyFloat32);
if (lane == 0)
{
// MOVSS for both TyFloat32 and TyInt32. MOVD zeroes upper bits.
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, laneValue, m_func));
}
else if (lane == 2)
{
IR::RegOpnd *tmp = IR::RegOpnd::New(type, m_func);
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVHLPS, tmp, dst, m_func));
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, tmp, laneValue, m_func));
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVLHPS, dst, tmp, m_func));
}
else
{
Assert(lane == 1 || lane == 3);
uint8 shufMask = 0xE4; // 11 10 01 00
shufMask |= lane; // 11 10 01 id
shufMask &= ~(0x03 << (lane << 1)); // set 2 bits corresponding to lane index to 00
// SHUFPS dst, dst, shufMask
instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, dst, IR::IntConstOpnd::New(shufMask, TyInt8, m_func, true), m_func));
// MOVSS dst, value
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, laneValue, m_func));
// SHUFPS dst, dst, shufMask
instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, dst, IR::IntConstOpnd::New(shufMask, TyInt8, m_func, true), m_func));
}
if (instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B4) //Canonicalizing lanes
{
instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQD, *dst);
}
IR::Instr* prevInstr = instr->m_prev;
instr->Remove();
return prevInstr;
}
/*
4 and 2 lane Swizzle.
*/
IR::Instr* LowererMD::Simd128LowerSwizzle_4(IR::Instr* instr)
{
Js::OpCode shufOpcode = Js::OpCode::SHUFPS;
Js::OpCode irOpcode = instr->m_opcode;
SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
IR::Opnd *dst = args->Pop();
IR::Opnd *srcs[6] = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr };
int i = 0;
while (!args->Empty() && i < 6)
{
srcs[i++] = args->Pop();
}
int8 shufMask = 0;
int lane0 = 0, lane1 = 0, lane2 = 0, lane3 = 0;
IR::Instr *pInstr = instr->m_prev;
Assert(dst->IsSimd128() && srcs[0] && srcs[0]->IsSimd128());
// globOpt type-specializes only when all lane indices are constants and within the range that a single SSE instruction can handle
Assert(irOpcode == Js::OpCode::Simd128_Swizzle_I4 || irOpcode == Js::OpCode::Simd128_Swizzle_U4 || irOpcode == Js::OpCode::Simd128_Swizzle_F4 /*|| irOpcode == Js::OpCode::Simd128_Swizzle_D2*/);
AssertMsg(srcs[1] && srcs[1]->IsIntConstOpnd() &&
srcs[2] && srcs[2]->IsIntConstOpnd() &&
(/*irOpcode == Js::OpCode::Simd128_Swizzle_D2 || */(srcs[3] && srcs[3]->IsIntConstOpnd())) &&
(/*irOpcode == Js::OpCode::Simd128_Swizzle_D2 || */(srcs[4] && srcs[4]->IsIntConstOpnd())), "Type-specialized swizzle is supported only with constant lane indices");
#if 0
if (irOpcode == Js::OpCode::Simd128_Swizzle_D2)
{
lane0 = srcs[1]->AsIntConstOpnd()->AsInt32();
lane1 = srcs[2]->AsIntConstOpnd()->AsInt32();
Assert(lane0 >= 0 && lane0 < 2);
Assert(lane1 >= 0 && lane1 < 2);
shufMask = (int8)((lane1 << 1) | lane0);
shufOpcode = Js::OpCode::SHUFPD;
}
#endif // 0
if (irOpcode == Js::OpCode::Simd128_Swizzle_I4 || irOpcode == Js::OpCode::Simd128_Swizzle_U4)
{
shufOpcode = Js::OpCode::PSHUFD;
}
AnalysisAssert(srcs[3] != nullptr && srcs[4] != nullptr);
lane0 = srcs[1]->AsIntConstOpnd()->AsInt32();
lane1 = srcs[2]->AsIntConstOpnd()->AsInt32();
lane2 = srcs[3]->AsIntConstOpnd()->AsInt32();
lane3 = srcs[4]->AsIntConstOpnd()->AsInt32();
Assert(lane0 >= 0 && lane0 < 4);
Assert(lane1 >= 0 && lane1 < 4);
Assert(lane2 >= 0 && lane2 < 4);
Assert(lane3 >= 0 && lane3 < 4);
shufMask = (int8)((lane3 << 6) | (lane2 << 4) | (lane1 << 2) | lane0);
instr->m_opcode = shufOpcode;
instr->SetDst(dst);
// MOVAPS dst, src1
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, srcs[0], m_func));
// SHUF dst, dst, imm8
instr->SetSrc1(dst);
instr->SetSrc2(IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true));
return pInstr;
}
/*
4 lane shuffle. Handles arbitrary lane values.
*/
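// Lowering cases, based on how many lanes come from each source:
//  - all 4 lanes from one source: a single swizzle (SHUFPS/PSHUFD) of that source.
//  - 2+2 with the src1 lanes in the low half: a single SHUFPS of src1/src2.
//  - 2+2 otherwise: SHUFPS the lanes ordered by source into a temp, then swizzle the temp into
//    the requested order.
//  - 3+1: swizzle both sources, mask the single "minority" lane with ANDPS/ANDNPS, and ORPS the
//    two results together.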
IR::Instr* LowererMD::Simd128LowerShuffle_4(IR::Instr* instr)
{
Js::OpCode irOpcode = instr->m_opcode;
SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
IR::Opnd *dst = args->Pop();
IR::Opnd *srcs[6] = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr };
int j = 0;
while (!args->Empty() && j < 6)
{
srcs[j++] = args->Pop();
}
uint8 lanes[4], lanesSrc[4];
uint fromSrc1, fromSrc2;
IR::Instr *pInstr = instr->m_prev;
Assert(dst->IsSimd128() && srcs[0] && srcs[0]->IsSimd128() && srcs[1] && srcs[1]->IsSimd128());
Assert(irOpcode == Js::OpCode::Simd128_Shuffle_I4 || irOpcode == Js::OpCode::Simd128_Shuffle_U4 || irOpcode == Js::OpCode::Simd128_Shuffle_F4);
// globOpt type-specializes only when all lane indices are constants and within the range that a single SSE instruction can handle
AssertMsg(srcs[2] && srcs[2]->IsIntConstOpnd() &&
srcs[3] && srcs[3]->IsIntConstOpnd() &&
srcs[4] && srcs[4]->IsIntConstOpnd() &&
srcs[5] && srcs[5]->IsIntConstOpnd(), "Type-specialized shuffle is supported only with constant lane indices");
lanes[0] = (uint8) srcs[2]->AsIntConstOpnd()->AsInt32();
lanes[1] = (uint8) srcs[3]->AsIntConstOpnd()->AsInt32();
lanes[2] = (uint8) srcs[4]->AsIntConstOpnd()->AsInt32();
lanes[3] = (uint8) srcs[5]->AsIntConstOpnd()->AsInt32();
Assert(lanes[0] >= 0 && lanes[0] < 8);
Assert(lanes[1] >= 0 && lanes[1] < 8);
Assert(lanes[2] >= 0 && lanes[2] < 8);
Assert(lanes[3] >= 0 && lanes[3] < 8);
CheckShuffleLanes_4(lanes, lanesSrc, &fromSrc1, &fromSrc2);
Assert(fromSrc1 + fromSrc2 == 4);
if (fromSrc1 == 4 || fromSrc2 == 4)
{
// can be done with a swizzle
IR::Opnd *srcOpnd = fromSrc1 == 4 ? srcs[0] : srcs[1];
InsertShufps(lanes, dst, srcOpnd, srcOpnd, instr);
}
else if (fromSrc1 == 2)
{
if (lanes[0] < 4 && lanes[1] < 4)
{
// x86 friendly shuffle
Assert(lanes[2] >= 4 && lanes[3] >= 4);
InsertShufps(lanes, dst, srcs[0], srcs[1], instr);
}
else
{
// arbitrary shuffle with 2 lanes from each src
uint8 ordLanes[4], reArrLanes[4];
// order lanes based on which src they come from
// compute re-arrangement mask
for (uint8 i = 0, j1 = 0, j2 = 2; i < 4; i++)
{
if (lanesSrc[i] == 1 && j1 < 4)
{
ordLanes[j1] = lanes[i];
reArrLanes[i] = j1;
j1++;
}
else if(j2 < 4)
{
Assert(lanesSrc[i] == 2);
ordLanes[j2] = lanes[i];
reArrLanes[i] = j2;
j2++;
}
}
IR::RegOpnd *temp = IR::RegOpnd::New(dst->GetType(), m_func);
InsertShufps(ordLanes, temp, srcs[0], srcs[1], instr);
InsertShufps(reArrLanes, dst, temp, temp, instr);
}
}
else if (fromSrc1 == 3 || fromSrc2 == 3)
{
// shuffle with 3 lanes from one src, one from another
IR::Instr *newInstr;
IR::Opnd * majSrc, *minSrc;
IR::RegOpnd *temp1 = IR::RegOpnd::New(dst->GetType(), m_func);
IR::RegOpnd *temp2 = IR::RegOpnd::New(dst->GetType(), m_func);
IR::RegOpnd *temp3 = IR::RegOpnd::New(dst->GetType(), m_func);
uint8 minorityLane = 0, maxLaneValue;
majSrc = fromSrc1 == 3 ? srcs[0] : srcs[1];
minSrc = fromSrc1 == 3 ? srcs[1] : srcs[0];
Assert(majSrc != minSrc);
// Algorithm:
// SHUFPS temp1, majSrc, lanes
// SHUFPS temp2, minSrc, lanes
// MOVUPS temp3, [minorityLane mask]
// ANDPS temp2, temp3 // mask all lanes but minorityLane
// ANDNPS temp3, temp1 // zero minorityLane
// ORPS dst, temp2, temp3
// find minorityLane to mask
maxLaneValue = minSrc == srcs[0] ? 4 : 8;
for (uint8 i = 0; i < 4; i++)
{
if (lanes[i] >= (maxLaneValue - 4) && lanes[i] < maxLaneValue)
{
minorityLane = i;
break;
}
}
IR::MemRefOpnd * laneMask = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86FourLanesMaskAddr(minorityLane), dst->GetType(), m_func);
InsertShufps(lanes, temp1, majSrc, majSrc, instr);
InsertShufps(lanes, temp2, minSrc, minSrc, instr);
newInstr = IR::Instr::New(Js::OpCode::MOVUPS, temp3, laneMask, m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
newInstr = IR::Instr::New(Js::OpCode::ANDPS, temp2, temp2, temp3, m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
newInstr = IR::Instr::New(Js::OpCode::ANDNPS, temp3, temp3, temp1, m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
newInstr = IR::Instr::New(Js::OpCode::ORPS, dst, temp2, temp3, m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
}
instr->Remove();
return pInstr;
}
// 8- and 16-lane swizzle/shuffle via memory temps
IR::Instr* LowererMD::Simd128LowerShuffle(IR::Instr* instr)
{
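// Lower arbitrary 8- and 16-lane swizzles/shuffles through memory: spill the source(s) to the
// thread-context SIMD temp area, copy each selected lane with a scalar MOV into a result temp,
// then reload the assembled value into dst.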
Js::OpCode irOpcode = instr->m_opcode;
IR::Instr *pInstr = instr->m_prev, *newInstr = nullptr;
SList<IR::Opnd*> *args = nullptr;
IR::Opnd *dst = nullptr;
IR::Opnd *src1 = nullptr, *src2 = nullptr;
uint8 lanes[16], laneCount = 0, scale = 1;
bool isShuffle = false;
IRType laneType = TyInt16;
intptr_t temp1SIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(0);
intptr_t temp2SIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(1);
intptr_t dstSIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(2);
#if DBG
intptr_t endAddrSIMD = (intptr_t)(temp1SIMD + sizeof(X86SIMDValue) * SIMD_TEMP_SIZE);
#endif
void *address = nullptr;
args = Simd128GetExtendedArgs(instr);
switch (irOpcode)
{
case Js::OpCode::Simd128_Swizzle_I8:
case Js::OpCode::Simd128_Swizzle_U8:
Assert(args->Count() == 10);
laneCount = 8;
laneType = TyInt16;
isShuffle = false;
scale = 2;
break;
case Js::OpCode::Simd128_Swizzle_I16:
case Js::OpCode::Simd128_Swizzle_U16:
Assert(args->Count() == 18);
laneCount = 16;
laneType = TyInt8;
isShuffle = false;
scale = 1;
break;
case Js::OpCode::Simd128_Shuffle_I8:
case Js::OpCode::Simd128_Shuffle_U8:
Assert(args->Count() == 11);
laneCount = 8;
isShuffle = true;
laneType = TyUint16;
scale = 2;
break;
case Js::OpCode::Simd128_Shuffle_I16:
case Js::OpCode::Simd128_Shuffle_U16:
Assert(args->Count() == 19);
laneCount = 16;
isShuffle = true;
laneType = TyUint8;
scale = 1;
break;
default:
Assert(UNREACHED);
}
dst = args->Pop();
src1 = args->Pop();
if (isShuffle)
{
src2 = args->Pop();
}
Assert(dst->IsSimd128() && src1 && src1->IsSimd128() && (!isShuffle || src2->IsSimd128()));
for (uint i = 0; i < laneCount; i++)
{
IR::Opnd * laneOpnd = args->Pop();
Assert(laneOpnd->IsIntConstOpnd());
lanes[i] = (uint8)laneOpnd->AsIntConstOpnd()->AsInt32();
}
// MOVUPS [temp], src1
newInstr = IR::Instr::New(Js::OpCode::MOVUPS, IR::MemRefOpnd::New((void*)temp1SIMD, TySimd128I16, m_func), src1, m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
if (isShuffle)
{
// MOVUPS [temp+16], src2
newInstr = IR::Instr::New(Js::OpCode::MOVUPS, IR::MemRefOpnd::New((void*)(temp2SIMD), TySimd128I16, m_func), src2, m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
}
for (uint i = 0; i < laneCount; i++)
{
// MOV tmp, [temp1SIMD + laneValue*scale]
IR::RegOpnd *tmp = IR::RegOpnd::New(laneType, m_func);
address = (void*)(temp1SIMD + lanes[i] * scale);
Assert((intptr_t)address + (intptr_t)scale <= (intptr_t)dstSIMD);
newInstr = IR::Instr::New(Js::OpCode::MOV, tmp, IR::MemRefOpnd::New(address, laneType, m_func), m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
// MOV [dstSIMD + i*scale], tmp
address = (void*)(dstSIMD + i * scale);
Assert((intptr_t)address + (intptr_t)scale <= endAddrSIMD);
newInstr = IR::Instr::New(Js::OpCode::MOV, IR::MemRefOpnd::New(address, laneType, m_func), tmp, m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
}
// MOVUPS dst, [dstSIMD]
newInstr = IR::Instr::New(Js::OpCode::MOVUPS, dst, IR::MemRefOpnd::New((void*)dstSIMD, TySimd128I16, m_func), m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
instr->Remove();
return pInstr;
}
IR::Instr* LowererMD::Simd128LowerNotEqual(IR::Instr* instr)
{
Assert(instr->m_opcode == Js::OpCode::Simd128_Neq_I4 || instr->m_opcode == Js::OpCode::Simd128_Neq_I8 ||
instr->m_opcode == Js::OpCode::Simd128_Neq_I16 || instr->m_opcode == Js::OpCode::Simd128_Neq_U4 ||
instr->m_opcode == Js::OpCode::Simd128_Neq_U8 || instr->m_opcode == Js::OpCode::Simd128_Neq_U16);
IR::Instr *pInstr;
IR::Opnd* dst = instr->GetDst();
IR::Opnd* src1 = instr->GetSrc1();
IR::Opnd* src2 = instr->GetSrc2();
Assert(dst->IsRegOpnd() && (dst->IsSimd128B4() || dst->IsSimd128B8() || dst->IsSimd128B16()));
Assert(src1->IsRegOpnd() && src1->IsSimd128());
Assert(src2->IsRegOpnd() && src2->IsSimd128());
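// neq(a, b) == not(eq(a, b)): compare for equality, then invert the mask with PANDN against all-ones.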
Js::OpCode cmpOpcode = Js::OpCode::PCMPEQD;
if (instr->m_opcode == Js::OpCode::Simd128_Neq_I8 || instr->m_opcode == Js::OpCode::Simd128_Neq_U8)
{
cmpOpcode = Js::OpCode::PCMPEQW;
}
else if (instr->m_opcode == Js::OpCode::Simd128_Neq_I16 || instr->m_opcode == Js::OpCode::Simd128_Neq_U16)
{
cmpOpcode = Js::OpCode::PCMPEQB;
}
// dst = PCMPEQD src1, src2
pInstr = IR::Instr::New(cmpOpcode, dst, src1, src2, m_func);
instr->InsertBefore(pInstr);
//MakeDstEquSrc1(pInstr);
Legalize(pInstr);
// dst = PANDN dst, X86_ALL_NEG_ONES
pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
instr->InsertBefore(pInstr);
//MakeDstEquSrc1(pInstr);
Legalize(pInstr);
pInstr = instr->m_prev;
instr->Remove();
return pInstr;
}
IR::Instr* LowererMD::Simd128LowerLessThan(IR::Instr* instr)
{
Assert(instr->m_opcode == Js::OpCode::Simd128_Lt_U4 || instr->m_opcode == Js::OpCode::Simd128_Lt_U8 || instr->m_opcode == Js::OpCode::Simd128_Lt_U16 ||
instr->m_opcode == Js::OpCode::Simd128_GtEq_U4 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U16);
IR::Instr *pInstr;
IR::Opnd* dst = instr->GetDst();
IR::Opnd* src1 = instr->GetSrc1();
IR::Opnd* src2 = instr->GetSrc2();
Assert(dst->IsRegOpnd() && (dst->IsSimd128B4() || dst->IsSimd128B8() || dst->IsSimd128B16()));
Assert(src1->IsRegOpnd() && src1->IsSimd128());
Assert(src2->IsRegOpnd() && src2->IsSimd128());
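// x86 has no unsigned packed compares: XOR the per-element sign bit into both operands to map them
// into signed order, then use the signed PCMPGT with swapped operands (b' > a' == a < b).
// GtEq_U* is lowered as not(Lt_U*).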
IR::RegOpnd* tmpa = IR::RegOpnd::New(src1->GetType(), m_func);
IR::RegOpnd* tmpb = IR::RegOpnd::New(src1->GetType(), m_func);
IR::MemRefOpnd* signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86DoubleWordSignBitsAddr(), TySimd128I4, m_func);
IR::RegOpnd * mask = IR::RegOpnd::New(TySimd128I4, m_func);
Js::OpCode cmpOpcode = Js::OpCode::PCMPGTD;
if (instr->m_opcode == Js::OpCode::Simd128_Lt_U8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U8)
{
cmpOpcode = Js::OpCode::PCMPGTW;
signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86WordSignBitsAddr(), TySimd128I4, m_func);
}
else if (instr->m_opcode == Js::OpCode::Simd128_Lt_U16 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U16)
{
cmpOpcode = Js::OpCode::PCMPGTB;
signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86ByteSignBitsAddr(), TySimd128I4, m_func);
}
// MOVUPS mask, [signBits]
pInstr = IR::Instr::New(Js::OpCode::MOVUPS, mask, signBits, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// tmpa = PXOR src1, signBits
pInstr = IR::Instr::New(Js::OpCode::PXOR, tmpa, src1, mask, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// tmpb = PXOR src2, signBits
pInstr = IR::Instr::New(Js::OpCode::PXOR, tmpb, src2, mask, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// dst = cmpOpCode tmpb, tmpa (Less than, swapped opnds)
pInstr = IR::Instr::New(cmpOpcode, dst, tmpb, tmpa, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
if (instr->m_opcode == Js::OpCode::Simd128_GtEq_U4 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U16)
{
// for SIMD unsigned int, greaterThanOrEqual == lessThan + Not
// dst = PANDN dst, X86_ALL_NEG_ONES
// MOVUPS mask, [allNegOnes]
pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
}
pInstr = instr->m_prev;
instr->Remove();
return pInstr;
}
IR::Instr* LowererMD::Simd128LowerLessThanOrEqual(IR::Instr* instr)
{
Assert(instr->m_opcode == Js::OpCode::Simd128_LtEq_I4 || instr->m_opcode == Js::OpCode::Simd128_LtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_I16 ||
instr->m_opcode == Js::OpCode::Simd128_LtEq_U4 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U16 ||
instr->m_opcode == Js::OpCode::Simd128_Gt_U4 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16);
IR::Instr *pInstr;
IR::Opnd* dst = instr->GetDst();
IR::Opnd* src1 = instr->GetSrc1();
IR::Opnd* src2 = instr->GetSrc2();
Assert(dst->IsRegOpnd() && (dst->IsSimd128B4() || dst->IsSimd128B8() || dst->IsSimd128B16()));
Assert(src1->IsRegOpnd() && src1->IsSimd128());
Assert(src2->IsRegOpnd() && src2->IsSimd128());
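// Signed:   a <= b  ==  not(a > b)  for I4, or  (b > a) | (a == b)  for I8/I16.
// Unsigned: XOR the sign bits into both operands, then (b' > a') | (a' == b').
// Gt_U* is lowered as not(LtEq_U*).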
IR::RegOpnd* tmpa = IR::RegOpnd::New(src1->GetType(), m_func);
IR::RegOpnd* tmpb = IR::RegOpnd::New(src1->GetType(), m_func);
Js::OpCode cmpOpcode = Js::OpCode::PCMPGTD;
Js::OpCode eqpOpcode = Js::OpCode::PCMPEQD;
if (instr->m_opcode == Js::OpCode::Simd128_LtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8)
{
cmpOpcode = Js::OpCode::PCMPGTW;
eqpOpcode = Js::OpCode::PCMPEQW;
}
else if (instr->m_opcode == Js::OpCode::Simd128_LtEq_I16 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U16 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16)
{
cmpOpcode = Js::OpCode::PCMPGTB;
eqpOpcode = Js::OpCode::PCMPEQB;
}
if (instr->m_opcode == Js::OpCode::Simd128_LtEq_I4)
{
// dst = pcmpgtd src1, src2
pInstr = IR::Instr::New(Js::OpCode::PCMPGTD, dst, src1, src2, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// dst = pandn dst, xmmword ptr[X86_ALL_NEG_ONES]
pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
}
else if (instr->m_opcode == Js::OpCode::Simd128_LtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_I16)
{
// tmpa = pcmpgtw src2, src1 (src1 < src2?) [pcmpgtb]
pInstr = IR::Instr::New(cmpOpcode, tmpa, src2, src1, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// tmpb = pcmpeqw src1, src2 [pcmpeqb]
pInstr = IR::Instr::New(eqpOpcode, tmpb, src1, src2, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// dst = por tmpa, tmpb
pInstr = IR::Instr::New(Js::OpCode::POR, dst, tmpa, tmpb, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
}
else if (instr->m_opcode == Js::OpCode::Simd128_LtEq_U4 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U16 ||
instr->m_opcode == Js::OpCode::Simd128_Gt_U4 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16)
{
IR::MemRefOpnd* signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86DoubleWordSignBitsAddr(), TySimd128I4, m_func);
IR::RegOpnd * mask = IR::RegOpnd::New(TySimd128I4, m_func);
if (instr->m_opcode == Js::OpCode::Simd128_LtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8)
{
signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86WordSignBitsAddr(), TySimd128I4, m_func);
}
else if (instr->m_opcode == Js::OpCode::Simd128_LtEq_U16 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16)
{
signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86ByteSignBitsAddr(), TySimd128I4, m_func);
}
// MOVUPS mask, [signBits]
pInstr = IR::Instr::New(Js::OpCode::MOVUPS, mask, signBits, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// tmpa = PXOR src1, mask
pInstr = IR::Instr::New(Js::OpCode::PXOR, tmpa, src1, mask, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// tmpb = PXOR src2, signBits
pInstr = IR::Instr::New(Js::OpCode::PXOR, tmpb, src2, mask, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// dst = cmpOpCode tmpb, tmpa
pInstr = IR::Instr::New(cmpOpcode, dst, tmpb, tmpa, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// tmpa = pcmpeqd tmpa, tmpb
pInstr = IR::Instr::New(eqpOpcode, tmpa, tmpa, tmpb, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// dst = por dst, tmpa
pInstr = IR::Instr::New(Js::OpCode::POR, dst, dst, tmpa, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
if (instr->m_opcode == Js::OpCode::Simd128_Gt_U4 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16)
{ // for SIMD unsigned int, greaterThan == lessThanOrEqual + Not
// dst = PANDN dst, X86_ALL_NEG_ONES
pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
}
}
pInstr = instr->m_prev;
instr->Remove();
return pInstr;
}
IR::Instr* LowererMD::Simd128LowerGreaterThanOrEqual(IR::Instr* instr)
{
Assert(instr->m_opcode == Js::OpCode::Simd128_GtEq_I4 || instr->m_opcode == Js::OpCode::Simd128_GtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_I16);
IR::Instr *pInstr;
IR::Opnd* dst = instr->GetDst();
IR::Opnd* src1 = instr->GetSrc1();
IR::Opnd* src2 = instr->GetSrc2();
Assert(dst->IsRegOpnd() && (dst->IsSimd128B4() || dst->IsSimd128B8() || dst->IsSimd128B16()));
Assert(src1->IsRegOpnd() && src1->IsSimd128());
Assert(src2->IsRegOpnd() && src2->IsSimd128());
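// a >= b  ==  not(b > a)  for I4, or  (a > b) | (a == b)  for I8/I16.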
if (instr->m_opcode == Js::OpCode::Simd128_GtEq_I4)
{
// dst = pcmpgtd src2, src1
pInstr = IR::Instr::New(Js::OpCode::PCMPGTD, dst, src2, src1, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// dst = pandn dst, xmmword ptr[X86_ALL_NEG_ONES]
pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
}
else if (instr->m_opcode == Js::OpCode::Simd128_GtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_I16)
{
IR::RegOpnd* tmp1 = IR::RegOpnd::New(src1->GetType(), m_func);
IR::RegOpnd* tmp2 = IR::RegOpnd::New(src1->GetType(), m_func);
Js::OpCode cmpOpcode = Js::OpCode::PCMPGTW;
Js::OpCode eqpOpcode = Js::OpCode::PCMPEQW;
if (instr->m_opcode == Js::OpCode::Simd128_GtEq_I16)
{
cmpOpcode = Js::OpCode::PCMPGTB;
eqpOpcode = Js::OpCode::PCMPEQB;
}
// tmp1 = pcmpgtw src1, src2 [pcmpgtb]
pInstr = IR::Instr::New(cmpOpcode, tmp1, src1, src2, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// tmp2 = pcmpeqw src1, src2 [pcmpeqw]
pInstr = IR::Instr::New(eqpOpcode, tmp2, src1, src2, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// dst = por tmp1, tmp2
pInstr = IR::Instr::New(Js::OpCode::POR, dst, tmp1, tmp2, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
}
pInstr = instr->m_prev;
instr->Remove();
return pInstr;
}
IR::Instr* LowererMD::Simd128LowerMinMax_F4(IR::Instr* instr)
{
IR::Instr *pInstr;
IR::Opnd* dst = instr->GetDst();
IR::Opnd* src1 = instr->GetSrc1();
IR::Opnd* src2 = instr->GetSrc2();
Assert(dst->IsRegOpnd() && dst->IsSimd128());
Assert(src1->IsRegOpnd() && src1->IsSimd128());
Assert(src2->IsRegOpnd() && src2->IsSimd128());
Assert(instr->m_opcode == Js::OpCode::Simd128_Min_F4 || instr->m_opcode == Js::OpCode::Simd128_Max_F4);
IR::RegOpnd* tmp1 = IR::RegOpnd::New(src1->GetType(), m_func);
IR::RegOpnd* tmp2 = IR::RegOpnd::New(src2->GetType(), m_func);
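// MINPS/MAXPS are not symmetric for NaN and +/-0.0 inputs (they return the second operand when the
// compare is unordered or the values are equal), so compute the operation in both operand orders and
// combine the results.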
if (instr->m_opcode == Js::OpCode::Simd128_Min_F4)
{
pInstr = IR::Instr::New(Js::OpCode::MINPS, tmp1, src1, src2, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// tmp2 = MINPS(src2, src1) -- result differs from tmp1 for NaN and +/-0.0 inputs
pInstr = IR::Instr::New(Js::OpCode::MINPS, tmp2, src2, src1, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// dst = ORPS(tmp1, tmp2) -- OR both results so NaN and -0.0 propagate into the min
pInstr = IR::Instr::New(Js::OpCode::ORPS, dst, tmp1, tmp2, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
}
else
{
//This sequence closely mirrors SIMDFloat32x4Operation::OpMax except for
//the fact that tmp2 (tmpbValue) is reused to reduce the number of registers
//needed for this sequence.
pInstr = IR::Instr::New(Js::OpCode::MAXPS, tmp1, src1, src2, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// tmp2 = MAXPS(src2, src1)
pInstr = IR::Instr::New(Js::OpCode::MAXPS, tmp2, src2, src1, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// tmp1 = ANDPS(tmp1, tmp2) -- AND both results so +0.0 wins over -0.0
pInstr = IR::Instr::New(Js::OpCode::ANDPS, tmp1, tmp1, tmp2, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// tmp2 = CMPUNORDPS(src1, src2) -- all-ones in lanes where either input is NaN
pInstr = IR::Instr::New(Js::OpCode::CMPUNORDPS, tmp2, src1, src2, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// dst = ORPS(tmp1, tmp2) -- force NaN for lanes with NaN inputs
pInstr = IR::Instr::New(Js::OpCode::ORPS, dst, tmp1, tmp2, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
}
pInstr = instr->m_prev;
instr->Remove();
return pInstr;
}
IR::Opnd* LowererMD::Simd128CanonicalizeToBoolsBeforeReduction(IR::Instr* instr)
{
IR::Opnd* src1 = instr->GetSrc1();
if (m_func->GetJITFunctionBody()->IsWasmFunction())
{
Js::OpCode cmpOpcode = Js::OpCode::InvalidOpCode;
switch (instr->m_opcode)
{
case Js::OpCode::Simd128_AnyTrue_B4:
case Js::OpCode::Simd128_AnyTrue_B2:
case Js::OpCode::Simd128_AllTrue_B4:
case Js::OpCode::Simd128_AllTrue_B2:
cmpOpcode = Js::OpCode::PCMPEQD;
break;
case Js::OpCode::Simd128_AnyTrue_B8:
case Js::OpCode::Simd128_AllTrue_B8:
cmpOpcode = Js::OpCode::PCMPEQW;
break;
case Js::OpCode::Simd128_AnyTrue_B16:
case Js::OpCode::Simd128_AllTrue_B16:
cmpOpcode = Js::OpCode::PCMPEQB;
break;
default:
Assert(UNREACHED);
}
IR::RegOpnd * newSrc = IR::RegOpnd::New(src1->GetType(), m_func);
m_lowerer->InsertMove(newSrc, src1, instr);
Simd128CanonicalizeToBools(instr, cmpOpcode, *newSrc);
return newSrc;
}
return src1;
}
IR::Instr* LowererMD::Simd128LowerAnyTrue(IR::Instr* instr)
{
Assert(instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B4 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B8 ||
instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B16 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B2);
IR::Instr *pInstr;
IR::Opnd* dst = instr->GetDst();
#ifdef ENABLE_WASM_SIMD
IR::Opnd* src1 = Simd128CanonicalizeToBoolsBeforeReduction(instr);
#else
IR::Opnd* src1 = instr->GetSrc1();
#endif
Assert(dst->IsRegOpnd() && dst->IsInt32());
Assert(src1->IsRegOpnd() && src1->IsSimd128());
// pmovmskb dst, src1
// neg dst
// sbb dst, dst
// neg dst
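// PMOVMSKB collects the sign bit of every byte of src1 into the low 16 bits of dst. NEG then sets the
// carry flag iff that mask is nonzero, SBB dst, dst turns the carry into 0 or -1, and the final NEG
// canonicalizes the result to 0 or 1.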
// pmovmskb dst, src1
pInstr = IR::Instr::New(Js::OpCode::PMOVMSKB, dst, src1, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// neg dst
pInstr = IR::Instr::New(Js::OpCode::NEG, dst, dst, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// sbb dst, dst
pInstr = IR::Instr::New(Js::OpCode::SBB, dst, dst, dst, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// neg dst
pInstr = IR::Instr::New(Js::OpCode::NEG, dst, dst, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
pInstr = instr->m_prev;
instr->Remove();
return pInstr;
}
IR::Instr* LowererMD::Simd128LowerAllTrue(IR::Instr* instr)
{
Assert(instr->m_opcode == Js::OpCode::Simd128_AllTrue_B4 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B8 ||
instr->m_opcode == Js::OpCode::Simd128_AllTrue_B16 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B2);
IR::Instr *pInstr;
IR::Opnd* dst = instr->GetDst();
#ifdef ENABLE_WASM_SIMD
IR::Opnd* src1 = Simd128CanonicalizeToBoolsBeforeReduction(instr);
#else
IR::Opnd* src1 = instr->GetSrc1();
#endif
Assert(dst->IsRegOpnd() && dst->IsInt32());
Assert(src1->IsRegOpnd() && src1->IsSimd128());
IR::RegOpnd * tmp = IR::RegOpnd::New(TyInt8, m_func);
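// PMOVMSKB collects the sign bit of every byte into a 16-bit mask; all lanes are true iff the
// (possibly reduced) mask equals cmpMask, so compare and SETE the result into dst.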
// pmovmskb dst, src1
pInstr = IR::Instr::New(Js::OpCode::PMOVMSKB, dst, src1, m_func);
instr->InsertBefore(pInstr);
// horizontally OR into the 0th and 2nd nibble positions
// TODO nikolayk revisit the sequence for i64x2.alltrue
IR::Opnd* newDst = dst;
uint cmpMask = 0xFFFF;
if (instr->m_opcode == Js::OpCode::Simd128_AllTrue_B2)
{
cmpMask = 0x0F0F;
IR::RegOpnd* reduceReg = IR::RegOpnd::New(TyInt32, m_func);
pInstr = IR::Instr::New(Js::OpCode::SHR, reduceReg, dst, (IR::IntConstOpnd::New(4, TyInt32, m_func, true)), m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
pInstr = IR::Instr::New(Js::OpCode::OR, reduceReg, reduceReg, dst, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
pInstr = IR::Instr::New(Js::OpCode::AND, reduceReg, reduceReg, (IR::IntConstOpnd::New(0x0F0F, TyInt32, m_func, true)), m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
newDst = reduceReg;
}
// cmp newDst, cmpMask
pInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
pInstr->SetSrc1(newDst);
pInstr->SetSrc2(IR::IntConstOpnd::New(cmpMask, TyInt32, m_func, true));
instr->InsertBefore(pInstr);
Legalize(pInstr);
// mov tmp(TyInt8), newDst
pInstr = IR::Instr::New(Js::OpCode::MOV, tmp, newDst, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// sete tmp(TyInt8)
pInstr = IR::Instr::New(Js::OpCode::SETE, tmp, tmp, m_func);
instr->InsertBefore(pInstr);
Legalize(pInstr);
// movsx dst, tmp(TyInt8)
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSX, dst, tmp, m_func));
pInstr = instr->m_prev;
instr->Remove();
return pInstr;
}
IR::Instr* LowererMD::Simd128LowerInt32x4FromFloat32x4(IR::Instr *instr)
{
IR::Opnd *dst, *src, *tmp, *tmp2, *mask1, *mask2;
IR::Instr *insertInstr, *pInstr, *newInstr;
IR::LabelInstr *doneLabel;
dst = instr->GetDst();
src = instr->GetSrc1();
Assert(dst != nullptr && src != nullptr && dst->IsSimd128() && src->IsSimd128());
// CVTTPS2DQ dst, src
instr->m_opcode = Js::OpCode::CVTTPS2DQ;
insertInstr = instr->m_next;
pInstr = instr->m_prev;
doneLabel = IR::LabelInstr::New(Js::OpCode::Label, this->m_func);
mask1 = IR::RegOpnd::New(TyInt32, m_func);
mask2 = IR::RegOpnd::New(TyInt32, m_func);
// bound checks
// check if any value is potentially out of range (0x80000000 in output)
// PCMPEQD tmp, dst, X86_NEG_MASK (0x80000000)
// MOVMSKPS mask1, tmp
// CMP mask1, 0
// JEQ $doneLabel
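// (CVTTPS2DQ writes the integer-indefinite value 0x80000000 for NaN or out-of-range lanes, so a lane
// equal to 0x80000000 only signals a possible failure -- the input may also have been exactly -2^31.)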
tmp = IR::RegOpnd::New(TySimd128I4, m_func);
tmp2 = IR::RegOpnd::New(TySimd128I4, m_func);
newInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp2, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegMaskF4Addr(), TySimd128I4, m_func), m_func);
insertInstr->InsertBefore(newInstr);
Legalize(newInstr);
newInstr = IR::Instr::New(Js::OpCode::PCMPEQD, tmp, dst, tmp2, m_func);
insertInstr->InsertBefore(newInstr);
Legalize(newInstr);
insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask1, tmp, m_func));
newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
newInstr->SetSrc1(mask1);
newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
insertInstr->InsertBefore(newInstr);
insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JEQ, doneLabel, m_func));
// we have potential out of bound. check bounds
// MOVAPS tmp2, X86_TWO_31_F4 (0x4f000000)
// CMPLEPS tmp, tmp2, src
// MOVMSKPS mask1, tmp
// MOVAPS tmp2, X86_NEG_TWO_31_F4 (0xcf000000)
// CMPLTPS tmp, src, tmp2
// MOVMSKPS mask2, tmp
// OR mask1, mask1, mask2
// check for NaNs
// CMPEQPS tmp, src
// MOVMSKPS mask2, tmp
// NOT mask2
// AND mask2, 0x00000F
// OR mask1, mask2
//
// CMP mask1, 0
// JEQ $doneLabel
newInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp2, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86TwoPower31F4Addr(), TySimd128I4, m_func), m_func);
insertInstr->InsertBefore(newInstr);
Legalize(newInstr);
newInstr = IR::Instr::New(Js::OpCode::CMPLEPS, tmp, tmp2, src, m_func);
insertInstr->InsertBefore(newInstr);
Legalize(newInstr);
insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask1, tmp, m_func));
newInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp2, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegTwoPower31F4Addr(), TySimd128I4, m_func), m_func);
insertInstr->InsertBefore(newInstr);
Legalize(newInstr);
newInstr = IR::Instr::New(Js::OpCode::CMPLTPS, tmp, src, tmp2, m_func);
insertInstr->InsertBefore(newInstr);
Legalize(newInstr);
insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask2, tmp, m_func));
insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::OR, mask1, mask1, mask2, m_func));
#ifdef ENABLE_WASM_SIMD
if (m_func->GetJITFunctionBody()->IsWasmFunction())
{
newInstr = IR::Instr::New(Js::OpCode::CMPEQPS, tmp, src, src, m_func);
insertInstr->InsertBefore(newInstr);
Legalize(newInstr);
insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask2, tmp, m_func));
insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::NOT, mask2, mask2, m_func));
newInstr = IR::Instr::New(Js::OpCode::AND, mask2, mask2, IR::IntConstOpnd::New(0x00000F, TyInt32, m_func), m_func);
insertInstr->InsertBefore(newInstr);
Legalize(newInstr);
insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::OR, mask1, mask1, mask2, m_func));
}
#endif
newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
newInstr->SetSrc1(mask1);
newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
insertInstr->InsertBefore(newInstr);
insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JEQ, doneLabel, m_func));
// throw range error
m_lowerer->GenerateRuntimeError(insertInstr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
insertInstr->InsertBefore(doneLabel);
return pInstr;
}
IR::Instr* LowererMD::Simd128LowerUint32x4FromFloat32x4(IR::Instr *instr)
{
IR::Opnd *dst, *src, *tmp, *tmp2, *two_31_f4_mask, *two_31_i4_mask, *mask;
IR::Instr *pInstr, *newInstr;
IR::LabelInstr *doneLabel, *throwLabel;
dst = instr->GetDst();
src = instr->GetSrc1();
Assert(dst != nullptr && src != nullptr && dst->IsSimd128() && src->IsSimd128());
doneLabel = IR::LabelInstr::New(Js::OpCode::Label, this->m_func);
throwLabel = IR::LabelInstr::New(Js::OpCode::Label, this->m_func, true);
pInstr = instr->m_prev;
mask = IR::RegOpnd::New(TyInt32, m_func);
two_31_f4_mask = IR::RegOpnd::New(TySimd128F4, m_func);
two_31_i4_mask = IR::RegOpnd::New(TySimd128I4, m_func);
tmp = IR::RegOpnd::New(TySimd128F4, m_func);
tmp2 = IR::RegOpnd::New(TySimd128F4, m_func);
// check for NaNs
// CMPEQPS tmp, src, src
// MOVMSKPS mask, tmp
// CMP mask, 0xF
// JNE $throwLabel
#ifdef ENABLE_WASM_SIMD
if (m_func->GetJITFunctionBody()->IsWasmFunction())
{
newInstr = IR::Instr::New(Js::OpCode::CMPEQPS, tmp, src, src, m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask, tmp, m_func));
newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
newInstr->SetSrc1(mask);
newInstr->SetSrc2(IR::IntConstOpnd::New(0x0000000F, TyInt32, m_func));
instr->InsertBefore(newInstr);
Legalize(newInstr);
instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, throwLabel, m_func));
}
#endif
// any lanes <= -1.0 ?
// CMPLEPS tmp, src, [X86_ALL_FLOAT32_NEG_ONES]
// MOVMSKPS mask, tmp
// CMP mask, 0
// JNE $throwLabel
newInstr = IR::Instr::New(Js::OpCode::CMPLEPS, tmp, src, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesF4Addr(), TySimd128I4, m_func), m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
newInstr = IR::Instr::New(Js::OpCode::MOVMSKPS, mask, tmp, m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
newInstr->SetSrc1(mask);
newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
instr->InsertBefore(newInstr);
Legalize(newInstr);
instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, throwLabel, m_func));
// CVTTPS2DQ does a range check over signed range [-2^31, 2^31-1], so will fail to convert values >= 2^31.
// To fix this, subtract 2^31 from values >= 2^31, do CVTTPS2DQ, then add 2^31 back.
// MOVAPS two_31_f4_mask, [X86_TWO_31]
// CMPLEPS tmp2, two_31_f4_mask, src
// ANDPS two_31_f4_mask, tmp2 // two_31_f4_mask has f32(2^31) for lanes >= 2^31, 0 otherwise
// SUBPS tmp2, src, two_31_f4_mask // subtract 2^31 from lanes >= 2^31, leave others unchanged
// CVTTPS2DQ dst, tmp2
newInstr = IR::Instr::New(Js::OpCode::MOVAPS, two_31_f4_mask, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86TwoPower31F4Addr(), TySimd128F4, m_func), m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
newInstr = IR::Instr::New(Js::OpCode::CMPLEPS, tmp2, two_31_f4_mask, src, m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
newInstr = IR::Instr::New(Js::OpCode::ANDPS, two_31_f4_mask, two_31_f4_mask, tmp2, m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
newInstr = IR::Instr::New(Js::OpCode::SUBPS, tmp2, src, two_31_f4_mask, m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
newInstr = IR::Instr::New(Js::OpCode::CVTTPS2DQ, dst, tmp2, m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
// check if any value is out of range (i.e. >= 2^31, meaning originally >= 2^32 before value adjustment)
// PCMPEQD tmp, dst, [X86_NEG_MASK]
// MOVMSKPS mask, tmp
// CMP mask, 0
// JNE $throwLabel
newInstr = IR::Instr::New(Js::OpCode::PCMPEQD, tmp, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegMaskF4Addr(), TySimd128I4, m_func), m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
newInstr = IR::Instr::New(Js::OpCode::MOVMSKPS, mask, tmp, m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
newInstr->SetSrc1(mask);
newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
instr->InsertBefore(newInstr);
Legalize(newInstr);
instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, throwLabel, m_func));
// we pass range checks
// add i4(2^31) values back to adjusted values.
// Shift the 2^31 float mask left by 1 (0x4f000000 << 1 sets the sign bit) and AND it with the
// 2^31 int mask (0x80000000), so the bit stays zero for lanes that were not adjusted.
// MOVAPS two_31_i4_mask, [X86_TWO_31_I4]
// PSLLD two_31_f4_mask, 1
// ANDPS two_31_i4_mask, two_31_f4_mask
// PADDD dst, dst, two_31_i4_mask
// JMP $doneLabel
newInstr = IR::Instr::New(Js::OpCode::MOVAPS, two_31_i4_mask, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86TwoPower31I4Addr(), TySimd128I4, m_func), m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
newInstr = IR::Instr::New(Js::OpCode::PSLLD, two_31_f4_mask, two_31_f4_mask, IR::IntConstOpnd::New(1, TyInt8, m_func), m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
newInstr = IR::Instr::New(Js::OpCode::ANDPS, two_31_i4_mask, two_31_i4_mask, two_31_f4_mask, m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
newInstr = IR::Instr::New(Js::OpCode::PADDD, dst, dst, two_31_i4_mask, m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JMP, doneLabel, m_func));
// throwLabel:
// Throw Range Error
instr->InsertBefore(throwLabel);
m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
// doneLabel:
instr->InsertBefore(doneLabel);
instr->Remove();
return pInstr;
}
IR::Instr* LowererMD::Simd128LowerFloat32x4FromUint32x4(IR::Instr *instr)
{
IR::Opnd *dst, *src, *tmp, *zero;
IR::Instr *pInstr, *newInstr;
dst = instr->GetDst();
src = instr->GetSrc1();
Assert(dst != nullptr && src != nullptr && dst->IsSimd128() && src->IsSimd128());
pInstr = instr->m_prev;
zero = IR::RegOpnd::New(TySimd128I4, m_func);
tmp = IR::RegOpnd::New(TySimd128I4, m_func);
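// CVTDQ2PS treats its input as signed, so lanes >= 2^31 convert to a value that is 2^32 too small;
// detect those lanes with a signed compare against zero and add f32(2^32) back after the conversion.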
// find unsigned values above 2^31-1. Comparison is signed, so look for values < 0
// MOVAPS zero, [X86_ALL_ZEROS]
newInstr = IR::Instr::New(Js::OpCode::MOVAPS, zero, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllZerosAddr(), TySimd128I4, m_func), m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
// tmp = PCMPGTD zero, src
newInstr = IR::Instr::New(Js::OpCode::PCMPGTD, tmp, zero, src, m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
// tmp has f32(2^32) for unsigned values >= 2^31, 0 otherwise
// ANDPS tmp, tmp, [X86_TWO_32_F4]
newInstr = IR::Instr::New(Js::OpCode::ANDPS, tmp, tmp, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86TwoPower32F4Addr(), TySimd128F4, m_func), m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
// convert
// dst = CVTDQ2PS src
newInstr = IR::Instr::New(Js::OpCode::CVTDQ2PS, dst, src, m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
// Add f32(2^32) to negative values
// ADDPS dst, dst, tmp
newInstr = IR::Instr::New(Js::OpCode::ADDPS, dst, dst, tmp, m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
instr->Remove();
return pInstr;
}
IR::Instr* LowererMD::Simd128AsmJsLowerLoadElem(IR::Instr *instr)
{
Assert(instr->m_opcode == Js::OpCode::Simd128_LdArr_I4 ||
instr->m_opcode == Js::OpCode::Simd128_LdArr_I8 ||
instr->m_opcode == Js::OpCode::Simd128_LdArr_I16 ||
instr->m_opcode == Js::OpCode::Simd128_LdArr_U4 ||
instr->m_opcode == Js::OpCode::Simd128_LdArr_U8 ||
instr->m_opcode == Js::OpCode::Simd128_LdArr_U16 ||
instr->m_opcode == Js::OpCode::Simd128_LdArr_F4 ||
//instr->m_opcode == Js::OpCode::Simd128_LdArr_D2 ||
instr->m_opcode == Js::OpCode::Simd128_LdArrConst_I4 ||
instr->m_opcode == Js::OpCode::Simd128_LdArrConst_I8 ||
instr->m_opcode == Js::OpCode::Simd128_LdArrConst_I16 ||
instr->m_opcode == Js::OpCode::Simd128_LdArrConst_U4 ||
instr->m_opcode == Js::OpCode::Simd128_LdArrConst_U8 ||
instr->m_opcode == Js::OpCode::Simd128_LdArrConst_U16 ||
instr->m_opcode == Js::OpCode::Simd128_LdArrConst_F4
//instr->m_opcode == Js::OpCode::Simd128_LdArrConst_D2
);
IR::Instr * instrPrev = instr->m_prev;
IR::RegOpnd * indexOpnd = instr->GetSrc1()->AsIndirOpnd()->GetIndexOpnd();
IR::RegOpnd * baseOpnd = instr->GetSrc1()->AsIndirOpnd()->GetBaseOpnd();
IR::Opnd * dst = instr->GetDst();
IR::Opnd * src1 = instr->GetSrc1();
IR::Opnd * src2 = instr->GetSrc2();
ValueType arrType = baseOpnd->GetValueType();
uint8 dataWidth = instr->dataWidth;
// Type-specialized.
Assert(dst->IsSimd128() && src1->IsSimd128() && src2->GetType() == TyUint32);
IR::Instr * done;
if (indexOpnd || (((uint32)src1->AsIndirOpnd()->GetOffset() + dataWidth) > 0x1000000 /* 16 MB */))
{
uint32 bpe = Simd128GetTypedArrBytesPerElem(arrType);
// bound check and helper
done = this->lowererMDArch.LowerAsmJsLdElemHelper(instr, true, bpe != dataWidth);
}
else
{
// Reaching here means:
// We have a constant index, and either
// (1) constant heap or (2) variable heap with constant index < 16MB.
// Case (1) requires static bound check. Case (2) means we are always in bound.
// This can happen when globOpt propagates a constant access that was not known at bytecodegen time, or when the heap is non-constant.
if (src2->IsIntConstOpnd() && ((uint32)src1->AsIndirOpnd()->GetOffset() + dataWidth > src2->AsIntConstOpnd()->AsUint32()))
{
m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
instr->Remove();
return instrPrev;
}
done = instr;
}
return Simd128ConvertToLoad(dst, src1, dataWidth, instr);
}
IR::Instr* LowererMD::Simd128LowerLoadElem(IR::Instr *instr)
{
Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode());
Assert(
instr->m_opcode == Js::OpCode::Simd128_LdArr_I4 ||
instr->m_opcode == Js::OpCode::Simd128_LdArr_I8 ||
instr->m_opcode == Js::OpCode::Simd128_LdArr_I16 ||
instr->m_opcode == Js::OpCode::Simd128_LdArr_U4 ||
instr->m_opcode == Js::OpCode::Simd128_LdArr_U8 ||
instr->m_opcode == Js::OpCode::Simd128_LdArr_U16 ||
instr->m_opcode == Js::OpCode::Simd128_LdArr_F4
);
IR::Opnd * src = instr->GetSrc1();
IR::RegOpnd * indexOpnd = src->AsIndirOpnd()->GetIndexOpnd();
IR::Opnd * dst = instr->GetDst();
ValueType arrType = src->AsIndirOpnd()->GetBaseOpnd()->GetValueType();
// If we type-specialized, then array is a definite typed-array.
Assert(arrType.IsObject() && arrType.IsTypedArray());
Simd128GenerateUpperBoundCheck(indexOpnd, src->AsIndirOpnd(), arrType, instr);
Simd128LoadHeadSegment(src->AsIndirOpnd(), arrType, instr);
return Simd128ConvertToLoad(dst, src, instr->dataWidth, instr, m_lowerer->GetArrayIndirScale(arrType) /* scale factor */);
}
IR::Instr *
LowererMD::Simd128ConvertToLoad(IR::Opnd *dst, IR::Opnd *src, uint8 dataWidth, IR::Instr* instr, byte scaleFactor /* = 0*/)
{
IR::Instr *newInstr = nullptr;
IR::Instr * instrPrev = instr->m_prev;
// Type-specialized.
Assert(dst && dst->IsSimd128());
Assert(src->IsIndirOpnd());
if (scaleFactor > 0)
{
// needed only for non-Asmjs code
Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode());
src->AsIndirOpnd()->SetScale(scaleFactor);
}
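// dataWidth selects the move: 16 bytes -> MOVUPS, 8 -> MOVSD, 4 -> MOVSS; 12 bytes are assembled from
// a MOVSD of the low 8 bytes plus a MOVSS of the next dword, shifted up by 8 bytes (PSLLDQ) and OR'ed in.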
switch (dataWidth)
{
case 16:
// MOVUPS dst, src1([arrayBuffer + indexOpnd])
newInstr = IR::Instr::New(LowererMDArch::GetAssignOp(src->GetType()), dst, src, instr->m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
break;
case 12:
{
IR::RegOpnd *temp = IR::RegOpnd::New(src->GetType(), instr->m_func);
// MOVSD dst, src1([arrayBuffer + indexOpnd])
newInstr = IR::Instr::New(Js::OpCode::MOVSD, dst, src, instr->m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
// MOVSS temp, src1([arrayBuffer + indexOpnd + 8])
newInstr = IR::Instr::New(Js::OpCode::MOVSS, temp, src, instr->m_func);
instr->InsertBefore(newInstr);
newInstr->GetSrc1()->AsIndirOpnd()->SetOffset(src->AsIndirOpnd()->GetOffset() + 8, true);
Legalize(newInstr);
// PSLLDQ temp, 0x08
instr->InsertBefore(IR::Instr::New(Js::OpCode::PSLLDQ, temp, temp, IR::IntConstOpnd::New(8, TyInt8, instr->m_func, true), instr->m_func));
// ORPS dst, temp
newInstr = IR::Instr::New(Js::OpCode::ORPS, dst, dst, temp, instr->m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
break;
}
case 8:
// MOVSD dst, src1([arrayBuffer + indexOpnd])
newInstr = IR::Instr::New(Js::OpCode::MOVSD, dst, src, instr->m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
break;
case 4:
// MOVSS dst, src1([arrayBuffer + indexOpnd])
newInstr = IR::Instr::New(Js::OpCode::MOVSS, dst, src, instr->m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
break;
default:
Assume(UNREACHED);
}
instr->Remove();
return instrPrev;
}
IR::Instr*
LowererMD::Simd128AsmJsLowerStoreElem(IR::Instr *instr)
{
Assert(
instr->m_opcode == Js::OpCode::Simd128_StArr_I4 ||
instr->m_opcode == Js::OpCode::Simd128_StArr_I8 ||
instr->m_opcode == Js::OpCode::Simd128_StArr_I16 ||
instr->m_opcode == Js::OpCode::Simd128_StArr_U4 ||
instr->m_opcode == Js::OpCode::Simd128_StArr_U8 ||
instr->m_opcode == Js::OpCode::Simd128_StArr_U16 ||
instr->m_opcode == Js::OpCode::Simd128_StArr_F4 ||
//instr->m_opcode == Js::OpCode::Simd128_StArr_D2 ||
instr->m_opcode == Js::OpCode::Simd128_StArrConst_I4 ||
instr->m_opcode == Js::OpCode::Simd128_StArrConst_I8 ||
instr->m_opcode == Js::OpCode::Simd128_StArrConst_I16 ||
instr->m_opcode == Js::OpCode::Simd128_StArrConst_U4 ||
instr->m_opcode == Js::OpCode::Simd128_StArrConst_U8 ||
instr->m_opcode == Js::OpCode::Simd128_StArrConst_U16 ||
instr->m_opcode == Js::OpCode::Simd128_StArrConst_F4
//instr->m_opcode == Js::OpCode::Simd128_StArrConst_D2
);
IR::Instr * instrPrev = instr->m_prev;
IR::RegOpnd * indexOpnd = instr->GetDst()->AsIndirOpnd()->GetIndexOpnd();
IR::RegOpnd * baseOpnd = instr->GetDst()->AsIndirOpnd()->GetBaseOpnd();
IR::Opnd * dst = instr->GetDst();
IR::Opnd * src1 = instr->GetSrc1();
IR::Opnd * src2 = instr->GetSrc2();
ValueType arrType = baseOpnd->GetValueType();
uint8 dataWidth = instr->dataWidth;
// Type-specialized.
Assert(dst->IsSimd128() && src1->IsSimd128() && src2->GetType() == TyUint32);
IR::Instr * done;
if (indexOpnd || ((uint32)dst->AsIndirOpnd()->GetOffset() + dataWidth > 0x1000000))
{
// CMP indexOpnd, src2(arrSize)
// JA $helper
// JMP $store
// $helper:
// Throw RangeError
// JMP $done
// $store:
// MOV dst([arrayBuffer + indexOpnd]), src1
// $done:
uint32 bpe = Simd128GetTypedArrBytesPerElem(arrType);
done = this->lowererMDArch.LowerAsmJsStElemHelper(instr, true, bpe != dataWidth);
}
else
{
// We might have a constant index if globOpt propagated a constant store; we can check ahead of time whether it is in bounds.
if (src2->IsIntConstOpnd() && ((uint32)dst->AsIndirOpnd()->GetOffset() + dataWidth > src2->AsIntConstOpnd()->AsUint32()))
{
m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
instr->Remove();
return instrPrev;
}
done = instr;
}
return Simd128ConvertToStore(dst, src1, dataWidth, instr);
}
IR::Instr*
LowererMD::Simd128LowerStoreElem(IR::Instr *instr)
{
Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode());
Assert(
instr->m_opcode == Js::OpCode::Simd128_StArr_I4 ||
instr->m_opcode == Js::OpCode::Simd128_StArr_I8 ||
instr->m_opcode == Js::OpCode::Simd128_StArr_I16 ||
instr->m_opcode == Js::OpCode::Simd128_StArr_U4 ||
instr->m_opcode == Js::OpCode::Simd128_StArr_U8 ||
instr->m_opcode == Js::OpCode::Simd128_StArr_U16 ||
instr->m_opcode == Js::OpCode::Simd128_StArr_F4
);
IR::Opnd * dst = instr->GetDst();
IR::RegOpnd * indexOpnd = dst->AsIndirOpnd()->GetIndexOpnd();
IR::Opnd * src1 = instr->GetSrc1();
uint8 dataWidth = instr->dataWidth;
ValueType arrType = dst->AsIndirOpnd()->GetBaseOpnd()->GetValueType();
// If we type-specialized, then array is a definite typed-array.
Assert(arrType.IsObject() && arrType.IsTypedArray());
Simd128GenerateUpperBoundCheck(indexOpnd, dst->AsIndirOpnd(), arrType, instr);
Simd128LoadHeadSegment(dst->AsIndirOpnd(), arrType, instr);
return Simd128ConvertToStore(dst, src1, dataWidth, instr, m_lowerer->GetArrayIndirScale(arrType) /*scale factor*/);
}
IR::Instr *
LowererMD::Simd128ConvertToStore(IR::Opnd *dst, IR::Opnd *src1, uint8 dataWidth, IR::Instr* instr, byte scaleFactor /* = 0 */)
{
IR::Instr * instrPrev = instr->m_prev;
Assert(src1 && src1->IsSimd128());
Assert(dst->IsIndirOpnd());
if (scaleFactor > 0)
{
// needed only for non-Asmjs code
Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode());
dst->AsIndirOpnd()->SetScale(scaleFactor);
}
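// Mirror of Simd128ConvertToLoad: 16 bytes -> MOVUPS, 8 -> MOVSD, 4 -> MOVSS; 12 bytes store the low
// 8 bytes with MOVSD, then PSRLDQ a copy by 8 and MOVSS the third lane to offset + 8.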
switch (dataWidth)
{
case 16:
// MOVUPS dst([arrayBuffer + indexOpnd]), src1
instr->InsertBefore(IR::Instr::New(LowererMDArch::GetAssignOp(src1->GetType()), dst, src1, instr->m_func));
break;
case 12:
{
IR::RegOpnd *temp = IR::RegOpnd::New(src1->GetType(), instr->m_func);
IR::Instr *movss;
// MOVAPS temp, src
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, temp, src1, instr->m_func));
// MOVSD dst([arrayBuffer + indexOpnd]), temp
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, temp, instr->m_func));
// PSRLDQ temp, 0x08
instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRLDQ, temp, temp, IR::IntConstOpnd::New(8, TyInt8, m_func, true), instr->m_func));
// MOVSS dst([arrayBuffer + indexOpnd + 8]), temp
movss = IR::Instr::New(Js::OpCode::MOVSS, dst, temp, instr->m_func);
instr->InsertBefore(movss);
movss->GetDst()->AsIndirOpnd()->SetOffset(dst->AsIndirOpnd()->GetOffset() + 8, true);
break;
}
case 8:
// MOVSD dst([arrayBuffer + indexOpnd]), src1
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src1, instr->m_func));
break;
case 4:
// MOVSS dst([arrayBuffer + indexOpnd]), src1
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, src1, instr->m_func));
break;
default:
Assume(UNREACHED);
}
instr->Remove();
return instrPrev;
}
void
LowererMD::Simd128GenerateUpperBoundCheck(IR::RegOpnd *indexOpnd, IR::IndirOpnd *indirOpnd, ValueType arrType, IR::Instr *instr)
{
Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode());
IR::ArrayRegOpnd *arrayRegOpnd = indirOpnd->GetBaseOpnd()->AsArrayRegOpnd();
IR::Opnd* headSegmentLengthOpnd;
if (arrayRegOpnd->EliminatedUpperBoundCheck())
{
// already eliminated or extracted by globOpt (OptArraySrc). Nothing to do.
return;
}
if (arrayRegOpnd->HeadSegmentLengthSym())
{
headSegmentLengthOpnd = IR::RegOpnd::New(arrayRegOpnd->HeadSegmentLengthSym(), TyUint32, m_func);
}
else
{
// (headSegmentLength = [base + offset(length)])
int lengthOffset;
lengthOffset = m_lowerer->GetArrayOffsetOfLength(arrType);
headSegmentLengthOpnd = IR::IndirOpnd::New(arrayRegOpnd, lengthOffset, TyUint32, m_func);
}
IR::LabelInstr * skipLabel = Lowerer::InsertLabel(false, instr);
int32 elemCount = Lowerer::SimdGetElementCountFromBytes(arrayRegOpnd->GetValueType(), instr->dataWidth);
if (indexOpnd)
{
// MOV tmp, elemCount
// ADD tmp, index
// CMP tmp, Length -- upper bound check
// JBE $storeLabel
// Throw RuntimeError
// skipLabel:
IR::RegOpnd *tmp = IR::RegOpnd::New(indexOpnd->GetType(), m_func);
IR::IntConstOpnd *elemCountOpnd = IR::IntConstOpnd::New(elemCount, TyInt8, m_func, true);
m_lowerer->InsertMove(tmp, elemCountOpnd, skipLabel);
Lowerer::InsertAdd(false, tmp, tmp, indexOpnd, skipLabel);
m_lowerer->InsertCompareBranch(tmp, headSegmentLengthOpnd, Js::OpCode::BrLe_A, true, skipLabel, skipLabel);
}
else
{
// CMP Length, (offset + elemCount)
// JA $storeLabel
int32 offset = indirOpnd->GetOffset();
int32 index = offset + elemCount;
m_lowerer->InsertCompareBranch(headSegmentLengthOpnd, IR::IntConstOpnd::New(index, TyInt32, m_func, true), Js::OpCode::BrLe_A, true, skipLabel, skipLabel);
}
m_lowerer->GenerateRuntimeError(skipLabel, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
return;
}
void
LowererMD::Simd128LoadHeadSegment(IR::IndirOpnd *indirOpnd, ValueType arrType, IR::Instr *instr)
{
// For non-asm.js we check if headSeg symbol exists, else load it.
IR::ArrayRegOpnd *arrayRegOpnd = indirOpnd->GetBaseOpnd()->AsArrayRegOpnd();
IR::RegOpnd *headSegmentOpnd;
if (arrayRegOpnd->HeadSegmentSym())
{
headSegmentOpnd = IR::RegOpnd::New(arrayRegOpnd->HeadSegmentSym(), TyMachPtr, m_func);
}
else
{
// MOV headSegment, [base + offset(head)]
int32 headOffset = m_lowerer->GetArrayOffsetOfHeadSegment(arrType);
IR::IndirOpnd * newIndirOpnd = IR::IndirOpnd::New(arrayRegOpnd, headOffset, TyMachPtr, this->m_func);
headSegmentOpnd = IR::RegOpnd::New(TyMachPtr, this->m_func);
m_lowerer->InsertMove(headSegmentOpnd, newIndirOpnd, instr);
}
// change base to be the head segment instead of the array object
indirOpnd->SetBaseOpnd(headSegmentOpnd);
}
// Builds args list <dst, src1, src2, src3 ..>
SList<IR::Opnd*> * LowererMD::Simd128GetExtendedArgs(IR::Instr *instr)
{
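// Each ExtendArg_A holds one argument in src1 and links to the ExtendArg_A of the preceding argument
// through src2. Walking the chain through its single-def syms therefore visits the arguments
// last-to-first; pushing each one, with dst pushed last, lets callers Pop() <dst, src1, src2, ...> in order.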
SList<IR::Opnd*> * args = JitAnew(m_lowerer->m_alloc, SList<IR::Opnd*>, m_lowerer->m_alloc);
IR::Instr *pInstr = instr;
IR::Opnd *dst, *src1, *src2;
dst = src1 = src2 = nullptr;
if (pInstr->GetDst())
{
dst = pInstr->UnlinkDst();
}
src1 = pInstr->UnlinkSrc1();
Assert(src1->GetStackSym()->IsSingleDef());
pInstr = src1->GetStackSym()->GetInstrDef();
while (pInstr && pInstr->m_opcode == Js::OpCode::ExtendArg_A)
{
Assert(pInstr->GetSrc1());
src1 = pInstr->GetSrc1()->Copy(this->m_func);
if (src1->IsRegOpnd())
{
this->m_lowerer->addToLiveOnBackEdgeSyms->Set(src1->AsRegOpnd()->m_sym->m_id);
}
args->Push(src1);
if (pInstr->GetSrc2())
{
src2 = pInstr->GetSrc2();
Assert(src2->GetStackSym()->IsSingleDef());
pInstr = src2->GetStackSym()->GetInstrDef();
}
else
{
pInstr = nullptr;
}
}
args->Push(dst);
Assert(args->Count() > 3);
return args;
}
IR::Opnd*
LowererMD::EnregisterBoolConst(IR::Instr* instr, IR::Opnd *opnd, IRType type)
{
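// Materialize a boolean as all-ones (-1) or all-zeros in a register of the requested type.
// Constants are loaded directly; otherwise compute 0 - opnd and CMOVS in -1 when the result is
// negative (i.e. the operand was a nonzero bool).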
if (opnd->IsIntConstOpnd() || opnd->IsInt64ConstOpnd())
{
bool isSet = opnd->GetImmediateValue(instr->m_func) != 0;
IR::RegOpnd *tempReg = IR::RegOpnd::New(type, m_func);
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, tempReg, IR::IntConstOpnd::New(isSet ? -1 : 0, type, m_func, true), m_func));
return tempReg;
}
IRType origType = opnd->GetType();
IR::RegOpnd *tempReg = IR::RegOpnd::New(origType, m_func);
IR::Instr* cmovInstr = IR::Instr::New(Js::OpCode::MOV, tempReg, IR::IntConstOpnd::New(0, origType, m_func, true), m_func);
instr->InsertBefore(cmovInstr);
Legalize(cmovInstr);
cmovInstr = IR::Instr::New(Js::OpCode::SUB, tempReg, tempReg, opnd->UseWithNewType(origType, m_func), m_func);
instr->InsertBefore(cmovInstr);
Legalize(cmovInstr);
cmovInstr = IR::Instr::New(Js::OpCode::CMOVS, tempReg, tempReg, IR::IntConstOpnd::New(-1, origType, m_func, true), m_func);
instr->InsertBefore(cmovInstr);
Legalize(cmovInstr);
return tempReg->UseWithNewType(type, m_func);
}
IR::Opnd*
LowererMD::EnregisterIntConst(IR::Instr* instr, IR::Opnd *constOpnd, IRType type /* = TyInt32*/)
{
IRType constType = constOpnd->GetType();
if (!IRType_IsNativeInt(constType))
{
// not int opnd, nothing to do
return constOpnd;
}
Assert(type == TyInt32 || type == TyInt16 || type == TyInt8);
Assert(constType == TyInt32 || constType == TyInt16 || constType == TyInt8);
if (constOpnd->IsRegOpnd())
{
// already a register, just cast
constOpnd->SetType(type);
return constOpnd;
}
// en-register
IR::RegOpnd *tempReg = IR::RegOpnd::New(type, m_func);
// MOV tempReg, constOpnd
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, tempReg, constOpnd, m_func));
return tempReg;
}
void LowererMD::Simd128InitOpcodeMap()
{
m_simd128OpCodesMap = JitAnewArrayZ(m_lowerer->m_alloc, Js::OpCode, Js::Simd128OpcodeCount());
// All simd ops should be contiguous for this mapping to work
Assert(Js::OpCode::Simd128_End + (Js::OpCode) 1 == Js::OpCode::Simd128_Start_Extend);
//SET_SIMDOPCODE(Simd128_FromFloat64x2_I4 , CVTTPD2DQ);
//SET_SIMDOPCODE(Simd128_FromFloat64x2Bits_I4 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_I4 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromInt16x8Bits_I4 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromInt8x16Bits_I4 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromUint32x4Bits_I4 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromUint16x8Bits_I4 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromUint8x16Bits_I4 , MOVAPS);
SET_SIMDOPCODE(Simd128_Add_I4 , PADDD);
SET_SIMDOPCODE(Simd128_Sub_I4 , PSUBD);
SET_SIMDOPCODE(Simd128_Lt_I4 , PCMPGTD);
SET_SIMDOPCODE(Simd128_Gt_I4 , PCMPGTD);
SET_SIMDOPCODE(Simd128_Eq_I4 , PCMPEQD);
SET_SIMDOPCODE(Simd128_And_I4 , PAND);
SET_SIMDOPCODE(Simd128_Or_I4 , POR);
SET_SIMDOPCODE(Simd128_Xor_I4 , PXOR);
SET_SIMDOPCODE(Simd128_Not_I4 , XORPS);
SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_I8 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromInt32x4Bits_I8 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromInt8x16Bits_I8 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromUint32x4Bits_I8 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromUint16x8Bits_I8 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromUint8x16Bits_I8 , MOVAPS);
SET_SIMDOPCODE(Simd128_Or_I16 , POR);
SET_SIMDOPCODE(Simd128_Xor_I16 , PXOR);
SET_SIMDOPCODE(Simd128_Not_I16 , XORPS);
SET_SIMDOPCODE(Simd128_And_I16 , PAND);
SET_SIMDOPCODE(Simd128_Add_I16 , PADDB);
SET_SIMDOPCODE(Simd128_Sub_I16 , PSUBB);
SET_SIMDOPCODE(Simd128_Lt_I16 , PCMPGTB);
SET_SIMDOPCODE(Simd128_Gt_I16 , PCMPGTB);
SET_SIMDOPCODE(Simd128_Eq_I16 , PCMPEQB);
SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_I16, MOVAPS);
SET_SIMDOPCODE(Simd128_FromInt32x4Bits_I16 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromInt16x8Bits_I16 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromUint32x4Bits_I16 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromUint16x8Bits_I16 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromUint8x16Bits_I16 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_U4 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromInt32x4Bits_U4 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromInt16x8Bits_U4 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromInt8x16Bits_U4 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromUint16x8Bits_U4 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromUint8x16Bits_U4 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_U8 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromInt32x4Bits_U8 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromInt16x8Bits_U8 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromInt8x16Bits_U8 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromUint32x4Bits_U8 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromUint8x16Bits_U8 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_U16 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromInt32x4Bits_U16 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromInt16x8Bits_U16 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromInt8x16Bits_U16 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromUint32x4Bits_U16 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromUint16x8Bits_U16 , MOVAPS);
//SET_SIMDOPCODE(Simd128_FromFloat64x2_F4 , CVTPD2PS);
//SET_SIMDOPCODE(Simd128_FromFloat64x2Bits_F4 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromInt32x4_F4 , CVTDQ2PS);
SET_SIMDOPCODE(Simd128_FromInt32x4Bits_F4 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromInt16x8Bits_F4 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromInt8x16Bits_F4 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromUint32x4Bits_F4 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromUint16x8Bits_F4 , MOVAPS);
SET_SIMDOPCODE(Simd128_FromUint8x16Bits_F4 , MOVAPS);
SET_SIMDOPCODE(Simd128_Abs_F4 , ANDPS);
SET_SIMDOPCODE(Simd128_Neg_F4 , XORPS);
SET_SIMDOPCODE(Simd128_Add_F4 , ADDPS);
SET_SIMDOPCODE(Simd128_Sub_F4 , SUBPS);
SET_SIMDOPCODE(Simd128_Mul_F4 , MULPS);
SET_SIMDOPCODE(Simd128_Div_F4 , DIVPS);
SET_SIMDOPCODE(Simd128_Sqrt_F4 , SQRTPS);
SET_SIMDOPCODE(Simd128_Lt_F4 , CMPLTPS); // CMPLTPS
SET_SIMDOPCODE(Simd128_LtEq_F4 , CMPLEPS); // CMPLEPS
SET_SIMDOPCODE(Simd128_Eq_F4 , CMPEQPS); // CMPEQPS
SET_SIMDOPCODE(Simd128_Neq_F4 , CMPNEQPS); // CMPNEQPS
SET_SIMDOPCODE(Simd128_Gt_F4 , CMPLTPS); // CMPLTPS (swap srcs)
SET_SIMDOPCODE(Simd128_GtEq_F4 , CMPLEPS); // CMPLEPS (swap srcs)
SET_SIMDOPCODE(Simd128_Neg_D2 , XORPS);
SET_SIMDOPCODE(Simd128_Add_D2 , ADDPD);
SET_SIMDOPCODE(Simd128_Abs_D2 , ANDPD);
SET_SIMDOPCODE(Simd128_Sub_D2 , SUBPD);
SET_SIMDOPCODE(Simd128_Mul_D2 , MULPD);
SET_SIMDOPCODE(Simd128_Div_D2 , DIVPD);
SET_SIMDOPCODE(Simd128_Min_D2 , MINPD);
SET_SIMDOPCODE(Simd128_Max_D2 , MAXPD);
SET_SIMDOPCODE(Simd128_Sqrt_D2 , SQRTPD);
SET_SIMDOPCODE(Simd128_Lt_D2 , CMPLTPD); // CMPLTPD
SET_SIMDOPCODE(Simd128_LtEq_D2 , CMPLEPD); // CMPLEPD
SET_SIMDOPCODE(Simd128_Eq_D2 , CMPEQPD); // CMPEQPD
SET_SIMDOPCODE(Simd128_Neq_D2 , CMPNEQPD); // CMPNEQPD
SET_SIMDOPCODE(Simd128_Gt_D2 , CMPLTPD); // CMPLTPD (swap srcs)
SET_SIMDOPCODE(Simd128_GtEq_D2 , CMPLEPD); // CMPLEPD (swap srcs)
#if 0
SET_SIMDOPCODE(Simd128_FromFloat32x4_D2, CVTPS2PD);
SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_D2, MOVAPS);
SET_SIMDOPCODE(Simd128_FromInt32x4_D2, CVTDQ2PD);
SET_SIMDOPCODE(Simd128_FromInt32x4Bits_D2, MOVAPS);
#endif // 0
SET_SIMDOPCODE(Simd128_And_I8 , PAND);
SET_SIMDOPCODE(Simd128_Or_I8 , POR);
SET_SIMDOPCODE(Simd128_Xor_I8 , XORPS);
SET_SIMDOPCODE(Simd128_Not_I8 , XORPS);
SET_SIMDOPCODE(Simd128_Add_I8 , PADDW);
SET_SIMDOPCODE(Simd128_Sub_I8 , PSUBW);
SET_SIMDOPCODE(Simd128_Mul_I8 , PMULLW);
SET_SIMDOPCODE(Simd128_Eq_I8 , PCMPEQW);
SET_SIMDOPCODE(Simd128_Lt_I8 , PCMPGTW); // (swap srcs)
SET_SIMDOPCODE(Simd128_Gt_I8 , PCMPGTW);
SET_SIMDOPCODE(Simd128_AddSaturate_I8 , PADDSW);
SET_SIMDOPCODE(Simd128_SubSaturate_I8 , PSUBSW);
SET_SIMDOPCODE(Simd128_AddSaturate_I16 , PADDSB);
SET_SIMDOPCODE(Simd128_SubSaturate_I16 , PSUBSB);
SET_SIMDOPCODE(Simd128_And_U4 , PAND);
SET_SIMDOPCODE(Simd128_Or_U4 , POR);
SET_SIMDOPCODE(Simd128_Xor_U4 , XORPS);
SET_SIMDOPCODE(Simd128_Not_U4 , XORPS);
SET_SIMDOPCODE(Simd128_Add_U4 , PADDD);
SET_SIMDOPCODE(Simd128_Sub_U4 , PSUBD);
SET_SIMDOPCODE(Simd128_Eq_U4 , PCMPEQD); // same as int32x4.equal
SET_SIMDOPCODE(Simd128_And_U8 , PAND);
SET_SIMDOPCODE(Simd128_Or_U8 , POR);
SET_SIMDOPCODE(Simd128_Xor_U8 , XORPS);
SET_SIMDOPCODE(Simd128_Not_U8 , XORPS);
SET_SIMDOPCODE(Simd128_Add_U8 , PADDW);
SET_SIMDOPCODE(Simd128_Sub_U8 , PSUBW);
SET_SIMDOPCODE(Simd128_Mul_U8 , PMULLW);
SET_SIMDOPCODE(Simd128_Eq_U8 , PCMPEQW); // same as int16X8.equal
SET_SIMDOPCODE(Simd128_AddSaturate_U8 , PADDUSW);
SET_SIMDOPCODE(Simd128_SubSaturate_U8 , PSUBUSW);
SET_SIMDOPCODE(Simd128_And_U16 , PAND);
SET_SIMDOPCODE(Simd128_Or_U16 , POR);
SET_SIMDOPCODE(Simd128_Xor_U16 , XORPS);
SET_SIMDOPCODE(Simd128_Not_U16 , XORPS);
SET_SIMDOPCODE(Simd128_Add_U16 , PADDB);
SET_SIMDOPCODE(Simd128_Sub_U16 , PSUBB);
SET_SIMDOPCODE(Simd128_Eq_U16 , PCMPEQB); // same as int8x16.equal
SET_SIMDOPCODE(Simd128_AddSaturate_U16 , PADDUSB);
SET_SIMDOPCODE(Simd128_SubSaturate_U16 , PSUBUSB);
SET_SIMDOPCODE(Simd128_And_B4 , PAND);
SET_SIMDOPCODE(Simd128_Or_B4 , POR);
SET_SIMDOPCODE(Simd128_Xor_B4 , XORPS);
SET_SIMDOPCODE(Simd128_Not_B4 , XORPS);
SET_SIMDOPCODE(Simd128_And_B8 , PAND);
SET_SIMDOPCODE(Simd128_Or_B8 , POR);
SET_SIMDOPCODE(Simd128_Xor_B8 , XORPS);
SET_SIMDOPCODE(Simd128_Not_B8 , XORPS);
SET_SIMDOPCODE(Simd128_And_B16 , PAND);
SET_SIMDOPCODE(Simd128_Or_B16 , POR);
SET_SIMDOPCODE(Simd128_Xor_B16 , XORPS);
SET_SIMDOPCODE(Simd128_Not_B16 , XORPS);
SET_SIMDOPCODE(Simd128_Add_I2 , PADDQ);
SET_SIMDOPCODE(Simd128_Sub_I2 , PSUBQ);
}
#undef SET_SIMDOPCODE
#undef GET_SIMDOPCODE
void LowererMD::CheckShuffleLanes_4(uint8 lanes[], uint8 lanesSrc[], uint *fromSrc1, uint *fromSrc2)
{
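// Classify each lane index: 0-3 selects from src1, 4-7 from src2. Record the per-lane source in
// lanesSrc and count how many lanes come from each source.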
Assert(lanes);
Assert(lanesSrc);
Assert(fromSrc1 && fromSrc2);
*fromSrc1 = 0;
*fromSrc2 = 0;
for (uint i = 0; i < 4; i++)
{
if (lanes[i] < 4)
{
(*fromSrc1)++;
lanesSrc[i] = 1;
}
else if (lanes[i] >= 4 && lanes[i] < 8)
{
(*fromSrc2)++;
lanesSrc[i] = 2;
}
else
{
Assert(UNREACHED);
}
}
}
void LowererMD::InsertShufps(uint8 lanes[], IR::Opnd *dst, IR::Opnd *src1, IR::Opnd *src2, IR::Instr *instr)
{
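// SHUFPS dst, src, imm8 fills dst lanes 0-1 from dst (imm bits 0-3) and lanes 2-3 from src
// (imm bits 4-7), two bits per lane. Callers guarantee lanes[0..1] select from src1 and
// lanes[2..3] from src2 (or all four from a single source), so normalizing each index to 0-3 and
// packing gives the immediate. Example: lanes {0, 1, 4, 5} -> mask 0b01000100 (0x44).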
int8 shufMask;
uint8 normLanes[4];
IR::RegOpnd * tmp = IR::RegOpnd::New(TySimd128I4, m_func);
for (uint i = 0; i < 4; i++)
{
normLanes[i] = (lanes[i] >= 4) ? (lanes[i] - 4) : lanes[i];
}
shufMask = (int8)((normLanes[3] << 6) | (normLanes[2] << 4) | (normLanes[1] << 2) | normLanes[0]);
// ToDo: Move this to legalization code
if (dst->IsEqual(src1))
{
// instruction already legal
instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, src2, IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true), m_func));
}
else if (dst->IsEqual(src2))
{
// MOVAPS tmp, dst
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, tmp, dst, m_func));
// MOVAPS dst, src1
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
// SHUF dst, tmp, imm8
instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, tmp, IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true), m_func));
}
else
{
// MOVAPS dst, src1
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
// SHUF dst, src2, imm8
instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, src2, IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true), m_func));
}
}
BYTE LowererMD::Simd128GetTypedArrBytesPerElem(ValueType arrType)
{
return (1 << Lowerer::GetArrayIndirScale(arrType));
}
#endif