| //------------------------------------------------------------------------------------------------------- |
| // Copyright (C) Microsoft Corporation and contributors. All rights reserved. |
| // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. |
| //------------------------------------------------------------------------------------------------------- |
| |
| #include "Backend.h" |
| |
| static IR::Instr* removeInstr(IR::Instr* instr); |
| |
| #ifdef ENABLE_WASM_SIMD |
| |
| static IR::Instr* removeInstr(IR::Instr* instr) |
| { |
| IR::Instr* prevInstr; |
| prevInstr = instr->m_prev; |
| instr->Remove(); |
| return prevInstr; |
| } |
| |
// Looks up the machine-dependent opcode registered for a SIMD IR opcode.
// The table is indexed relative to Js::OpCode::Simd128_Start; an entry of 0
// means "no direct mapping" (see Simd128TryLowerMappedInstruction).
#define GET_SIMDOPCODE(irOpcode) m_simd128OpCodesMap[(uint32)(irOpcode - Js::OpCode::Simd128_Start)]

// Registers the machine-dependent opcode for a SIMD IR opcode.
// Asserts the slot is still empty (each IR opcode maps at most once) and that
// the target opcode really is machine-dependent (> MDStart).
#define SET_SIMDOPCODE(irOpcode, mdOpcode) \
Assert((uint32)m_simd128OpCodesMap[(uint32)(Js::OpCode::irOpcode - Js::OpCode::Simd128_Start)] == 0);\
Assert(Js::OpCode::mdOpcode > Js::OpCode::MDStart);\
m_simd128OpCodesMap[(uint32)(Js::OpCode::irOpcode - Js::OpCode::Simd128_Start)] = Js::OpCode::mdOpcode;
| |
| IR::Instr* LowererMD::Simd128Instruction(IR::Instr *instr) |
| { |
| // Currently only handles type-specialized/asm.js opcodes |
| |
| if (!instr->GetDst()) |
| { |
| // SIMD ops always have DST in asmjs |
| Assert(!instr->m_func->GetJITFunctionBody()->IsAsmJsMode()); |
| // unused result. Do nothing. |
| IR::Instr * pInstr = instr->m_prev; |
| instr->Remove(); |
| return pInstr; |
| } |
| |
| if (Simd128TryLowerMappedInstruction(instr)) |
| { |
| return instr->m_prev; |
| } |
| return Simd128LowerUnMappedInstruction(instr); |
| } |
| |
| bool LowererMD::Simd128TryLowerMappedInstruction(IR::Instr *instr) |
| { |
| bool legalize = true; |
| Js::OpCode opcode = GET_SIMDOPCODE(instr->m_opcode); |
| |
| if ((uint32)opcode == 0) |
| return false; |
| |
| Assert(instr->GetDst() && instr->GetDst()->IsRegOpnd() && instr->GetDst()->IsSimd128() || instr->GetDst()->GetType() == TyInt32); |
| Assert(instr->GetSrc1() && instr->GetSrc1()->IsRegOpnd() && instr->GetSrc1()->IsSimd128()); |
| Assert(!instr->GetSrc2() || (((instr->GetSrc2()->IsRegOpnd() && instr->GetSrc2()->IsSimd128()) || (instr->GetSrc2()->IsIntConstOpnd() && instr->GetSrc2()->GetType() == TyInt8)))); |
| |
| switch (instr->m_opcode) |
| { |
| case Js::OpCode::Simd128_Abs_F4: |
| Assert(opcode == Js::OpCode::ANDPS); |
| instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AbsMaskF4Addr(), instr->GetSrc1()->GetType(), m_func)); |
| break; |
| case Js::OpCode::Simd128_Abs_D2: |
| Assert(opcode == Js::OpCode::ANDPD); |
| instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AbsMaskD2Addr(), instr->GetSrc1()->GetType(), m_func)); |
| break; |
| case Js::OpCode::Simd128_Neg_F4: |
| Assert(opcode == Js::OpCode::XORPS); |
| instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegMaskF4Addr(), instr->GetSrc1()->GetType(), m_func)); |
| break; |
| case Js::OpCode::Simd128_Neg_D2: |
| Assert(opcode == Js::OpCode::XORPS); |
| instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegMaskD2Addr(), instr->GetSrc1()->GetType(), m_func)); |
| break; |
| case Js::OpCode::Simd128_Not_I4: |
| case Js::OpCode::Simd128_Not_I16: |
| case Js::OpCode::Simd128_Not_I8: |
| case Js::OpCode::Simd128_Not_U4: |
| case Js::OpCode::Simd128_Not_U8: |
| case Js::OpCode::Simd128_Not_U16: |
| case Js::OpCode::Simd128_Not_B4: |
| case Js::OpCode::Simd128_Not_B8: |
| case Js::OpCode::Simd128_Not_B16: |
| Assert(opcode == Js::OpCode::XORPS); |
| instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), instr->GetSrc1()->GetType(), m_func)); |
| break; |
| case Js::OpCode::Simd128_Gt_F4: |
| case Js::OpCode::Simd128_Gt_D2: |
| case Js::OpCode::Simd128_GtEq_F4: |
| case Js::OpCode::Simd128_GtEq_D2: |
| case Js::OpCode::Simd128_Lt_I4: |
| case Js::OpCode::Simd128_Lt_I8: |
| case Js::OpCode::Simd128_Lt_I16: |
| { |
| Assert(opcode == Js::OpCode::CMPLTPS || opcode == Js::OpCode::CMPLTPD || opcode == Js::OpCode::CMPLEPS |
| || opcode == Js::OpCode::CMPLEPD || opcode == Js::OpCode::PCMPGTD || opcode == Js::OpCode::PCMPGTB |
| || opcode == Js::OpCode::PCMPGTW ); |
| // swap operands |
| auto *src1 = instr->UnlinkSrc1(); |
| auto *src2 = instr->UnlinkSrc2(); |
| instr->SetSrc1(src2); |
| instr->SetSrc2(src1); |
| break; |
| } |
| |
| } |
| instr->m_opcode = opcode; |
| if (legalize) |
| { |
| //MakeDstEquSrc1(instr); |
| Legalize(instr); |
| } |
| |
| return true; |
| } |
| |
| IR::MemRefOpnd * |
| LowererMD::LoadSimdHelperArgument(IR::Instr * instr, uint8 index) |
| { |
| //the most reliable way to pass a simd value on x86/x64 win/lnx across calls |
| //is to pass a pointer to a SIMD value in the simd temporary area. |
| //otherwise we have to use __m128 and msvc intrinsics which may or may not be the same across |
| //MSVC and Clang |
| |
| IR::MemRefOpnd* srcMemRef = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(index), TySimd128F4, m_func); |
| IR::AddrOpnd* argAddress = IR::AddrOpnd::New(m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(index), IR::AddrOpndKindDynamicMisc, m_func, true /* doesn't come from a user */); |
| LoadHelperArgument(instr, argAddress); |
| return srcMemRef; |
| } |
| |
// Lowers SIMD IR opcodes that have no direct machine-opcode mapping (i.e. for
// which Simd128TryLowerMappedInstruction returned false). Pure dispatch: each
// case forwards to a dedicated lowering routine, which rewrites/removes `instr`
// and returns the instruction the lowering loop should resume from.
IR::Instr* LowererMD::Simd128LowerUnMappedInstruction(IR::Instr *instr)
{
    switch (instr->m_opcode)
    {
    case Js::OpCode::Simd128_LdC:
        return Simd128LoadConst(instr);

#ifdef ENABLE_SIMD
    // Vector constructors from scalar lanes (4/8/16 lanes).
    case Js::OpCode::Simd128_FloatsToF4:
    case Js::OpCode::Simd128_IntsToI4:
    case Js::OpCode::Simd128_IntsToU4:
    case Js::OpCode::Simd128_IntsToB4:
        return Simd128LowerConstructor_4(instr);
    case Js::OpCode::Simd128_IntsToI8:
    case Js::OpCode::Simd128_IntsToU8:
    case Js::OpCode::Simd128_IntsToB8:
        return Simd128LowerConstructor_8(instr);
    case Js::OpCode::Simd128_IntsToI16:
    case Js::OpCode::Simd128_IntsToU16:
    case Js::OpCode::Simd128_IntsToB16:
        return Simd128LowerConstructor_16(instr);

    case Js::OpCode::Simd128_Rcp_F4:
    //case Js::OpCode::Simd128_Rcp_D2:
        return Simd128LowerRcp(instr);
    //SQRT
    case Js::OpCode::Simd128_RcpSqrt_F4:
    //case Js::OpCode::Simd128_RcpSqrt_D2:
        return Simd128LowerRcpSqrt(instr);

    case Js::OpCode::Simd128_Select_F4:
    case Js::OpCode::Simd128_Select_I4:
    //case Js::OpCode::Simd128_Select_D2:
    case Js::OpCode::Simd128_Select_I8:
    case Js::OpCode::Simd128_Select_I16:
    case Js::OpCode::Simd128_Select_U4:
    case Js::OpCode::Simd128_Select_U8:
    case Js::OpCode::Simd128_Select_U16:
        return Simd128LowerSelect(instr);
#endif

#if 0
    case Js::OpCode::Simd128_DoublesToD2:
        return Simd128LowerConstructor_2(instr);
#endif // 0

    // Lane access.
    case Js::OpCode::Simd128_ExtractLane_I2:
    case Js::OpCode::Simd128_ExtractLane_I4:
    case Js::OpCode::Simd128_ExtractLane_I8:
    case Js::OpCode::Simd128_ExtractLane_I16:
    case Js::OpCode::Simd128_ExtractLane_U4:
    case Js::OpCode::Simd128_ExtractLane_U8:
    case Js::OpCode::Simd128_ExtractLane_U16:
    case Js::OpCode::Simd128_ExtractLane_B4:
    case Js::OpCode::Simd128_ExtractLane_B8:
    case Js::OpCode::Simd128_ExtractLane_B16:
    case Js::OpCode::Simd128_ExtractLane_F4:
        return Simd128LowerLdLane(instr);

    case Js::OpCode::Simd128_ReplaceLane_I2:
    case Js::OpCode::Simd128_ReplaceLane_D2:
        return SIMD128LowerReplaceLane_2(instr);
    case Js::OpCode::Simd128_ReplaceLane_I4:
    case Js::OpCode::Simd128_ReplaceLane_F4:
    case Js::OpCode::Simd128_ReplaceLane_U4:
    case Js::OpCode::Simd128_ReplaceLane_B4:
        return SIMD128LowerReplaceLane_4(instr);

    case Js::OpCode::Simd128_ReplaceLane_I8:
    case Js::OpCode::Simd128_ReplaceLane_U8:
    case Js::OpCode::Simd128_ReplaceLane_B8:
        return SIMD128LowerReplaceLane_8(instr);

    case Js::OpCode::Simd128_ReplaceLane_I16:
    case Js::OpCode::Simd128_ReplaceLane_U16:
    case Js::OpCode::Simd128_ReplaceLane_B16:
        return SIMD128LowerReplaceLane_16(instr);

    case Js::OpCode::Simd128_Splat_F4:
    case Js::OpCode::Simd128_Splat_I4:
    case Js::OpCode::Simd128_Splat_I2:
    case Js::OpCode::Simd128_Splat_D2:
    case Js::OpCode::Simd128_Splat_I8:
    case Js::OpCode::Simd128_Splat_I16:
    case Js::OpCode::Simd128_Splat_U4:
    case Js::OpCode::Simd128_Splat_U8:
    case Js::OpCode::Simd128_Splat_U16:
    case Js::OpCode::Simd128_Splat_B4:
    case Js::OpCode::Simd128_Splat_B8:
    case Js::OpCode::Simd128_Splat_B16:
        return Simd128LowerSplat(instr);

    case Js::OpCode::Simd128_Sqrt_F4:
    //case Js::OpCode::Simd128_Sqrt_D2:
        return Simd128LowerSqrt(instr);

    // Arithmetic without a single-instruction SSE equivalent.
    case Js::OpCode::Simd128_Neg_I4:
    case Js::OpCode::Simd128_Neg_I8:
    case Js::OpCode::Simd128_Neg_I16:
    case Js::OpCode::Simd128_Neg_U4:
    case Js::OpCode::Simd128_Neg_U8:
    case Js::OpCode::Simd128_Neg_U16:
        return Simd128LowerNeg(instr);

    case Js::OpCode::Simd128_Mul_I4:
    case Js::OpCode::Simd128_Mul_U4:
        return Simd128LowerMulI4(instr);
    case Js::OpCode::Simd128_Mul_I16:
    case Js::OpCode::Simd128_Mul_U16:
        return Simd128LowerMulI16(instr);

    case Js::OpCode::Simd128_ShRtByScalar_I4:
    case Js::OpCode::Simd128_ShLtByScalar_I4:
    case Js::OpCode::Simd128_ShRtByScalar_I8:
    case Js::OpCode::Simd128_ShLtByScalar_I8:
    case Js::OpCode::Simd128_ShLtByScalar_I16:
    case Js::OpCode::Simd128_ShRtByScalar_I16:
    case Js::OpCode::Simd128_ShRtByScalar_U4:
    case Js::OpCode::Simd128_ShLtByScalar_U4:
    case Js::OpCode::Simd128_ShRtByScalar_U8:
    case Js::OpCode::Simd128_ShLtByScalar_U8:
    case Js::OpCode::Simd128_ShRtByScalar_U16:
    case Js::OpCode::Simd128_ShLtByScalar_U16:
    case Js::OpCode::Simd128_ShLtByScalar_I2:
    case Js::OpCode::Simd128_ShRtByScalar_U2:
    case Js::OpCode::Simd128_ShRtByScalar_I2:
        return Simd128LowerShift(instr);

    // Typed-array loads: asm.js variants carry explicit bound checks.
    case Js::OpCode::Simd128_LdArr_I4:
    case Js::OpCode::Simd128_LdArr_I8:
    case Js::OpCode::Simd128_LdArr_I16:
    case Js::OpCode::Simd128_LdArr_U4:
    case Js::OpCode::Simd128_LdArr_U8:
    case Js::OpCode::Simd128_LdArr_U16:
    case Js::OpCode::Simd128_LdArr_F4:
    //case Js::OpCode::Simd128_LdArr_D2:
    case Js::OpCode::Simd128_LdArrConst_I4:
    case Js::OpCode::Simd128_LdArrConst_I8:
    case Js::OpCode::Simd128_LdArrConst_I16:
    case Js::OpCode::Simd128_LdArrConst_U4:
    case Js::OpCode::Simd128_LdArrConst_U8:
    case Js::OpCode::Simd128_LdArrConst_U16:
    case Js::OpCode::Simd128_LdArrConst_F4:
    //case Js::OpCode::Simd128_LdArrConst_D2:
        if (m_func->GetJITFunctionBody()->IsAsmJsMode())
        {
            // with bound checks
            return Simd128AsmJsLowerLoadElem(instr);
        }
        else
        {
            // non-AsmJs, boundChecks are extracted from instr
            return Simd128LowerLoadElem(instr);
        }

    // Typed-array stores, mirrored with the loads above.
    case Js::OpCode::Simd128_StArr_I4:
    case Js::OpCode::Simd128_StArr_I8:
    case Js::OpCode::Simd128_StArr_I16:
    case Js::OpCode::Simd128_StArr_U4:
    case Js::OpCode::Simd128_StArr_U8:
    case Js::OpCode::Simd128_StArr_U16:
    case Js::OpCode::Simd128_StArr_F4:
    //case Js::OpCode::Simd128_StArr_D2:
    case Js::OpCode::Simd128_StArrConst_I4:
    case Js::OpCode::Simd128_StArrConst_I8:
    case Js::OpCode::Simd128_StArrConst_I16:
    case Js::OpCode::Simd128_StArrConst_U4:
    case Js::OpCode::Simd128_StArrConst_U8:
    case Js::OpCode::Simd128_StArrConst_U16:
    case Js::OpCode::Simd128_StArrConst_F4:
    //case Js::OpCode::Simd128_StArrConst_D2:
        if (m_func->GetJITFunctionBody()->IsAsmJsMode())
        {
            return Simd128AsmJsLowerStoreElem(instr);
        }
        else
        {
            return Simd128LowerStoreElem(instr);
        }

    // Shuffles/swizzles: 4-lane variants have a dedicated fast path.
    case Js::OpCode::Simd128_Swizzle_U4:
    case Js::OpCode::Simd128_Swizzle_I4:
    case Js::OpCode::Simd128_Swizzle_F4:
    //case Js::OpCode::Simd128_Swizzle_D2:
        return Simd128LowerSwizzle_4(instr);

    case Js::OpCode::Simd128_Shuffle_U4:
    case Js::OpCode::Simd128_Shuffle_I4:
    case Js::OpCode::Simd128_Shuffle_F4:
    //case Js::OpCode::Simd128_Shuffle_D2:
        return Simd128LowerShuffle_4(instr);
    case Js::OpCode::Simd128_Swizzle_I8:
    case Js::OpCode::Simd128_Swizzle_I16:
    case Js::OpCode::Simd128_Swizzle_U8:
    case Js::OpCode::Simd128_Swizzle_U16:
    case Js::OpCode::Simd128_Shuffle_I8:
    case Js::OpCode::Simd128_Shuffle_I16:
    case Js::OpCode::Simd128_Shuffle_U8:
    case Js::OpCode::Simd128_Shuffle_U16:
        return Simd128LowerShuffle(instr);

    // Conversions; the 64-bit-lane ones go through runtime helpers.
    case Js::OpCode::Simd128_FromUint32x4_F4:
        return Simd128LowerFloat32x4FromUint32x4(instr);

    case Js::OpCode::Simd128_FromFloat32x4_I4:
        return Simd128LowerInt32x4FromFloat32x4(instr);

    case Js::OpCode::Simd128_FromFloat32x4_U4:
        return Simd128LowerUint32x4FromFloat32x4(instr);

    case Js::OpCode::Simd128_FromInt64x2_D2:
        return EmitSimdConversion(instr, IR::HelperSimd128ConvertSD2);
    case Js::OpCode::Simd128_FromUint64x2_D2:
        return EmitSimdConversion(instr, IR::HelperSimd128ConvertUD2);
    case Js::OpCode::Simd128_FromFloat64x2_I2:
        return EmitSimdConversion(instr, IR::HelperSimd128TruncateI2);
    case Js::OpCode::Simd128_FromFloat64x2_U2:
        return EmitSimdConversion(instr, IR::HelperSimd128TruncateU2);
    // Comparisons synthesized from other compares.
    case Js::OpCode::Simd128_Neq_I4:
    case Js::OpCode::Simd128_Neq_I8:
    case Js::OpCode::Simd128_Neq_I16:
    case Js::OpCode::Simd128_Neq_U4:
    case Js::OpCode::Simd128_Neq_U8:
    case Js::OpCode::Simd128_Neq_U16:
        return Simd128LowerNotEqual(instr);

    case Js::OpCode::Simd128_Lt_U4:
    case Js::OpCode::Simd128_Lt_U8:
    case Js::OpCode::Simd128_Lt_U16:
    case Js::OpCode::Simd128_GtEq_U4:
    case Js::OpCode::Simd128_GtEq_U8:
    case Js::OpCode::Simd128_GtEq_U16:
        return Simd128LowerLessThan(instr);
    case Js::OpCode::Simd128_LtEq_I4:
    case Js::OpCode::Simd128_LtEq_I8:
    case Js::OpCode::Simd128_LtEq_I16:
    case Js::OpCode::Simd128_LtEq_U4:
    case Js::OpCode::Simd128_LtEq_U8:
    case Js::OpCode::Simd128_LtEq_U16:
    case Js::OpCode::Simd128_Gt_U4:
    case Js::OpCode::Simd128_Gt_U8:
    case Js::OpCode::Simd128_Gt_U16:
        return Simd128LowerLessThanOrEqual(instr);

    case Js::OpCode::Simd128_GtEq_I4:
    case Js::OpCode::Simd128_GtEq_I8:
    case Js::OpCode::Simd128_GtEq_I16:
        return Simd128LowerGreaterThanOrEqual(instr);

    case Js::OpCode::Simd128_Min_F4:
    case Js::OpCode::Simd128_Max_F4:
        return Simd128LowerMinMax_F4(instr);

    // Bool-vector reductions.
    case Js::OpCode::Simd128_AnyTrue_B2:
    case Js::OpCode::Simd128_AnyTrue_B4:
    case Js::OpCode::Simd128_AnyTrue_B8:
    case Js::OpCode::Simd128_AnyTrue_B16:
        return Simd128LowerAnyTrue(instr);

    case Js::OpCode::Simd128_AllTrue_B2:
    case Js::OpCode::Simd128_AllTrue_B4:
    case Js::OpCode::Simd128_AllTrue_B8:
    case Js::OpCode::Simd128_AllTrue_B16:
        return Simd128LowerAllTrue(instr);
    case Js::OpCode::Simd128_BitSelect_I4:
        return LowerSimd128BitSelect(instr);
    default:
        AssertMsg(UNREACHED, "Unsupported Simd128 instruction");
    }
    return nullptr;
}
| |
| |
| IR::Instr* LowererMD::LowerSimd128BitSelect(IR::Instr* instr) |
| { |
| SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr); |
| IR::Opnd *dst = args->Pop(); |
| IR::Opnd *src1 = args->Pop(); |
| IR::Opnd *src2 = args->Pop(); |
| IR::Opnd *mask = args->Pop(); |
| |
| IR::Instr* pInstr = IR::Instr::New(Js::OpCode::PXOR, dst, src1, src2, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::PAND, dst, dst, mask, m_func)); |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::PXOR, dst, dst, src2, m_func)); |
| return removeInstr(instr); |
| } |
| |
// Lowers Simd128_LdC (load of a 16-byte SIMD immediate): the constant is placed
// in the native-code data area and the instruction becomes a 128-bit register
// load from that memory.
IR::Instr* LowererMD::Simd128LoadConst(IR::Instr* instr)
{
    Assert(instr->GetDst() && instr->m_opcode == Js::OpCode::Simd128_LdC);
    Assert(instr->GetDst()->IsSimd128());
    Assert(instr->GetSrc1()->IsSimd128());
    Assert(instr->GetSrc1()->IsSimd128ConstOpnd());
    Assert(instr->GetSrc2() == nullptr);

    AsmJsSIMDValue value = instr->GetSrc1()->AsSimd128ConstOpnd()->m_value;

    // MOVUPS dst, [const]

    // Materialize the constant in the native-code data area.
    void *pValue = NativeCodeDataNewNoFixup(this->m_func->GetNativeCodeDataAllocator(), SIMDType<DataDesc_LowererMD_Simd128LoadConst>, value);
    IR::Opnd * simdRef;
    if (!m_func->IsOOPJIT())
    {
        // In-proc JIT: the data address is final, so reference it directly.
        simdRef = IR::MemRefOpnd::New((void *)pValue, instr->GetDst()->GetType(), instr->m_func);
    }
    else
    {
        // OOP JIT: final addresses are unknown here, so address the constant as
        // an offset from the native-code-data base register.
        int offset = NativeCodeData::GetDataTotalOffset(pValue);

        simdRef = IR::IndirOpnd::New(IR::RegOpnd::New(m_func->GetTopFunc()->GetNativeCodeDataSym(), TyVar, m_func), offset, instr->GetDst()->GetType(),
#if DBG
            NativeCodeData::GetDataDescription(pValue, m_func->m_alloc),
#endif
            m_func, true);

        // Keep the data base sym live across loop back edges so the indir stays valid.
        GetLowerer()->addToLiveOnBackEdgeSyms->Set(m_func->GetTopFunc()->GetNativeCodeDataSym()->m_id);
    }

    // Turn the LdC into a plain assignment from memory and legalize it.
    instr->ReplaceSrc1(simdRef);
    instr->m_opcode = LowererMDArch::GetAssignOp(instr->GetDst()->GetType());
    Legalize(instr);

    return instr->m_prev;
}
| |
| IR::Instr* LowererMD::Simd128CanonicalizeToBools(IR::Instr* instr, const Js::OpCode &cmpOpcode, IR::Opnd& dstOpnd) |
| { |
| Assert(instr->m_opcode == Js::OpCode::Simd128_IntsToB4 || instr->m_opcode == Js::OpCode::Simd128_IntsToB8 || instr->m_opcode == Js::OpCode::Simd128_IntsToB16 || |
| instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B4 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B8 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B16 || |
| instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B2 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B4 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B8 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B16 || |
| instr->m_opcode == Js::OpCode::Simd128_AllTrue_B2 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B4 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B8 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B16 |
| ); |
| IR::Instr *pInstr; |
| //dst = cmpOpcode dst, X86_ALL_ZEROS |
| pInstr = IR::Instr::New(cmpOpcode, &dstOpnd, &dstOpnd, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllZerosAddr(), TySimd128I4, m_func), m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| // dst = PANDN dst, X86_ALL_NEG_ONES |
| pInstr = IR::Instr::New(Js::OpCode::PANDN, &dstOpnd, &dstOpnd, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| return instr; |
| } |
| |
| IR::Instr* LowererMD::EmitSimdConversion(IR::Instr *instr, IR::JnHelperMethod helper) |
| { |
| IR::MemRefOpnd* srcMemRef = LoadSimdHelperArgument(instr, 0); |
| IR::MemRefOpnd* dstMemRef = LoadSimdHelperArgument(instr, 1); |
| m_lowerer->InsertMove(srcMemRef, instr->UnlinkSrc1(), instr); |
| |
| IR::Instr * helperCall = IR::Instr::New(Js::OpCode::CALL, this->m_func); |
| instr->InsertBefore(helperCall); |
| this->ChangeToHelperCall(helperCall, helper); |
| |
| m_lowerer->InsertMove(instr->UnlinkDst(), dstMemRef, instr); |
| return removeInstr(instr); |
| } |
| |
| void LowererMD::EmitShiftByScalarI2(IR::Instr *instr, IR::JnHelperMethod helper) |
| { |
| IR::Opnd* src2 = instr->GetSrc2(); |
| IR::Opnd* dst = instr->GetDst(); |
| LoadHelperArgument(instr, src2); |
| IR::MemRefOpnd* srcMemRef = LoadSimdHelperArgument(instr, 0); |
| m_lowerer->InsertMove(srcMemRef, instr->GetSrc1(), instr); |
| IR::MemRefOpnd* dstMemRef = LoadSimdHelperArgument(instr, 1); |
| IR::Instr * helperCall = IR::Instr::New(Js::OpCode::CALL, this->m_func); |
| instr->InsertBefore(helperCall); |
| this->ChangeToHelperCall(helperCall, helper); |
| m_lowerer->InsertMove(dst, dstMemRef, instr); |
| } |
| |
// Lowers ReplaceLane for 2-lane vectors (int64x2 / float64x2).
// Args (via extended-args list): dst, src1 = source vector, src2 = lane index
// (const 0 or 1), src3 = replacement scalar.
IR::Instr * LowererMD::SIMD128LowerReplaceLane_2(IR::Instr *instr)
{
    SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
    IR::Opnd *dst = args->Pop();
    IR::Opnd *src1 = args->Pop();
    IR::Opnd *src2 = args->Pop();
    IR::Opnd *src3 = args->Pop();

    int lane = src2->AsIntConstOpnd()->AsInt32();
    Assert(dst->IsSimd128() && src1->IsSimd128());

    if (instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_D2)
    {
        AssertMsg(AutoSystemInfo::Data.SSE2Available(), "SSE2 not supported");
        Assert(src3->IsFloat64());
        // dst = src1, then overwrite one half:
        //   lane 1: SHUFPD dst, src3, 0  (keep dst's low half, take src3's low half as the high half)
        //   lane 0: MOVSD  dst, src3     (replace the low half)
        m_lowerer->InsertMove(dst, src1, instr);
        if (lane)
        {
            instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPD, dst, src3, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
        }
        else
        {
            instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src3, m_func));
        }
        return removeInstr(instr);
    }

    // int64x2 path
    Assert(src3->IsInt64());

    if (AutoSystemInfo::Data.SSE4_1Available())
    {
        m_lowerer->InsertMove(dst, src1, instr);
        // NOTE(review): re-attaches dst to instr so EmitInsertInt64 can read it via
        // instr->GetDst() — presumably Simd128GetExtendedArgs unlinked it; confirm.
        instr->SetDst(dst);
        EmitInsertInt64(src3, lane, instr);
    }
    else
    {
        // No SSE4.1: call the runtime helper with (lane index, int64 value,
        // &tempSlot0 [input vector], &tempSlot1 [result vector]).
        LoadHelperArgument(instr, src2);
        LoadInt64HelperArgument(instr, src3);
        IR::MemRefOpnd* srcMemRef = LoadSimdHelperArgument(instr, 0);
        m_lowerer->InsertMove(srcMemRef, src1, instr);
        IR::MemRefOpnd* dstMemRef = LoadSimdHelperArgument(instr, 1);
        IR::Instr * helperCall = IR::Instr::New(Js::OpCode::CALL, this->m_func);
        instr->InsertBefore(helperCall);
        this->ChangeToHelperCall(helperCall, IR::HelperSimd128ReplaceLaneI2);
        m_lowerer->InsertMove(dst, dstMemRef, instr);
    }
    return removeInstr(instr);
}
| |
// Inserts a 64-bit integer `src` into lane `index` (0 or 1) of instr's simd128 dst.
// SSE4.1: PINSRQ on x64; on x86 the value is split into an int32 pair and inserted
// with two PINSRDs (dword indices 2*index and 2*index+1).
// Without SSE4.1: spill the value to SIMD temp slot 0, then load it into the low
// (MOVLPD) or high (MOVHPD) half of dst from memory.
void LowererMD::EmitInsertInt64(IR::Opnd* src, uint index, IR::Instr *instr)
{
    IR::Opnd* dst = instr->GetDst();
    Assert(dst->IsSimd128() && src->IsInt64());

    if (AutoSystemInfo::Data.SSE4_1Available())
    {
#ifdef _M_IX86
        // x86: no 64-bit GPRs — insert the low/high 32-bit halves separately.
        index *= 2;
        Int64RegPair srcPair = m_func->FindOrCreateInt64Pair(src);
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PINSRD, dst, srcPair.low, IR::IntConstOpnd::New(index, TyInt8, m_func, true), m_func));
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PINSRD, dst, srcPair.high, IR::IntConstOpnd::New(index + 1, TyInt8, m_func, true), m_func));
#else
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PINSRQ, dst, src, IR::IntConstOpnd::New(index, TyInt8, m_func, true), m_func));
#endif
    }
    else
    {
        // Spill the 64-bit value into SIMD temp slot 0...
        intptr_t tempSIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(0);
#ifdef _M_IX86
        Int64RegPair src1Pair = m_func->FindOrCreateInt64Pair(src);
        IR::Opnd* lower = IR::MemRefOpnd::New(tempSIMD, TyMachPtr, m_func);
        m_lowerer->InsertMove(lower, src1Pair.low, instr);
        IR::Opnd* higher = IR::MemRefOpnd::New(tempSIMD + 4, TyMachPtr, m_func);
        m_lowerer->InsertMove(higher, src1Pair.high, instr);
#else
        IR::Opnd* mem = IR::MemRefOpnd::New(tempSIMD, TyMachPtr, m_func);
        m_lowerer->InsertMove(mem, src, instr);
#endif

        // ...then load it into the selected 64-bit half of dst.
        IR::MemRefOpnd* tmp = IR::MemRefOpnd::New(tempSIMD, TyFloat64, m_func);
        Js::OpCode opcode = (index) ? Js::OpCode::MOVHPD : Js::OpCode::MOVLPD;
        IR::Instr* newInstr = IR::Instr::New(opcode, dst, tmp, m_func);
        instr->InsertBefore(newInstr);
        newInstr->HoistMemRefAddress(tmp, Js::OpCode::MOV);
        Legalize(newInstr);
    }
}
| |
// Extracts the 64-bit integer lane `index` (0 or 1) of simd128 `src` into `dst`.
// SSE4.1: PEXTRQ on x64; on x86 the two 32-bit halves are extracted with PEXTRD
// into an int64 register pair.
// Without SSE4.1: shuffle the wanted half to the bottom (for index 1) and then
// reinterpret the low 64 bits as an int64.
void LowererMD::EmitExtractInt64(IR::Opnd* dst, IR::Opnd* src, uint index, IR::Instr *instr)
{
    Assert(index == 0 || index == 1);
    Assert(dst->IsInt64() && src->IsSimd128());
    if (AutoSystemInfo::Data.SSE4_1Available())
    {
#ifdef _M_IX86
        // x86: extract dwords 2*index and 2*index+1 into the low/high halves.
        index *= 2;
        Int64RegPair dstPair = m_func->FindOrCreateInt64Pair(dst);
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PEXTRD, dstPair.low, src, IR::IntConstOpnd::New(index, TyInt8, m_func, true), m_func));
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PEXTRD, dstPair.high, src, IR::IntConstOpnd::New(index + 1, TyInt8, m_func, true), m_func));
#else
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PEXTRQ, dst, src, IR::IntConstOpnd::New(index, TyInt8, m_func, true), m_func));
#endif
    }
    else
    {
        IR::Opnd* tmp = src;
        if (index)
        {
            // PSHUFD imm = 2 | 3<<2: move dwords 2,3 (the upper 64 bits) down to
            // positions 0,1 so the wanted lane sits at the bottom.
            tmp = IR::RegOpnd::New(TySimd128F4, m_func);
            instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, tmp, src, IR::IntConstOpnd::New(2 | 3 << 2, TyInt8, m_func, true), m_func));
        }
        //kludg-ish; we need a new instruction for LowerReinterpretPrimitive to transform
        //and dummy one for a caller to remove
        IR::Instr* tmpInstr = IR::Instr::New(Js::OpCode::Simd128_ExtractLane_I2, dst, tmp->UseWithNewType(TyFloat64, m_func), m_func);
        instr->InsertBefore(tmpInstr);
        m_lowerer->LowerReinterpretPrimitive(tmpInstr);
    }
}
| |
| IR::Instr* LowererMD::Simd128LowerLdLane(IR::Instr *instr) |
| { |
| IR::Opnd* dst, *src1, *src2; |
| Js::OpCode movOpcode = Js::OpCode::MOVSS; |
| uint laneWidth = 0, laneIndex = 0, shamt = 0, mask = 0; |
| IRType laneType = TyInt32; |
| dst = instr->GetDst(); |
| src1 = instr->GetSrc1(); |
| src2 = instr->GetSrc2(); |
| |
| Assert(dst && dst->IsRegOpnd() && (dst->GetType() == TyFloat32 || dst->GetType() == TyInt32 || dst->GetType() == TyUint32 || dst->GetType() == TyFloat64 || dst->IsInt64())); |
| Assert(src1 && src1->IsRegOpnd() && src1->IsSimd128()); |
| Assert(src2 && src2->IsIntConstOpnd()); |
| |
| |
| laneIndex = (uint)src2->AsIntConstOpnd()->AsUint32(); |
| laneWidth = 4; |
| switch (instr->m_opcode) |
| { |
| case Js::OpCode::Simd128_ExtractLane_I2: |
| laneWidth = 8; |
| break; |
| case Js::OpCode::Simd128_ExtractLane_F4: |
| movOpcode = Js::OpCode::MOVSS; |
| Assert(laneIndex < 4); |
| break; |
| case Js::OpCode::Simd128_ExtractLane_I8: |
| case Js::OpCode::Simd128_ExtractLane_U8: |
| case Js::OpCode::Simd128_ExtractLane_B8: |
| movOpcode = Js::OpCode::MOVD; |
| Assert(laneIndex < 8); |
| shamt = (laneIndex % 2) * 16; |
| laneIndex = laneIndex / 2; |
| laneType = TyInt16; |
| mask = 0x0000ffff; |
| break; |
| case Js::OpCode::Simd128_ExtractLane_I16: |
| case Js::OpCode::Simd128_ExtractLane_U16: |
| case Js::OpCode::Simd128_ExtractLane_B16: |
| movOpcode = Js::OpCode::MOVD; |
| Assert(laneIndex < 16); |
| shamt = (laneIndex % 4) * 8; |
| laneIndex = laneIndex / 4; |
| laneType = TyInt8; |
| mask = 0x000000ff; |
| break; |
| case Js::OpCode::Simd128_ExtractLane_U4: |
| case Js::OpCode::Simd128_ExtractLane_I4: |
| case Js::OpCode::Simd128_ExtractLane_B4: |
| movOpcode = Js::OpCode::MOVD; |
| Assert(laneIndex < 4); |
| break; |
| default: |
| Assert(UNREACHED); |
| } |
| |
| if (laneWidth == 8) //Simd128_ExtractLane_I2 |
| { |
| EmitExtractInt64(dst, instr->GetSrc1(), laneIndex, instr); |
| } |
| else |
| { |
| IR::Opnd* tmp = src1; |
| if (laneIndex != 0) |
| { |
| // tmp = PSRLDQ src1, shamt |
| tmp = IR::RegOpnd::New(src1->GetType(), m_func); |
| IR::Instr *shiftInstr = IR::Instr::New(Js::OpCode::PSRLDQ, tmp, src1, IR::IntConstOpnd::New(laneWidth * laneIndex, TyInt8, m_func, true), m_func); |
| instr->InsertBefore(shiftInstr); |
| Legalize(shiftInstr); |
| } |
| // MOVSS/MOVSD/MOVD dst, tmp |
| instr->InsertBefore(IR::Instr::New(movOpcode, movOpcode == Js::OpCode::MOVD ? dst : dst->UseWithNewType(tmp->GetType(), m_func), tmp, m_func)); |
| } |
| |
| // dst has the 4-byte lane |
| if (instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I8 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_U8 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B8 || |
| instr->m_opcode == Js::OpCode::Simd128_ExtractLane_U16 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I16 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B16) |
| { |
| // extract the 1/2 bytes sublane |
| IR::Instr *newInstr = nullptr; |
| if (shamt != 0) |
| { |
| // SHR dst, dst, shamt |
| newInstr = IR::Instr::New(Js::OpCode::SHR, dst, dst, IR::IntConstOpnd::New((IntConstType)shamt, TyInt8, m_func), m_func); |
| instr->InsertBefore(newInstr); |
| Legalize(newInstr); |
| } |
| |
| Assert(laneType == TyInt8 || laneType == TyInt16); |
| // zero or sign-extend upper bits |
| if (instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I8 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I16) |
| { |
| if (laneType == TyInt8) |
| { |
| IR::RegOpnd * tmp = IR::RegOpnd::New(TyInt8, m_func); |
| newInstr = IR::Instr::New(Js::OpCode::MOV, tmp, dst, m_func); |
| instr->InsertBefore(newInstr); |
| Legalize(newInstr); |
| newInstr = IR::Instr::New(Js::OpCode::MOVSX, dst, tmp, m_func); |
| } |
| else |
| { |
| newInstr = IR::Instr::New(Js::OpCode::MOVSXW, dst, dst->UseWithNewType(laneType, m_func), m_func); |
| } |
| } |
| else |
| { |
| newInstr = IR::Instr::New(Js::OpCode::AND, dst, dst, IR::IntConstOpnd::New(mask, TyInt32, m_func), m_func); |
| } |
| |
| instr->InsertBefore(newInstr); |
| Legalize(newInstr); |
| } |
| if (instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B4 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B8 || |
| instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B16) |
| { |
| IR::Instr* pInstr = nullptr; |
| IR::RegOpnd* tmp = IR::RegOpnd::New(TyInt8, m_func); |
| |
| // cmp dst, 0 |
| pInstr = IR::Instr::New(Js::OpCode::CMP, m_func); |
| pInstr->SetSrc1(dst->UseWithNewType(laneType, m_func)); |
| pInstr->SetSrc2(IR::IntConstOpnd::New(0, laneType, m_func, true)); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| |
| // mov tmp(TyInt8), dst |
| pInstr = IR::Instr::New(Js::OpCode::MOV, tmp, dst, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| |
| // setne tmp(TyInt8) |
| pInstr = IR::Instr::New(Js::OpCode::SETNE, tmp, tmp, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| |
| // movsx dst, tmp(TyInt8) |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSX, dst, tmp, m_func)); |
| } |
| |
| IR::Instr* prevInstr = instr->m_prev; |
| instr->Remove(); |
| return prevInstr; |
| } |
| |
| IR::Instr* LowererMD::Simd128LowerSplat(IR::Instr *instr) |
| { |
| Js::OpCode shufOpCode = Js::OpCode::SHUFPS, movOpCode = Js::OpCode::MOVSS; |
| IR::Opnd *dst, *src1; |
| IR::Instr *pInstr = nullptr; |
| dst = instr->GetDst(); |
| src1 = instr->GetSrc1(); |
| |
| Assert(dst && dst->IsRegOpnd() && dst->IsSimd128()); |
| |
| Assert(src1 && src1->IsRegOpnd() && (src1->GetType() == TyFloat32 || src1->GetType() == TyInt32 || src1->GetType() == TyFloat64 || |
| src1->GetType() == TyInt16 || src1->GetType() == TyInt8 || src1->GetType() == TyUint16 || |
| src1->GetType() == TyUint8 || src1->GetType() == TyUint32 || src1->IsInt64())); |
| |
| Assert(!instr->GetSrc2()); |
| |
| IR::Opnd* tempTruncate = nullptr; |
| bool bSkip = false; |
| IR::LabelInstr *labelZero = IR::LabelInstr::New(Js::OpCode::Label, m_func); |
| IR::LabelInstr *labelDone = IR::LabelInstr::New(Js::OpCode::Label, m_func); |
| |
| switch (instr->m_opcode) |
| { |
| case Js::OpCode::Simd128_Splat_F4: |
| shufOpCode = Js::OpCode::SHUFPS; |
| movOpCode = Js::OpCode::MOVSS; |
| break; |
| case Js::OpCode::Simd128_Splat_I4: |
| case Js::OpCode::Simd128_Splat_U4: |
| shufOpCode = Js::OpCode::PSHUFD; |
| movOpCode = Js::OpCode::MOVD; |
| break; |
| case Js::OpCode::Simd128_Splat_D2: |
| shufOpCode = Js::OpCode::SHUFPD; |
| movOpCode = Js::OpCode::MOVSD; |
| break; |
| case Js::OpCode::Simd128_Splat_I2: |
| { |
| EmitInsertInt64(src1, 0, instr); |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, dst, dst, IR::IntConstOpnd::New(68, TyInt8, m_func, true), m_func)); |
| bSkip = true; |
| break; |
| } |
| |
| case Js::OpCode::Simd128_Splat_I8: |
| case Js::OpCode::Simd128_Splat_U8: |
| // MOV tempTruncate(bx), src1: truncate the value to 16bit int |
| // MOVD dst, tempTruncate(bx) |
| // PUNPCKLWD dst, dst |
| // PSHUFD dst, dst, 0 |
| tempTruncate = EnregisterIntConst(instr, src1, TyInt16); |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, dst, tempTruncate, m_func)); |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::PUNPCKLWD, dst, dst, dst, m_func)); |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, dst, dst, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func)); |
| bSkip = true; |
| break; |
| case Js::OpCode::Simd128_Splat_I16: |
| case Js::OpCode::Simd128_Splat_U16: |
| // MOV tempTruncate(bx), src1: truncate the value to 8bit int |
| // MOVD dst, tempTruncate(bx) |
| // PUNPCKLBW dst, dst |
| // PUNPCKLWD dst, dst |
| // PSHUFD dst, dst, 0 |
| tempTruncate = EnregisterIntConst(instr, src1, TyInt8); |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, dst, tempTruncate, m_func)); |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::PUNPCKLBW, dst, dst, dst, m_func)); |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::PUNPCKLWD, dst, dst, dst, m_func)); |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, dst, dst, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func)); |
| bSkip = true; |
| break; |
| case Js::OpCode::Simd128_Splat_B4: |
| case Js::OpCode::Simd128_Splat_B8: |
| case Js::OpCode::Simd128_Splat_B16: |
| // CMP src1, 0 |
| // JEQ $labelZero |
| // MOVAPS dst, xmmword ptr[X86_ALL_NEG_ONES] |
| // JMP $labelDone |
| // $labelZero: |
| // XORPS dst, dst |
| // $labelDone: |
| //pInstr = IR::Instr::New(Js::OpCode::CMP, src1, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func); |
| //instr->InsertBefore(pInstr); |
| //Legalize(pInstr); |
| |
| // cmp src1, 0000h |
| pInstr = IR::Instr::New(Js::OpCode::CMP, m_func); |
| pInstr->SetSrc1(src1); |
| pInstr->SetSrc2(IR::IntConstOpnd::New(0x0000, TyInt32, m_func, true)); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| //JEQ $labelZero |
| instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JEQ, labelZero, m_func)); |
| // MOVAPS dst, xmmword ptr[X86_ALL_NEG_ONES] |
| pInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| // JMP $labelDone |
| instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JMP, labelDone, m_func)); |
| // $labelZero: |
| instr->InsertBefore(labelZero); |
| // XORPS dst, dst |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::XORPS, dst, dst, dst, m_func)); // make dst to be 0 |
| // $labelDone: |
| instr->InsertBefore(labelDone); |
| bSkip = true; |
| break; |
| default: |
| Assert(UNREACHED); |
| } |
| |
| if (instr->m_opcode == Js::OpCode::Simd128_Splat_F4 && instr->GetSrc1()->IsFloat64()) |
| { |
| IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func); |
| // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32 |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src1, this->m_func)); |
| src1 = regOpnd32; |
| } |
| |
| if (!bSkip) |
| { |
| instr->InsertBefore(IR::Instr::New(movOpCode, dst, src1, m_func)); |
| instr->InsertBefore(IR::Instr::New(shufOpCode, dst, dst, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func)); |
| } |
| |
| IR::Instr* prevInstr = instr->m_prev; |
| instr->Remove(); |
| return prevInstr; |
| } |
| |
| IR::Instr* LowererMD::Simd128LowerSqrt(IR::Instr *instr) |
| { |
| Js::OpCode opcode = Js::OpCode::SQRTPS; |
| |
| IR::Opnd *dst, *src1; |
| dst = instr->GetDst(); |
| src1 = instr->GetSrc1(); |
| |
| Assert(dst && dst->IsRegOpnd()); |
| Assert(src1 && src1->IsRegOpnd()); |
| Assert(instr->GetSrc2() == nullptr); |
| opcode = Js::OpCode::SQRTPS; |
| #if 0 |
| { |
| Assert(instr->m_opcode == Js::OpCode::Simd128_Sqrt_D2); |
| opcode = Js::OpCode::SQRTPD; |
| } |
| #endif // 0 |
| |
| instr->InsertBefore(IR::Instr::New(opcode, dst, src1, m_func)); |
| IR::Instr* prevInstr = instr->m_prev; |
| instr->Remove(); |
| return prevInstr; |
| } |
| |
| IR::Instr* LowererMD::Simd128LowerNeg(IR::Instr *instr) |
| { |
| |
| IR::Opnd* dst = instr->GetDst(); |
| IR::Opnd* src1 = instr->GetSrc1(); |
| Js::OpCode addOpcode = Js::OpCode::PADDD; |
| ThreadContextInfo* threadContextInfo = m_func->GetThreadContextInfo(); |
| intptr_t allOnes = threadContextInfo->GetX86AllOnesI4Addr(); |
| |
| Assert(dst->IsRegOpnd() && dst->IsSimd128()); |
| Assert(src1->IsRegOpnd() && src1->IsSimd128()); |
| Assert(instr->GetSrc2() == nullptr); |
| |
| switch (instr->m_opcode) |
| { |
| case Js::OpCode::Simd128_Neg_I4: |
| case Js::OpCode::Simd128_Neg_U4: |
| break; |
| case Js::OpCode::Simd128_Neg_I8: |
| case Js::OpCode::Simd128_Neg_U8: |
| addOpcode = Js::OpCode::PADDW; |
| allOnes = threadContextInfo->GetX86AllOnesI8Addr(); |
| break; |
| case Js::OpCode::Simd128_Neg_I16: |
| case Js::OpCode::Simd128_Neg_U16: |
| addOpcode = Js::OpCode::PADDB; |
| allOnes = threadContextInfo->GetX86AllOnesI16Addr(); |
| break; |
| default: |
| Assert(UNREACHED); |
| } |
| |
| // MOVAPS dst, src1 |
| IR::Instr *pInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func); |
| instr->InsertBefore(pInstr); |
| |
| // PANDN dst, dst, 0xfff...f |
| pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(threadContextInfo->GetX86AllNegOnesAddr(), src1->GetType(), m_func), m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| |
| // addOpCode dst, dst, {allOnes} |
| pInstr = IR::Instr::New(addOpcode, dst, dst, IR::MemRefOpnd::New(allOnes, src1->GetType(), m_func), m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| |
| pInstr = instr->m_prev; |
| instr->Remove(); |
| return pInstr; |
| } |
| |
| IR::Instr* LowererMD::Simd128LowerMulI4(IR::Instr *instr) |
| { |
| Assert(instr->m_opcode == Js::OpCode::Simd128_Mul_I4 || instr->m_opcode == Js::OpCode::Simd128_Mul_U4); |
| IR::Instr *pInstr; |
| IR::Opnd* dst = instr->GetDst(); |
| IR::Opnd* src1 = instr->GetSrc1(); |
| IR::Opnd* src2 = instr->GetSrc2(); |
| IR::Opnd* temp1, *temp2, *temp3; |
| Assert(dst->IsRegOpnd() && dst->IsSimd128()); |
| Assert(src1->IsRegOpnd() && src1->IsSimd128()); |
| Assert(src2->IsRegOpnd() && src2->IsSimd128()); |
| |
| temp1 = IR::RegOpnd::New(src1->GetType(), m_func); |
| temp2 = IR::RegOpnd::New(src1->GetType(), m_func); |
| temp3 = IR::RegOpnd::New(src1->GetType(), m_func); |
| |
| // temp1 = PMULUDQ src1, src2 |
| pInstr = IR::Instr::New(Js::OpCode::PMULUDQ, temp1, src1, src2, m_func); |
| instr->InsertBefore(pInstr); |
| //MakeDstEquSrc1(pInstr); |
| Legalize(pInstr); |
| |
| // temp2 = PSLRD src1, 0x4 |
| pInstr = IR::Instr::New(Js::OpCode::PSRLDQ, temp2, src1, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func); |
| instr->InsertBefore(pInstr); |
| //MakeDstEquSrc1(pInstr); |
| Legalize(pInstr); |
| |
| // temp3 = PSLRD src2, 0x4 |
| pInstr = IR::Instr::New(Js::OpCode::PSRLDQ, temp3, src2, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func); |
| instr->InsertBefore(pInstr); |
| //MakeDstEquSrc1(pInstr); |
| Legalize(pInstr); |
| |
| // temp2 = PMULUDQ temp2, temp3 |
| pInstr = IR::Instr::New(Js::OpCode::PMULUDQ, temp2, temp2, temp3, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| |
| //PSHUFD temp1, temp1, 0x8 |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, temp1, temp1, IR::IntConstOpnd::New( 8 /*b00001000*/, TyInt8, m_func, true), m_func)); |
| |
| //PSHUFD temp2, temp2, 0x8 |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, temp2, temp2, IR::IntConstOpnd::New(8 /*b00001000*/, TyInt8, m_func, true), m_func)); |
| |
| // PUNPCKLDQ dst, temp1, temp2 |
| pInstr = IR::Instr::New(Js::OpCode::PUNPCKLDQ, dst, temp1, temp2, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| |
| pInstr = instr->m_prev; |
| instr->Remove(); |
| return pInstr; |
| } |
| |
// Lowers 8-bit-lane SIMD multiply (Simd128_Mul_I16/U16). SSE2 has no byte
// multiply, so each 16-bit word is multiplied twice with PMULLW: once to
// produce the even (low) bytes, and once — after shifting the high bytes
// down — to produce the odd (high) bytes; the two halves are then masked
// and OR'ed together into dst.
IR::Instr* LowererMD::Simd128LowerMulI16(IR::Instr *instr)
{
    Assert(instr->m_opcode == Js::OpCode::Simd128_Mul_I16 || instr->m_opcode == Js::OpCode::Simd128_Mul_U16);
    IR::Instr *pInstr = nullptr;
    IR::Opnd* dst = instr->GetDst();
    IR::Opnd* src1 = instr->GetSrc1();
    IR::Opnd* src2 = instr->GetSrc2();
    IR::Opnd* temp1, *temp2, *temp3;
    IRType simdType, laneType;
    if (instr->m_opcode == Js::OpCode::Simd128_Mul_I16)
    {
        simdType = TySimd128I16;
        laneType = TyInt8;
    }
    else
    {
        simdType = TySimd128U16;
        laneType = TyUint8;
    }
    Assert(dst->IsRegOpnd() && dst->GetType() == simdType);
    Assert(src1->IsRegOpnd() && src1->GetType() == simdType);
    Assert(src2->IsRegOpnd() && src2->GetType() == simdType);

    temp1 = IR::RegOpnd::New(simdType, m_func);
    temp2 = IR::RegOpnd::New(simdType, m_func);
    temp3 = IR::RegOpnd::New(simdType, m_func);

    // MOVAPS temp1, src1
    instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, temp1, src1, m_func));
    // PMULLW temp1, temp1, src2 : word-wise products; low byte of each word holds the even-byte result
    pInstr = IR::Instr::New(Js::OpCode::PMULLW, temp1, temp1, src2, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // PAND temp1, {0x00ff00ff00ff00ff00ff00ff00ff00ff} : zero out bytes 1,3,5... keeping only the even-byte results
    pInstr = IR::Instr::New(Js::OpCode::PAND, temp1, temp1, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86LowBytesMaskAddr(), simdType, m_func), m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // PSRLW temp2, src2, 8 : upper 8 bits of each word of src2
    pInstr = IR::Instr::New(Js::OpCode::PSRLW, temp2, src2, IR::IntConstOpnd::New(8, laneType, m_func, true), m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // PSRLW temp3, src1, 8 : upper 8 bits of each word of src1
    pInstr = IR::Instr::New(Js::OpCode::PSRLW, temp3, src1, IR::IntConstOpnd::New(8, laneType, m_func, true), m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // PMULLW temp2, temp2, temp3 : products of the odd (high) bytes
    pInstr = IR::Instr::New(Js::OpCode::PMULLW, temp2, temp2, temp3, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // PSLLW temp2, temp2, 8 : move the odd-byte results back into bytes 1,3,5..
    pInstr = IR::Instr::New(Js::OpCode::PSLLW, temp2, temp2, IR::IntConstOpnd::New(8, laneType, m_func, true), m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // POR dst, temp1, temp2 : combine even bytes (temp1) and odd bytes (temp2) into the final result
    pInstr = IR::Instr::New(Js::OpCode::POR, dst, temp1, temp2, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);

    pInstr = instr->m_prev;
    instr->Remove();
    return pInstr;
}
| |
// Lowers SIMD shift-by-scalar opcodes. The scalar shift amount (src2) is
// masked to the lane width (shamt % (elementSizeInBytes * 8)) and moved into
// an xmm register (tmp0). Lane sizes SSE2 can shift directly use the matching
// packed-shift instruction; 8-bit lanes (and signed 64-bit right shift) need
// composite sequences since SSE2 has no byte shifts.
IR::Instr* LowererMD::Simd128LowerShift(IR::Instr *instr)
{
    IR::Opnd* dst = instr->GetDst();
    IR::Opnd* src1 = instr->GetSrc1();
    IR::Opnd* src2 = instr->GetSrc2();
    Assert(dst->IsRegOpnd() && dst->IsSimd128());
    Assert(src1->IsRegOpnd() && src1->IsSimd128());
    Assert(src2->IsInt32());

    Js::OpCode opcode = Js::OpCode::PSLLD;
    int elementSizeInBytes = 0;

    switch (instr->m_opcode)
    {
    case Js::OpCode::Simd128_ShRtByScalar_I2:
        // No SSE2 instruction for a signed 64-bit arithmetic right shift;
        // lowered to a helper call instead.
        EmitShiftByScalarI2(instr, IR::HelperSimd128ShRtByScalarI2);
        return removeInstr(instr);
    case Js::OpCode::Simd128_ShLtByScalar_I2:
        opcode = Js::OpCode::PSLLQ;
        elementSizeInBytes = 8;
        break;
    case Js::OpCode::Simd128_ShRtByScalar_U2:
        opcode = Js::OpCode::PSRLQ;
        elementSizeInBytes = 8;
        break;
    case Js::OpCode::Simd128_ShLtByScalar_I4:
    case Js::OpCode::Simd128_ShLtByScalar_U4:     // same as int32x4.ShiftLeftScalar
        opcode = Js::OpCode::PSLLD;
        elementSizeInBytes = 4;
        break;
    case Js::OpCode::Simd128_ShRtByScalar_I4:
        opcode = Js::OpCode::PSRAD;
        elementSizeInBytes = 4;
        break;
    case Js::OpCode::Simd128_ShLtByScalar_I8:
    case Js::OpCode::Simd128_ShLtByScalar_U8:     // same as int16x8.ShiftLeftScalar
        opcode = Js::OpCode::PSLLW;
        elementSizeInBytes = 2;
        break;
    case Js::OpCode::Simd128_ShRtByScalar_I8:
        opcode = Js::OpCode::PSRAW;
        elementSizeInBytes = 2;
        break;
    case Js::OpCode::Simd128_ShRtByScalar_U4:
        opcode = Js::OpCode::PSRLD;
        elementSizeInBytes = 4;
        break;
    case Js::OpCode::Simd128_ShRtByScalar_U8:
        opcode = Js::OpCode::PSRLW;
        elementSizeInBytes = 2;
        break;
    case Js::OpCode::Simd128_ShLtByScalar_I16:    // composite, int8x16.ShiftLeftScalar
    case Js::OpCode::Simd128_ShRtByScalar_I16:    // composite, int8x16.ShiftRightScalar
    case Js::OpCode::Simd128_ShLtByScalar_U16:    // same as int8x16.ShiftLeftScalar
    case Js::OpCode::Simd128_ShRtByScalar_U16:    // composite, uint8x16.ShiftRightScalar
        elementSizeInBytes = 1;
        break;
    default:
        Assert(UNREACHED);
    }

    IR::Instr *pInstr = nullptr;
    IR::RegOpnd *reg = IR::RegOpnd::New(TyInt32, m_func);
    IR::RegOpnd *reg2 = IR::RegOpnd::New(TyInt32, m_func);
    IR::RegOpnd *tmp0 = IR::RegOpnd::New(src1->GetType(), m_func);
    IR::RegOpnd *tmp1 = IR::RegOpnd::New(src1->GetType(), m_func);
    IR::RegOpnd *tmp2 = IR::RegOpnd::New(src1->GetType(), m_func);

    // Shift amount: the shift amount is masked by [ElementSize] * 8 - 1,
    // then the masked amount is moved to an xmm register:
    //     AND  shamt, origShamt, shmask
    //     MOVD tmp0, shamt

    IR::RegOpnd *shamt = IR::RegOpnd::New(src2->GetType(), m_func);
    // en-register the (possibly constant) shift amount
    IR::Opnd *origShamt = EnregisterIntConst(instr, src2); //unnormalized shift amount
    pInstr = IR::Instr::New(Js::OpCode::AND, shamt, origShamt, IR::IntConstOpnd::New(Js::SIMDUtils::SIMDGetShiftAmountMask(elementSizeInBytes), TyInt32, m_func), m_func); // normalizing by elm width (i.e. shamt % elm_width)
    instr->InsertBefore(pInstr);
    Legalize(pInstr);

    pInstr = IR::Instr::New(Js::OpCode::MOVD, tmp0, shamt, m_func);
    instr->InsertBefore(pInstr);


    if (instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I4 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_I4 ||
        instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_U4 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U4 ||
        instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I8 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_I8 ||
        instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_U8 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U8 ||
        instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I2 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U2)
    {
        // Directly supported lane widths:
        // shiftOpCode dst, src1, tmp0
        pInstr = IR::Instr::New(opcode, dst, src1, tmp0, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
    }
    else if (instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I16 || instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_U16)
    {
        // Byte left shift: shift the high and low bytes of each word
        // separately as words, mask off the bits that crossed a byte
        // boundary, and recombine.
        // MOVAPS tmp1, src1
        pInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp1, src1, m_func);
        instr->InsertBefore(pInstr);
        // MOVAPS dst, src1
        pInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func);
        instr->InsertBefore(pInstr);
        // PAND tmp1, [X86_HIGHBYTES_MASK]
        pInstr = IR::Instr::New(Js::OpCode::PAND, tmp1, tmp1, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86HighBytesMaskAddr(), TySimd128I4, m_func), m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // PSLLW tmp1, tmp0
        pInstr = IR::Instr::New(Js::OpCode::PSLLW, tmp1, tmp1, tmp0, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // PSLLW dst, tmp0
        pInstr = IR::Instr::New(Js::OpCode::PSLLW, dst, dst, tmp0, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // PAND dst, [X86_LOWBYTES_MASK]
        pInstr = IR::Instr::New(Js::OpCode::PAND, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86LowBytesMaskAddr(), TySimd128I4, m_func), m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // POR dst, tmp1
        pInstr = IR::Instr::New(Js::OpCode::POR, dst, dst, tmp1, m_func);
        instr->InsertBefore(pInstr);
    }
    else if (instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_I16)
    {
        // Signed byte right shift: shift each half word-wise by (shamt + 8)
        // so the sign extends correctly, then mask and recombine halves.
        // MOVAPS tmp1, src1
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, tmp1, src1, m_func));
        // MOVAPS dst, src1
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
        // PSLLW dst, 8 : move low bytes into the high byte of each word
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSLLW, dst, dst, IR::IntConstOpnd::New(8, TyInt8, m_func), m_func));
        // LEA reg, [shamt + 8]
        IR::IndirOpnd *indirOpnd = IR::IndirOpnd::New(shamt->AsRegOpnd(), +8, TyInt32, m_func);
        instr->InsertBefore(IR::Instr::New(Js::OpCode::LEA, reg, indirOpnd, m_func));
        // MOVD tmp2, reg
        pInstr = IR::Instr::New(Js::OpCode::MOVD, tmp2, reg, m_func);
        instr->InsertBefore(pInstr);
        // PSRAW dst, tmp2
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRAW, dst, dst, tmp2, m_func));
        // PAND dst, [X86_LOWBYTES_MASK]
        pInstr = IR::Instr::New(Js::OpCode::PAND, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86LowBytesMaskAddr(), TySimd128I4, m_func), m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // PSRAW tmp1, tmp0
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRAW, tmp1, tmp1, tmp0, m_func));
        // PAND tmp1, [X86_HIGHBYTES_MASK]
        pInstr = IR::Instr::New(Js::OpCode::PAND, tmp1, tmp1, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86HighBytesMaskAddr(), TySimd128I4, m_func), m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // POR dst, tmp1
        instr->InsertBefore(IR::Instr::New(Js::OpCode::POR, dst, dst, tmp1, m_func));
    }
    else if (instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U16)
    {
        // Unsigned byte right shift: do a word-wise logical shift, then AND
        // with a splatted (0xFF >> shamt) mask to clear bits that leaked in
        // from the neighboring byte.
        IR::RegOpnd * shamtReg = IR::RegOpnd::New(TyInt8, m_func);
        shamtReg->SetReg(LowererMDArch::GetRegShiftCount());
        IR::RegOpnd * tmp = IR::RegOpnd::New(TyInt8, m_func);

        // MOVAPS dst, src1
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
        // MOV reg2, 0FFh
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, reg2, IR::IntConstOpnd::New(0xFF, TyInt32, m_func), m_func));
        // MOV shamtReg, shamt
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, shamtReg, shamt, m_func));
        // SHR reg2, shamtReg (lower 8 bit)
        instr->InsertBefore(IR::Instr::New(Js::OpCode::SHR, reg2, reg2, shamtReg, m_func));

        // MOV tmp, reg2
        // MOVSX reg2, tmp(TyInt8)

        pInstr = IR::Instr::New(Js::OpCode::MOV, tmp, reg2, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);

        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSX, reg2, tmp, m_func));
        IR::RegOpnd *mask = IR::RegOpnd::New(TySimd128I4, m_func);
        // PSRLW dst, tmp0
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRLW, dst, dst, tmp0, m_func));
        // splat (0xFF >> shamt) into mask
        // MOVD mask, reg2
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, mask, reg2, m_func));
        // PUNPCKLBW mask, mask
        pInstr = IR::Instr::New(Js::OpCode::PUNPCKLBW, mask, mask, mask, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // PUNPCKLWD mask, mask
        pInstr = IR::Instr::New(Js::OpCode::PUNPCKLWD, mask, mask, mask, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // PSHUFD mask, mask, 0
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, mask, mask, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
        // PAND dst, mask
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PAND, dst, dst, mask, m_func));
    }
    else
    {
        Assert(UNREACHED);
    }

    pInstr = instr->m_prev;
    instr->Remove();

    return pInstr;
}
| |
| IR::Instr* LowererMD::SIMD128LowerReplaceLane_8(IR::Instr* instr) |
| { |
| SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr); |
| |
| int lane = 0; |
| |
| IR::Opnd *dst = args->Pop(); |
| IR::Opnd *src1 = args->Pop(); |
| IR::Opnd *src2 = args->Pop(); |
| IR::Opnd *src3 = args->Pop(); |
| IR::Instr * newInstr = nullptr; |
| |
| Assert(dst->IsSimd128() && src1->IsSimd128()); |
| |
| lane = src2->AsIntConstOpnd()->AsInt32(); |
| |
| IR::Opnd* laneValue = EnregisterIntConst(instr, src3, TyInt16); |
| |
| Assert(instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_I8 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_U8 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B8); |
| |
| // MOVAPS dst, src1 |
| newInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func); |
| instr->InsertBefore(newInstr); |
| Legalize(newInstr); |
| |
| // PINSRW dst, value, index |
| newInstr = IR::Instr::New(Js::OpCode::PINSRW, dst, laneValue, IR::IntConstOpnd::New(lane, TyInt8, m_func), m_func); |
| instr->InsertBefore(newInstr); |
| Legalize(newInstr); |
| |
| if (instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B8) //canonicalizing lanes |
| { |
| instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQW, *dst); |
| } |
| |
| IR::Instr* prevInstr = instr->m_prev; |
| instr->Remove(); |
| return prevInstr; |
| } |
| |
| |
// Lowers ReplaceLane for 8-bit lanes (I16/U16/B16). SSE2 has no PINSRB, so
// the vector is spilled to a SIMD temp area in memory, the target byte is
// overwritten there, and the vector is reloaded. Boolean vectors are then
// re-canonicalized so each lane is all-ones or all-zeros.
IR::Instr* LowererMD::SIMD128LowerReplaceLane_16(IR::Instr* instr)
{
    SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
    int lane = 0;
    IR::Opnd *dst = args->Pop();
    IR::Opnd *src1 = args->Pop();    // vector being updated
    IR::Opnd *src2 = args->Pop();    // constant lane index
    IR::Opnd *src3 = args->Pop();    // new lane value
    IR::Instr * newInstr = nullptr;

    Assert(dst->IsSimd128() && src1->IsSimd128());

    lane = src2->AsIntConstOpnd()->AsInt32();
    Assert(lane >= 0 && lane < 16);

    // Truncate the new value to 8 bits and ensure it is in a register.
    IR::Opnd* laneValue = EnregisterIntConst(instr, src3, TyInt8);
    intptr_t tempSIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(0);
#if DBG
    // using only one SIMD temp
    intptr_t endAddrSIMD = tempSIMD + sizeof(X86SIMDValue);
#endif

    Assert(instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_I16 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_U16 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B16);
    // MOVUPS [temp], src1
    intptr_t address = tempSIMD;
    newInstr = IR::Instr::New(Js::OpCode::MOVUPS, IR::MemRefOpnd::New(address, TySimd128I16, m_func), src1, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    // MOV [temp+offset], laneValue : overwrite the single target byte
    address = tempSIMD + lane;
    // check for buffer overrun
    Assert((intptr_t)address < endAddrSIMD);
    newInstr = IR::Instr::New(Js::OpCode::MOV, IR::MemRefOpnd::New(address, TyInt8, m_func), laneValue, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    // MOVUPS dst, [temp] : reload the updated vector
    address = tempSIMD;
    newInstr = IR::Instr::New(Js::OpCode::MOVUPS, dst, IR::MemRefOpnd::New(address, TySimd128I16, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    if (instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B16) //canonicalizing lanes.
    {
        instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQB, *dst);
    }

    IR::Instr* prevInstr = instr->m_prev;
    instr->Remove();
    return prevInstr;
}
| |
| IR::Instr* LowererMD::SIMD128LowerReplaceLane_4(IR::Instr* instr) |
| { |
| SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr); |
| |
| int lane = 0, byteWidth = 0; |
| |
| IR::Opnd *dst = args->Pop(); |
| IR::Opnd *src1 = args->Pop(); |
| IR::Opnd *src2 = args->Pop(); |
| IR::Opnd *src3 = args->Pop(); |
| |
| Assert(dst->IsSimd128() && src1->IsSimd128()); |
| IRType type = dst->GetType(); |
| lane = src2->AsIntConstOpnd()->AsInt32(); |
| |
| IR::Opnd* laneValue = EnregisterIntConst(instr, src3); |
| |
| switch (instr->m_opcode) |
| { |
| case Js::OpCode::Simd128_ReplaceLane_I4: |
| case Js::OpCode::Simd128_ReplaceLane_U4: |
| case Js::OpCode::Simd128_ReplaceLane_B4: |
| byteWidth = TySize[TyInt32]; |
| break; |
| case Js::OpCode::Simd128_ReplaceLane_F4: |
| byteWidth = TySize[TyFloat32]; |
| break; |
| default: |
| Assert(UNREACHED); |
| } |
| |
| // MOVAPS dst, src1 |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func)); |
| |
| if (laneValue->GetType() == TyInt32 || laneValue->GetType() == TyUint32) |
| { |
| IR::RegOpnd *tempReg = IR::RegOpnd::New(TyFloat32, m_func);//mov intval to xmm |
| //MOVD |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, tempReg, laneValue, m_func)); |
| laneValue = tempReg; |
| } |
| Assert(laneValue->GetType() == TyFloat32); |
| if (lane == 0) |
| { |
| // MOVSS for both TyFloat32 and TyInt32. MOVD zeroes upper bits. |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, laneValue, m_func)); |
| } |
| else if (lane == 2) |
| { |
| IR::RegOpnd *tmp = IR::RegOpnd::New(type, m_func); |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVHLPS, tmp, dst, m_func)); |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, tmp, laneValue, m_func)); |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVLHPS, dst, tmp, m_func)); |
| } |
| else |
| { |
| Assert(lane == 1 || lane == 3); |
| uint8 shufMask = 0xE4; // 11 10 01 00 |
| shufMask |= lane; // 11 10 01 id |
| shufMask &= ~(0x03 << (lane << 1)); // set 2 bits corresponding to lane index to 00 |
| |
| // SHUFPS dst, dst, shufMask |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, dst, IR::IntConstOpnd::New(shufMask, TyInt8, m_func, true), m_func)); |
| |
| // MOVSS dst, value |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, laneValue, m_func)); |
| |
| // SHUFPS dst, dst, shufMask |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, dst, IR::IntConstOpnd::New(shufMask, TyInt8, m_func, true), m_func)); |
| } |
| if (instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B4) //Canonicalizing lanes |
| { |
| instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQD, *dst); |
| } |
| IR::Instr* prevInstr = instr->m_prev; |
| instr->Remove(); |
| return prevInstr; |
| } |
| |
| /* |
| 4 and 2 lane Swizzle. |
| */ |
| IR::Instr* LowererMD::Simd128LowerSwizzle_4(IR::Instr* instr) |
| { |
| Js::OpCode shufOpcode = Js::OpCode::SHUFPS; |
| Js::OpCode irOpcode = instr->m_opcode; |
| |
| SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr); |
| |
| IR::Opnd *dst = args->Pop(); |
| IR::Opnd *srcs[6] = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr }; |
| |
| int i = 0; |
| while (!args->Empty() && i < 6) |
| { |
| srcs[i++] = args->Pop(); |
| } |
| |
| int8 shufMask = 0; |
| int lane0 = 0, lane1 = 0, lane2 = 0, lane3 = 0; |
| IR::Instr *pInstr = instr->m_prev; |
| |
| Assert(dst->IsSimd128() && srcs[0] && srcs[0]->IsSimd128()); |
| |
| // globOpt will type-spec if all lane indices are constants, and within range constraints to match a single SSE instruction |
| Assert(irOpcode == Js::OpCode::Simd128_Swizzle_I4 || irOpcode == Js::OpCode::Simd128_Swizzle_U4 || irOpcode == Js::OpCode::Simd128_Swizzle_F4 /*|| irOpcode == Js::OpCode::Simd128_Swizzle_D2*/); |
| AssertMsg(srcs[1] && srcs[1]->IsIntConstOpnd() && |
| srcs[2] && srcs[2]->IsIntConstOpnd() && |
| (/*irOpcode == Js::OpCode::Simd128_Swizzle_D2 || */(srcs[3] && srcs[3]->IsIntConstOpnd())) && |
| (/*irOpcode == Js::OpCode::Simd128_Swizzle_D2 || */(srcs[4] && srcs[4]->IsIntConstOpnd())), "Type-specialized swizzle is supported only with constant lane indices"); |
| |
| #if 0 |
| if (irOpcode == Js::OpCode::Simd128_Swizzle_D2) |
| { |
| lane0 = srcs[1]->AsIntConstOpnd()->AsInt32(); |
| lane1 = srcs[2]->AsIntConstOpnd()->AsInt32(); |
| Assert(lane0 >= 0 && lane0 < 2); |
| Assert(lane1 >= 0 && lane1 < 2); |
| shufMask = (int8)((lane1 << 1) | lane0); |
| shufOpcode = Js::OpCode::SHUFPD; |
| } |
| #endif // 0 |
| |
| if (irOpcode == Js::OpCode::Simd128_Swizzle_I4 || irOpcode == Js::OpCode::Simd128_Swizzle_U4) |
| { |
| shufOpcode = Js::OpCode::PSHUFD; |
| } |
| AnalysisAssert(srcs[3] != nullptr && srcs[4] != nullptr); |
| lane0 = srcs[1]->AsIntConstOpnd()->AsInt32(); |
| lane1 = srcs[2]->AsIntConstOpnd()->AsInt32(); |
| lane2 = srcs[3]->AsIntConstOpnd()->AsInt32(); |
| lane3 = srcs[4]->AsIntConstOpnd()->AsInt32(); |
| Assert(lane1 >= 0 && lane1 < 4); |
| Assert(lane2 >= 0 && lane2 < 4); |
| Assert(lane2 >= 0 && lane2 < 4); |
| Assert(lane3 >= 0 && lane3 < 4); |
| shufMask = (int8)((lane3 << 6) | (lane2 << 4) | (lane1 << 2) | lane0); |
| |
| instr->m_opcode = shufOpcode; |
| instr->SetDst(dst); |
| |
| // MOVAPS dst, src1 |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, srcs[0], m_func)); |
| // SHUF dst, dst, imm8 |
| instr->SetSrc1(dst); |
| instr->SetSrc2(IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true)); |
| return pInstr; |
| } |
| |
| /* |
| 4 lane shuffle. Handles arbitrary lane values. |
| */ |
| |
| IR::Instr* LowererMD::Simd128LowerShuffle_4(IR::Instr* instr) |
| { |
| Js::OpCode irOpcode = instr->m_opcode; |
| SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr); |
| IR::Opnd *dst = args->Pop(); |
| IR::Opnd *srcs[6] = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr }; |
| |
| int j = 0; |
| while (!args->Empty() && j < 6) |
| { |
| srcs[j++] = args->Pop(); |
| } |
| |
| uint8 lanes[4], lanesSrc[4]; |
| uint fromSrc1, fromSrc2; |
| IR::Instr *pInstr = instr->m_prev; |
| |
| Assert(dst->IsSimd128() && srcs[0] && srcs[0]->IsSimd128() && srcs[1] && srcs[1]->IsSimd128()); |
| Assert(irOpcode == Js::OpCode::Simd128_Shuffle_I4 || irOpcode == Js::OpCode::Simd128_Shuffle_U4 || irOpcode == Js::OpCode::Simd128_Shuffle_F4); |
| |
| // globOpt will type-spec if all lane indices are constants, and within range constraints to match a single SSE instruction |
| AssertMsg(srcs[2] && srcs[2]->IsIntConstOpnd() && |
| srcs[3] && srcs[3]->IsIntConstOpnd() && |
| srcs[4] && srcs[4]->IsIntConstOpnd() && |
| srcs[5] && srcs[5]->IsIntConstOpnd(), "Type-specialized shuffle is supported only with constant lane indices"); |
| |
| lanes[0] = (uint8) srcs[2]->AsIntConstOpnd()->AsInt32(); |
| lanes[1] = (uint8) srcs[3]->AsIntConstOpnd()->AsInt32(); |
| lanes[2] = (uint8) srcs[4]->AsIntConstOpnd()->AsInt32(); |
| lanes[3] = (uint8) srcs[5]->AsIntConstOpnd()->AsInt32(); |
| Assert(lanes[0] >= 0 && lanes[0] < 8); |
| Assert(lanes[1] >= 0 && lanes[1] < 8); |
| Assert(lanes[2] >= 0 && lanes[2] < 8); |
| Assert(lanes[3] >= 0 && lanes[3] < 8); |
| |
| CheckShuffleLanes_4(lanes, lanesSrc, &fromSrc1, &fromSrc2); |
| Assert(fromSrc1 + fromSrc2 == 4); |
| |
| if (fromSrc1 == 4 || fromSrc2 == 4) |
| { |
| // can be done with a swizzle |
| IR::Opnd *srcOpnd = fromSrc1 == 4 ? srcs[0] : srcs[1]; |
| InsertShufps(lanes, dst, srcOpnd, srcOpnd, instr); |
| } |
| else if (fromSrc1 == 2) |
| { |
| if (lanes[0] < 4 && lanes[1] < 4) |
| { |
| // x86 friendly shuffle |
| Assert(lanes[2] >= 4 && lanes[3] >= 4); |
| InsertShufps(lanes, dst, srcs[0], srcs[1], instr); |
| } |
| else |
| { |
| // arbitrary shuffle with 2 lanes from each src |
| uint8 ordLanes[4], reArrLanes[4]; |
| |
| // order lanes based on which src they come from |
| // compute re-arrangement mask |
| for (uint8 i = 0, j1 = 0, j2 = 2; i < 4; i++) |
| { |
| if (lanesSrc[i] == 1 && j1 < 4) |
| { |
| ordLanes[j1] = lanes[i]; |
| reArrLanes[i] = j1; |
| j1++; |
| } |
| else if(j2 < 4) |
| { |
| Assert(lanesSrc[i] == 2); |
| ordLanes[j2] = lanes[i]; |
| reArrLanes[i] = j2; |
| j2++; |
| } |
| } |
| IR::RegOpnd *temp = IR::RegOpnd::New(dst->GetType(), m_func); |
| InsertShufps(ordLanes, temp, srcs[0], srcs[1], instr); |
| InsertShufps(reArrLanes, dst, temp, temp, instr); |
| } |
| } |
| else if (fromSrc1 == 3 || fromSrc2 == 3) |
| { |
| // shuffle with 3 lanes from one src, one from another |
| |
| IR::Instr *newInstr; |
| IR::Opnd * majSrc, *minSrc; |
| IR::RegOpnd *temp1 = IR::RegOpnd::New(dst->GetType(), m_func); |
| IR::RegOpnd *temp2 = IR::RegOpnd::New(dst->GetType(), m_func); |
| IR::RegOpnd *temp3 = IR::RegOpnd::New(dst->GetType(), m_func); |
| uint8 minorityLane = 0, maxLaneValue; |
| majSrc = fromSrc1 == 3 ? srcs[0] : srcs[1]; |
| minSrc = fromSrc1 == 3 ? srcs[1] : srcs[0]; |
| Assert(majSrc != minSrc); |
| |
| // Algorithm: |
| // SHUFPS temp1, majSrc, lanes |
| // SHUFPS temp2, minSrc, lanes |
| // MOVUPS temp3, [minorityLane mask] |
| // ANDPS temp2, temp3 // mask all lanes but minorityLane |
| // ANDNPS temp3, temp1 // zero minorityLane |
| // ORPS dst, temp2, temp3 |
| |
| // find minorityLane to mask |
| maxLaneValue = minSrc == srcs[0] ? 4 : 8; |
| for (uint8 i = 0; i < 4; i++) |
| { |
| if (lanes[i] >= (maxLaneValue - 4) && lanes[i] < maxLaneValue) |
| { |
| minorityLane = i; |
| break; |
| } |
| } |
| IR::MemRefOpnd * laneMask = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86FourLanesMaskAddr(minorityLane), dst->GetType(), m_func); |
| |
| InsertShufps(lanes, temp1, majSrc, majSrc, instr); |
| InsertShufps(lanes, temp2, minSrc, minSrc, instr); |
| newInstr = IR::Instr::New(Js::OpCode::MOVUPS, temp3, laneMask, m_func); |
| instr->InsertBefore(newInstr); |
| Legalize(newInstr); |
| newInstr = IR::Instr::New(Js::OpCode::ANDPS, temp2, temp2, temp3, m_func); |
| instr->InsertBefore(newInstr); |
| Legalize(newInstr); |
| newInstr = IR::Instr::New(Js::OpCode::ANDNPS, temp3, temp3, temp1, m_func); |
| instr->InsertBefore(newInstr); |
| Legalize(newInstr); |
| newInstr = IR::Instr::New(Js::OpCode::ORPS, dst, temp2, temp3, m_func); |
| instr->InsertBefore(newInstr); |
| Legalize(newInstr); |
| } |
| |
| instr->Remove(); |
| return pInstr; |
| } |
| |
// 8 and 16 lane shuffle/swizzle with memory temps.
// The source vector(s) are spilled to a per-thread SIMD scratch area
// (slot 0: src1, slot 1: src2 for shuffle only, slot 2: the result being built).
// Each result lane is then copied with a scalar MOV indexed by the constant lane
// value, and the assembled vector is reloaded into dst.
IR::Instr* LowererMD::Simd128LowerShuffle(IR::Instr* instr)
{
    Js::OpCode irOpcode = instr->m_opcode;
    IR::Instr *pInstr = instr->m_prev, *newInstr = nullptr;
    SList<IR::Opnd*> *args = nullptr;
    IR::Opnd *dst = nullptr;
    IR::Opnd *src1 = nullptr, *src2 = nullptr;
    uint8 lanes[16], laneCount = 0, scale = 1;
    bool isShuffle = false;
    IRType laneType = TyInt16;
    // Scratch slots in the per-thread SIMD temp area (16 bytes apart).
    intptr_t temp1SIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(0);
    intptr_t temp2SIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(1);
    intptr_t dstSIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(2);
#if DBG
    intptr_t endAddrSIMD = (intptr_t)(temp1SIMD + sizeof(X86SIMDValue) * SIMD_TEMP_SIZE);
#endif
    void *address = nullptr;
    args = Simd128GetExtendedArgs(instr);

    // Per-opcode configuration: number of lanes, per-lane element type/size
    // (scale = byte width of one lane), and whether a second source vector
    // participates (shuffle) or not (swizzle).
    switch (irOpcode)
    {
    case Js::OpCode::Simd128_Swizzle_I8:
    case Js::OpCode::Simd128_Swizzle_U8:
        Assert(args->Count() == 10);
        laneCount = 8;
        laneType = TyInt16;
        isShuffle = false;
        scale = 2;
        break;
    case Js::OpCode::Simd128_Swizzle_I16:
    case Js::OpCode::Simd128_Swizzle_U16:
        Assert(args->Count() == 18);
        laneCount = 16;
        laneType = TyInt8;
        isShuffle = false;
        scale = 1;
        break;
    case Js::OpCode::Simd128_Shuffle_I8:
    case Js::OpCode::Simd128_Shuffle_U8:
        Assert(args->Count() == 11);
        laneCount = 8;
        isShuffle = true;
        laneType = TyUint16;
        scale = 2;
        break;
    case Js::OpCode::Simd128_Shuffle_I16:
    case Js::OpCode::Simd128_Shuffle_U16:
        Assert(args->Count() == 19);
        laneCount = 16;
        isShuffle = true;
        laneType = TyUint8;
        scale = 1;
        break;
    default:
        Assert(UNREACHED);
    }

    // Extended-args order: dst, src1, [src2,] lane indices.
    dst = args->Pop();
    src1 = args->Pop();
    if (isShuffle)
    {
        src2 = args->Pop();
    }

    Assert(dst->IsSimd128() && src1 && src1->IsSimd128() && (!isShuffle|| src2->IsSimd128()));

    // Collect the compile-time-constant lane indices.
    for (uint i = 0; i < laneCount; i++)
    {
        IR::Opnd * laneOpnd = args->Pop();
        Assert(laneOpnd->IsIntConstOpnd());
        lanes[i] = (uint8)laneOpnd->AsIntConstOpnd()->AsInt32();
    }

    // MOVUPS [temp], src1
    newInstr = IR::Instr::New(Js::OpCode::MOVUPS, IR::MemRefOpnd::New((void*)temp1SIMD, TySimd128I16, m_func), src1, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    if (isShuffle)
    {
        // MOVUPS [temp+16], src2
        // src2 sits directly after src1, so shuffle lane indices >= laneCount
        // fall through into it with the same base address.
        newInstr = IR::Instr::New(Js::OpCode::MOVUPS, IR::MemRefOpnd::New((void*)(temp2SIMD), TySimd128I16, m_func), src2, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
    }
    // Copy the result one lane at a time via a scalar register.
    for (uint i = 0; i < laneCount; i++)
    {
        //. MOV tmp, [temp1SIMD + laneValue*scale]
        IR::RegOpnd *tmp = IR::RegOpnd::New(laneType, m_func);
        address = (void*)(temp1SIMD + lanes[i] * scale);
        // The read must stay inside the source slot(s), i.e. below the result slot.
        Assert((intptr_t)address + (intptr_t)scale <= (intptr_t)dstSIMD);
        newInstr = IR::Instr::New(Js::OpCode::MOV, tmp, IR::MemRefOpnd::New(address, laneType, m_func), m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);

        //. MOV [dstSIMD + i*scale], tmp
        address = (void*)(dstSIMD + i * scale);
        // The write must stay inside the temp area.
        Assert((intptr_t)address + (intptr_t) scale <= endAddrSIMD);
        newInstr = IR::Instr::New(Js::OpCode::MOV,IR::MemRefOpnd::New(address, laneType, m_func), tmp, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
    }

    // MOVUPS dst, [dstSIMD]
    newInstr = IR::Instr::New(Js::OpCode::MOVUPS, dst, IR::MemRefOpnd::New((void*)dstSIMD, TySimd128I16, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    instr->Remove();
    return pInstr;
}
| |
| IR::Instr* LowererMD::Simd128LowerNotEqual(IR::Instr* instr) |
| { |
| Assert(instr->m_opcode == Js::OpCode::Simd128_Neq_I4 || instr->m_opcode == Js::OpCode::Simd128_Neq_I8 || |
| instr->m_opcode == Js::OpCode::Simd128_Neq_I16 || instr->m_opcode == Js::OpCode::Simd128_Neq_U4 || |
| instr->m_opcode == Js::OpCode::Simd128_Neq_U8 || instr->m_opcode == Js::OpCode::Simd128_Neq_U16); |
| |
| IR::Instr *pInstr; |
| IR::Opnd* dst = instr->GetDst(); |
| IR::Opnd* src1 = instr->GetSrc1(); |
| IR::Opnd* src2 = instr->GetSrc2(); |
| Assert(dst->IsRegOpnd() && (dst->IsSimd128B4() || dst->IsSimd128B8() || dst->IsSimd128B16())); |
| Assert(src1->IsRegOpnd() && src1->IsSimd128()); |
| Assert(src2->IsRegOpnd() && src2->IsSimd128()); |
| |
| Js::OpCode cmpOpcode = Js::OpCode::PCMPEQD; |
| if (instr->m_opcode == Js::OpCode::Simd128_Neq_I8 || instr->m_opcode == Js::OpCode::Simd128_Neq_U8) |
| { |
| cmpOpcode = Js::OpCode::PCMPEQW; |
| } |
| else if (instr->m_opcode == Js::OpCode::Simd128_Neq_I16 || instr->m_opcode == Js::OpCode::Simd128_Neq_U16) |
| { |
| cmpOpcode = Js::OpCode::PCMPEQB; |
| } |
| // dst = PCMPEQD src1, src2 |
| pInstr = IR::Instr::New(cmpOpcode, dst, src1, src2, m_func); |
| instr->InsertBefore(pInstr); |
| //MakeDstEquSrc1(pInstr); |
| Legalize(pInstr); |
| |
| // dst = PANDN dst, X86_ALL_NEG_ONES |
| pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func); |
| instr->InsertBefore(pInstr); |
| //MakeDstEquSrc1(pInstr); |
| Legalize(pInstr); |
| |
| pInstr = instr->m_prev; |
| instr->Remove(); |
| |
| return pInstr; |
| } |
| |
| IR::Instr* LowererMD::Simd128LowerLessThan(IR::Instr* instr) |
| { |
| Assert(instr->m_opcode == Js::OpCode::Simd128_Lt_U4 || instr->m_opcode == Js::OpCode::Simd128_Lt_U8 || instr->m_opcode == Js::OpCode::Simd128_Lt_U16 || |
| instr->m_opcode == Js::OpCode::Simd128_GtEq_U4 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U16); |
| |
| IR::Instr *pInstr; |
| IR::Opnd* dst = instr->GetDst(); |
| IR::Opnd* src1 = instr->GetSrc1(); |
| IR::Opnd* src2 = instr->GetSrc2(); |
| Assert(dst->IsRegOpnd() && (dst->IsSimd128B4() || dst->IsSimd128B8() || dst->IsSimd128B16())); |
| Assert(src1->IsRegOpnd() && src1->IsSimd128()); |
| Assert(src2->IsRegOpnd() && src2->IsSimd128()); |
| |
| IR::RegOpnd* tmpa = IR::RegOpnd::New(src1->GetType(), m_func); |
| IR::RegOpnd* tmpb = IR::RegOpnd::New(src1->GetType(), m_func); |
| |
| IR::MemRefOpnd* signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86DoubleWordSignBitsAddr(), TySimd128I4, m_func); |
| IR::RegOpnd * mask = IR::RegOpnd::New(TySimd128I4, m_func); |
| |
| Js::OpCode cmpOpcode = Js::OpCode::PCMPGTD; |
| if (instr->m_opcode == Js::OpCode::Simd128_Lt_U8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U8) |
| { |
| cmpOpcode = Js::OpCode::PCMPGTW; |
| signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86WordSignBitsAddr(), TySimd128I4, m_func); |
| } |
| else if (instr->m_opcode == Js::OpCode::Simd128_Lt_U16 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U16) |
| { |
| cmpOpcode = Js::OpCode::PCMPGTB; |
| signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86ByteSignBitsAddr(), TySimd128I4, m_func); |
| } |
| |
| // MOVUPS mask, [signBits] |
| pInstr = IR::Instr::New(Js::OpCode::MOVUPS, mask, signBits, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| |
| // tmpa = PXOR src1, signBits |
| pInstr = IR::Instr::New(Js::OpCode::PXOR, tmpa, src1, mask, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| |
| // tmpb = PXOR src2, signBits |
| pInstr = IR::Instr::New(Js::OpCode::PXOR, tmpb, src2, mask, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| |
| // dst = cmpOpCode tmpb, tmpa (Less than, swapped opnds) |
| pInstr = IR::Instr::New(cmpOpcode, dst, tmpb, tmpa, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| |
| if (instr->m_opcode == Js::OpCode::Simd128_GtEq_U4 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U16) |
| { |
| // for SIMD unsigned int, greaterThanOrEqual == lessThan + Not |
| // dst = PANDN dst, X86_ALL_NEG_ONES |
| // MOVUPS mask, [allNegOnes] |
| pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| } |
| |
| pInstr = instr->m_prev; |
| instr->Remove(); |
| |
| return pInstr; |
| } |
| |
// Lowers signed lessThanOrEqual and unsigned lessThanOrEqual / greaterThan.
// x86 only provides signed packed greater-than and equality compares, so:
//   LtEq_I4:          lte(a, b) == NOT(a > b)              (PCMPGTD + PANDN)
//   LtEq_I8/I16:      lte(a, b) == (b > a) OR (a == b)
//   LtEq_U*/Gt_U*:    bias both operands by XORing in the lane sign bit, then
//                     lte(a, b) == (b' > a') OR (a' == b'); unsigned greaterThan
//                     is finished off as NOT(lessThanOrEqual).
IR::Instr* LowererMD::Simd128LowerLessThanOrEqual(IR::Instr* instr)
{
    Assert(instr->m_opcode == Js::OpCode::Simd128_LtEq_I4 || instr->m_opcode == Js::OpCode::Simd128_LtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_I16 ||
        instr->m_opcode == Js::OpCode::Simd128_LtEq_U4 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U16 ||
        instr->m_opcode == Js::OpCode::Simd128_Gt_U4 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16);

    IR::Instr *pInstr;
    IR::Opnd* dst = instr->GetDst();
    IR::Opnd* src1 = instr->GetSrc1();
    IR::Opnd* src2 = instr->GetSrc2();
    Assert(dst->IsRegOpnd() && (dst->IsSimd128B4() || dst->IsSimd128B8() || dst->IsSimd128B16()));
    Assert(src1->IsRegOpnd() && src1->IsSimd128());
    Assert(src2->IsRegOpnd() && src2->IsSimd128());

    IR::RegOpnd* tmpa = IR::RegOpnd::New(src1->GetType(), m_func);
    IR::RegOpnd* tmpb = IR::RegOpnd::New(src1->GetType(), m_func);

    // Compare opcodes matching the lane width (dword by default).
    Js::OpCode cmpOpcode = Js::OpCode::PCMPGTD;
    Js::OpCode eqpOpcode = Js::OpCode::PCMPEQD;
    if (instr->m_opcode == Js::OpCode::Simd128_LtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8)
    {
        cmpOpcode = Js::OpCode::PCMPGTW;
        eqpOpcode = Js::OpCode::PCMPEQW;
    }
    else if (instr->m_opcode == Js::OpCode::Simd128_LtEq_I16 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U16 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16)
    {
        cmpOpcode = Js::OpCode::PCMPGTB;
        eqpOpcode = Js::OpCode::PCMPEQB;
    }

    if (instr->m_opcode == Js::OpCode::Simd128_LtEq_I4)
    {
        // dst = pcmpgtd src1, src2
        pInstr = IR::Instr::New(Js::OpCode::PCMPGTD, dst, src1, src2, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);

        // dst = pandn dst, xmmword ptr[X86_ALL_NEG_ONES]   (invert: NOT(a > b))
        pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
    }
    else if (instr->m_opcode == Js::OpCode::Simd128_LtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_I16)
    {
        // tmpa = pcmpgtw src2, src1 (src1 < src2?) [pcmpgtb]
        pInstr = IR::Instr::New(cmpOpcode, tmpa, src2, src1, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);

        // tmpb = pcmpeqw src1, src2 [pcmpeqb]
        pInstr = IR::Instr::New(eqpOpcode, tmpb, src1, src2, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);

        // dst = por tmpa, tmpb   (lessThan OR equal)
        pInstr = IR::Instr::New(Js::OpCode::POR, dst, tmpa, tmpb, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
    }
    else if (instr->m_opcode == Js::OpCode::Simd128_LtEq_U4 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U16 ||
        instr->m_opcode == Js::OpCode::Simd128_Gt_U4 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16)
    {
        // Unsigned: bias both operands into signed range first.
        IR::MemRefOpnd* signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86DoubleWordSignBitsAddr(), TySimd128I4, m_func);
        IR::RegOpnd * mask = IR::RegOpnd::New(TySimd128I4, m_func);
        if (instr->m_opcode == Js::OpCode::Simd128_LtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8)
        {
            signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86WordSignBitsAddr(), TySimd128I4, m_func);
        }
        else if (instr->m_opcode == Js::OpCode::Simd128_LtEq_U16 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16)
        {
            signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86ByteSignBitsAddr(), TySimd128I4, m_func);
        }
        // MOVUPS mask, [signBits]
        pInstr = IR::Instr::New(Js::OpCode::MOVUPS, mask, signBits, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // tmpa = PXOR src1, mask
        pInstr = IR::Instr::New(Js::OpCode::PXOR, tmpa, src1, mask, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);

        // tmpb = PXOR src2, mask
        pInstr = IR::Instr::New(Js::OpCode::PXOR, tmpb, src2, mask, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);

        // dst = cmpOpCode tmpb, tmpa   (biased lessThan, operands swapped)
        pInstr = IR::Instr::New(cmpOpcode, dst, tmpb, tmpa, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);

        // tmpa = pcmpeqd tmpa, tmpb    (equality of the biased values == equality of the originals)
        pInstr = IR::Instr::New(eqpOpcode, tmpa, tmpa, tmpb, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);

        // dst = por dst, tmpa          (lessThan OR equal)
        pInstr = IR::Instr::New(Js::OpCode::POR, dst, dst, tmpa, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);

        if (instr->m_opcode == Js::OpCode::Simd128_Gt_U4 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16)
        { // for SIMD unsigned int, greaterThan == lessThanOrEqual + Not
            // dst = PANDN dst, X86_ALL_NEG_ONES
            pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
            instr->InsertBefore(pInstr);

            Legalize(pInstr);
        }
    }

    pInstr = instr->m_prev;
    instr->Remove();

    return pInstr;
}
| |
| IR::Instr* LowererMD::Simd128LowerGreaterThanOrEqual(IR::Instr* instr) |
| { |
| Assert(instr->m_opcode == Js::OpCode::Simd128_GtEq_I4 || instr->m_opcode == Js::OpCode::Simd128_GtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_I16); |
| |
| IR::Instr *pInstr; |
| IR::Opnd* dst = instr->GetDst(); |
| IR::Opnd* src1 = instr->GetSrc1(); |
| IR::Opnd* src2 = instr->GetSrc2(); |
| Assert(dst->IsRegOpnd() && (dst->IsSimd128B4() || dst->IsSimd128B8() || dst->IsSimd128B16())); |
| Assert(src1->IsRegOpnd() && src1->IsSimd128()); |
| Assert(src2->IsRegOpnd() && src2->IsSimd128()); |
| |
| if (instr->m_opcode == Js::OpCode::Simd128_GtEq_I4) |
| { |
| // dst = pcmpgtd src2, src1 |
| pInstr = IR::Instr::New(Js::OpCode::PCMPGTD, dst, src2, src1, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| |
| // dst = pandn dst, xmmword ptr[X86_ALL_NEG_ONES] |
| pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| } |
| else if (instr->m_opcode == Js::OpCode::Simd128_GtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_I16) |
| { |
| IR::RegOpnd* tmp1 = IR::RegOpnd::New(src1->GetType(), m_func); |
| IR::RegOpnd* tmp2 = IR::RegOpnd::New(src1->GetType(), m_func); |
| |
| Js::OpCode cmpOpcode = Js::OpCode::PCMPGTW; |
| Js::OpCode eqpOpcode = Js::OpCode::PCMPEQW; |
| if (instr->m_opcode == Js::OpCode::Simd128_GtEq_I16) |
| { |
| cmpOpcode = Js::OpCode::PCMPGTB; |
| eqpOpcode = Js::OpCode::PCMPEQB; |
| } |
| |
| // tmp1 = pcmpgtw src1, src2 [pcmpgtb] |
| pInstr = IR::Instr::New(cmpOpcode, tmp1, src1, src2, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| |
| // tmp2 = pcmpeqw src1, src2 [pcmpeqw] |
| pInstr = IR::Instr::New(eqpOpcode, tmp2, src1, src2, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| |
| // dst = por tmp1, tmp2 |
| pInstr = IR::Instr::New(Js::OpCode::POR, dst, tmp1, tmp2, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| } |
| |
| pInstr = instr->m_prev; |
| instr->Remove(); |
| |
| return pInstr; |
| } |
| |
| IR::Instr* LowererMD::Simd128LowerMinMax_F4(IR::Instr* instr) |
| { |
| IR::Instr *pInstr; |
| IR::Opnd* dst = instr->GetDst(); |
| IR::Opnd* src1 = instr->GetSrc1(); |
| IR::Opnd* src2 = instr->GetSrc2(); |
| Assert(dst->IsRegOpnd() && dst->IsSimd128()); |
| Assert(src1->IsRegOpnd() && src1->IsSimd128()); |
| Assert(src2->IsRegOpnd() && src2->IsSimd128()); |
| Assert(instr->m_opcode == Js::OpCode::Simd128_Min_F4 || instr->m_opcode == Js::OpCode::Simd128_Max_F4); |
| IR::RegOpnd* tmp1 = IR::RegOpnd::New(src1->GetType(), m_func); |
| IR::RegOpnd* tmp2 = IR::RegOpnd::New(src2->GetType(), m_func); |
| |
| if (instr->m_opcode == Js::OpCode::Simd128_Min_F4) |
| { |
| pInstr = IR::Instr::New(Js::OpCode::MINPS, tmp1, src1, src2, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| // |
| pInstr = IR::Instr::New(Js::OpCode::MINPS, tmp2, src2, src1, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| // |
| pInstr = IR::Instr::New(Js::OpCode::ORPS, dst, tmp1, tmp2, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| } |
| else |
| { |
| |
| //This sequence closely mirrors SIMDFloat32x4Operation::OpMax except for |
| //the fact that tmp2 (tmpbValue) is reused to reduce the number of registers |
| //needed for this sequence. |
| |
| pInstr = IR::Instr::New(Js::OpCode::MAXPS, tmp1, src1, src2, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| // |
| pInstr = IR::Instr::New(Js::OpCode::MAXPS, tmp2, src2, src1, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| // |
| pInstr = IR::Instr::New(Js::OpCode::ANDPS, tmp1, tmp1, tmp2, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| // |
| pInstr = IR::Instr::New(Js::OpCode::CMPUNORDPS, tmp2, src1, src2, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| // |
| pInstr = IR::Instr::New(Js::OpCode::ORPS, dst, tmp1, tmp2, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| } |
| |
| pInstr = instr->m_prev; |
| instr->Remove(); |
| return pInstr; |
| |
| } |
| |
| IR::Opnd* LowererMD::Simd128CanonicalizeToBoolsBeforeReduction(IR::Instr* instr) |
| { |
| IR::Opnd* src1 = instr->GetSrc1(); |
| if (m_func->GetJITFunctionBody()->IsWasmFunction()) |
| { |
| Js::OpCode cmpOpcode = Js::OpCode::InvalidOpCode; |
| switch (instr->m_opcode) |
| { |
| case Js::OpCode::Simd128_AnyTrue_B4: |
| case Js::OpCode::Simd128_AnyTrue_B2: |
| case Js::OpCode::Simd128_AllTrue_B4: |
| case Js::OpCode::Simd128_AllTrue_B2: |
| cmpOpcode = Js::OpCode::PCMPEQD; |
| break; |
| case Js::OpCode::Simd128_AnyTrue_B8: |
| case Js::OpCode::Simd128_AllTrue_B8: |
| cmpOpcode = Js::OpCode::PCMPEQW; |
| break; |
| case Js::OpCode::Simd128_AnyTrue_B16: |
| case Js::OpCode::Simd128_AllTrue_B16: |
| cmpOpcode = Js::OpCode::PCMPEQB; |
| break; |
| default: |
| Assert(UNREACHED); |
| } |
| |
| IR::RegOpnd * newSrc = IR::RegOpnd::New(src1->GetType(), m_func); |
| m_lowerer->InsertMove(newSrc, src1, instr); |
| Simd128CanonicalizeToBools(instr, cmpOpcode, *newSrc); |
| return newSrc; |
| } |
| return src1; |
| } |
| |
| IR::Instr* LowererMD::Simd128LowerAnyTrue(IR::Instr* instr) |
| { |
| Assert(instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B4 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B8 || |
| instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B16 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B2); |
| |
| IR::Instr *pInstr; |
| IR::Opnd* dst = instr->GetDst(); |
| #ifdef ENABLE_WASM_SIMD |
| IR::Opnd* src1 = Simd128CanonicalizeToBoolsBeforeReduction(instr); |
| #else |
| IR::Opnd* src1 = instr->GetSrc1(); |
| #endif |
| Assert(dst->IsRegOpnd() && dst->IsInt32()); |
| Assert(src1->IsRegOpnd() && src1->IsSimd128()); |
| |
| // pmovmskb dst, src1 |
| // neg dst |
| // sbb dst, dst |
| // neg dst |
| |
| // pmovmskb dst, src1 |
| pInstr = IR::Instr::New(Js::OpCode::PMOVMSKB, dst, src1, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| |
| // neg dst |
| pInstr = IR::Instr::New(Js::OpCode::NEG, dst, dst, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| |
| // sbb dst, dst |
| pInstr = IR::Instr::New(Js::OpCode::SBB, dst, dst, dst, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| |
| // neg dst |
| pInstr = IR::Instr::New(Js::OpCode::NEG, dst, dst, m_func); |
| instr->InsertBefore(pInstr); |
| Legalize(pInstr); |
| |
| pInstr = instr->m_prev; |
| instr->Remove(); |
| |
| return pInstr; |
| } |
| |
// Lowers allTrue: dst = 1 iff the PMOVMSKB mask of the (canonicalized) boolean
// vector matches the expected all-set pattern, else 0. The 16-bit mask is
// compared against cmpMask and the flag result materialized via SETE/MOVSX.
IR::Instr* LowererMD::Simd128LowerAllTrue(IR::Instr* instr)
{
    Assert(instr->m_opcode == Js::OpCode::Simd128_AllTrue_B4 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B8 ||
        instr->m_opcode == Js::OpCode::Simd128_AllTrue_B16 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B2);

    IR::Instr *pInstr;
    IR::Opnd* dst = instr->GetDst();
#ifdef ENABLE_WASM_SIMD
    // Wasm sources may not be canonical bool vectors; canonicalize a copy first.
    IR::Opnd* src1 = Simd128CanonicalizeToBoolsBeforeReduction(instr);
#else
    IR::Opnd* src1 = instr->GetSrc1();
#endif

    Assert(dst->IsRegOpnd() && dst->IsInt32());
    Assert(src1->IsRegOpnd() && src1->IsSimd128());

    IR::RegOpnd * tmp = IR::RegOpnd::New(TyInt8, m_func);

    // pmovmskb dst, src1   ; gather the sign bit of each byte into dst[15:0]
    pInstr = IR::Instr::New(Js::OpCode::PMOVMSKB, dst, src1, m_func);
    instr->InsertBefore(pInstr);

    //horizontally OR into 0th and 2nd positions
    //TODO nikolayk revisit the sequence for in64x2.alltrue
    IR::Opnd* newDst = dst;
    uint cmpMask = 0xFFFF;
    if (instr->m_opcode == Js::OpCode::Simd128_AllTrue_B2)
    {
        // 2-lane case: fold the 16-bit mask (see TODO above) and compare
        // against the folded pattern instead of 0xFFFF.
        cmpMask = 0x0F0F;
        IR::RegOpnd* reduceReg = IR::RegOpnd::New(TyInt32, m_func);
        pInstr = IR::Instr::New(Js::OpCode::SHR, reduceReg, dst, (IR::IntConstOpnd::New(4, TyInt32, m_func, true)), m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        pInstr = IR::Instr::New(Js::OpCode::OR, reduceReg, reduceReg, dst, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        pInstr = IR::Instr::New(Js::OpCode::AND, reduceReg, reduceReg, (IR::IntConstOpnd::New(0x0F0F, TyInt32, m_func, true)), m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        newDst = reduceReg;
    }

    // cmp newDst, cmpMask
    pInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
    pInstr->SetSrc1(newDst);
    pInstr->SetSrc2(IR::IntConstOpnd::New(cmpMask, TyInt32, m_func, true));
    instr->InsertBefore(pInstr);
    Legalize(pInstr);

    // mov tmp(TyInt8), newDst
    pInstr = IR::Instr::New(Js::OpCode::MOV, tmp, newDst, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);

    // sete tmp(TyInt8)   ; tmp = 1 iff the compare above was equal
    pInstr = IR::Instr::New(Js::OpCode::SETE, tmp, tmp, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);

    // movsx dst, tmp(TyInt8)
    instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSX, dst, tmp, m_func));

    pInstr = instr->m_prev;
    instr->Remove();
    return pInstr;
}
| |
// Lowers Int32x4-from-Float32x4 by rewriting the instruction in place to
// CVTTPS2DQ, then inserting range checks AFTER it (note: unlike most lowerers,
// this one keeps the original instr and inserts via instr->m_next).
// CVTTPS2DQ produces the sentinel 0x80000000 for out-of-range or NaN lanes, so
// the checks first look for that sentinel in the output; only if it is present
// are the float sources re-examined and a range error raised for lanes that
// are genuinely out of [-2^31, 2^31) or (wasm only) NaN.
IR::Instr* LowererMD::Simd128LowerInt32x4FromFloat32x4(IR::Instr *instr)
{
    IR::Opnd *dst, *src, *tmp, *tmp2, *mask1, *mask2;
    IR::Instr *insertInstr, *pInstr, *newInstr;
    IR::LabelInstr *doneLabel;
    dst = instr->GetDst();
    src = instr->GetSrc1();
    Assert(dst != nullptr && src != nullptr && dst->IsSimd128() && src->IsSimd128());

    // CVTTPS2DQ dst, src
    instr->m_opcode = Js::OpCode::CVTTPS2DQ;
    insertInstr = instr->m_next;
    pInstr = instr->m_prev;
    doneLabel = IR::LabelInstr::New(Js::OpCode::Label, this->m_func);
    mask1 = IR::RegOpnd::New(TyInt32, m_func);
    mask2 = IR::RegOpnd::New(TyInt32, m_func);

    // bound checks
    // check if any value is potentially out of range (0x80000000 in output)
    // PCMPEQD tmp, dst, X86_NEG_MASK (0x80000000)
    // MOVMSKPS mask1, tmp
    // CMP mask1, 0
    // JEQ $doneLabel           ; no sentinel anywhere -> conversion is valid
    tmp = IR::RegOpnd::New(TySimd128I4, m_func);
    tmp2 = IR::RegOpnd::New(TySimd128I4, m_func);
    newInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp2, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegMaskF4Addr(), TySimd128I4, m_func), m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::PCMPEQD, tmp, dst, tmp2, m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask1, tmp, m_func));
    newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
    newInstr->SetSrc1(mask1);
    newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
    insertInstr->InsertBefore(newInstr);
    insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JEQ, doneLabel, m_func));

    // we have potential out of bound. check bounds
    // MOVAPS tmp2, X86_TWO_31_F4 (0x4f000000)
    // CMPLEPS tmp, tmp2, src             ; lanes >= 2^31
    // MOVMSKPS mask1, tmp
    // MOVAPS tmp2, X86_NEG_TWO_31_F4 (0xcf000000)
    // CMPLTPS tmp, src, tmp2             ; lanes < -2^31
    // MOVMSKPS mask2, tmp
    // OR mask1, mask1, mask2
    // check for NaNs (wasm only; NaN lanes compare unequal to themselves)
    // CMPEQPS tmp, src, src
    // MOVMSKPS mask2, tmp
    // NOT mask2
    // AND mask2, 0x00000F
    // OR mask1, mask2
    //
    // CMP mask1, 0
    // JEQ $doneLabel
    newInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp2, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86TwoPower31F4Addr(), TySimd128I4, m_func), m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::CMPLEPS, tmp, tmp2, src, m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask1, tmp, m_func));

    newInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp2, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegTwoPower31F4Addr(), TySimd128I4, m_func), m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::CMPLTPS, tmp, src, tmp2, m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);

    insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask2, tmp, m_func));

    insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::OR, mask1, mask1, mask2, m_func));

#ifdef ENABLE_WASM_SIMD
    if (m_func->GetJITFunctionBody()->IsWasmFunction())
    {
        newInstr = IR::Instr::New(Js::OpCode::CMPEQPS, tmp, src, src, m_func);
        insertInstr->InsertBefore(newInstr);
        Legalize(newInstr);
        insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask2, tmp, m_func));
        insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::NOT, mask2, mask2, m_func));
        newInstr = IR::Instr::New(Js::OpCode::AND, mask2, mask2, IR::IntConstOpnd::New(0x00000F, TyInt32, m_func), m_func);
        insertInstr->InsertBefore(newInstr);
        Legalize(newInstr);
        insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::OR, mask1, mask1, mask2, m_func));
    }
#endif

    newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
    newInstr->SetSrc1(mask1);
    newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
    insertInstr->InsertBefore(newInstr);
    insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JEQ, doneLabel, m_func));

    // throw range error
    m_lowerer->GenerateRuntimeError(insertInstr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);

    insertInstr->InsertBefore(doneLabel);

    return pInstr;
}
| |
// Lowers Uint32x4-from-Float32x4. CVTTPS2DQ only converts the signed range
// [-2^31, 2^31), so after rejecting NaNs (wasm) and lanes <= -1.0, lanes
// >= 2^31 are biased down by 2^31 before conversion and the bias is added back
// as an integer afterwards. Lanes originally >= 2^32 still overflow and appear
// as the 0x80000000 sentinel, which triggers a range error.
IR::Instr* LowererMD::Simd128LowerUint32x4FromFloat32x4(IR::Instr *instr)
{
    IR::Opnd *dst, *src, *tmp, *tmp2, *two_31_f4_mask, *two_31_i4_mask, *mask;
    IR::Instr *pInstr, *newInstr;
    IR::LabelInstr *doneLabel, *throwLabel;
    dst = instr->GetDst();
    src = instr->GetSrc1();
    Assert(dst != nullptr && src != nullptr && dst->IsSimd128() && src->IsSimd128());

    doneLabel = IR::LabelInstr::New(Js::OpCode::Label, this->m_func);
    throwLabel = IR::LabelInstr::New(Js::OpCode::Label, this->m_func, true);
    pInstr = instr->m_prev;

    mask = IR::RegOpnd::New(TyInt32, m_func);
    two_31_f4_mask = IR::RegOpnd::New(TySimd128F4, m_func);
    two_31_i4_mask = IR::RegOpnd::New(TySimd128I4, m_func);
    tmp = IR::RegOpnd::New(TySimd128F4, m_func);
    tmp2 = IR::RegOpnd::New(TySimd128F4, m_func);

    // check for NaNs (wasm only): a NaN lane compares unequal to itself
    // CMPEQPS tmp, src, src
    // MOVMSKPS mask, tmp
    // CMP mask, 0x0F       ; all four lanes equal to themselves?
    // JNE $throwLabel
#ifdef ENABLE_WASM_SIMD
    if (m_func->GetJITFunctionBody()->IsWasmFunction())
    {
        newInstr = IR::Instr::New(Js::OpCode::CMPEQPS, tmp, src, src, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask, tmp, m_func));
        newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
        newInstr->SetSrc1(mask);
        newInstr->SetSrc2(IR::IntConstOpnd::New(0x0000000F, TyInt32, m_func));
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, throwLabel, m_func));
    }
#endif

    // any lanes <= -1.0 ?
    // CMPLEPS tmp, src, [X86_ALL_FLOAT32_NEG_ONES]
    // MOVMSKPS mask, tmp
    // CMP mask, 0
    // JNE $throwLabel
    newInstr = IR::Instr::New(Js::OpCode::CMPLEPS, tmp, src, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesF4Addr(), TySimd128I4, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    newInstr = IR::Instr::New(Js::OpCode::MOVMSKPS, mask, tmp, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
    newInstr->SetSrc1(mask);
    newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
    instr->InsertBefore(newInstr);
    Legalize(newInstr);


    instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, throwLabel, m_func));

    // CVTTPS2DQ does a range check over signed range [-2^31, 2^31-1], so will fail to convert values >= 2^31.
    // To fix this, subtract 2^31 from values >= 2^31, do CVTTPS2DQ, then add 2^31 back.
    // MOVAPS two_31_f4_mask, [X86_TWO_31]
    // CMPLEPS tmp2, two_31_mask, src
    // ANDPS two_31_f4_mask, tmp2         // two_31_f4_mask has f32(2^31) for lanes >= 2^31, 0 otherwise
    // SUBPS tmp2, src, two_31_f4_mask    // subtract 2^31 from lanes >= 2^31, unchanged otherwise.
    // CVTTPS2DQ dst, tmp2
    newInstr = IR::Instr::New(Js::OpCode::MOVAPS, two_31_f4_mask, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86TwoPower31F4Addr(), TySimd128F4, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    newInstr = IR::Instr::New(Js::OpCode::CMPLEPS, tmp2, two_31_f4_mask, src, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    newInstr = IR::Instr::New(Js::OpCode::ANDPS, two_31_f4_mask, two_31_f4_mask, tmp2, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    newInstr = IR::Instr::New(Js::OpCode::SUBPS, tmp2, src, two_31_f4_mask, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    newInstr = IR::Instr::New(Js::OpCode::CVTTPS2DQ, dst, tmp2, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    // check if any value is out of range (i.e. >= 2^31, meaning originally >= 2^32 before value adjustment)
    // PCMPEQD tmp, dst, [X86_NEG_MASK]
    // MOVMSKPS mask, tmp
    // CMP mask, 0
    // JNE $throwLabel
    newInstr = IR::Instr::New(Js::OpCode::PCMPEQD, tmp, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegMaskF4Addr(), TySimd128I4, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    newInstr = IR::Instr::New(Js::OpCode::MOVMSKPS, mask, tmp, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
    newInstr->SetSrc1(mask);
    newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, throwLabel, m_func));

    // we pass range checks
    // add i4(2^31) values back to adjusted values.
    // Use first bit from the 2^31 float mask (0x4f000...0 << 1)
    // and AND with 2^31 int mask (0x8000..0) setting first bit to zero if lane hasn't been adjusted
    // MOVAPS two_31_i4_mask, [X86_TWO_31_I4]
    // PSLLD two_31_f4_mask, 1
    // ANDPS two_31_i4_mask, two_31_f4_mask
    // PADDD dst, dst, two_31_i4_mask
    // JMP $doneLabel
    newInstr = IR::Instr::New(Js::OpCode::MOVAPS, two_31_i4_mask, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86TwoPower31I4Addr(), TySimd128I4, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    newInstr = IR::Instr::New(Js::OpCode::PSLLD, two_31_f4_mask, two_31_f4_mask, IR::IntConstOpnd::New(1, TyInt8, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    newInstr = IR::Instr::New(Js::OpCode::ANDPS, two_31_i4_mask, two_31_i4_mask, two_31_f4_mask, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    newInstr = IR::Instr::New(Js::OpCode::PADDD, dst, dst, two_31_i4_mask, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JMP, doneLabel, m_func));

    // throwLabel:
    // Throw Range Error
    instr->InsertBefore(throwLabel);
    m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
    // doneLabel:
    instr->InsertBefore(doneLabel);

    instr->Remove();
    return pInstr;
}
| |
// Lowers Simd128_FromUint32x4_F4: convert 4 unsigned int32 lanes to 4 floats.
// CVTDQ2PS performs a *signed* conversion, so lanes >= 2^31 (negative when
// reinterpreted as signed) come out exactly 2^32 too small; those lanes are
// detected up front and a per-lane correction of +f32(2^32) is masked in.
// Returns the instruction preceding the removed 'instr'.
IR::Instr* LowererMD::Simd128LowerFloat32x4FromUint32x4(IR::Instr *instr)
{
    IR::Opnd *dst, *src, *tmp, *zero;
    IR::Instr *pInstr, *newInstr;

    dst = instr->GetDst();
    src = instr->GetSrc1();
    Assert(dst != nullptr && src != nullptr && dst->IsSimd128() && src->IsSimd128());

    pInstr = instr->m_prev;

    zero = IR::RegOpnd::New(TySimd128I4, m_func);
    tmp = IR::RegOpnd::New(TySimd128I4, m_func);

    // find unsigned values above 2^31-1. Comparison is signed, so look for values < 0
    // MOVAPS zero, [X86_ALL_ZEROS]
    newInstr = IR::Instr::New(Js::OpCode::MOVAPS, zero, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllZerosAddr(), TySimd128I4, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    // tmp = PCMPGTD zero, src
    // tmp lane is all-ones where the (signed) lane is < 0, i.e. unsigned value >= 2^31
    newInstr = IR::Instr::New(Js::OpCode::PCMPGTD, tmp, zero, src, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    // tmp has f32(2^32) for unsigned values above 2^31, 0 otherwise
    // ANDPS tmp, tmp, [X86_TWO_32_F4]
    newInstr = IR::Instr::New(Js::OpCode::ANDPS, tmp, tmp, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86TwoPower32F4Addr(), TySimd128F4, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    // convert (signed; big lanes are now 2^32 too small)
    // dst = CVTDQ2PS src
    newInstr = IR::Instr::New(Js::OpCode::CVTDQ2PS, dst, src, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    // Add f32(2^32) to negative values
    // ADDPS dst, dst, tmp
    newInstr = IR::Instr::New(Js::OpCode::ADDPS, dst, dst, tmp, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    instr->Remove();
    return pInstr;
}
| |
| IR::Instr* LowererMD::Simd128AsmJsLowerLoadElem(IR::Instr *instr) |
| { |
| Assert(instr->m_opcode == Js::OpCode::Simd128_LdArr_I4 || |
| instr->m_opcode == Js::OpCode::Simd128_LdArr_I8 || |
| instr->m_opcode == Js::OpCode::Simd128_LdArr_I16 || |
| instr->m_opcode == Js::OpCode::Simd128_LdArr_U4 || |
| instr->m_opcode == Js::OpCode::Simd128_LdArr_U8 || |
| instr->m_opcode == Js::OpCode::Simd128_LdArr_U16 || |
| instr->m_opcode == Js::OpCode::Simd128_LdArr_F4 || |
| //instr->m_opcode == Js::OpCode::Simd128_LdArr_D2 || |
| instr->m_opcode == Js::OpCode::Simd128_LdArrConst_I4 || |
| instr->m_opcode == Js::OpCode::Simd128_LdArrConst_I8 || |
| instr->m_opcode == Js::OpCode::Simd128_LdArrConst_I16 || |
| instr->m_opcode == Js::OpCode::Simd128_LdArrConst_U4 || |
| instr->m_opcode == Js::OpCode::Simd128_LdArrConst_U8 || |
| instr->m_opcode == Js::OpCode::Simd128_LdArrConst_U16 || |
| instr->m_opcode == Js::OpCode::Simd128_LdArrConst_F4 |
| //instr->m_opcode == Js::OpCode::Simd128_LdArrConst_D2 |
| ); |
| |
| IR::Instr * instrPrev = instr->m_prev; |
| IR::RegOpnd * indexOpnd = instr->GetSrc1()->AsIndirOpnd()->GetIndexOpnd(); |
| IR::RegOpnd * baseOpnd = instr->GetSrc1()->AsIndirOpnd()->GetBaseOpnd(); |
| IR::Opnd * dst = instr->GetDst(); |
| IR::Opnd * src1 = instr->GetSrc1(); |
| IR::Opnd * src2 = instr->GetSrc2(); |
| ValueType arrType = baseOpnd->GetValueType(); |
| uint8 dataWidth = instr->dataWidth; |
| |
| // Type-specialized. |
| Assert(dst->IsSimd128() && src1->IsSimd128() && src2->GetType() == TyUint32); |
| |
| IR::Instr * done; |
| if (indexOpnd || (((uint32)src1->AsIndirOpnd()->GetOffset() + dataWidth) > 0x1000000 /* 16 MB */)) |
| { |
| uint32 bpe = Simd128GetTypedArrBytesPerElem(arrType); |
| // bound check and helper |
| done = this->lowererMDArch.LowerAsmJsLdElemHelper(instr, true, bpe != dataWidth); |
| } |
| else |
| { |
| // Reaching here means: |
| // We have a constant index, and either |
| // (1) constant heap or (2) variable heap with constant index < 16MB. |
| // Case (1) requires static bound check. Case (2) means we are always in bound. |
| |
| // this can happen in cases where globopt props a constant access which was not known at bytecodegen time or when heap is non-constant |
| if (src2->IsIntConstOpnd() && ((uint32)src1->AsIndirOpnd()->GetOffset() + dataWidth > src2->AsIntConstOpnd()->AsUint32())) |
| { |
| m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError); |
| instr->Remove(); |
| return instrPrev; |
| } |
| done = instr; |
| } |
| |
| return Simd128ConvertToLoad(dst, src1, dataWidth, instr); |
| } |
| |
| IR::Instr* LowererMD::Simd128LowerLoadElem(IR::Instr *instr) |
| { |
| Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode()); |
| |
| Assert( |
| instr->m_opcode == Js::OpCode::Simd128_LdArr_I4 || |
| instr->m_opcode == Js::OpCode::Simd128_LdArr_I8 || |
| instr->m_opcode == Js::OpCode::Simd128_LdArr_I16 || |
| instr->m_opcode == Js::OpCode::Simd128_LdArr_U4 || |
| instr->m_opcode == Js::OpCode::Simd128_LdArr_U8 || |
| instr->m_opcode == Js::OpCode::Simd128_LdArr_U16 || |
| instr->m_opcode == Js::OpCode::Simd128_LdArr_F4 |
| ); |
| |
| IR::Opnd * src = instr->GetSrc1(); |
| IR::RegOpnd * indexOpnd =src->AsIndirOpnd()->GetIndexOpnd(); |
| IR::Opnd * dst = instr->GetDst(); |
| ValueType arrType = src->AsIndirOpnd()->GetBaseOpnd()->GetValueType(); |
| |
| // If we type-specialized, then array is a definite typed-array. |
| Assert(arrType.IsObject() && arrType.IsTypedArray()); |
| |
| Simd128GenerateUpperBoundCheck(indexOpnd, src->AsIndirOpnd(), arrType, instr); |
| Simd128LoadHeadSegment(src->AsIndirOpnd(), arrType, instr); |
| return Simd128ConvertToLoad(dst, src, instr->dataWidth, instr, m_lowerer->GetArrayIndirScale(arrType) /* scale factor */); |
| } |
| |
// Expands a SIMD load into the MOV sequence for the given dataWidth:
//   16 bytes: one full-width move (MOVUPS/MOVAPD family via GetAssignOp)
//   12 bytes: MOVSD (low 8 bytes) + MOVSS at offset +8, shifted into lane 2 and OR'd in
//    8 bytes: MOVSD
//    4 bytes: MOVSS
// src must be an indir opnd (heap/array access); scaleFactor is set on it for
// the non-asm.js path only. Removes 'instr' and returns the instruction before it.
IR::Instr *
LowererMD::Simd128ConvertToLoad(IR::Opnd *dst, IR::Opnd *src, uint8 dataWidth, IR::Instr* instr, byte scaleFactor /* = 0*/)
{
    IR::Instr *newInstr = nullptr;
    IR::Instr * instrPrev = instr->m_prev;

    // Type-specialized.
    Assert(dst && dst->IsSimd128());
    Assert(src->IsIndirOpnd());
    if (scaleFactor > 0)
    {
        // needed only for non-Asmjs code
        Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode());
        src->AsIndirOpnd()->SetScale(scaleFactor);
    }

    switch (dataWidth)
    {
    case 16:
        // MOVUPS dst, src1([arrayBuffer + indexOpnd])
        newInstr = IR::Instr::New(LowererMDArch::GetAssignOp(src->GetType()), dst, src, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        break;
    case 12:
    {
        // Load a 3-lane value: low two lanes with MOVSD, third lane with MOVSS
        // into a temp, shifted up to lane position 2 and merged into dst.
        IR::RegOpnd *temp = IR::RegOpnd::New(src->GetType(), instr->m_func);

        // MOVSD dst, src1([arrayBuffer + indexOpnd])
        newInstr = IR::Instr::New(Js::OpCode::MOVSD, dst, src, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);

        // MOVSS temp, src1([arrayBuffer + indexOpnd + 8])
        newInstr = IR::Instr::New(Js::OpCode::MOVSS, temp, src, instr->m_func);
        instr->InsertBefore(newInstr);
        newInstr->GetSrc1()->AsIndirOpnd()->SetOffset(src->AsIndirOpnd()->GetOffset() + 8, true);
        Legalize(newInstr);

        // PSLLDQ temp, 0x08  (byte shift: move the loaded float up to lane 2)
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSLLDQ, temp, temp, IR::IntConstOpnd::New(8, TyInt8, instr->m_func, true), instr->m_func));

        // ORPS dst, temp  (merge lane 2 into the low-2-lane result)
        newInstr = IR::Instr::New(Js::OpCode::ORPS, dst, dst, temp, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        break;
    }
    case 8:
        // MOVSD dst, src1([arrayBuffer + indexOpnd])
        newInstr = IR::Instr::New(Js::OpCode::MOVSD, dst, src, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        break;
    case 4:
        // MOVSS dst, src1([arrayBuffer + indexOpnd])
        newInstr = IR::Instr::New(Js::OpCode::MOVSS, dst, src, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        break;
    default:
        Assume(UNREACHED);
    }

    instr->Remove();
    return instrPrev;
}
| |
| IR::Instr* |
| LowererMD::Simd128AsmJsLowerStoreElem(IR::Instr *instr) |
| { |
| Assert( |
| instr->m_opcode == Js::OpCode::Simd128_StArr_I4 || |
| instr->m_opcode == Js::OpCode::Simd128_StArr_I8 || |
| instr->m_opcode == Js::OpCode::Simd128_StArr_I16 || |
| instr->m_opcode == Js::OpCode::Simd128_StArr_U4 || |
| instr->m_opcode == Js::OpCode::Simd128_StArr_U8 || |
| instr->m_opcode == Js::OpCode::Simd128_StArr_U16 || |
| instr->m_opcode == Js::OpCode::Simd128_StArr_F4 || |
| //instr->m_opcode == Js::OpCode::Simd128_StArr_D2 || |
| instr->m_opcode == Js::OpCode::Simd128_StArrConst_I4 || |
| instr->m_opcode == Js::OpCode::Simd128_StArrConst_I8 || |
| instr->m_opcode == Js::OpCode::Simd128_StArrConst_I16 || |
| instr->m_opcode == Js::OpCode::Simd128_StArrConst_U4 || |
| instr->m_opcode == Js::OpCode::Simd128_StArrConst_U8 || |
| instr->m_opcode == Js::OpCode::Simd128_StArrConst_U16 || |
| instr->m_opcode == Js::OpCode::Simd128_StArrConst_U4 || |
| instr->m_opcode == Js::OpCode::Simd128_StArrConst_F4 |
| //instr->m_opcode == Js::OpCode::Simd128_StArrConst_D2 |
| ); |
| |
| IR::Instr * instrPrev = instr->m_prev; |
| IR::RegOpnd * indexOpnd = instr->GetDst()->AsIndirOpnd()->GetIndexOpnd(); |
| IR::RegOpnd * baseOpnd = instr->GetDst()->AsIndirOpnd()->GetBaseOpnd(); |
| IR::Opnd * dst = instr->GetDst(); |
| IR::Opnd * src1 = instr->GetSrc1(); |
| IR::Opnd * src2 = instr->GetSrc2(); |
| ValueType arrType = baseOpnd->GetValueType(); |
| uint8 dataWidth = instr->dataWidth; |
| |
| // Type-specialized. |
| Assert(dst->IsSimd128() && src1->IsSimd128() && src2->GetType() == TyUint32); |
| |
| IR::Instr * done; |
| |
| if (indexOpnd || ((uint32)dst->AsIndirOpnd()->GetOffset() + dataWidth > 0x1000000)) |
| { |
| // CMP indexOpnd, src2(arrSize) |
| // JA $helper |
| // JMP $store |
| // $helper: |
| // Throw RangeError |
| // JMP $done |
| // $store: |
| // MOV dst([arrayBuffer + indexOpnd]), src1 |
| // $done: |
| uint32 bpe = Simd128GetTypedArrBytesPerElem(arrType); |
| done = this->lowererMDArch.LowerAsmJsStElemHelper(instr, true, bpe != dataWidth); |
| } |
| else |
| { |
| // we might have a constant index if globopt propped a constant store. we can ahead of time check if it is in-bounds |
| if (src2->IsIntConstOpnd() && ((uint32)dst->AsIndirOpnd()->GetOffset() + dataWidth > src2->AsIntConstOpnd()->AsUint32())) |
| { |
| m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError); |
| instr->Remove(); |
| return instrPrev; |
| } |
| done = instr; |
| } |
| |
| return Simd128ConvertToStore(dst, src1, dataWidth, instr); |
| } |
| |
| IR::Instr* |
| LowererMD::Simd128LowerStoreElem(IR::Instr *instr) |
| { |
| Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode()); |
| Assert( |
| instr->m_opcode == Js::OpCode::Simd128_StArr_I4 || |
| instr->m_opcode == Js::OpCode::Simd128_StArr_I8 || |
| instr->m_opcode == Js::OpCode::Simd128_StArr_I16 || |
| instr->m_opcode == Js::OpCode::Simd128_StArr_U4 || |
| instr->m_opcode == Js::OpCode::Simd128_StArr_U8 || |
| instr->m_opcode == Js::OpCode::Simd128_StArr_U16 || |
| instr->m_opcode == Js::OpCode::Simd128_StArr_F4 |
| ); |
| |
| IR::Opnd * dst = instr->GetDst(); |
| IR::RegOpnd * indexOpnd = dst->AsIndirOpnd()->GetIndexOpnd(); |
| IR::Opnd * src1 = instr->GetSrc1(); |
| uint8 dataWidth = instr->dataWidth; |
| ValueType arrType = dst->AsIndirOpnd()->GetBaseOpnd()->GetValueType(); |
| |
| // If we type-specialized, then array is a definite type-array. |
| Assert(arrType.IsObject() && arrType.IsTypedArray()); |
| |
| Simd128GenerateUpperBoundCheck(indexOpnd, dst->AsIndirOpnd(), arrType, instr); |
| Simd128LoadHeadSegment(dst->AsIndirOpnd(), arrType, instr); |
| return Simd128ConvertToStore(dst, src1, dataWidth, instr, m_lowerer->GetArrayIndirScale(arrType) /*scale factor*/); |
| } |
| |
| IR::Instr * |
| LowererMD::Simd128ConvertToStore(IR::Opnd *dst, IR::Opnd *src1, uint8 dataWidth, IR::Instr* instr, byte scaleFactor /* = 0 */) |
| { |
| IR::Instr * instrPrev = instr->m_prev; |
| |
| Assert(src1 && src1->IsSimd128()); |
| Assert(dst->IsIndirOpnd()); |
| |
| if (scaleFactor > 0) |
| { |
| // needed only for non-Asmjs code |
| Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode()); |
| dst->AsIndirOpnd()->SetScale(scaleFactor); |
| } |
| |
| switch (dataWidth) |
| { |
| case 16: |
| // MOVUPS dst([arrayBuffer + indexOpnd]), src1 |
| instr->InsertBefore(IR::Instr::New(LowererMDArch::GetAssignOp(src1->GetType()), dst, src1, instr->m_func)); |
| break; |
| case 12: |
| { |
| IR::RegOpnd *temp = IR::RegOpnd::New(src1->GetType(), instr->m_func); |
| IR::Instr *movss; |
| // MOVAPS temp, src |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, temp, src1, instr->m_func)); |
| // MOVSD dst([arrayBuffer + indexOpnd]), temp |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, temp, instr->m_func)); |
| // PSRLDQ temp, 0x08 |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRLDQ, temp, temp, IR::IntConstOpnd::New(8, TyInt8, m_func, true), instr->m_func)); |
| // MOVSS dst([arrayBuffer + indexOpnd + 8]), temp |
| movss = IR::Instr::New(Js::OpCode::MOVSS, dst, temp, instr->m_func); |
| instr->InsertBefore(movss); |
| movss->GetDst()->AsIndirOpnd()->SetOffset(dst->AsIndirOpnd()->GetOffset() + 8, true); |
| break; |
| } |
| case 8: |
| // MOVSD dst([arrayBuffer + indexOpnd]), src1 |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src1, instr->m_func)); |
| break; |
| case 4: |
| // MOVSS dst([arrayBuffer + indexOpnd]), src1 |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, src1, instr->m_func)); |
| break; |
| default:; |
| Assume(UNREACHED); |
| } |
| instr->Remove(); |
| return instrPrev; |
| } |
| |
// Emits the upper-bound check for a non-asm.js SIMD typed-array access:
// throws a range error at runtime unless the whole accessed span
// (index/offset .. +elemCount) fits within the head segment length.
// No-op when globopt already eliminated the check.
void
LowererMD::Simd128GenerateUpperBoundCheck(IR::RegOpnd *indexOpnd, IR::IndirOpnd *indirOpnd, ValueType arrType, IR::Instr *instr)
{
    Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode());

    IR::ArrayRegOpnd *arrayRegOpnd = indirOpnd->GetBaseOpnd()->AsArrayRegOpnd();
    IR::Opnd* headSegmentLengthOpnd;

    if (arrayRegOpnd->EliminatedUpperBoundCheck())
    {
        // already eliminated or extracted by globOpt (OptArraySrc). Nothing to do.
        return;
    }

    if (arrayRegOpnd->HeadSegmentLengthSym())
    {
        // globopt already loaded the head segment length into a sym; reuse it.
        headSegmentLengthOpnd = IR::RegOpnd::New(arrayRegOpnd->HeadSegmentLengthSym(), TyUint32, m_func);
    }
    else
    {
        // (headSegmentLength = [base + offset(length)])
        int lengthOffset;
        lengthOffset = m_lowerer->GetArrayOffsetOfLength(arrType);
        headSegmentLengthOpnd = IR::IndirOpnd::New(arrayRegOpnd, lengthOffset, TyUint32, m_func);
    }

    IR::LabelInstr * skipLabel = Lowerer::InsertLabel(false, instr);
    int32 elemCount = Lowerer::SimdGetElementCountFromBytes(arrayRegOpnd->GetValueType(), instr->dataWidth);
    if (indexOpnd)
    {
        // MOV tmp, elemCount
        // ADD tmp, index
        // CMP tmp, Length -- upper bound check
        // JBE $skipLabel
        // Throw RuntimeError
        // $skipLabel:
        IR::RegOpnd *tmp = IR::RegOpnd::New(indexOpnd->GetType(), m_func);
        IR::IntConstOpnd *elemCountOpnd = IR::IntConstOpnd::New(elemCount, TyInt8, m_func, true);
        m_lowerer->InsertMove(tmp, elemCountOpnd, skipLabel);
        Lowerer::InsertAdd(false, tmp, tmp, indexOpnd, skipLabel);
        m_lowerer->InsertCompareBranch(tmp, headSegmentLengthOpnd, Js::OpCode::BrLe_A, true, skipLabel, skipLabel);
    }
    else
    {
        // Constant index: fold (offset + elemCount) and compare against Length.
        // CMP Length, (offset + elemCount)
        // JA $skipLabel
        int32 offset = indirOpnd->GetOffset();
        int32 index = offset + elemCount;
        m_lowerer->InsertCompareBranch(headSegmentLengthOpnd, IR::IntConstOpnd::New(index, TyInt32, m_func, true), Js::OpCode::BrLe_A, true, skipLabel, skipLabel);
    }
    // Fall-through path (check failed): raise the range error just before $skipLabel.
    m_lowerer->GenerateRuntimeError(skipLabel, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
    return;
}
| |
| void |
| LowererMD::Simd128LoadHeadSegment(IR::IndirOpnd *indirOpnd, ValueType arrType, IR::Instr *instr) |
| { |
| |
| // For non-asm.js we check if headSeg symbol exists, else load it. |
| IR::ArrayRegOpnd *arrayRegOpnd = indirOpnd->GetBaseOpnd()->AsArrayRegOpnd(); |
| IR::RegOpnd *headSegmentOpnd; |
| |
| if (arrayRegOpnd->HeadSegmentSym()) |
| { |
| headSegmentOpnd = IR::RegOpnd::New(arrayRegOpnd->HeadSegmentSym(), TyMachPtr, m_func); |
| } |
| else |
| { |
| // MOV headSegment, [base + offset(head)] |
| int32 headOffset = m_lowerer->GetArrayOffsetOfHeadSegment(arrType); |
| IR::IndirOpnd * newIndirOpnd = IR::IndirOpnd::New(arrayRegOpnd, headOffset, TyMachPtr, this->m_func); |
| headSegmentOpnd = IR::RegOpnd::New(TyMachPtr, this->m_func); |
| m_lowerer->InsertMove(headSegmentOpnd, newIndirOpnd, instr); |
| } |
| |
| // change base to be the head segment instead of the array object |
| indirOpnd->SetBaseOpnd(headSegmentOpnd); |
| } |
| |
// Builds args list <dst, src1, src2, src3 ..>
// Walks the chain of ExtendArg_A instructions feeding 'instr': each ExtendArg
// carries one user argument in its src1 and links (via src2) to the ExtendArg
// defining the previous argument. Since SList::Push prepends, pushing the args
// in reverse chain order followed by dst yields the list <dst, src1, .., srcN>.
// Note: dst and src1 are unlinked from 'instr' as a side effect.
SList<IR::Opnd*> * LowererMD::Simd128GetExtendedArgs(IR::Instr *instr)
{
    SList<IR::Opnd*> * args = JitAnew(m_lowerer->m_alloc, SList<IR::Opnd*>, m_lowerer->m_alloc);
    IR::Instr *pInstr = instr;
    IR::Opnd *dst, *src1, *src2;

    dst = src1 = src2 = nullptr;

    if (pInstr->GetDst())
    {
        dst = pInstr->UnlinkDst();
    }

    src1 = pInstr->UnlinkSrc1();
    Assert(src1->GetStackSym()->IsSingleDef());

    // src1 is defined by the last ExtendArg in the chain; start walking there.
    pInstr = src1->GetStackSym()->GetInstrDef();

    while (pInstr && pInstr->m_opcode == Js::OpCode::ExtendArg_A)
    {
        Assert(pInstr->GetSrc1());
        // Copy the arg operand; the original stays attached to the ExtendArg.
        src1 = pInstr->GetSrc1()->Copy(this->m_func);
        if (src1->IsRegOpnd())
        {
            // Keep the sym alive on loop back-edges so lowering can use it later.
            this->m_lowerer->addToLiveOnBackEdgeSyms->Set(src1->AsRegOpnd()->m_sym->m_id);
        }
        args->Push(src1);

        if (pInstr->GetSrc2())
        {
            // Follow the link to the previous ExtendArg.
            src2 = pInstr->GetSrc2();
            Assert(src2->GetStackSym()->IsSingleDef());
            pInstr = src2->GetStackSym()->GetInstrDef();
        }
        else
        {
            // First ExtendArg reached; chain ends.
            pInstr = nullptr;
        }

    }
    args->Push(dst);
    // Every extended-args SIMD op has a dst plus at least 3 arguments.
    Assert(args->Count() > 3);
    return args;
}
| |
| |
| |
// Materializes a boolean operand as an all-ones (-1) / all-zeros register value
// of the requested type (suitable as a SIMD lane mask).
// - Constant operand: a single MOV of -1 (true) or 0 (false).
// - Register operand: tempReg = 0; tempReg -= opnd; CMOVS tempReg, -1 — which
//   normalizes the value to 0 or -1 (assumes the incoming bool is 0 or 1 —
//   TODO confirm; Legalize fixes up any non-encodable operand forms).
IR::Opnd*
LowererMD::EnregisterBoolConst(IR::Instr* instr, IR::Opnd *opnd, IRType type)
{

    if (opnd->IsIntConstOpnd() || opnd->IsInt64ConstOpnd())
    {
        bool isSet = opnd->GetImmediateValue(instr->m_func) != 0;
        IR::RegOpnd *tempReg = IR::RegOpnd::New(type, m_func);
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, tempReg, IR::IntConstOpnd::New(isSet ? -1 : 0, type, m_func, true), m_func));
        return tempReg;
    }

    // Work in the operand's own type; only the final use is re-typed.
    IRType origType = opnd->GetType();
    IR::RegOpnd *tempReg = IR::RegOpnd::New(origType, m_func);
    // MOV tempReg, 0
    IR::Instr* cmovInstr = IR::Instr::New(Js::OpCode::MOV, tempReg, IR::IntConstOpnd::New(0, origType, m_func, true), m_func);
    instr->InsertBefore(cmovInstr);
    Legalize(cmovInstr);
    // SUB tempReg, opnd  (sets SF when opnd was a positive nonzero value)
    cmovInstr = IR::Instr::New(Js::OpCode::SUB, tempReg, tempReg, opnd->UseWithNewType(origType, m_func), m_func);
    instr->InsertBefore(cmovInstr);
    Legalize(cmovInstr);
    // CMOVS tempReg, -1  (turn any "true" into an all-ones mask)
    cmovInstr = IR::Instr::New(Js::OpCode::CMOVS, tempReg, tempReg, IR::IntConstOpnd::New(-1, origType, m_func, true), m_func);
    instr->InsertBefore(cmovInstr);
    Legalize(cmovInstr);
    return tempReg->UseWithNewType(type, m_func);
}
| |
| IR::Opnd* |
| LowererMD::EnregisterIntConst(IR::Instr* instr, IR::Opnd *constOpnd, IRType type /* = TyInt32*/) |
| { |
| IRType constType = constOpnd->GetType(); |
| if (!IRType_IsNativeInt(constType)) |
| { |
| // not int opnd, nothing to do |
| return constOpnd; |
| } |
| |
| Assert(type == TyInt32 || type == TyInt16 || type == TyInt8); |
| Assert(constType == TyInt32 || constType == TyInt16 || constType == TyInt8); |
| if (constOpnd->IsRegOpnd()) |
| { |
| // already a register, just cast |
| constOpnd->SetType(type); |
| return constOpnd; |
| } |
| |
| // en-register |
| IR::RegOpnd *tempReg = IR::RegOpnd::New(type, m_func); |
| |
| // MOV tempReg, constOpnd |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, tempReg, constOpnd, m_func)); |
| return tempReg; |
| } |
| |
// Builds the IR-opcode -> machine-opcode table used by
// Simd128TryLowerMappedInstruction. Only SIMD ops that map 1:1 (or 1:1 with
// operand swapping/legalization) to a single x86/x64 instruction appear here;
// unmapped ops (slot 0) fall through to Simd128LowerUnMappedInstruction.
void LowererMD::Simd128InitOpcodeMap()
{
    m_simd128OpCodesMap = JitAnewArrayZ(m_lowerer->m_alloc, Js::OpCode, Js::Simd128OpcodeCount());

    // All simd ops should be contiguous for this mapping to work
    Assert(Js::OpCode::Simd128_End + (Js::OpCode) 1 == Js::OpCode::Simd128_Start_Extend);
    // int32x4
    //SET_SIMDOPCODE(Simd128_FromFloat64x2_I4     , CVTTPD2DQ);
    //SET_SIMDOPCODE(Simd128_FromFloat64x2Bits_I4 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_I4 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt16x8Bits_I4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt8x16Bits_I4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint32x4Bits_I4  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint16x8Bits_I4  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint8x16Bits_I4  , MOVAPS);
    SET_SIMDOPCODE(Simd128_Add_I4               , PADDD);
    SET_SIMDOPCODE(Simd128_Sub_I4               , PSUBD);
    SET_SIMDOPCODE(Simd128_Lt_I4                , PCMPGTD);
    SET_SIMDOPCODE(Simd128_Gt_I4                , PCMPGTD);
    SET_SIMDOPCODE(Simd128_Eq_I4                , PCMPEQD);
    SET_SIMDOPCODE(Simd128_And_I4               , PAND);
    SET_SIMDOPCODE(Simd128_Or_I4                , POR);
    SET_SIMDOPCODE(Simd128_Xor_I4               , PXOR);
    SET_SIMDOPCODE(Simd128_Not_I4               , XORPS);

    // int16x8 / int8x16 bit-casts and arithmetic
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_I8 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_I8   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt8x16Bits_I8   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint32x4Bits_I8  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint16x8Bits_I8  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint8x16Bits_I8  , MOVAPS);
    SET_SIMDOPCODE(Simd128_Or_I16               , POR);
    SET_SIMDOPCODE(Simd128_Xor_I16              , PXOR);
    SET_SIMDOPCODE(Simd128_Not_I16              , XORPS);
    SET_SIMDOPCODE(Simd128_And_I16              , PAND);
    SET_SIMDOPCODE(Simd128_Add_I16              , PADDB);
    SET_SIMDOPCODE(Simd128_Sub_I16              , PSUBB);
    SET_SIMDOPCODE(Simd128_Lt_I16               , PCMPGTB);
    SET_SIMDOPCODE(Simd128_Gt_I16               , PCMPGTB);
    SET_SIMDOPCODE(Simd128_Eq_I16               , PCMPEQB);
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_I16, MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_I16  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt16x8Bits_I16  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint32x4Bits_I16 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint16x8Bits_I16 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint8x16Bits_I16 , MOVAPS);

    // unsigned lanes: bit-casts are plain register moves
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_U4 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_U4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt16x8Bits_U4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt8x16Bits_U4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint16x8Bits_U4  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint8x16Bits_U4  , MOVAPS);

    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_U8 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_U8   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt16x8Bits_U8   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt8x16Bits_U8   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint32x4Bits_U8  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint8x16Bits_U8  , MOVAPS);

    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_U16 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_U16   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt16x8Bits_U16   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt8x16Bits_U16   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint32x4Bits_U16  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint16x8Bits_U16  , MOVAPS);

    // float32x4
    //SET_SIMDOPCODE(Simd128_FromFloat64x2_F4     , CVTPD2PS);
    //SET_SIMDOPCODE(Simd128_FromFloat64x2Bits_F4 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4_F4       , CVTDQ2PS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_F4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt16x8Bits_F4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt8x16Bits_F4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint32x4Bits_F4  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint16x8Bits_F4  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint8x16Bits_F4  , MOVAPS);
    SET_SIMDOPCODE(Simd128_Abs_F4               , ANDPS);
    SET_SIMDOPCODE(Simd128_Neg_F4               , XORPS);
    SET_SIMDOPCODE(Simd128_Add_F4               , ADDPS);
    SET_SIMDOPCODE(Simd128_Sub_F4               , SUBPS);
    SET_SIMDOPCODE(Simd128_Mul_F4               , MULPS);
    SET_SIMDOPCODE(Simd128_Div_F4               , DIVPS);
    SET_SIMDOPCODE(Simd128_Sqrt_F4              , SQRTPS);
    SET_SIMDOPCODE(Simd128_Lt_F4                , CMPLTPS); // CMPLTPS
    SET_SIMDOPCODE(Simd128_LtEq_F4              , CMPLEPS); // CMPLEPS
    SET_SIMDOPCODE(Simd128_Eq_F4                , CMPEQPS); // CMPEQPS
    SET_SIMDOPCODE(Simd128_Neq_F4               , CMPNEQPS); // CMPNEQPS
    SET_SIMDOPCODE(Simd128_Gt_F4                , CMPLTPS); // CMPLTPS (swap srcs)
    SET_SIMDOPCODE(Simd128_GtEq_F4              , CMPLEPS); // CMPLEPS (swap srcs)
    // float64x2
    SET_SIMDOPCODE(Simd128_Neg_D2               , XORPS);
    SET_SIMDOPCODE(Simd128_Add_D2               , ADDPD);
    SET_SIMDOPCODE(Simd128_Abs_D2               , ANDPD);
    SET_SIMDOPCODE(Simd128_Sub_D2               , SUBPD);
    SET_SIMDOPCODE(Simd128_Mul_D2               , MULPD);
    SET_SIMDOPCODE(Simd128_Div_D2               , DIVPD);
    SET_SIMDOPCODE(Simd128_Min_D2               , MINPD);
    SET_SIMDOPCODE(Simd128_Max_D2               , MAXPD);
    SET_SIMDOPCODE(Simd128_Sqrt_D2              , SQRTPD);
    SET_SIMDOPCODE(Simd128_Lt_D2                , CMPLTPD); // CMPLTPD
    SET_SIMDOPCODE(Simd128_LtEq_D2              , CMPLEPD); // CMPLEPD
    SET_SIMDOPCODE(Simd128_Eq_D2                , CMPEQPD); // CMPEQPD
    SET_SIMDOPCODE(Simd128_Neq_D2               , CMPNEQPD); // CMPNEQPD
    SET_SIMDOPCODE(Simd128_Gt_D2                , CMPLTPD); // CMPLTPD (swap srcs)
    SET_SIMDOPCODE(Simd128_GtEq_D2              , CMPLEPD); // CMPLEPD (swap srcs)

#if 0
    SET_SIMDOPCODE(Simd128_FromFloat32x4_D2, CVTPS2PD);
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_D2, MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4_D2, CVTDQ2PD);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_D2, MOVAPS);
#endif // 0

    // int16x8
    SET_SIMDOPCODE(Simd128_And_I8               , PAND);
    SET_SIMDOPCODE(Simd128_Or_I8                , POR);
    SET_SIMDOPCODE(Simd128_Xor_I8               , XORPS);
    SET_SIMDOPCODE(Simd128_Not_I8               , XORPS);
    SET_SIMDOPCODE(Simd128_Add_I8               , PADDW);
    SET_SIMDOPCODE(Simd128_Sub_I8               , PSUBW);
    SET_SIMDOPCODE(Simd128_Mul_I8               , PMULLW);
    SET_SIMDOPCODE(Simd128_Eq_I8                , PCMPEQW);
    SET_SIMDOPCODE(Simd128_Lt_I8                , PCMPGTW); // (swap srcs)
    SET_SIMDOPCODE(Simd128_Gt_I8                , PCMPGTW);
    SET_SIMDOPCODE(Simd128_AddSaturate_I8       , PADDSW);
    SET_SIMDOPCODE(Simd128_SubSaturate_I8       , PSUBSW);

    SET_SIMDOPCODE(Simd128_AddSaturate_I16      , PADDSB);
    SET_SIMDOPCODE(Simd128_SubSaturate_I16      , PSUBSB);

    // uint32x4
    SET_SIMDOPCODE(Simd128_And_U4               , PAND);
    SET_SIMDOPCODE(Simd128_Or_U4                , POR);
    SET_SIMDOPCODE(Simd128_Xor_U4               , XORPS);
    SET_SIMDOPCODE(Simd128_Not_U4               , XORPS);
    SET_SIMDOPCODE(Simd128_Add_U4               , PADDD);
    SET_SIMDOPCODE(Simd128_Sub_U4               , PSUBD);
    SET_SIMDOPCODE(Simd128_Eq_U4                , PCMPEQD); // same as int32x4.equal

    // uint16x8
    SET_SIMDOPCODE(Simd128_And_U8               , PAND);
    SET_SIMDOPCODE(Simd128_Or_U8                , POR);
    SET_SIMDOPCODE(Simd128_Xor_U8               , XORPS);
    SET_SIMDOPCODE(Simd128_Not_U8               , XORPS);
    SET_SIMDOPCODE(Simd128_Add_U8               , PADDW);
    SET_SIMDOPCODE(Simd128_Sub_U8               , PSUBW);
    SET_SIMDOPCODE(Simd128_Mul_U8               , PMULLW);
    SET_SIMDOPCODE(Simd128_Eq_U8                , PCMPEQW); // same as int16X8.equal
    SET_SIMDOPCODE(Simd128_AddSaturate_U8       , PADDUSW);
    SET_SIMDOPCODE(Simd128_SubSaturate_U8       , PSUBUSW);

    // uint8x16
    SET_SIMDOPCODE(Simd128_And_U16              , PAND);
    SET_SIMDOPCODE(Simd128_Or_U16               , POR);
    SET_SIMDOPCODE(Simd128_Xor_U16              , XORPS);
    SET_SIMDOPCODE(Simd128_Not_U16              , XORPS);
    SET_SIMDOPCODE(Simd128_Add_U16              , PADDB);
    SET_SIMDOPCODE(Simd128_Sub_U16              , PSUBB);

    SET_SIMDOPCODE(Simd128_Eq_U16               , PCMPEQB); // same as int8x16.equal
    SET_SIMDOPCODE(Simd128_AddSaturate_U16      , PADDUSB);
    SET_SIMDOPCODE(Simd128_SubSaturate_U16      , PSUBUSB);

    // bool vectors: logical ops only
    SET_SIMDOPCODE(Simd128_And_B4               , PAND);
    SET_SIMDOPCODE(Simd128_Or_B4                , POR);
    SET_SIMDOPCODE(Simd128_Xor_B4               , XORPS);
    SET_SIMDOPCODE(Simd128_Not_B4               , XORPS);

    SET_SIMDOPCODE(Simd128_And_B8               , PAND);
    SET_SIMDOPCODE(Simd128_Or_B8                , POR);
    SET_SIMDOPCODE(Simd128_Xor_B8               , XORPS);
    SET_SIMDOPCODE(Simd128_Not_B8               , XORPS);

    SET_SIMDOPCODE(Simd128_And_B16              , PAND);
    SET_SIMDOPCODE(Simd128_Or_B16               , POR);
    SET_SIMDOPCODE(Simd128_Xor_B16              , XORPS);
    SET_SIMDOPCODE(Simd128_Not_B16              , XORPS);

    // int64x2
    SET_SIMDOPCODE(Simd128_Add_I2               , PADDQ);
    SET_SIMDOPCODE(Simd128_Sub_I2               , PSUBQ);
}
// Undefine the helper macros defined above (names must match the #defines,
// otherwise the macros leak into the rest of the translation unit).
#undef SET_SIMDOPCODE
#undef GET_SIMDOPCODE
| |
| void LowererMD::CheckShuffleLanes_4(uint8 lanes[], uint8 lanesSrc[], uint *fromSrc1, uint *fromSrc2) |
| { |
| Assert(lanes); |
| Assert(lanesSrc); |
| Assert(fromSrc1 && fromSrc2); |
| *fromSrc1 = 0; |
| *fromSrc2 = 0; |
| for (uint i = 0; i < 4; i++) |
| { |
| if (lanes[i] >= 0 && lanes[i] < 4) |
| { |
| (*fromSrc1)++; |
| lanesSrc[i] = 1; |
| } |
| else if (lanes[i] >= 4 && lanes[i] < 8) |
| { |
| (*fromSrc2)++; |
| lanesSrc[i] = 2; |
| } |
| else |
| { |
| Assert(UNREACHED); |
| } |
| } |
| } |
| |
| void LowererMD::InsertShufps(uint8 lanes[], IR::Opnd *dst, IR::Opnd *src1, IR::Opnd *src2, IR::Instr *instr) |
| { |
| int8 shufMask; |
| uint8 normLanes[4]; |
| IR::RegOpnd * tmp = IR::RegOpnd::New(TySimd128I4, m_func); |
| for (uint i = 0; i < 4; i++) |
| { |
| normLanes[i] = (lanes[i] >= 4) ? (lanes[i] - 4) : lanes[i]; |
| } |
| shufMask = (int8)((normLanes[3] << 6) | (normLanes[2] << 4) | (normLanes[1] << 2) | normLanes[0]); |
| // ToDo: Move this to legalization code |
| if (dst->IsEqual(src1)) |
| { |
| // instruction already legal |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, src2, IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true), m_func)); |
| } |
| else if (dst->IsEqual(src2)) |
| { |
| |
| // MOVAPS tmp, dst |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, tmp, dst, m_func)); |
| // MOVAPS dst, src1 |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func)); |
| // SHUF dst, tmp, imm8 |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, tmp, IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true), m_func)); |
| } |
| else |
| { |
| // MOVAPS dst, src1 |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func)); |
| // SHUF dst, src2, imm8 |
| instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, src2, IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true), m_func)); |
| } |
| } |
| |
| BYTE LowererMD::Simd128GetTypedArrBytesPerElem(ValueType arrType) |
| { |
| return (1 << Lowerer::GetArrayIndirScale(arrType)); |
| } |
| |
| #endif |