blob: 24e5da8d00e9ef308e46dcec903b99dec4f1a232 [file] [log] [blame]
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/wasm/baseline/liftoff-assembler.h"
namespace v8 {
namespace internal {
namespace wasm {
namespace liftoff {
// Liftoff Frames.
// slot Frame
// +--------------------+---------------------------
// n+4 | optional padding slot to keep the stack 16 byte aligned.
// n+3 | parameter n |
// ... | ... |
// 4 | parameter 1 | or parameter 2
// 3 | parameter 0 | or parameter 1
// 2 | (result address) | or parameter 0
// -----+--------------------+---------------------------
// 1 | return addr (lr) |
// 0 | previous frame (fp)|
// -----+--------------------+ <-- frame ptr (fp)
// -1 | 0xa: WASM |
// -2 | instance |
// -----+--------------------+---------------------------
// -3 | slot 0 | ^
// -4 | slot 1 | |
// | | Frame slots
// | | |
// | | v
// | optional padding slot to keep the stack 16 byte aligned.
// -----+--------------------+ <-- stack ptr (sp)
constexpr int kInstanceOffset = 2 * kSystemPointerSize;
// Returns the memory operand for the Liftoff stack slot at {offset}.
// Positive offsets are addressed relative to fp, all others relative to sp;
// in both cases the displacement applied to the base register is {-offset}.
inline MemOperand GetStackSlot(int offset) {
  return MemOperand(offset > 0 ? fp : sp, -offset);
}
// Returns the operand of the frame slot at {kInstanceOffset}, which is where
// the instance is kept.
inline MemOperand GetInstanceOperand() {
  return GetStackSlot(kInstanceOffset);
}
// Returns the view of {reg} that matches {type}: the W/X view of the general
// purpose register for i32/i64, and the S/D/Q view of the floating point
// register for f32/f64/s128. Any other kind is unreachable.
inline CPURegister GetRegFromType(const LiftoffRegister& reg, ValueType type) {
  switch (type.kind()) {
    case ValueType::kI32:
      return reg.gp().W();
    case ValueType::kI64:
      return reg.gp().X();
    case ValueType::kF32:
      return reg.fp().S();
    case ValueType::kF64:
      return reg.fp().D();
    case ValueType::kS128:
      return reg.fp().Q();
    default:
      UNREACHABLE();
  }
}
// Builds a list of X registers from {list}. If {list} contains an odd number
// of registers, {padreg} is added so that the count becomes even.
inline CPURegList PadRegList(RegList list) {
  if ((base::bits::CountPopulation(list) & 1) != 0) list |= padreg.bit();
  return CPURegList(CPURegister::kRegister, kXRegSizeInBits, list);
}
// Builds a list of Q-sized vector registers from {list}. If {list} contains an
// odd number of registers, {fp_scratch} is added so that the count becomes
// even.
inline CPURegList PadVRegList(RegList list) {
  if ((base::bits::CountPopulation(list) & 1) != 0) list |= fp_scratch.bit();
  return CPURegList(CPURegister::kVRegister, kQRegSizeInBits, list);
}
// Acquires a scratch register from {temps} whose class and width match
// {type}: W for i32, X for i64, S for f32 and D for f64. Any other kind is
// unreachable.
inline CPURegister AcquireByType(UseScratchRegisterScope* temps,
                                 ValueType type) {
  switch (type.kind()) {
    case ValueType::kI32:
      return temps->AcquireW();
    case ValueType::kI64:
      return temps->AcquireX();
    case ValueType::kF32:
      return temps->AcquireS();
    case ValueType::kF64:
      return temps->AcquireD();
    default:
      UNREACHABLE();
  }
}
// Builds the memory operand for an access at {addr} + {offset} + {offset_imm}.
//  - No valid {offset} register: immediate-offset operand on {addr}.
//  - Valid {offset}, zero {offset_imm}: register-offset operand using the
//    zero-extended (UXTW) W view of {offset}.
//  - Valid {offset}, non-zero {offset_imm}: the sum is first computed into a W
//    scratch register taken from {temps}, which is then used as the
//    zero-extended register offset.
// NOTE(review): the W-register add wraps modulo 2^32; presumably callers have
// already bounds-checked index + offset -- confirm.
inline MemOperand GetMemOp(LiftoffAssembler* assm,
                           UseScratchRegisterScope* temps, Register addr,
                           Register offset, uint32_t offset_imm) {
  // Wasm memory is limited to a size <4GB.
  if (offset.is_valid()) {
    if (offset_imm == 0) return MemOperand(addr.X(), offset.W(), UXTW);
    Register tmp = temps->AcquireW();
    assm->Add(tmp, offset.W(), offset_imm);
    return MemOperand(addr.X(), tmp, UXTW);
  }
  return MemOperand(addr.X(), offset_imm);
}
// Direction of a SIMD shift; template argument of {EmitSimdShift}.
enum class ShiftDirection : bool { kLeft, kRight };
// Signedness of a SIMD shift; selects between the signed (Sshl/Sshr) and
// unsigned (Ushl/Ushr) instruction forms in the SIMD shift helpers.
enum class ShiftSign : bool { kSigned, kUnsigned };
// Emits a SIMD shift of {lhs} by the amount held in the general purpose
// register {rhs}, writing the result to {dst}. The amount is masked to the
// lane width of {format} and broadcast into a scratch vector; for right
// shifts the broadcast amounts are negated before the Sshl/Ushl is emitted.
// Left shifts must use the default {ShiftSign::kSigned}.
template <ShiftDirection dir, ShiftSign sign = ShiftSign::kSigned>
inline void EmitSimdShift(LiftoffAssembler* assm, VRegister dst, VRegister lhs,
                          Register rhs, VectorFormat format) {
  DCHECK_IMPLIES(dir == ShiftDirection::kLeft, sign == ShiftSign::kSigned);
  DCHECK_EQ(dst.LaneCount(), LaneCountFromFormat(format));

  UseScratchRegisterScope temps(assm);
  VRegister tmp = temps.AcquireV(format);
  Register shift = dst.Is2D() ? temps.AcquireX() : temps.AcquireW();
  int mask = LaneSizeInBitsFromFormat(format) - 1;
  assm->And(shift, rhs, mask);
  assm->Dup(tmp, shift);

  if (dir == ShiftDirection::kRight) {
    assm->Neg(tmp, tmp);
  }

  if (sign == ShiftSign::kSigned) {
    assm->Sshl(dst, lhs, tmp);
  } else {
    assm->Ushl(dst, lhs, tmp);
  }
}
// Emits a SIMD right shift of {lhs} by the constant {rhs} into {dst}. The
// amount is masked to the lane width of {format}. A masked amount of zero is
// emitted as a plain move (or nothing when {dst} == {lhs}).
template <VectorFormat format, ShiftSign sign>
inline void EmitSimdShiftRightImmediate(LiftoffAssembler* assm, VRegister dst,
                                        VRegister lhs, int32_t rhs) {
  // Sshr and Ushr do not allow a shift amount of 0, so check for that here.
  int mask = LaneSizeInBitsFromFormat(format) - 1;
  int32_t shift = rhs & mask;
  if (!shift) {
    if (dst != lhs) {
      assm->Mov(dst, lhs);
    }
    return;
  }

  if (sign == ShiftSign::kSigned) {
    assm->Sshr(dst, lhs, shift);
  } else {
    assm->Ushr(dst, lhs, shift);
  }
}
// Sets the W view of {dst}'s gp register to 1 if any lane of the vector in
// {src} is non-zero, and to 0 otherwise: the unsigned maximum across lanes is
// reduced into a scratch register and compared against zero.
inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst,
                        LiftoffRegister src) {
  // AnyTrue does not depend on the number of lanes, so we can use V4S for all.
  UseScratchRegisterScope scope(assm);
  VRegister temp = scope.AcquireV(kFormatS);
  assm->Umaxv(temp, src.fp().V4S());
  assm->Umov(dst.gp().W(), temp, 0);
  assm->Cmp(dst.gp().W(), 0);
  assm->Cset(dst.gp().W(), ne);
}
// Sets the W view of {dst}'s gp register to 1 if every lane of the vector in
// {src} (interpreted with {format}) is non-zero, and to 0 otherwise: the
// unsigned minimum across lanes is reduced into a scratch register and
// compared against zero.
inline void EmitAllTrue(LiftoffAssembler* assm, LiftoffRegister dst,
                        LiftoffRegister src, VectorFormat format) {
  UseScratchRegisterScope scope(assm);
  VRegister temp = scope.AcquireV(ScalarFormatFromFormat(format));
  assm->Uminv(temp, VRegister::Create(src.fp().code(), format));
  assm->Umov(dst.gp().W(), temp, 0);
  assm->Cmp(dst.gp().W(), 0);
  assm->Cset(dst.gp().W(), ne);
}
} // namespace liftoff
// Emits exactly one placeholder instruction (`sub sp, sp, #0`) and returns the
// pc offset at which it was emitted, so that {PatchPrepareStackFrame} can
// overwrite it once the frame size is known.
int LiftoffAssembler::PrepareStackFrame() {
  int offset = pc_offset();
  InstructionAccurateScope scope(this, 1);
  sub(sp, sp, 0);
  return offset;
}
void LiftoffAssembler::PatchPrepareStackFrame(int offset, int frame_size) {
static_assert(kStackSlotSize == kXRegSize,
"kStackSlotSize must equal kXRegSize");
// The stack pointer is required to be quadword aligned.
// Misalignment will cause a stack alignment fault.
frame_size = RoundUp(frame_size, kQuadWordSizeInBytes);
if (!IsImmAddSub(frame_size)) {
// Round the stack to a page to try to fit a add/sub immediate.
frame_size = RoundUp(frame_size, 0x1000);
if (!IsImmAddSub(frame_size)) {
// Stack greater than 4M! Because this is a quite improbable case, we
// just fallback to TurboFan.
bailout(kOtherReason, "Stack too big");
// When using the simulator, deal with Liftoff which allocates the stack
// before checking it.
// TODO(arm): Remove this when the stack check mechanism will be updated.
if (frame_size > KB / 2) {
"Stack limited to 512 bytes to avoid a bug in StackCheck");
PatchingAssembler patching_assembler(AssemblerOptions{},
buffer_start_ + offset, 1);
#if V8_OS_WIN
if (frame_size > kStackPageSize) {
// Generate OOL code (at the end of the function, where the current
// assembler is pointing) to do the explicit stack limit check (see
// visual-studio-6.0/aa227153(v=vs.60)).
// At the function start, emit a jump to that OOL code (from {offset} to
// {pc_offset()}).
int ool_offset = pc_offset() - offset;
patching_assembler.b(ool_offset >> kInstrSizeLog2);
// Now generate the OOL code.
Claim(frame_size, 1);
// Jump back to the start of the function (from {pc_offset()} to {offset +
// kInstrSize}).
int func_start_offset = offset + kInstrSize - pc_offset();
b(func_start_offset >> kInstrSizeLog2);
// Finalizes code generation by forcing the pending constant pool to be
// emitted without a jump over it.
void LiftoffAssembler::FinishCode() {
  ForceConstantPoolEmissionWithoutJump();
}
// Abandons the current compilation by notifying the underlying assembler
// that code generation was aborted.
void LiftoffAssembler::AbortCompilation() {
  AbortedCodeGeneration();
}
// static
// Size in bytes of the fixed part of a Liftoff frame, i.e. the offset of the
// instance slot ({liftoff::kInstanceOffset}).
constexpr int LiftoffAssembler::StaticStackFrameSize() {
  return liftoff::kInstanceOffset;
}
// Returns the number of stack bytes a value of {type} occupies: the element
// size for s128, and {kStackSlotSize} for every other kind.
int LiftoffAssembler::SlotSizeForType(ValueType type) {
  // TODO(zhin): Unaligned access typically take additional cycles, we should do
  // some performance testing to see how big an effect it will take.
  switch (type.kind()) {
    case ValueType::kS128:
      return type.element_size_bytes();
    default:
      return kStackSlotSize;
  }
}
// Returns whether stack slots holding a value of {type} need extra alignment.
// Only s128 does.
bool LiftoffAssembler::NeedsAlignment(ValueType type) {
  switch (type.kind()) {
    case ValueType::kS128:
      return true;
    default:
      // No alignment because all other types are kStackSlotSize.
      return false;
  }
}
// Materializes the constant {value} in {reg}. Integer constants are moved
// into the W/X view of the gp register as immediates carrying {rmode};
// floating point constants are moved into the S/D view of the fp register.
// Any other value kind is unreachable.
void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value,
                                    RelocInfo::Mode rmode) {
  switch (value.type().kind()) {
    case ValueType::kI32:
      Mov(reg.gp().W(), Immediate(value.to_i32(), rmode));
      break;
    case ValueType::kI64:
      Mov(reg.gp().X(), Immediate(value.to_i64(), rmode));
      break;
    case ValueType::kF32:
      Fmov(reg.fp().S(), value.to_f32_boxed().get_scalar());
      break;
    case ValueType::kF64:
      Fmov(reg.fp().D(), value.to_f64_boxed().get_scalar());
      break;
    default:
      UNREACHABLE();
  }
}
// Loads the instance from its frame slot into {dst} and then loads the
// {size}-byte field at {offset} from it, again into {dst}. {size} must be 4
// (loaded into the W view) or 8.
void LiftoffAssembler::LoadFromInstance(Register dst, uint32_t offset,
                                        int size) {
  DCHECK_LE(offset, kMaxInt);
  Ldr(dst, liftoff::GetInstanceOperand());
  DCHECK(size == 4 || size == 8);
  if (size == 4) {
    Ldr(dst.W(), MemOperand(dst, offset));
  } else {
    Ldr(dst, MemOperand(dst, offset));
  }
}
// Loads the instance from its frame slot into {dst} and then loads the tagged
// pointer field at {offset} from it, again into {dst}.
void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst,
                                                     uint32_t offset) {
  DCHECK_LE(offset, kMaxInt);
  Ldr(dst, liftoff::GetInstanceOperand());
  LoadTaggedPointerField(dst, MemOperand(dst, offset));
}
// Stores {instance} into the instance slot of the frame.
void LiftoffAssembler::SpillInstance(Register instance) {
  Str(instance, liftoff::GetInstanceOperand());
}
// Reloads the instance from its frame slot into {dst}.
void LiftoffAssembler::FillInstanceInto(Register dst) {
  Ldr(dst, liftoff::GetInstanceOperand());
}
// Loads the tagged pointer field at {src_addr} + {offset_reg} + {offset_imm}
// into {dst}. {pinned} is not used by this implementation.
void LiftoffAssembler::LoadTaggedPointer(Register dst, Register src_addr,
                                         Register offset_reg,
                                         uint32_t offset_imm,
                                         LiftoffRegList pinned) {
  UseScratchRegisterScope temps(this);
  MemOperand src_op =
      liftoff::GetMemOp(this, &temps, src_addr, offset_reg, offset_imm);
  LoadTaggedPointerField(dst, src_op);
}
// Emits a load of kind {type} from {src_addr} + {offset_reg} + {offset_imm}
// into {dst}. Sub-word integer loads zero- or sign-extend into the W or X
// view of the gp register as dictated by {type}; float and s128 loads target
// the S/D/Q view of the fp register.
// If {protected_load_pc} is non-null it receives the pc offset recorded right
// before the load instruction is emitted. {pinned} and {is_load_mem} are not
// used by this implementation.
void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr,
                            Register offset_reg, uint32_t offset_imm,
                            LoadType type, LiftoffRegList pinned,
                            uint32_t* protected_load_pc, bool is_load_mem) {
  UseScratchRegisterScope temps(this);
  MemOperand src_op =
      liftoff::GetMemOp(this, &temps, src_addr, offset_reg, offset_imm);
  if (protected_load_pc) *protected_load_pc = pc_offset();
  switch (type.value()) {
    case LoadType::kI32Load8U:
    case LoadType::kI64Load8U:
      Ldrb(dst.gp().W(), src_op);
      break;
    case LoadType::kI32Load8S:
      Ldrsb(dst.gp().W(), src_op);
      break;
    case LoadType::kI64Load8S:
      Ldrsb(dst.gp().X(), src_op);
      break;
    case LoadType::kI32Load16U:
    case LoadType::kI64Load16U:
      Ldrh(dst.gp().W(), src_op);
      break;
    case LoadType::kI32Load16S:
      Ldrsh(dst.gp().W(), src_op);
      break;
    case LoadType::kI64Load16S:
      Ldrsh(dst.gp().X(), src_op);
      break;
    case LoadType::kI32Load:
    case LoadType::kI64Load32U:
      Ldr(dst.gp().W(), src_op);
      break;
    case LoadType::kI64Load32S:
      Ldrsw(dst.gp().X(), src_op);
      break;
    case LoadType::kI64Load:
      Ldr(dst.gp().X(), src_op);
      break;
    case LoadType::kF32Load:
      Ldr(dst.fp().S(), src_op);
      break;
    case LoadType::kF64Load:
      Ldr(dst.fp().D(), src_op);
      break;
    case LoadType::kS128Load:
      Ldr(dst.fp().Q(), src_op);
      break;
  }
}
// Emits a store of kind {type} of {src} to {dst_addr} + {offset_reg} +
// {offset_imm}. Narrow integer stores write the low bytes of the W view of
// the gp register; float and s128 stores use the S/D/Q view of the fp
// register.
// If {protected_store_pc} is non-null it receives the pc offset recorded
// right before the store instruction is emitted. {pinned} and {is_store_mem}
// are not used by this implementation.
void LiftoffAssembler::Store(Register dst_addr, Register offset_reg,
                             uint32_t offset_imm, LiftoffRegister src,
                             StoreType type, LiftoffRegList pinned,
                             uint32_t* protected_store_pc, bool is_store_mem) {
  UseScratchRegisterScope temps(this);
  MemOperand dst_op =
      liftoff::GetMemOp(this, &temps, dst_addr, offset_reg, offset_imm);
  if (protected_store_pc) *protected_store_pc = pc_offset();
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8:
      Strb(src.gp().W(), dst_op);
      break;
    case StoreType::kI32Store16:
    case StoreType::kI64Store16:
      Strh(src.gp().W(), dst_op);
      break;
    case StoreType::kI32Store:
    case StoreType::kI64Store32:
      Str(src.gp().W(), dst_op);
      break;
    case StoreType::kI64Store:
      Str(src.gp().X(), dst_op);
      break;
    case StoreType::kF32Store:
      Str(src.fp().S(), dst_op);
      break;
    case StoreType::kF64Store:
      Str(src.fp().D(), dst_op);
      break;
    case StoreType::kS128Store:
      Str(src.fp().Q(), dst_op);
      break;
  }
}
// Emits no code; reports a {kAtomics} bailout for atomic loads.
void LiftoffAssembler::AtomicLoad(LiftoffRegister dst, Register src_addr,
                                  Register offset_reg, uint32_t offset_imm,
                                  LoadType type, LiftoffRegList pinned) {
  bailout(kAtomics, "AtomicLoad");
}
// Emits no code; reports a {kAtomics} bailout for atomic stores.
void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg,
                                   uint32_t offset_imm, LiftoffRegister src,
                                   StoreType type, LiftoffRegList pinned) {
  bailout(kAtomics, "AtomicStore");
}
// Emits no code; reports a {kAtomics} bailout for atomic add.
void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  bailout(kAtomics, "AtomicAdd");
}
// Emits no code; reports a {kAtomics} bailout for atomic sub.
void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  bailout(kAtomics, "AtomicSub");
}
// Emits no code; reports a {kAtomics} bailout for atomic and.
void LiftoffAssembler::AtomicAnd(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  bailout(kAtomics, "AtomicAnd");
}
// Emits no code; reports a {kAtomics} bailout for atomic or.
void LiftoffAssembler::AtomicOr(Register dst_addr, Register offset_reg,
                                uint32_t offset_imm, LiftoffRegister value,
                                LiftoffRegister result, StoreType type) {
  bailout(kAtomics, "AtomicOr");
}
// Emits no code; reports a {kAtomics} bailout for atomic xor.
void LiftoffAssembler::AtomicXor(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  bailout(kAtomics, "AtomicXor");
}
// Emits no code; reports a {kAtomics} bailout for atomic exchange.
void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg,
                                      uint32_t offset_imm,
                                      LiftoffRegister value,
                                      LiftoffRegister result, StoreType type) {
  bailout(kAtomics, "AtomicExchange");
}
// Emits no code; reports a {kAtomics} bailout for atomic compare-exchange.
void LiftoffAssembler::AtomicCompareExchange(
    Register dst_addr, Register offset_reg, uint32_t offset_imm,
    LiftoffRegister expected, LiftoffRegister new_value, LiftoffRegister result,
    StoreType type) {
  bailout(kAtomics, "AtomicCompareExchange");
}
// Emits a data memory barrier over the inner shareable domain that orders
// all access types.
void LiftoffAssembler::AtomicFence() {
  Dmb(InnerShareable, BarrierAll);
}
// Loads the caller frame slot {caller_slot_idx} into the view of {dst} that
// matches {type}. The slot lives at fp + ({caller_slot_idx} + 1) *
// {kStackSlotSize}.
void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst,
                                           uint32_t caller_slot_idx,
                                           ValueType type) {
  int32_t offset = (caller_slot_idx + 1) * LiftoffAssembler::kStackSlotSize;
  Ldr(liftoff::GetRegFromType(dst, type), MemOperand(fp, offset));
}
// Stores the view of {src} that matches {type} into the caller frame slot
// {caller_slot_idx}. The slot lives at fp + ({caller_slot_idx} + 1) *
// {kStackSlotSize}.
void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
                                            uint32_t caller_slot_idx,
                                            ValueType type) {
  int32_t offset = (caller_slot_idx + 1) * LiftoffAssembler::kStackSlotSize;
  Str(liftoff::GetRegFromType(src, type), MemOperand(fp, offset));
}
// Copies a value of {type} from the stack slot at {src_offset} to the stack
// slot at {dst_offset}, going through a scratch register of matching type.
void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
                                      ValueType type) {
  UseScratchRegisterScope temps(this);
  CPURegister scratch = liftoff::AcquireByType(&temps, type);
  Ldr(scratch, liftoff::GetStackSlot(src_offset));
  Str(scratch, liftoff::GetStackSlot(dst_offset));
}
// Moves {src} to {dst}: a 32-bit move for i32, otherwise a 64-bit move
// ({type} must then be i64).
void LiftoffAssembler::Move(Register dst, Register src, ValueType type) {
  if (type == kWasmI32) {
    Mov(dst.W(), src.W());
  } else {
    DCHECK_EQ(kWasmI64, type);
    Mov(dst.X(), src.X());
  }
}
// Moves the floating point / vector register {src} to {dst} using the S view
// for f32, the D view for f64 and the Q view otherwise ({type} must then be
// s128).
void LiftoffAssembler::Move(DoubleRegister dst, DoubleRegister src,
                            ValueType type) {
  if (type == kWasmF32) {
    Fmov(dst.S(), src.S());
  } else if (type == kWasmF64) {
    Fmov(dst.D(), src.D());
  } else {
    DCHECK_EQ(kWasmS128, type);
    Mov(dst.Q(), src.Q());
  }
}
// Stores the view of {reg} that matches {type} to the stack slot at {offset}.
void LiftoffAssembler::Spill(int offset, LiftoffRegister reg, ValueType type) {
  MemOperand dst = liftoff::GetStackSlot(offset);
  Str(liftoff::GetRegFromType(reg, type), dst);
}
void LiftoffAssembler::Spill(int offset, WasmValue value) {
MemOperand dst = liftoff::GetStackSlot(offset);
UseScratchRegisterScope temps(this);
CPURegister src = CPURegister::no_reg();
switch (value.type().kind()) {
case ValueType::kI32:
if (value.to_i32() == 0) {
src = wzr;
} else {
src = temps.AcquireW();
Mov(src.W(), value.to_i32());
case ValueType::kI64:
if (value.to_i64() == 0) {
src = xzr;
} else {
src = temps.AcquireX();
Mov(src.X(), value.to_i64());
// We do not track f32 and f64 constants, hence they are unreachable.
Str(src, dst);
// Loads the stack slot at {offset} into the view of {reg} that matches
// {type}.
void LiftoffAssembler::Fill(LiftoffRegister reg, int offset, ValueType type) {
  MemOperand src = liftoff::GetStackSlot(offset);
  Ldr(liftoff::GetRegFromType(reg, type), src);
}
// Filling one half of an i64 register pair. This backend passes i64 values
// in a single X register everywhere else in this file, so this is presumably
// never called -- TODO confirm against the platform-independent caller.
void LiftoffAssembler::FillI64Half(Register, int offset, RegPairHalf) {
  UNREACHABLE();
}
void LiftoffAssembler::FillStackSlotsWithZero(int start, int size) {
DCHECK_LT(0, size);
DCHECK_EQ(0, size % 4);
RecordUsedSpillOffset(start + size);
int max_stp_offset = -start - size;
if (size <= 12 * kStackSlotSize &&
IsImmLSPair(max_stp_offset, kXRegSizeLog2)) {
// Special straight-line code for up to 12 slots. Generates one
// instruction per two slots (<= 7 instructions total).
STATIC_ASSERT(kStackSlotSize == kSystemPointerSize);
uint32_t remainder = size;
for (; remainder >= 2 * kStackSlotSize; remainder -= 2 * kStackSlotSize) {
stp(xzr, xzr, liftoff::GetStackSlot(start + remainder));
DCHECK_GE(12, remainder);
switch (remainder) {
case 12:
str(xzr, liftoff::GetStackSlot(start + remainder));
str(wzr, liftoff::GetStackSlot(start + remainder - 8));
case 8:
str(xzr, liftoff::GetStackSlot(start + remainder));
case 4:
str(wzr, liftoff::GetStackSlot(start + remainder));
case 0:
} else {
// General case for bigger counts (5-8 instructions).
UseScratchRegisterScope temps(this);
Register address_reg = temps.AcquireX();
// This {Sub} might use another temp register if the offset is too large.
Sub(address_reg, fp, start + size);
Register count_reg = temps.AcquireX();
Mov(count_reg, size / 4);
Label loop;
sub(count_reg, count_reg, 1);
str(wzr, MemOperand(address_reg, kSystemPointerSize / 2, PostIndex));
cbnz(count_reg, &loop);
// Helper macros that stamp out the simple LiftoffAssembler::emit_<name>
// methods. Each generated method forwards to the macro-assembler
// {instruction} on the register view matching the operand width: W/X for
// i32/i64 and S/D for f32/f64.

// i32 binary operation on registers.
#define I32_BINOP(name, instruction)                             \
  void LiftoffAssembler::emit_##name(Register dst, Register lhs, \
                                     Register rhs) {             \
    instruction(dst.W(), lhs.W(), rhs.W());                      \
  }
// i32 binary operation, plus an emit_<name>i variant taking an immediate.
#define I32_BINOP_I(name, instruction)                              \
  I32_BINOP(name, instruction)                                      \
  void LiftoffAssembler::emit_##name##i(Register dst, Register lhs, \
                                        int32_t imm) {              \
    instruction(dst.W(), lhs.W(), Immediate(imm));                  \
  }
// i64 binary operation on registers.
#define I64_BINOP(name, instruction)                                           \
  void LiftoffAssembler::emit_##name(LiftoffRegister dst, LiftoffRegister lhs, \
                                     LiftoffRegister rhs) {                    \
    instruction(dst.gp().X(), lhs.gp().X(), rhs.gp().X());                     \
  }
// i64 binary operation, plus an emit_<name>i variant taking an immediate.
#define I64_BINOP_I(name, instruction)                                      \
  I64_BINOP(name, instruction)                                              \
  void LiftoffAssembler::emit_##name##i(LiftoffRegister dst,                \
                                        LiftoffRegister lhs, int32_t imm) { \
    instruction(dst.gp().X(), lhs.gp().X(), imm);                           \
  }
// f32 binary operation.
#define FP32_BINOP(name, instruction)                                        \
  void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \
                                     DoubleRegister rhs) {                   \
    instruction(dst.S(), lhs.S(), rhs.S());                                  \
  }
// f32 unary operation.
#define FP32_UNOP(name, instruction)                                           \
  void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
    instruction(dst.S(), src.S());                                             \
  }
// f32 unary operation whose emitter returns true.
#define FP32_UNOP_RETURN_TRUE(name, instruction)                               \
  bool LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
    instruction(dst.S(), src.S());                                             \
    return true;                                                               \
  }
// f64 binary operation.
#define FP64_BINOP(name, instruction)                                        \
  void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \
                                     DoubleRegister rhs) {                   \
    instruction(dst.D(), lhs.D(), rhs.D());                                  \
  }
// f64 unary operation.
#define FP64_UNOP(name, instruction)                                           \
  void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
    instruction(dst.D(), src.D());                                             \
  }
// f64 unary operation whose emitter returns true.
#define FP64_UNOP_RETURN_TRUE(name, instruction)                               \
  bool LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
    instruction(dst.D(), src.D());                                             \
    return true;                                                               \
  }
// i32 shift by a register amount, plus an emit_<name>i variant whose constant
// amount is masked to 5 bits.
#define I32_SHIFTOP(name, instruction)                              \
  void LiftoffAssembler::emit_##name(Register dst, Register src,    \
                                     Register amount) {             \
    instruction(dst.W(), src.W(), amount.W());                      \
  }                                                                 \
  void LiftoffAssembler::emit_##name##i(Register dst, Register src, \
                                        int32_t amount) {           \
    instruction(dst.W(), src.W(), amount & 31);                     \
  }
// i64 shift by a register amount, plus an emit_<name>i variant whose constant
// amount is masked to 6 bits.
#define I64_SHIFTOP(name, instruction)                                         \
  void LiftoffAssembler::emit_##name(LiftoffRegister dst, LiftoffRegister src, \
                                     Register amount) {                        \
    instruction(dst.gp().X(), src.gp().X(), amount.X());                       \
  }                                                                            \
  void LiftoffAssembler::emit_##name##i(LiftoffRegister dst,                   \
                                        LiftoffRegister src, int32_t amount) { \
    instruction(dst.gp().X(), src.gp().X(), amount & 63);                      \
  }
I32_BINOP_I(i32_add, Add)
I32_BINOP(i32_sub, Sub)
I32_BINOP(i32_mul, Mul)
I32_BINOP_I(i32_and, And)
I32_BINOP_I(i32_or, Orr)
I32_BINOP_I(i32_xor, Eor)
I32_SHIFTOP(i32_shl, Lsl)
I32_SHIFTOP(i32_sar, Asr)
I32_SHIFTOP(i32_shr, Lsr)
I64_BINOP_I(i64_add, Add)
I64_BINOP(i64_sub, Sub)
I64_BINOP(i64_mul, Mul)
I64_BINOP_I(i64_and, And)
I64_BINOP_I(i64_or, Orr)
I64_BINOP_I(i64_xor, Eor)
I64_SHIFTOP(i64_shl, Lsl)
I64_SHIFTOP(i64_sar, Asr)
I64_SHIFTOP(i64_shr, Lsr)
FP32_BINOP(f32_add, Fadd)
FP32_BINOP(f32_sub, Fsub)
FP32_BINOP(f32_mul, Fmul)
FP32_BINOP(f32_div, Fdiv)
FP32_BINOP(f32_min, Fmin)
FP32_BINOP(f32_max, Fmax)
FP32_UNOP(f32_abs, Fabs)
FP32_UNOP(f32_neg, Fneg)
FP32_UNOP_RETURN_TRUE(f32_ceil, Frintp)
FP32_UNOP_RETURN_TRUE(f32_floor, Frintm)
FP32_UNOP_RETURN_TRUE(f32_trunc, Frintz)
FP32_UNOP_RETURN_TRUE(f32_nearest_int, Frintn)
FP32_UNOP(f32_sqrt, Fsqrt)
FP64_BINOP(f64_add, Fadd)
FP64_BINOP(f64_sub, Fsub)
FP64_BINOP(f64_mul, Fmul)
FP64_BINOP(f64_div, Fdiv)
FP64_BINOP(f64_min, Fmin)
FP64_BINOP(f64_max, Fmax)
FP64_UNOP(f64_abs, Fabs)
FP64_UNOP(f64_neg, Fneg)
FP64_UNOP_RETURN_TRUE(f64_ceil, Frintp)
FP64_UNOP_RETURN_TRUE(f64_floor, Frintm)
FP64_UNOP_RETURN_TRUE(f64_trunc, Frintz)
FP64_UNOP_RETURN_TRUE(f64_nearest_int, Frintn)
FP64_UNOP(f64_sqrt, Fsqrt)
// Undefine every helper macro defined above so that none of them leaks out of
// this header.
#undef I32_BINOP
#undef I32_BINOP_I
#undef I64_BINOP
#undef I64_BINOP_I
#undef FP32_BINOP
#undef FP32_UNOP
#undef FP32_UNOP_RETURN_TRUE
#undef FP64_BINOP
#undef FP64_UNOP
#undef FP64_UNOP_RETURN_TRUE
#undef I32_SHIFTOP
#undef I64_SHIFTOP
// Counts the leading zero bits of the 32-bit value in {src} into {dst}.
void LiftoffAssembler::emit_i32_clz(Register dst, Register src) {
  Clz(dst.W(), src.W());
}
// Counts the trailing zero bits of the 32-bit value in {src} into {dst} by
// reversing the bits and then counting leading zeros.
void LiftoffAssembler::emit_i32_ctz(Register dst, Register src) {
  Rbit(dst.W(), src.W());
  Clz(dst.W(), dst.W());
}
// Computes the population count of the 32-bit value in {src} into {dst}.
// The value is moved into a scratch vector register, the set bits are counted
// per byte (Cnt) and the byte counts are summed (Addv). Always returns true.
bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) {
  UseScratchRegisterScope temps(this);
  VRegister scratch = temps.AcquireV(kFormat8B);
  Fmov(scratch.S(), src.W());
  Cnt(scratch, scratch);
  Addv(scratch.B(), scratch);
  Fmov(dst.W(), scratch.S());
  return true;
}
// Counts the leading zero bits of the 64-bit value in {src} into {dst}.
void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) {
  Clz(dst.gp().X(), src.gp().X());
}
// Counts the trailing zero bits of the 64-bit value in {src} into {dst} by
// reversing the bits and then counting leading zeros.
void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) {
  Rbit(dst.gp().X(), src.gp().X());
  Clz(dst.gp().X(), dst.gp().X());
}
// Computes the population count of the 64-bit value in {src} into {dst}.
// The value is moved into a scratch vector register, the set bits are counted
// per byte (Cnt) and the byte counts are summed (Addv). Always returns true.
bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
                                       LiftoffRegister src) {
  UseScratchRegisterScope temps(this);
  VRegister scratch = temps.AcquireV(kFormat8B);
  Fmov(scratch.D(), src.gp().X());
  Cnt(scratch, scratch);
  Addv(scratch.B(), scratch);
  Fmov(dst.gp().X(), scratch.D());
  return true;
}
// Emits a signed 32-bit division {dst} = {lhs} / {rhs}.
// Branches to {trap_div_by_zero} when {rhs} is zero and to
// {trap_div_unrepresentable} for kMinInt / -1. When {dst} aliases neither
// input, the division is issued before the checks; otherwise it is issued
// after them so the inputs are still intact for the checks.
void LiftoffAssembler::emit_i32_divs(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero,
                                     Label* trap_div_unrepresentable) {
  Register dst_w = dst.W();
  Register lhs_w = lhs.W();
  Register rhs_w = rhs.W();
  bool can_use_dst = !dst_w.Aliases(lhs_w) && !dst_w.Aliases(rhs_w);
  if (can_use_dst) {
    // Do div early.
    Sdiv(dst_w, lhs_w, rhs_w);
  }
  // Check for division by zero.
  Cbz(rhs_w, trap_div_by_zero);
  // Check for kMinInt / -1. This is unrepresentable.
  Cmp(rhs_w, -1);
  Ccmp(lhs_w, 1, NoFlag, eq);
  B(trap_div_unrepresentable, vs);
  if (!can_use_dst) {
    // Do div.
    Sdiv(dst_w, lhs_w, rhs_w);
  }
}
// Emits an unsigned 32-bit division {dst} = {lhs} / {rhs}, branching to
// {trap_div_by_zero} first when {rhs} is zero.
void LiftoffAssembler::emit_i32_divu(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  // Check for division by zero.
  Cbz(rhs.W(), trap_div_by_zero);
  // Do div.
  Udiv(dst.W(), lhs.W(), rhs.W());
}
// Emits a signed 32-bit remainder {dst} = {lhs} % {rhs}, computed as
// lhs - (lhs / rhs) * rhs with the quotient held in a scratch register.
// Branches to {trap_div_by_zero} when {rhs} is zero.
void LiftoffAssembler::emit_i32_rems(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  Register dst_w = dst.W();
  Register lhs_w = lhs.W();
  Register rhs_w = rhs.W();
  // Do early div.
  // No need to check kMinInt / -1 because the result is kMinInt and then
  // kMinInt * -1 -> kMinInt. In this case, the Msub result is therefore 0.
  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireW();
  Sdiv(scratch, lhs_w, rhs_w);
  // Check for division by zero.
  Cbz(rhs_w, trap_div_by_zero);
  // Compute remainder.
  Msub(dst_w, scratch, rhs_w, lhs_w);
}
// Emits an unsigned 32-bit remainder {dst} = {lhs} % {rhs}, computed as
// lhs - (lhs / rhs) * rhs with the quotient held in a scratch register.
// Branches to {trap_div_by_zero} when {rhs} is zero.
void LiftoffAssembler::emit_i32_remu(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  Register dst_w = dst.W();
  Register lhs_w = lhs.W();
  Register rhs_w = rhs.W();
  // Do early div.
  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireW();
  Udiv(scratch, lhs_w, rhs_w);
  // Check for division by zero.
  Cbz(rhs_w, trap_div_by_zero);
  // Compute remainder.
  Msub(dst_w, scratch, rhs_w, lhs_w);
}
// Emits a signed 64-bit division {dst} = {lhs} / {rhs}.
// Branches to {trap_div_by_zero} when {rhs} is zero and to
// {trap_div_unrepresentable} for kMinInt / -1. When {dst} aliases neither
// input, the division is issued before the checks; otherwise it is issued
// after them so the inputs are still intact for the checks.
// Always returns true.
bool LiftoffAssembler::emit_i64_divs(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero,
                                     Label* trap_div_unrepresentable) {
  Register dst_x = dst.gp().X();
  Register lhs_x = lhs.gp().X();
  Register rhs_x = rhs.gp().X();
  bool can_use_dst = !dst_x.Aliases(lhs_x) && !dst_x.Aliases(rhs_x);
  if (can_use_dst) {
    // Do div early.
    Sdiv(dst_x, lhs_x, rhs_x);
  }
  // Check for division by zero.
  Cbz(rhs_x, trap_div_by_zero);
  // Check for kMinInt / -1. This is unrepresentable.
  Cmp(rhs_x, -1);
  Ccmp(lhs_x, 1, NoFlag, eq);
  B(trap_div_unrepresentable, vs);
  if (!can_use_dst) {
    // Do div.
    Sdiv(dst_x, lhs_x, rhs_x);
  }
  return true;
}
// Emits an unsigned 64-bit division {dst} = {lhs} / {rhs}, branching to
// {trap_div_by_zero} first when {rhs} is zero. Always returns true.
bool LiftoffAssembler::emit_i64_divu(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {
  // Check for division by zero.
  Cbz(rhs.gp().X(), trap_div_by_zero);
  // Do div.
  Udiv(dst.gp().X(), lhs.gp().X(), rhs.gp().X());
  return true;
}
// Emits a signed 64-bit remainder {dst} = {lhs} % {rhs}, computed as
// lhs - (lhs / rhs) * rhs with the quotient held in a scratch register.
// Branches to {trap_div_by_zero} when {rhs} is zero. Always returns true.
bool LiftoffAssembler::emit_i64_rems(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {
  Register dst_x = dst.gp().X();
  Register lhs_x = lhs.gp().X();
  Register rhs_x = rhs.gp().X();
  // Do early div.
  // No need to check kMinInt / -1 because the result is kMinInt and then
  // kMinInt * -1 -> kMinInt. In this case, the Msub result is therefore 0.
  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireX();
  Sdiv(scratch, lhs_x, rhs_x);
  // Check for division by zero.
  Cbz(rhs_x, trap_div_by_zero);
  // Compute remainder.
  Msub(dst_x, scratch, rhs_x, lhs_x);
  return true;
}
// Emits an unsigned 64-bit remainder {dst} = {lhs} % {rhs}, computed as
// lhs - (lhs / rhs) * rhs with the quotient held in a scratch register.
// Branches to {trap_div_by_zero} when {rhs} is zero. Always returns true.
bool LiftoffAssembler::emit_i64_remu(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {
  Register dst_x = dst.gp().X();
  Register lhs_x = lhs.gp().X();
  Register rhs_x = rhs.gp().X();
  // Do early div.
  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireX();
  Udiv(scratch, lhs_x, rhs_x);
  // Check for division by zero.
  Cbz(rhs_x, trap_div_by_zero);
  // Compute remainder.
  Msub(dst_x, scratch, rhs_x, lhs_x);
  return true;
}
// Zero-extends the 32-bit value in {src} to pointer width in {dst}.
void LiftoffAssembler::emit_u32_to_intptr(Register dst, Register src) {
  Uxtw(dst, src);
}
// Sets {dst} to {lhs} with its sign bit replaced by the sign bit of {rhs}.
// The sign of {rhs} is shifted down to bit 0 of a scratch register and then
// inserted at bit 31 of {dst} with a shift-left-and-insert (Sli).
void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs,
                                         DoubleRegister rhs) {
  UseScratchRegisterScope temps(this);
  DoubleRegister scratch = temps.AcquireD();
  Ushr(scratch.V2S(), rhs.V2S(), 31);
  if (dst != lhs) {
    Fmov(dst.S(), lhs.S());
  }
  Sli(dst.V2S(), scratch.V2S(), 31);
}
// Sets {dst} to {lhs} with its sign bit replaced by the sign bit of {rhs}.
// The sign of {rhs} is shifted down to bit 0 of a scratch register and then
// inserted at bit 63 of {dst} with a shift-left-and-insert (Sli).
void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs,
                                         DoubleRegister rhs) {
  UseScratchRegisterScope temps(this);
  DoubleRegister scratch = temps.AcquireD();
  Ushr(scratch.V1D(), rhs.V1D(), 63);
  if (dst != lhs) {
    Fmov(dst.D(), lhs.D());
  }
  Sli(dst.V1D(), scratch.V1D(), 63);
}
// Emits the numeric conversion selected by {opcode} from {src} to {dst}.
// The trapping float-to-int conversions branch to {trap} when the input is
// NaN or out of range; the saturating, int-to-float, float-to-float and
// reinterpret conversions never branch. Returns true for every opcode handled
// here; any other opcode is unreachable.
bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
                                            LiftoffRegister dst,
                                            LiftoffRegister src, Label* trap) {
  switch (opcode) {
    case kExprI32ConvertI64:
      if (src != dst) Mov(dst.gp().W(), src.gp().W());
      return true;
    case kExprI32SConvertF32:
      Fcvtzs(dst.gp().W(), src.fp().S());  // f32 -> i32 round to zero.
      // Check underflow and NaN.
      Fcmp(src.fp().S(), static_cast<float>(INT32_MIN));
      // Check overflow.
      Ccmp(dst.gp().W(), -1, VFlag, ge);
      B(trap, vs);
      return true;
    case kExprI32UConvertF32:
      Fcvtzu(dst.gp().W(), src.fp().S());  // f32 -> i32 round to zero.
      // Check underflow and NaN.
      Fcmp(src.fp().S(), -1.0);
      // Check overflow.
      Ccmp(dst.gp().W(), -1, ZFlag, gt);
      B(trap, eq);
      return true;
    case kExprI32SConvertF64: {
      // INT32_MIN and INT32_MAX are valid results, we cannot test the result
      // to detect the overflows. We could have done two immediate floating
      // point comparisons but it would have generated two conditional branches.
      UseScratchRegisterScope temps(this);
      VRegister fp_ref = temps.AcquireD();
      VRegister fp_cmp = temps.AcquireD();
      Fcvtzs(dst.gp().W(), src.fp().D());  // f64 -> i32 round to zero.
      Frintz(fp_ref, src.fp().D());        // f64 -> f64 round to zero.
      Scvtf(fp_cmp, dst.gp().W());         // i32 -> f64.
      // If comparison fails, we have an overflow or a NaN.
      Fcmp(fp_cmp, fp_ref);
      B(trap, ne);
      return true;
    }
    case kExprI32UConvertF64: {
      // INT32_MAX is a valid result, we cannot test the result to detect the
      // overflows. We could have done two immediate floating point comparisons
      // but it would have generated two conditional branches.
      UseScratchRegisterScope temps(this);
      VRegister fp_ref = temps.AcquireD();
      VRegister fp_cmp = temps.AcquireD();
      Fcvtzu(dst.gp().W(), src.fp().D());  // f64 -> i32 round to zero.
      Frintz(fp_ref, src.fp().D());        // f64 -> f64 round to zero.
      Ucvtf(fp_cmp, dst.gp().W());         // i32 -> f64.
      // If comparison fails, we have an overflow or a NaN.
      Fcmp(fp_cmp, fp_ref);
      B(trap, ne);
      return true;
    }
    case kExprI32SConvertSatF32:
      Fcvtzs(dst.gp().W(), src.fp().S());
      return true;
    case kExprI32UConvertSatF32:
      Fcvtzu(dst.gp().W(), src.fp().S());
      return true;
    case kExprI32SConvertSatF64:
      Fcvtzs(dst.gp().W(), src.fp().D());
      return true;
    case kExprI32UConvertSatF64:
      Fcvtzu(dst.gp().W(), src.fp().D());
      return true;
    case kExprI64SConvertSatF32:
      Fcvtzs(dst.gp().X(), src.fp().S());
      return true;
    case kExprI64UConvertSatF32:
      Fcvtzu(dst.gp().X(), src.fp().S());
      return true;
    case kExprI64SConvertSatF64:
      Fcvtzs(dst.gp().X(), src.fp().D());
      return true;
    case kExprI64UConvertSatF64:
      Fcvtzu(dst.gp().X(), src.fp().D());
      return true;
    case kExprI32ReinterpretF32:
      Fmov(dst.gp().W(), src.fp().S());
      return true;
    case kExprI64SConvertI32:
      Sxtw(dst.gp().X(), src.gp().W());
      return true;
    case kExprI64SConvertF32:
      Fcvtzs(dst.gp().X(), src.fp().S());  // f32 -> i64 round to zero.
      // Check underflow and NaN.
      Fcmp(src.fp().S(), static_cast<float>(INT64_MIN));
      // Check overflow.
      Ccmp(dst.gp().X(), -1, VFlag, ge);
      B(trap, vs);
      return true;
    case kExprI64UConvertF32:
      Fcvtzu(dst.gp().X(), src.fp().S());  // f32 -> i64 round to zero.
      // Check underflow and NaN.
      Fcmp(src.fp().S(), -1.0);
      // Check overflow.
      Ccmp(dst.gp().X(), -1, ZFlag, gt);
      B(trap, eq);
      return true;
    case kExprI64SConvertF64:
      Fcvtzs(dst.gp().X(), src.fp().D());  // f64 -> i64 round to zero.
      // Check underflow and NaN. INT64_MIN is exactly representable, so the
      // comparison constant is the same value as with the former float cast.
      Fcmp(src.fp().D(), static_cast<double>(INT64_MIN));
      // Check overflow.
      Ccmp(dst.gp().X(), -1, VFlag, ge);
      B(trap, vs);
      return true;
    case kExprI64UConvertF64:
      Fcvtzu(dst.gp().X(), src.fp().D());  // f64 -> i64 round to zero.
      // Check underflow and NaN.
      Fcmp(src.fp().D(), -1.0);
      // Check overflow.
      Ccmp(dst.gp().X(), -1, ZFlag, gt);
      B(trap, eq);
      return true;
    case kExprI64UConvertI32:
      // A 32-bit move clears the upper half of the destination.
      Mov(dst.gp().W(), src.gp().W());
      return true;
    case kExprI64ReinterpretF64:
      Fmov(dst.gp().X(), src.fp().D());
      return true;
    case kExprF32SConvertI32:
      Scvtf(dst.fp().S(), src.gp().W());
      return true;
    case kExprF32UConvertI32:
      Ucvtf(dst.fp().S(), src.gp().W());
      return true;
    case kExprF32SConvertI64:
      Scvtf(dst.fp().S(), src.gp().X());
      return true;
    case kExprF32UConvertI64:
      Ucvtf(dst.fp().S(), src.gp().X());
      return true;
    case kExprF32ConvertF64:
      Fcvt(dst.fp().S(), src.fp().D());
      return true;
    case kExprF32ReinterpretI32:
      Fmov(dst.fp().S(), src.gp().W());
      return true;
    case kExprF64SConvertI32:
      Scvtf(dst.fp().D(), src.gp().W());
      return true;
    case kExprF64UConvertI32:
      Ucvtf(dst.fp().D(), src.gp().W());
      return true;
    case kExprF64SConvertI64:
      Scvtf(dst.fp().D(), src.gp().X());
      return true;
    case kExprF64UConvertI64:
      Ucvtf(dst.fp().D(), src.gp().X());
      return true;
    case kExprF64ConvertF32:
      Fcvt(dst.fp().D(), src.fp().S());
      return true;
    case kExprF64ReinterpretI64:
      Fmov(dst.fp().D(), src.gp().X());
      return true;
    default:
      UNREACHABLE();
  }
}
// Sign-extends the low 8 bits of {src} into {dst}.
void LiftoffAssembler::emit_i32_signextend_i8(Register dst, Register src) {
  sxtb(dst, src);
}
// Sign-extends the low 16 bits of {src} into {dst}.
void LiftoffAssembler::emit_i32_signextend_i16(Register dst, Register src) {
  sxth(dst, src);
}
// Sign-extends the low 8 bits of {src} into the 64-bit {dst}.
void LiftoffAssembler::emit_i64_signextend_i8(LiftoffRegister dst,
                                              LiftoffRegister src) {
  sxtb(dst.gp(), src.gp());
}
// Sign-extends the low 16 bits of {src} into the 64-bit {dst}.
void LiftoffAssembler::emit_i64_signextend_i16(LiftoffRegister dst,
                                               LiftoffRegister src) {
  sxth(dst.gp(), src.gp());
}
// Sign-extends the low 32 bits of {src} into the 64-bit {dst}.
void LiftoffAssembler::emit_i64_signextend_i32(LiftoffRegister dst,
                                               LiftoffRegister src) {
  sxtw(dst.gp(), src.gp());
}
// Emits an unconditional branch to {label}.
void LiftoffAssembler::emit_jump(Label* label) {
  B(label);
}
// Emits an unconditional branch to the address held in {target}.
void LiftoffAssembler::emit_jump(Register target) {
  Br(target);
}
// Compares {lhs} with {rhs} (or with zero when {rhs} is not a valid register)
// at the width given by {type} and branches to {label} if {cond} holds.
// Only i32 and i64 are supported.
void LiftoffAssembler::emit_cond_jump(Condition cond, Label* label,
                                      ValueType type, Register lhs,
                                      Register rhs) {
  switch (type.kind()) {
    case ValueType::kI32:
      if (rhs.is_valid()) {
        Cmp(lhs.W(), rhs.W());
      } else {
        Cmp(lhs.W(), wzr);
      }
      break;
    case ValueType::kI64:
      if (rhs.is_valid()) {
        Cmp(lhs.X(), rhs.X());
      } else {
        Cmp(lhs.X(), xzr);
      }
      break;
    default:
      UNREACHABLE();
  }
  B(label, cond);
}
// Sets {dst} to 1 if the 32-bit value in {src} is zero, and to 0 otherwise.
void LiftoffAssembler::emit_i32_eqz(Register dst, Register src) {
  Cmp(src.W(), wzr);
  Cset(dst.W(), eq);
}
// Sets {dst} to 1 if the 32-bit comparison of {lhs} with {rhs} satisfies
// {cond}, and to 0 otherwise.
void LiftoffAssembler::emit_i32_set_cond(Condition cond, Register dst,
                                         Register lhs, Register rhs) {
  Cmp(lhs.W(), rhs.W());
  Cset(dst.W(), cond);
}
// Sets {dst} to 1 if the 64-bit value in {src} is zero, and to 0 otherwise.
void LiftoffAssembler::emit_i64_eqz(Register dst, LiftoffRegister src) {
  Cmp(src.gp().X(), xzr);
  Cset(dst.W(), eq);
}
// Sets {dst} to 1 if the 64-bit comparison of {lhs} with {rhs} satisfies
// {cond}, and to 0 otherwise.
void LiftoffAssembler::emit_i64_set_cond(Condition cond, Register dst,
                                         LiftoffRegister lhs,
                                         LiftoffRegister rhs) {
  Cmp(lhs.gp().X(), rhs.gp().X());
  Cset(dst.W(), cond);
}
// Sets {dst} to 1 if the f32 comparison of {lhs} with {rhs} satisfies {cond},
// and to 0 otherwise. For every condition except {ne}, an unordered
// comparison (NaN operand) yields 0.
void LiftoffAssembler::emit_f32_set_cond(Condition cond, Register dst,
                                         DoubleRegister lhs,
                                         DoubleRegister rhs) {
  Fcmp(lhs.S(), rhs.S());
  Cset(dst.W(), cond);
  if (cond != ne) {
    // If V flag set, at least one of the arguments was a Nan -> false.
    Csel(dst.W(), wzr, dst.W(), vs);
  }
}
// Sets {dst} to 1 if the f64 comparison of {lhs} with {rhs} satisfies {cond},
// and to 0 otherwise. For every condition except {ne}, an unordered
// comparison (NaN operand) yields 0.
void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst,
                                         DoubleRegister lhs,
                                         DoubleRegister rhs) {
  Fcmp(lhs.D(), rhs.D());
  Cset(dst.W(), cond);
  if (cond != ne) {
    // If V flag set, at least one of the arguments was a Nan -> false.
    Csel(dst.W(), wzr, dst.W(), vs);
  }
}
// Emits a SIMD load-and-transform from {src_addr} + {offset_reg} +
// {offset_imm} into {dst}.
//  - kExtend: loads 64 bits and widens every lane to twice its size, sign- or
//    zero-extending according to the memory type.
//  - kSplat: loads one element of the memory type and replicates it into
//    every lane (ld1r).
// {protected_load_pc} must be non-null; it receives the pc offset recorded
// before the load is emitted.
void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
                                     Register offset_reg, uint32_t offset_imm,
                                     LoadType type,
                                     LoadTransformationKind transform,
                                     uint32_t* protected_load_pc) {
  UseScratchRegisterScope temps(this);
  MemOperand src_op =
      liftoff::GetMemOp(this, &temps, src_addr, offset_reg, offset_imm);
  *protected_load_pc = pc_offset();
  MachineType memtype = type.mem_type();

  if (transform == LoadTransformationKind::kExtend) {
    if (memtype == MachineType::Int8()) {
      Ldr(dst.fp().D(), src_op);
      Sxtl(dst.fp().V8H(), dst.fp().V8B());
    } else if (memtype == MachineType::Uint8()) {
      Ldr(dst.fp().D(), src_op);
      Uxtl(dst.fp().V8H(), dst.fp().V8B());
    } else if (memtype == MachineType::Int16()) {
      Ldr(dst.fp().D(), src_op);
      Sxtl(dst.fp().V4S(), dst.fp().V4H());
    } else if (memtype == MachineType::Uint16()) {
      Ldr(dst.fp().D(), src_op);
      Uxtl(dst.fp().V4S(), dst.fp().V4H());
    } else if (memtype == MachineType::Int32()) {
      Ldr(dst.fp().D(), src_op);
      Sxtl(dst.fp().V2D(), dst.fp().V2S());
    } else if (memtype == MachineType::Uint32()) {
      Ldr(dst.fp().D(), src_op);
      Uxtl(dst.fp().V2D(), dst.fp().V2S());
    }
  } else {
    // ld1r only allows no offset or post-index, so emit an add.
    DCHECK_EQ(LoadTransformationKind::kSplat, transform);
    if (src_op.IsRegisterOffset()) {
      // We have 2 tmp gps, so it's okay to acquire 1 more here, and actually
      // doesn't matter if we acquire the same one.
      // NOTE(review): this adds the full X view of the offset register, while
      // the operand built by GetMemOp zero-extends its W view -- confirm the
      // upper 32 bits are always zero here.
      Register tmp = temps.AcquireX();
      Add(tmp, src_op.base(), src_op.regoffset().X());
      src_op = MemOperand(tmp.X(), 0);
    } else if (src_op.IsImmediateOffset() && src_op.offset() != 0) {
      Register tmp = temps.AcquireX();
      Add(tmp, src_op.base(), src_op.offset());
      src_op = MemOperand(tmp.X(), 0);
    }

    if (memtype == MachineType::Int8()) {
      ld1r(dst.fp().V16B(), src_op);
    } else if (memtype == MachineType::Int16()) {
      ld1r(dst.fp().V8H(), src_op);
    } else if (memtype == MachineType::Int32()) {
      ld1r(dst.fp().V4S(), src_op);
    } else if (memtype == MachineType::Int64()) {
      ld1r(dst.fp().V2D(), src_op);
    }
  }
}
// s8x16.swizzle: emits a Tbl table lookup with lhs as the table and rhs as
// the byte indices, all on 16 x 8-bit lane views.
void LiftoffAssembler::emit_s8x16_swizzle(LiftoffRegister dst,
                                          LiftoffRegister lhs,
                                          LiftoffRegister rhs) {
  const auto out = dst.fp().V16B();
  const auto table = lhs.fp().V16B();
  const auto indices = rhs.fp().V16B();
  Tbl(out, table, indices);
}
// f64x2.splat: duplicates element 0 of src's D view into both 64-bit lanes.
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  constexpr int kSourceLane = 0;
  const auto out = dst.fp().V2D();
  const auto scalar = src.fp().D();
  Dup(out, scalar, kSourceLane);
}
// f64x2.extract_lane: moves lane imm_lane_idx of lhs into dst's D register.
void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  const auto scalar_out = dst.fp().D();
  const auto vector_in = lhs.fp().V2D();
  Mov(scalar_out, vector_in, imm_lane_idx);
}
// f64x2.replace_lane: dst becomes src1 with lane imm_lane_idx overwritten by
// lane 0 of src2.
void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  const auto out = dst.fp().V2D();
  // Copy the base vector only when dst is not already src1.
  const bool needs_copy = dst != src1;
  if (needs_copy) Mov(out, src1.fp().V2D());
  Mov(out, imm_lane_idx, src2.fp().V2D(), 0);
}
// f64x2.abs: emits Fabs on the 2 x 64-bit lane views of src and dst.
void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  const auto out = dst.fp().V2D();
  const auto in = src.fp().V2D();
  Fabs(out, in);
}
// f64x2.neg: emits Fneg on the 2 x 64-bit lane views of src and dst.
void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  const auto out = dst.fp().V2D();
  const auto in = src.fp().V2D();
  Fneg(out, in);
}
// f64x2.sqrt: emits Fsqrt on the 2 x 64-bit lane views of src and dst.
void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
                                       LiftoffRegister src) {
  const auto out = dst.fp().V2D();
  const auto in = src.fp().V2D();
  Fsqrt(out, in);
}
// f64x2.add: emits Fadd on the 2 x 64-bit lane views of the operands.
void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  const auto out = dst.fp().V2D();
  const auto a = lhs.fp().V2D();
  const auto b = rhs.fp().V2D();
  Fadd(out, a, b);
}
// f64x2.sub: emits Fsub on the 2 x 64-bit lane views of the operands.
void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  const auto out = dst.fp().V2D();
  const auto a = lhs.fp().V2D();
  const auto b = rhs.fp().V2D();
  Fsub(out, a, b);
}
// f64x2.mul: emits Fmul on the 2 x 64-bit lane views of the operands.
void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  const auto out = dst.fp().V2D();
  const auto a = lhs.fp().V2D();
  const auto b = rhs.fp().V2D();
  Fmul(out, a, b);
}
// f64x2.div: emits Fdiv on the 2 x 64-bit lane views of the operands.
void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  const auto out = dst.fp().V2D();
  const auto a = lhs.fp().V2D();
  const auto b = rhs.fp().V2D();
  Fdiv(out, a, b);
}
// f64x2.min: emits Fmin on the 2 x 64-bit lane views of the operands.
void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  const auto out = dst.fp().V2D();
  const auto a = lhs.fp().V2D();
  const auto b = rhs.fp().V2D();
  Fmin(out, a, b);
}
// f64x2.max: emits Fmax on the 2 x 64-bit lane views of the operands.
void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  const auto out = dst.fp().V2D();
  const auto a = lhs.fp().V2D();
  const auto b = rhs.fp().V2D();
  Fmax(out, a, b);
}
// f32x4.splat: duplicates element 0 of src's S view into all four 32-bit
// lanes of dst.
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  constexpr int kSourceLane = 0;
  const auto out = dst.fp().V4S();
  const auto scalar = src.fp().S();
  Dup(out, scalar, kSourceLane);
}
// f32x4.extract_lane: moves lane imm_lane_idx of lhs into dst's S register.
void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  const auto scalar_out = dst.fp().S();
  const auto vector_in = lhs.fp().V4S();
  Mov(scalar_out, vector_in, imm_lane_idx);
}
// f32x4.replace_lane: dst becomes src1 with lane imm_lane_idx overwritten by
// lane 0 of src2.
void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  const auto out = dst.fp().V4S();
  // Copy the base vector only when dst is not already src1.
  const bool needs_copy = dst != src1;
  if (needs_copy) Mov(out, src1.fp().V4S());
  Mov(out, imm_lane_idx, src2.fp().V4S(), 0);
}
// f32x4.abs: emits Fabs on the 4 x 32-bit lane views of src and dst.
void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  const auto out = dst.fp().V4S();
  const auto in = src.fp().V4S();
  Fabs(out, in);
}
// f32x4.neg: emits Fneg on the 4 x 32-bit lane views of src and dst.
void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  const auto out = dst.fp().V4S();
  const auto in = src.fp().V4S();
  Fneg(out, in);
}
// f32x4.sqrt: emits Fsqrt on the 4 x 32-bit lane views of src and dst.
void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
                                       LiftoffRegister src) {
  const auto out = dst.fp().V4S();
  const auto in = src.fp().V4S();
  Fsqrt(out, in);
}
// f32x4.add: emits Fadd on the 4 x 32-bit lane views of the operands.
void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  const auto out = dst.fp().V4S();
  const auto a = lhs.fp().V4S();
  const auto b = rhs.fp().V4S();
  Fadd(out, a, b);
}
// f32x4.sub: emits Fsub on the 4 x 32-bit lane views of the operands.
void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  const auto out = dst.fp().V4S();
  const auto a = lhs.fp().V4S();
  const auto b = rhs.fp().V4S();
  Fsub(out, a, b);
}
// f32x4.mul: emits Fmul on the 4 x 32-bit lane views of the operands.
void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  const auto out = dst.fp().V4S();
  const auto a = lhs.fp().V4S();
  const auto b = rhs.fp().V4S();
  Fmul(out, a, b);
}
// f32x4.div: emits Fdiv on the 4 x 32-bit lane views of the operands.
void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  const auto out = dst.fp().V4S();
  const auto a = lhs.fp().V4S();
  const auto b = rhs.fp().V4S();
  Fdiv(out, a, b);
}
// f32x4.min: emits Fmin on the 4 x 32-bit lane views of the operands.
void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  const auto out = dst.fp().V4S();
  const auto a = lhs.fp().V4S();
  const auto b = rhs.fp().V4S();
  Fmin(out, a, b);
}
// f32x4.max: emits Fmax on the 4 x 32-bit lane views of the operands.
void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  const auto out = dst.fp().V4S();
  const auto a = lhs.fp().V4S();
  const auto b = rhs.fp().V4S();
  Fmax(out, a, b);
}
// i64x2.splat: duplicates the 64-bit general-purpose value in src into both
// 64-bit lanes of dst.
void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Dup(dst.fp().V2D(), src.gp().X());
}
// i64x2.extract_lane: moves 64-bit lane imm_lane_idx of lhs into dst's
// general-purpose register (X view).
void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  Mov(dst.gp().X(), lhs.fp().V2D(), imm_lane_idx);
}
// i64x2.replace_lane: dst becomes src1 with 64-bit lane imm_lane_idx
// overwritten by the general-purpose value in src2.
void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  // Copy the base vector only when dst is not already src1.
  if (dst != src1) {
    Mov(dst.fp().V2D(), src1.fp().V2D());
  }
  // Insert the scalar (X view, matching the 64-bit lane) into the lane.
  Mov(dst.fp().V2D(), imm_lane_idx, src2.gp().X());
}
// i64x2.neg: emits Neg on the 2 x 64-bit lane views of src and dst.
void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  const auto out = dst.fp().V2D();
  const auto in = src.fp().V2D();
  Neg(out, in);
}
// i64x2.shl with the shift amount supplied in a register (rhs).
// NOTE(review): the statement below is truncated in this copy of the file —
// the callee name in front of `this` and the argument between
// `lhs.fp().V2D(),` and `kFormat2D` are missing. Restore from upstream.
void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
this, dst.fp().V2D(), lhs.fp().V2D(),, kFormat2D);
// i64x2.shl by an immediate: the shift count is masked to the low 6 bits.
void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  const auto out = dst.fp().V2D();
  const auto in = lhs.fp().V2D();
  const auto shift = rhs & 63;
  Shl(out, in, shift);
}
// i64x2.shr_s with the shift amount supplied in a register (rhs).
// NOTE(review): the statement below is truncated in this copy of the file —
// the callee name in front of `this` and the argument between
// `lhs.fp().V2D(),` and `kFormat2D` are missing. Restore from upstream.
void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
this, dst.fp().V2D(), lhs.fp().V2D(),, kFormat2D);
// i64x2.shr_s by an immediate: delegates to the signed instantiation of
// liftoff::EmitSimdShiftRightImmediate for the 2D format.
void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  const auto out = dst.fp().V2D();
  const auto in = lhs.fp().V2D();
  liftoff::EmitSimdShiftRightImmediate<kFormat2D, liftoff::ShiftSign::kSigned>(
      this, out, in, rhs);
}
// i64x2.shr_u with the shift amount supplied in a register (rhs).
// NOTE(review): the statement below is truncated in this copy of the file —
// the callee name in front of `this` and the argument between
// `lhs.fp().V2D(),` and `kFormat2D` are missing. Restore from upstream.
void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
this, dst.fp().V2D(), lhs.fp().V2D(),, kFormat2D);
// i64x2.shr_u by an immediate: delegates to the unsigned instantiation of
// liftoff::EmitSimdShiftRightImmediate for the 2D format, mirroring the
// signed sibling emit_i64x2_shri_s.
void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftRightImmediate<kFormat2D,
                                       liftoff::ShiftSign::kUnsigned>(
      this, dst.fp().V2D(), lhs.fp().V2D(), rhs);
}
// i64x2.add: emits Add on the 2 x 64-bit lane views of the operands.
void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  const auto out = dst.fp().V2D();
  const auto a = lhs.fp().V2D();
  const auto b = rhs.fp().V2D();
  Add(out, a, b);
}
// i64x2.sub: emits Sub on the 2 x 64-bit lane views of the operands.
void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  const auto out = dst.fp().V2D();
  const auto a = lhs.fp().V2D();
  const auto b = rhs.fp().V2D();
  Sub(out, a, b);
}
// i64x2.mul: 64-bit lane-wise multiplication built from 32-bit multiplies.
// Writing each lane as hi:lo halves, the low 64 bits of the product are
// lo(lhs)*lo(rhs) + ((lo(lhs)*hi(rhs) + hi(lhs)*lo(rhs)) << 32).
// dst is only written after lhs and rhs have been fully read.
void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
UseScratchRegisterScope temps(this);
VRegister tmp1 = temps.AcquireV(kFormat2D);
VRegister tmp2 = temps.AcquireV(kFormat2D);
// Algorithm copied from another implementation (the reference is missing in
// this copy of the file — TODO restore from upstream) with minor
// modifications:
// - 2 (max number of scratch registers in Liftoff) temporaries instead of 3
// - 1 more Umull instruction to calculate | cg | ae |,
// - so, we can no longer use Umlal in the last step, and use Add instead.
// Refer to comments there for details.
// tmp1 = low 32 bits of each lhs lane; tmp2 = low 32 bits of each rhs lane.
Xtn(tmp1.V2S(), lhs.fp().V2D());
Xtn(tmp2.V2S(), rhs.fp().V2D());
// tmp1 = full 64-bit products of the low halves.
Umull(tmp1.V2D(), tmp1.V2S(), tmp2.V2S());
// tmp2 = rhs with the 32-bit halves of each 64-bit lane swapped, then
// multiplied element-wise with lhs to form the two cross products per lane.
Rev64(tmp2.V4S(), rhs.fp().V4S());
Mul(tmp2.V4S(), tmp2.V4S(), lhs.fp().V4S());
// Pairwise add sums the two cross products of each lane.
Addp(tmp2.V4S(), tmp2.V4S(), tmp2.V4S());
// Widen the cross-product sums to 64 bits, shifted left by 32, then add the
// low-half products.
Shll(dst.fp().V2D(), tmp2.V2S(), 32);
Add(dst.fp().V2D(), dst.fp().V2D(), tmp1.V2D());
// i32x4.splat: duplicates the 32-bit general-purpose value in src into all
// four 32-bit lanes of dst.
void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Dup(dst.fp().V4S(), src.gp().W());
}
// i32x4.extract_lane: moves 32-bit lane imm_lane_idx of lhs into dst's
// general-purpose register (W view).
void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  Mov(dst.gp().W(), lhs.fp().V4S(), imm_lane_idx);
}
// i32x4.replace_lane: dst becomes src1 with 32-bit lane imm_lane_idx
// overwritten by the general-purpose value in src2.
void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  // Copy the base vector only when dst is not already src1.
  if (dst != src1) {
    Mov(dst.fp().V4S(), src1.fp().V4S());
  }
  // Insert the scalar (W view, matching the 32-bit lane) into the lane.
  Mov(dst.fp().V4S(), imm_lane_idx, src2.gp().W());
}
// i32x4.neg: emits Neg on the 4 x 32-bit lane views of src and dst.
void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  const auto out = dst.fp().V4S();
  const auto in = src.fp().V4S();
  Neg(out, in);
}
// v32x4.anytrue: forwards to the shared liftoff::EmitAnyTrue helper.
void LiftoffAssembler::emit_v32x4_anytrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  LiftoffAssembler* const assm = this;
  liftoff::EmitAnyTrue(assm, dst, src);
}
// v32x4.alltrue: forwards to liftoff::EmitAllTrue with the 4S lane format.
void LiftoffAssembler::emit_v32x4_alltrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  LiftoffAssembler* const assm = this;
  liftoff::EmitAllTrue(assm, dst, src, kFormat4S);
}
// i32x4.bitmask has no code emitted here; this reports a kSimd bailout.
void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  const char* const detail = "i32x4_bitmask";
  bailout(kSimd, detail);
}
// i32x4.shl with the shift amount supplied in a register (rhs).
// NOTE(review): the statement below is truncated in this copy of the file —
// the callee name in front of `this` and the argument between
// `lhs.fp().V4S(),` and `kFormat4S` are missing. Restore from upstream.
void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
this, dst.fp().V4S(), lhs.fp().V4S(),, kFormat4S);
// i32x4.shl by an immediate: the shift count is masked to the low 5 bits.
void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  const auto out = dst.fp().V4S();
  const auto in = lhs.fp().V4S();
  const auto shift = rhs & 31;
  Shl(out, in, shift);
}
// i32x4.shr_s with the shift amount supplied in a register (rhs).
// NOTE(review): the statement below is truncated in this copy of the file —
// the callee name in front of `this` and the argument between
// `lhs.fp().V4S(),` and `kFormat4S` are missing. Restore from upstream.
void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
this, dst.fp().V4S(), lhs.fp().V4S(),, kFormat4S);
// i32x4.shr_s by an immediate: delegates to the signed instantiation of
// liftoff::EmitSimdShiftRightImmediate for the 4S format.
void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  const auto out = dst.fp().V4S();
  const auto in = lhs.fp().V4S();
  liftoff::EmitSimdShiftRightImmediate<kFormat4S, liftoff::ShiftSign::kSigned>(
      this, out, in, rhs);
}
// i32x4.shr_u with the shift amount supplied in a register (rhs).
// NOTE(review): the statement below is truncated in this copy of the file —
// the callee name in front of `this` and the argument between
// `lhs.fp().V4S(),` and `kFormat4S` are missing. Restore from upstream.
void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
this, dst.fp().V4S(), lhs.fp().V4S(),, kFormat4S);
// i32x4.shr_u by an immediate: delegates to the unsigned instantiation of
// liftoff::EmitSimdShiftRightImmediate for the 4S format, mirroring the
// signed sibling emit_i32x4_shri_s.
void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftRightImmediate<kFormat4S,
                                       liftoff::ShiftSign::kUnsigned>(
      this, dst.fp().V4S(), lhs.fp().V4S(), rhs);
}
// i32x4.add: emits Add on the 4 x 32-bit lane views of the operands.
void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  const auto out = dst.fp().V4S();
  const auto a = lhs.fp().V4S();
  const auto b = rhs.fp().V4S();
  Add(out, a, b);
}
// i32x4.sub: emits Sub on the 4 x 32-bit lane views of the operands.
void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  const auto out = dst.fp().V4S();
  const auto a = lhs.fp().V4S();
  const auto b = rhs.fp().V4S();
  Sub(out, a, b);
}
// i32x4.mul: emits Mul on the 4 x 32-bit lane views of the operands.
void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  const auto out = dst.fp().V4S();
  const auto a = lhs.fp().V4S();
  const auto b = rhs.fp().V4S();
  Mul(out, a, b);
}
// i32x4.min_s: emits Smin (signed minimum) on the 4 x 32-bit lane views.
void LiftoffAssembler::emit_i32x4_min_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  const auto out = dst.fp().V4S();
  const auto a = lhs.fp().V4S();
  const auto b = rhs.fp().V4S();
  Smin(out, a, b);
}
// i32x4.min_u: emits Umin (unsigned minimum) on the 4 x 32-bit lane views.
void LiftoffAssembler::emit_i32x4_min_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  const auto out = dst.fp().V4S();
  const auto a = lhs.fp().V4S();
  const auto b = rhs.fp().V4S();
  Umin(out, a, b);
}
// i32x4.max_s: emits Smax (signed maximum) on the 4 x 32-bit lane views.
void LiftoffAssembler::emit_i32x4_max_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  const auto out = dst.fp().V4S();
  const auto a = lhs.fp().V4S();
  const auto b = rhs.fp().V4S();
  Smax(out, a, b);
}
// i32x4.max_u: emits Umax (unsigned maximum) on the 4 x 32-bit lane views.
void LiftoffAssembler::emit_i32x4_max_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  const auto out = dst.fp().V4S();
  const auto a = lhs.fp().V4S();
  const auto b = rhs.fp().V4S();
  Umax(out, a, b);
}
// i16x8.splat: duplicates the low 16 bits of the general-purpose value in src
// into all eight 16-bit lanes of dst.
void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Dup(dst.fp().V8H(), src.gp().W());
}
// i16x8.extract_lane_u: moves 16-bit lane imm_lane_idx of lhs, zero-extended
// (Umov), into dst's general-purpose register (W view).
void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Umov(dst.gp().W(), lhs.fp().V8H(), imm_lane_idx);
}
// i16x8.extract_lane_s: moves 16-bit lane imm_lane_idx of lhs, sign-extended
// (Smov), into dst's general-purpose register (W view).
void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Smov(dst.gp().W(), lhs.fp().V8H(), imm_lane_idx);
}
// i16x8.replace_lane: dst becomes src1 with 16-bit lane imm_lane_idx
// overwritten by the general-purpose value in src2.
void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  // Copy the base vector only when dst is not already src1.
  if (dst != src1) {
    Mov(dst.fp().V8H(), src1.fp().V8H());
  }
  // Insert the scalar (taken from the W view) into the selected lane.
  Mov(dst.fp().V8H(), imm_lane_idx, src2.gp().W());
}
// i16x8.neg: emits Neg on the 8 x 16-bit lane views of src and dst.
void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  const auto out = dst.fp().V8H();
  const auto in = src.fp().V8H();
  Neg(out, in);
}
// v16x8.anytrue: forwards to the shared liftoff::EmitAnyTrue helper.
void LiftoffAssembler::emit_v16x8_anytrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  LiftoffAssembler* const assm = this;
  liftoff::EmitAnyTrue(assm, dst, src);
}
// v16x8.alltrue: forwards to liftoff::EmitAllTrue with the 8H lane format.
void LiftoffAssembler::emit_v16x8_alltrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  LiftoffAssembler* const assm = this;
  liftoff::EmitAllTrue(assm, dst, src, kFormat8H);
}
// i16x8.bitmask has no code emitted here; this reports a kSimd bailout.
void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  const char* const detail = "i16x8_bitmask";
  bailout(kSimd, detail);
}
// i16x8.shl with the shift amount supplied in a register (rhs).
// NOTE(review): the statement below is truncated in this copy of the file —
// the callee name in front of `this` and the argument between
// `lhs.fp().V8H(),` and `kFormat8H` are missing. Restore from upstream.
void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
this, dst.fp().V8H(), lhs.fp().V8H(),, kFormat8H);
// i16x8.shl by an immediate: the shift count is masked to the low 4 bits.
void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  const auto out = dst.fp().V8H();
  const auto in = lhs.fp().V8H();
  const auto shift = rhs & 15;
  Shl(out, in, shift);
}
// i16x8.shr_s with the shift amount supplied in a register (rhs).
// NOTE(review): the statement below is truncated in this copy of the file —
// the callee name in front of `this` and the argument between
// `lhs.fp().V8H(),` and `kFormat8H` are missing. Restore from upstream.
void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
this, dst.fp().V8H(), lhs.fp().V8H(),, kFormat8H);
// i16x8.shr_s by an immediate: delegates to the signed instantiation of
// liftoff::EmitSimdShiftRightImmediate for the 8H format.
void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  const auto out = dst.fp().V8H();
  const auto in = lhs.fp().V8H();
  liftoff::EmitSimdShiftRightImmediate<kFormat8H, liftoff::ShiftSign::kSigned>(
      this, out, in, rhs);
}
// i16x8.shr_u with the shift amount supplied in a register (rhs).
// NOTE(review): the statement below is truncated in this copy of the file —
// the callee name in front of `this` and the argument between
// `lhs.fp().V8H(),` and `kFormat8H` are missing. Restore from upstream.
void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
this, dst.fp().V8H(), lhs.fp().V8H(),, kFormat8H);
// i16x8.shr_u by an immediate: delegates to the unsigned instantiation of
// liftoff::EmitSimdShiftRightImmediate for the 8H format, mirroring the
// signed sibling emit_i16x8_shri_s.
void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftRightImmediate<kFormat8H,
                                       liftoff::ShiftSign::kUnsigned>(
      this, dst.fp().V8H(), lhs.fp().V8H(), rhs);
}
// i16x8.add: emits Add on the 8 x 16-bit lane views of the operands.
void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  const auto out = dst.fp().V8H();
  const auto a = lhs.fp().V8H();
  const auto b = rhs.fp().V8H();
  Add(out, a, b);
}
// i16x8.add_saturate_s: emits Sqadd (signed saturating add) on the
// 8 x 16-bit lane views.
void LiftoffAssembler::emit_i16x8_add_saturate_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  const auto out = dst.fp().V8H();
  const auto a = lhs.fp().V8H();
  const auto b = rhs.fp().V8H();
  Sqadd(out, a, b);
}
// i16x8.sub: emits Sub on the 8 x 16-bit lane views of the operands.
void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  const auto out = dst.fp().V8H();
  const auto a = lhs.fp().V8H();
  const auto b = rhs.fp().V8H();
  Sub(out, a, b);
}
// i16x8.sub_saturate_s: emits Sqsub (signed saturating subtract) on the
// 8 x 16-bit lane views.
void LiftoffAssembler::emit_i16x8_sub_saturate_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  const auto out = dst.fp().V8H();
  const auto a = lhs.fp().V8H();
  const auto b = rhs.fp().V8H();
  Sqsub(out, a, b);
}
// i16x8.sub_saturate_u: emits Uqsub (unsigned saturating subtract) on the
// 8 x 16-bit lane views.
void LiftoffAssembler::emit_i16x8_sub_saturate_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  const auto out = dst.fp().V8H();
  const auto a = lhs.fp().V8H();
  const auto b = rhs.fp().V8H();
  Uqsub(out, a, b);
}
// i16x8.mul: emits Mul on the 8 x 16-bit lane views of the operands.
void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  const auto out = dst.fp().V8H();
  const auto a = lhs.fp().V8H();
  const auto b = rhs.fp().V8H();
  Mul(out, a, b);
}
// i16x8.add_saturate_u: emits Uqadd (unsigned saturating add) on the
// 8 x 16-bit lane views.
void LiftoffAssembler::emit_i16x8_add_saturate_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  const auto out = dst.fp().V8H();
  const auto a = lhs.fp().V8H();
  const auto b = rhs.fp().V8H();
  Uqadd(out, a, b);
}
// i16x8.min_s: emits Smin (signed minimum) on the 8 x 16-bit lane views.
void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  const auto out = dst.fp().V8H();
  const auto a = lhs.fp().V8H();
  const auto b = rhs.fp().V8H();
  Smin(out, a, b);
}
// i16x8.min_u: emits Umin (unsigned minimum) on the 8 x 16-bit lane views.
void LiftoffAssembler::emit_i16x8_min_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  const auto out = dst.fp().V8H();
  const auto a = lhs.fp().V8H();
  const auto b = rhs.fp().V8H();
  Umin(out, a, b);
}
// i16x8.max_s: emits Smax (signed maximum) on the 8 x 16-bit lane views.
void LiftoffAssembler::emit_i16x8_max_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  const auto out = dst.fp().V8H();
  const auto a = lhs.fp().V8H();
  const auto b = rhs.fp().V8H();
  Smax(out, a, b);
}
// i16x8.max_u: emits Umax (unsigned maximum) on the 8 x 16-bit lane views.
void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  const auto out = dst.fp().V8H();
  const auto a = lhs.fp().V8H();
  const auto b = rhs.fp().V8H();
  Umax(out, a, b);
}
// i8x16.splat: duplicates the low 8 bits of the general-purpose value in src
// into all sixteen 8-bit lanes of dst.
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Dup(dst.fp().V16B(), src.gp().W());
}
// i8x16.extract_lane_u: moves 8-bit lane imm_lane_idx of lhs, zero-extended
// (Umov), into dst's general-purpose register (W view).
void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Umov(dst.gp().W(), lhs.fp().V16B(), imm_lane_idx);
}
// i8x16.extract_lane_s: moves 8-bit lane imm_lane_idx of lhs, sign-extended
// (Smov), into dst's general-purpose register (W view).
void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Smov(dst.gp().W(), lhs.fp().V16B(), imm_lane_idx);
}
// i8x16.replace_lane: dst becomes src1 with 8-bit lane imm_lane_idx
// overwritten by the general-purpose value in src2.
void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  // Copy the base vector only when dst is not already src1.
  if (dst != src1) {
    Mov(dst.fp().V16B(), src1.fp().V16B());
  }
  // Insert the scalar (taken from the W view) into the selected lane.
  Mov(dst.fp().V16B(), imm_lane_idx, src2.gp().W());
}
// i8x16.neg: emits Neg on the 16 x 8-bit lane views of src and dst.
void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  const auto out = dst.fp().V16B();
  const auto in = src.fp().V16B();
  Neg(out, in);
}
// v8x16.anytrue: forwards to the shared liftoff::EmitAnyTrue helper.
void LiftoffAssembler::emit_v8x16_anytrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  LiftoffAssembler* const assm = this;
  liftoff::EmitAnyTrue(assm, dst, src);
}
// v8x16.alltrue: forwards to liftoff::EmitAllTrue with the 16B lane format.
void LiftoffAssembler::emit_v8x16_alltrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  LiftoffAssembler* const assm = this;
  liftoff::EmitAllTrue(assm, dst, src, kFormat16B);
}
// i8x16.bitmask has no code emitted here; this reports a kSimd bailout.
void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  const char* const detail = "i8x16_bitmask";
  bailout(kSimd, detail);
}
// i8x16.shl with the shift amount supplied in a register (rhs).
// NOTE(review): the statement below is truncated in this copy of the file —
// the callee name in front of `this` and the argument between
// `lhs.fp().V16B(),` and `kFormat16B` are missing. Restore from upstream.
void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
this, dst.fp().V16B(), lhs.fp().V16B(),, kFormat16B);
// i8x16.shl by an immediate: the shift count is masked to the low 3 bits.
void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  const auto out = dst.fp().V16B();
  const auto in = lhs.fp().V16B();
  const auto shift = rhs & 7;
  Shl(out, in, shift);
}
// i8x16.shr_s with the shift amount supplied in a register (rhs).
// NOTE(review): the statement below is truncated in this copy of the file —
// the callee name in front of `this` and the argument between
// `lhs.fp().V16B(),` and `kFormat16B` are missing. Restore from upstream.
void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
this, dst.fp().V16B(), lhs.fp().V16B(),, kFormat16B);
// i8x16.shr_s by an immediate: delegates to the signed instantiation of
// liftoff::EmitSimdShiftRightImmediate for the 16B format.
void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  const auto out = dst.fp().V16B();
  const auto in = lhs.fp().V16B();
  liftoff::EmitSimdShiftRightImmediate<kFormat16B, liftoff::ShiftSign::kSigned>(
      this, out, in, rhs);
}
// i8x16.shr_u with the shift amount supplied in a register (rhs).
// NOTE(review): the statement below is truncated in this copy of the file —
// the callee name in front of `this` and the argument between
// `lhs.fp().V16B(),` and `kFormat16B` are missing. Restore from upstream.
void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
this, dst.fp().V16B(), lhs.fp().V16B(),, kFormat16B);
// i8x16.shr_u by an immediate: delegates to the unsigned instantiation of
// liftoff::EmitSimdShiftRightImmediate for the 16B format, mirroring the
// signed sibling emit_i8x16_shri_s.
void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftRightImmediate<kFormat16B,
                                       liftoff::ShiftSign::kUnsigned>(
      this, dst.fp().V16B(), lhs.fp().V16B(), rhs);
}
// i8x16.add: emits Add on the 16 x 8-bit lane views of the operands.
void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  const auto out = dst.fp().V16B();
  const auto a = lhs.fp().V16B();
  const auto b = rhs.fp().V16B();
  Add(out, a, b);
}
// i8x16.add_saturate_s: emits Sqadd (signed saturating add) on the
// 16 x 8-bit lane views.
void LiftoffAssembler::emit_i8x16_add_saturate_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  const auto out = dst.fp().V16B();
  const auto a = lhs.fp().V16B();
  const auto b = rhs.fp().V16B();
  Sqadd(out, a, b);
}
// i8x16.sub: emits Sub on the 16 x 8-bit lane views of the operands.
void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  const auto out = dst.fp().V16B();
  const auto a = lhs.fp().V16B();
  const auto b = rhs.fp().V16B();
  Sub(out, a, b);
}
// i8x16.sub_saturate_s: emits Sqsub (signed saturating subtract) on the
// 16 x 8-bit lane views.
void LiftoffAssembler::emit_i8x16_sub_saturate_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  const auto out = dst.fp().V16B();
  const auto a = lhs.fp().V16B();
  const auto b = rhs.fp().V16B();
  Sqsub(out, a, b);
}
// i8x16.sub_saturate_u: emits Uqsub (unsigned saturating subtract) on the
// 16 x 8-bit lane views.
void LiftoffAssembler::emit_i8x16_sub_saturate_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  const auto out = dst.fp().V16B();
  const auto a = lhs.fp().V16B();
  const auto b = rhs.fp().V16B();
  Uqsub(out, a, b);
}
// i8x16.mul: emits Mul on the 16 x 8-bit lane views of the operands.
void LiftoffAssembler::emit_i8x16_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  const auto out = dst.fp().V16B();
  const auto a = lhs.fp().V16B();
  const auto b = rhs.fp().V16B();
  Mul(out, a, b);
}
// i8x16.add_saturate_u: emits Uqadd (unsigned saturating add) on the
// 16 x 8-bit lane views.
void LiftoffAssembler::emit_i8x16_add_saturate_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  const auto out = dst.fp().V16B();
  const auto a = lhs.fp().V16B();
  const auto b = rhs.fp().V16B();
  Uqadd(out, a, b);
}
// i8x16.min_s: emits Smin (signed minimum) on the 16 x 8-bit lane views.
void LiftoffAssembler::emit_i8x16_min_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  const auto out = dst.fp().V16B();
  const auto a = lhs.fp().V16B();
  const auto b = rhs.fp().V16B();
  Smin(out, a, b);
}
// i8x16.min_u: emits Umin (unsigned minimum) on the 16 x 8-bit lane views.
void LiftoffAssembler::emit_i8x16_min_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  const auto out = dst.fp().V16B();
  const auto a = lhs.fp().V16B();
  const auto b = rhs.fp().V16B();
  Umin(out, a, b);
}
// i8x16.max_s: emits Smax (signed maximum) on the 16 x 8-bit lane views.
void LiftoffAssembler::emit_i8x16_max_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  const auto out = dst.fp().V16B();
  const auto a = lhs.fp().V16B();
  const auto b = rhs.fp().V16B();
  Smax(out, a, b);
}
// i8x16.max_u: emits Umax (unsigned maximum) on the 16 x 8-bit lane views.
void LiftoffAssembler::emit_i8x16_max_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  const auto out = dst.fp().V16B();
  const auto a = lhs.fp().V16B();
  const auto b = rhs.fp().V16B();
  Umax(out, a, b);
}
// i8x16.eq: emits Cmeq (compare equal) on the 16 x 8-bit lane views.
void LiftoffAssembler::emit_i8x16_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  const auto mask = dst.fp().V16B();
  const auto a = lhs.fp().V16B();
  const auto b = rhs.fp().V16B();
  Cmeq(mask, a, b);
}
// i8x16.ne: computes the equality mask with Cmeq, then inverts it with Mvn.
void LiftoffAssembler::emit_i8x16_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  const auto mask = dst.fp().V16B();
  Cmeq(mask, lhs.fp().V16B(), rhs.fp().V16B());
  Mvn(mask, mask);
}
// i8x16.gt_s: emits Cmgt (signed greater-than) on the 16 x 8-bit lane views.
void LiftoffAssembler::emit_i8x16_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  const auto mask = dst.fp().V16B();
  const auto a = lhs.fp().V16B();
  const auto b = rhs.fp().V16B();
  Cmgt(mask, a, b);
}
// i8x16.gt_u: emits Cmhi (unsigned higher) on the 16 x 8-bit lane views.
void LiftoffAssembler::emit_i8x16_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  const auto mask = dst.fp().V16B();
  const auto a = lhs.fp().V16B();
  const auto b = rhs.fp().V16B();
  Cmhi(mask, a, b);
}
// i8x16.ge_s: emits Cmge (signed greater-or-equal) on the 16 x 8-bit lane
// views.
void LiftoffAssembler::emit_i8x16_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  const auto mask = dst.fp().V16B();
  const auto a = lhs.fp().V16B();
  const auto b = rhs.fp().V16B();
  Cmge(mask, a, b);
}
// i8x16.ge_u: emits Cmhs (unsigned higher-or-same) on the 16 x 8-bit lane
// views.
void LiftoffAssembler::emit_i8x16_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  const auto mask = dst.fp().V16B();
  const auto a = lhs.fp().V16B();
  const auto b = rhs.fp().V16B();
  Cmhs(mask, a, b);
}
// i16x8.eq: emits Cmeq (compare equal) on the 8 x 16-bit lane views.
void LiftoffAssembler::emit_i16x8_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  const auto mask = dst.fp().V8H();
  const auto a = lhs.fp().V8H();
  const auto b = rhs.fp().V8H();
  Cmeq(mask, a, b);
}
// i16x8.ne: computes the equality mask with Cmeq, then inverts it with Mvn.
void LiftoffAssembler::emit_i16x8_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  const auto mask = dst.fp().V8H();
  Cmeq(mask, lhs.fp().V8H(), rhs.fp().V8H());
  Mvn(mask, mask);
}
// i16x8.gt_s: emits Cmgt (signed greater-than) on the 8 x 16-bit lane views.
void LiftoffAssembler::emit_i16x8_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  const auto mask = dst.fp().V8H();
  const auto a = lhs.fp().V8H();
  const auto b = rhs.fp().V8H();
  Cmgt(mask, a, b);
}
// i16x8.gt_u: emits Cmhi (unsigned higher) on the 8 x 16-bit lane views.
void LiftoffAssembler::emit_i16x8_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  const auto mask = dst.fp().V8H();
  const auto a = lhs.fp().V8H();
  const auto b = rhs.fp().V8H();
  Cmhi(mask, a, b);
}
// i16x8.ge_s: emits Cmge (signed greater-or-equal) on the 8 x 16-bit lane
// views.
void LiftoffAssembler::emit_i16x8_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  const auto mask = dst.fp().V8H();
  const auto a = lhs.fp().V8H();
  const auto b = rhs.fp().V8H();
  Cmge(mask, a, b);
}
// i16x8.ge_u: emits Cmhs (unsigned higher-or-same) on the 8 x 16-bit lane
// views.
void LiftoffAssembler::emit_i16x8_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  const auto mask = dst.fp().V8H();
  const auto a = lhs.fp().V8H();
  const auto b = rhs.fp().V8H();
  Cmhs(mask, a, b);
}
// i32x4.eq: emits Cmeq (compare equal) on the 4 x 32-bit lane views.
void LiftoffAssembler::emit_i32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  const auto mask = dst.fp().V4S();
  const auto a = lhs.fp().V4S();
  const auto b = rhs.fp().V4S();
  Cmeq(mask, a, b);
}
// i32x4.ne: computes the equality mask with Cmeq, then inverts it with Mvn.
void LiftoffAssembler::emit_i32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  const auto mask = dst.fp().V4S();
  Cmeq(mask, lhs.fp().V4S(), rhs.fp().V4S());
  Mvn(mask, mask);
}
// i32x4.gt_s: emits Cmgt (signed greater-than) on the 4 x 32-bit lane views.
void LiftoffAssembler::emit_i32x4_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  const auto mask = dst.fp().V4S();
  const auto a = lhs.fp().V4S();
  const auto b = rhs.fp().V4S();
  Cmgt(mask, a, b);
}
// i32x4.gt_u: emits Cmhi (unsigned higher) on the 4 x 32-bit lane views.
void LiftoffAssembler::emit_i32x4_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  const auto mask = dst.fp().V4S();
  const auto a = lhs.fp().V4S();
  const auto b = rhs.fp().V4S();
  Cmhi(mask, a, b);
}
// i32x4.ge_s: emits Cmge (signed greater-or-equal) on the 4 x 32-bit lane
// views.
void LiftoffAssembler::emit_i32x4_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  const auto mask = dst.fp().V4S();
  const auto a = lhs.fp().V4S();
  const auto b = rhs.fp().V4S();
  Cmge(mask, a, b);
}
// i32x4.ge_u: emits Cmhs (unsigned higher-or-same) on the 4 x 32-bit lane
// views.
void LiftoffAssembler::emit_i32x4_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  const auto mask = dst.fp().V4S();
  const auto a = lhs.fp().V4S();
  const auto b = rhs.fp().V4S();
  Cmhs(mask, a, b);
}
// f32x4.eq: emits Fcmeq (floating-point compare equal) on the 4 x 32-bit
// lane views.
void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  const auto mask = dst.fp().V4S();
  const auto a = lhs.fp().V4S();
  const auto b = rhs.fp().V4S();
  Fcmeq(mask, a, b);
}
// f32x4.ne: computes the equality mask with Fcmeq, then inverts it with Mvn.
void LiftoffAssembler::emit_f32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  const auto mask = dst.fp().V4S();
  Fcmeq(mask, lhs.fp().V4S(), rhs.fp().V4S());
  Mvn(mask, mask);
}
// f32x4.lt: lhs < rhs is emitted as Fcmgt with the operands swapped
// (rhs > lhs).
void LiftoffAssembler::emit_f32x4_lt(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  const auto mask = dst.fp().V4S();
  const auto smaller = lhs.fp().V4S();
  const auto larger = rhs.fp().V4S();
  Fcmgt(mask, larger, smaller);
}
// f32x4.le: lhs <= rhs is emitted as Fcmge with the operands swapped
// (rhs >= lhs).
void LiftoffAssembler::emit_f32x4_le(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  const auto mask = dst.fp().V4S();
  const auto smaller = lhs.fp().V4S();
  const auto larger = rhs.fp().V4S();
  Fcmge(mask, larger, smaller);
}
// f64x2.eq: emits Fcmeq (floating-point compare equal) on the 2 x 64-bit
// lane views.
void LiftoffAssembler::emit_f64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  const auto mask = dst.fp().V2D();
  const auto a = lhs.fp().V2D();
  const auto b = rhs.fp().V2D();
  Fcmeq(mask, a, b);
}
// f64x2.ne: computes the equality mask with Fcmeq, then inverts it with Mvn.
void LiftoffAssembler::emit_f64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  const auto mask = dst.fp().V2D();
  Fcmeq(mask, lhs.fp().V2D(), rhs.fp().V2D());
  Mvn(mask, mask);
}
// f64x2.lt: lhs < rhs is emitted as Fcmgt with the operands swapped
// (rhs > lhs).
void LiftoffAssembler::emit_f64x2_lt(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  const auto mask = dst.fp().V2D();
  const auto smaller = lhs.fp().V2D();
  const auto larger = rhs.fp().V2D();
  Fcmgt(mask, larger, smaller);
}
// f64x2.le: lhs <= rhs is emitted as Fcmge with the operands swapped
// (rhs >= lhs).
void LiftoffAssembler::emit_f64x2_le(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  const auto mask = dst.fp().V2D();
  const auto smaller = lhs.fp().V2D();
  const auto larger = rhs.fp().V2D();
  Fcmge(mask, larger, smaller);
}
// s128.not: emits Mvn (bitwise NOT) over the whole 128-bit register.
void LiftoffAssembler::emit_s128_not(LiftoffRegister dst, LiftoffRegister src) {
  const auto out = dst.fp().V16B();
  const auto in = src.fp().V16B();
  Mvn(out, in);
}
// s128.and: emits And (bitwise AND) over the whole 128-bit registers.
void LiftoffAssembler::emit_s128_and(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  const auto out = dst.fp().V16B();
  const auto a = lhs.fp().V16B();
  const auto b = rhs.fp().V16B();
  And(out, a, b);
}
// s128.or: emits Orr (bitwise OR) over the whole 128-bit registers.
void LiftoffAssembler::emit_s128_or(LiftoffRegister dst, LiftoffRegister lhs,
                                    LiftoffRegister rhs) {
  const auto out = dst.fp().V16B();
  const auto a = lhs.fp().V16B();
  const auto b = rhs.fp().V16B();
  Orr(out, a, b);
}
// s128.xor: emits Eor (bitwise exclusive OR) over the whole 128-bit
// registers.
void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  const auto out = dst.fp().V16B();
  const auto a = lhs.fp().V16B();
  const auto b = rhs.fp().V16B();
  Eor(out, a, b);
}
// s128.select (bitselect): Bsl uses its destination as the selection mask, so
// the mask is first moved into dst (unless it is already there) and then Bsl
// picks bits from src1 and src2.
void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
                                        LiftoffRegister src1,
                                        LiftoffRegister src2,
                                        LiftoffRegister mask) {
  const auto out = dst.fp().V16B();
  const bool mask_elsewhere = dst != mask;
  if (mask_elsewhere) Mov(out, mask.fp().V16B());
  Bsl(out, src1.fp().V16B(), src2.fp().V16B());
}
// i32x4 from f32x4, signed: emits Fcvtzs (convert to signed integer, rounding
// toward zero) on the 4 x 32-bit lane views.
void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  const auto out = dst.fp().V4S();
  const auto in = src.fp().V4S();
  Fcvtzs(out, in);
}
// i32x4 from f32x4, unsigned: emits Fcvtzu (convert to unsigned integer,
// rounding toward zero) on the 4 x 32-bit lane views.
void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  const auto out = dst.fp().V4S();
  const auto in = src.fp().V4S();
  Fcvtzu(out, in);
}
// f32x4 from i32x4, signed: emits Scvtf on the 4 x 32-bit lane views.
void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  const auto out = dst.fp().V4S();
  const auto in = src.fp().V4S();
  Scvtf(out, in);
}
// f32x4 from i32x4, unsigned: emits Ucvtf on the 4 x 32-bit lane views.
void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  const auto out = dst.fp().V4S();
  const auto in = src.fp().V4S();
  Ucvtf(out, in);
}
// Narrows two i16x8 vectors into one i8x16 with signed saturation: Sqxtn
// writes lhs into the low half of dst, Sqxtn2 writes rhs into the high half.
void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  UseScratchRegisterScope temps(this);
  VRegister scratch = temps.AcquireV(kFormat8H);
  VRegister high_src = rhs.fp().V8H();
  // The first narrowing overwrites dst; if rhs lives there, save it first.
  if (dst == rhs) {
    Mov(scratch, high_src);
    high_src = scratch;
  }
  Sqxtn(dst.fp().V8B(), lhs.fp().V8H());
  Sqxtn2(dst.fp().V16B(), high_src);
}
// Narrows two i16x8 vectors into one i8x16 using Sqxtun/Sqxtun2 (signed
// input, unsigned saturated result): lhs goes to the low half of dst, rhs to
// the high half.
void LiftoffAssembler::emit_i8x16_uconvert_i16x8(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  UseScratchRegisterScope temps(this);
  VRegister scratch = temps.AcquireV(kFormat8H);
  VRegister high_src = rhs.fp().V8H();
  // The first narrowing overwrites dst; if rhs lives there, save it first.
  if (dst == rhs) {
    Mov(scratch, high_src);
    high_src = scratch;
  }
  Sqxtun(dst.fp().V8B(), lhs.fp().V8H());
  Sqxtun2(dst.fp().V16B(), high_src);
}
// Narrows two i32x4 vectors into one i16x8 with signed saturation: Sqxtn
// writes lhs into the low half of dst, Sqxtn2 writes rhs into the high half.
void LiftoffAssembler::emit_i16x8_sconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  UseScratchRegisterScope temps(this);
  VRegister scratch = temps.AcquireV(kFormat4S);
  VRegister high_src = rhs.fp().V4S();
  // The first narrowing overwrites dst; if rhs lives there, save it first.
  if (dst == rhs) {
    Mov(scratch, high_src);
    high_src = scratch;
  }
  Sqxtn(dst.fp().V4H(), lhs.fp().V4S());
  Sqxtn2(dst.fp().V8H(), high_src);
}
// Narrows two i32x4 vectors into one i16x8 using Sqxtun/Sqxtun2 (signed
// input, unsigned saturated result): lhs goes to the low half of dst, rhs to
// the high half.
void LiftoffAssembler::emit_i16x8_uconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  UseScratchRegisterScope temps(this);
  VRegister scratch = temps.AcquireV(kFormat4S);
  VRegister high_src = rhs.fp().V4S();
  // The first narrowing overwrites dst; if rhs lives there, save it first.
  if (dst == rhs) {
    Mov(scratch, high_src);
    high_src = scratch;
  }
  Sqxtun(dst.fp().V4H(), lhs.fp().V4S());
  Sqxtun2(dst.fp().V8H(), high_src);
}
// Sign-extends (Sxtl) the low eight 8-bit lanes of src to 16-bit lanes.
void LiftoffAssembler::emit_i16x8_sconvert_i8x16_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  const auto wide = dst.fp().V8H();
  const auto narrow = src.fp().V8B();
  Sxtl(wide, narrow);
}
// Sign-extends (Sxtl2) the high eight 8-bit lanes of src to 16-bit lanes.
void LiftoffAssembler::emit_i16x8_sconvert_i8x16_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  const auto wide = dst.fp().V8H();
  const auto narrow = src.fp().V16B();
  Sxtl2(wide, narrow);
}
// Zero-extends (Uxtl) the low eight 8-bit lanes of src to 16-bit lanes.
void LiftoffAssembler::emit_i16x8_uconvert_i8x16_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  const auto wide = dst.fp().V8H();
  const auto narrow = src.fp().V8B();
  Uxtl(wide, narrow);
}
// Zero-extends (Uxtl2) the high eight 8-bit lanes of src to 16-bit lanes.
void LiftoffAssembler::emit_i16x8_uconvert_i8x16_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  const auto wide = dst.fp().V8H();
  const auto narrow = src.fp().V16B();
  Uxtl2(wide, narrow);
}
// Sign-extends (Sxtl) the low four 16-bit lanes of src to 32-bit lanes.
void LiftoffAssembler::emit_i32x4_sconvert_i16x8_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  const auto wide = dst.fp().V4S();
  const auto narrow = src.fp().V4H();
  Sxtl(wide, narrow);
}
// Sign-extends (Sxtl2) the high four 16-bit lanes of src to 32-bit lanes.
void LiftoffAssembler::emit_i32x4_sconvert_i16x8_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  const auto wide = dst.fp().V4S();
  const auto narrow = src.fp().V8H();
  Sxtl2(wide, narrow);
}
// Zero-extends (Uxtl) the low four 16-bit lanes of src to 32-bit lanes.
void LiftoffAssembler::emit_i32x4_uconvert_i16x8_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  const auto wide = dst.fp().V4S();
  const auto narrow = src.fp().V4H();
  Uxtl(wide, narrow);
}
// Zero-extends (Uxtl2) the high four 16-bit lanes of src to 32-bit lanes.
void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  const auto wide = dst.fp().V4S();
  const auto narrow = src.fp().V8H();
  Uxtl2(wide, narrow);
}
// s128.andnot: emits Bic (bit clear: lhs AND NOT rhs) over the whole 128-bit
// registers.
void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
                                         LiftoffRegister lhs,
                                         LiftoffRegister rhs) {
  const auto out = dst.fp().V16B();
  const auto a = lhs.fp().V16B();
  const auto b = rhs.fp().V16B();
  Bic(out, a, b);
}
// i8x16.avgr_u: emits Urhadd (unsigned rounding halving add) on the
// 16 x 8-bit lane views.
void LiftoffAssembler::emit_i8x16_rounding_average_u(LiftoffRegister dst,
                                                     LiftoffRegister lhs,
                                                     LiftoffRegister rhs) {
  const auto out = dst.fp().V16B();
  const auto a = lhs.fp().V16B();
  const auto b = rhs.fp().V16B();
  Urhadd(out, a, b);
}
// i16x8.avgr_u: emits Urhadd (unsigned rounding halving add) on the
// 8 x 16-bit lane views.
void LiftoffAssembler::emit_i16x8_rounding_average_u(LiftoffRegister dst,
                                                     LiftoffRegister lhs,
                                                     LiftoffRegister rhs) {
  const auto out = dst.fp().V8H();
  const auto a = lhs.fp().V8H();
  const auto b = rhs.fp().V8H();
  Urhadd(out, a, b);
}
// i8x16.abs: emits Abs on the 16 x 8-bit lane views of src and dst.
void LiftoffAssembler::emit_i8x16_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  const auto out = dst.fp().V16B();
  const auto in = src.fp().V16B();
  Abs(out, in);
}
// i16x8.abs: emits Abs on the 8 x 16-bit lane views of src and dst.
void LiftoffAssembler::emit_i16x8_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  const auto out = dst.fp().V8H();
  const auto in = src.fp().V8H();
  Abs(out, in);
}
// i32x4.abs: emits Abs on the 4 x 32-bit lane views of src and dst.
void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  const auto out = dst.fp().V4S();
  const auto in = src.fp().V4S();
  Abs(out, in);
}
// Emits a stack check: loads the limit through the pointer held in
// limit_address (overwriting that register with the loaded value), compares
// sp against it, and branches to ool_code when sp is lower or same.
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
  const MemOperand limit_slot(limit_address);
  Ldr(limit_address, limit_slot);
  Cmp(sp, limit_address);
  B(ool_code, ls);
}
void LiftoffAssembler::CallTrapCallbackForTesting() {
CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0);
// NOTE(review): the body and closing brace of this function are missing in
// this copy of the file; only the signature (taking an AbortReason) is
// visible. Restore from upstream before building.
void LiftoffAssembler::AssertUnreachable(AbortReason reason) {
// NOTE(review): the body and closing brace of this function are missing in
// this copy of the file; only the signature (taking the register list to
// push) is visible. Restore from upstream before building.
void LiftoffAssembler::PushRegisters(LiftoffRegList regs) {
// NOTE(review): the body and closing brace of this function are missing in
// this copy of the file; only the signature (taking the register list to
// pop) is visible. Restore from upstream before building.
void LiftoffAssembler::PopRegisters(LiftoffRegList regs) {
// NOTE(review): the body and closing brace of this function are missing in
// this copy of the file; only the signature (taking a stack slot count) is
// visible. Restore from upstream before building.
void LiftoffAssembler::DropStackSlotsAndRet(uint32_t num_stack_slots) {
// Calls the C function `ext_ref`, passing all arguments through a buffer on
// the stack. Space for stack_bytes (rounded up to a quadword multiple) is
// claimed, each register in `args` is poked into the buffer at increasing
// byte offsets according to the signature's parameter types, and x0 is set to
// the buffer address (sp) as the single C argument. After the call, a return
// value (at most one, see the DCHECK) is moved from x0 to the result register
// if it is not already there, and if out_argument_type is not kWasmStmt an
// output value is read back from offset 0 of the buffer. Finally the claimed
// space is dropped.
// NOTE(review): standalone closing braces are missing in this copy of the
// file, so the extents of the for/if bodies below are not visible — confirm
// against upstream.
void LiftoffAssembler::CallC(const wasm::FunctionSig* sig,
const LiftoffRegister* args,
const LiftoffRegister* rets,
ValueType out_argument_type, int stack_bytes,
ExternalReference ext_ref) {
// The stack pointer is required to be quadword aligned.
int total_size = RoundUp(stack_bytes, kQuadWordSizeInBytes);
// Reserve space in the stack.
Claim(total_size, 1);
// Store the arguments back-to-back into the reserved buffer.
int arg_bytes = 0;
for (ValueType param_type : sig->parameters()) {
Poke(liftoff::GetRegFromType(*args++, param_type), arg_bytes);
arg_bytes += param_type.element_size_bytes();
DCHECK_LE(arg_bytes, stack_bytes);
// Pass a pointer to the buffer with the arguments to the C function.
Mov(x0, sp);
// Now call the C function.
constexpr int kNumCCallArgs = 1;
CallCFunction(ext_ref, kNumCCallArgs);
// Move return value to the right register.
const LiftoffRegister* next_result_reg = rets;
if (sig->return_count() > 0) {
DCHECK_EQ(1, sig->return_count());
constexpr Register kReturnReg = x0;
if (kReturnReg != next_result_reg->gp()) {
Move(*next_result_reg, LiftoffRegister(kReturnReg), sig->GetReturn(0));
// Load potential output value from the buffer on the stack.
if (out_argument_type != kWasmStmt) {
Peek(liftoff::GetRegFromType(*next_result_reg, out_argument_type), 0);
Drop(total_size, 1);
// Emits a call to `addr`, tagged with WASM_CALL relocation info.
void LiftoffAssembler::CallNativeWasmCode(Address addr) {
  constexpr RelocInfo::Mode kRelocMode = RelocInfo::WASM_CALL;
  Call(addr, kRelocMode);
}
// Indirect call through `target`.
// NOTE(review): the statements and closing brace of this function are missing
// in this copy of the file; only the signature and the original comment are
// visible. Restore from upstream before building.
void LiftoffAssembler::CallIndirect(const wasm::FunctionSig* sig,
compiler::CallDescriptor* call_descriptor,
Register target) {
// For Arm64, we have more cache registers than wasm parameters. That means
// that target will always be in a register.
// Emits a direct call to a wasm runtime stub defined in this module. Only the
// stub index is encoded as the call target; it is patched at relocation.
void LiftoffAssembler::CallRuntimeStub(WasmCode::RuntimeStubId sid) {
  const Address stub_index = static_cast<Address>(sid);
  Call(stub_index, RelocInfo::WASM_STUB_CALL);
}
// Reserves `size` bytes on the stack, rounded up to a quadword multiple
// because sp must stay quadword aligned, and returns the new sp in `addr`.
void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) {
  const uint32_t aligned_size = RoundUp(size, kQuadWordSizeInBytes);
  Claim(aligned_size, 1);
  Mov(addr, sp);
}
// Releases `size` bytes of stack, rounded up to a quadword multiple because
// sp must stay quadword aligned.
void LiftoffAssembler::DeallocateStackSlot(uint32_t size) {
  const uint32_t aligned_size = RoundUp(size, kQuadWordSizeInBytes);
  Drop(aligned_size, 1);
}
// Writes all recorded slots to the stack. First counts the slots (an S128
// value counts as two), claims that many rounded up to an even number, then
// pokes each slot at a decreasing offset. The value comes from a Liftoff
// stack slot (via a scratch register), a register, or an integer constant.
// NOTE(review): this copy of the file is damaged here — closing braces are
// missing and the kRegister case's Poke call is cut off after its first
// argument, so the case boundaries are not visible. Restore from upstream
// before building.
void LiftoffStackSlots::Construct() {
size_t num_slots = 0;
for (auto& slot : slots_) {
num_slots += slot.src_.type() == kWasmS128 ? 2 : 1;
// The stack pointer is required to be quadword aligned.
asm_->Claim(RoundUp(num_slots, 2));
// Start past the last slot and move the offset down before each write.
size_t poke_offset = num_slots * kXRegSize;
for (auto& slot : slots_) {
poke_offset -= slot.src_.type() == kWasmS128 ? kXRegSize * 2 : kXRegSize;
switch (slot.src_.loc()) {
case LiftoffAssembler::VarState::kStack: {
// Stack-to-stack copy through a scratch register chosen by value type.
UseScratchRegisterScope temps(asm_);
CPURegister scratch = liftoff::AcquireByType(&temps, slot.src_.type());
asm_->Ldr(scratch, liftoff::GetStackSlot(slot.src_offset_));
asm_->Poke(scratch, poke_offset);
case LiftoffAssembler::VarState::kRegister:
asm_->Poke(liftoff::GetRegFromType(slot.src_.reg(), slot.src_.type()),
case LiftoffAssembler::VarState::kIntConst:
DCHECK(slot.src_.type() == kWasmI32 || slot.src_.type() == kWasmI64);
// A zero constant is stored straight from the zero register; any other
// value is materialized in a scratch register first.
if (slot.src_.i32_const() == 0) {
Register zero_reg = slot.src_.type() == kWasmI32 ? wzr : xzr;
asm_->Poke(zero_reg, poke_offset);
} else {
UseScratchRegisterScope temps(asm_);
Register scratch = slot.src_.type() == kWasmI32 ? temps.AcquireW()
: temps.AcquireX();
asm_->Mov(scratch, int64_t{slot.src_.i32_const()});
asm_->Poke(scratch, poke_offset);
} // namespace wasm
} // namespace internal
} // namespace v8