// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <stddef.h>
#include <stdint.h>
#include <limits>
#include <optional>
#include <type_traits>
#include <vector>
#include "src/base/bits.h"
#include "src/base/flags.h"
#include "src/base/iterator.h"
#include "src/base/logging.h"
#include "src/base/macros.h"
#include "src/codegen/cpu-features.h"
#include "src/codegen/ia32/assembler-ia32.h"
#include "src/codegen/ia32/register-ia32.h"
#include "src/codegen/machine-type.h"
#include "src/codegen/macro-assembler-base.h"
#include "src/common/globals.h"
#include "src/compiler/backend/instruction-codes.h"
#include "src/compiler/backend/instruction-selector-adapter.h"
#include "src/compiler/backend/instruction-selector-impl.h"
#include "src/compiler/backend/instruction-selector.h"
#include "src/compiler/backend/instruction.h"
#include "src/compiler/common-operator.h"
#include "src/compiler/frame.h"
#include "src/compiler/globals.h"
#include "src/compiler/linkage.h"
#include "src/compiler/machine-operator.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/node-properties.h"
#include "src/compiler/node.h"
#include "src/compiler/opcodes.h"
#include "src/compiler/operator.h"
#include "src/compiler/turboshaft/opmasks.h"
#include "src/compiler/write-barrier-kind.h"
#include "src/flags/flags.h"
#include "src/utils/utils.h"
#include "src/zone/zone-containers.h"
#if V8_ENABLE_WEBASSEMBLY
#include "src/wasm/simd-shuffle.h"
#endif // V8_ENABLE_WEBASSEMBLY
namespace v8 {
namespace internal {
namespace compiler {
namespace {
struct LoadStoreView {
explicit LoadStoreView(const turboshaft::Operation& op) {
DCHECK(op.Is<turboshaft::LoadOp>() || op.Is<turboshaft::StoreOp>());
if (const turboshaft::LoadOp* load = op.TryCast<turboshaft::LoadOp>()) {
base = load->base();
index = load->index();
offset = load->offset;
} else {
DCHECK(op.Is<turboshaft::StoreOp>());
const turboshaft::StoreOp& store = op.Cast<turboshaft::StoreOp>();
base = store.base();
index = store.index();
offset = store.offset;
}
}
turboshaft::OpIndex base;
turboshaft::OptionalOpIndex index;
int32_t offset;
};
template <typename Adapter>
struct ScaledIndexMatch {
using node_t = typename Adapter::node_t;
node_t base;
node_t index;
int scale;
};
template <typename Adapter>
struct BaseWithScaledIndexAndDisplacementMatch {
using node_t = typename Adapter::node_t;
node_t base = {};
node_t index = {};
int scale = 0;
int32_t displacement = 0;
DisplacementMode displacement_mode = kPositiveDisplacement;
};
// Copied from x64, dropped kWord64 constant support.
bool MatchScaledIndex(InstructionSelectorT<TurboshaftAdapter>* selector,
turboshaft::OpIndex node, turboshaft::OpIndex* index,
int* scale, bool* power_of_two_plus_one) {
DCHECK_NOT_NULL(index);
DCHECK_NOT_NULL(scale);
using namespace turboshaft; // NOLINT(build/namespaces)
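// Matches a constant scale factor: 1, 2, 4 and 8 map directly to scales 0-3;
// if |plus_one| is provided, 3, 5 and 9 are also accepted as 2^scale + 1
// (e.g. index * 5 can be emitted as index * 4 + index).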
auto MatchScaleConstant = [](const Operation& op, int& scale,
bool* plus_one) {
const ConstantOp* constant = op.TryCast<ConstantOp>();
if (constant == nullptr) return false;
if (constant->kind != ConstantOp::Kind::kWord32) return false;
uint64_t value = constant->integral();
if (plus_one) *plus_one = false;
if (value == 1) return (scale = 0), true;
if (value == 2) return (scale = 1), true;
if (value == 4) return (scale = 2), true;
if (value == 8) return (scale = 3), true;
if (plus_one == nullptr) return false;
*plus_one = true;
if (value == 3) return (scale = 1), true;
if (value == 5) return (scale = 2), true;
if (value == 9) return (scale = 3), true;
return false;
};
const Operation& op = selector->Get(node);
if (const WordBinopOp* binop = op.TryCast<WordBinopOp>()) {
if (binop->kind != WordBinopOp::Kind::kMul) return false;
if (MatchScaleConstant(selector->Get(binop->right()), *scale,
power_of_two_plus_one)) {
*index = binop->left();
return true;
}
if (MatchScaleConstant(selector->Get(binop->left()), *scale,
power_of_two_plus_one)) {
*index = binop->right();
return true;
}
return false;
} else if (const ShiftOp* shift = op.TryCast<ShiftOp>()) {
if (shift->kind != ShiftOp::Kind::kShiftLeft) return false;
int32_t scale_value;
if (selector->MatchIntegralWord32Constant(shift->right(), &scale_value)) {
if (scale_value < 0 || scale_value > 3) return false;
*index = shift->left();
*scale = static_cast<int>(scale_value);
if (power_of_two_plus_one) *power_of_two_plus_one = false;
return true;
}
}
return false;
}
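// Wraps MatchScaledIndex: when a power-of-two-plus-one scale matched (e.g.
// index * 9), the index is reused as the base so the addressing mode computes
// index * 8 + index.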
std::optional<ScaledIndexMatch<TurboshaftAdapter>> TryMatchScaledIndex(
InstructionSelectorT<TurboshaftAdapter>* selector, turboshaft::OpIndex node,
bool allow_power_of_two_plus_one) {
ScaledIndexMatch<TurboshaftAdapter> match;
bool plus_one = false;
if (MatchScaledIndex(selector, node, &match.index, &match.scale,
allow_power_of_two_plus_one ? &plus_one : nullptr)) {
match.base = plus_one ? match.index : turboshaft::OpIndex{};
return match;
}
return std::nullopt;
}
// Copied verbatim from x64 (just renamed).
std::optional<BaseWithScaledIndexAndDisplacementMatch<TurboshaftAdapter>>
TryMatchBaseWithScaledIndexAndDisplacementForWordBinop(
InstructionSelectorT<TurboshaftAdapter>* selector, turboshaft::OpIndex left,
turboshaft::OpIndex right) {
using namespace turboshaft; // NOLINT(build/namespaces)
BaseWithScaledIndexAndDisplacementMatch<TurboshaftAdapter> result;
result.displacement_mode = kPositiveDisplacement;
auto OwnedByAddressingOperand = [](OpIndex) {
// TODO(nicohartmann@): Consider providing this. For now we just allow
// everything to be covered regardless of other uses.
return true;
};
// Check (S + ...)
if (MatchScaledIndex(selector, left, &result.index, &result.scale, nullptr) &&
OwnedByAddressingOperand(left)) {
result.displacement_mode = kPositiveDisplacement;
// Check (S + (... binop ...))
if (const WordBinopOp* right_binop =
selector->Get(right).TryCast<WordBinopOp>()) {
// Check (S + (B - D))
if (right_binop->kind == WordBinopOp::Kind::kSub &&
OwnedByAddressingOperand(right)) {
if (!selector->MatchIntegralWord32Constant(right_binop->right(),
&result.displacement)) {
return std::nullopt;
}
result.base = right_binop->left();
result.displacement_mode = kNegativeDisplacement;
return result;
}
// Check (S + (... + ...))
if (right_binop->kind == WordBinopOp::Kind::kAdd &&
OwnedByAddressingOperand(right)) {
if (selector->MatchIntegralWord32Constant(right_binop->right(),
&result.displacement)) {
// (S + (B + D))
result.base = right_binop->left();
} else if (selector->MatchIntegralWord32Constant(
right_binop->left(), &result.displacement)) {
// (S + (D + B))
result.base = right_binop->right();
} else {
// Treat it as (S + B)
result.base = right;
result.displacement = 0;
}
return result;
}
}
// Check (S + D)
if (selector->MatchIntegralWord32Constant(right, &result.displacement)) {
result.base = OpIndex{};
return result;
}
// Treat it as (S + B)
result.base = right;
result.displacement = 0;
return result;
}
// Check ((... + ...) + ...)
if (const WordBinopOp* left_add = selector->Get(left).TryCast<WordBinopOp>();
left_add && left_add->kind == WordBinopOp::Kind::kAdd &&
OwnedByAddressingOperand(left)) {
// Check ((S + ...) + ...)
if (MatchScaledIndex(selector, left_add->left(), &result.index,
&result.scale, nullptr)) {
result.displacement_mode = kPositiveDisplacement;
// Check ((S + D) + B)
if (selector->MatchIntegralWord32Constant(left_add->right(),
&result.displacement)) {
result.base = right;
return result;
}
// Check ((S + B) + D)
if (selector->MatchIntegralWord32Constant(right, &result.displacement)) {
result.base = left_add->right();
return result;
}
// Treat it as (B + B) and use index as right B.
result.base = left;
result.index = right;
result.scale = 0;
DCHECK_EQ(result.displacement, 0);
return result;
}
}
DCHECK_EQ(result.index, OpIndex{});
DCHECK_EQ(result.scale, 0);
result.displacement_mode = kPositiveDisplacement;
// Check (B + D)
if (selector->MatchIntegralWord32Constant(right, &result.displacement)) {
result.base = left;
return result;
}
// Treat as (B + B) and use index as left B.
result.index = left;
result.base = right;
return result;
}
// Copied verbatim from x64 (just renamed).
std::optional<BaseWithScaledIndexAndDisplacementMatch<TurboshaftAdapter>>
TryMatchBaseWithScaledIndexAndDisplacement(
InstructionSelectorT<TurboshaftAdapter>* selector,
turboshaft::OpIndex node) {
using namespace turboshaft; // NOLINT(build/namespaces)
// The BaseWithIndexAndDisplacementMatcher canonicalizes the order of
// displacements and scale factors that are used as inputs, so instead of
// enumerating all possible patterns by brute force, checking for node
// clusters using the following templates in the following order suffices
// to find all of the interesting cases (S = index * scale, B = base
// input, D = displacement input):
//
// (S + (B + D))
// (S + (B + B))
// (S + D)
// (S + B)
// ((S + D) + B)
// ((S + B) + D)
// ((B + D) + B)
// ((B + B) + D)
// (B + D)
// (B + B)
BaseWithScaledIndexAndDisplacementMatch<TurboshaftAdapter> result;
result.displacement_mode = kPositiveDisplacement;
const Operation& op = selector->Get(node);
if (const LoadOp* load = op.TryCast<LoadOp>()) {
result.base = load->base();
result.index = load->index().value_or_invalid();
result.scale = load->element_size_log2;
result.displacement = load->offset;
if (load->kind.tagged_base) result.displacement -= kHeapObjectTag;
return result;
} else if (const StoreOp* store = op.TryCast<StoreOp>()) {
result.base = store->base();
result.index = store->index().value_or_invalid();
result.scale = store->element_size_log2;
result.displacement = store->offset;
if (store->kind.tagged_base) result.displacement -= kHeapObjectTag;
return result;
} else if (op.Is<WordBinopOp>()) {
// Nothing to do here, fall into the case below.
#ifdef V8_ENABLE_WEBASSEMBLY
} else if (const Simd128LaneMemoryOp* lane_op =
op.TryCast<Simd128LaneMemoryOp>()) {
result.base = lane_op->base();
result.index = lane_op->index();
result.scale = 0;
result.displacement = 0;
if (lane_op->kind.tagged_base) result.displacement -= kHeapObjectTag;
return result;
} else if (const Simd128LoadTransformOp* load_transform =
op.TryCast<Simd128LoadTransformOp>()) {
result.base = load_transform->base();
result.index = load_transform->index();
DCHECK_EQ(load_transform->offset, 0);
result.scale = 0;
result.displacement = 0;
DCHECK(!load_transform->load_kind.tagged_base);
return result;
#endif // V8_ENABLE_WEBASSEMBLY
} else {
return std::nullopt;
}
const WordBinopOp& binop = op.Cast<WordBinopOp>();
OpIndex left = binop.left();
OpIndex right = binop.right();
return TryMatchBaseWithScaledIndexAndDisplacementForWordBinop(selector, left,
right);
}
} // namespace
// Adds IA32-specific methods for generating operands.
template <typename Adapter>
class IA32OperandGeneratorT final : public OperandGeneratorT<Adapter> {
public:
OPERAND_GENERATOR_T_BOILERPLATE(Adapter)
explicit IA32OperandGeneratorT(InstructionSelectorT<Adapter>* selector)
: super(selector) {}
InstructionOperand UseByteRegister(node_t node) {
// TODO(titzer): encode byte register use constraints.
return UseFixed(node, edx);
}
bool CanBeMemoryOperand(InstructionCode opcode, node_t node, node_t input,
int effect_level) {
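// A load can be folded into this instruction as a memory operand only if it
// is covered by the node and no effectful operation may run in between
// (i.e. both are at the same effect level).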
if (!this->IsLoadOrLoadImmutable(input)) return false;
if (!selector()->CanCover(node, input)) return false;
if (effect_level != selector()->GetEffectLevel(input)) {
return false;
}
MachineRepresentation rep =
this->load_view(input).loaded_rep().representation();
switch (opcode) {
case kIA32And:
case kIA32Or:
case kIA32Xor:
case kIA32Add:
case kIA32Sub:
case kIA32Cmp:
case kIA32Test:
return rep == MachineRepresentation::kWord32 || IsAnyTagged(rep);
case kIA32Cmp16:
case kIA32Test16:
return rep == MachineRepresentation::kWord16;
case kIA32Cmp8:
case kIA32Test8:
return rep == MachineRepresentation::kWord8;
default:
break;
}
return false;
}
bool CanBeImmediate(node_t node) {
if (this->IsExternalConstant(node)) return true;
if (!this->is_constant(node)) return false;
auto constant = this->constant_view(node);
if (constant.is_int32() || constant.is_relocatable_int32() ||
constant.is_relocatable_int64()) {
return true;
}
if (constant.is_number_zero()) {
return true;
}
// If we want to support HeapConstant nodes here, we must find a way
// to check that they're not in new-space without dereferencing the
// handle (which isn't safe to do concurrently).
return false;
}
int32_t GetImmediateIntegerValue(node_t node) {
DCHECK(CanBeImmediate(node));
auto constant = this->constant_view(node);
if (constant.is_int32()) return constant.int32_value();
DCHECK(constant.is_number_zero());
return 0;
}
bool ValueFitsIntoImmediate(int64_t value) const {
// int32_t min will overflow if displacement mode is kNegativeDisplacement.
return std::numeric_limits<int32_t>::min() < value &&
value <= std::numeric_limits<int32_t>::max();
}
AddressingMode GenerateMemoryOperandInputs(
optional_node_t index, int scale, node_t base, int32_t displacement,
DisplacementMode displacement_mode, InstructionOperand inputs[],
size_t* input_count,
RegisterMode register_mode = RegisterMode::kRegister) {
AddressingMode mode = kMode_MRI;
if (displacement_mode == kNegativeDisplacement) {
displacement = base::bits::WraparoundNeg32(displacement);
}
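// Fold a constant base into the displacement so that no register is needed
// for it.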
if (this->valid(base) && this->is_constant(base)) {
auto constant_base = this->constant_view(base);
if (constant_base.is_int32()) {
displacement = base::bits::WraparoundAdd32(displacement,
constant_base.int32_value());
base = node_t{};
}
}
if (this->valid(base)) {
inputs[(*input_count)++] = UseRegisterWithMode(base, register_mode);
if (this->valid(index)) {
DCHECK(scale >= 0 && scale <= 3);
inputs[(*input_count)++] =
UseRegisterWithMode(this->value(index), register_mode);
if (displacement != 0) {
inputs[(*input_count)++] = TempImmediate(displacement);
static const AddressingMode kMRnI_modes[] = {kMode_MR1I, kMode_MR2I,
kMode_MR4I, kMode_MR8I};
mode = kMRnI_modes[scale];
} else {
static const AddressingMode kMRn_modes[] = {kMode_MR1, kMode_MR2,
kMode_MR4, kMode_MR8};
mode = kMRn_modes[scale];
}
} else {
if (displacement == 0) {
mode = kMode_MR;
} else {
inputs[(*input_count)++] = TempImmediate(displacement);
mode = kMode_MRI;
}
}
} else {
DCHECK(scale >= 0 && scale <= 3);
if (this->valid(index)) {
inputs[(*input_count)++] =
UseRegisterWithMode(this->value(index), register_mode);
if (displacement != 0) {
inputs[(*input_count)++] = TempImmediate(displacement);
static const AddressingMode kMnI_modes[] = {kMode_MRI, kMode_M2I,
kMode_M4I, kMode_M8I};
mode = kMnI_modes[scale];
} else {
static const AddressingMode kMn_modes[] = {kMode_MR, kMode_M2,
kMode_M4, kMode_M8};
mode = kMn_modes[scale];
}
} else {
inputs[(*input_count)++] = TempImmediate(displacement);
return kMode_MI;
}
}
return mode;
}
AddressingMode GenerateMemoryOperandInputs(
Node* index, int scale, Node* base, Node* displacement_node,
DisplacementMode displacement_mode, InstructionOperand inputs[],
size_t* input_count,
RegisterMode register_mode = RegisterMode::kRegister) {
int32_t displacement = (displacement_node == nullptr)
? 0
: OpParameter<int32_t>(displacement_node->op());
return GenerateMemoryOperandInputs(index, scale, base, displacement,
displacement_mode, inputs, input_count,
register_mode);
}
AddressingMode GetEffectiveAddressMemoryOperand(
node_t node, InstructionOperand inputs[], size_t* input_count,
RegisterMode register_mode = RegisterMode::kRegister) {
if constexpr (Adapter::IsTurboshaft) {
using namespace turboshaft; // NOLINT(build/namespaces)
const Operation& op = this->Get(node);
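// Loads and stores whose base is an external reference addressable relative
// to the roots register can use root-relative addressing (kMode_Root) and
// need no base register.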
if (op.Is<LoadOp>() || op.Is<StoreOp>()) {
LoadStoreView load_or_store(op);
if (ExternalReference reference;
this->MatchExternalConstant(load_or_store.base, &reference) &&
!load_or_store.index.valid()) {
if (selector()->CanAddressRelativeToRootsRegister(reference)) {
const ptrdiff_t delta =
load_or_store.offset +
MacroAssemblerBase::RootRegisterOffsetForExternalReference(
selector()->isolate(), reference);
if (is_int32(delta)) {
inputs[(*input_count)++] =
TempImmediate(static_cast<int32_t>(delta));
return kMode_Root;
}
}
}
}
auto m = TryMatchBaseWithScaledIndexAndDisplacement(selector(), node);
DCHECK(m.has_value());
if (TurboshaftAdapter::valid(m->base) &&
this->Get(m->base).template Is<LoadRootRegisterOp>()) {
DCHECK(!this->valid(m->index));
DCHECK_EQ(m->scale, 0);
DCHECK(ValueFitsIntoImmediate(m->displacement));
inputs[(*input_count)++] =
UseImmediate(static_cast<int>(m->displacement));
return kMode_Root;
} else if (ValueFitsIntoImmediate(m->displacement)) {
return GenerateMemoryOperandInputs(
m->index, m->scale, m->base, m->displacement, m->displacement_mode,
inputs, input_count, register_mode);
} else if (!TurboshaftAdapter::valid(m->base) &&
m->displacement_mode == kPositiveDisplacement) {
// The displacement cannot be an immediate, but we can use the
// displacement as base instead and still benefit from addressing
// modes for the scale.
UNIMPLEMENTED();
} else {
// TODO(nicohartmann@): Turn this into a `DCHECK` once we have some
// coverage.
CHECK_EQ(m->displacement, 0);
inputs[(*input_count)++] = UseRegisterWithMode(m->base, register_mode);
inputs[(*input_count)++] = UseRegisterWithMode(m->index, register_mode);
return kMode_MR1;
}
} else {
{
LoadMatcher<ExternalReferenceMatcher> m(node);
if (m.index().HasResolvedValue() && m.object().HasResolvedValue() &&
selector()->CanAddressRelativeToRootsRegister(
m.object().ResolvedValue())) {
ptrdiff_t const delta =
m.index().ResolvedValue() +
MacroAssemblerBase::RootRegisterOffsetForExternalReference(
selector()->isolate(), m.object().ResolvedValue());
if (is_int32(delta)) {
inputs[(*input_count)++] =
TempImmediate(static_cast<int32_t>(delta));
return kMode_Root;
}
}
}
BaseWithIndexAndDisplacement32Matcher m(node, AddressOption::kAllowAll);
DCHECK(m.matches());
if (m.base() != nullptr &&
m.base()->opcode() == IrOpcode::kLoadRootRegister) {
DCHECK_EQ(m.index(), nullptr);
DCHECK_EQ(m.scale(), 0);
inputs[(*input_count)++] = UseImmediate(m.displacement());
return kMode_Root;
} else if ((m.displacement() == nullptr ||
CanBeImmediate(m.displacement()))) {
return GenerateMemoryOperandInputs(
m.index(), m.scale(), m.base(), m.displacement(),
m.displacement_mode(), inputs, input_count, register_mode);
} else {
inputs[(*input_count)++] =
UseRegisterWithMode(node->InputAt(0), register_mode);
inputs[(*input_count)++] =
UseRegisterWithMode(node->InputAt(1), register_mode);
return kMode_MR1;
}
}
}
InstructionOperand GetEffectiveIndexOperand(node_t index,
AddressingMode* mode) {
if (CanBeImmediate(index)) {
*mode = kMode_MRI;
return UseImmediate(index);
} else {
*mode = kMode_MR1;
return UseUniqueRegister(index);
}
}
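// A node that is no longer live after this use can be overwritten, which
// makes it a better left (destination) operand for IA32's two-address
// instructions.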
bool CanBeBetterLeftOperand(node_t node) const {
return !selector()->IsLive(node);
}
};
namespace {
ArchOpcode GetLoadOpcode(LoadRepresentation load_rep) {
ArchOpcode opcode;
switch (load_rep.representation()) {
case MachineRepresentation::kFloat32:
opcode = kIA32Movss;
break;
case MachineRepresentation::kFloat64:
opcode = kIA32Movsd;
break;
case MachineRepresentation::kBit: // Fall through.
case MachineRepresentation::kWord8:
opcode = load_rep.IsSigned() ? kIA32Movsxbl : kIA32Movzxbl;
break;
case MachineRepresentation::kWord16:
opcode = load_rep.IsSigned() ? kIA32Movsxwl : kIA32Movzxwl;
break;
case MachineRepresentation::kTaggedSigned: // Fall through.
case MachineRepresentation::kTaggedPointer: // Fall through.
case MachineRepresentation::kTagged: // Fall through.
case MachineRepresentation::kWord32:
opcode = kIA32Movl;
break;
case MachineRepresentation::kSimd128:
opcode = kIA32Movdqu;
break;
case MachineRepresentation::kFloat16:
UNIMPLEMENTED();
case MachineRepresentation::kSimd256: // Fall through.
case MachineRepresentation::kCompressedPointer: // Fall through.
case MachineRepresentation::kCompressed: // Fall through.
case MachineRepresentation::kProtectedPointer: // Fall through.
case MachineRepresentation::kIndirectPointer: // Fall through.
case MachineRepresentation::kSandboxedPointer: // Fall through.
case MachineRepresentation::kWord64: // Fall through.
case MachineRepresentation::kMapWord: // Fall through.
case MachineRepresentation::kNone:
UNREACHABLE();
}
return opcode;
}
template <typename Adapter>
void VisitRO(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, ArchOpcode opcode) {
IA32OperandGeneratorT<Adapter> g(selector);
typename Adapter::node_t input = selector->input_at(node, 0);
// We have to use a byte register as input to movsxb.
InstructionOperand input_op =
opcode == kIA32Movsxbl ? g.UseFixed(input, eax) : g.Use(input);
selector->Emit(opcode, g.DefineAsRegister(node), input_op);
}
template <typename Adapter>
void VisitROWithTemp(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, ArchOpcode opcode) {
IA32OperandGeneratorT<Adapter> g(selector);
InstructionOperand temps[] = {g.TempRegister()};
selector->Emit(opcode, g.DefineAsRegister(node),
g.Use(selector->input_at(node, 0)), arraysize(temps), temps);
}
template <typename Adapter>
void VisitROWithTempSimd(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, ArchOpcode opcode) {
IA32OperandGeneratorT<Adapter> g(selector);
InstructionOperand temps[] = {g.TempSimd128Register()};
selector->Emit(opcode, g.DefineAsRegister(node),
g.UseUniqueRegister(selector->input_at(node, 0)),
arraysize(temps), temps);
}
template <typename Adapter>
void VisitRR(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, InstructionCode opcode) {
IA32OperandGeneratorT<Adapter> g(selector);
selector->Emit(opcode, g.DefineAsRegister(node),
g.UseRegister(selector->input_at(node, 0)));
}
template <typename Adapter>
void VisitRROFloat(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, ArchOpcode opcode) {
IA32OperandGeneratorT<Adapter> g(selector);
InstructionOperand operand0 = g.UseRegister(selector->input_at(node, 0));
InstructionOperand operand1 = g.Use(selector->input_at(node, 1));
if (selector->IsSupported(AVX)) {
selector->Emit(opcode, g.DefineAsRegister(node), operand0, operand1);
} else {
selector->Emit(opcode, g.DefineSameAsFirst(node), operand0, operand1);
}
}
// For float unary operations. Also allocates a temporary general register
// for use in external operands. If a temp is not required, use VisitRRSimd
// (since float and SIMD registers are the same on IA32).
template <typename Adapter>
void VisitFloatUnop(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node,
typename Adapter::node_t input, ArchOpcode opcode) {
IA32OperandGeneratorT<Adapter> g(selector);
InstructionOperand temps[] = {g.TempRegister()};
// No need for unique because inputs are float but temp is general.
if (selector->IsSupported(AVX)) {
selector->Emit(opcode, g.DefineAsRegister(node), g.UseRegister(input),
arraysize(temps), temps);
} else {
selector->Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(input),
arraysize(temps), temps);
}
}
#if V8_ENABLE_WEBASSEMBLY
template <typename Adapter>
void VisitRRSimd(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, ArchOpcode avx_opcode,
ArchOpcode sse_opcode) {
IA32OperandGeneratorT<Adapter> g(selector);
InstructionOperand operand0 = g.UseRegister(selector->input_at(node, 0));
if (selector->IsSupported(AVX)) {
selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0);
} else {
selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0);
}
}
template <typename Adapter>
void VisitRRSimd(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, ArchOpcode opcode) {
VisitRRSimd(selector, node, opcode, opcode);
}
// TODO(v8:9198): Like VisitRROFloat, but for SIMD. SSE requires operand1 to be
// a register as we don't have memory alignment yet. For AVX, memory operands
// are fine, but can have performance issues if not aligned to 16/32 bytes
// (based on load size), see SDM Vol 1, chapter 14.9
template <typename Adapter>
void VisitRROSimd(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, ArchOpcode avx_opcode,
ArchOpcode sse_opcode) {
IA32OperandGeneratorT<Adapter> g(selector);
InstructionOperand operand0 = g.UseRegister(selector->input_at(node, 0));
if (selector->IsSupported(AVX)) {
selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0,
g.UseRegister(selector->input_at(node, 1)));
} else {
selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0,
g.UseRegister(selector->input_at(node, 1)));
}
}
template <typename Adapter>
void VisitRRRSimd(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, ArchOpcode opcode) {
IA32OperandGeneratorT<Adapter> g(selector);
InstructionOperand dst = selector->IsSupported(AVX)
? g.DefineAsRegister(node)
: g.DefineSameAsFirst(node);
InstructionOperand operand0 = g.UseRegister(selector->input_at(node, 0));
InstructionOperand operand1 = g.UseRegister(selector->input_at(node, 1));
selector->Emit(opcode, dst, operand0, operand1);
}
int32_t GetSimdLaneConstant(InstructionSelectorT<TurboshaftAdapter>* selector,
turboshaft::OpIndex node) {
const turboshaft::Simd128ExtractLaneOp& op =
selector->Get(node).template Cast<turboshaft::Simd128ExtractLaneOp>();
return op.lane;
}
int32_t GetSimdLaneConstant(InstructionSelectorT<TurbofanAdapter>* selector,
Node* node) {
return OpParameter<int32_t>(node->op());
}
template <typename Adapter>
void VisitRRISimd(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, ArchOpcode opcode) {
IA32OperandGeneratorT<Adapter> g(selector);
InstructionOperand operand0 = g.UseRegister(selector->input_at(node, 0));
InstructionOperand operand1 =
g.UseImmediate(GetSimdLaneConstant(selector, node));
// 8x16 uses movsx_b on dest to extract a byte, which only works
// if dest is a byte register.
InstructionOperand dest = opcode == kIA32I8x16ExtractLaneS
? g.DefineAsFixed(node, eax)
: g.DefineAsRegister(node);
selector->Emit(opcode, dest, operand0, operand1);
}
template <typename Adapter>
void VisitRRISimd(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, ArchOpcode avx_opcode,
ArchOpcode sse_opcode) {
IA32OperandGeneratorT<Adapter> g(selector);
InstructionOperand operand0 = g.UseRegister(selector->input_at(node, 0));
InstructionOperand operand1 =
g.UseImmediate(GetSimdLaneConstant(selector, node));
if (selector->IsSupported(AVX)) {
selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0, operand1);
} else {
selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0, operand1);
}
}
template <typename Adapter>
void VisitRROSimdShift(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, ArchOpcode opcode) {
IA32OperandGeneratorT<Adapter> g(selector);
if (g.CanBeImmediate(selector->input_at(node, 1))) {
selector->Emit(opcode, g.DefineSameAsFirst(node),
g.UseRegister(selector->input_at(node, 0)),
g.UseImmediate(selector->input_at(node, 1)));
} else {
InstructionOperand operand0 =
g.UseUniqueRegister(selector->input_at(node, 0));
InstructionOperand operand1 =
g.UseUniqueRegister(selector->input_at(node, 1));
InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()};
selector->Emit(opcode, g.DefineSameAsFirst(node), operand0, operand1,
arraysize(temps), temps);
}
}
template <typename Adapter>
void VisitRRRR(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, InstructionCode opcode) {
IA32OperandGeneratorT<Adapter> g(selector);
selector->Emit(opcode, g.DefineAsRegister(node),
g.UseRegister(selector->input_at(node, 0)),
g.UseRegister(selector->input_at(node, 1)),
g.UseRegister(selector->input_at(node, 2)));
}
template <typename Adapter>
void VisitI8x16Shift(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, ArchOpcode opcode) {
IA32OperandGeneratorT<Adapter> g(selector);
InstructionOperand output = CpuFeatures::IsSupported(AVX)
? g.UseRegister(node)
: g.DefineSameAsFirst(node);
if (g.CanBeImmediate(selector->input_at(node, 1))) {
if (opcode == kIA32I8x16ShrS) {
selector->Emit(opcode, output, g.UseRegister(selector->input_at(node, 0)),
g.UseImmediate(selector->input_at(node, 1)));
} else {
InstructionOperand temps[] = {g.TempRegister()};
selector->Emit(opcode, output, g.UseRegister(selector->input_at(node, 0)),
g.UseImmediate(selector->input_at(node, 1)),
arraysize(temps), temps);
}
} else {
InstructionOperand operand0 =
g.UseUniqueRegister(selector->input_at(node, 0));
InstructionOperand operand1 =
g.UseUniqueRegister(selector->input_at(node, 1));
InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()};
selector->Emit(opcode, output, operand0, operand1, arraysize(temps), temps);
}
}
#endif // V8_ENABLE_WEBASSEMBLY
} // namespace
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitStackSlot(node_t node) {
StackSlotRepresentation rep = this->stack_slot_representation_of(node);
int slot =
frame_->AllocateSpillSlot(rep.size(), rep.alignment(), rep.is_tagged());
OperandGenerator g(this);
Emit(kArchStackSlot, g.DefineAsRegister(node),
sequence()->AddImmediate(Constant(slot)), 0, nullptr);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitAbortCSADcheck(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
Emit(kArchAbortCSADcheck, g.NoOutput(),
g.UseFixed(this->input_at(node, 0), edx));
}
#if V8_ENABLE_WEBASSEMBLY
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitLoadLane(node_t node) {
InstructionCode opcode;
int lane;
if constexpr (Adapter::IsTurboshaft) {
using namespace turboshaft; // NOLINT(build/namespaces)
const Simd128LaneMemoryOp& load =
this->Get(node).template Cast<Simd128LaneMemoryOp>();
lane = load.lane;
switch (load.lane_kind) {
case Simd128LaneMemoryOp::LaneKind::k8:
opcode = kIA32Pinsrb;
break;
case Simd128LaneMemoryOp::LaneKind::k16:
opcode = kIA32Pinsrw;
break;
case Simd128LaneMemoryOp::LaneKind::k32:
opcode = kIA32Pinsrd;
break;
case Simd128LaneMemoryOp::LaneKind::k64:
// pinsrq not available on IA32.
if (lane == 0) {
opcode = kIA32Movlps;
} else {
DCHECK_EQ(1, lane);
opcode = kIA32Movhps;
}
break;
}
// IA32 supports unaligned loads.
DCHECK(!load.kind.maybe_unaligned);
// Trap handler is not supported on IA32.
DCHECK(!load.kind.with_trap_handler);
} else {
// Turbofan.
LoadLaneParameters params = LoadLaneParametersOf(node->op());
lane = params.laneidx;
if (params.rep == MachineType::Int8()) {
opcode = kIA32Pinsrb;
} else if (params.rep == MachineType::Int16()) {
opcode = kIA32Pinsrw;
} else if (params.rep == MachineType::Int32()) {
opcode = kIA32Pinsrd;
} else if (params.rep == MachineType::Int64()) {
// pinsrq not available on IA32.
if (params.laneidx == 0) {
opcode = kIA32Movlps;
} else {
DCHECK_EQ(1, params.laneidx);
opcode = kIA32Movhps;
}
} else {
UNREACHABLE();
}
// IA32 supports unaligned loads.
DCHECK_NE(params.kind, MemoryAccessKind::kUnaligned);
// Trap handler is not supported on IA32.
DCHECK_NE(params.kind, MemoryAccessKind::kProtectedByTrapHandler);
}
IA32OperandGeneratorT<Adapter> g(this);
InstructionOperand outputs[] = {IsSupported(AVX) ? g.DefineAsRegister(node)
: g.DefineSameAsFirst(node)};
// Input 0 is value node, 1 is lane idx, and GetEffectiveAddressMemoryOperand
// uses up to 3 inputs. This ordering is consistent with other operations that
// use the same opcode.
InstructionOperand inputs[5];
size_t input_count = 0;
inputs[input_count++] = g.UseRegister(this->input_at(node, 2));
inputs[input_count++] = g.UseImmediate(lane);
AddressingMode mode =
g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
opcode |= AddressingModeField::encode(mode);
DCHECK_GE(5, input_count);
Emit(opcode, 1, outputs, input_count, inputs);
}
template <>
void InstructionSelectorT<TurboshaftAdapter>::VisitLoadTransform(node_t node) {
using namespace turboshaft; // NOLINT(build/namespaces)
const Simd128LoadTransformOp& op =
this->Get(node).Cast<Simd128LoadTransformOp>();
ArchOpcode opcode;
switch (op.transform_kind) {
case Simd128LoadTransformOp::TransformKind::k8x8S:
opcode = kIA32S128Load8x8S;
break;
case Simd128LoadTransformOp::TransformKind::k8x8U:
opcode = kIA32S128Load8x8U;
break;
case Simd128LoadTransformOp::TransformKind::k16x4S:
opcode = kIA32S128Load16x4S;
break;
case Simd128LoadTransformOp::TransformKind::k16x4U:
opcode = kIA32S128Load16x4U;
break;
case Simd128LoadTransformOp::TransformKind::k32x2S:
opcode = kIA32S128Load32x2S;
break;
case Simd128LoadTransformOp::TransformKind::k32x2U:
opcode = kIA32S128Load32x2U;
break;
case Simd128LoadTransformOp::TransformKind::k8Splat:
opcode = kIA32S128Load8Splat;
break;
case Simd128LoadTransformOp::TransformKind::k16Splat:
opcode = kIA32S128Load16Splat;
break;
case Simd128LoadTransformOp::TransformKind::k32Splat:
opcode = kIA32S128Load32Splat;
break;
case Simd128LoadTransformOp::TransformKind::k64Splat:
opcode = kIA32S128Load64Splat;
break;
case Simd128LoadTransformOp::TransformKind::k32Zero:
opcode = kIA32Movss;
break;
case Simd128LoadTransformOp::TransformKind::k64Zero:
opcode = kIA32Movsd;
break;
}
// IA32 supports unaligned loads.
DCHECK(!op.load_kind.maybe_unaligned);
// Trap handler is not supported on IA32.
DCHECK(!op.load_kind.with_trap_handler);
VisitLoad(node, node, opcode);
}
template <>
void InstructionSelectorT<TurbofanAdapter>::VisitLoadTransform(Node* node) {
LoadTransformParameters params = LoadTransformParametersOf(node->op());
InstructionCode opcode;
switch (params.transformation) {
case LoadTransformation::kS128Load8Splat:
opcode = kIA32S128Load8Splat;
break;
case LoadTransformation::kS128Load16Splat:
opcode = kIA32S128Load16Splat;
break;
case LoadTransformation::kS128Load32Splat:
opcode = kIA32S128Load32Splat;
break;
case LoadTransformation::kS128Load64Splat:
opcode = kIA32S128Load64Splat;
break;
case LoadTransformation::kS128Load8x8S:
opcode = kIA32S128Load8x8S;
break;
case LoadTransformation::kS128Load8x8U:
opcode = kIA32S128Load8x8U;
break;
case LoadTransformation::kS128Load16x4S:
opcode = kIA32S128Load16x4S;
break;
case LoadTransformation::kS128Load16x4U:
opcode = kIA32S128Load16x4U;
break;
case LoadTransformation::kS128Load32x2S:
opcode = kIA32S128Load32x2S;
break;
case LoadTransformation::kS128Load32x2U:
opcode = kIA32S128Load32x2U;
break;
case LoadTransformation::kS128Load32Zero:
opcode = kIA32Movss;
break;
case LoadTransformation::kS128Load64Zero:
opcode = kIA32Movsd;
break;
default:
UNREACHABLE();
}
// IA32 supports unaligned loads.
DCHECK_NE(params.kind, MemoryAccessKind::kUnaligned);
// Trap handler is not supported on IA32.
DCHECK_NE(params.kind, MemoryAccessKind::kProtectedByTrapHandler);
VisitLoad(node, node, opcode);
}
#endif // V8_ENABLE_WEBASSEMBLY
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitLoad(node_t node, node_t value,
InstructionCode opcode) {
IA32OperandGeneratorT<Adapter> g(this);
InstructionOperand outputs[1];
outputs[0] = g.DefineAsRegister(node);
InstructionOperand inputs[3];
size_t input_count = 0;
AddressingMode mode =
g.GetEffectiveAddressMemoryOperand(value, inputs, &input_count);
InstructionCode code = opcode | AddressingModeField::encode(mode);
Emit(code, 1, outputs, input_count, inputs);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitLoad(node_t node) {
LoadRepresentation load_rep = this->load_view(node).loaded_rep();
DCHECK(!load_rep.IsMapWord());
VisitLoad(node, node, GetLoadOpcode(load_rep));
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitProtectedLoad(node_t node) {
// Trap handler is not supported on IA32.
UNREACHABLE();
}
namespace {
ArchOpcode GetStoreOpcode(MachineRepresentation rep) {
switch (rep) {
case MachineRepresentation::kFloat32:
return kIA32Movss;
case MachineRepresentation::kFloat64:
return kIA32Movsd;
case MachineRepresentation::kBit: // Fall through.
case MachineRepresentation::kWord8:
return kIA32Movb;
case MachineRepresentation::kWord16:
return kIA32Movw;
case MachineRepresentation::kTaggedSigned: // Fall through.
case MachineRepresentation::kTaggedPointer: // Fall through.
case MachineRepresentation::kTagged: // Fall through.
case MachineRepresentation::kWord32:
return kIA32Movl;
case MachineRepresentation::kSimd128:
return kIA32Movdqu;
case MachineRepresentation::kFloat16:
UNIMPLEMENTED();
case MachineRepresentation::kSimd256: // Fall through.
case MachineRepresentation::kCompressedPointer: // Fall through.
case MachineRepresentation::kCompressed: // Fall through.
case MachineRepresentation::kProtectedPointer: // Fall through.
case MachineRepresentation::kIndirectPointer: // Fall through.
case MachineRepresentation::kSandboxedPointer: // Fall through.
case MachineRepresentation::kWord64: // Fall through.
case MachineRepresentation::kMapWord: // Fall through.
case MachineRepresentation::kNone:
UNREACHABLE();
}
}
ArchOpcode GetSeqCstStoreOpcode(MachineRepresentation rep) {
switch (rep) {
case MachineRepresentation::kWord8:
return kAtomicExchangeInt8;
case MachineRepresentation::kWord16:
return kAtomicExchangeInt16;
case MachineRepresentation::kTaggedSigned: // Fall through.
case MachineRepresentation::kTaggedPointer: // Fall through.
case MachineRepresentation::kTagged: // Fall through.
case MachineRepresentation::kWord32:
return kAtomicExchangeWord32;
default:
UNREACHABLE();
}
}
template <typename Adapter>
void VisitAtomicExchange(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, ArchOpcode opcode,
MachineRepresentation rep) {
using node_t = typename Adapter::node_t;
IA32OperandGeneratorT<Adapter> g(selector);
node_t base = selector->input_at(node, 0);
node_t index = selector->input_at(node, 1);
node_t value = selector->input_at(node, 2);
AddressingMode addressing_mode;
InstructionOperand value_operand = (rep == MachineRepresentation::kWord8)
? g.UseFixed(value, edx)
: g.UseUniqueRegister(value);
InstructionOperand inputs[] = {
value_operand, g.UseUniqueRegister(base),
g.GetEffectiveIndexOperand(index, &addressing_mode)};
InstructionOperand outputs[] = {
(rep == MachineRepresentation::kWord8)
// Using DefineSameAsFirst requires the register to be unallocated.
? g.DefineAsFixed(node, edx)
: g.DefineSameAsFirst(node)};
InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
selector->Emit(code, 1, outputs, arraysize(inputs), inputs);
}
template <typename Adapter>
void VisitStoreCommon(InstructionSelectorT<Adapter>* selector,
const typename Adapter::StoreView& store) {
using node_t = typename Adapter::node_t;
using optional_node_t = typename Adapter::optional_node_t;
IA32OperandGeneratorT<Adapter> g(selector);
node_t base = store.base();
optional_node_t index = store.index();
node_t value = store.value();
int32_t displacement = store.displacement();
uint8_t element_size_log2 = store.element_size_log2();
std::optional<AtomicMemoryOrder> atomic_order = store.memory_order();
StoreRepresentation store_rep = store.stored_rep();
WriteBarrierKind write_barrier_kind = store_rep.write_barrier_kind();
MachineRepresentation rep = store_rep.representation();
const bool is_seqcst =
atomic_order && *atomic_order == AtomicMemoryOrder::kSeqCst;
if (v8_flags.enable_unconditional_write_barriers && CanBeTaggedPointer(rep)) {
write_barrier_kind = kFullWriteBarrier;
}
if (write_barrier_kind != kNoWriteBarrier &&
!v8_flags.disable_write_barriers) {
DCHECK(CanBeTaggedPointer(rep));
AddressingMode addressing_mode;
InstructionOperand inputs[4];
size_t input_count = 0;
addressing_mode = g.GenerateMemoryOperandInputs(
index, element_size_log2, base, displacement,
DisplacementMode::kPositiveDisplacement, inputs, &input_count,
IA32OperandGeneratorT<Adapter>::RegisterMode::kUniqueRegister);
DCHECK_LT(input_count, 4);
inputs[input_count++] = g.UseUniqueRegister(value);
RecordWriteMode record_write_mode =
WriteBarrierKindToRecordWriteMode(write_barrier_kind);
InstructionOperand temps[] = {g.TempRegister(), g.TempRegister()};
size_t const temp_count = arraysize(temps);
InstructionCode code = is_seqcst ? kArchAtomicStoreWithWriteBarrier
: kArchStoreWithWriteBarrier;
code |= AddressingModeField::encode(addressing_mode);
code |= RecordWriteModeField::encode(record_write_mode);
selector->Emit(code, 0, nullptr, input_count, inputs, temp_count, temps);
} else {
InstructionOperand inputs[4];
size_t input_count = 0;
// To inform the register allocator that xchg clobbers its input.
InstructionOperand outputs[1];
size_t output_count = 0;
ArchOpcode opcode;
AddressingMode addressing_mode;
if (is_seqcst) {
// SeqCst stores emit XCHG instead of MOV, so encode the inputs as we
// would for XCHG. XCHG can't encode the value as an immediate and has
// fewer addressing modes available.
if (rep == MachineRepresentation::kWord8 ||
rep == MachineRepresentation::kBit) {
inputs[input_count++] = g.UseFixed(value, edx);
outputs[output_count++] = g.DefineAsFixed(store, edx);
} else {
inputs[input_count++] = g.UseUniqueRegister(value);
outputs[output_count++] = g.DefineSameAsFirst(store);
}
addressing_mode = g.GetEffectiveAddressMemoryOperand(
store, inputs, &input_count,
IA32OperandGeneratorT<Adapter>::RegisterMode::kUniqueRegister);
opcode = GetSeqCstStoreOpcode(rep);
} else {
// Release and non-atomic stores emit MOV.
// https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
InstructionOperand val;
if (g.CanBeImmediate(value)) {
val = g.UseImmediate(value);
} else if (!atomic_order && (rep == MachineRepresentation::kWord8 ||
rep == MachineRepresentation::kBit)) {
val = g.UseByteRegister(value);
} else {
val = g.UseUniqueRegister(value);
}
addressing_mode = g.GetEffectiveAddressMemoryOperand(
store, inputs, &input_count,
IA32OperandGeneratorT<Adapter>::RegisterMode::kUniqueRegister);
inputs[input_count++] = val;
opcode = GetStoreOpcode(rep);
}
InstructionCode code =
opcode | AddressingModeField::encode(addressing_mode);
selector->Emit(code, output_count, outputs, input_count, inputs);
}
}
} // namespace
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitStorePair(node_t node) {
UNREACHABLE();
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitStore(node_t node) {
VisitStoreCommon(this, this->store_view(node));
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitProtectedStore(node_t node) {
// Trap handler is not supported on IA32.
UNREACHABLE();
}
#if V8_ENABLE_WEBASSEMBLY
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitStoreLane(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
InstructionCode opcode = kArchNop;
int lane;
if constexpr (Adapter::IsTurboshaft) {
using namespace turboshaft; // NOLINT(build/namespaces)
const Simd128LaneMemoryOp& store =
this->Get(node).template Cast<Simd128LaneMemoryOp>();
lane = store.lane;
switch (store.lane_kind) {
case Simd128LaneMemoryOp::LaneKind::k8:
opcode = kIA32Pextrb;
break;
case Simd128LaneMemoryOp::LaneKind::k16:
opcode = kIA32Pextrw;
break;
case Simd128LaneMemoryOp::LaneKind::k32:
opcode = kIA32S128Store32Lane;
break;
case Simd128LaneMemoryOp::LaneKind::k64:
if (lane == 0) {
opcode = kIA32Movlps;
} else {
DCHECK_EQ(1, lane);
opcode = kIA32Movhps;
}
break;
}
} else {
StoreLaneParameters params = StoreLaneParametersOf(node->op());
lane = params.laneidx;
if (params.rep == MachineRepresentation::kWord8) {
opcode = kIA32Pextrb;
} else if (params.rep == MachineRepresentation::kWord16) {
opcode = kIA32Pextrw;
} else if (params.rep == MachineRepresentation::kWord32) {
opcode = kIA32S128Store32Lane;
} else if (params.rep == MachineRepresentation::kWord64) {
if (params.laneidx == 0) {
opcode = kIA32Movlps;
} else {
DCHECK_EQ(1, params.laneidx);
opcode = kIA32Movhps;
}
} else {
UNREACHABLE();
}
}
InstructionOperand inputs[4];
size_t input_count = 0;
AddressingMode addressing_mode =
g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
opcode |= AddressingModeField::encode(addressing_mode);
InstructionOperand value_operand = g.UseRegister(this->input_at(node, 2));
inputs[input_count++] = value_operand;
inputs[input_count++] = g.UseImmediate(lane);
DCHECK_GE(4, input_count);
Emit(opcode, 0, nullptr, input_count, inputs);
}
#endif // V8_ENABLE_WEBASSEMBLY
// Architecture supports unaligned access, therefore VisitLoad is used instead
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitUnalignedLoad(node_t node) {
UNREACHABLE();
}
// Architecture supports unaligned access, therefore VisitStore is used instead
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitUnalignedStore(node_t node) {
UNREACHABLE();
}
namespace {
// Shared routine for multiple binary operations.
template <typename Adapter>
void VisitBinop(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, InstructionCode opcode,
FlagsContinuationT<Adapter>* cont) {
IA32OperandGeneratorT<Adapter> g(selector);
auto left = selector->input_at(node, 0);
auto right = selector->input_at(node, 1);
InstructionOperand inputs[6];
size_t input_count = 0;
InstructionOperand outputs[1];
size_t output_count = 0;
// TODO(turbofan): match complex addressing modes.
if (left == right) {
// If both inputs refer to the same operand, enforce allocating a register
// for both of them to ensure that we don't end up generating code like
// this:
//
// mov eax, [ebp-0x10]
// add eax, [ebp-0x10]
// jo label
InstructionOperand const input = g.UseRegister(left);
inputs[input_count++] = input;
inputs[input_count++] = input;
} else if (g.CanBeImmediate(right)) {
inputs[input_count++] = g.UseRegister(left);
inputs[input_count++] = g.UseImmediate(right);
} else {
int effect_level = selector->GetEffectLevel(node, cont);
if (selector->IsCommutative(node) && g.CanBeBetterLeftOperand(right) &&
(!g.CanBeBetterLeftOperand(left) ||
!g.CanBeMemoryOperand(opcode, node, right, effect_level))) {
std::swap(left, right);
}
if (g.CanBeMemoryOperand(opcode, node, right, effect_level)) {
inputs[input_count++] = g.UseRegister(left);
AddressingMode addressing_mode =
g.GetEffectiveAddressMemoryOperand(right, inputs, &input_count);
opcode |= AddressingModeField::encode(addressing_mode);
} else {
inputs[input_count++] = g.UseRegister(left);
inputs[input_count++] = g.Use(right);
}
}
outputs[output_count++] = g.DefineSameAsFirst(node);
DCHECK_NE(0u, input_count);
DCHECK_EQ(1u, output_count);
DCHECK_GE(arraysize(inputs), input_count);
DCHECK_GE(arraysize(outputs), output_count);
selector->EmitWithContinuation(opcode, output_count, outputs, input_count,
inputs, cont);
}
template <typename Adapter>
void VisitBinop(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, InstructionCode opcode) {
FlagsContinuationT<Adapter> cont;
VisitBinop(selector, node, opcode, &cont);
}
} // namespace
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32And(node_t node) {
VisitBinop(this, node, kIA32And);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32Or(node_t node) {
VisitBinop(this, node, kIA32Or);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32Xor(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
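// XOR with the all-ones constant (-1) is a bitwise NOT, so emit a single
// kIA32Not instead of materializing the constant.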
if constexpr (Adapter::IsTurboshaft) {
const turboshaft::WordBinopOp& binop =
this->Get(node).template Cast<turboshaft::WordBinopOp>();
int32_t constant;
if (this->MatchIntegralWord32Constant(binop.right(), &constant) &&
constant == -1) {
Emit(kIA32Not, g.DefineSameAsFirst(node), g.UseRegister(binop.left()));
return;
}
} else {
Int32BinopMatcher m(node);
if (m.right().Is(-1)) {
Emit(kIA32Not, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()));
return;
}
}
VisitBinop(this, node, kIA32Xor);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitStackPointerGreaterThan(
node_t node, FlagsContinuation* cont) {
StackCheckKind kind;
if constexpr (Adapter::IsTurboshaft) {
kind = this->Get(node)
.template Cast<turboshaft::StackPointerGreaterThanOp>()
.kind;
} else {
kind = StackCheckKindOf(node->op());
}
{ // Temporary scope to minimize indentation change churn below.
InstructionCode opcode = kArchStackPointerGreaterThan |
MiscField::encode(static_cast<int>(kind));
int effect_level = GetEffectLevel(node, cont);
IA32OperandGeneratorT<Adapter> g(this);
// No outputs.
InstructionOperand* const outputs = nullptr;
const int output_count = 0;
// Applying an offset to this stack check requires a temp register. Offsets
// are only applied to the first stack check. If applying an offset, we must
// ensure the input and temp registers do not alias, thus kUniqueRegister.
InstructionOperand temps[] = {g.TempRegister()};
const int temp_count = (kind == StackCheckKind::kJSFunctionEntry) ? 1 : 0;
const auto register_mode = (kind == StackCheckKind::kJSFunctionEntry)
? OperandGenerator::kUniqueRegister
: OperandGenerator::kRegister;
node_t value = this->input_at(node, 0);
if (g.CanBeMemoryOperand(kIA32Cmp, node, value, effect_level)) {
DCHECK(this->IsLoadOrLoadImmutable(value));
// GetEffectiveAddressMemoryOperand can create at most 3 inputs.
static constexpr int kMaxInputCount = 3;
size_t input_count = 0;
InstructionOperand inputs[kMaxInputCount];
AddressingMode addressing_mode = g.GetEffectiveAddressMemoryOperand(
value, inputs, &input_count, register_mode);
opcode |= AddressingModeField::encode(addressing_mode);
DCHECK_LE(input_count, kMaxInputCount);
EmitWithContinuation(opcode, output_count, outputs, input_count, inputs,
temp_count, temps, cont);
} else {
InstructionOperand inputs[] = {
g.UseRegisterWithMode(value, register_mode)};
static constexpr int input_count = arraysize(inputs);
EmitWithContinuation(opcode, output_count, outputs, input_count, inputs,
temp_count, temps, cont);
}
}
}
// Shared routine for multiple shift operations.
template <typename Adapter>
static inline void VisitShift(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node,
ArchOpcode opcode) {
IA32OperandGeneratorT<Adapter> g(selector);
auto left = selector->input_at(node, 0);
auto right = selector->input_at(node, 1);
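// On IA32 a non-immediate shift count must be in the cl register, hence the
// UseFixed(right, ecx) below.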
if (g.CanBeImmediate(right)) {
selector->Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(left),
g.UseImmediate(right));
} else {
selector->Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(left),
g.UseFixed(right, ecx));
}
}
namespace {
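// One-operand mul/imul take one factor in eax and write the 64-bit product to
// edx:eax, so the high word is fixed to edx and eax is reserved as a temp.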
template <typename Adapter>
void VisitMulHigh(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, ArchOpcode opcode) {
IA32OperandGeneratorT<Adapter> g(selector);
InstructionOperand temps[] = {g.TempRegister(eax)};
selector->Emit(opcode, g.DefineAsFixed(node, edx),
g.UseFixed(selector->input_at(node, 0), eax),
g.UseUniqueRegister(selector->input_at(node, 1)),
arraysize(temps), temps);
}
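// div/idiv take the dividend in edx:eax and leave the quotient in eax; edx is
// clobbered, so it is reserved as a temp.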
template <typename Adapter>
void VisitDiv(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, ArchOpcode opcode) {
IA32OperandGeneratorT<Adapter> g(selector);
InstructionOperand temps[] = {g.TempRegister(edx)};
selector->Emit(opcode, g.DefineAsFixed(node, eax),
g.UseFixed(selector->input_at(node, 0), eax),
g.UseUnique(selector->input_at(node, 1)), arraysize(temps),
temps);
}
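// div/idiv leave the remainder in edx and clobber eax, so the result is fixed
// to edx and eax is reserved as a temp.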
template <typename Adapter>
void VisitMod(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, ArchOpcode opcode) {
IA32OperandGeneratorT<Adapter> g(selector);
InstructionOperand temps[] = {g.TempRegister(eax)};
selector->Emit(opcode, g.DefineAsFixed(node, edx),
g.UseFixed(selector->input_at(node, 0), eax),
g.UseUnique(selector->input_at(node, 1)), arraysize(temps),
temps);
}
// {Displacement} is either Adapter::node_t or int32_t.
template <typename Adapter, typename Displacement>
void EmitLea(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t result, typename Adapter::node_t index,
int scale, typename Adapter::node_t base,
Displacement displacement, DisplacementMode displacement_mode) {
IA32OperandGeneratorT<Adapter> g(selector);
InstructionOperand inputs[4];
size_t input_count = 0;
AddressingMode mode =
g.GenerateMemoryOperandInputs(index, scale, base, displacement,
displacement_mode, inputs, &input_count);
DCHECK_NE(0u, input_count);
DCHECK_GE(arraysize(inputs), input_count);
InstructionOperand outputs[1];
outputs[0] = g.DefineAsRegister(result);
InstructionCode opcode = AddressingModeField::encode(mode) | kIA32Lea;
selector->Emit(opcode, 1, outputs, input_count, inputs);
}
} // namespace
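// A left shift by a constant in the range 0-3 can be emitted as an lea with a
// scaled index instead of a shl.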
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32Shl(node_t node) {
if constexpr (Adapter::IsTurboshaft) {
if (auto m = TryMatchScaledIndex(this, node, true)) {
EmitLea(this, node, m->index, m->scale, m->base, 0,
kPositiveDisplacement);
return;
}
} else {
Int32ScaleMatcher m(node, true);
if (m.matches()) {
Node* index = node->InputAt(0);
Node* base = m.power_of_two_plus_one() ? index : nullptr;
EmitLea(this, node, index, m.scale(), base, nullptr,
kPositiveDisplacement);
return;
}
}
VisitShift(this, node, kIA32Shl);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32Shr(node_t node) {
VisitShift(this, node, kIA32Shr);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32Sar(node_t node) {
VisitShift(this, node, kIA32Sar);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitInt32PairAdd(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
node_t projection1 = FindProjection(node, 1);
if (this->valid(projection1)) {
// We use UseUniqueRegister here to avoid register sharing with the temp
// register.
InstructionOperand inputs[] = {
g.UseRegister(this->input_at(node, 0)),
g.UseUniqueRegisterOrSlotOrConstant(this->input_at(node, 1)),
g.UseRegister(this->input_at(node, 2)),
g.UseUniqueRegister(this->input_at(node, 3))};
InstructionOperand outputs[] = {g.DefineSameAsFirst(node),
g.DefineAsRegister(projection1)};
InstructionOperand temps[] = {g.TempRegister()};
Emit(kIA32AddPair, 2, outputs, 4, inputs, 1, temps);
} else {
// The high word of the result is not used, so we emit the standard 32 bit
// instruction.
Emit(kIA32Add, g.DefineSameAsFirst(node),
g.UseRegister(this->input_at(node, 0)),
g.Use(this->input_at(node, 2)));
}
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitInt32PairSub(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
node_t projection1 = FindProjection(node, 1);
if (this->valid(projection1)) {
// We use UseUniqueRegister here to avoid register sharing with the temp
// register.
InstructionOperand inputs[] = {
g.UseRegister(this->input_at(node, 0)),
g.UseUniqueRegisterOrSlotOrConstant(this->input_at(node, 1)),
g.UseRegister(this->input_at(node, 2)),
g.UseUniqueRegister(this->input_at(node, 3))};
InstructionOperand outputs[] = {g.DefineSameAsFirst(node),
g.DefineAsRegister(projection1)};
InstructionOperand temps[] = {g.TempRegister()};
Emit(kIA32SubPair, 2, outputs, 4, inputs, 1, temps);
} else {
// The high word of the result is not used, so we emit the standard 32 bit
// instruction.
Emit(kIA32Sub, g.DefineSameAsFirst(node),
g.UseRegister(this->input_at(node, 0)),
g.Use(this->input_at(node, 2)));
}
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitInt32PairMul(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
node_t projection1 = FindProjection(node, 1);
if (this->valid(projection1)) {
// InputAt(3) explicitly shares ecx with OutputRegister(1) to save one
// register and one mov instruction.
InstructionOperand inputs[] = {
g.UseUnique(this->input_at(node, 0)),
g.UseUniqueRegisterOrSlotOrConstant(this->input_at(node, 1)),
g.UseUniqueRegister(this->input_at(node, 2)),
g.UseFixed(this->input_at(node, 3), ecx)};
InstructionOperand outputs[] = {g.DefineAsFixed(node, eax),
g.DefineAsFixed(projection1, ecx)};
InstructionOperand temps[] = {g.TempRegister(edx)};
Emit(kIA32MulPair, 2, outputs, 4, inputs, 1, temps);
} else {
// The high word of the result is not used, so we emit the standard 32 bit
// instruction.
Emit(kIA32Imul, g.DefineSameAsFirst(node),
g.UseRegister(this->input_at(node, 0)),
g.Use(this->input_at(node, 2)));
}
}
template <typename Adapter>
void VisitWord32PairShift(InstructionSelectorT<Adapter>* selector,
InstructionCode opcode,
typename Adapter::node_t node) {
using node_t = typename Adapter::node_t;
IA32OperandGeneratorT<Adapter> g(selector);
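// Pair shifts consume the low word in eax and the high word in edx; a
// non-immediate shift count must be in ecx.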
node_t shift = selector->input_at(node, 2);
InstructionOperand shift_operand;
if (g.CanBeImmediate(shift)) {
shift_operand = g.UseImmediate(shift);
} else {
shift_operand = g.UseFixed(shift, ecx);
}
InstructionOperand inputs[] = {g.UseFixed(selector->input_at(node, 0), eax),
g.UseFixed(selector->input_at(node, 1), edx),
shift_operand};
InstructionOperand outputs[2];
InstructionOperand temps[1];
int32_t output_count = 0;
int32_t temp_count = 0;
outputs[output_count++] = g.DefineAsFixed(node, eax);
node_t projection1 = selector->FindProjection(node, 1);
if (selector->valid(projection1)) {
outputs[output_count++] = g.DefineAsFixed(projection1, edx);
} else {
temps[temp_count++] = g.TempRegister(edx);
}
selector->Emit(opcode, output_count, outputs, 3, inputs, temp_count, temps);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32PairShl(node_t node) {
VisitWord32PairShift(this, kIA32ShlPair, node);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32PairShr(node_t node) {
VisitWord32PairShift(this, kIA32ShrPair, node);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32PairSar(node_t node) {
VisitWord32PairShift(this, kIA32SarPair, node);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32Rol(node_t node) {
VisitShift(this, node, kIA32Rol);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32Ror(node_t node) {
VisitShift(this, node, kIA32Ror);
}
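// Operation tables that share a generic visitor shape. The macros further
// below expand one Visit* method per entry, forwarding to VisitRO,
// VisitROWithTemp, VisitROWithTempSimd, VisitRR, VisitRROFloat, or
// VisitFloatUnop.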
#define RO_OP_T_LIST(V) \
V(Float32Sqrt, kIA32Float32Sqrt) \
V(Float64Sqrt, kIA32Float64Sqrt) \
V(ChangeInt32ToFloat64, kSSEInt32ToFloat64) \
V(TruncateFloat32ToInt32, kIA32Float32ToInt32) \
V(TruncateFloat64ToFloat32, kIA32Float64ToFloat32) \
V(BitcastFloat32ToInt32, kIA32BitcastFI) \
V(BitcastInt32ToFloat32, kIA32BitcastIF) \
V(Float64ExtractLowWord32, kIA32Float64ExtractLowWord32) \
V(Float64ExtractHighWord32, kIA32Float64ExtractHighWord32) \
V(ChangeFloat64ToInt32, kIA32Float64ToInt32) \
V(ChangeFloat32ToFloat64, kIA32Float32ToFloat64) \
V(RoundInt32ToFloat32, kSSEInt32ToFloat32) \
V(RoundFloat64ToInt32, kIA32Float64ToInt32) \
V(Word32Clz, kIA32Lzcnt) \
V(Word32Ctz, kIA32Tzcnt) \
V(Word32Popcnt, kIA32Popcnt) \
V(SignExtendWord8ToInt32, kIA32Movsxbl) \
V(SignExtendWord16ToInt32, kIA32Movsxwl)
#define RO_WITH_TEMP_OP_T_LIST(V) V(ChangeUint32ToFloat64, kIA32Uint32ToFloat64)
#define RO_WITH_TEMP_SIMD_OP_T_LIST(V) \
V(TruncateFloat64ToUint32, kIA32Float64ToUint32) \
V(TruncateFloat32ToUint32, kIA32Float32ToUint32) \
V(ChangeFloat64ToUint32, kIA32Float64ToUint32)
#define RR_OP_T_LIST(V) \
V(Float32RoundDown, kIA32Float32Round | MiscField::encode(kRoundDown)) \
V(Float64RoundDown, kIA32Float64Round | MiscField::encode(kRoundDown)) \
V(Float32RoundUp, kIA32Float32Round | MiscField::encode(kRoundUp)) \
V(Float64RoundUp, kIA32Float64Round | MiscField::encode(kRoundUp)) \
V(Float32RoundTruncate, kIA32Float32Round | MiscField::encode(kRoundToZero)) \
V(Float64RoundTruncate, kIA32Float64Round | MiscField::encode(kRoundToZero)) \
V(Float32RoundTiesEven, \
kIA32Float32Round | MiscField::encode(kRoundToNearest)) \
V(Float64RoundTiesEven, \
kIA32Float64Round | MiscField::encode(kRoundToNearest)) \
V(TruncateFloat64ToWord32, kArchTruncateDoubleToI) \
IF_WASM(V, F32x4Ceil, kIA32F32x4Round | MiscField::encode(kRoundUp)) \
IF_WASM(V, F32x4Floor, kIA32F32x4Round | MiscField::encode(kRoundDown)) \
IF_WASM(V, F32x4Trunc, kIA32F32x4Round | MiscField::encode(kRoundToZero)) \
IF_WASM(V, F32x4NearestInt, \
kIA32F32x4Round | MiscField::encode(kRoundToNearest)) \
IF_WASM(V, F64x2Ceil, kIA32F64x2Round | MiscField::encode(kRoundUp)) \
IF_WASM(V, F64x2Floor, kIA32F64x2Round | MiscField::encode(kRoundDown)) \
IF_WASM(V, F64x2Trunc, kIA32F64x2Round | MiscField::encode(kRoundToZero)) \
IF_WASM(V, F64x2NearestInt, \
kIA32F64x2Round | MiscField::encode(kRoundToNearest)) \
IF_WASM(V, F64x2Sqrt, kIA32F64x2Sqrt)
#define RRO_FLOAT_OP_T_LIST(V) \
V(Float32Add, kFloat32Add) \
V(Float64Add, kFloat64Add) \
V(Float32Sub, kFloat32Sub) \
V(Float64Sub, kFloat64Sub) \
V(Float32Mul, kFloat32Mul) \
V(Float64Mul, kFloat64Mul) \
V(Float32Div, kFloat32Div) \
V(Float64Div, kFloat64Div)
#define FLOAT_UNOP_T_LIST(V) \
V(Float32Abs, kFloat32Abs) \
V(Float64Abs, kFloat64Abs) \
V(Float32Neg, kFloat32Neg) \
V(Float64Neg, kFloat64Neg) \
IF_WASM(V, F32x4Abs, kFloat32Abs) \
IF_WASM(V, F32x4Neg, kFloat32Neg) \
IF_WASM(V, F64x2Abs, kFloat64Abs) \
IF_WASM(V, F64x2Neg, kFloat64Neg)
#define RO_VISITOR(Name, opcode) \
template <typename Adapter> \
void InstructionSelectorT<Adapter>::Visit##Name(node_t node) { \
VisitRO(this, node, opcode); \
}
RO_OP_T_LIST(RO_VISITOR)
#undef RO_VISITOR
#undef RO_OP_T_LIST
#define RO_WITH_TEMP_VISITOR(Name, opcode) \
template <typename Adapter> \
void InstructionSelectorT<Adapter>::Visit##Name(node_t node) { \
VisitROWithTemp(this, node, opcode); \
}
RO_WITH_TEMP_OP_T_LIST(RO_WITH_TEMP_VISITOR)
#undef RO_WITH_TEMP_VISITOR
#undef RO_WITH_TEMP_OP_T_LIST
#define RO_WITH_TEMP_SIMD_VISITOR(Name, opcode) \
template <typename Adapter> \
void InstructionSelectorT<Adapter>::Visit##Name(node_t node) { \
VisitROWithTempSimd(this, node, opcode); \
}
RO_WITH_TEMP_SIMD_OP_T_LIST(RO_WITH_TEMP_SIMD_VISITOR)
#undef RO_WITH_TEMP_SIMD_VISITOR
#undef RO_WITH_TEMP_SIMD_OP_T_LIST
#define RR_VISITOR(Name, opcode) \
template <typename Adapter> \
void InstructionSelectorT<Adapter>::Visit##Name(node_t node) { \
VisitRR(this, node, opcode); \
}
RR_OP_T_LIST(RR_VISITOR)
#undef RR_VISITOR
#undef RR_OP_T_LIST
#define RRO_FLOAT_VISITOR(Name, opcode) \
template <typename Adapter> \
void InstructionSelectorT<Adapter>::Visit##Name(node_t node) { \
VisitRROFloat(this, node, opcode); \
}
RRO_FLOAT_OP_T_LIST(RRO_FLOAT_VISITOR)
#undef RRO_FLOAT_VISITOR
#undef RRO_FLOAT_OP_T_LIST
#define FLOAT_UNOP_VISITOR(Name, opcode) \
template <typename Adapter> \
void InstructionSelectorT<Adapter>::Visit##Name(node_t node) { \
DCHECK_EQ(this->value_input_count(node), 1); \
VisitFloatUnop(this, node, this->input_at(node, 0), opcode); \
}
FLOAT_UNOP_T_LIST(FLOAT_UNOP_VISITOR)
#undef FLOAT_UNOP_VISITOR
#undef FLOAT_UNOP_T_LIST
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitTruncateFloat64ToFloat16RawBits(
node_t node) {
UNIMPLEMENTED();
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32ReverseBits(node_t node) {
UNREACHABLE();
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord64ReverseBytes(node_t node) {
UNREACHABLE();
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32ReverseBytes(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
DCHECK_EQ(this->value_input_count(node), 1);
Emit(kIA32Bswap, g.DefineSameAsFirst(node),
g.UseRegister(this->input_at(node, 0)));
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitSimd128ReverseBytes(node_t node) {
UNREACHABLE();
}
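// Int32Add is first matched against the [base + index * scale + displacement]
// shape so that it can be emitted as a single lea; only if no such pattern is
// found do we fall back to a plain add.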
template <>
void InstructionSelectorT<TurboshaftAdapter>::VisitInt32Add(node_t node) {
IA32OperandGeneratorT<TurboshaftAdapter> g(this);
const turboshaft::WordBinopOp& add =
this->Get(node).template Cast<turboshaft::WordBinopOp>();
turboshaft::OpIndex left = add.left();
turboshaft::OpIndex right = add.right();
std::optional<BaseWithScaledIndexAndDisplacementMatch<TurboshaftAdapter>> m =
TryMatchBaseWithScaledIndexAndDisplacementForWordBinop(this, left, right);
if (m.has_value()) {
if (g.ValueFitsIntoImmediate(m->displacement)) {
EmitLea(this, node, m->index, m->scale, m->base, m->displacement,
m->displacement_mode);
return;
}
}
// No lea pattern, use add.
VisitBinop(this, node, kIA32Add);
}
template <>
void InstructionSelectorT<TurbofanAdapter>::VisitInt32Add(Node* node) {
IA32OperandGeneratorT<TurbofanAdapter> g(this);
// Try to match the Add to a lea pattern.
BaseWithIndexAndDisplacement32Matcher m(node);
if (m.matches() &&
(m.displacement() == nullptr || g.CanBeImmediate(m.displacement()))) {
InstructionOperand inputs[4];
size_t input_count = 0;
AddressingMode mode = g.GenerateMemoryOperandInputs(
m.index(), m.scale(), m.base(), m.displacement(), m.displacement_mode(),
inputs, &input_count);
DCHECK_NE(0u, input_count);
DCHECK_GE(arraysize(inputs), input_count);
InstructionOperand outputs[1];
outputs[0] = g.DefineAsRegister(node);
InstructionCode opcode = AddressingModeField::encode(mode) | kIA32Lea;
Emit(opcode, 1, outputs, input_count, inputs);
return;
}
// No lea pattern matched, use add.
VisitBinop(this, node, kIA32Add);
}
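// Int32Sub of the form (0 - x) is emitted as a neg; everything else goes
// through the generic binop path.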
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitInt32Sub(node_t node) {
if constexpr (Adapter::IsTurboshaft) {
IA32OperandGeneratorT<Adapter> g(this);
auto binop = this->word_binop_view(node);
auto left = binop.left();
auto right = binop.right();
if (this->MatchIntegralZero(left)) {
Emit(kIA32Neg, g.DefineSameAsFirst(node), g.Use(right));
} else {
VisitBinop(this, node, kIA32Sub);
}
} else {
IA32OperandGeneratorT<Adapter> g(this);
Int32BinopMatcher m(node);
if (m.left().Is(0)) {
Emit(kIA32Neg, g.DefineSameAsFirst(node), g.Use(m.right().node()));
} else {
VisitBinop(this, node, kIA32Sub);
}
}
}
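// Multiplications by 2, 3, 4, 5, 8, or 9 are strength-reduced to an lea,
// e.g. x * 9 becomes lea dst, [x + x * 8]. Otherwise an imul is emitted, with
// an immediate right operand when possible.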
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitInt32Mul(node_t node) {
if constexpr (Adapter::IsTurboshaft) {
if (auto m = TryMatchScaledIndex(this, node, true)) {
EmitLea(this, node, m->index, m->scale, m->base, 0,
kPositiveDisplacement);
return;
}
} else {
Int32ScaleMatcher m(node, true);
if (m.matches()) {
Node* index = node->InputAt(0);
Node* base = m.power_of_two_plus_one() ? index : nullptr;
EmitLea(this, node, index, m.scale(), base, nullptr,
kPositiveDisplacement);
return;
}
}
IA32OperandGeneratorT<Adapter> g(this);
auto left = this->input_at(node, 0);
auto right = this->input_at(node, 1);
if (g.CanBeImmediate(right)) {
Emit(kIA32Imul, g.DefineAsRegister(node), g.Use(left),
g.UseImmediate(right));
} else {
if (g.CanBeBetterLeftOperand(right)) {
std::swap(left, right);
}
Emit(kIA32Imul, g.DefineSameAsFirst(node), g.UseRegister(left),
g.Use(right));
}
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitInt32MulHigh(node_t node) {
VisitMulHigh(this, node, kIA32ImulHigh);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitUint32MulHigh(node_t node) {
VisitMulHigh(this, node, kIA32UmulHigh);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitInt32Div(node_t node) {
VisitDiv(this, node, kIA32Idiv);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitUint32Div(node_t node) {
VisitDiv(this, node, kIA32Udiv);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitInt32Mod(node_t node) {
VisitMod(this, node, kIA32Idiv);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitUint32Mod(node_t node) {
VisitMod(this, node, kIA32Udiv);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitRoundUint32ToFloat32(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
InstructionOperand temps[] = {g.TempRegister()};
Emit(kIA32Uint32ToFloat32, g.DefineAsRegister(node),
g.Use(this->input_at(node, 0)), arraysize(temps), temps);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitFloat64Mod(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
InstructionOperand temps[] = {g.TempRegister(eax), g.TempRegister()};
Emit(kIA32Float64Mod, g.DefineSameAsFirst(node),
g.UseRegister(this->input_at(node, 0)),
g.UseRegister(this->input_at(node, 1)), arraysize(temps), temps);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitFloat32Max(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
InstructionOperand temps[] = {g.TempRegister()};
Emit(kIA32Float32Max, g.DefineSameAsFirst(node),
g.UseRegister(this->input_at(node, 0)), g.Use(this->input_at(node, 1)),
arraysize(temps), temps);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitFloat64Max(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
InstructionOperand temps[] = {g.TempRegister()};
Emit(kIA32Float64Max, g.DefineSameAsFirst(node),
g.UseRegister(this->input_at(node, 0)), g.Use(this->input_at(node, 1)),
arraysize(temps), temps);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitFloat32Min(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
InstructionOperand temps[] = {g.TempRegister()};
Emit(kIA32Float32Min, g.DefineSameAsFirst(node),
g.UseRegister(this->input_at(node, 0)), g.Use(this->input_at(node, 1)),
arraysize(temps), temps);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitFloat64Min(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
InstructionOperand temps[] = {g.TempRegister()};
Emit(kIA32Float64Min, g.DefineSameAsFirst(node),
g.UseRegister(this->input_at(node, 0)), g.Use(this->input_at(node, 1)),
arraysize(temps), temps);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitFloat64RoundTiesAway(node_t node) {
UNREACHABLE();
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitFloat64Ieee754Binop(
node_t node, InstructionCode opcode) {
IA32OperandGeneratorT<Adapter> g(this);
Emit(opcode, g.DefineSameAsFirst(node),
g.UseRegister(this->input_at(node, 0)),
g.UseRegister(this->input_at(node, 1)))
->MarkAsCall();
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitFloat64Ieee754Unop(
node_t node, InstructionCode opcode) {
IA32OperandGeneratorT<Adapter> g(this);
Emit(opcode, g.DefineSameAsFirst(node),
g.UseRegister(this->input_at(node, 0)))
->MarkAsCall();
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::EmitMoveParamToFPR(node_t node, int index) {
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::EmitMoveFPRToParam(
InstructionOperand* op, LinkageLocation location) {}
template <typename Adapter>
void InstructionSelectorT<Adapter>::EmitPrepareArguments(
ZoneVector<PushParameter>* arguments, const CallDescriptor* call_descriptor,
node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
{ // Temporary scope to minimize indentation change churn below.
// Prepare for C function call.
if (call_descriptor->IsCFunctionCall()) {
InstructionOperand temps[] = {g.TempRegister()};
size_t const temp_count = arraysize(temps);
Emit(kArchPrepareCallCFunction | MiscField::encode(static_cast<int>(
call_descriptor->ParameterCount())),
0, nullptr, 0, nullptr, temp_count, temps);
// Poke any stack arguments.
for (size_t n = 0; n < arguments->size(); ++n) {
PushParameter input = (*arguments)[n];
if (this->valid(input.node)) {
int const slot = static_cast<int>(n);
// TODO(jkummerow): The next line should use `input.node`, but
// fixing it causes mksnapshot failures. Investigate.
InstructionOperand value = g.CanBeImmediate(node)
? g.UseImmediate(input.node)
: g.UseRegister(input.node);
Emit(kIA32Poke | MiscField::encode(slot), g.NoOutput(), value);
}
}
} else {
// Push any stack arguments.
int effect_level = GetEffectLevel(node);
int stack_decrement = 0;
for (PushParameter input : base::Reversed(*arguments)) {
stack_decrement += kSystemPointerSize;
// Skip holes in the param array. These represent both extra slots for
// multi-slot values and padding slots for alignment.
if (!this->valid(input.node)) continue;
InstructionOperand decrement = g.UseImmediate(stack_decrement);
stack_decrement = 0;
if (g.CanBeImmediate(input.node)) {
Emit(kIA32Push, g.NoOutput(), decrement, g.UseImmediate(input.node));
} else if (IsSupported(INTEL_ATOM) ||
sequence()->IsFP(GetVirtualRegister(input.node))) {
// TODO(bbudge): IA32Push cannot handle stack->stack double moves
// because there is no way to encode fixed double slots.
Emit(kIA32Push, g.NoOutput(), decrement, g.UseRegister(input.node));
} else if (g.CanBeMemoryOperand(kIA32Push, node, input.node,
effect_level)) {
InstructionOperand outputs[1];
InstructionOperand inputs[5];
size_t input_count = 0;
inputs[input_count++] = decrement;
AddressingMode mode = g.GetEffectiveAddressMemoryOperand(
input.node, inputs, &input_count);
InstructionCode opcode =
kIA32Push | AddressingModeField::encode(mode);
Emit(opcode, 0, outputs, input_count, inputs);
} else {
Emit(kIA32Push, g.NoOutput(), decrement, g.UseAny(input.node));
}
}
}
} // End of temporary scope.
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::EmitPrepareResults(
ZoneVector<PushParameter>* results, const CallDescriptor* call_descriptor,
node_t node) {
{ // Temporary scope to minimize indentation change churn below.
IA32OperandGeneratorT<Adapter> g(this);
for (PushParameter output : *results) {
if (!output.location.IsCallerFrameSlot()) continue;
// Skip any alignment holes in nodes.
if (this->valid(output.node)) {
DCHECK(!call_descriptor->IsCFunctionCall());
if (output.location.GetType() == MachineType::Float32()) {
MarkAsFloat32(output.node);
} else if (output.location.GetType() == MachineType::Float64()) {
MarkAsFloat64(output.node);
} else if (output.location.GetType() == MachineType::Simd128()) {
MarkAsSimd128(output.node);
}
int offset = call_descriptor->GetOffsetToReturns();
int reverse_slot = -output.location.GetLocation() - offset;
Emit(kIA32Peek, g.DefineAsRegister(output.node),
g.UseImmediate(reverse_slot));
}
}
} // End of temporary scope.
}
template <typename Adapter>
bool InstructionSelectorT<Adapter>::IsTailCallAddressImmediate() {
return true;
}
namespace {
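// Shared routine for comparisons whose left operand is a load that can be
// folded into the compare: the load becomes a memory operand of the compare
// instruction itself.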
template <typename Adapter>
void VisitCompareWithMemoryOperand(InstructionSelectorT<Adapter>* selector,
InstructionCode opcode,
typename Adapter::node_t left,
InstructionOperand right,
FlagsContinuationT<Adapter>* cont) {
DCHECK(selector->IsLoadOrLoadImmutable(left));
IA32OperandGeneratorT<Adapter> g(selector);
size_t input_count = 0;
InstructionOperand inputs[4];
AddressingMode addressing_mode =
g.GetEffectiveAddressMemoryOperand(left, inputs, &input_count);
opcode |= AddressingModeField::encode(addressing_mode);
inputs[input_count++] = right;
selector->EmitWithContinuation(opcode, 0, nullptr, input_count, inputs, cont);
}
// Shared routine for multiple compare operations.
template <typename Adapter>
void VisitCompare(InstructionSelectorT<Adapter>* selector,
InstructionCode opcode, InstructionOperand left,
InstructionOperand right, FlagsContinuationT<Adapter>* cont) {
selector->EmitWithContinuation(opcode, left, right, cont);
}
// Shared routine for multiple compare operations.
template <typename Adapter>
void VisitCompare(InstructionSelectorT<Adapter>* selector,
InstructionCode opcode, typename Adapter::node_t left,
typename Adapter::node_t right,
FlagsContinuationT<Adapter>* cont, bool commutative) {
IA32OperandGeneratorT<Adapter> g(selector);
if (commutative && g.CanBeBetterLeftOperand(right)) {
std::swap(left, right);
}
VisitCompare(selector, opcode, g.UseRegister(left), g.Use(right), cont);
}
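// Returns the machine type to assume for {node} when narrowing a comparison
// against {hint_node}: if {hint_node} is a load and {node} is a constant that
// fits into the loaded representation, the load's type is used; otherwise
// {node}'s own load type is used, or MachineType::None() if it is not a load.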
template <typename Adapter>
MachineType MachineTypeForNarrow(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node,
typename Adapter::node_t hint_node) {
if (selector->IsLoadOrLoadImmutable(hint_node)) {
MachineType hint = selector->load_view(hint_node).loaded_rep();
if (selector->is_integer_constant(node)) {
int64_t constant = selector->integer_constant(node);
if (hint == MachineType::Int8()) {
if (constant >= std::numeric_limits<int8_t>::min() &&
constant <= std::numeric_limits<int8_t>::max()) {
return hint;
}
} else if (hint == MachineType::Uint8()) {
if (constant >= std::numeric_limits<uint8_t>::min() &&
constant <= std::numeric_limits<uint8_t>::max()) {
return hint;
}
} else if (hint == MachineType::Int16()) {
if (constant >= std::numeric_limits<int16_t>::min() &&
constant <= std::numeric_limits<int16_t>::max()) {
return hint;
}
} else if (hint == MachineType::Uint16()) {
if (constant >= std::numeric_limits<uint16_t>::min() &&
constant <= std::numeric_limits<uint16_t>::max()) {
return hint;
}
} else if (hint == MachineType::Int32()) {
return hint;
} else if (hint == MachineType::Uint32()) {
if (constant >= 0) return hint;
}
}
}
return selector->IsLoadOrLoadImmutable(node)
? selector->load_view(node).loaded_rep()
: MachineType::None();
}
// Tries to match the size of the given opcode to that of the operands, if
// possible.
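// For example, comparing a Uint8 load against a constant in [0, 255] can use
// kIA32Cmp8 instead of a full word-sized cmp.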
template <typename Adapter>
InstructionCode TryNarrowOpcodeSize(InstructionSelectorT<Adapter>* selector,
InstructionCode opcode,
typename Adapter::node_t left,
typename Adapter::node_t right,
FlagsContinuationT<Adapter>* cont) {
// TODO(epertoso): we can probably get some size information out of phi nodes.
// If the load representations don't match, both operands will be
// zero/sign-extended to 32 bits.
MachineType left_type = MachineTypeForNarrow(selector, left, right);
MachineType right_type = MachineTypeForNarrow(selector, right, left);
if (left_type == right_type) {
switch (left_type.representation()) {
case MachineRepresentation::kBit:
case MachineRepresentation::kWord8: {
if (opcode == kIA32Test) return kIA32Test8;
if (opcode == kIA32Cmp) {
if (left_type.semantic() == MachineSemantic::kUint32) {
cont->OverwriteUnsignedIfSigned();
} else {
CHECK_EQ(MachineSemantic::kInt32, left_type.semantic());
}
return kIA32Cmp8;
}
break;
}
case MachineRepresentation::kWord16:
if (opcode == kIA32Test) return kIA32Test16;
if (opcode == kIA32Cmp) {
if (left_type.semantic() == MachineSemantic::kUint32) {
cont->OverwriteUnsignedIfSigned();
} else {
CHECK_EQ(MachineSemantic::kInt32, left_type.semantic());
}
return kIA32Cmp16;
}
break;
default:
break;
}
}
return opcode;
}
// Shared routine for multiple float32 compare operations (inputs commuted).
template <typename Adapter>
void VisitFloat32Compare(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node,
FlagsContinuationT<Adapter>* cont) {
auto left = selector->input_at(node, 0);
auto right = selector->input_at(node, 1);
VisitCompare(selector, kIA32Float32Cmp, right, left, cont, false);
}
// Shared routine for multiple float64 compare operations (inputs commuted).
template <typename Adapter>
void VisitFloat64Compare(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node,
FlagsContinuationT<Adapter>* cont) {
auto left = selector->input_at(node, 0);
auto right = selector->input_at(node, 1);
VisitCompare(selector, kIA32Float64Cmp, right, left, cont, false);
}
// Shared routine for multiple word compare operations.
template <typename Adapter>
void VisitWordCompare(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, InstructionCode opcode,
FlagsContinuationT<Adapter>* cont) {
{ // Temporary scope to minimize indentation change churn below.
IA32OperandGeneratorT<Adapter> g(selector);
auto left = selector->input_at(node, 0);
auto right = selector->input_at(node, 1);
InstructionCode narrowed_opcode =
TryNarrowOpcodeSize(selector, opcode, left, right, cont);
int effect_level = selector->GetEffectLevel(node, cont);
// If one of the two inputs is an immediate, make sure it's on the right, or
// if one of the two inputs is a memory operand, make sure it's on the left.
if ((!g.CanBeImmediate(right) && g.CanBeImmediate(left)) ||
(g.CanBeMemoryOperand(narrowed_opcode, node, right, effect_level) &&
!g.CanBeMemoryOperand(narrowed_opcode, node, left, effect_level))) {
if (!selector->IsCommutative(node)) cont->Commute();
std::swap(left, right);
}
// Match immediates on right side of comparison.
if (g.CanBeImmediate(right)) {
if (g.CanBeMemoryOperand(narrowed_opcode, node, left, effect_level)) {
return VisitCompareWithMemoryOperand(selector, narrowed_opcode, left,
g.UseImmediate(right), cont);
}
return VisitCompare(selector, opcode, g.Use(left), g.UseImmediate(right),
cont);
}
// Match memory operands on left side of comparison.
if (g.CanBeMemoryOperand(narrowed_opcode, node, left, effect_level)) {
bool needs_byte_register =
narrowed_opcode == kIA32Test8 || narrowed_opcode == kIA32Cmp8;
return VisitCompareWithMemoryOperand(
selector, narrowed_opcode, left,
needs_byte_register ? g.UseByteRegister(right) : g.UseRegister(right),
cont);
}
return VisitCompare(selector, opcode, left, right, cont,
selector->IsCommutative(node));
}
}
template <typename Adapter>
void VisitWordCompare(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node,
FlagsContinuationT<Adapter>* cont) {
VisitWordCompare(selector, node, kIA32Cmp, cont);
}
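// Shared routine for Word32 atomic read-modify-write operations. The result
// is fixed to eax, and a temp register is reserved for the intermediate value
// (a byte register for 8-bit representations).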
template <typename Adapter>
void VisitAtomicBinOp(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, ArchOpcode opcode,
MachineRepresentation rep) {
using node_t = typename Adapter::node_t;
AddressingMode addressing_mode;
IA32OperandGeneratorT<Adapter> g(selector);
node_t base = selector->input_at(node, 0);
node_t index = selector->input_at(node, 1);
node_t value = selector->input_at(node, 2);
InstructionOperand inputs[] = {
g.UseUniqueRegister(value), g.UseUniqueRegister(base),
g.GetEffectiveIndexOperand(index, &addressing_mode)};
InstructionOperand outputs[] = {g.DefineAsFixed(node, eax)};
InstructionOperand temp[] = {(rep == MachineRepresentation::kWord8)
? g.UseByteRegister(node)
: g.TempRegister()};
InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
selector->Emit(code, arraysize(outputs), outputs, arraysize(inputs), inputs,
arraysize(temp), temp);
}
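// Shared routine for Word32 atomic pair read-modify-write operations. The
// result pair is fixed to eax (low) and edx (high); if a projection is unused,
// the corresponding register is still reserved as a temp. The high word of the
// value operand is fixed to ecx.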
template <typename Adapter>
void VisitPairAtomicBinOp(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, ArchOpcode opcode) {
using node_t = typename Adapter::node_t;
IA32OperandGeneratorT<Adapter> g(selector);
node_t base = selector->input_at(node, 0);
node_t index = selector->input_at(node, 1);
node_t value = selector->input_at(node, 2);
// For Word64 operations, the value input is split into a high node and a low
// node in the int64-lowering phase.
node_t value_high = selector->input_at(node, 3);
// Wasm lives in 32-bit address space, so we do not need to worry about
// base/index lowering. This will need to be fixed for Wasm64.
AddressingMode addressing_mode;
InstructionOperand inputs[] = {
g.UseUniqueRegisterOrSlotOrConstant(value), g.UseFixed(value_high, ecx),
g.UseUniqueRegister(base),
g.GetEffectiveIndexOperand(index, &addressing_mode)};
InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
node_t projection0 = selector->FindProjection(node, 0);
node_t projection1 = selector->FindProjection(node, 1);
InstructionOperand outputs[2];
size_t output_count = 0;
InstructionOperand temps[2];
size_t temp_count = 0;
if (selector->valid(projection0)) {
outputs[output_count++] = g.DefineAsFixed(projection0, eax);
} else {
temps[temp_count++] = g.TempRegister(eax);
}
if (selector->valid(projection1)) {
outputs[output_count++] = g.DefineAsFixed(projection1, edx);
} else {
temps[temp_count++] = g.TempRegister(edx);
}
selector->Emit(code, output_count, outputs, arraysize(inputs), inputs,
temp_count, temps);
}
} // namespace
// Shared routine for word comparison with zero.
template <>
void InstructionSelectorT<TurboshaftAdapter>::VisitWordCompareZero(
node_t user, node_t value, FlagsContinuation* cont) {
using namespace turboshaft; // NOLINT(build/namespaces)
// Try to combine with comparisons against 0 by simply inverting the branch.
ConsumeEqualZero(&user, &value, cont);
if (CanCover(user, value)) {
const Operation& value_op = Get(value);
if (const ComparisonOp* comparison = value_op.TryCast<ComparisonOp>()) {
switch (comparison->rep.MapTaggedToWord().value()) {
case RegisterRepresentation::Word32():
cont->OverwriteAndNegateIfEqual(
GetComparisonFlagCondition(*comparison));
return VisitWordCompare(this, value, cont);
case RegisterRepresentation::Float32():
switch (comparison->kind) {
case ComparisonOp::Kind::kEqual:
cont->OverwriteAndNegateIfEqual(kUnorderedEqual);
return VisitFloat32Compare(this, value, cont);
case ComparisonOp::Kind::kSignedLessThan:
cont->OverwriteAndNegateIfEqual(kUnsignedGreaterThan);
return VisitFloat32Compare(this, value, cont);
case ComparisonOp::Kind::kSignedLessThanOrEqual:
cont->OverwriteAndNegateIfEqual(kUnsignedGreaterThanOrEqual);
return VisitFloat32Compare(this, value, cont);
default:
UNREACHABLE();
}
case RegisterRepresentation::Float64():
switch (comparison->kind) {
case ComparisonOp::Kind::kEqual:
cont->OverwriteAndNegateIfEqual(kUnorderedEqual);
return VisitFloat64Compare(this, value, cont);
case ComparisonOp::Kind::kSignedLessThan:
cont->OverwriteAndNegateIfEqual(kUnsignedGreaterThan);
return VisitFloat64Compare(this, value, cont);
case ComparisonOp::Kind::kSignedLessThanOrEqual:
cont->OverwriteAndNegateIfEqual(kUnsignedGreaterThanOrEqual);
return VisitFloat64Compare(this, value, cont);
default:
UNREACHABLE();
}
default:
break;
}
} else if (value_op.Is<Opmask::kWord32Sub>()) {
return VisitWordCompare(this, value, cont);
} else if (value_op.Is<Opmask::kWord32BitwiseAnd>()) {
return VisitWordCompare(this, value, kIA32Test, cont);
} else if (const ProjectionOp* projection =
value_op.TryCast<ProjectionOp>()) {
// Check if this is the overflow output projection of an
// OverflowCheckedBinop operation.
if (projection->index == 1u) {
// We cannot combine the OverflowCheckedBinop operation with this branch
// unless the 0th projection (the use of the actual value of the
// operation) is either {OpIndex::Invalid()}, which means there's no use
// of the actual value, or was already defined, which means it is
// scheduled *AFTER* this branch.
OpIndex node = projection->input();
OpIndex result = FindProjection(node, 0);
if (!result.valid() || IsDefined(result)) {
if (const OverflowCheckedBinopOp* binop =
this->TryCast<OverflowCheckedBinopOp>(node)) {
DCHECK_EQ(binop->rep, WordRepresentation::Word32());
cont->OverwriteAndNegateIfEqual(kOverflow);
switch (binop->kind) {
case OverflowCheckedBinopOp::Kind::kSignedAdd:
return VisitBinop(this, node, kIA32Add, cont);
case OverflowCheckedBinopOp::Kind::kSignedSub:
return VisitBinop(this, node, kIA32Sub, cont);
case OverflowCheckedBinopOp::Kind::kSignedMul:
return VisitBinop(this, node, kIA32Imul, cont);
}
UNREACHABLE();
}
}
}
} else if (value_op.Is<StackPointerGreaterThanOp>()) {
cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition);
return VisitStackPointerGreaterThan(value, cont);
}
}
// Branch could not be combined with a compare, emit compare against 0.
IA32OperandGeneratorT<TurboshaftAdapter> g(this);
VisitCompare(this, kIA32Cmp, g.Use(value), g.TempImmediate(0), cont);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWordCompareZero(
node_t user, node_t value, FlagsContinuation* cont) {
if constexpr (Adapter::IsTurboshaft) {
UNREACHABLE(); // Template-specialized above.
} else {
// Try to combine with comparisons against 0 by simply inverting the branch.
while (value->opcode() == IrOpcode::kWord32Equal && CanCover(user, value)) {
Int32BinopMatcher m(value);
if (!m.right().Is(0)) break;
user = value;
value = m.left().node();
cont->Negate();
}
if (CanCover(user, value)) {
switch (value->opcode()) {
case IrOpcode::kWord32Equal:
cont->OverwriteAndNegateIfEqual(kEqual);
return VisitWordCompare(this, value, cont);
case IrOpcode::kInt32LessThan:
cont->OverwriteAndNegateIfEqual(kSignedLessThan);
return VisitWordCompare(this, value, cont);
case IrOpcode::kInt32LessThanOrEqual:
cont->OverwriteAndNegateIfEqual(kSignedLessThanOrEqual);
return VisitWordCompare(this, value, cont);
case IrOpcode::kUint32LessThan:
cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
return VisitWordCompare(this, value, cont);
case IrOpcode::kUint32LessThanOrEqual:
cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
return VisitWordCompare(this, value, cont);
case IrOpcode::kFloat32Equal:
cont->OverwriteAndNegateIfEqual(kUnorderedEqual);
return VisitFloat32Compare(this, value, cont);
case IrOpcode::kFloat32LessThan:
cont->OverwriteAndNegateIfEqual(kUnsignedGreaterThan);
return VisitFloat32Compare(this, value, cont);
case IrOpcode::kFloat32LessThanOrEqual:
cont->OverwriteAndNegateIfEqual(kUnsignedGreaterThanOrEqual);
return VisitFloat32Compare(this, value, cont);
case IrOpcode::kFloat64Equal:
cont->OverwriteAndNegateIfEqual(kUnorderedEqual);
return VisitFloat64Compare(this, value, cont);
case IrOpcode::kFloat64LessThan:
cont->OverwriteAndNegateIfEqual(kUnsignedGreaterThan);
return VisitFloat64Compare(this, value, cont);
case IrOpcode::kFloat64LessThanOrEqual:
cont->OverwriteAndNegateIfEqual(kUnsignedGreaterThanOrEqual);
return VisitFloat64Compare(this, value, cont);
case IrOpcode::kProjection:
// Check if this is the overflow output projection of an
// <Operation>WithOverflow node.
if (ProjectionIndexOf(value->op()) == 1u) {
// We cannot combine the <Operation>WithOverflow with this branch
// unless the 0th projection (the use of the actual value of the
// <Operation>) is either nullptr, which means there's no use of the
// actual value, or was already defined, which means it is scheduled
// *AFTER* this branch.
Node* const node = value->InputAt(0);
Node* const result = NodeProperties::FindProjection(node, 0);
if (result == nullptr || IsDefined(result)) {
switch (node->opcode()) {
case IrOpcode::kInt32AddWithOverflow:
cont->OverwriteAndNegateIfEqual(kOverflow);
return VisitBinop(this, node, kIA32Add, cont);
case IrOpcode::kInt32SubWithOverflow:
cont->OverwriteAndNegateIfEqual(kOverflow);
return VisitBinop(this, node, kIA32Sub, cont);
case IrOpcode::kInt32MulWithOverflow:
cont->OverwriteAndNegateIfEqual(kOverflow);
return VisitBinop(this, node, kIA32Imul, cont);
default:
break;
}
}
}
break;
case IrOpcode::kInt32Sub:
return VisitWordCompare(this, value, cont);
case IrOpcode::kWord32And:
return VisitWordCompare(this, value, kIA32Test, cont);
case IrOpcode::kStackPointerGreaterThan:
cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition);
return VisitStackPointerGreaterThan(value, cont);
default:
break;
}
}
// Continuation could not be combined with a compare, emit compare against
// 0.
IA32OperandGeneratorT<Adapter> g(this);
VisitCompare(this, kIA32Cmp, g.Use(value), g.TempImmediate(0), cont);
}
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitSwitch(node_t node,
const SwitchInfo& sw) {
{ // Temporary scope to minimize indentation change churn below.
IA32OperandGeneratorT<Adapter> g(this);
InstructionOperand value_operand = g.UseRegister(this->input_at(node, 0));
// Emit either ArchTableSwitch or ArchBinarySearchSwitch.
if (enable_switch_jump_table_ ==
InstructionSelector::kEnableSwitchJumpTable) {
static const size_t kMaxTableSwitchValueRange = 2 << 16;
size_t table_space_cost = 4 + sw.value_range();
size_t table_time_cost = 3;
size_t lookup_space_cost = 3 + 2 * sw.case_count();
size_t lookup_time_cost = sw.case_count();
if (sw.case_count() > 4 &&
table_space_cost + 3 * table_time_cost <=
lookup_space_cost + 3 * lookup_time_cost &&
sw.min_value() > std::numeric_limits<int32_t>::min() &&
sw.value_range() <= kMaxTableSwitchValueRange) {
InstructionOperand index_operand = value_operand;
if (sw.min_value()) {
index_operand = g.TempRegister();
Emit(kIA32Lea | AddressingModeField::encode(kMode_MRI), index_operand,
value_operand, g.TempImmediate(-sw.min_value()));
}
// Generate a table lookup.
return EmitTableSwitch(sw, index_operand);
}
}
// Generate a tree of conditional jumps.
return EmitBinarySearchSwitch(sw, value_operand);
}
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32Equal(node_t node) {
FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
if constexpr (Adapter::IsTurboshaft) {
const turboshaft::ComparisonOp& comparison =
this->Get(node).template Cast<turboshaft::ComparisonOp>();
if (this->MatchIntegralZero(comparison.right())) {
return VisitWordCompareZero(node, comparison.left(), &cont);
}
} else {
Int32BinopMatcher m(node);
if (m.right().Is(0)) {
return VisitWordCompareZero(m.node(), m.left().node(), &cont);
}
}
VisitWordCompare(this, node, &cont);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitInt32LessThan(node_t node) {
FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node);
VisitWordCompare(this, node, &cont);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitInt32LessThanOrEqual(node_t node) {
FlagsContinuation cont =
FlagsContinuation::ForSet(kSignedLessThanOrEqual, node);
VisitWordCompare(this, node, &cont);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitUint32LessThan(node_t node) {
FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
VisitWordCompare(this, node, &cont);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitUint32LessThanOrEqual(node_t node) {
FlagsContinuation cont =
FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
VisitWordCompare(this, node, &cont);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitInt32AddWithOverflow(node_t node) {
node_t ovf = FindProjection(node, 1);
if (this->valid(ovf)) {
FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
return VisitBinop(this, node, kIA32Add, &cont);
}
FlagsContinuation cont;
VisitBinop(this, node, kIA32Add, &cont);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitInt32SubWithOverflow(node_t node) {
node_t ovf = FindProjection(node, 1);
if (this->valid(ovf)) {
FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
return VisitBinop(this, node, kIA32Sub, &cont);
}
FlagsContinuation cont;
VisitBinop(this, node, kIA32Sub, &cont);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitInt32MulWithOverflow(node_t node) {
node_t ovf = FindProjection(node, 1);
if (this->valid(ovf)) {
FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
return VisitBinop(this, node, kIA32Imul, &cont);
}
FlagsContinuation cont;
VisitBinop(this, node, kIA32Imul, &cont);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitFloat32Equal(node_t node) {
FlagsContinuation cont = FlagsContinuation::ForSet(kUnorderedEqual, node);
VisitFloat32Compare(this, node, &cont);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitFloat32LessThan(node_t node) {
FlagsContinuation cont =
FlagsContinuation::ForSet(kUnsignedGreaterThan, node);
VisitFloat32Compare(this, node, &cont);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitFloat32LessThanOrEqual(node_t node) {
FlagsContinuation cont =
FlagsContinuation::ForSet(kUnsignedGreaterThanOrEqual, node);
VisitFloat32Compare(this, node, &cont);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitFloat64Equal(node_t node) {
FlagsContinuation cont = FlagsContinuation::ForSet(kUnorderedEqual, node);
VisitFloat64Compare(this, node, &cont);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitFloat64LessThan(node_t node) {
FlagsContinuation cont =
FlagsContinuation::ForSet(kUnsignedGreaterThan, node);
VisitFloat64Compare(this, node, &cont);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitFloat64LessThanOrEqual(node_t node) {
FlagsContinuation cont =
FlagsContinuation::ForSet(kUnsignedGreaterThanOrEqual, node);
VisitFloat64Compare(this, node, &cont);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitFloat64InsertLowWord32(node_t node) {
if constexpr (Adapter::IsTurboshaft) {
// Turboshaft uses {BitcastWord32PairToFloat64}.
UNREACHABLE();
} else {
IA32OperandGeneratorT<Adapter> g(this);
Node* left = node->InputAt(0);
Node* right = node->InputAt(1);
Float64Matcher mleft(left);
if (mleft.HasResolvedValue() &&
(base::bit_cast<uint64_t>(mleft.ResolvedValue()) >> 32) == 0u) {
Emit(kIA32Float64LoadLowWord32, g.DefineAsRegister(node), g.Use(right));
return;
}
Emit(kIA32Float64InsertLowWord32, g.DefineSameAsFirst(node),
g.UseRegister(left), g.Use(right));
}
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitFloat64InsertHighWord32(node_t node) {
if constexpr (Adapter::IsTurboshaft) {
// Turboshaft uses {BitcastWord32PairToFloat64}.
UNREACHABLE();
} else {
IA32OperandGeneratorT<Adapter> g(this);
Node* left = node->InputAt(0);
Node* right = node->InputAt(1);
Emit(kIA32Float64InsertHighWord32, g.DefineSameAsFirst(node),
g.UseRegister(left), g.Use(right));
}
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitBitcastWord32PairToFloat64(
node_t node) {
if constexpr (Adapter::IsTurbofan) {
// Turbofan uses {Float64Insert{High,Low}Word32}.
UNREACHABLE();
} else {
IA32OperandGeneratorT<Adapter> g(this);
const turboshaft::BitcastWord32PairToFloat64Op& cast_op =
this->Get(node)
.template Cast<turboshaft::BitcastWord32PairToFloat64Op>();
Emit(kIA32Float64FromWord32Pair, g.DefineAsRegister(node),
g.Use(cast_op.low_word32()), g.Use(cast_op.high_word32()));
}
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitFloat64SilenceNaN(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
Emit(kIA32Float64SilenceNaN, g.DefineSameAsFirst(node),
g.UseRegister(this->input_at(node, 0)));
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitMemoryBarrier(node_t node) {
// ia32 is no weaker than release-acquire and only needs to emit an
// instruction for SeqCst memory barriers.
AtomicMemoryOrder order = AtomicOrder(this, node);
if (order == AtomicMemoryOrder::kSeqCst) {
IA32OperandGeneratorT<Adapter> g(this);
Emit(kIA32MFence, g.NoOutput());
return;
}
DCHECK_EQ(AtomicMemoryOrder::kAcqRel, order);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32AtomicLoad(node_t node) {
LoadRepresentation load_rep = this->load_view(node).loaded_rep();
DCHECK(load_rep.representation() == MachineRepresentation::kWord8 ||
load_rep.representation() == MachineRepresentation::kWord16 ||
load_rep.representation() == MachineRepresentation::kWord32 ||
load_rep.representation() == MachineRepresentation::kTaggedSigned ||
load_rep.representation() == MachineRepresentation::kTaggedPointer ||
load_rep.representation() == MachineRepresentation::kTagged);
// The memory order is ignored as both acquire and sequentially consistent
// loads can emit MOV.
// https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
VisitLoad(node, node, GetLoadOpcode(load_rep));
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32AtomicStore(node_t node) {
VisitStoreCommon(this, this->store_view(node));
}
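// Adapter-specific helpers that extract the machine type of an atomic
// read-modify-write operation and the memory order of an atomic or barrier
// operation.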
MachineType AtomicOpType(InstructionSelectorT<TurboshaftAdapter>* selector,
turboshaft::OpIndex node) {
const turboshaft::AtomicRMWOp& atomic_op =
selector->Get(node).template Cast<turboshaft::AtomicRMWOp>();
return atomic_op.memory_rep.ToMachineType();
}
MachineType AtomicOpType(InstructionSelectorT<TurbofanAdapter>* selector,
Node* node) {
return AtomicOpType(node->op());
}
AtomicMemoryOrder AtomicOrder(InstructionSelectorT<TurboshaftAdapter>* selector,
turboshaft::OpIndex node) {
const turboshaft::Operation& op = selector->Get(node);
if (op.Is<turboshaft::AtomicWord32PairOp>()) {
// TODO(nicohartmann): Turboshaft doesn't support configurable memory
// orders yet; see also {TurboshaftAdapter::StoreView}.
return AtomicMemoryOrder::kSeqCst;
}
if (const turboshaft::MemoryBarrierOp* barrier =
op.TryCast<turboshaft::MemoryBarrierOp>()) {
return barrier->memory_order;
}
UNREACHABLE();
}
AtomicMemoryOrder AtomicOrder(InstructionSelectorT<TurbofanAdapter>* selector,
Node* node) {
return OpParameter<AtomicMemoryOrder>(node->op());
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32AtomicExchange(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
MachineType type = AtomicOpType(this, node);
ArchOpcode opcode;
if (type == MachineType::Int8()) {
opcode = kAtomicExchangeInt8;
} else if (type == MachineType::Uint8()) {
opcode = kAtomicExchangeUint8;
} else if (type == MachineType::Int16()) {
opcode = kAtomicExchangeInt16;
} else if (type == MachineType::Uint16()) {
opcode = kAtomicExchangeUint16;
} else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
opcode = kAtomicExchangeWord32;
} else {
UNREACHABLE();
}
VisitAtomicExchange(this, node, opcode, type.representation());
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32AtomicCompareExchange(
node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
auto atomic_op = this->atomic_rmw_view(node);
node_t base = atomic_op.base();
node_t index = atomic_op.index();
node_t old_value = atomic_op.expected();
node_t new_value = atomic_op.value();
MachineType type = AtomicOpType(this, node);
ArchOpcode opcode;
if (type == MachineType::Int8()) {
opcode = kAtomicCompareExchangeInt8;
} else if (type == MachineType::Uint8()) {
opcode = kAtomicCompareExchangeUint8;
} else if (type == MachineType::Int16()) {
opcode = kAtomicCompareExchangeInt16;
} else if (type == MachineType::Uint16()) {
opcode = kAtomicCompareExchangeUint16;
} else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
opcode = kAtomicCompareExchangeWord32;
} else {
UNREACHABLE();
}
AddressingMode addressing_mode;
InstructionOperand new_val_operand =
(type.representation() == MachineRepresentation::kWord8)
? g.UseByteRegister(new_value)
: g.UseUniqueRegister(new_value);
InstructionOperand inputs[] = {
g.UseFixed(old_value, eax), new_val_operand, g.UseUniqueRegister(base),
g.GetEffectiveIndexOperand(index, &addressing_mode)};
InstructionOperand outputs[] = {g.DefineAsFixed(node, eax)};
InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
Emit(code, 1, outputs, arraysize(inputs), inputs);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32AtomicBinaryOperation(
node_t node, ArchOpcode int8_op, ArchOpcode uint8_op, ArchOpcode int16_op,
ArchOpcode uint16_op, ArchOpcode word32_op) {
{ // Temporary scope to minimize indentation change churn below.
MachineType type = AtomicOpType(this, node);
ArchOpcode opcode;
if (type == MachineType::Int8()) {
opcode = int8_op;
} else if (type == MachineType::Uint8()) {
opcode = uint8_op;
} else if (type == MachineType::Int16()) {
opcode = int16_op;
} else if (type == MachineType::Uint16()) {
opcode = uint16_op;
} else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
opcode = word32_op;
} else {
UNREACHABLE();
}
VisitAtomicBinOp(this, node, opcode, type.representation());
}
}
#define VISIT_ATOMIC_BINOP(op) \
template <typename Adapter> \
void InstructionSelectorT<Adapter>::VisitWord32Atomic##op(node_t node) { \
VisitWord32AtomicBinaryOperation( \
node, kAtomic##op##Int8, kAtomic##op##Uint8, kAtomic##op##Int16, \
kAtomic##op##Uint16, kAtomic##op##Word32); \
}
VISIT_ATOMIC_BINOP(Add)
VISIT_ATOMIC_BINOP(Sub)
VISIT_ATOMIC_BINOP(And)
VISIT_ATOMIC_BINOP(Or)
VISIT_ATOMIC_BINOP(Xor)
#undef VISIT_ATOMIC_BINOP
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32AtomicPairLoad(node_t node) {
// Both acquire and sequentially consistent loads can emit MOV.
// https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
IA32OperandGeneratorT<Adapter> g(this);
AddressingMode mode;
node_t base = this->input_at(node, 0);
node_t index = this->input_at(node, 1);
node_t projection0 = FindProjection(node, 0);
node_t projection1 = FindProjection(node, 1);
if (this->valid(projection0) && this->valid(projection1)) {
InstructionOperand inputs[] = {g.UseUniqueRegister(base),
g.GetEffectiveIndexOperand(index, &mode)};
InstructionCode code =
kIA32Word32AtomicPairLoad | AddressingModeField::encode(mode);
InstructionOperand outputs[] = {g.DefineAsRegister(projection0),
g.DefineAsRegister(projection1)};
Emit(code, 2, outputs, 2, inputs);
} else if (this->valid(projection0) || this->valid(projection1)) {
// Only one word is needed, so it's enough to load just that.
ArchOpcode opcode = kIA32Movl;
InstructionOperand outputs[] = {g.DefineAsRegister(
this->valid(projection0) ? projection0 : projection1)};
InstructionOperand inputs[3];
size_t input_count = 0;
// TODO(ahaas): Introduce an enum for {scale} instead of an integer.
// {scale = 0} means *1 in the generated code.
int scale = 0;
mode = g.GenerateMemoryOperandInputs(
index, scale, base, this->valid(projection0) ? 0 : 4,
kPositiveDisplacement, inputs, &input_count);
InstructionCode code = opcode | AddressingModeField::encode(mode);
Emit(code, 1, outputs, input_count, inputs);
}
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32AtomicPairStore(node_t node) {
// Release pair stores emit a MOVQ via a double register, and sequentially
// consistent stores emit CMPXCHG8B.
// https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
IA32OperandGeneratorT<Adapter> g(this);
node_t base = this->input_at(node, 0);
node_t index = this->input_at(node, 1);
node_t value = this->input_at(node, 2);
node_t value_high = this->input_at(node, 3);
AtomicMemoryOrder order = AtomicOrder(this, node);
if (order == AtomicMemoryOrder::kAcqRel) {
AddressingMode addressing_mode;
InstructionOperand inputs[] = {
g.UseUniqueRegisterOrSlotOrConstant(value),
g.UseUniqueRegisterOrSlotOrConstant(value_high),
g.UseUniqueRegister(base),
g.GetEffectiveIndexOperand(index, &addressing_mode),
};
InstructionCode code = kIA32Word32ReleasePairStore |
AddressingModeField::encode(addressing_mode);
Emit(code, 0, nullptr, arraysize(inputs), inputs);
} else {
DCHECK_EQ(order, AtomicMemoryOrder::kSeqCst);
AddressingMode addressing_mode;
InstructionOperand inputs[] = {
g.UseUniqueRegisterOrSlotOrConstant(value), g.UseFixed(value_high, ecx),
g.UseUniqueRegister(base),
g.GetEffectiveIndexOperand(index, &addressing_mode)};
// Allocate temp registers here because the store is performed with an atomic
// exchange; its output ends up in edx:eax, which must be saved and restored
// at the end of the instruction.
InstructionOperand temps[] = {g.TempRegister(eax), g.TempRegister(edx)};
const int num_temps = arraysize(temps);
InstructionCode code = kIA32Word32SeqCstPairStore |
AddressingModeField::encode(addressing_mode);
Emit(code, 0, nullptr, arraysize(inputs), inputs, num_temps, temps);
}
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32AtomicPairAdd(node_t node) {
VisitPairAtomicBinOp(this, node, kIA32Word32AtomicPairAdd);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32AtomicPairSub(node_t node) {
VisitPairAtomicBinOp(this, node, kIA32Word32AtomicPairSub);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32AtomicPairAnd(node_t node) {
VisitPairAtomicBinOp(this, node, kIA32Word32AtomicPairAnd);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32AtomicPairOr(node_t node) {
VisitPairAtomicBinOp(this, node, kIA32Word32AtomicPairOr);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32AtomicPairXor(node_t node) {
VisitPairAtomicBinOp(this, node, kIA32Word32AtomicPairXor);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32AtomicPairExchange(node_t node) {
VisitPairAtomicBinOp(this, node, kIA32Word32AtomicPairExchange);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitWord32AtomicPairCompareExchange(
node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
node_t index = this->input_at(node, 1);
AddressingMode addressing_mode;
// The order of the expected and new value inputs is swapped between the
// Turbofan and the Turboshaft graph.
const size_t expected_offset = Adapter::IsTurboshaft ? 4 : 2;
const size_t value_offset = Adapter::IsTurboshaft ? 2 : 4;
InstructionOperand inputs[] = {
// High, Low values of old value
g.UseFixed(this->input_at(node, expected_offset), eax),
g.UseFixed(this->input_at(node, expected_offset + 1), edx),
// High, Low values of new value
g.UseUniqueRegisterOrSlotOrConstant(this->input_at(node, value_offset)),
g.UseFixed(this->input_at(node, value_offset + 1), ecx),
// InputAt(0) => base
g.UseUniqueRegister(this->input_at(node, 0)),
g.GetEffectiveIndexOperand(index, &addressing_mode)};
node_t projection0 = FindProjection(node, 0);
node_t projection1 = FindProjection(node, 1);
InstructionCode code = kIA32Word32AtomicPairCompareExchange |
AddressingModeField::encode(addressing_mode);
InstructionOperand outputs[2];
size_t output_count = 0;
InstructionOperand temps[2];
size_t temp_count = 0;
if (this->valid(projection0)) {
outputs[output_count++] = g.DefineAsFixed(projection0, eax);
} else {
temps[temp_count++] = g.TempRegister(eax);
}
if (this->valid(projection1)) {
outputs[output_count++] = g.DefineAsFixed(projection1, edx);
} else {
temps[temp_count++] = g.TempRegister(edx);
}
Emit(code, output_count, outputs, arraysize(inputs), inputs, temp_count,
temps);
}
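// SIMD opcode lists, grouped by the operand shape and register constraints
// their visitors share.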
#define SIMD_INT_TYPES(V) \
V(I32x4) \
V(I16x8) \
V(I8x16)
#define SIMD_BINOP_LIST(V) \
V(I32x4GtU) \
V(I32x4GeU) \
V(I16x8Ne) \
V(I16x8GeS) \
V(I16x8GtU) \
V(I16x8GeU) \
V(I8x16Ne) \
V(I8x16GeS) \
V(I8x16GtU) \
V(I8x16GeU)
#define SIMD_BINOP_UNIFIED_SSE_AVX_LIST(V) \
V(F32x4Add) \
V(F32x4Sub) \
V(F32x4Mul) \
V(F32x4Div) \
V(F32x4Eq) \
V(F32x4Ne) \
V(F32x4Lt) \
V(F32x4Le) \
V(F32x4Min) \
V(F32x4Max) \
IF_WASM(V, F64x2Add) \
IF_WASM(V, F64x2Sub) \
IF_WASM(V, F64x2Mul) \
IF_WASM(V, F64x2Div) \
IF_WASM(V, F64x2Eq) \
IF_WASM(V, F64x2Ne) \
IF_WASM(V, F64x2Lt) \
IF_WASM(V, F64x2Le) \
V(I64x2Add) \
V(I64x2Sub) \
V(I64x2Eq) \
V(I64x2Ne) \
V(I32x4Add) \
V(I32x4Sub) \
V(I32x4Mul) \
V(I32x4MinS) \
V(I32x4MaxS) \
V(I32x4Eq) \
V(I32x4Ne) \
V(I32x4GtS) \
V(I32x4GeS) \
V(I32x4MinU) \
V(I32x4MaxU) \
V(I32x4DotI16x8S) \
V(I16x8Add) \
V(I16x8AddSatS) \
V(I16x8Sub) \
V(I16x8SubSatS) \
V(I16x8Mul) \
V(I16x8Eq) \
V(I16x8GtS) \
V(I16x8MinS) \
V(I16x8MaxS) \
V(I16x8AddSatU) \
V(I16x8SubSatU) \
V(I16x8MinU) \
V(I16x8MaxU) \
V(I16x8SConvertI32x4) \
V(I16x8UConvertI32x4) \
V(I16x8RoundingAverageU) \
V(I8x16Add) \
V(I8x16AddSatS) \
V(I8x16Sub) \
V(I8x16SubSatS) \
V(I8x16MinS) \
V(I8x16MaxS) \
V(I8x16Eq) \
V(I8x16GtS) \
V(I8x16AddSatU) \
V(I8x16SubSatU) \
V(I8x16MinU) \
V(I8x16MaxU) \
V(I8x16SConvertI16x8) \
V(I8x16UConvertI16x8) \
V(I8x16RoundingAverageU) \
V(S128And) \
V(S128Or) \
V(S128Xor)
// These opcodes require all inputs to be registers because the codegen is
// simpler with all registers.
#define SIMD_BINOP_RRR(V) \
V(I64x2ExtMulLowI32x4S) \
V(I64x2ExtMulHighI32x4S) \
V(I64x2ExtMulLowI32x4U) \
V(I64x2ExtMulHighI32x4U) \
V(I32x4ExtMulLowI16x8S) \
V(I32x4ExtMulHighI16x8S) \
V(I32x4ExtMulLowI16x8U) \
V(I32x4ExtMulHighI16x8U) \
V(I16x8ExtMulLowI8x16S) \
V(I16x8ExtMulHighI8x16S) \
V(I16x8ExtMulLowI8x16U) \
V(I16x8ExtMulHighI8x16U) \
V(I16x8Q15MulRSatS) \
V(I16x8RelaxedQ15MulRS)
#define SIMD_UNOP_LIST(V) \
V(F64x2ConvertLowI32x4S) \
V(F32x4DemoteF64x2Zero) \
V(F32x4Sqrt) \
V(F32x4SConvertI32x4) \
V(I64x2BitMask) \
V(I64x2SConvertI32x4Low) \
V(I64x2SConvertI32x4High) \
V(I64x2UConvertI32x4Low) \
V(I64x2UConvertI32x4High) \
V(I32x4SConvertI16x8Low) \
V(I32x4SConvertI16x8High) \
V(I32x4Neg) \
V(I32x4UConvertI16x8Low) \
V(I32x4UConvertI16x8High) \
V(I32x4Abs) \
V(I32x4BitMask) \
V(I16x8SConvertI8x16Low) \
V(I16x8SConvertI8x16High) \
V(I16x8Neg) \
V(I16x8UConvertI8x16Low) \
V(I16x8UConvertI8x16High) \
V(I16x8Abs) \
V(I8x16Neg) \
V(I8x16Abs) \
V(I8x16BitMask) \
V(S128Not)
#define SIMD_ALLTRUE_LIST(V) \
V(I64x2AllTrue) \
V(I32x4AllTrue) \
V(I16x8AllTrue) \
V(I8x16AllTrue)
#define SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX(V) \
V(I64x2Shl) \
V(I64x2ShrU) \
V(I32x4Shl) \
V(I32x4ShrS) \
V(I32x4ShrU) \
V(I16x8Shl) \
V(I16x8ShrS) \
V(I16x8ShrU)
#if V8_ENABLE_WEBASSEMBLY
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitS128Const(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
static const int kUint32Immediates = kSimd128Size / sizeof(uint32_t);
uint32_t val[kUint32Immediates];
if constexpr (Adapter::IsTurboshaft) {
const turboshaft::Simd128ConstantOp& constant =
this->Get(node).template Cast<turboshaft::Simd128ConstantOp>();
memcpy(val, constant.value, kSimd128Size);
} else {
memcpy(val, S128ImmediateParameterOf(node->op()).data(), kSimd128Size);
}
// If all bytes are zeros or ones, avoid emitting code for generic constants.
bool all_zeros = !(val[0] || val[1] || val[2] || val[3]);
bool all_ones = val[0] == UINT32_MAX && val[1] == UINT32_MAX &&
val[2] == UINT32_MAX && val[3] == UINT32_MAX;
InstructionOperand dst = g.DefineAsRegister(node);
if (all_zeros) {
Emit(kIA32S128Zero, dst);
} else if (all_ones) {
Emit(kIA32S128AllOnes, dst);
} else {
InstructionOperand inputs[kUint32Immediates];
for (int i = 0; i < kUint32Immediates; ++i) {
inputs[i] = g.UseImmediate(val[i]);
}
InstructionOperand temp(g.TempRegister());
Emit(kIA32S128Const, 1, &dst, kUint32Immediates, inputs, 1, &temp);
}
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF64x2Min(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
InstructionOperand operand0 = g.UseRegister(this->input_at(node, 0));
InstructionOperand operand1 = g.UseRegister(this->input_at(node, 1));
if (IsSupported(AVX)) {
Emit(kIA32F64x2Min, g.DefineAsRegister(node), operand0, operand1);
} else {
Emit(kIA32F64x2Min, g.DefineSameAsFirst(node), operand0, operand1);
}
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF64x2Max(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
InstructionOperand operand0 = g.UseRegister(this->input_at(node, 0));
InstructionOperand operand1 = g.UseRegister(this->input_at(node, 1));
if (IsSupported(AVX)) {
Emit(kIA32F64x2Max, g.DefineAsRegister(node), operand0, operand1);
} else {
Emit(kIA32F64x2Max, g.DefineSameAsFirst(node), operand0, operand1);
}
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF64x2Splat(node_t node) {
VisitRRSimd(this, node, kIA32F64x2Splat);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF64x2ExtractLane(node_t node) {
VisitRRISimd(this, node, kIA32F64x2ExtractLane, kIA32F64x2ExtractLane);
}
template <>
void InstructionSelectorT<TurboshaftAdapter>::VisitI64x2SplatI32Pair(
node_t node) {
// In turboshaft it gets lowered to an I32x4Splat.
UNREACHABLE();
}
template <>
void InstructionSelectorT<TurbofanAdapter>::VisitI64x2SplatI32Pair(Node* node) {
IA32OperandGeneratorT<TurbofanAdapter> g(this);
Int32Matcher match_left(node->InputAt(0));
Int32Matcher match_right(node->InputAt(1));
if (match_left.Is(0) && match_right.Is(0)) {
Emit(kIA32S128Zero, g.DefineAsRegister(node));
} else {
InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
InstructionOperand operand1 = g.Use(node->InputAt(1));
Emit(kIA32I64x2SplatI32Pair, g.DefineAsRegister(node), operand0, operand1);
}
}
template <>
void InstructionSelectorT<TurboshaftAdapter>::VisitI64x2ReplaceLaneI32Pair(
node_t node) {
// In turboshaft it gets lowered to an I32x4ReplaceLane.
UNREACHABLE();
}
template <>
void InstructionSelectorT<TurbofanAdapter>::VisitI64x2ReplaceLaneI32Pair(
Node* node) {
IA32OperandGeneratorT<TurbofanAdapter> g(this);
InstructionOperand operand = g.UseRegister(node->InputAt(0));
InstructionOperand lane = g.UseImmediate(OpParameter<int32_t>(node->op()));
InstructionOperand low = g.Use(node->InputAt(1));
InstructionOperand high = g.Use(node->InputAt(2));
Emit(kIA32I64x2ReplaceLaneI32Pair, g.DefineSameAsFirst(node), operand, lane,
low, high);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI64x2Neg(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
// If AVX unsupported, make sure dst != src to avoid a move.
InstructionOperand operand0 =
IsSupported(AVX) ? g.UseRegister(this->input_at(node, 0))
: g.UseUniqueRegister(this->input_at(node, 0));
Emit(kIA32I64x2Neg, g.DefineAsRegister(node), operand0);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI64x2ShrS(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
InstructionOperand dst =
IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
if (g.CanBeImmediate(this->input_at(node, 1))) {
Emit(kIA32I64x2ShrS, dst, g.UseRegister(this->input_at(node, 0)),
g.UseImmediate(this->input_at(node, 1)));
} else {
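// Non-constant shift counts take a slower path that needs a SIMD temp and a
// general-purpose scratch; the shifted input is a unique use so it is not
// allocated into either temp.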
InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()};
Emit(kIA32I64x2ShrS, dst, g.UseUniqueRegister(this->input_at(node, 0)),
g.UseRegister(this->input_at(node, 1)), arraysize(temps), temps);
}
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI64x2Mul(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
InstructionOperand temps[] = {g.TempSimd128Register(),
g.TempSimd128Register()};
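// The 64x2 multiply is expanded into a sequence using two SIMD temps; both
// inputs are unique uses so they are not allocated into the temps or the
// result register.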
Emit(kIA32I64x2Mul, g.DefineAsRegister(node),
g.UseUniqueRegister(this->input_at(node, 0)),
g.UseUniqueRegister(this->input_at(node, 1)), arraysize(temps), temps);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF32x4Splat(node_t node) {
VisitRRSimd(this, node, kIA32F32x4Splat);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF32x4ExtractLane(node_t node) {
VisitRRISimd(this, node, kIA32F32x4ExtractLane);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF32x4UConvertI32x4(node_t node) {
VisitRRSimd(this, node, kIA32F32x4UConvertI32x4);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI32x4SConvertF32x4(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
InstructionOperand temps[] = {g.TempRegister()};
InstructionOperand dst =
IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
Emit(kIA32I32x4SConvertF32x4, dst, g.UseRegister(this->input_at(node, 0)),
arraysize(temps), temps);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI32x4UConvertF32x4(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
InstructionOperand temps[] = {g.TempSimd128Register(),
g.TempSimd128Register()};
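// The unsigned conversion expands to a multi-instruction sequence that needs
// two SIMD temps; for both the SSE and AVX variants the result is constrained
// to the first input's register.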
InstructionCode opcode =
IsSupported(AVX) ? kAVXI32x4UConvertF32x4 : kSSEI32x4UConvertF32x4;
Emit(opcode, g.DefineSameAsFirst(node),
g.UseRegister(this->input_at(node, 0)), arraysize(temps), temps);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitS128Zero(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
Emit(kIA32S128Zero, g.DefineAsRegister(node));
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitS128Select(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
InstructionOperand dst =
IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
Emit(kIA32S128Select, dst, g.UseRegister(this->input_at(node, 0)),
g.UseRegister(this->input_at(node, 1)),
g.UseRegister(this->input_at(node, 2)));
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitS128AndNot(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
// andnps a b computes ~a & b, but we want a & ~b, so flip the inputs.
InstructionOperand dst =
IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
Emit(kIA32S128AndNot, dst, g.UseRegister(this->input_at(node, 1)),
g.UseRegister(this->input_at(node, 0)));
}
#define VISIT_SIMD_SPLAT(Type) \
template <typename Adapter> \
void InstructionSelectorT<Adapter>::Visit##Type##Splat(node_t node) { \
bool set_zero; \
if constexpr (Adapter::IsTurboshaft) { \
set_zero = this->MatchIntegralZero(this->input_at(node, 0)); \
} else { \
set_zero = Int32Matcher(node->InputAt(0)).Is(0); \
} \
if (set_zero) { \
IA32OperandGeneratorT<Adapter> g(this); \
Emit(kIA32S128Zero, g.DefineAsRegister(node)); \
} else { \
VisitRO(this, node, kIA32##Type##Splat); \
} \
}
SIMD_INT_TYPES(VISIT_SIMD_SPLAT)
#undef SIMD_INT_TYPES
#undef VISIT_SIMD_SPLAT
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF16x8Splat(node_t node) {
UNIMPLEMENTED();
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI8x16ExtractLaneU(node_t node) {
VisitRRISimd(this, node, kIA32Pextrb);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI8x16ExtractLaneS(node_t node) {
VisitRRISimd(this, node, kIA32I8x16ExtractLaneS);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI16x8ExtractLaneU(node_t node) {
VisitRRISimd(this, node, kIA32Pextrw);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI16x8ExtractLaneS(node_t node) {
VisitRRISimd(this, node, kIA32I16x8ExtractLaneS);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI32x4ExtractLane(node_t node) {
VisitRRISimd(this, node, kIA32I32x4ExtractLane);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF16x8ExtractLane(node_t node) {
UNIMPLEMENTED();
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF16x8ReplaceLane(node_t node) {
UNIMPLEMENTED();
}
#define SIMD_REPLACE_LANE_TYPE_OP(V) \
V(I32x4, kIA32Pinsrd) \
V(I16x8, kIA32Pinsrw) \
V(I8x16, kIA32Pinsrb) \
V(F32x4, kIA32Insertps) \
V(F64x2, kIA32F64x2ReplaceLane)
#define VISIT_SIMD_REPLACE_LANE(TYPE, OPCODE) \
template <typename Adapter> \
void InstructionSelectorT<Adapter>::Visit##TYPE##ReplaceLane(node_t node) { \
IA32OperandGeneratorT<Adapter> g(this); \
int lane; \
if constexpr (Adapter::IsTurboshaft) { \
const turboshaft::Simd128ReplaceLaneOp& op = \
this->Get(node).template Cast<turboshaft::Simd128ReplaceLaneOp>(); \
lane = op.lane; \
} else { \
lane = OpParameter<int32_t>(node->op()); \
} \
InstructionOperand operand0 = g.UseRegister(this->input_at(node, 0)); \
InstructionOperand operand1 = g.UseImmediate(lane); \
auto input1 = this->input_at(node, 1); \
InstructionOperand operand2; \
if constexpr (OPCODE == kIA32F64x2ReplaceLane) { \
operand2 = g.UseRegister(input1); \
} else { \
operand2 = g.Use(input1); \
} \
/* When no-AVX, define dst == src to save a move. */ \
InstructionOperand dst = IsSupported(AVX) ? g.DefineAsRegister(node) \
: g.DefineSameAsFirst(node); \
Emit(OPCODE, dst, operand0, operand1, operand2); \
}
SIMD_REPLACE_LANE_TYPE_OP(VISIT_SIMD_REPLACE_LANE)
#undef VISIT_SIMD_REPLACE_LANE
#undef SIMD_REPLACE_LANE_TYPE_OP
#define VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX(Opcode) \
template <typename Adapter> \
void InstructionSelectorT<Adapter>::Visit##Opcode(node_t node) { \
VisitRROSimdShift(this, node, kIA32##Opcode); \
}
SIMD_SHIFT_OPCODES_UNIFIED_SSE_AVX(VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX)
#undef VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX
#undef SIMD_SHIFT_OPCODES_UNIFIED_SSE_AVX
// TODO(v8:9198): SSE requires operand0 to be a register as we don't have memory
// alignment yet. For AVX, memory operands are fine, but can have performance
// issues if not aligned to 16/32 bytes (based on load size), see SDM Vol 1,
// chapter 14.9
#define VISIT_SIMD_UNOP(Opcode) \
template <typename Adapter> \
void InstructionSelectorT<Adapter>::Visit##Opcode(node_t node) { \
IA32OperandGeneratorT<Adapter> g(this); \
Emit(kIA32##Opcode, g.DefineAsRegister(node), \
g.UseRegister(this->input_at(node, 0))); \
}
SIMD_UNOP_LIST(VISIT_SIMD_UNOP)
#undef VISIT_SIMD_UNOP
#undef SIMD_UNOP_LIST
#define UNIMPLEMENTED_SIMD_UNOP_LIST(V) \
V(F16x8Abs) \
V(F16x8Neg) \
V(F16x8Sqrt) \
V(F16x8Floor) \
V(F16x8Ceil) \
V(F16x8Trunc) \
V(F16x8NearestInt)
#define SIMD_VISIT_UNIMPL_UNOP(Name) \
template <typename Adapter> \
void InstructionSelectorT<Adapter>::Visit##Name(node_t node) { \
UNIMPLEMENTED(); \
}
UNIMPLEMENTED_SIMD_UNOP_LIST(SIMD_VISIT_UNIMPL_UNOP)
#undef SIMD_VISIT_UNIMPL_UNOP
#undef UNIMPLEMENTED_SIMD_UNOP_LIST
#define UNIMPLEMENTED_SIMD_CVTOP_LIST(V) \
V(F16x8SConvertI16x8) \
V(F16x8UConvertI16x8) \
V(I16x8SConvertF16x8) \
V(I16x8UConvertF16x8) \
V(F32x4PromoteLowF16x8) \
V(F16x8DemoteF32x4Zero) \
V(F16x8DemoteF64x2Zero)
#define SIMD_VISIT_UNIMPL_CVTOP(Name) \
template <typename Adapter> \
void InstructionSelectorT<Adapter>::Visit##Name(node_t node) { \
UNIMPLEMENTED(); \
}
UNIMPLEMENTED_SIMD_CVTOP_LIST(SIMD_VISIT_UNIMPL_CVTOP)
#undef SIMD_VISIT_UNIMPL_CVTOP
#undef UNIMPLEMENTED_SIMD_CVTOP_LIST
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitV128AnyTrue(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
InstructionOperand temps[] = {g.TempRegister()};
Emit(kIA32S128AnyTrue, g.DefineAsRegister(node),
g.UseRegister(this->input_at(node, 0)), arraysize(temps), temps);
}
#define VISIT_SIMD_ALLTRUE(Opcode) \
template <typename Adapter> \
void InstructionSelectorT<Adapter>::Visit##Opcode(node_t node) { \
IA32OperandGeneratorT<Adapter> g(this); \
InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; \
Emit(kIA32##Opcode, g.DefineAsRegister(node), \
g.UseUniqueRegister(this->input_at(node, 0)), arraysize(temps), \
temps); \
}
SIMD_ALLTRUE_LIST(VISIT_SIMD_ALLTRUE)
#undef VISIT_SIMD_ALLTRUE
#undef SIMD_ALLTRUE_LIST
#define VISIT_SIMD_BINOP(Opcode) \
template <typename Adapter> \
void InstructionSelectorT<Adapter>::Visit##Opcode(node_t node) { \
VisitRROSimd(this, node, kAVX##Opcode, kSSE##Opcode); \
}
SIMD_BINOP_LIST(VISIT_SIMD_BINOP)
#undef VISIT_SIMD_BINOP
#undef SIMD_BINOP_LIST
#define UNIMPLEMENTED_SIMD_BINOP_LIST(V) \
V(F16x8Add) \
V(F16x8Sub) \
V(F16x8Mul) \
V(F16x8Div) \
V(F16x8Min) \
V(F16x8Max) \
V(F16x8Pmin) \
V(F16x8Pmax) \
V(F16x8Eq) \
V(F16x8Ne) \
V(F16x8Lt) \
V(F16x8Le)
#define SIMD_VISIT_UNIMPL_BINOP(Name) \
template <typename Adapter> \
void InstructionSelectorT<Adapter>::Visit##Name(node_t node) { \
UNIMPLEMENTED(); \
}
UNIMPLEMENTED_SIMD_BINOP_LIST(SIMD_VISIT_UNIMPL_BINOP)
#undef SIMD_VISIT_UNIMPL_BINOP
#undef UNIMPLEMENTED_SIMD_BINOP_LIST
#define VISIT_SIMD_BINOP_UNIFIED_SSE_AVX(Opcode) \
template <typename Adapter> \
void InstructionSelectorT<Adapter>::Visit##Opcode(node_t node) { \
VisitRROSimd(this, node, kIA32##Opcode, kIA32##Opcode); \
}
SIMD_BINOP_UNIFIED_SSE_AVX_LIST(VISIT_SIMD_BINOP_UNIFIED_SSE_AVX)
#undef VISIT_SIMD_BINOP_UNIFIED_SSE_AVX
#undef SIMD_BINOP_UNIFIED_SSE_AVX_LIST
#define VISIT_SIMD_BINOP_RRR(OPCODE) \
template <typename Adapter> \
void InstructionSelectorT<Adapter>::Visit##OPCODE(node_t node) { \
VisitRRRSimd(this, node, kIA32##OPCODE); \
}
SIMD_BINOP_RRR(VISIT_SIMD_BINOP_RRR)
#undef VISIT_SIMD_BINOP_RRR
#undef SIMD_BINOP_RRR
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI16x8BitMask(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
InstructionOperand temps[] = {g.TempSimd128Register()};
Emit(kIA32I16x8BitMask, g.DefineAsRegister(node),
g.UseUniqueRegister(this->input_at(node, 0)), arraysize(temps), temps);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI8x16Shl(node_t node) {
VisitI8x16Shift(this, node, kIA32I8x16Shl);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI8x16ShrS(node_t node) {
VisitI8x16Shift(this, node, kIA32I8x16ShrS);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI8x16ShrU(node_t node) {
VisitI8x16Shift(this, node, kIA32I8x16ShrU);
}
#endif // V8_ENABLE_WEBASSEMBLY
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitInt32AbsWithOverflow(node_t node) {
UNREACHABLE();
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitInt64AbsWithOverflow(node_t node) {
UNREACHABLE();
}
#if V8_ENABLE_WEBASSEMBLY
namespace {
// Returns true if shuffle can be decomposed into two 16x4 half shuffles
// followed by a 16x8 blend.
// E.g. [3 2 1 0 15 14 13 12].
bool TryMatch16x8HalfShuffle(uint8_t* shuffle16x8, uint8_t* blend_mask) {
*blend_mask = 0;
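// Each output lane must come from the same 16x4 half (bit 2 of the index) as
// its position; bit i of the blend mask records whether lane i is taken from
// the second input (indices 8-15).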
for (int i = 0; i < 8; i++) {
if ((shuffle16x8[i] & 0x4) != (i & 0x4)) return false;
*blend_mask |= (shuffle16x8[i] > 7 ? 1 : 0) << i;
}
return true;
}
struct ShuffleEntry {
uint8_t shuffle[kSimd128Size];
ArchOpcode opcode;
ArchOpcode avx_opcode;
bool src0_needs_reg;
bool src1_needs_reg;
};
// Shuffles that map to architecture-specific instruction sequences. These are
// matched very early, so we shouldn't include shuffles that match better in
// later tests, like 32x4 and 16x8 shuffles. In general, these patterns should
// map to either a single instruction, or be finer grained, such as zip/unzip or
// transpose patterns.
static const ShuffleEntry arch_shuffles[] = {
{{0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23},
kIA32S64x2UnpackLow,
kIA32S64x2UnpackLow,
true,
false},
{{8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31},
kIA32S64x2UnpackHigh,
kIA32S64x2UnpackHigh,
true,
false},
{{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23},
kIA32S32x4UnpackLow,
kIA32S32x4UnpackLow,
true,
false},
{{8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31},
kIA32S32x4UnpackHigh,
kIA32S32x4UnpackHigh,
true,
false},
{{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23},
kIA32S16x8UnpackLow,
kIA32S16x8UnpackLow,
true,
false},
{{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31},
kIA32S16x8UnpackHigh,
kIA32S16x8UnpackHigh,
true,
false},
{{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23},
kIA32S8x16UnpackLow,
kIA32S8x16UnpackLow,
true,
false},
{{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31},
kIA32S8x16UnpackHigh,
kIA32S8x16UnpackHigh,
true,
false},
{{0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29},
kSSES16x8UnzipLow,
kAVXS16x8UnzipLow,
true,
false},
{{2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31},
kSSES16x8UnzipHigh,
kAVXS16x8UnzipHigh,
true,
true},
{{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30},
kSSES8x16UnzipLow,
kAVXS8x16UnzipLow,
true,
true},
{{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31},
kSSES8x16UnzipHigh,
kAVXS8x16UnzipHigh,
true,
true},
{{0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30},
kSSES8x16TransposeLow,
kAVXS8x16TransposeLow,
true,
true},
{{1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31},
kSSES8x16TransposeHigh,
kAVXS8x16TransposeHigh,
true,
true},
{{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8},
kSSES8x8Reverse,
kAVXS8x8Reverse,
true,
true},
{{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12},
kSSES8x4Reverse,
kAVXS8x4Reverse,
true,
true},
{{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14},
kSSES8x2Reverse,
kAVXS8x2Reverse,
true,
true}};
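// Scans |table| for an entry whose pattern matches |shuffle| and returns it
// via |arch_shuffle|. Lane indices are compared modulo 16 for swizzles
// (single input) and modulo 32 for two-input shuffles.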
bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
size_t num_entries, bool is_swizzle,
const ShuffleEntry** arch_shuffle) {
uint8_t mask = is_swizzle ? kSimd128Size - 1 : 2 * kSimd128Size - 1;
for (size_t i = 0; i < num_entries; ++i) {
const ShuffleEntry& entry = table[i];
int j = 0;
for (; j < kSimd128Size; ++j) {
if ((entry.shuffle[j] & mask) != (shuffle[j] & mask)) {
break;
}
}
if (j == kSimd128Size) {
*arch_shuffle = &entry;
return true;
}
}
return false;
}
} // namespace
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI8x16Shuffle(node_t node) {
uint8_t shuffle[kSimd128Size];
bool is_swizzle;
auto view = this->simd_shuffle_view(node);
CanonicalizeShuffle(view, shuffle, &is_swizzle);
int imm_count = 0;
static const int kMaxImms = 6;
uint32_t imms[kMaxImms];
int temp_count = 0;
static const int kMaxTemps = 2;
InstructionOperand temps[kMaxTemps];
IA32OperandGeneratorT<Adapter> g(this);
bool use_avx = CpuFeatures::IsSupported(AVX);
// AVX and swizzles don't generally need DefineSameAsFirst to avoid a move.
bool no_same_as_first = use_avx || is_swizzle;
// We generally need UseRegister for input0, Use for input1.
// TODO(v8:9198): We don't have 16-byte alignment for SIMD operands yet. We
// keep computing src0_needs_reg/src1_needs_reg in the various shuffle match
// clauses below, but ignore them when selecting registers or slots.
bool src0_needs_reg = true;
bool src1_needs_reg = false;
ArchOpcode opcode = kIA32I8x16Shuffle; // general shuffle is the default
uint8_t offset;
uint8_t shuffle32x4[4];
uint8_t shuffle16x8[8];
int index;
const ShuffleEntry* arch_shuffle;
if (wasm::SimdShuffle::TryMatchConcat(shuffle, &offset)) {
if (wasm::SimdShuffle::TryMatch32x4Rotate(shuffle, shuffle32x4,
is_swizzle)) {
uint8_t shuffle_mask = wasm::SimdShuffle::PackShuffle4(shuffle32x4);
opcode = kIA32S32x4Rotate;
imms[imm_count++] = shuffle_mask;
} else {
// Swap inputs from the normal order for (v)palignr.
SwapShuffleInputs(view);
is_swizzle = false; // It's simpler to just handle the general case.
no_same_as_first = use_avx; // SSE requires same-as-first.
opcode = kIA32S8x16Alignr;
// palignr takes a single imm8 offset.
imms[imm_count++] = offset;
}
} else if (TryMatchArchShuffle(shuffle, arch_shuffles,
arraysize(arch_shuffles), is_swizzle,
&arch_shuffle)) {
opcode = use_avx ? arch_shuffle->avx_opcode : arch_shuffle->opcode;
src0_needs_reg = !use_avx || arch_shuffle->src0_needs_reg;
// SSE can't take advantage of both operands in registers and needs
// same-as-first.
src1_needs_reg = use_avx && arch_shuffle->src1_needs_reg;
no_same_as_first = use_avx;
} else if (wasm::SimdShuffle::TryMatch32x4Shuffle(shuffle, shuffle32x4)) {
uint8_t shuffle_mask = wasm::SimdShuffle::PackShuffle4(shuffle32x4);
if (is_swizzle) {
if (wasm::SimdShuffle::TryMatchIdentity(shuffle)) {
// Bypass normal shuffle code generation in this case.
node_t input = view.input(0);
// EmitIdentity
MarkAsUsed(input);
MarkAsDefined(node);
SetRename(node, input);
return;
} else {
// pshufd takes a single imm8 shuffle mask.
opcode = kIA32S32x4Swizzle;
no_same_as_first = true;
// TODO(v8:9198): This doesn't strictly require a register, but we force
// swizzles to use registers until the generation of incorrect memory
// operands can be fixed.
src0_needs_reg = true;
imms[imm_count++] = shuffle_mask;
}
} else {
// 2 operand shuffle
// A blend is more efficient than a general 32x4 shuffle; try it first.
if (wasm::SimdShuffle::TryMatchBlend(shuffle)) {
opcode = kIA32S16x8Blend;
uint8_t blend_mask = wasm::SimdShuffle::PackBlend4(shuffle32x4);
imms[imm_count++] = blend_mask;
} else {
opcode = kIA32S32x4Shuffle;
no_same_as_first = true;
// TODO(v8:9198): src0 and src1 are used by pshufd in codegen, which
// requires memory operands to be 16-byte aligned; since we cannot
// guarantee that yet, force both into registers here.
src0_needs_reg = true;
src1_needs_reg = true;
imms[imm_count++] = shuffle_mask;
int8_t blend_mask = wasm::SimdShuffle::PackBlend4(shuffle32x4);
imms[imm_count++] = blend_mask;
}
}
} else if (wasm::SimdShuffle::TryMatch16x8Shuffle(shuffle, shuffle16x8)) {
uint8_t blend_mask;
if (wasm::SimdShuffle::TryMatchBlend(shuffle)) {
opcode = kIA32S16x8Blend;
blend_mask = wasm::SimdShuffle::PackBlend8(shuffle16x8);
imms[imm_count++] = blend_mask;
} else if (wasm::SimdShuffle::TryMatchSplat<8>(shuffle, &index)) {
opcode = kIA32S16x8Dup;
src0_needs_reg = false;
imms[imm_count++] = index;
} else if (TryMatch16x8HalfShuffle(shuffle16x8, &blend_mask)) {
opcode = is_swizzle ? kIA32S16x8HalfShuffle1 : kIA32S16x8HalfShuffle2;
// Half-shuffles don't need DefineSameAsFirst or UseRegister(src0).
no_same_as_first = true;
src0_needs_reg = false;
uint8_t mask_lo = wasm::SimdShuffle::PackShuffle4(shuffle16x8);
uint8_t mask_hi = wasm::SimdShuffle::PackShuffle4(shuffle16x8 + 4);
imms[imm_count++] = mask_lo;
imms[imm_count++] = mask_hi;
if (!is_swizzle) imms[imm_count++] = blend_mask;
}
} else if (wasm::SimdShuffle::TryMatchSplat<16>(shuffle, &index)) {
opcode = kIA32S8x16Dup;
no_same_as_first = use_avx;
src0_needs_reg = true;
imms[imm_count++] = index;
}
if (opcode == kIA32I8x16Shuffle) {
// Use same-as-first for general swizzle, but not shuffle.
no_same_as_first = !is_swizzle;
src0_needs_reg = !no_same_as_first;
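// The general shuffle passes all 16 byte lane indices as four packed 32-bit
// immediates (four indices each) and needs a general-purpose scratch.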
imms[imm_count++] = wasm::SimdShuffle::Pack4Lanes(shuffle);
imms[imm_count++] = wasm::SimdShuffle::Pack4Lanes(shuffle + 4);
imms[imm_count++] = wasm::SimdShuffle::Pack4Lanes(shuffle + 8);
imms[imm_count++] = wasm::SimdShuffle::Pack4Lanes(shuffle + 12);
temps[temp_count++] = g.TempRegister();
}
// Use DefineAsRegister(node) and Use(src0) if we can without forcing an extra
// move instruction in the CodeGenerator.
node_t input0 = view.input(0);
InstructionOperand dst =
no_same_as_first ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
// TODO(v8:9198): Use src0_needs_reg when we have memory alignment for SIMD.
InstructionOperand src0 = g.UseRegister(input0);
USE(src0_needs_reg);
int input_count = 0;
InstructionOperand inputs[2 + kMaxImms + kMaxTemps];
inputs[input_count++] = src0;
if (!is_swizzle) {
node_t input1 = view.input(1);
// TODO(v8:9198): Use src1_needs_reg when we have memory alignment for SIMD.
inputs[input_count++] = g.UseRegister(input1);
USE(src1_needs_reg);
}
for (int i = 0; i < imm_count; ++i) {
inputs[input_count++] = g.UseImmediate(imms[i]);
}
Emit(opcode, 1, &dst, input_count, inputs, temp_count, temps);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI8x16Swizzle(node_t node) {
InstructionCode op = kIA32I8x16Swizzle;
node_t left = this->input_at(node, 0);
node_t right = this->input_at(node, 1);
if constexpr (Adapter::IsTurboshaft) {
const turboshaft::Simd128BinopOp& binop =
this->Get(node).template Cast<turboshaft::Simd128BinopOp>();
DCHECK(binop.kind ==
turboshaft::any_of(
turboshaft::Simd128BinopOp::Kind::kI8x16Swizzle,
turboshaft::Simd128BinopOp::Kind::kI8x16RelaxedSwizzle));
bool relaxed =
binop.kind == turboshaft::Simd128BinopOp::Kind::kI8x16RelaxedSwizzle;
if (relaxed) {
op |= MiscField::encode(true);
} else {
// If the indices vector is a constant and every index is either in range
// or has its top bit set, we can avoid the paddusb in codegen and simply
// emit a pshufb.
const turboshaft::Operation& right_op = this->Get(right);
if (auto c = right_op.TryCast<turboshaft::Simd128ConstantOp>()) {
std::array<uint8_t, kSimd128Size> imms;
std::memcpy(&imms, c->value, kSimd128Size);
op |= MiscField::encode(wasm::SimdSwizzle::AllInRangeOrTopBitSet(imms));
}
}
} else {
// Turbofan.
bool relaxed = OpParameter<bool>(node->op());
if (relaxed) {
op |= MiscField::encode(true);
} else {
auto m = V128ConstMatcher(node->InputAt(1));
if (m.HasResolvedValue()) {
// If the indices vector is a constant and every index is either in range
// or has its top bit set, we can avoid the paddusb in codegen and simply
// emit a pshufb.
auto imms = m.ResolvedValue().immediate();
op |= MiscField::encode(wasm::SimdSwizzle::AllInRangeOrTopBitSet(imms));
}
}
}
IA32OperandGeneratorT<Adapter> g(this);
InstructionOperand temps[] = {g.TempRegister()};
Emit(op,
IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node),
g.UseRegister(left), g.UseRegister(right), arraysize(temps), temps);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitSetStackPointer(node_t node) {
OperandGenerator g(this);
auto input = g.UseAny(this->input_at(node, 0));
Emit(kArchSetStackPointer, 0, nullptr, 1, &input);
}
namespace {
template <typename Adapter>
void VisitMinOrMax(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, ArchOpcode opcode,
bool flip_inputs) {
// Due to the way minps/minpd work, we want the dst to be same as the second
// input: b = pmin(a, b) directly maps to minps b a.
IA32OperandGeneratorT<Adapter> g(selector);
InstructionOperand dst = selector->IsSupported(AVX)
? g.DefineAsRegister(node)
: g.DefineSameAsFirst(node);
if (flip_inputs) {
selector->Emit(opcode, dst, g.UseRegister(selector->input_at(node, 1)),
g.UseRegister(selector->input_at(node, 0)));
} else {
selector->Emit(opcode, dst, g.UseRegister(selector->input_at(node, 0)),
g.UseRegister(selector->input_at(node, 1)));
}
}
} // namespace
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF32x4Pmin(node_t node) {
VisitMinOrMax(this, node, kIA32Minps, true);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF32x4Pmax(node_t node) {
VisitMinOrMax(this, node, kIA32Maxps, true);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF64x2Pmin(node_t node) {
VisitMinOrMax(this, node, kIA32Minpd, true);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF64x2Pmax(node_t node) {
VisitMinOrMax(this, node, kIA32Maxpd, true);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF32x4RelaxedMin(node_t node) {
VisitMinOrMax(this, node, kIA32Minps, false);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF32x4RelaxedMax(node_t node) {
VisitMinOrMax(this, node, kIA32Maxps, false);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF64x2RelaxedMin(node_t node) {
VisitMinOrMax(this, node, kIA32Minpd, false);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF64x2RelaxedMax(node_t node) {
VisitMinOrMax(this, node, kIA32Maxpd, false);
}
namespace {
template <typename Adapter>
void VisitExtAddPairwise(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node, ArchOpcode opcode,
bool need_temp) {
IA32OperandGeneratorT<Adapter> g(selector);
InstructionOperand operand0 = g.UseRegister(selector->input_at(node, 0));
InstructionOperand dst = (selector->IsSupported(AVX))
? g.DefineAsRegister(node)
: g.DefineSameAsFirst(node);
if (need_temp) {
InstructionOperand temps[] = {g.TempRegister()};
selector->Emit(opcode, dst, operand0, arraysize(temps), temps);
} else {
selector->Emit(opcode, dst, operand0);
}
}
} // namespace
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI32x4ExtAddPairwiseI16x8S(
node_t node) {
VisitExtAddPairwise(this, node, kIA32I32x4ExtAddPairwiseI16x8S, true);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI32x4ExtAddPairwiseI16x8U(
node_t node) {
VisitExtAddPairwise(this, node, kIA32I32x4ExtAddPairwiseI16x8U, false);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI16x8ExtAddPairwiseI8x16S(
node_t node) {
VisitExtAddPairwise(this, node, kIA32I16x8ExtAddPairwiseI8x16S, true);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI16x8ExtAddPairwiseI8x16U(
node_t node) {
VisitExtAddPairwise(this, node, kIA32I16x8ExtAddPairwiseI8x16U, true);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI8x16Popcnt(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
InstructionOperand dst = g.DefineAsRegister(node);
InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()};
Emit(kIA32I8x16Popcnt, dst, g.UseUniqueRegister(this->input_at(node, 0)),
arraysize(temps), temps);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF64x2ConvertLowI32x4U(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
InstructionOperand temps[] = {g.TempRegister()};
InstructionOperand dst =
IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
Emit(kIA32F64x2ConvertLowI32x4U, dst, g.UseRegister(this->input_at(node, 0)),
arraysize(temps), temps);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI32x4TruncSatF64x2SZero(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
InstructionOperand temps[] = {g.TempRegister()};
if (IsSupported(AVX)) {
// Requires dst != src.
Emit(kIA32I32x4TruncSatF64x2SZero, g.DefineAsRegister(node),
g.UseUniqueRegister(this->input_at(node, 0)), arraysize(temps), temps);
} else {
Emit(kIA32I32x4TruncSatF64x2SZero, g.DefineSameAsFirst(node),
g.UseRegister(this->input_at(node, 0)), arraysize(temps), temps);
}
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI32x4TruncSatF64x2UZero(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
InstructionOperand temps[] = {g.TempRegister()};
InstructionOperand dst =
IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
Emit(kIA32I32x4TruncSatF64x2UZero, dst,
g.UseRegister(this->input_at(node, 0)), arraysize(temps), temps);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI32x4RelaxedTruncF64x2SZero(
node_t node) {
VisitRRSimd(this, node, kIA32Cvttpd2dq);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI32x4RelaxedTruncF64x2UZero(
node_t node) {
VisitFloatUnop(this, node, this->input_at(node, 0),
kIA32I32x4TruncF64x2UZero);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI32x4RelaxedTruncF32x4S(node_t node) {
VisitRRSimd(this, node, kIA32Cvttps2dq);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI32x4RelaxedTruncF32x4U(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
node_t input = this->input_at(node, 0);
InstructionOperand temps[] = {g.TempSimd128Register()};
// No need for unique because inputs are float but temp is general.
if (IsSupported(AVX)) {
Emit(kIA32I32x4TruncF32x4U, g.DefineAsRegister(node), g.UseRegister(input),
arraysize(temps), temps);
} else {
Emit(kIA32I32x4TruncF32x4U, g.DefineSameAsFirst(node), g.UseRegister(input),
arraysize(temps), temps);
}
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI64x2GtS(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
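// With AVX the result can go in any register. The SSE4.2 sequence is
// destructive, so the result must reuse the first input. Without SSE4.2 the
// expansion needs both inputs intact while the result is built, so neither
// input may alias the result register (unique uses).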
if (CpuFeatures::IsSupported(AVX)) {
Emit(kIA32I64x2GtS, g.DefineAsRegister(node),
g.UseRegister(this->input_at(node, 0)),
g.UseRegister(this->input_at(node, 1)));
} else if (CpuFeatures::IsSupported(SSE4_2)) {
Emit(kIA32I64x2GtS, g.DefineSameAsFirst(node),
g.UseRegister(this->input_at(node, 0)),
g.UseRegister(this->input_at(node, 1)));
} else {
Emit(kIA32I64x2GtS, g.DefineAsRegister(node),
g.UseUniqueRegister(this->input_at(node, 0)),
g.UseUniqueRegister(this->input_at(node, 1)));
}
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI64x2GeS(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
if (CpuFeatures::IsSupported(AVX)) {
Emit(kIA32I64x2GeS, g.DefineAsRegister(node),
g.UseRegister(this->input_at(node, 0)),
g.UseRegister(this->input_at(node, 1)));
} else if (CpuFeatures::IsSupported(SSE4_2)) {
Emit(kIA32I64x2GeS, g.DefineAsRegister(node),
g.UseUniqueRegister(this->input_at(node, 0)),
g.UseRegister(this->input_at(node, 1)));
} else {
Emit(kIA32I64x2GeS, g.DefineAsRegister(node),
g.UseUniqueRegister(this->input_at(node, 0)),
g.UseUniqueRegister(this->input_at(node, 1)));
}
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI64x2Abs(node_t node) {
VisitRRSimd(this, node, kIA32I64x2Abs, kIA32I64x2Abs);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF64x2PromoteLowF32x4(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
InstructionCode code = kIA32F64x2PromoteLowF32x4;
node_t input = this->input_at(node, 0);
if constexpr (Adapter::IsTurboshaft) {
// TODO(nicohartmann@): Implement this special case for turboshaft. Note
// that this special case may require adaptations in instruction-selector.cc
// in `FinishEmittedInstructions`, similar to what exists for TurboFan.
} else {
LoadTransformMatcher m(input);
if (m.Is(LoadTransformation::kS128Load64Zero) && CanCover(node, input)) {
// Trap handler is not supported on IA32.
DCHECK_NE(m.ResolvedValue().kind,
MemoryAccessKind::kProtectedByTrapHandler);
// LoadTransforms cannot be eliminated, so they are visited even if
// unused. Mark it as defined so that we don't visit it.
MarkAsDefined(input);
VisitLoad(node, input, code);
return;
}
}
VisitRR(this, node, code);
}
namespace {
template <typename Adapter>
void VisitRelaxedLaneSelect(InstructionSelectorT<Adapter>* selector,
typename Adapter::node_t node,
InstructionCode code = kIA32Pblendvb) {
IA32OperandGeneratorT<Adapter> g(selector);
// pblendvb/blendvps/blendvpd copies src2 when mask is set, opposite from Wasm
// semantics. node's inputs are: mask, lhs, rhs (determined in
// wasm-compiler.cc).
if (selector->IsSupported(AVX)) {
selector->Emit(code, g.DefineAsRegister(node),
g.UseRegister(selector->input_at(node, 2)),
g.UseRegister(selector->input_at(node, 1)),
g.UseRegister(selector->input_at(node, 0)));
} else {
// SSE4.1 pblendvb/blendvps/blendvpd requires xmm0 to hold the mask as an
// implicit operand.
selector->Emit(code, g.DefineSameAsFirst(node),
g.UseRegister(selector->input_at(node, 2)),
g.UseRegister(selector->input_at(node, 1)),
g.UseFixed(selector->input_at(node, 0), xmm0));
}
}
} // namespace
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI8x16RelaxedLaneSelect(node_t node) {
VisitRelaxedLaneSelect(this, node);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI16x8RelaxedLaneSelect(node_t node) {
VisitRelaxedLaneSelect(this, node);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI32x4RelaxedLaneSelect(node_t node) {
VisitRelaxedLaneSelect(this, node, kIA32Blendvps);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI64x2RelaxedLaneSelect(node_t node) {
VisitRelaxedLaneSelect(this, node, kIA32Blendvpd);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF64x2Qfma(node_t node) {
VisitRRRR(this, node, kIA32F64x2Qfma);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF64x2Qfms(node_t node) {
VisitRRRR(this, node, kIA32F64x2Qfms);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF32x4Qfma(node_t node) {
VisitRRRR(this, node, kIA32F32x4Qfma);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF32x4Qfms(node_t node) {
VisitRRRR(this, node, kIA32F32x4Qfms);
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF16x8Qfma(node_t node) {
UNIMPLEMENTED();
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitF16x8Qfms(node_t node) {
UNIMPLEMENTED();
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI16x8DotI8x16I7x16S(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
Emit(kIA32I16x8DotI8x16I7x16S, g.DefineAsRegister(node),
g.UseUniqueRegister(this->input_at(node, 0)),
g.UseRegister(this->input_at(node, 1)));
}
template <typename Adapter>
void InstructionSelectorT<Adapter>::VisitI32x4DotI8x16I7x16AddS(node_t node) {
IA32OperandGeneratorT<Adapter> g(this);
InstructionOperand temps[] = {g.TempSimd128Register()};
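// The dot product accumulates into the third input (the result is defined
// same-as-input 2); all inputs are unique uses so they are not allocated
// into the SIMD temp.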
Emit(kIA32I32x4DotI8x16I7x16AddS, g.DefineSameAsInput(node, 2),
g.UseUniqueRegister(this->input_at(node, 0)),
g.UseUniqueRegister(this->input_at(node, 1)),
g.UseUniqueRegister(this->input_at(node, 2)), arraysize(temps), temps);
}
#endif // V8_ENABLE_WEBASSEMBLY
template <typename Adapter>
void InstructionSelectorT<Adapter>::AddOutputToSelectContinuation(
OperandGeneratorT<Adapter>* g, int first_input_index, node_t node) {
UNREACHABLE();
}
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {
MachineOperatorBuilder::Flags flags =
MachineOperatorBuilder::kWord32ShiftIsSafe |
MachineOperatorBuilder::kWord32Ctz | MachineOperatorBuilder::kWord32Rol;
if (CpuFeatures::IsSupported(POPCNT)) {
flags |= MachineOperatorBuilder::kWord32Popcnt;
}
if (CpuFeatures::IsSupported(SSE4_1)) {
flags |= MachineOperatorBuilder::kFloat32RoundDown |
MachineOperatorBuilder::kFloat64RoundDown |
MachineOperatorBuilder::kFloat32RoundUp |
MachineOperatorBuilder::kFloat64RoundUp |
MachineOperatorBuilder::kFloat32RoundTruncate |
MachineOperatorBuilder::kFloat64RoundTruncate |
MachineOperatorBuilder::kFloat32RoundTiesEven |
MachineOperatorBuilder::kFloat64RoundTiesEven;
}
return flags;
}
// static
MachineOperatorBuilder::AlignmentRequirements
InstructionSelector::AlignmentRequirements() {
return MachineOperatorBuilder::AlignmentRequirements::
FullUnalignedAccessSupport();
}
template class EXPORT_TEMPLATE_DEFINE(V8_EXPORT_PRIVATE)
InstructionSelectorT<TurbofanAdapter>;
template class EXPORT_TEMPLATE_DEFINE(V8_EXPORT_PRIVATE)
InstructionSelectorT<TurboshaftAdapter>;
} // namespace compiler
} // namespace internal
} // namespace v8