[turboshaft][wasm] Fix stale reference in array.fill and array.new
When calling into C++ for array.fill (if the length is more than 16
elements), Turboshaft passed the value in a 64 bit stack slot.
For reference values on 32 bits and on pointer-compressed builds, this
required converting a 32 bit tagged value into a 64 bit stack slot.
The emitted __ ChangeInt32ToInt64() makes the value untagged.
When performing GVN, these zero-extensions can be de-duplicated.
When spilled, it will be spilled in an untagged stack slot, e.g.
when calling into an allocation that can trigger a GC, making the
stack slot ignored by the GC and therefore stale afterwards.
This CL instead emits a stack slot that aligns with the input type
removing the need for any conversions (like zero-extensions) for all
types and furthermore also allowing us to mark the stack slot for the
C call as tagged. (These will not be GVN-ed and array.fill doesn't
allocate, so this isn't strictly required.)
Fixed: 381332096
Change-Id: If20a9a97ca8b75db4b7d533acb698efe859a94df
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/6070000
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Commit-Queue: Matthias Liedtke <mliedtke@chromium.org>
Cr-Commit-Position: refs/heads/main@{#97552}
diff --git a/src/wasm/turboshaft-graph-interface.cc b/src/wasm/turboshaft-graph-interface.cc
index 08b2eb1..17141ca 100644
--- a/src/wasm/turboshaft-graph-interface.cc
+++ b/src/wasm/turboshaft-graph-interface.cc
@@ -8001,7 +8001,7 @@
constexpr uint32_t kArrayNewMinimumSizeForMemSet = 16;
IF_NOT (__ Uint32LessThan(
length, __ Word32Constant(kArrayNewMinimumSizeForMemSet))) {
- OpIndex stack_slot = StoreInInt64StackSlot(value, element_type);
+ OpIndex stack_slot = StoreInStackSlot(value, element_type);
MachineType arg_types[]{
MachineType::TaggedPointer(), MachineType::Uint32(),
MachineType::Uint32(), MachineType::Uint32(),
@@ -8027,31 +8027,25 @@
BIND(done);
}
- V<WordPtr> StoreInInt64StackSlot(OpIndex value, wasm::ValueType type) {
- OpIndex value_int64;
+ V<WordPtr> StoreInStackSlot(OpIndex value, wasm::ValueType type) {
+  // The input value is always unpacked (i8 / i16 are represented as i32).
+ ValueType input_type = type.Unpacked();
+
switch (type.kind()) {
case wasm::kI32:
case wasm::kI8:
case wasm::kI16:
- value_int64 = __ ChangeInt32ToInt64(value);
- break;
case wasm::kI64:
- value_int64 = value;
+ case wasm::kF32:
+ case wasm::kF64:
+ case wasm::kRefNull:
+ case wasm::kRef:
break;
case wasm::kS128:
// We can only get here if {value} is the constant 0.
DCHECK(__ output_graph().Get(value).Cast<Simd128ConstantOp>().IsZero());
- value_int64 = __ Word64Constant(uint64_t{0});
- break;
- case wasm::kF32:
- value_int64 = __ ChangeUint32ToUint64(__ BitcastFloat32ToWord32(value));
- break;
- case wasm::kF64:
- value_int64 = __ BitcastFloat64ToWord64(value);
- break;
- case wasm::kRefNull:
- case wasm::kRef:
- value_int64 = kTaggedSize == 4 ? __ ChangeInt32ToInt64(value) : value;
+ value = __ Word64Constant(uint64_t{0});
+ input_type = kWasmI64;
break;
case wasm::kF16:
UNIMPLEMENTED();
@@ -8062,10 +8056,15 @@
UNREACHABLE();
}
- MemoryRepresentation int64_rep = MemoryRepresentation::Int64();
+  // Unlike values on the heap, stack slots are always uncompressed.
+ MemoryRepresentation memory_rep =
+ type.is_reference() ? MemoryRepresentation::UncompressedTaggedPointer()
+ : MemoryRepresentation::FromMachineRepresentation(
+ input_type.machine_representation());
V<WordPtr> stack_slot =
- __ StackSlot(int64_rep.SizeInBytes(), int64_rep.SizeInBytes());
- __ Store(stack_slot, value_int64, StoreOp::Kind::RawAligned(), int64_rep,
+ __ StackSlot(memory_rep.SizeInBytes(), memory_rep.SizeInBytes(),
+ type.is_reference());
+ __ Store(stack_slot, value, StoreOp::Kind::RawAligned(), memory_rep,
compiler::WriteBarrierKind::kNoWriteBarrier);
return stack_slot;
}
diff --git a/src/wasm/wasm-external-refs.cc b/src/wasm/wasm-external-refs.cc
index 86b9ef7..d497318 100644
--- a/src/wasm/wasm-external-refs.cc
+++ b/src/wasm/wasm-external-refs.cc
@@ -868,24 +868,23 @@
ValueType type = ValueType::FromRawBitField(raw_type);
int8_t* initial_element_address = reinterpret_cast<int8_t*>(
ArrayElementAddress(raw_array, index, type.value_kind_size()));
- // Stack pointers are only aligned to 4 bytes.
- int64_t initial_value = base::ReadUnalignedValue<int64_t>(initial_value_addr);
const int bytes_to_set = length * type.value_kind_size();
- // If the initial value is zero, we memset the array.
- if (type.is_numeric() && initial_value == 0) {
- std::memset(initial_element_address, 0, bytes_to_set);
- return;
- }
-
// We implement the general case by setting the first 8 bytes manually, then
// filling the rest by exponentially growing {memcpy}s.
- DCHECK_GE(static_cast<size_t>(bytes_to_set), sizeof(int64_t));
+ CHECK_GE(static_cast<size_t>(bytes_to_set), sizeof(int64_t));
switch (type.kind()) {
case kI64:
case kF64: {
+ // Stack pointers are only aligned to 4 bytes.
+ int64_t initial_value =
+ base::ReadUnalignedValue<int64_t>(initial_value_addr);
+ if (initial_value == 0) {
+ std::memset(initial_element_address, 0, bytes_to_set);
+ return;
+ }
// Array elements are only aligned to 4 bytes, therefore
// `initial_element_address` may be misaligned as a 64-bit pointer.
base::WriteUnalignedValue<int64_t>(
@@ -894,36 +893,59 @@
}
case kI32:
case kF32: {
+ int32_t initial_value = *reinterpret_cast<int32_t*>(initial_value_addr);
+ if (initial_value == 0) {
+ std::memset(initial_element_address, 0, bytes_to_set);
+ return;
+ }
int32_t* base = reinterpret_cast<int32_t*>(initial_element_address);
- base[0] = base[1] = static_cast<int32_t>(initial_value);
+ base[0] = base[1] = initial_value;
break;
}
case kF16:
case kI16: {
+ // The array.fill input is an i32!
+ int16_t initial_value = *reinterpret_cast<int32_t*>(initial_value_addr);
+ if (initial_value == 0) {
+ std::memset(initial_element_address, 0, bytes_to_set);
+ return;
+ }
int16_t* base = reinterpret_cast<int16_t*>(initial_element_address);
- base[0] = base[1] = base[2] = base[3] =
- static_cast<int16_t>(initial_value);
+ base[0] = base[1] = base[2] = base[3] = initial_value;
break;
}
case kI8: {
+ // The array.fill input is an i32!
+ int8_t initial_value = *reinterpret_cast<int32_t*>(initial_value_addr);
+ if (initial_value == 0) {
+ std::memset(initial_element_address, 0, bytes_to_set);
+ return;
+ }
int8_t* base = reinterpret_cast<int8_t*>(initial_element_address);
for (size_t i = 0; i < sizeof(int64_t); i++) {
- base[i] = static_cast<int8_t>(initial_value);
+ base[i] = initial_value;
}
break;
}
case kRefNull:
- case kRef:
+ case kRef: {
+ intptr_t uncompressed_pointer =
+ base::ReadUnalignedValue<intptr_t>(initial_value_addr);
if constexpr (kTaggedSize == 4) {
int32_t* base = reinterpret_cast<int32_t*>(initial_element_address);
- base[0] = base[1] = static_cast<int32_t>(initial_value);
+ base[0] = base[1] = static_cast<int32_t>(uncompressed_pointer);
} else {
- // We use WriteUnalignedValue; see above.
base::WriteUnalignedValue(
- reinterpret_cast<Address>(initial_element_address), initial_value);
+ reinterpret_cast<Address>(initial_element_address),
+ uncompressed_pointer);
}
break;
+ }
case kS128:
+ // S128 can only be filled with zeros.
+ DCHECK_EQ(base::ReadUnalignedValue<int64_t>(initial_value_addr), 0);
+ std::memset(initial_element_address, 0, bytes_to_set);
+ return;
case kRtt:
case kVoid:
case kTop:
diff --git a/test/mjsunit/wasm/array-fill-gc.js b/test/mjsunit/wasm/array-fill-gc.js
new file mode 100644
index 0000000..65f4761
--- /dev/null
+++ b/test/mjsunit/wasm/array-fill-gc.js
@@ -0,0 +1,74 @@
+// Copyright 2024 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Flags: --expose-gc
+
+d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js");
+
+(function TestRepeatedArrayFillWithGC() {
+ print(arguments.callee.name);
+ let builder = new WasmModuleBuilder();
+
+ let struct = builder.addStruct([makeField(kWasmI64, true)]);
+ let array = builder.addArray(wasmRefNullType(struct), true);
+
+ let gcFunc = builder.addImport("m", "gc", kSig_v_v);
+
+ // This needs to be large enough, so that we call into C++ instead of running
+ // a loop in the generated code. (see kArrayNewMinimumSizeForMemSet)
+ let arrayLength = 50;
+
+ builder.addFunction("create", makeSig([kWasmI64], [wasmRefNullType(array)]))
+ .addLocals(wasmRefNullType(array), 1)
+ .addLocals(wasmRefNullType(struct), 1)
+ .addBody([
+ // Create the fill value (young generation).
+ kExprLocalGet, 0,
+ kGCPrefix, kExprStructNew, struct,
+ kExprLocalTee, 2,
+ // Create an array.
+ // As the size is larger than kArrayNewMinimumSizeForMemSet (16), this will
+ // call to C++ for initializing the elements. The fill value is passed as a
+ // pointer. There were two issues with that:
+ // 1) For a reference value on pointer-compressed builds, this emitted a
+ // __ ChangeInt32ToInt64() operation causing the value to become
+ // untagged.
+ // 2) This untagged value was then stored in an untagged stack slot. So any
+  //    GC would not visit this stack slot. Note that this bug alone doesn't
+  //    seem to be triggerable, as the stack slot is not GVN'ed (so the
+  //    second fill initializes a separate stack slot).
+ kExprI32Const, arrayLength,
+ kGCPrefix, kExprArrayNew, array,
+ kExprDrop,
+ // Trigger a gc.
+ kExprCallFunction, gcFunc,
+ // Create another array.
+ // The initialization emitted the same __ ChangeInt32ToInt64() for the fill
+ // value (the struct.new above). Due to GVN the compiler decides to reuse
+ // the instruction from above. However, due to the GC in between this
+ // "int64" now contains an outdated pointer pointing into the previous
+ // half-space for the young generation. (Note that anyways we should pass
+ // the decompressed pointer, zeroing out the upper half doesn't fit to our
+ // calling convention, it just happens to not be an issue as we only use the
+ // value to store it inside the tagged (compressed) slots inside the array.)
+ kExprLocalGet, 2,
+ kExprI32Const, arrayLength,
+ kGCPrefix, kExprArrayNew, array,
+ ]).exportFunc();
+
+ builder.addFunction("get",
+ makeSig([wasmRefNullType(array), kWasmI32], [kWasmI64]))
+ .addBody([
+ kExprLocalGet, 0,
+ kExprLocalGet, 1,
+ kGCPrefix, kExprArrayGet, array,
+ kGCPrefix, kExprStructGet, struct, 0,
+ ]).exportFunc();
+
+ let wasm = builder.instantiate({m: {gc}}).exports;
+ let arr = wasm.create(43n);
+ for (let i = 0; i < arrayLength; ++i) {
+ assertEquals(43n, wasm.get(arr, i));
+ }
+})();