// blob: 73f80ca6170d4da0eb665bdba0697509ddaedd81 [file] [log] [blame] [edit]
// Copyright 2024 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --wasm-staging
// For {isOneByteString}:
// Flags: --expose-externalize-string
d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js");
// Non-nullable "ref extern" type, built from the nullable externref type.
const kRefExtern = wasmRefType(kWasmExternRef);

// Signatures used by the test functions below. In the names, "r" stands for
// the nullable "externref" and "e" for the non-nullable "ref extern".
const kSig_e_ii = makeSig([kWasmI32, kWasmI32], [kRefExtern]);
const kSig_e_v = makeSig([], [kRefExtern]);
const kSig_e_rii =
    makeSig([kWasmExternRef, kWasmI32, kWasmI32], [kRefExtern]);
const kSig_e_r = makeSig([kWasmExternRef], [kRefExtern]);
// Input strings shared by the tests below: one-byte and two-byte contents,
// a well-formed surrogate pair, and lone surrogates in various positions.
const interestingStrings = [
  '',
  'ascii',
  // Latin-1.
  'latin\xa91',
  // Two-byte.
  '2 \ucccc b',
  // Proper surrogate pair.
  'a \ud800\udc00 b',
  // Lone lead surrogate.
  'a \ud800 b',
  // Lone trail surrogate.
  'a \udc00 b',
  // Lone lead surrogate at the start.
  '\ud800 bc',
  // Lone trail surrogate at the start.
  '\udc00 bc',
  // Lone lead surrogate at the end.
  'ab \ud800',
  // Lone trail surrogate at the end.
  'ab \udc00',
  // Swapped surrogate pair.
  'a \udc00\ud800 b',
];
// Returns true if {codepoint} lies in the surrogate range 0xD800..0xDFFF
// (both bounds inclusive), false otherwise.
function IsSurrogate(codepoint) {
  if (codepoint < 0xD800) return false;
  return codepoint <= 0xDFFF;
}
// Returns true if {str} contains at least one surrogate that is not part of a
// well-formed pair. The string is walked by its iterator, so a proper pair
// arrives as a single element whose code point is outside the surrogate range;
// only isolated surrogates are still reported by IsSurrogate().
function HasIsolatedSurrogate(str) {
  return Array.from(str).some(
      (character) => IsSurrogate(character.codePointAt(0)));
}
// Returns a copy of {str} in which every isolated surrogate is substituted by
// {replacement} (U+FFFD by default). Everything else, including well-formed
// surrogate pairs, is copied unchanged.
function ReplaceIsolatedSurrogates(str, replacement='\ufffd') {
  return Array.from(str).reduce(
      (result, character) =>
          result +
          (IsSurrogate(character.codePointAt(0)) ? replacement : character),
      '');
}
// Index of the i8 array type and of each imported string builtin. They are
// assigned as a side effect of every MakeBuilder() call and then used inside
// the function bodies of the module built with that builder.
let kArrayI8;
let kStringFromUtf8Array;
let kStringMeasureUtf8;
let kStringIntoUtf8Array;
let kStringToUtf8Array;

// Creates a WasmModuleBuilder that already contains the i8 array type (in its
// own rec group) and the imports of the "wasm:text-decoder" and
// "wasm:text-encoder" functions under test. Returns the builder; the indices
// are published through the globals declared above.
function MakeBuilder() {
  const moduleBuilder = new WasmModuleBuilder();

  moduleBuilder.startRecGroup();
  kArrayI8 = moduleBuilder.addArray(kWasmI8, true, kNoSuperType, true);
  moduleBuilder.endRecGroup();

  const nullableArrayI8 = wasmRefNullType(kArrayI8);
  const nonNullArrayI8 = wasmRefType(kArrayI8);

  // (array, i32, i32) -> ref extern
  const decodeSig =
      makeSig([nullableArrayI8, kWasmI32, kWasmI32], [kRefExtern]);
  // (externref, array, i32) -> i32
  const encodeIntoSig =
      makeSig([kWasmExternRef, nullableArrayI8, kWasmI32], [kWasmI32]);
  // (externref) -> non-null array
  const encodeToSig = makeSig([kWasmExternRef], [nonNullArrayI8]);

  kStringFromUtf8Array = moduleBuilder.addImport(
      'wasm:text-decoder', 'decodeStringFromUTF8Array', decodeSig);
  kStringMeasureUtf8 = moduleBuilder.addImport(
      'wasm:text-encoder', 'measureStringAsUTF8', kSig_i_r);
  kStringIntoUtf8Array = moduleBuilder.addImport(
      'wasm:text-encoder', 'encodeStringIntoUTF8Array', encodeIntoSig);
  kStringToUtf8Array = moduleBuilder.addImport(
      'wasm:text-encoder', 'encodeStringToUTF8Array', encodeToSig);

  return moduleBuilder;
}
// Arguments for every builder.instantiate() call below: an empty imports
// object, and the options bag listing the builtin sets whose functions
// MakeBuilder() imports.
const kImports = {};
const kBuiltins = {
  builtins: ["text-decoder", "text-encoder"],
};
// Encodes {str} with the UTF-8 bit layout and returns the resulting bytes as
// an array of numbers. Lone surrogates are not rejected: they are encoded like
// any other code point below 0x10000, i.e. as three bytes.
function encodeWtf8(str) {
  const bytes = [];
  // The string iterator coalesces surrogate pairs, so every step sees either
  // a complete code point or a lone surrogate.
  for (const character of str) {
    const value = character.codePointAt(0);
    if (value <= 0x7f) {
      // Single byte, no continuation bytes.
      bytes.push(value);
      continue;
    }

    // Pick the lead byte and the number of continuation bytes that follow it.
    let leadByte;
    let continuationCount;
    if (value <= 0x7ff) {
      leadByte = 0xc0 | (value >> 6);
      continuationCount = 1;
    } else if (value <= 0xffff) {
      leadByte = 0xe0 | (value >> 12);
      continuationCount = 2;
    } else if (value <= 0x10ffff) {
      leadByte = 0xf0 | (value >> 18);
      continuationCount = 3;
    } else {
      throw new Error("bad codepoint " + value);
    }

    bytes.push(leadByte);
    // Each continuation byte carries six payload bits, most significant
    // group first.
    for (let shift = 6 * (continuationCount - 1); shift >= 0; shift -= 6) {
      bytes.push(0x80 | ((value >> shift) & 0x3f));
    }
  }
  return bytes;
}
// Builds the byte contents of one data segment holding all decoder inputs,
// stored back to back. Returns {valid, invalid, data}:
//  - valid:   maps each entry of interestingStrings to the {offset, length}
//             of its encodeWtf8() bytes inside the segment.
//  - invalid: maps each ill-formed input to {offset, length, replaced}, where
//             {replaced} is the string the decoder is expected to produce.
//  - data:    the segment contents as a Uint8Array.
function makeWtf8TestDataSegment() {
  const segmentBytes = [];
  const valid = {};
  const invalid = {};

  for (const str of interestingStrings) {
    const encoded = encodeWtf8(str);
    valid[str] = { offset: segmentBytes.length, length: encoded.length };
    segmentBytes.push(...encoded);
  }

  // Pairs of [raw input, expected decoding]. In the raw input every character
  // stands for one byte of the segment (its char code is stored as-is).
  const invalidCases = [
    ['trailing high byte \xa9',
     'trailing high byte \uFFFD'],
    ['interstitial high \xa9 byte',
     'interstitial high \uFFFD byte'],
    ['invalid \xc0 byte',
     'invalid \uFFFD byte'],
    ['invalid three-byte \xed\xd0\x80',
     'invalid three-byte \uFFFD\u0400'],
    ['surrogate \xed\xa0\x80\xed\xb0\x80 pair',
     'surrogate \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD pair'],
  ];
  for (const [rawBytes, replaced] of invalidCases) {
    invalid[rawBytes] = {
      offset: segmentBytes.length,
      length: rawBytes.length,
      replaced: replaced
    };
    for (let index = 0; index < rawBytes.length; index++) {
      segmentBytes.push(rawBytes.charCodeAt(index));
    }
  }

  return { valid, invalid, data: Uint8Array.from(segmentBytes) };
}
// Tests decodeStringFromUTF8Array: decodes byte ranges of i8 arrays that are
// filled from passive data segments, and checks the decoded strings, the
// range checks, and the handling of a null array.
(function TestStringNewUtf8Array() {
// Print the name of this test function.
print(arguments.callee.name);
let builder = MakeBuilder();
// One segment with all valid and invalid inputs, and a second one that holds
// only the bytes of "ascii" for the bounds checks.
let data = makeWtf8TestDataSegment();
let data_index = builder.addPassiveDataSegment(data.data);
let ascii_data_index =
builder.addPassiveDataSegment(Uint8Array.from(encodeWtf8("ascii")));
// Helper (not exported): () -> i8 array created from segment {data_index}
// with operands 0 and data.data.length, i.e. covering the whole segment.
let make_i8_array = builder.addFunction(
"make_i8_array", makeSig([], [wasmRefType(kArrayI8)]))
.addBody([
...wasmI32Const(0),
...wasmI32Const(data.data.length),
kGCPrefix, kExprArrayNewData, kArrayI8, data_index
]).index;
// new_utf8(start, end): decodes the given range of the full test array.
builder.addFunction("new_utf8", kSig_e_ii)
.exportFunc()
.addBody([
kExprCallFunction, make_i8_array,
kExprLocalGet, 0, kExprLocalGet, 1,
kExprCallFunction, kStringFromUtf8Array,
]);
// bounds_check(start, end): decodes the given range of an array that holds
// just the "ascii".length bytes of the second segment.
builder.addFunction("bounds_check", kSig_e_ii)
.exportFunc()
.addBody([
...wasmI32Const(0),
...wasmI32Const("ascii".length),
kGCPrefix, kExprArrayNewData, kArrayI8, ascii_data_index,
kExprLocalGet, 0, kExprLocalGet, 1,
kExprCallFunction, kStringFromUtf8Array,
]);
// null_array(): passes a null array reference together with the range 0, 0.
builder.addFunction("null_array", kSig_e_v).exportFunc()
.addBody([
kExprRefNull, kArrayI8,
kExprI32Const, 0, kExprI32Const, 0,
kExprCallFunction, kStringFromUtf8Array,
]);
let instance = builder.instantiate(kImports, kBuiltins);
// Inputs produced by encodeWtf8(): strings without isolated surrogates must
// round-trip unchanged.
for (let [str, {offset, length}] of Object.entries(data.valid)) {
let start = offset;
let end = offset + length;
if (HasIsolatedSurrogate(str)) {
// Isolated surrogates have the three-byte pattern ED [A0,BF] [80,BF].
// When the sloppy decoder gets to the second byte, it will reject
// the sequence, and then retry parsing at the second byte.
// Seeing the second byte can't start a sequence, it replaces the
// second byte and continues with the next, which also can't start
// a sequence. The result is that one isolated surrogate is replaced
// by three U+FFFD codepoints.
assertEquals(ReplaceIsolatedSurrogates(str, '\ufffd\ufffd\ufffd'),
instance.exports.new_utf8(start, end));
} else {
assertEquals(str, instance.exports.new_utf8(start, end));
}
}
// Ill-formed inputs must decode to their precomputed {replaced} string.
for (let [str, {offset, length, replaced}] of Object.entries(data.invalid)) {
let start = offset;
let end = offset + length;
assertEquals(replaced, instance.exports.new_utf8(start, end));
}
// Ranges inside the five-byte array: the whole array, an empty range at the
// very end, and the last byte only.
assertEquals("ascii", instance.exports.bounds_check(0, "ascii".length));
assertEquals("", instance.exports.bounds_check("ascii".length,
"ascii".length));
assertEquals("i", instance.exports.bounds_check("ascii".length - 1,
"ascii".length));
// Ranges that do not fit the array must trap.
assertThrows(() => instance.exports.bounds_check(0, 100),
WebAssembly.RuntimeError, "array element access out of bounds");
assertThrows(() => instance.exports.bounds_check(0, -1),
WebAssembly.RuntimeError, "array element access out of bounds");
assertThrows(() => instance.exports.bounds_check(-1, 0),
WebAssembly.RuntimeError, "array element access out of bounds");
assertThrows(() => instance.exports.bounds_check("ascii".length,
"ascii".length + 1),
WebAssembly.RuntimeError, "array element access out of bounds");
// A null array must trap as well.
assertThrows(() => instance.exports.null_array(),
WebAssembly.RuntimeError, "dereferencing a null pointer");
})();
// Tests measureStringAsUTF8: the reported length must equal the number of
// bytes encodeWtf8() produces for the same string, and a null string must
// trap.
(function TestStringMeasureUtf8() {
  print(arguments.callee.name);
  const moduleBuilder = MakeBuilder();

  // string_measure_utf8(str): forwards its argument to the builtin.
  moduleBuilder.addFunction("string_measure_utf8", kSig_i_r)
    .exportFunc()
    .addBody([
      kExprLocalGet, 0,
      kExprCallFunction, kStringMeasureUtf8,
    ]);

  // string_measure_utf8_null(): calls the builtin with a null externref.
  moduleBuilder.addFunction("string_measure_utf8_null", kSig_i_v)
    .exportFunc()
    .addBody([
      kExprRefNull, kExternRefCode,
      kExprCallFunction, kStringMeasureUtf8,
    ]);

  const wasmInstance = moduleBuilder.instantiate(kImports, kBuiltins);

  interestingStrings.forEach((str) => {
    const expectedBytes = encodeWtf8(str);
    assertEquals(
        expectedBytes.length, wasmInstance.exports.string_measure_utf8(str));
  });

  const measureNull = () => wasmInstance.exports.string_measure_utf8_null();
  assertThrows(measureNull, WebAssembly.RuntimeError, "illegal cast");
})();
// Tests encodeStringIntoUTF8Array: encodes strings into a caller-allocated i8
// array at a given offset, decodes the written bytes again, and checks the
// traps for a null string, a null array, and a buffer that is too small.
(function TestStringEncodeUtf8Array() {
// Print the name of this test function.
print(arguments.callee.name);
let builder = MakeBuilder();
// Allocate an array that's exactly the expected size, and encode
// into it. Then decode it.
// (str, length, offset=0) -> str
// Local indices: 0 = str, 1 = length, 2 = offset (parameters),
// 3 = the allocated array, 4 = the i32 returned by the encoder.
builder.addFunction("encode_utf8", kSig_e_rii)
.exportFunc()
.addLocals(wasmRefNullType(kArrayI8), 1)
.addLocals(kWasmI32, 1)
.addBody([
// Allocate buffer.
kExprLocalGet, 1,
kGCPrefix, kExprArrayNewDefault, kArrayI8,
kExprLocalSet, 3,
// Write buffer, store number of bytes written.
kExprLocalGet, 0,
kExprLocalGet, 3,
kExprLocalGet, 2,
kExprCallFunction, kStringIntoUtf8Array,
kExprLocalSet, 4,
// Read buffer.
// The decoded range is offset .. offset + bytes written.
kExprLocalGet, 3,
kExprLocalGet, 2,
kExprLocalGet, 2, kExprLocalGet, 4, kExprI32Add,
kExprCallFunction, kStringFromUtf8Array,
]);
// encode_null_string(): null externref as the string, a zero-length array as
// the destination, offset 0.
builder.addFunction("encode_null_string", kSig_i_v)
.exportFunc()
.addBody([
kExprRefNull, kExternRefCode,
kExprI32Const, 0, kGCPrefix, kExprArrayNewDefault, kArrayI8,
kExprI32Const, 0,
kExprCallFunction, kStringIntoUtf8Array,
]);
// encode_null_array(): first obtains a string by decoding the range 0, 0 of a
// zero-length array, then tries to encode that string into a null array.
builder.addFunction("encode_null_array", kSig_i_v)
.exportFunc()
.addBody([
kExprI32Const, 0, kGCPrefix, kExprArrayNewDefault, kArrayI8,
kExprI32Const, 0, kExprI32Const, 0,
kExprCallFunction, kStringFromUtf8Array,
kExprRefNull, kArrayI8,
kExprI32Const, 0,
kExprCallFunction, kStringIntoUtf8Array,
]);
let instance = builder.instantiate(kImports, kBuiltins);
// Round trip: the result is expected to be the input with every isolated
// surrogate replaced by U+FFFD. Each string is tried with a buffer of exactly
// the encoded size at offset 0, and with a buffer that is 20 bytes larger at
// offset 10.
for (let str of interestingStrings) {
let replaced = ReplaceIsolatedSurrogates(str);
if (!HasIsolatedSurrogate(str)) assertEquals(str, replaced);
let wtf8 = encodeWtf8(replaced);
assertEquals(replaced,
instance.exports.encode_utf8(str, wtf8.length, 0));
assertEquals(replaced,
instance.exports.encode_utf8(str, wtf8.length + 20, 10));
}
// Null array and null string must trap.
assertThrows(() => instance.exports.encode_null_array(),
WebAssembly.RuntimeError, "dereferencing a null pointer");
assertThrows(() => instance.exports.encode_null_string(),
WebAssembly.RuntimeError, "illegal cast");
// With a buffer of exactly the encoded size, encoding at offset 1 must trap.
for (let str of interestingStrings) {
let wtf8 = encodeWtf8(str);
let message = "array element access out of bounds";
assertThrows(() => instance.exports.encode_utf8(str, wtf8.length, 1),
WebAssembly.RuntimeError, message);
}
})();
// Tests encodeStringToUTF8Array: converts each string to an i8 array, decodes
// that array again, and checks the trap for a null string.
(function TestStringToUtf8Array() {
  print(arguments.callee.name);
  let builder = MakeBuilder();

  // encode_utf8(str): convert the string to an array, then decode the whole
  // array (range 0 .. array.len) back into a string. Local 1 holds the array
  // so that it can be used both as the decoder input and for array.len.
  builder.addFunction("encode_utf8", kSig_e_r)
    .exportFunc()
    .addLocals(wasmRefNullType(kArrayI8), 1)
    .addBody([
      kExprLocalGet, 0,
      kExprCallFunction, kStringToUtf8Array,
      kExprLocalTee, 1,
      kExprI32Const, 0,  // start
      kExprLocalGet, 1, kGCPrefix, kExprArrayLen,  // end
      kExprCallFunction, kStringFromUtf8Array,
    ]);

  // encode_null_string(): calls the builtin with a null externref.
  let sig_a8_v = makeSig([], [wasmRefType(kArrayI8)]);
  builder.addFunction("encode_null_string", sig_a8_v)
    .exportFunc()
    .addBody([
      kExprRefNull, kExternRefCode,
      kExprCallFunction, kStringToUtf8Array,
    ]);

  let instance = builder.instantiate(kImports, kBuiltins);

  // Round trip: the result is expected to be the input with every isolated
  // surrogate replaced by U+FFFD.
  for (let str of interestingStrings) {
    let replaced = ReplaceIsolatedSurrogates(str);
    if (!HasIsolatedSurrogate(str)) assertEquals(str, replaced);
    // "encode_utf8" has the one-parameter signature kSig_e_r here: the builtin
    // allocates the array itself, so there is no buffer length or offset to
    // pass, and one call per string covers everything.
    assertEquals(replaced, instance.exports.encode_utf8(str));
  }

  assertThrows(() => instance.exports.encode_null_string(),
               WebAssembly.RuntimeError, "illegal cast");
})();