/*
* Copyright (C) 2023-2026 Apple Inc. All rights reserved.
* Copyright (C) 2025-2026 the V8 project authors. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <JavaScriptCore/CPU.h>
#include <JavaScriptCore/SIMDInfo.h>
#include <algorithm>
#include <cstdint>
#include <optional>
WTF_ALLOW_UNSAFE_BUFFER_USAGE_BEGIN
namespace JSC {
// Canonical shuffle patterns recognized for ARM64 specialized instruction selection.
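// In ARM64 terms: UZP1/UZP2 pick the even-/odd-numbered elements of both inputs,
// ZIP1/ZIP2 interleave the lower/upper halves of the two inputs, TRN1/TRN2 interleave
// the even-/odd-numbered elements of the two inputs, REV16/REV32/REV64 reverse the
// element order within each 16/32/64-bit container, and EXT extracts 16 consecutive
// bytes from the concatenation of its operands.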
enum class CanonicalShuffle : uint8_t {
Unknown,
Identity,
// 64-bit element patterns
S64x2UnzipEven, // UZP1.2D
S64x2UnzipOdd, // UZP2.2D
S64x2Reverse, // EXT #8 (unary): swap two 64-bit halves
// 32-bit element patterns
S32x4UnzipEven, // UZP1.4S
S32x4UnzipOdd, // UZP2.4S
S32x4ZipLower, // ZIP1.4S
S32x4ZipHigher, // ZIP2.4S
S32x4TransposeEven, // TRN1.4S
S32x4TransposeOdd, // TRN2.4S
S32x2Reverse, // REV64.4S
// 16-bit element patterns
S16x8UnzipEven, // UZP1.8H
S16x8UnzipOdd, // UZP2.8H
S16x8ZipLower, // ZIP1.8H
S16x8ZipHigher, // ZIP2.8H
S16x8TransposeEven, // TRN1.8H
S16x8TransposeOdd, // TRN2.8H
S16x2Reverse, // REV32.8H
S16x4Reverse, // REV64.8H
// 8-bit element patterns
S8x16UnzipEven, // UZP1.16B
S8x16UnzipOdd, // UZP2.16B
S8x16ZipLower, // ZIP1.16B
S8x16ZipHigher, // ZIP2.16B
S8x16TransposeEven, // TRN1.16B
S8x16TransposeOdd, // TRN2.16B
S8x2Reverse, // REV16.16B
S8x4Reverse, // REV32.16B
S8x8Reverse, // REV64.16B
};
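// A rough sketch of how a shuffle lowering path might consult these helpers
// (the emit* callees are illustrative placeholders, not real JSC APIs):
//
//     v128_t mask = ...; // immediate operand of a wasm i8x16.shuffle
//     if (SIMDShuffle::isIdentity(mask))
//         emitMove(...); // copies one input unchanged
//     else if (auto side = SIMDShuffle::isOnlyOneSideMask(mask))
//         emitUnaryTBL(*side, mask); // single-register TBL (or DUP/EXT special cases)
//     else if (auto ext = SIMDShuffle::isEXTWithSwap(mask))
//         emitEXT(ext->needsSwap, ext->offset);
//     else if (auto canonical = SIMDShuffle::tryMatchCanonicalBinary(mask); canonical != CanonicalShuffle::Unknown)
//         emitCanonical(canonical); // UZP/ZIP/TRN
//     else
//         emitBinaryTBL(mask); // general two-register TBL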
class SIMDShuffle {
public:
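// If every index reads from the same input, returns which side that is: 0 when all
// indices are in 0..15 (first input), 1 when all are in 16..31 (second input).
// Returns nullopt for mixed or out-of-range indices.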
static std::optional<unsigned> isOnlyOneSideMask(v128_t pattern)
{
unsigned first = pattern.u8x16[0];
if (first < 16) {
for (unsigned i = 1; i < 16; ++i) {
if (pattern.u8x16[i] >= 16)
return std::nullopt;
}
return 0;
}
if (first >= 32)
return std::nullopt;
for (unsigned i = 1; i < 16; ++i) {
if (pattern.u8x16[i] < 16)
return std::nullopt;
if (pattern.u8x16[i] >= 32)
return std::nullopt;
}
return 1;
}
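// Returns the byte value replicated across the whole pattern (it may be an
// out-of-range index); nullopt if the pattern bytes are not all equal.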
static std::optional<uint8_t> isI8x16SameElement(v128_t pattern)
{
constexpr unsigned numberOfElements = 16 / sizeof(uint8_t);
if (std::all_of(pattern.u8x16, pattern.u8x16 + numberOfElements, [&](auto value) { return value == pattern.u8x16[0]; }))
return pattern.u8x16[0];
return std::nullopt;
}
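// The *DupElement helpers below return the source lane when the shuffle broadcasts a
// single in-bounds lane of the first input to every lane of the given width, which
// maps to a single DUP (element) instruction on ARM64.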
static std::optional<uint8_t> isI8x16DupElement(v128_t pattern)
{
constexpr unsigned numberOfElements = 16 / sizeof(uint8_t);
if (std::all_of(pattern.u8x16, pattern.u8x16 + numberOfElements, [&](auto value) { return value == pattern.u8x16[0]; })) {
uint8_t lane = pattern.u8x16[0] / sizeof(uint8_t);
if (lane < numberOfElements)
return lane;
}
return std::nullopt;
}
static std::optional<uint8_t> isI16x8DupElement(v128_t pattern)
{
if (!isI16x8Shuffle(pattern))
return std::nullopt;
constexpr unsigned numberOfElements = 16 / sizeof(uint16_t);
if (std::all_of(pattern.u16x8, pattern.u16x8 + numberOfElements, [&](auto value) { return value == pattern.u16x8[0]; })) {
uint8_t lane = pattern.u8x16[0] / sizeof(uint16_t);
if (lane < numberOfElements)
return lane;
}
return std::nullopt;
}
static std::optional<uint8_t> isI32x4DupElement(v128_t pattern)
{
if (!isI32x4Shuffle(pattern))
return std::nullopt;
constexpr unsigned numberOfElements = 16 / sizeof(uint32_t);
if (std::all_of(pattern.u32x4, pattern.u32x4 + numberOfElements, [&](auto value) { return value == pattern.u32x4[0]; })) {
uint8_t lane = pattern.u8x16[0] / sizeof(uint32_t);
if (lane < numberOfElements)
return lane;
}
return std::nullopt;
}
static std::optional<uint8_t> isI64x2DupElement(v128_t pattern)
{
if (!isI64x2Shuffle(pattern))
return std::nullopt;
constexpr unsigned numberOfElements = 16 / sizeof(uint64_t);
if (std::all_of(pattern.u64x2, pattern.u64x2 + numberOfElements, [&](auto value) { return value == pattern.u64x2[0]; })) {
uint8_t lane = pattern.u8x16[0] / sizeof(uint64_t);
if (lane < numberOfElements)
return lane;
}
return std::nullopt;
}
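// The following helpers report whether the byte-level pattern can be reinterpreted as a
// shuffle of wider elements, i.e. every aligned group of bytes reads one whole, aligned
// element of that width.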
static bool isI16x8Shuffle(v128_t pattern)
{
return isLargerElementShuffle(pattern, 2);
}
static bool isI32x4Shuffle(v128_t pattern)
{
return isLargerElementShuffle(pattern, 4);
}
static bool isI64x2Shuffle(v128_t pattern)
{
return isLargerElementShuffle(pattern, 8);
}
static bool isIdentity(v128_t pattern)
{
return isLargerElementShuffle(pattern, 16);
}
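// When every index is out of bounds, both PSHUFB (x86) and TBL (ARM64) write zero to
// every lane, so the whole shuffle folds to a zero vector.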
static bool isAllOutOfBoundsForUnaryShuffle(v128_t pattern)
{
for (unsigned i = 0; i < 16; ++i) {
if constexpr (isX86()) {
// https://www.felixcloutier.com/x86/pshufb
// On x64, an index is out of bounds when its highest bit is set.
// The actual index is extracted by masking with 0b1111, so, for example,
// index 0x11 (17) is converted to an access of lane 0x1 (not OOB).
if (!(pattern.u8x16[i] & 0x80))
return false;
} else if constexpr (isARM64()) {
// https://developer.arm.com/documentation/dui0801/g/A64-SIMD-Vector-Instructions/TBL--vector-
// On ARM64, any index outside the 0..15 range is out of bounds for unary TBL.
if (pattern.u8x16[i] < 16)
return false;
} else
return false;
}
return true;
}
static bool isAllOutOfBoundsForBinaryShuffle(v128_t pattern)
{
ASSERT(isARM64()); // Binary shuffles are only supported on ARM64.
for (unsigned i = 0; i < 16; ++i) {
if (pattern.u8x16[i] < 32)
return false;
}
return true;
}
// Detect unary EXT (byte rotation) pattern.
// Returns offset if pattern[i] == (offset + i) % 16 for all i,
// with offset in [1, 15]. Offset 0 is identity (handled separately).
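// For example, offset 3 matches {3,4,...,15, 0,1,2} and lowers to
// EXT Vd.16B, Vn.16B, Vn.16B, #3 with both operands being the same register.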
static std::optional<uint8_t> isUnaryEXT(v128_t pattern)
{
uint8_t first = pattern.u8x16[0];
if (first == 0 || first >= 16)
return std::nullopt;
for (unsigned i = 1; i < 16; ++i) {
if (pattern.u8x16[i] != ((first + i) % 16))
return std::nullopt;
}
return first;
}
// Detect EXT (byte extraction / concatenation) pattern for a binary shuffle.
// Matches pattern[i] == (start + i) % 32 for some start in [0, 31].
// ARM64 EXT instruction: EXT Vd.16B, Vn.16B, Vm.16B, #imm
// extracts 16 consecutive bytes from the concatenation of Vn:Vm starting at #imm.
// Returns {offset, needsSwap}: offset is start % 16, and needsSwap indicates that
// start was >= 16, so the two operands must be swapped when emitting EXT.
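// For example, {20,21,...,31, 0,1,2,3} (start = 20) yields {offset = 4, needsSwap = true},
// i.e. EXT Vd.16B, Vm.16B, Vn.16B, #4 with the operands swapped.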
struct EXTInfo {
uint8_t offset;
bool needsSwap;
};
static std::optional<EXTInfo> isEXTWithSwap(v128_t pattern)
{
uint8_t first = pattern.u8x16[0];
if (first >= 32)
return std::nullopt;
for (unsigned i = 1; i < 16; ++i) {
if (pattern.u8x16[i] != ((first + i) % 32))
return std::nullopt;
}
if (first < 16)
return EXTInfo { first, false };
return EXTInfo { static_cast<uint8_t>(first - 16), true };
}
// Try to match a canonical binary shuffle pattern for ARM64 specialized instructions.
static CanonicalShuffle tryMatchCanonicalBinary(v128_t pattern)
{
return tryMatchCanonicalBinaryImpl(pattern, [](v128_t v) constexpr { return v; });
}
// Try to match a unary shuffle pattern as a binary canonical shuffle.
// When both inputs of the shuffle are the same vector, each binary canonical pattern is
// folded into its unary form (indices 16..31 mapped back to 0..15) before comparing.
// For example, TRN2.16B {1,17, 3,19, 5,21, 7,23, 9,25, 11,27, 13,29, 15,31} becomes
// {1,1, 3,3, 5,5, 7,7, 9,9, 11,11, 13,13, 15,15}.
static CanonicalShuffle tryMatchUnaryAsBinaryCanonical(v128_t pattern)
{
return tryMatchCanonicalBinaryImpl(pattern,
[](v128_t v) constexpr {
for (unsigned i = 0; i < 16; ++i) {
if (v.u8x16[i] >= 16)
v.u8x16[i] = v.u8x16[i] - 16;
}
return v;
});
}
// Try to match a canonical unary shuffle pattern.
static CanonicalShuffle tryMatchCanonicalUnary(v128_t pattern)
{
// REV64.4S: reverse 32-bit pairs within 64-bit lanes
if (bitEquals(pattern, v128_t::fromU8x16(4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11)))
return CanonicalShuffle::S32x2Reverse;
// REV64.8H: reverse 16-bit elements within 64-bit lanes
if (bitEquals(pattern, v128_t::fromU8x16(6,7, 4,5, 2,3, 0,1, 14,15, 12,13, 10,11, 8,9)))
return CanonicalShuffle::S16x4Reverse;
// REV32.8H: reverse 16-bit pairs within 32-bit groups
if (bitEquals(pattern, v128_t::fromU8x16(2,3, 0,1, 6,7, 4,5, 10,11, 8,9, 14,15, 12,13)))
return CanonicalShuffle::S16x2Reverse;
// REV64.16B: reverse bytes within 64-bit lanes
if (bitEquals(pattern, v128_t::fromU8x16(7,6,5,4,3,2,1,0, 15,14,13,12,11,10,9,8)))
return CanonicalShuffle::S8x8Reverse;
// REV32.16B: reverse bytes within 32-bit groups
if (bitEquals(pattern, v128_t::fromU8x16(3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12)))
return CanonicalShuffle::S8x4Reverse;
// REV16.16B: reverse bytes within 16-bit groups
if (bitEquals(pattern, v128_t::fromU8x16(1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14)))
return CanonicalShuffle::S8x2Reverse;
// S64x2 reverse: swap two 64-bit halves = {8..15, 0..7}
if (bitEquals(pattern, v128_t::fromU8x16(8,9,10,11,12,13,14,15, 0,1,2,3,4,5,6,7)))
return CanonicalShuffle::S64x2Reverse;
return CanonicalShuffle::Unknown;
}
// Compose two unary shuffles: outer(inner(x)) → combined(x).
// Both patterns have indices in 0..15 (or OOB >= 16).
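// For example, composing two unary EXT rotations (isUnaryEXT offsets a and b) gives the
// rotation with offset (a + b) % 16.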
static v128_t composeUnaryShuffle(v128_t outerPattern, v128_t innerPattern)
{
v128_t result;
for (unsigned i = 0; i < 16; ++i) {
uint8_t outerIdx = outerPattern.u8x16[i];
if (outerIdx >= 16) {
result.u8x16[i] = 0xFF; // OOB
continue;
}
result.u8x16[i] = innerPattern.u8x16[outerIdx];
}
return result;
}
// Compose an outer shuffle with an inner shuffle.
// Given outer = shuffle(inner_result, other, outerPattern) or
// outer = shuffle(other, inner_result, outerPattern)
// where inner_result = shuffle(innerSrc, innerPattern) [unary]
// Produces a combined pattern that reads from (innerSrc, other) or (other, innerSrc).
//
// innerIsChild0: whether the inner shuffle's result is child(0) of the outer.
// Out-of-range indices (in the outer pattern, or produced by the inner one) are
// mapped to 0xFF in the result, which reads as zero through ARM64 TBL.
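// For example (innerIsChild0 = true): if the inner shuffle swaps the 64-bit halves of its
// input ({8..15, 0..7}) and the outer pattern is ZIP1.16B ({0,16, 1,17, ..., 7,23}),
// the composed pattern is {8,16, 9,17, ..., 15,23}: the inner input's upper half
// interleaved with the other child's lower half.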
static v128_t composeShuffle(v128_t outerPattern, v128_t innerPattern, bool innerIsChild0)
{
v128_t result;
for (unsigned i = 0; i < 16; ++i) {
uint8_t outerIdx = outerPattern.u8x16[i];
if (outerIdx >= 32) {
// Out of bounds in outer — result is 0 on ARM64 TBL.
result.u8x16[i] = 0xFF; // OOB
continue;
}
if (innerIsChild0) {
if (outerIdx < 16) {
// Reads from inner's output. Chase through inner.
uint8_t innerIdx = innerPattern.u8x16[outerIdx];
if (innerIdx >= 16) {
result.u8x16[i] = 0xFF; // OOB in inner → zero
continue;
}
// innerIdx is 0..15, referring to inner's input.
// In the composed shuffle, inner's input is child0, other is child1.
result.u8x16[i] = innerIdx;
} else {
// Reads from the other child (child1 of outer).
// In composed result, other is child1, offset 16..31.
result.u8x16[i] = outerIdx; // stays as 16..31
}
} else {
// inner is child1 of outer
if (outerIdx >= 16) {
// Reads from inner's output (which is child1 of outer).
uint8_t innerIdx = innerPattern.u8x16[outerIdx - 16];
if (innerIdx >= 16) {
result.u8x16[i] = 0xFF;
continue;
}
// inner's input becomes child1 in composed result.
result.u8x16[i] = innerIdx + 16;
} else {
// Reads from the other child (child0 of outer).
result.u8x16[i] = outerIdx; // stays as 0..15
}
}
}
return result;
}
private:
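// Each candidate pattern below is written for two distinct 16-byte inputs (indices 0..31).
// `canonicalize` is applied to the candidate before comparing: the binary matcher passes it
// through unchanged, while the unary-as-binary matcher folds indices 16..31 down to 0..15.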
static CanonicalShuffle tryMatchCanonicalBinaryImpl(v128_t pattern, const Invocable<v128_t(v128_t)> auto& canonicalize)
{
// 64-bit element patterns
// UZP1.2D: {a[lo64], b[lo64]}
if (bitEquals(pattern, canonicalize(v128_t::fromU8x16(0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23))))
return CanonicalShuffle::S64x2UnzipEven;
// UZP2.2D: {a[hi64], b[hi64]}
if (bitEquals(pattern, canonicalize(v128_t::fromU8x16(8,9,10,11,12,13,14,15, 24,25,26,27,28,29,30,31))))
return CanonicalShuffle::S64x2UnzipOdd;
// 32-bit element patterns
// UZP1.4S: {a[0], a[2], b[0], b[2]}
if (bitEquals(pattern, canonicalize(v128_t::fromU8x16(0,1,2,3, 8,9,10,11, 16,17,18,19, 24,25,26,27))))
return CanonicalShuffle::S32x4UnzipEven;
// UZP2.4S: {a[1], a[3], b[1], b[3]}
if (bitEquals(pattern, canonicalize(v128_t::fromU8x16(4,5,6,7, 12,13,14,15, 20,21,22,23, 28,29,30,31))))
return CanonicalShuffle::S32x4UnzipOdd;
// ZIP1.4S: {a[0], b[0], a[1], b[1]}
if (bitEquals(pattern, canonicalize(v128_t::fromU8x16(0,1,2,3, 16,17,18,19, 4,5,6,7, 20,21,22,23))))
return CanonicalShuffle::S32x4ZipLower;
// ZIP2.4S: {a[2], b[2], a[3], b[3]}
if (bitEquals(pattern, canonicalize(v128_t::fromU8x16(8,9,10,11, 24,25,26,27, 12,13,14,15, 28,29,30,31))))
return CanonicalShuffle::S32x4ZipHigher;
// TRN1.4S: {a[0], b[0], a[2], b[2]}
if (bitEquals(pattern, canonicalize(v128_t::fromU8x16(0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27))))
return CanonicalShuffle::S32x4TransposeEven;
// TRN2.4S: {a[1], b[1], a[3], b[3]}
if (bitEquals(pattern, canonicalize(v128_t::fromU8x16(4,5,6,7, 20,21,22,23, 12,13,14,15, 28,29,30,31))))
return CanonicalShuffle::S32x4TransposeOdd;
// 16-bit element patterns
// UZP1.8H: {a[0],a[2],a[4],a[6], b[0],b[2],b[4],b[6]}
if (bitEquals(pattern, canonicalize(v128_t::fromU8x16(0,1, 4,5, 8,9, 12,13, 16,17, 20,21, 24,25, 28,29))))
return CanonicalShuffle::S16x8UnzipEven;
// UZP2.8H
if (bitEquals(pattern, canonicalize(v128_t::fromU8x16(2,3, 6,7, 10,11, 14,15, 18,19, 22,23, 26,27, 30,31))))
return CanonicalShuffle::S16x8UnzipOdd;
// ZIP1.8H
if (bitEquals(pattern, canonicalize(v128_t::fromU8x16(0,1, 16,17, 2,3, 18,19, 4,5, 20,21, 6,7, 22,23))))
return CanonicalShuffle::S16x8ZipLower;
// ZIP2.8H
if (bitEquals(pattern, canonicalize(v128_t::fromU8x16(8,9, 24,25, 10,11, 26,27, 12,13, 28,29, 14,15, 30,31))))
return CanonicalShuffle::S16x8ZipHigher;
// TRN1.8H
if (bitEquals(pattern, canonicalize(v128_t::fromU8x16(0,1, 16,17, 4,5, 20,21, 8,9, 24,25, 12,13, 28,29))))
return CanonicalShuffle::S16x8TransposeEven;
// TRN2.8H
if (bitEquals(pattern, canonicalize(v128_t::fromU8x16(2,3, 18,19, 6,7, 22,23, 10,11, 26,27, 14,15, 30,31))))
return CanonicalShuffle::S16x8TransposeOdd;
// 8-bit element patterns
// UZP1.16B
if (bitEquals(pattern, canonicalize(v128_t::fromU8x16(0,2,4,6,8,10,12,14, 16,18,20,22,24,26,28,30))))
return CanonicalShuffle::S8x16UnzipEven;
// UZP2.16B
if (bitEquals(pattern, canonicalize(v128_t::fromU8x16(1,3,5,7,9,11,13,15, 17,19,21,23,25,27,29,31))))
return CanonicalShuffle::S8x16UnzipOdd;
// ZIP1.16B
if (bitEquals(pattern, canonicalize(v128_t::fromU8x16(0,16, 1,17, 2,18, 3,19, 4,20, 5,21, 6,22, 7,23))))
return CanonicalShuffle::S8x16ZipLower;
// ZIP2.16B
if (bitEquals(pattern, canonicalize(v128_t::fromU8x16(8,24, 9,25, 10,26, 11,27, 12,28, 13,29, 14,30, 15,31))))
return CanonicalShuffle::S8x16ZipHigher;
// TRN1.16B
if (bitEquals(pattern, canonicalize(v128_t::fromU8x16(0,16, 2,18, 4,20, 6,22, 8,24, 10,26, 12,28, 14,30))))
return CanonicalShuffle::S8x16TransposeEven;
// TRN2.16B
if (bitEquals(pattern, canonicalize(v128_t::fromU8x16(1,17, 3,19, 5,21, 7,23, 9,25, 11,27, 13,29, 15,31))))
return CanonicalShuffle::S8x16TransposeOdd;
return CanonicalShuffle::Unknown;
}
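// True when every aligned group of `size` bytes reads `size` consecutive bytes whose
// starting byte offset is itself a multiple of `size`, i.e. the byte pattern is really a
// shuffle of `size`-byte elements.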
static bool isLargerElementShuffle(v128_t pattern, unsigned size)
{
unsigned numberOfElements = 16 / size;
for (unsigned i = 0; i < numberOfElements; ++i) {
unsigned firstIndex = i * size;
unsigned first = pattern.u8x16[firstIndex];
if (first % size != 0)
return false;
for (unsigned j = 1; j < size; ++j) {
unsigned index = j + firstIndex;
if (pattern.u8x16[index] != (first + j))
return false;
}
}
return true;
}
};
} // namespace JSC
WTF_ALLOW_UNSAFE_BUFFER_USAGE_END