blob: f92bf4dfd0717fff1d4e79229caee9264af40e0d [file] [log] [blame]
/*
* Copyright (C) 2023 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "SIMDInfo.h"
namespace JSC {
class SIMDShuffle {
public:
static std::optional<unsigned> isOnlyOneSideMask(v128_t pattern)
{
unsigned first = pattern.u8x16[0];
if (first < 16) {
for (unsigned i = 1; i < 16; ++i) {
if (pattern.u8x16[i] >= 16)
return std::nullopt;
}
return 0;
}
if (first >= 32)
return std::nullopt;
for (unsigned i = 1; i < 16; ++i) {
if (pattern.u8x16[i] < 16)
return std::nullopt;
if (pattern.u8x16[i] >= 32)
return std::nullopt;
}
return 1;
}
static std::optional<uint8_t> isI8x16DupElement(v128_t pattern)
{
constexpr unsigned numberOfElements = 16 / sizeof(uint8_t);
if (std::all_of(pattern.u8x16, pattern.u8x16 + numberOfElements, [&](auto value) { return value == pattern.u8x16[0]; })) {
uint8_t lane = pattern.u8x16[0] / sizeof(uint8_t);
if (lane < numberOfElements)
return lane;
}
return std::nullopt;
}
static std::optional<uint8_t> isI16x8DupElement(v128_t pattern)
{
if (!isI16x8Shuffle(pattern))
return std::nullopt;
constexpr unsigned numberOfElements = 16 / sizeof(uint16_t);
if (std::all_of(pattern.u16x8, pattern.u16x8 + numberOfElements, [&](auto value) { return value == pattern.u16x8[0]; })) {
uint8_t lane = pattern.u8x16[0] / sizeof(uint16_t);
if (lane < numberOfElements)
return lane;
}
return std::nullopt;
}
static std::optional<uint8_t> isI32x4DupElement(v128_t pattern)
{
if (!isI32x4Shuffle(pattern))
return std::nullopt;
constexpr unsigned numberOfElements = 16 / sizeof(uint32_t);
if (std::all_of(pattern.u32x4, pattern.u32x4 + numberOfElements, [&](auto value) { return value == pattern.u32x4[0]; })) {
uint8_t lane = pattern.u8x16[0] / sizeof(uint32_t);
if (lane < numberOfElements)
return lane;
}
return std::nullopt;
}
static std::optional<uint8_t> isI64x2DupElement(v128_t pattern)
{
if (!isI64x2Shuffle(pattern))
return std::nullopt;
constexpr unsigned numberOfElements = 16 / sizeof(uint64_t);
if (std::all_of(pattern.u64x2, pattern.u64x2 + numberOfElements, [&](auto value) { return value == pattern.u64x2[0]; })) {
uint8_t lane = pattern.u8x16[0] / sizeof(uint64_t);
if (lane < numberOfElements)
return lane;
}
return std::nullopt;
}
static bool isI16x8Shuffle(v128_t pattern)
{
return isLargerElementShuffle(pattern, 2);
}
static bool isI32x4Shuffle(v128_t pattern)
{
return isLargerElementShuffle(pattern, 4);
}
static bool isI64x2Shuffle(v128_t pattern)
{
return isLargerElementShuffle(pattern, 8);
}
static bool isIdentity(v128_t pattern)
{
return isLargerElementShuffle(pattern, 16);
}
static bool isAllOutOfBoundsForUnaryShuffle(v128_t pattern)
{
for (unsigned i = 0; i < 16; ++i) {
if constexpr (isX86()) {
// https://www.felixcloutier.com/x86/pshufb
// On x64, OOB index means that highest bit is set.
// The acutal index is extracted by masking with 0b1111.
// So, for example, 0x11 index (17) will be converted to 0x1 access (not OOB).
if (!(pattern.u8x16[i] & 0x80))
return false;
} else if constexpr (isARM64()) {
// https://developer.arm.com/documentation/dui0801/g/A64-SIMD-Vector-Instructions/TBL--vector-
// On ARM64, OOB index means out of 0..15 range for unary TBL.
if (pattern.u8x16[i] < 16)
return false;
} else
return false;
}
return true;
}
static bool isAllOutOfBoundsForBinaryShuffle(v128_t pattern)
{
ASSERT(isARM64()); // Binary Shuffle is only supported by ARM64.
for (unsigned i = 0; i < 16; ++i) {
if (pattern.u8x16[i] < 32)
return false;
}
return true;
}
private:
static bool isLargerElementShuffle(v128_t pattern, unsigned size)
{
unsigned numberOfElements = 16 / size;
for (unsigned i = 0; i < numberOfElements; ++i) {
unsigned firstIndex = i * size;
unsigned first = pattern.u8x16[firstIndex];
if (first % size != 0)
return false;
for (unsigned j = 1; j < size; ++j) {
unsigned index = j + firstIndex;
if (pattern.u8x16[index] != (first + j))
return false;
}
}
return true;
}
};
} // namespace JSC