blob: a41e978d1327aa2fe66e619aa351687426fb61a8 [file] [log] [blame]
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stddef.h>
#include <stdint.h>
#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "tests/arithmetic_test.cc"
#include "hwy/foreach_target.h"
#include "hwy/highway.h"
#include "hwy/tests/test_util-inl.h"
HWY_BEFORE_NAMESPACE();
namespace hwy {
namespace HWY_NAMESPACE {
struct TestPlusMinus {
template <class T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
const auto v2 = Iota(d, T(2));
const auto v3 = Iota(d, T(3));
const auto v4 = Iota(d, T(4));
const size_t N = Lanes(d);
auto lanes = AllocateAligned<T>(N);
for (size_t i = 0; i < N; ++i) {
lanes[i] = static_cast<T>((2 + i) + (3 + i));
}
HWY_ASSERT_VEC_EQ(d, Load(d, lanes.get()), v2 + v3);
HWY_ASSERT_VEC_EQ(d, v3, (v2 + v3) - v2);
for (size_t i = 0; i < N; ++i) {
lanes[i] = static_cast<T>((2 + i) + (4 + i));
}
auto sum = v2;
sum += v4; // sum == 6,8..
HWY_ASSERT_VEC_EQ(d, Load(d, lanes.get()), sum);
sum -= v4;
HWY_ASSERT_VEC_EQ(d, v2, sum);
}
};
HWY_NOINLINE void TestAllPlusMinus() {
ForAllTypes(ForPartialVectors<TestPlusMinus>());
}
struct TestUnsignedSaturatingArithmetic {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
const auto v0 = Zero(d);
const auto vi = Iota(d, 1);
const auto vm = Set(d, LimitsMax<T>());
HWY_ASSERT_VEC_EQ(d, v0 + v0, SaturatedAdd(v0, v0));
HWY_ASSERT_VEC_EQ(d, v0 + vi, SaturatedAdd(v0, vi));
HWY_ASSERT_VEC_EQ(d, v0 + vm, SaturatedAdd(v0, vm));
HWY_ASSERT_VEC_EQ(d, vm, SaturatedAdd(vi, vm));
HWY_ASSERT_VEC_EQ(d, vm, SaturatedAdd(vm, vm));
HWY_ASSERT_VEC_EQ(d, v0, SaturatedSub(v0, v0));
HWY_ASSERT_VEC_EQ(d, v0, SaturatedSub(v0, vi));
HWY_ASSERT_VEC_EQ(d, v0, SaturatedSub(vi, vi));
HWY_ASSERT_VEC_EQ(d, v0, SaturatedSub(vi, vm));
HWY_ASSERT_VEC_EQ(d, vm - vi, SaturatedSub(vm, vi));
}
};
struct TestSignedSaturatingArithmetic {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
const auto v0 = Zero(d);
const auto vi = Iota(d, 1);
const auto vpm = Set(d, LimitsMax<T>());
const auto vn = Iota(d, -T(Lanes(d)));
const auto vnm = Set(d, LimitsMin<T>());
HWY_ASSERT_VEC_EQ(d, v0, SaturatedAdd(v0, v0));
HWY_ASSERT_VEC_EQ(d, vi, SaturatedAdd(v0, vi));
HWY_ASSERT_VEC_EQ(d, vpm, SaturatedAdd(v0, vpm));
HWY_ASSERT_VEC_EQ(d, vpm, SaturatedAdd(vi, vpm));
HWY_ASSERT_VEC_EQ(d, vpm, SaturatedAdd(vpm, vpm));
HWY_ASSERT_VEC_EQ(d, v0, SaturatedSub(v0, v0));
HWY_ASSERT_VEC_EQ(d, v0 - vi, SaturatedSub(v0, vi));
HWY_ASSERT_VEC_EQ(d, vn, SaturatedSub(vn, v0));
HWY_ASSERT_VEC_EQ(d, vnm, SaturatedSub(vnm, vi));
HWY_ASSERT_VEC_EQ(d, vnm, SaturatedSub(vnm, vpm));
}
};
HWY_NOINLINE void TestAllSaturatingArithmetic() {
const ForPartialVectors<TestUnsignedSaturatingArithmetic> test_unsigned;
test_unsigned(uint8_t());
test_unsigned(uint16_t());
const ForPartialVectors<TestSignedSaturatingArithmetic> test_signed;
test_signed(int8_t());
test_signed(int16_t());
}
struct TestAverage {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
// Avoid "Do not know how to split the result of this operator"
#if !defined(HWY_DISABLE_BROKEN_AVX3_TESTS) || HWY_TARGET != HWY_AVX3
const auto v0 = Zero(d);
const auto v1 = Set(d, T(1));
const auto v2 = Set(d, T(2));
HWY_ASSERT_VEC_EQ(d, v0, AverageRound(v0, v0));
HWY_ASSERT_VEC_EQ(d, v1, AverageRound(v0, v1));
HWY_ASSERT_VEC_EQ(d, v1, AverageRound(v1, v1));
HWY_ASSERT_VEC_EQ(d, v2, AverageRound(v1, v2));
HWY_ASSERT_VEC_EQ(d, v2, AverageRound(v2, v2));
#else // HWY_DISABLE_BROKEN_AVX3_TESTS
(void)d;
#endif
}
};
HWY_NOINLINE void TestAllAverage() {
const ForPartialVectors<TestAverage> test;
test(uint8_t());
test(uint16_t());
}
struct TestAbs {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
const auto v0 = Zero(d);
const auto vp1 = Set(d, T(1));
const auto vn1 = Set(d, T(-1));
const auto vpm = Set(d, LimitsMax<T>());
const auto vnm = Set(d, LimitsMin<T>());
HWY_ASSERT_VEC_EQ(d, v0, Abs(v0));
HWY_ASSERT_VEC_EQ(d, vp1, Abs(vp1));
HWY_ASSERT_VEC_EQ(d, vp1, Abs(vn1));
HWY_ASSERT_VEC_EQ(d, vpm, Abs(vpm));
HWY_ASSERT_VEC_EQ(d, vnm, Abs(vnm));
}
};
struct TestFloatAbs {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
const auto v0 = Zero(d);
const auto vp1 = Set(d, T(1));
const auto vn1 = Set(d, T(-1));
const auto vp2 = Set(d, T(0.01));
const auto vn2 = Set(d, T(-0.01));
HWY_ASSERT_VEC_EQ(d, v0, Abs(v0));
HWY_ASSERT_VEC_EQ(d, vp1, Abs(vp1));
HWY_ASSERT_VEC_EQ(d, vp1, Abs(vn1));
HWY_ASSERT_VEC_EQ(d, vp2, Abs(vp2));
HWY_ASSERT_VEC_EQ(d, vp2, Abs(vn2));
}
};
HWY_NOINLINE void TestAllAbs() {
const ForPartialVectors<TestAbs> test;
test(int8_t());
test(int16_t());
test(int32_t());
const ForPartialVectors<TestFloatAbs> test_float;
test_float(float());
#if HWY_CAP_FLOAT64
test_float(double());
#endif
}
struct TestUnsignedShifts {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
// Avoid "Do not know how to split the result of this operator"
#if !defined(HWY_DISABLE_BROKEN_AVX3_TESTS) || HWY_TARGET != HWY_AVX3
constexpr int kSign = (sizeof(T) * 8) - 1;
const auto v0 = Zero(d);
const auto vi = Iota(d, 0);
const size_t N = Lanes(d);
auto expected = AllocateAligned<T>(N);
// Shifting out of right side => zero
HWY_ASSERT_VEC_EQ(d, v0, ShiftRight<7>(vi));
#if HWY_VARIABLE_SHIFT_LANES == 1 || HWY_IDE
HWY_ASSERT_VEC_EQ(d, v0, ShiftRightSame(vi, 7));
#endif
// Simple left shift
for (size_t i = 0; i < N; ++i) {
expected[i] = T(i << 1);
}
HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftLeft<1>(vi));
#if HWY_VARIABLE_SHIFT_LANES == 1 || HWY_IDE
HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftLeftSame(vi, 1));
#endif
// Simple right shift
for (size_t i = 0; i < N; ++i) {
expected[i] = T(i >> 1);
}
HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftRight<1>(vi));
#if HWY_VARIABLE_SHIFT_LANES == 1 || HWY_IDE
HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftRightSame(vi, 1));
#endif
// Verify truncation for left-shift
for (size_t i = 0; i < N; ++i) {
expected[i] = (T(i) << kSign) & ~T(0);
}
HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftLeft<kSign>(vi));
#if HWY_VARIABLE_SHIFT_LANES == 1 || HWY_IDE
HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftLeftSame(vi, kSign));
#endif
#else // HWY_DISABLE_BROKEN_AVX3_TESTS
(void)d;
#endif
}
};
struct TestSignedShifts {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
// Avoid "Do not know how to split the result of this operator"
#if !defined(HWY_DISABLE_BROKEN_AVX3_TESTS) || HWY_TARGET != HWY_AVX3
using TU = MakeUnsigned<T>;
const auto v0 = Zero(d);
const auto vi = Iota(d, 0);
const size_t N = Lanes(d);
auto expected = AllocateAligned<T>(N);
// Shifting out of right side => zero
HWY_ASSERT_VEC_EQ(d, v0, ShiftRight<7>(vi));
#if HWY_VARIABLE_SHIFT_LANES == 1 || HWY_IDE
HWY_ASSERT_VEC_EQ(d, v0, ShiftRightSame(vi, 7));
#endif
// Simple left shift
for (size_t i = 0; i < N; ++i) {
expected[i] = T(i << 1);
}
HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftLeft<1>(vi));
#if HWY_VARIABLE_SHIFT_LANES == 1 || HWY_IDE
HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftLeftSame(vi, 1));
#endif
// Simple right shift
for (size_t i = 0; i < N; ++i) {
expected[i] = T(i >> 1);
}
HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftRight<1>(vi));
#if HWY_VARIABLE_SHIFT_LANES == 1 || HWY_IDE
HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftRightSame(vi, 1));
#endif
// Sign extension
constexpr T min = LimitsMin<T>();
const auto vn = Iota(d, min);
for (int i = 0; i < static_cast<int>(N); ++i) {
// We want a right-shift here, which is undefined behavior for negative
// numbers. Since we want (-1)>>1 to be -1, we need to adjust rounding if
// minT is odd and negative.
T minT = static_cast<T>(min + i);
expected[i] = T(minT / 2 + (minT < 0 ? minT % 2 : 0));
}
HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftRight<1>(vn));
#if HWY_VARIABLE_SHIFT_LANES == 1 || HWY_IDE
HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftRightSame(vn, 1));
#endif
// Shifting negative left
for (int i = 0; i < static_cast<int>(N); ++i) {
expected[i] = T(TU(min + i) << 1);
}
HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftLeft<1>(vn));
#if HWY_VARIABLE_SHIFT_LANES == 1 || HWY_IDE
HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftLeftSame(vn, 1));
#endif
#else // HWY_DISABLE_BROKEN_AVX3_TESTS
(void)d;
#endif
}
};
struct TestUnsignedVarShifts {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
// Avoid "Do not know how to split the result of this operator"
#if !defined(HWY_DISABLE_BROKEN_AVX3_TESTS) || HWY_TARGET != HWY_AVX3
constexpr int kSign = (sizeof(T) * 8) - 1;
const auto v0 = Zero(d);
const auto v1 = Set(d, 1);
const auto vi = Iota(d, 0);
const size_t N = Lanes(d);
auto expected = AllocateAligned<T>(N);
// Shifting out of right side => zero
HWY_ASSERT_VEC_EQ(d, v0, vi >> Set(d, 7));
// Simple left shift
for (size_t i = 0; i < N; ++i) {
expected[i] = T(i << 1);
}
HWY_ASSERT_VEC_EQ(d, expected.get(), vi << Set(d, 1));
// Simple right shift
for (size_t i = 0; i < N; ++i) {
expected[i] = T(i >> 1);
}
HWY_ASSERT_VEC_EQ(d, expected.get(), vi >> Set(d, 1));
// Verify truncation for left-shift
for (size_t i = 0; i < N; ++i) {
expected[i] = (T(i) << kSign) & ~T(0);
}
HWY_ASSERT_VEC_EQ(d, expected.get(), vi << Set(d, kSign));
// Verify variable left shift (assumes < 32 lanes)
for (size_t i = 0; i < N; ++i) {
expected[i] = T(1) << i;
}
HWY_ASSERT_VEC_EQ(d, expected.get(), v1 << vi);
#else // HWY_DISABLE_BROKEN_AVX3_TESTS
(void)d;
#endif
}
};
struct TestSignedVarLeftShifts {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
// Avoid "Do not know how to split the result of this operator"
#if !defined(HWY_DISABLE_BROKEN_AVX3_TESTS) || HWY_TARGET != HWY_AVX3
const auto v1 = Set(d, 1);
const auto vi = Iota(d, 0);
const size_t N = Lanes(d);
auto expected = AllocateAligned<T>(N);
// Simple left shift
for (size_t i = 0; i < N; ++i) {
expected[i] = T(i << 1);
}
HWY_ASSERT_VEC_EQ(d, expected.get(), vi << v1);
// Shifting negative numbers left
constexpr T min = LimitsMin<T>();
const auto vn = Iota(d, min);
for (size_t i = 0; i < N; ++i) {
expected[i] = T((min + i) << 1);
}
HWY_ASSERT_VEC_EQ(d, expected.get(), vn << v1);
// Differing shift counts (assumes < 32 lanes)
for (size_t i = 0; i < N; ++i) {
expected[i] = T(1) << i;
}
HWY_ASSERT_VEC_EQ(d, expected.get(), v1 << vi);
#else // HWY_DISABLE_BROKEN_AVX3_TESTS
(void)d;
#endif
}
};
struct TestSignedVarRightShifts {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
// Avoid "Do not know how to split the result of this operator"
#if !defined(HWY_DISABLE_BROKEN_AVX3_TESTS) || HWY_TARGET != HWY_AVX3
const auto v0 = Zero(d);
const auto vi = Iota(d, 0);
const auto vmax = Set(d, LimitsMax<T>());
const size_t N = Lanes(d);
auto expected = AllocateAligned<T>(N);
// Shifting out of right side => zero
HWY_ASSERT_VEC_EQ(d, v0, vi >> Set(d, 7));
// Simple right shift
for (size_t i = 0; i < N; ++i) {
expected[i] = T(i >> 1);
}
HWY_ASSERT_VEC_EQ(d, expected.get(), vi >> Set(d, 1));
// Sign extension
constexpr T min = LimitsMin<T>();
const auto vn = Iota(d, min);
for (size_t i = 0; i < N; ++i) {
// We want a right-shift here, which is undefined behavior for negative
// numbers. Since we want (-1)>>1 to be -1, we need to adjust rounding if
// minT is odd and negative.
T minT = min + i;
expected[i] = T(minT / 2 + (minT < 0 ? minT % 2 : 0));
}
HWY_ASSERT_VEC_EQ(d, expected.get(), vn >> Set(d, 1));
// Differing shift counts (assumes < 32 lanes)
for (size_t i = 0; i < N; ++i) {
expected[i] = LimitsMax<T>() >> i;
}
HWY_ASSERT_VEC_EQ(d, expected.get(), vmax >> vi);
#else // HWY_DISABLE_BROKEN_AVX3_TESTS
(void)d;
#endif
}
};
HWY_NOINLINE void TestAllShifts() {
const ForPartialVectors<TestUnsignedShifts> test_unsigned;
// No u8.
test_unsigned(uint16_t());
test_unsigned(uint32_t());
#if HWY_CAP_INTEGER64
test_unsigned(uint64_t());
#endif
const ForPartialVectors<TestSignedShifts> test_signed;
// No i8.
test_signed(int16_t());
test_signed(int32_t());
// No i64/f32/f64.
ForPartialVectors<TestUnsignedVarShifts, 1, 1,
HWY_VARIABLE_SHIFT_LANES(uint32_t)>()(uint32_t());
#if HWY_CAP_INTEGER64
ForPartialVectors<TestUnsignedVarShifts, 1, 1,
HWY_VARIABLE_SHIFT_LANES(uint64_t)>()(uint64_t());
#endif
ForPartialVectors<TestSignedVarLeftShifts, 1, 1,
HWY_VARIABLE_SHIFT_LANES(int32_t)>()(int32_t());
#if HWY_CAP_INTEGER64
ForPartialVectors<TestSignedVarLeftShifts, 1, 1,
HWY_VARIABLE_SHIFT_LANES(int64_t)>()(int64_t());
#endif
ForPartialVectors<TestSignedVarRightShifts, 1, 1,
HWY_VARIABLE_SHIFT_LANES(int32_t)>()(int32_t());
// No i64 (right-shift).
}
struct TestUnsignedMinMax {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
const auto v0 = Zero(d);
const auto v1 = Iota(d, 1);
const auto v2 = Iota(d, 2);
const auto v_max = Iota(d, LimitsMax<T>() - Lanes(d) + 1);
HWY_ASSERT_VEC_EQ(d, v1, Min(v1, v2));
HWY_ASSERT_VEC_EQ(d, v2, Max(v1, v2));
HWY_ASSERT_VEC_EQ(d, v0, Min(v1, v0));
HWY_ASSERT_VEC_EQ(d, v1, Max(v1, v0));
HWY_ASSERT_VEC_EQ(d, v1, Min(v1, v_max));
HWY_ASSERT_VEC_EQ(d, v_max, Max(v1, v_max));
HWY_ASSERT_VEC_EQ(d, v0, Min(v0, v_max));
HWY_ASSERT_VEC_EQ(d, v_max, Max(v0, v_max));
}
};
struct TestSignedMinMax {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
const auto v1 = Iota(d, 1);
const auto v2 = Iota(d, 2);
const auto v_neg = Iota(d, -T(Lanes(d)));
const auto v_neg_max = Iota(d, LimitsMin<T>());
HWY_ASSERT_VEC_EQ(d, v1, Min(v1, v2));
HWY_ASSERT_VEC_EQ(d, v2, Max(v1, v2));
HWY_ASSERT_VEC_EQ(d, v_neg, Min(v1, v_neg));
HWY_ASSERT_VEC_EQ(d, v1, Max(v1, v_neg));
HWY_ASSERT_VEC_EQ(d, v_neg_max, Min(v1, v_neg_max));
HWY_ASSERT_VEC_EQ(d, v1, Max(v1, v_neg_max));
HWY_ASSERT_VEC_EQ(d, v_neg_max, Min(v_neg, v_neg_max));
HWY_ASSERT_VEC_EQ(d, v_neg, Max(v_neg, v_neg_max));
}
};
struct TestFloatMinMax {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
// Avoid "Do not know how to split the result of this operator"
#if !defined(HWY_DISABLE_BROKEN_AVX3_TESTS) || HWY_TARGET != HWY_AVX3
const auto v1 = Iota(d, 1);
const auto v2 = Iota(d, 2);
const auto v_neg = Iota(d, -T(Lanes(d)));
HWY_ASSERT_VEC_EQ(d, v1, Min(v1, v2));
HWY_ASSERT_VEC_EQ(d, v2, Max(v1, v2));
HWY_ASSERT_VEC_EQ(d, v_neg, Min(v1, v_neg));
HWY_ASSERT_VEC_EQ(d, v1, Max(v1, v_neg));
#else // HWY_DISABLE_BROKEN_AVX3_TESTS
(void)d;
#endif
}
};
HWY_NOINLINE void TestAllMinMax() {
const ForPartialVectors<TestUnsignedMinMax> test_unsigned;
test_unsigned(uint8_t());
test_unsigned(uint16_t());
test_unsigned(uint32_t());
// No u64.
const ForPartialVectors<TestSignedMinMax> test_signed;
test_signed(int8_t());
test_signed(int16_t());
test_signed(int32_t());
// No i64.
ForFloatTypes(ForPartialVectors<TestFloatMinMax>());
}
struct TestUnsignedMul {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
const auto v0 = Zero(d);
const auto v1 = Set(d, T(1));
const auto vi = Iota(d, 1);
const auto vj = Iota(d, 3);
const size_t N = Lanes(d);
auto expected = AllocateAligned<T>(N);
HWY_ASSERT_VEC_EQ(d, v0, v0 * v0);
HWY_ASSERT_VEC_EQ(d, v1, v1 * v1);
HWY_ASSERT_VEC_EQ(d, vi, v1 * vi);
HWY_ASSERT_VEC_EQ(d, vi, vi * v1);
for (size_t i = 0; i < N; ++i) {
expected[i] = static_cast<T>((1 + i) * (1 + i));
}
HWY_ASSERT_VEC_EQ(d, expected.get(), vi * vi);
for (size_t i = 0; i < N; ++i) {
expected[i] = static_cast<T>((1 + i) * (3 + i));
}
HWY_ASSERT_VEC_EQ(d, expected.get(), vi * vj);
const T max = LimitsMax<T>();
const auto vmax = Set(d, max);
HWY_ASSERT_VEC_EQ(d, vmax, vmax * v1);
HWY_ASSERT_VEC_EQ(d, vmax, v1 * vmax);
const size_t bits = sizeof(T) * 8;
const uint64_t mask = (1ull << bits) - 1;
const T max2 = (uint64_t(max) * max) & mask;
HWY_ASSERT_VEC_EQ(d, Set(d, max2), vmax * vmax);
}
};
struct TestSignedMul {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
const size_t N = Lanes(d);
auto expected = AllocateAligned<T>(N);
const auto v0 = Zero(d);
const auto v1 = Set(d, T(1));
const auto vi = Iota(d, 1);
const auto vn = Iota(d, -T(N));
HWY_ASSERT_VEC_EQ(d, v0, v0 * v0);
HWY_ASSERT_VEC_EQ(d, v1, v1 * v1);
HWY_ASSERT_VEC_EQ(d, vi, v1 * vi);
HWY_ASSERT_VEC_EQ(d, vi, vi * v1);
for (size_t i = 0; i < N; ++i) {
expected[i] = static_cast<T>((1 + i) * (1 + i));
}
HWY_ASSERT_VEC_EQ(d, expected.get(), vi * vi);
for (int i = 0; i < static_cast<int>(N); ++i) {
expected[i] = static_cast<T>((-T(N) + i) * (1 + i));
}
HWY_ASSERT_VEC_EQ(d, expected.get(), vn * vi);
HWY_ASSERT_VEC_EQ(d, expected.get(), vi * vn);
}
};
HWY_NOINLINE void TestAllMul() {
const ForPartialVectors<TestUnsignedMul> test_unsigned;
// No u8.
test_unsigned(uint16_t());
test_unsigned(uint32_t());
// No u64.
const ForPartialVectors<TestSignedMul> test_signed;
// No i8.
test_signed(int16_t());
test_signed(int32_t());
// No i64.
}
template <typename Wide>
struct TestMulHigh {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
// Avoid "Do not know how to split the result of this operator"
#if !defined(HWY_DISABLE_BROKEN_AVX3_TESTS) || HWY_TARGET != HWY_AVX3
const size_t N = Lanes(d);
auto in_lanes = AllocateAligned<T>(N);
auto expected_lanes = AllocateAligned<T>(N);
const auto vi = Iota(d, 1);
const auto vni = Iota(d, -T(N));
const auto v0 = Zero(d);
HWY_ASSERT_VEC_EQ(d, v0, MulHigh(v0, v0));
HWY_ASSERT_VEC_EQ(d, v0, MulHigh(v0, vi));
HWY_ASSERT_VEC_EQ(d, v0, MulHigh(vi, v0));
// Large positive squared
for (size_t i = 0; i < N; ++i) {
in_lanes[i] = LimitsMax<T>() >> i;
expected_lanes[i] = (Wide(in_lanes[i]) * in_lanes[i]) >> 16;
}
auto v = Load(d, in_lanes.get());
HWY_ASSERT_VEC_EQ(d, expected_lanes.get(), MulHigh(v, v));
// Large positive * small positive
for (int i = 0; i < static_cast<int>(N); ++i) {
expected_lanes[i] = static_cast<T>((Wide(in_lanes[i]) * T(1 + i)) >> 16);
}
HWY_ASSERT_VEC_EQ(d, expected_lanes.get(), MulHigh(v, vi));
HWY_ASSERT_VEC_EQ(d, expected_lanes.get(), MulHigh(vi, v));
// Large positive * small negative
for (size_t i = 0; i < N; ++i) {
expected_lanes[i] = (Wide(in_lanes[i]) * T(i - N)) >> 16;
}
HWY_ASSERT_VEC_EQ(d, expected_lanes.get(), MulHigh(v, vni));
HWY_ASSERT_VEC_EQ(d, expected_lanes.get(), MulHigh(vni, v));
#else // HWY_DISABLE_BROKEN_AVX3_TESTS
(void)d;
#endif
}
};
HWY_NOINLINE void TestAllMulHigh() {
ForPartialVectors<TestMulHigh<int32_t>>()(int16_t());
ForPartialVectors<TestMulHigh<uint32_t>>()(uint16_t());
}
template <typename Wide>
struct TestMulEven {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
if (Lanes(d) == 1) return;
static_assert(sizeof(Wide) == 2 * sizeof(T), "Expected double-width type");
const Repartition<Wide, D> d2;
const auto v0 = Zero(d);
HWY_ASSERT_VEC_EQ(d2, Zero(d2), MulEven(v0, v0));
// scalar has N=1 and we write to "lane 1" below, though it isn't used by
// the actual MulEven.
auto in_lanes = AllocateAligned<T>(HWY_MAX(Lanes(d), 2));
auto expected = AllocateAligned<Wide>(Lanes(d2));
for (size_t i = 0; i < Lanes(d); i += 2) {
in_lanes[i + 0] = LimitsMax<T>() >> i;
in_lanes[i + 1] = 1; // will be overwritten with upper half of result
expected[i / 2] = Wide(in_lanes[i + 0]) * in_lanes[i + 0];
}
const auto v = Load(d, in_lanes.get());
HWY_ASSERT_VEC_EQ(d2, expected.get(), MulEven(v, v));
}
};
HWY_NOINLINE void TestAllMulEven() {
ForPartialVectors<TestMulEven<int64_t>>()(int32_t());
ForPartialVectors<TestMulEven<uint64_t>>()(uint32_t());
}
struct TestMulAdd {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
const auto k0 = Zero(d);
const auto kNeg0 = Set(d, T(-0.0));
const auto v1 = Iota(d, 1);
const auto v2 = Iota(d, 2);
const size_t N = Lanes(d);
auto expected = AllocateAligned<T>(N);
HWY_ASSERT_VEC_EQ(d, k0, MulAdd(k0, k0, k0));
HWY_ASSERT_VEC_EQ(d, v2, MulAdd(k0, v1, v2));
HWY_ASSERT_VEC_EQ(d, v2, MulAdd(v1, k0, v2));
HWY_ASSERT_VEC_EQ(d, k0, NegMulAdd(k0, k0, k0));
HWY_ASSERT_VEC_EQ(d, v2, NegMulAdd(k0, v1, v2));
HWY_ASSERT_VEC_EQ(d, v2, NegMulAdd(v1, k0, v2));
for (size_t i = 0; i < N; ++i) {
expected[i] = (i + 1) * (i + 2);
}
HWY_ASSERT_VEC_EQ(d, expected.get(), MulAdd(v2, v1, k0));
HWY_ASSERT_VEC_EQ(d, expected.get(), MulAdd(v1, v2, k0));
HWY_ASSERT_VEC_EQ(d, expected.get(), NegMulAdd(Neg(v2), v1, k0));
HWY_ASSERT_VEC_EQ(d, expected.get(), NegMulAdd(v1, Neg(v2), k0));
for (size_t i = 0; i < N; ++i) {
expected[i] = (i + 2) * (i + 2) + (i + 1);
}
HWY_ASSERT_VEC_EQ(d, expected.get(), MulAdd(v2, v2, v1));
HWY_ASSERT_VEC_EQ(d, expected.get(), NegMulAdd(Neg(v2), v2, v1));
for (size_t i = 0; i < N; ++i) {
expected[i] = -T(i + 2) * (i + 2) + (1 + i);
}
HWY_ASSERT_VEC_EQ(d, expected.get(), NegMulAdd(v2, v2, v1));
HWY_ASSERT_VEC_EQ(d, k0, MulSub(k0, k0, k0));
HWY_ASSERT_VEC_EQ(d, kNeg0, NegMulSub(k0, k0, k0));
for (size_t i = 0; i < N; ++i) {
expected[i] = -T(i + 2);
}
HWY_ASSERT_VEC_EQ(d, expected.get(), MulSub(k0, v1, v2));
HWY_ASSERT_VEC_EQ(d, expected.get(), MulSub(v1, k0, v2));
HWY_ASSERT_VEC_EQ(d, expected.get(), NegMulSub(Neg(k0), v1, v2));
HWY_ASSERT_VEC_EQ(d, expected.get(), NegMulSub(v1, Neg(k0), v2));
for (size_t i = 0; i < N; ++i) {
expected[i] = (i + 1) * (i + 2);
}
HWY_ASSERT_VEC_EQ(d, expected.get(), MulSub(v1, v2, k0));
HWY_ASSERT_VEC_EQ(d, expected.get(), MulSub(v2, v1, k0));
HWY_ASSERT_VEC_EQ(d, expected.get(), NegMulSub(Neg(v1), v2, k0));
HWY_ASSERT_VEC_EQ(d, expected.get(), NegMulSub(v2, Neg(v1), k0));
for (size_t i = 0; i < N; ++i) {
expected[i] = (i + 2) * (i + 2) - (1 + i);
}
HWY_ASSERT_VEC_EQ(d, expected.get(), MulSub(v2, v2, v1));
HWY_ASSERT_VEC_EQ(d, expected.get(), NegMulSub(Neg(v2), v2, v1));
}
};
HWY_NOINLINE void TestAllMulAdd() {
ForFloatTypes(ForPartialVectors<TestMulAdd>());
}
struct TestSquareRoot {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
const auto vi = Iota(d, 0);
HWY_ASSERT_VEC_EQ(d, vi, Sqrt(vi * vi));
}
};
HWY_NOINLINE void TestAllSquareRoot() {
ForFloatTypes(ForPartialVectors<TestSquareRoot>());
}
struct TestReciprocalSquareRoot {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
// Avoid "Do not know how to split the result of this operator"
#if !defined(HWY_DISABLE_BROKEN_AVX3_TESTS) || HWY_TARGET != HWY_AVX3
const auto v = Set(d, 123.0f);
const size_t N = Lanes(d);
auto lanes = AllocateAligned<T>(N);
Store(ApproximateReciprocalSqrt(v), d, lanes.get());
for (size_t i = 0; i < N; ++i) {
float err = lanes[i] - 0.090166f;
if (err < 0.0f) err = -err;
HWY_ASSERT(err < 1E-4f);
}
#else // HWY_DISABLE_BROKEN_AVX3_TESTS
(void)d;
#endif
}
};
HWY_NOINLINE void TestAllReciprocalSquareRoot() {
ForPartialVectors<TestReciprocalSquareRoot>()(float());
}
struct TestRound {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
// Avoid "Do not know how to split the result of this operator"
#if !defined(HWY_DISABLE_BROKEN_AVX3_TESTS) || HWY_TARGET != HWY_AVX3
// Numbers close to 0
{
const auto v = Set(d, T(0.4));
const auto zero = Set(d, 0);
const auto one = Set(d, 1);
HWY_ASSERT_VEC_EQ(d, one, Ceil(v));
HWY_ASSERT_VEC_EQ(d, zero, Floor(v));
HWY_ASSERT_VEC_EQ(d, zero, Round(v));
HWY_ASSERT_VEC_EQ(d, zero, Trunc(v));
}
{
const auto v = Set(d, T(-0.4));
const auto neg_zero = Set(d, T(-0.0));
const auto neg_one = Set(d, -1);
HWY_ASSERT_VEC_EQ(d, neg_zero, Ceil(v));
HWY_ASSERT_VEC_EQ(d, neg_one, Floor(v));
HWY_ASSERT_VEC_EQ(d, neg_zero, Round(v));
HWY_ASSERT_VEC_EQ(d, neg_zero, Trunc(v));
}
// Integer positive
{
const auto v = Iota(d, 4.0);
HWY_ASSERT_VEC_EQ(d, v, Ceil(v));
HWY_ASSERT_VEC_EQ(d, v, Floor(v));
HWY_ASSERT_VEC_EQ(d, v, Round(v));
HWY_ASSERT_VEC_EQ(d, v, Trunc(v));
}
// Integer negative
{
const auto v = Iota(d, T(-32.0));
HWY_ASSERT_VEC_EQ(d, v, Ceil(v));
HWY_ASSERT_VEC_EQ(d, v, Floor(v));
HWY_ASSERT_VEC_EQ(d, v, Round(v));
HWY_ASSERT_VEC_EQ(d, v, Trunc(v));
}
// Huge positive
{
const auto v = Set(d, T(1E15));
HWY_ASSERT_VEC_EQ(d, v, Ceil(v));
HWY_ASSERT_VEC_EQ(d, v, Floor(v));
}
// Huge negative
{
const auto v = Set(d, T(-1E15));
HWY_ASSERT_VEC_EQ(d, v, Ceil(v));
HWY_ASSERT_VEC_EQ(d, v, Floor(v));
}
// Above positive
{
const auto v = Iota(d, T(2.0001));
const auto v3 = Iota(d, T(3));
const auto v2 = Iota(d, T(2));
HWY_ASSERT_VEC_EQ(d, v3, Ceil(v));
HWY_ASSERT_VEC_EQ(d, v2, Floor(v));
HWY_ASSERT_VEC_EQ(d, v2, Round(v));
HWY_ASSERT_VEC_EQ(d, v2, Trunc(v));
}
// Below positive
{
const auto v = Iota(d, T(3.9999));
const auto v4 = Iota(d, T(4));
const auto v3 = Iota(d, T(3));
HWY_ASSERT_VEC_EQ(d, v4, Ceil(v));
HWY_ASSERT_VEC_EQ(d, v3, Floor(v));
HWY_ASSERT_VEC_EQ(d, v4, Round(v));
HWY_ASSERT_VEC_EQ(d, v3, Trunc(v));
}
// Above negative
{
// WARNING: using iota => ensure negative value is low enough that
// even 16 lanes remain negative, otherwise trunc will behave differently
// for positive/negative values.
const auto v = Iota(d, T(-19.9999));
const auto v3 = Iota(d, T(-19));
const auto v4 = Iota(d, T(-20));
HWY_ASSERT_VEC_EQ(d, v3, Ceil(v));
HWY_ASSERT_VEC_EQ(d, v4, Floor(v));
HWY_ASSERT_VEC_EQ(d, v4, Round(v));
HWY_ASSERT_VEC_EQ(d, v3, Trunc(v));
}
// Below negative
{
const auto v = Iota(d, T(-18.0001));
const auto v2 = Iota(d, T(-18));
const auto v3 = Iota(d, T(-19));
HWY_ASSERT_VEC_EQ(d, v2, Ceil(v));
HWY_ASSERT_VEC_EQ(d, v3, Floor(v));
HWY_ASSERT_VEC_EQ(d, v2, Round(v));
HWY_ASSERT_VEC_EQ(d, v2, Trunc(v));
}
#else // HWY_DISABLE_BROKEN_AVX3_TESTS
(void)d;
#endif
}
};
HWY_NOINLINE void TestAllRound() {
ForFloatTypes(ForPartialVectors<TestRound>());
}
struct TestSumsOfU8 {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
// Avoid "Do not know how to split the result of this operator"
#if (!defined(HWY_DISABLE_BROKEN_AVX3_TESTS) || HWY_TARGET != HWY_AVX3) && \
HWY_TARGET != HWY_SCALAR && HWY_CAP_INTEGER64
const size_t N = Lanes(d);
auto in_bytes = AllocateAligned<uint8_t>(N * sizeof(uint64_t));
auto sums = AllocateAligned<T>(N);
std::fill(sums.get(), sums.get() + N, 0);
for (size_t i = 0; i < N * sizeof(uint64_t); ++i) {
const size_t group = i / sizeof(uint64_t);
in_bytes[i] = static_cast<uint8_t>(2 * i + 1);
sums[group] += in_bytes[i];
}
const Repartition<uint8_t, D> du8;
const auto v = Load(du8, in_bytes.get());
HWY_ASSERT_VEC_EQ(d, sums.get(), SumsOfU8x8(v));
#else
(void)d;
#endif
}
};
HWY_NOINLINE void TestAllSumsOfU8() {
#if HWY_CAP_INTEGER64
ForPartialVectors<TestSumsOfU8>()(uint64_t());
#endif
}
struct TestSumOfLanes {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
const size_t N = Lanes(d);
auto in_lanes = AllocateAligned<T>(N);
double sum = 0.0;
for (size_t i = 0; i < N; ++i) {
in_lanes[i] = static_cast<T>(1u << i);
sum += static_cast<double>(in_lanes[i]);
}
const auto v = Load(d, in_lanes.get());
const auto expected = Set(d, T(sum));
HWY_ASSERT_VEC_EQ(d, expected, SumOfLanes(v));
}
};
HWY_NOINLINE void TestAllSumOfLanes() {
// Only full vectors because lanes in partial vectors are undefined.
const ForFullVectors<TestSumOfLanes> sum;
// No u8/u16/i8/i16.
sum(uint32_t());
sum(int32_t());
#if HWY_CAP_INTEGER64
sum(uint64_t());
sum(int64_t());
#endif
ForFloatTypes(sum);
}
struct TestMinOfLanes {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
// Avoid "Do not know how to split the result of this operator"
#if !defined(HWY_DISABLE_BROKEN_AVX3_TESTS) || HWY_TARGET != HWY_AVX3
const size_t N = Lanes(d);
auto in_lanes = AllocateAligned<T>(N);
T min = LimitsMax<T>();
for (size_t i = 0; i < N; ++i) {
in_lanes[i] = static_cast<T>(1u << i);
min = std::min(min, in_lanes[i]);
}
const auto v = Load(d, in_lanes.get());
const auto expected = Set(d, min);
HWY_ASSERT_VEC_EQ(d, expected, MinOfLanes(v));
#else // HWY_DISABLE_BROKEN_AVX3_TESTS
(void)d;
#endif
}
};
struct TestMaxOfLanes {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
// Avoid "Do not know how to split the result of this operator"
#if !defined(HWY_DISABLE_BROKEN_AVX3_TESTS) || HWY_TARGET != HWY_AVX3
const size_t N = Lanes(d);
auto in_lanes = AllocateAligned<T>(N);
T max = LimitsMin<T>();
for (size_t i = 0; i < N; ++i) {
in_lanes[i] = static_cast<T>(1u << i);
max = std::max(max, in_lanes[i]);
}
const auto v = Load(d, in_lanes.get());
const auto expected = Set(d, max);
HWY_ASSERT_VEC_EQ(d, expected, MaxOfLanes(v));
#else // HWY_DISABLE_BROKEN_AVX3_TESTS
(void)d;
#endif
}
};
HWY_NOINLINE void TestAllMinMaxOfLanes() {
// Only full vectors because lanes in partial vectors are undefined.
const ForFullVectors<TestMinOfLanes> min;
const ForFullVectors<TestMaxOfLanes> max;
// No u8/u16/i8/i16.
min(uint32_t());
max(uint32_t());
min(int32_t());
max(int32_t());
#if HWY_CAP_INTEGER64 && HWY_MINMAX64_LANES > 1
min(uint64_t());
max(uint64_t());
min(int64_t());
max(int64_t());
#endif
ForFloatTypes(min);
ForFloatTypes(max);
}
struct TestAbsDiff {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
const size_t N = Lanes(d);
auto in_lanes_a = AllocateAligned<T>(N);
auto in_lanes_b = AllocateAligned<T>(N);
auto out_lanes = AllocateAligned<T>(N);
for (size_t i = 0; i < N; ++i) {
in_lanes_a[i] = (i ^ 1u) << i;
in_lanes_b[i] = i << i;
out_lanes[i] = fabsf(in_lanes_a[i] - in_lanes_b[i]);
}
const auto a = Load(d, in_lanes_a.get());
const auto b = Load(d, in_lanes_b.get());
const auto expected = Load(d, out_lanes.get());
HWY_ASSERT_VEC_EQ(d, expected, AbsDiff(a, b));
HWY_ASSERT_VEC_EQ(d, expected, AbsDiff(b, a));
}
};
HWY_NOINLINE void TestAllAbsDiff() {
ForPartialVectors<TestAbsDiff>()(float());
}
struct TestNeg {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
const auto v0 = Zero(d);
const auto vn = Set(d, T(-3));
const auto vp = Set(d, T(3));
HWY_ASSERT_VEC_EQ(d, v0, Neg(v0));
HWY_ASSERT_VEC_EQ(d, vp, Neg(vn));
HWY_ASSERT_VEC_EQ(d, vn, Neg(vp));
}
};
HWY_NOINLINE void TestAllNeg() {
ForSignedTypes(ForPartialVectors<TestNeg>());
ForFloatTypes(ForPartialVectors<TestNeg>());
}
// NOLINTNEXTLINE(google-readability-namespace-comments)
} // namespace HWY_NAMESPACE
} // namespace hwy
HWY_AFTER_NAMESPACE();
#if HWY_ONCE
namespace hwy {
class HwyArithmeticTest : public hwy::TestWithParamTarget {};
HWY_TARGET_INSTANTIATE_TEST_SUITE_P(HwyArithmeticTest);
HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllPlusMinus);
HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllSaturatingArithmetic);
HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllShifts);
HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllMinMax);
HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllAverage);
HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllAbs);
HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllMul);
HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllMulHigh);
HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllMulEven);
HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllMulAdd);
HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllSquareRoot);
HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllReciprocalSquareRoot);
HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllSumsOfU8);
HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllSumOfLanes);
HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllMinMaxOfLanes);
HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllRound);
HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllAbsDiff);
HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllNeg);
} // namespace hwy
#endif