blob: b96ff8b5ea44d1de0fc8ec2eedb43bd70c028d93 [file] [log] [blame]
/*
* Test 128 and 256-bit two operand integer intrinsics,
* with masked and zero-masked forms, by comparing
* their output with the corresponding 512-bit intrinsic.
* Here we check for _mm512_[mask|maskz]_[and|andnot|or|xor|add|max|min|mul|sub]
* intrinsics
*/
#include "m512_test_util.h"
#include <stdio.h>
/*
 * Shared source operands, one pair per element width; filled by init().
 * src1 holds the element index i, src2 holds +i for odd i and -i for
 * even i (so the two operands differ in sign on half the lanes).
 */
V512 i8_src1;
V512 i8_src2;
V512 i16_src1;
V512 i16_src2;
V512 i32_src1;
V512 i32_src2;
V512 i64_src1;
V512 i64_src2;
/*
 * Populate the source vectors for every element width.
 * src1[i] = i; src2[i] = i when i is odd, -i when i is even.
 * The volatile loop index keeps the compiler from folding the
 * initialization away.
 */
void NOINLINE init() {
  volatile int idx;

  for (idx = 0; idx < 64; idx++) {
    i8_src1.s8[idx] = idx;
    i8_src2.s8[idx] = ((idx & 1) == 0) ? -idx : idx;
  }

  for (idx = 0; idx < 32; idx++) {
    i16_src1.s16[idx] = idx;
    i16_src2.s16[idx] = ((idx & 1) == 0) ? -idx : idx;
  }

  for (idx = 0; idx < 16; idx++) {
    i32_src1.s32[idx] = idx;
    i32_src2.s32[idx] = ((idx & 1) == 0) ? -idx : idx;
  }

  for (idx = 0; idx < 8; idx++) {
    i64_src1.s64[idx] = idx;
    i64_src2.s64[idx] = ((idx & 1) == 0) ? -idx : idx;
  }
}
/*
* Use "soft update" between tests to make compiler think src was updated.
* Prevents PRE'ing a load of src, thus allowing ciscization.
* Also prevents PRE'ing intrinsic operations, ensuring we
* execute the intended instructions.
*/
volatile int vol0 = 0; /* always 0 at run time, but opaque to the optimizer */
/* Self-assign one xmm lane of 'var' through the volatile index so the
 * compiler must assume 'var' may have changed between uses. */
#define soft_v512_update(var) (var).xmmi[vol0] = (var).xmmi[vol0]
/*
* Generate a function that tests a packed int64 intrinsic
* by implementing the XMM, YMM and ZMM versions, and comparing
* the XMM and YMM results with the low part of the ZMM result.
*
* We test regular, masked and zero masked forms.
*
* Use GEN_I64_UNIFORM when the core intrinsic name is the same
* for all vector lengths, e.g. "add_epi64". Otherwise use
* GEN_I64 to list the different names, e.g. "and_si128" and "and_si256".
*/
/* Same-name convenience wrapper: all vector lengths share one core name. */
#define GEN_I64_UNIFORM(oper) GEN_I64(oper, oper, oper, oper, oper)
/* Emit do_<test_name>() exercising the unmasked, merge-masked and        \
 * zero-masked 512-bit forms of a packed int64 two-operand intrinsic. */
#define GEN_I64(test_name, oper_epi64, oper_xmm, oper_ymm, oper_zmm)           \
  void NOINLINE do_##test_name() {                                             \
    V512 res_xmm, res_ymm, res_zmm;                                            \
    __mmask8 mask8 = 0x5a;                                                     \
                                                                               \
    /* Plain (unmasked) 512-bit form. */                                       \
    soft_v512_update(i64_src2);                                                \
    res_zmm.zmmi = _mm512_##oper_zmm(i64_src1.zmmi, i64_src2.zmmi);            \
                                                                               \
    /* Merge-masked form, starting from an all-zero destination. */            \
    res_zmm.zmmi = _mm512_setzero_epi32();                                     \
    res_ymm = res_zmm;                                                         \
    res_xmm = res_zmm;                                                         \
    soft_v512_update(i64_src2);                                                \
    res_zmm.zmmi = _mm512_mask_##oper_epi64(res_zmm.zmmi, mask8,               \
                                            i64_src1.zmmi, i64_src2.zmmi);     \
                                                                               \
    /* Zero-masked form, starting from a destination of all-1 elements. */     \
    res_zmm.zmmi = _mm512_set1_epi64(1);                                       \
    res_ymm = res_zmm;                                                         \
    res_xmm = res_zmm;                                                         \
    soft_v512_update(i64_src2);                                                \
    res_zmm.zmmi =                                                             \
        _mm512_maskz_##oper_epi64(mask8, i64_src1.zmmi, i64_src2.zmmi);        \
  }
/* Same-name convenience wrapper: all vector lengths share one core name. */
#define GEN_I32_UNIFORM(oper) GEN_I32(oper, oper, oper, oper, oper)
/* Emit do_<test_name>() exercising the unmasked, merge-masked and        \
 * zero-masked 512-bit forms of a packed int32 two-operand intrinsic. */
#define GEN_I32(test_name, oper_epi32, oper_xmm, oper_ymm, oper_zmm)           \
  void NOINLINE do_##test_name() {                                             \
    V512 xmm_res, ymm_res, zmm_res;                                            \
    __mmask16 k16 = 0x7feb;                                                    \
    __mmask8 k8 = (__mmask8)k16; /* narrowed mask for the xmm/ymm forms */     \
                                                                               \
    /* Non-masked. */                                                          \
                                                                               \
    soft_v512_update(i32_src2);                                                \
    zmm_res.zmmi = _mm512_##oper_zmm(i32_src1.zmmi, i32_src2.zmmi);            \
                                                                               \
    /* Masked: merge into an all-zero destination. */                          \
                                                                               \
    zmm_res.zmmi = _mm512_setzero_epi32();                                     \
    ymm_res = zmm_res;                                                         \
    xmm_res = zmm_res;                                                         \
                                                                               \
    soft_v512_update(i32_src2);                                                \
    zmm_res.zmmi = _mm512_mask_##oper_epi32(zmm_res.zmmi, k16, i32_src1.zmmi,  \
                                            i32_src2.zmmi);                    \
                                                                               \
    /* Zero-masked: start from a destination of all-1 elements. */             \
                                                                               \
    zmm_res.zmmi = _mm512_set1_epi32(1); /* fixed: was 1.0 (double literal     \
                                            passed to an int parameter) */     \
    ymm_res = zmm_res;                                                         \
    xmm_res = zmm_res;                                                         \
                                                                               \
    soft_v512_update(i32_src2);                                                \
    zmm_res.zmmi =                                                             \
        _mm512_maskz_##oper_epi32(k16, i32_src1.zmmi, i32_src2.zmmi);          \
  }
/* Same-name convenience wrapper: all vector lengths share one core name. */
#define GEN_I16_UNIFORM(oper) GEN_I16(oper, oper, oper, oper, oper)
/* Emit do_<test_name>() exercising the unmasked, merge-masked and        \
 * zero-masked 512-bit forms of a packed int16 two-operand intrinsic. */
#define GEN_I16(test_name, oper_epi16, oper_xmm, oper_ymm, oper_zmm)           \
  void NOINLINE do_##test_name() {                                             \
    V512 xmm_res, ymm_res, zmm_res;                                            \
    __mmask32 k32 = 0x7febeb7f;                                                \
    __mmask16 k16 = (__mmask16)k32; /* narrowed mask for the ymm forms */      \
    __mmask8 k8 = (__mmask8)k16;    /* narrowed mask for the xmm forms */      \
                                                                               \
    /* Non-masked. */                                                          \
                                                                               \
    soft_v512_update(i16_src2);                                                \
    zmm_res.zmmi = _mm512_##oper_zmm(i16_src1.zmmi, i16_src2.zmmi);            \
                                                                               \
    /* Masked: merge into an all-zero destination. */                          \
                                                                               \
    zmm_res.zmmi = _mm512_setzero_epi32();                                     \
    ymm_res = zmm_res;                                                         \
    xmm_res = zmm_res;                                                         \
                                                                               \
    soft_v512_update(i16_src2);                                                \
    zmm_res.zmmi = _mm512_mask_##oper_epi16(zmm_res.zmmi, k32, i16_src1.zmmi,  \
                                            i16_src2.zmmi);                    \
                                                                               \
    /* Zero-masked: start from a destination of all-1 elements. */             \
                                                                               \
    zmm_res.zmmi = _mm512_set1_epi32(1); /* fixed: was 1.0 (double literal     \
                                            passed to an int parameter) */     \
    ymm_res = zmm_res;                                                         \
    xmm_res = zmm_res;                                                         \
                                                                               \
    soft_v512_update(i16_src2);                                                \
    zmm_res.zmmi =                                                             \
        _mm512_maskz_##oper_epi16(k32, i16_src1.zmmi, i16_src2.zmmi);          \
  }
/* Same-name convenience wrapper: all vector lengths share one core name. */
#define GEN_I8_UNIFORM(oper) GEN_I8(oper, oper, oper, oper, oper)
/* Emit do_<test_name>() exercising the unmasked, merge-masked and        \
 * zero-masked 512-bit forms of a packed int8 two-operand intrinsic. */
#define GEN_I8(test_name, oper_epi8, oper_xmm, oper_ymm, oper_zmm)             \
  void NOINLINE do_##test_name() {                                             \
    V512 xmm_res, ymm_res, zmm_res;                                            \
    __mmask64 k64 = 0xa55a7febeb7f5aa5U;                                       \
    __mmask32 k32 = (__mmask32)k64; /* narrowed mask for the ymm forms */      \
    __mmask16 k16 = (__mmask16)k32; /* narrowed mask for the xmm forms */      \
                                                                               \
    /* Non-masked. */                                                          \
                                                                               \
    soft_v512_update(i8_src2);                                                 \
    zmm_res.zmmi = _mm512_##oper_zmm(i8_src1.zmmi, i8_src2.zmmi);              \
                                                                               \
    /* Masked: merge into an all-zero destination. */                          \
                                                                               \
    zmm_res.zmmi = _mm512_setzero_epi32();                                     \
    ymm_res = zmm_res;                                                         \
    xmm_res = zmm_res;                                                         \
                                                                               \
    soft_v512_update(i8_src2);                                                 \
    zmm_res.zmmi = _mm512_mask_##oper_epi8(zmm_res.zmmi, k64, i8_src1.zmmi,    \
                                           i8_src2.zmmi);                      \
                                                                               \
    /* Zero-masked: start from a destination of all-1 elements. */             \
                                                                               \
    zmm_res.zmmi = _mm512_set1_epi32(1); /* fixed: was 1.0 (double literal     \
                                            passed to an int parameter) */     \
    ymm_res = zmm_res;                                                         \
    xmm_res = zmm_res;                                                         \
                                                                               \
    soft_v512_update(i8_src2);                                                 \
    zmm_res.zmmi = _mm512_maskz_##oper_epi8(k64, i8_src1.zmmi, i8_src2.zmmi);  \
  }
/* Bitwise ops spelled with the whole-register (_si512) 512-bit name. */
GEN_I32(and_si512, and_epi32, and_si128, and_si256, and_si512)
GEN_I32(andnot_si512, andnot_epi32, andnot_si128, andnot_si256, andnot_si512)
GEN_I32(or_si512, or_epi32, or_si128, or_si256, or_si512)
GEN_I32(xor_si512, xor_epi32, xor_si128, xor_si256, xor_si512)
/* Bitwise ops spelled with the per-element (_epi64) 512-bit name;
 * the xmm/ymm forms still use the _si128/_si256 names. */
GEN_I64(and_epi64, and_epi64, and_si128, and_si256, and_epi64)
GEN_I64(andnot_epi64, andnot_epi64, andnot_si128, andnot_si256, andnot_epi64)
GEN_I64(or_epi64, or_epi64, or_si128, or_si256, or_epi64)
GEN_I64(xor_epi64, xor_epi64, xor_si128, xor_si256, xor_epi64)
/* Arithmetic ops whose intrinsic name is uniform across vector lengths. */
GEN_I64_UNIFORM(add_epi64)
GEN_I64_UNIFORM(max_epi64)
GEN_I64_UNIFORM(max_epu64)
GEN_I64_UNIFORM(min_epi64)
GEN_I64_UNIFORM(min_epu64)
GEN_I64_UNIFORM(mul_epi32) /* Yes, these are really I64 vector elements. */
GEN_I64_UNIFORM(mul_epu32) /* Yes, these are really I64 vector elements. */
GEN_I32(and_epi32, and_epi32, and_si128, and_si256, and_epi32)
GEN_I32(andnot_epi32, andnot_epi32, andnot_si128, andnot_si256, andnot_epi32)
GEN_I32(or_epi32, or_epi32, or_si128, or_si256, or_epi32)
GEN_I32(xor_epi32, xor_epi32, xor_si128, xor_si256, xor_epi32)
GEN_I32_UNIFORM(add_epi32)
GEN_I32_UNIFORM(max_epi32)
GEN_I32_UNIFORM(max_epu32)
GEN_I32_UNIFORM(min_epi32)
GEN_I32_UNIFORM(min_epu32)
GEN_I32_UNIFORM(sub_epi32)
int main() {
init();
do_and_si512();
do_andnot_si512();
do_or_si512();
do_xor_si512();
do_and_epi64();
do_andnot_epi64();
do_or_epi64();
do_xor_epi64();
do_add_epi64();
do_max_epi64();
do_max_epu64();
do_min_epi64();
do_min_epu64();
do_mul_epi32();
do_mul_epu32();
do_and_epi32();
do_andnot_epi32();
do_or_epi32();
do_xor_epi32();
do_add_epi32();
do_max_epi32();
do_max_epu32();
do_min_epi32();
do_min_epu32();
do_sub_epi32();
if (n_errs != 0) {
printf("FAILED\n");
return 1;
}
printf("PASSED\n");
return 0;
}