blob: a115b849b68210d53d7715c33808afdea8e3fad3 [file] [log] [blame]
// clang-format off
// Expected machine code for the vcvtne2ps2bf16_mask cases bf16_vl_00..bf16_vl_09
// (each array is referenced by name from the ENC4 check with the same index).
// Layout visible in the bytes: 0x62 prefix byte, three prefix payload bytes
// (the third is always 0x17 here), opcode 0x72, ModRM, then an optional SIB
// byte and displacement.
// In the fourth byte, 0x20 is set for the YMM cases and clear for the XMM
// cases, 0x10 is set where the memory operand is OPSZ_4, 0x80 is set for the
// Z cases, and the low three bits equal the mask register number (K0 or K7).
// The 0xfe0 and 0x7f0 displacements are stored as the single byte 0x7f; the
// other non-zero displacements are stored as four little-endian bytes.
byte bf16_vl_00[] = {0x62,0x02,0x17,0x20,0x72,0xf4,};
byte bf16_vl_01[] = {0x62,0x02,0x17,0x00,0x72,0xf4,};
byte bf16_vl_02[] = {0x62,0x22,0x17,0x27,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10,};
byte bf16_vl_03[] = {0x62,0x42,0x17,0x30,0x72,0x31,};
byte bf16_vl_04[] = {0x62,0x62,0x17,0x20,0x72,0x71,0x7f,};
byte bf16_vl_05[] = {0x62,0x62,0x17,0xb7,0x72,0xb2,0x00,0xf0,0xff,0xff,};
byte bf16_vl_06[] = {0x62,0x22,0x17,0x07,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10,};
byte bf16_vl_07[] = {0x62,0x42,0x17,0x10,0x72,0x31,};
byte bf16_vl_08[] = {0x62,0x62,0x17,0x00,0x72,0x71,0x7f,};
byte bf16_vl_09[] = {0x62,0x62,0x17,0x97,0x72,0xa2,0x00,0xf8,0xff,0xff,};
// Expected machine code for the vcvtneps2bf16_mask cases bf16_vl_10..bf16_vl_20
// (each array is referenced by name from the ENC3 check with the same index).
// Same layout as the other arrays in this file: 0x62, three prefix payload
// bytes (the third is always 0x7e here), opcode 0x72, ModRM, then an optional
// SIB byte and displacement.
// In the fourth byte, 0x20 is set for the cases whose destination is written
// with Yh(...) and clear for the Xh(...) cases; 0x10 is set where the memory
// operand is OPSZ_4; 0x80 is set for the Z cases.
// bf16_vl_13/bf16_vl_14 and bf16_vl_17/bf16_vl_18 are pairs with the same
// operand shape but different registers, which shows up in the second prefix
// byte (0xc2 vs 0xf2) and in the ModRM byte.
byte bf16_vl_10[] = {0x62,0x02,0x7e,0x08,0x72,0xf5,};
byte bf16_vl_11[] = {0x62,0x02,0x7e,0x28,0x72,0xf5,};
byte bf16_vl_12[] = {0x62,0x22,0x7e,0x0f,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10,};
byte bf16_vl_13[] = {0x62,0xc2,0x7e,0x18,0x72,0x29,};
byte bf16_vl_14[] = {0x62,0xf2,0x7e,0x18,0x72,0x09,};
byte bf16_vl_15[] = {0x62,0x62,0x7e,0x08,0x72,0x71,0x7f,};
byte bf16_vl_16[] = {0x62,0x62,0x7e,0x9f,0x72,0xaa,0x00,0xf8,0xff,0xff,};
byte bf16_vl_17[] = {0x62,0xc2,0x7e,0x38,0x72,0x31,};
byte bf16_vl_18[] = {0x62,0xf2,0x7e,0x38,0x72,0x11,};
byte bf16_vl_19[] = {0x62,0xe2,0x7e,0x28,0x72,0x79,0x7f,};
byte bf16_vl_20[] = {0x62,0x62,0x7e,0xbf,0x72,0x9a,0x00,0xf0,0xff,0xff,};
// Expected machine code for the vdpbf16ps_mask cases bf16_vl_21..bf16_vl_30
// (each array is referenced by name from the ENC4 check with the same index).
// These use third prefix byte 0x16 and opcode 0x52. Apart from those two
// bytes, bf16_vl_21..bf16_vl_29 match bf16_vl_00..bf16_vl_08 position for
// position, since the operand lists are the same.
// bf16_vl_30 carries ModRM 0xb2, matching its XMM30 destination.
byte bf16_vl_21[] = {0x62,0x02,0x16,0x20,0x52,0xf4,};
byte bf16_vl_22[] = {0x62,0x02,0x16,0x00,0x52,0xf4,};
byte bf16_vl_23[] = {0x62,0x22,0x16,0x27,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10,};
byte bf16_vl_24[] = {0x62,0x42,0x16,0x30,0x52,0x31,};
byte bf16_vl_25[] = {0x62,0x62,0x16,0x20,0x52,0x71,0x7f,};
byte bf16_vl_26[] = {0x62,0x62,0x16,0xb7,0x52,0xb2,0x00,0xf0,0xff,0xff,};
byte bf16_vl_27[] = {0x62,0x22,0x16,0x07,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10,};
byte bf16_vl_28[] = {0x62,0x42,0x16,0x10,0x52,0x31,};
byte bf16_vl_29[] = {0x62,0x62,0x16,0x00,0x52,0x71,0x7f,};
byte bf16_vl_30[] = {0x62,0x62,0x16,0x97,0x52,0xb2,0x00,0xf8,0xff,0xff,};
// Second set of expected encodings for vcvtne2ps2bf16_mask
// (bf16_vl_31..bf16_vl_40), referenced by the ENC4 checks of the same index.
// bf16_vl_31..bf16_vl_39 are byte-for-byte identical to
// bf16_vl_00..bf16_vl_08. bf16_vl_40 differs from bf16_vl_09 only in the
// ModRM byte (0xb2 here vs 0xa2), matching its XMM30 destination instead of
// XMM28.
byte bf16_vl_31[] = {0x62,0x02,0x17,0x20,0x72,0xf4,};
byte bf16_vl_32[] = {0x62,0x02,0x17,0x00,0x72,0xf4,};
byte bf16_vl_33[] = {0x62,0x22,0x17,0x27,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10,};
byte bf16_vl_34[] = {0x62,0x42,0x17,0x30,0x72,0x31,};
byte bf16_vl_35[] = {0x62,0x62,0x17,0x20,0x72,0x71,0x7f,};
byte bf16_vl_36[] = {0x62,0x62,0x17,0xb7,0x72,0xb2,0x00,0xf0,0xff,0xff,};
byte bf16_vl_37[] = {0x62,0x22,0x17,0x07,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10,};
byte bf16_vl_38[] = {0x62,0x42,0x17,0x10,0x72,0x31,};
byte bf16_vl_39[] = {0x62,0x62,0x17,0x00,0x72,0x71,0x7f,};
byte bf16_vl_40[] = {0x62,0x62,0x17,0x97,0x72,0xb2,0x00,0xf8,0xff,0xff,};
// Second set of expected encodings for vcvtneps2bf16_mask
// (bf16_vl_41..bf16_vl_51), referenced by the ENC3 checks of the same index.
// bf16_vl_41, _42, _43 and _46 are byte-for-byte identical to bf16_vl_10,
// _11, _12 and _15. The remaining entries keep the operand shapes of the
// first set but use other destination registers, so their second prefix byte
// and/or ModRM byte differ.
// As in the first set, 0x20 in the fourth byte is set for the Yh(...) cases
// and 0x10 is set where the memory operand is OPSZ_4.
byte bf16_vl_41[] = {0x62,0x02,0x7e,0x08,0x72,0xf5,};
byte bf16_vl_42[] = {0x62,0x02,0x7e,0x28,0x72,0xf5,};
byte bf16_vl_43[] = {0x62,0x22,0x7e,0x0f,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10,};
byte bf16_vl_44[] = {0x62,0xf2,0x7e,0x18,0x72,0x29,};
byte bf16_vl_45[] = {0x62,0x42,0x7e,0x18,0x72,0x09,};
byte bf16_vl_46[] = {0x62,0x62,0x7e,0x08,0x72,0x71,0x7f,};
byte bf16_vl_47[] = {0x62,0x62,0x7e,0x9f,0x72,0xb2,0x00,0xf8,0xff,0xff,};
byte bf16_vl_48[] = {0x62,0xf2,0x7e,0x38,0x72,0x21,};
byte bf16_vl_49[] = {0x62,0x42,0x7e,0x38,0x72,0x01,};
byte bf16_vl_50[] = {0x62,0x62,0x7e,0x28,0x72,0x71,0x7f,};
byte bf16_vl_51[] = {0x62,0x62,0x7e,0xbf,0x72,0xb2,0x00,0xf0,0xff,0xff,};
// Second set of expected encodings for vdpbf16ps_mask
// (bf16_vl_52..bf16_vl_61), referenced by the ENC4 checks of the same index.
// Every array here is byte-for-byte identical to its counterpart in
// bf16_vl_21..bf16_vl_30 (third prefix byte 0x16, opcode 0x52).
byte bf16_vl_52[] = {0x62,0x02,0x16,0x20,0x52,0xf4,};
byte bf16_vl_53[] = {0x62,0x02,0x16,0x00,0x52,0xf4,};
byte bf16_vl_54[] = {0x62,0x22,0x16,0x27,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10,};
byte bf16_vl_55[] = {0x62,0x42,0x16,0x30,0x52,0x31,};
byte bf16_vl_56[] = {0x62,0x62,0x16,0x20,0x52,0x71,0x7f,};
byte bf16_vl_57[] = {0x62,0x62,0x16,0xb7,0x52,0xb2,0x00,0xf0,0xff,0xff,};
byte bf16_vl_58[] = {0x62,0x22,0x16,0x07,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10,};
byte bf16_vl_59[] = {0x62,0x42,0x16,0x10,0x52,0x31,};
byte bf16_vl_60[] = {0x62,0x62,0x16,0x00,0x52,0x71,0x7f,};
byte bf16_vl_61[] = {0x62,0x62,0x16,0x97,0x52,0xb2,0x00,0xf8,0xff,0xff,};
// Checks for vcvtne2ps2bf16 with an opmask, one per expected-byte array
// bf16_vl_00..bf16_vl_09. ENC4 is defined outside this file; its arguments
// here are: expected bytes, instruction name, a flag that is 0 or Z, then
// four operands (vector register, mask register, vector register, and a
// register or memory operand).
// NOTE(review): the first operand is presumably the destination and Z
// presumably selects zeroing-masking; confirm against the ENC4 definition.
// Covered shapes: register-register at YMM and XMM width, base+index*8+disp32
// memory, OPSZ_4 memory with a zero displacement, full-width memory with a
// small displacement, and OPSZ_4 memory with a negative displacement plus Z.
ENC4(bf16_vl_00, vcvtne2ps2bf16_mask, 0, R(YMM30), R(K0), R(YMM29), R(YMM28));
ENC4(bf16_vl_01, vcvtne2ps2bf16_mask, 0, R(XMM30), R(K0), R(XMM29), R(XMM28));
ENC4(bf16_vl_02, vcvtne2ps2bf16_mask, 0, R(YMM30), R(K7), R(YMM29), M(RBP, R14, 8, 0x10000000, OPSZ_32));
ENC4(bf16_vl_03, vcvtne2ps2bf16_mask, 0, R(YMM30), R(K0), R(YMM29), M(R9, NULL, 0, 0, OPSZ_4));
ENC4(bf16_vl_04, vcvtne2ps2bf16_mask, 0, R(YMM30), R(K0), R(YMM29), M(RCX, NULL, 0, 0xfe0, OPSZ_32));
ENC4(bf16_vl_05, vcvtne2ps2bf16_mask, Z, R(YMM30), R(K7), R(YMM29), M(RDX, NULL, 0, 0xfffff000, OPSZ_4));
ENC4(bf16_vl_06, vcvtne2ps2bf16_mask, 0, R(XMM30), R(K7), R(XMM29), M(RBP, R14, 8, 0x10000000, OPSZ_16));
ENC4(bf16_vl_07, vcvtne2ps2bf16_mask, 0, R(XMM30), R(K0), R(XMM29), M(R9, NULL, 0, 0, OPSZ_4));
ENC4(bf16_vl_08, vcvtne2ps2bf16_mask, 0, R(XMM30), R(K0), R(XMM29), M(RCX, NULL, 0, 0x7f0, OPSZ_16));
ENC4(bf16_vl_09, vcvtne2ps2bf16_mask, Z, R(XMM28), R(K7), R(XMM29), M(RDX, NULL, 0, 0xfffff800, OPSZ_4));
// Checks for vcvtneps2bf16 with an opmask, one per expected-byte array
// bf16_vl_10..bf16_vl_20. ENC3 is defined outside this file; its arguments
// here are: expected bytes, instruction name, a flag that is 0 or Z, then
// three operands (Xh/Yh-wrapped vector register, mask register, and a
// register or memory source).
// NOTE(review): Xh(...) and Yh(...) are defined elsewhere; they presumably
// describe a destination that is half the width of the XMM/YMM source.
// Confirm against their definitions.
ENC3(bf16_vl_10, vcvtneps2bf16_mask, 0, Xh(XMM30), R(K0), R(XMM29));
// YMM-source register form: the destination is written as Yh(YMM30)
// (it was previously XMM30).
ENC3(bf16_vl_11, vcvtneps2bf16_mask, 0, Yh(YMM30), R(K0), R(YMM29));
ENC3(bf16_vl_12, vcvtneps2bf16_mask, 0, Xh(XMM30), R(K7), M(RBP, R14, 8, 0x10000000, OPSZ_16));
ENC3(bf16_vl_13, vcvtneps2bf16_mask, 0, Xh(XMM21), R(K0), M(R9, NULL, 0, 0, OPSZ_4));
ENC3(bf16_vl_14, vcvtneps2bf16_mask, 0, Xh(XMM1), R(K0), M(RCX, NULL, 0, 0, OPSZ_4));
ENC3(bf16_vl_15, vcvtneps2bf16_mask, 0, Xh(XMM30), R(K0), M(RCX, NULL, 0, 0x7f0, OPSZ_16));
ENC3(bf16_vl_16, vcvtneps2bf16_mask, Z, Xh(XMM29), R(K7), M(RDX, NULL, 0, 0xfffff800, OPSZ_4));
// The next four cases "imply" YMM: they have a memory source and no YMM
// register source, so the Yh(...) destination is what selects the YMM form
// (three of the four use an OPSZ_4 memory operand).
ENC3(bf16_vl_17, vcvtneps2bf16_mask, 0, Yh(YMM22), R(K0), M(R9, NULL, 0, 0, OPSZ_4));
ENC3(bf16_vl_18, vcvtneps2bf16_mask, 0, Yh(YMM2), R(K0), M(RCX, NULL, 0, 0, OPSZ_4));
ENC3(bf16_vl_19, vcvtneps2bf16_mask, 0, Yh(YMM23), R(K0), M(RCX, NULL, 0, 0xfe0, OPSZ_32));
ENC3(bf16_vl_20, vcvtneps2bf16_mask, Z, Yh(YMM27), R(K7), M(RDX, NULL, 0, 0xfffff000, OPSZ_4));
// Checks for vdpbf16ps with an opmask, one per expected-byte array
// bf16_vl_21..bf16_vl_30. ENC4 is defined outside this file; its arguments
// here are: expected bytes, instruction name, a flag that is 0 or Z, then
// four operands (vector register, mask register, vector register, and a
// register or memory operand).
// The operand lists mirror the vcvtne2ps2bf16_mask checks for
// bf16_vl_00..bf16_vl_09, except that the last case uses XMM30 (not XMM28)
// as its first operand.
// NOTE(review): Z presumably selects zeroing-masking; confirm against the
// ENC4 definition.
ENC4(bf16_vl_21, vdpbf16ps_mask, 0, R(YMM30), R(K0), R(YMM29), R(YMM28));
ENC4(bf16_vl_22, vdpbf16ps_mask, 0, R(XMM30), R(K0), R(XMM29), R(XMM28));
ENC4(bf16_vl_23, vdpbf16ps_mask, 0, R(YMM30), R(K7), R(YMM29), M(RBP, R14, 8, 0x10000000, OPSZ_32));
ENC4(bf16_vl_24, vdpbf16ps_mask, 0, R(YMM30), R(K0), R(YMM29), M(R9, NULL, 0, 0, OPSZ_4));
ENC4(bf16_vl_25, vdpbf16ps_mask, 0, R(YMM30), R(K0), R(YMM29), M(RCX, NULL, 0, 0xfe0, OPSZ_32));
ENC4(bf16_vl_26, vdpbf16ps_mask, Z, R(YMM30), R(K7), R(YMM29), M(RDX, NULL, 0, 0xfffff000, OPSZ_4));
ENC4(bf16_vl_27, vdpbf16ps_mask, 0, R(XMM30), R(K7), R(XMM29), M(RBP, R14, 8, 0x10000000, OPSZ_16));
ENC4(bf16_vl_28, vdpbf16ps_mask, 0, R(XMM30), R(K0), R(XMM29), M(R9, NULL, 0, 0, OPSZ_4));
ENC4(bf16_vl_29, vdpbf16ps_mask, 0, R(XMM30), R(K0), R(XMM29), M(RCX, NULL, 0, 0x7f0, OPSZ_16));
ENC4(bf16_vl_30, vdpbf16ps_mask, Z, R(XMM30), R(K7), R(XMM29), M(RDX, NULL, 0, 0xfffff800, OPSZ_4));
// Second set of vcvtne2ps2bf16_mask checks, against bf16_vl_31..bf16_vl_40.
// The operand lists repeat those used for bf16_vl_00..bf16_vl_08 unchanged.
// The last case (bf16_vl_40) uses XMM30 as its first operand where
// bf16_vl_09 uses XMM28.
// NOTE(review): it is not evident from this file why the set is repeated;
// confirm whether the duplication is intentional.
ENC4(bf16_vl_31, vcvtne2ps2bf16_mask, 0, R(YMM30), R(K0), R(YMM29), R(YMM28));
ENC4(bf16_vl_32, vcvtne2ps2bf16_mask, 0, R(XMM30), R(K0), R(XMM29), R(XMM28));
ENC4(bf16_vl_33, vcvtne2ps2bf16_mask, 0, R(YMM30), R(K7), R(YMM29), M(RBP, R14, 8, 0x10000000, OPSZ_32));
ENC4(bf16_vl_34, vcvtne2ps2bf16_mask, 0, R(YMM30), R(K0), R(YMM29), M(R9, NULL, 0, 0, OPSZ_4));
ENC4(bf16_vl_35, vcvtne2ps2bf16_mask, 0, R(YMM30), R(K0), R(YMM29), M(RCX, NULL, 0, 0xfe0, OPSZ_32));
ENC4(bf16_vl_36, vcvtne2ps2bf16_mask, Z, R(YMM30), R(K7), R(YMM29), M(RDX, NULL, 0, 0xfffff000, OPSZ_4));
ENC4(bf16_vl_37, vcvtne2ps2bf16_mask, 0, R(XMM30), R(K7), R(XMM29), M(RBP, R14, 8, 0x10000000, OPSZ_16));
ENC4(bf16_vl_38, vcvtne2ps2bf16_mask, 0, R(XMM30), R(K0), R(XMM29), M(R9, NULL, 0, 0, OPSZ_4));
ENC4(bf16_vl_39, vcvtne2ps2bf16_mask, 0, R(XMM30), R(K0), R(XMM29), M(RCX, NULL, 0, 0x7f0, OPSZ_16));
ENC4(bf16_vl_40, vcvtne2ps2bf16_mask, Z, R(XMM30), R(K7), R(XMM29), M(RDX, NULL, 0, 0xfffff800, OPSZ_4));
// Second set of vcvtneps2bf16_mask checks, against bf16_vl_41..bf16_vl_51.
// The operand shapes repeat the first set (bf16_vl_10..bf16_vl_20), but
// several cases pick other destination registers (XMM5/XMM25, YMM4/YMM24,
// and XMM30/YMM30 in the remaining memory cases).
// NOTE(review): Xh(...) and Yh(...) are defined elsewhere; they presumably
// describe a destination that is half the width of the XMM/YMM source.
// Confirm against their definitions.
ENC3(bf16_vl_41, vcvtneps2bf16_mask, 0, Xh(XMM30), R(K0), R(XMM29));
ENC3(bf16_vl_42, vcvtneps2bf16_mask, 0, Yh(YMM30), R(K0), R(YMM29));
ENC3(bf16_vl_43, vcvtneps2bf16_mask, 0, Xh(XMM30), R(K7), M(RBP, R14, 8, 0x10000000, OPSZ_16));
ENC3(bf16_vl_44, vcvtneps2bf16_mask, 0, Xh(XMM5), R(K0), M(RCX, NULL, 0, 0, OPSZ_4));
ENC3(bf16_vl_45, vcvtneps2bf16_mask, 0, Xh(XMM25), R(K0), M(R9, NULL, 0, 0, OPSZ_4));
ENC3(bf16_vl_46, vcvtneps2bf16_mask, 0, Xh(XMM30), R(K0), M(RCX, NULL, 0, 0x7f0, OPSZ_16));
ENC3(bf16_vl_47, vcvtneps2bf16_mask, Z, Xh(XMM30), R(K7), M(RDX, NULL, 0, 0xfffff800, OPSZ_4));
// The next four cases "imply" YMM: they have a memory source and no YMM
// register source, so the Yh(...) destination is what selects the YMM form
// (three of the four use an OPSZ_4 memory operand).
ENC3(bf16_vl_48, vcvtneps2bf16_mask, 0, Yh(YMM4), R(K0), M(RCX, NULL, 0, 0, OPSZ_4));
ENC3(bf16_vl_49, vcvtneps2bf16_mask, 0, Yh(YMM24), R(K0), M(R9, NULL, 0, 0, OPSZ_4));
ENC3(bf16_vl_50, vcvtneps2bf16_mask, 0, Yh(YMM30), R(K0), M(RCX, NULL, 0, 0xfe0, OPSZ_32));
ENC3(bf16_vl_51, vcvtneps2bf16_mask, Z, Yh(YMM30), R(K7), M(RDX, NULL, 0, 0xfffff000, OPSZ_4));
// Second set of vdpbf16ps_mask checks, against bf16_vl_52..bf16_vl_61.
// The operand lists are the same as those used for bf16_vl_21..bf16_vl_30.
// NOTE(review): it is not evident from this file why the set is repeated;
// confirm whether the duplication is intentional.
ENC4(bf16_vl_52, vdpbf16ps_mask, 0, R(YMM30), R(K0), R(YMM29), R(YMM28));
ENC4(bf16_vl_53, vdpbf16ps_mask, 0, R(XMM30), R(K0), R(XMM29), R(XMM28));
ENC4(bf16_vl_54, vdpbf16ps_mask, 0, R(YMM30), R(K7), R(YMM29), M(RBP, R14, 8, 0x10000000, OPSZ_32));
ENC4(bf16_vl_55, vdpbf16ps_mask, 0, R(YMM30), R(K0), R(YMM29), M(R9, NULL, 0, 0, OPSZ_4));
ENC4(bf16_vl_56, vdpbf16ps_mask, 0, R(YMM30), R(K0), R(YMM29), M(RCX, NULL, 0, 0xfe0, OPSZ_32));
ENC4(bf16_vl_57, vdpbf16ps_mask, Z, R(YMM30), R(K7), R(YMM29), M(RDX, NULL, 0, 0xfffff000, OPSZ_4));
ENC4(bf16_vl_58, vdpbf16ps_mask, 0, R(XMM30), R(K7), R(XMM29), M(RBP, R14, 8, 0x10000000, OPSZ_16));
ENC4(bf16_vl_59, vdpbf16ps_mask, 0, R(XMM30), R(K0), R(XMM29), M(R9, NULL, 0, 0, OPSZ_4));
ENC4(bf16_vl_60, vdpbf16ps_mask, 0, R(XMM30), R(K0), R(XMM29), M(RCX, NULL, 0, 0x7f0, OPSZ_16));
ENC4(bf16_vl_61, vdpbf16ps_mask, Z, R(XMM30), R(K7), R(XMM29), M(RDX, NULL, 0, 0xfffff800, OPSZ_4));