[X86][AVX] Decode constant bits from insert_subvector(c1, c2, c3)
This mostly happens due to SimplifyDemandedVectorElts reducing a vector to insert_subvector(undef, c1, 0)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363499 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 16ddc7a..7863496 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5967,6 +5967,29 @@
return CastBitData(UndefSrcElts, SrcEltBits);
}
+ // Insert constant bits from the base and sub-vector sources.
+ if (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ isa<ConstantSDNode>(Op.getOperand(2))) {
+ // TODO - support insert_subvector through bitcasts.
+ if (EltSizeInBits != VT.getScalarSizeInBits())
+ return false;
+
+ APInt UndefSubElts;
+ SmallVector<APInt, 32> EltSubBits;
+ if (getTargetConstantBitsFromNode(Op.getOperand(1), EltSizeInBits,
+ UndefSubElts, EltSubBits,
+ AllowWholeUndefs, AllowPartialUndefs) &&
+ getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
+ UndefElts, EltBits, AllowWholeUndefs,
+ AllowPartialUndefs)) {
+ unsigned BaseIdx = Op.getConstantOperandVal(2);
+ UndefElts.insertBits(UndefSubElts, BaseIdx);
+ for (unsigned i = 0, e = EltSubBits.size(); i != e; ++i)
+ EltBits[BaseIdx + i] = EltSubBits[i];
+ return true;
+ }
+ }
+
// Extract constant bits from a subvector's source.
if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
isa<ConstantSDNode>(Op.getOperand(1))) {
diff --git a/test/CodeGen/X86/avx512-shuffles/partial_permute.ll b/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
index 57e333d..685efde 100644
--- a/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
+++ b/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
@@ -1805,10 +1805,8 @@
define <4 x i32> @test_16xi32_to_4xi32_perm_mask9(<16 x i32> %vec) {
; CHECK-LABEL: test_16xi32_to_4xi32_perm_mask9:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,12,3]
-; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [4,1,0,2]
-; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; CHECK-NEXT: vpermd %ymm3, %ymm1, %ymm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [4,1,12,2]
+; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; CHECK-NEXT: vpermt2d %ymm0, %ymm2, %ymm1
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
diff --git a/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/test/CodeGen/X86/vector-shuffle-combining-avx.ll
index f8dffbc..45c2abc 100644
--- a/test/CodeGen/X86/vector-shuffle-combining-avx.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining-avx.ll
@@ -207,23 +207,10 @@
ret <8 x float> %1
}
define <8 x float> @demandedelts_vpermilvar_8f32_movsldup(<8 x float> %a0, i32 %a1) {
-; AVX1-LABEL: demandedelts_vpermilvar_8f32_movsldup:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = <u,0,2,2,4,4,6,6>
-; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],mem[4,5,6,7]
-; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,2,3,4,5,6,7]
-; AVX1-NEXT: ret{{[l|q]}}
-;
-; AVX2-LABEL: demandedelts_vpermilvar_8f32_movsldup:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
-; AVX2-NEXT: ret{{[l|q]}}
-;
-; AVX512-LABEL: demandedelts_vpermilvar_8f32_movsldup:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
-; AVX512-NEXT: ret{{[l|q]}}
+; CHECK-LABEL: demandedelts_vpermilvar_8f32_movsldup:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
+; CHECK-NEXT: ret{{[l|q]}}
%1 = insertelement <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>, i32 %a1, i32 0
%2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %1)
%3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
diff --git a/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll b/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
index d796108..7fc295e 100644
--- a/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
@@ -933,10 +933,7 @@
;
; X64-LABEL: combine_vpermi2var_8f64_as_permpd:
; X64: # %bb.0:
-; X64-NEXT: vmovapd {{.*#+}} zmm2 = <u,2,1,3,4,6,5,7>
-; X64-NEXT: vinsertf32x4 $0, {{.*}}(%rip), %zmm2, %zmm2
-; X64-NEXT: vpermi2pd %zmm1, %zmm0, %zmm2
-; X64-NEXT: vpermpd {{.*#+}} zmm0 = zmm2[2,3,1,1,6,7,5,5]
+; X64-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[1,3,2,2,5,7,6,6]
; X64-NEXT: retq
%res0 = insertelement <8 x i64> <i64 0, i64 2, i64 1, i64 3, i64 4, i64 6, i64 5, i64 7>, i64 %a2, i32 0
%res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %res0, <8 x double> %x1, i8 -1)
diff --git a/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/test/CodeGen/X86/vector-shuffle-combining-xop.ll
index e785530..73a8cbe 100644
--- a/test/CodeGen/X86/vector-shuffle-combining-xop.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining-xop.ll
@@ -155,10 +155,7 @@
;
; X64-LABEL: demandedelts_vpermil2pd256_as_shufpd:
; X64: # %bb.0:
-; X64-NEXT: vmovapd {{.*#+}} xmm2 = <u,4,2,7>
-; X64-NEXT: vblendpd {{.*#+}} ymm2 = ymm2[0,1],mem[2,3]
-; X64-NEXT: vpermil2pd $0, %ymm2, %ymm1, %ymm0, %ymm0
-; X64-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,2,3]
+; X64-NEXT: vpermil2pd {{.*#+}} ymm0 = ymm1[0,0],ymm0[3],ymm1[3]
; X64-NEXT: retq
%res0 = insertelement <4 x i64> <i64 0, i64 4, i64 2, i64 7>, i64 %a2, i32 0
%res1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %res0, i8 0)