[MergeICmps] Simplify the code.

Instead of patching the original blocks, we now generate new blocks and
delete the old blocks. This results in simpler code with a less twisted
control flow (see the change in `entry-block-shuffled.ll`).

This will make https://reviews.llvm.org/D60318 simpler by making it more
obvious where control flow created and deleted.

Reviewers: gchatelet

Subscribers: hiraditya, llvm-commits, spatel

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D61736

llvm-svn: 360771
diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
index 9a57ed6..d8baf50 100644
--- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
@@ -48,6 +48,7 @@
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/Pass.h"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include <algorithm>
 #include <numeric>
@@ -406,13 +407,6 @@
            First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset;
   }
 
-  // Merges the given comparison blocks into one memcmp block and update
-  // branches. Comparisons are assumed to be continguous. If NextBBInChain is
-  // null, the merged block will link to the phi block.
-  void mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
-                        BasicBlock *const NextBBInChain, PHINode &Phi,
-                        const TargetLibraryInfo *const TLI, AliasAnalysis *AA);
-
   PHINode &Phi_;
   std::vector<BCECmpBlock> Comparisons_;
   // The original entry block (before sorting);
@@ -452,7 +446,7 @@
         // chain before sorting. Unless we can abort the chain at this point
         // and start anew.
         //
-        // NOTE: we only handle block with single predecessor for now.
+        // NOTE: we only handle blocks a with single predecessor for now.
         if (Comparison.canSplit(AA)) {
           LLVM_DEBUG(dbgs()
                      << "Split initial block '" << Comparison.BB->getName()
@@ -540,162 +534,173 @@
 }
 #endif  // MERGEICMPS_DOT_ON
 
-bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI,
-                           AliasAnalysis *AA) {
-  // First pass to check if there is at least one merge. If not, we don't do
-  // anything and we keep analysis passes intact.
-  {
-    bool AtLeastOneMerged = false;
-    for (size_t I = 1; I < Comparisons_.size(); ++I) {
-      if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) {
-        AtLeastOneMerged = true;
-        break;
+namespace {
+
+// A class to compute the name of a set of merged basic blocks.
+// This is optimized for the common case of no block names.
+class MergedBlockName {
+  // Storage for the uncommon case of several named blocks.
+  SmallString<16> Scratch;
+
+public:
+  explicit MergedBlockName(ArrayRef<BCECmpBlock> Comparisons)
+      : Name(makeTwine(Comparisons)) {}
+  const Twine Name;
+
+private:
+  Twine makeTwine(ArrayRef<BCECmpBlock> Comparisons) {
+    assert(!Comparisons.empty() && "no basic block");
+    // Fast path: only one block, or no names at all.
+    if (Comparisons.size() == 1)
+      return Comparisons[0].BB->getName();
+    const int size = std::accumulate(Comparisons.begin(), Comparisons.end(), 0,
+                                     [](int i, const BCECmpBlock &Cmp) {
+                                       return i + Cmp.BB->getName().size();
+                                     });
+    if (size == 0)
+      return Twine();
+
+    // Slow path: at least two blocks, at least one block with a name.
+    Scratch.clear();
+    // We'll have `size` bytes for name and `Comparisons.size() - 1` bytes for
+    // separators.
+    Scratch.reserve(size + Comparisons.size() - 1);
+    const auto append = [this](StringRef str) {
+      Scratch.append(str.begin(), str.end());
+    };
+    append(Comparisons[0].BB->getName());
+    for (int I = 1, E = Comparisons.size(); I < E; ++I) {
+      const BasicBlock *const BB = Comparisons[I].BB;
+      if (!BB->getName().empty()) {
+        append("+");
+        append(BB->getName());
       }
     }
-    if (!AtLeastOneMerged) return false;
+    return Twine(Scratch);
   }
+};
+} // namespace
 
-  // Remove phi references to comparison blocks, they will be rebuilt as we
-  // merge the blocks.
-  for (const auto &Comparison : Comparisons_) {
-    Phi_.removeIncomingValue(Comparison.BB, false);
-  }
+// Merges the given contiguous comparison blocks into one memcmp block.
+static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
+                                    BasicBlock *const NextCmpBlock,
+                                    PHINode &Phi,
+                                    const TargetLibraryInfo *const TLI,
+                                    AliasAnalysis *AA) {
+  assert(!Comparisons.empty() && "merging zero comparisons");
+  LLVMContext &Context = NextCmpBlock->getContext();
+  const BCECmpBlock &FirstCmp = Comparisons[0];
 
-  // If entry block is part of the chain, we need to make the first block
-  // of the chain the new entry block of the function.
-  BasicBlock *Entry = &Comparisons_[0].BB->getParent()->getEntryBlock();
-  for (size_t I = 1; I < Comparisons_.size(); ++I) {
-    if (Entry == Comparisons_[I].BB) {
-      BasicBlock *NEntryBB = BasicBlock::Create(Entry->getContext(), "",
-                                                Entry->getParent(), Entry);
-      BranchInst::Create(Entry, NEntryBB);
-      break;
-    }
-  }
+  // Create a new cmp block before next cmp block.
+  BasicBlock *const BB =
+      BasicBlock::Create(Context, MergedBlockName(Comparisons).Name,
+                         NextCmpBlock->getParent(), NextCmpBlock);
+  IRBuilder<> Builder(BB);
+  // Add the GEPs from the first BCECmpBlock.
+  Value *const Lhs = Builder.Insert(FirstCmp.Lhs().GEP->clone());
+  Value *const Rhs = Builder.Insert(FirstCmp.Rhs().GEP->clone());
 
-  // Point the predecessors of the chain to the first comparison block (which is
-  // the new entry point) and update the entry block of the chain.
-  if (EntryBlock_ != Comparisons_[0].BB) {
-    EntryBlock_->replaceAllUsesWith(Comparisons_[0].BB);
-    EntryBlock_ = Comparisons_[0].BB;
-  }
+  Value *IsEqual = nullptr;
+  if (Comparisons.size() == 1) {
+    LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n");
+    Value *const LhsLoad =
+        Builder.CreateLoad(FirstCmp.Lhs().LoadI->getType(), Lhs);
+    Value *const RhsLoad =
+        Builder.CreateLoad(FirstCmp.Rhs().LoadI->getType(), Rhs);
+    // There are no blocks to merge, just do the comparison.
+    IsEqual = Builder.CreateICmpEQ(LhsLoad, RhsLoad);
+  } else {
+    LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons\n");
 
-  // Effectively merge blocks.
-  int NumMerged = 1;
-  for (size_t I = 1; I < Comparisons_.size(); ++I) {
-    if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) {
-      ++NumMerged;
-    } else {
-      // Merge all previous comparisons and start a new merge block.
-      mergeComparisons(
-          makeArrayRef(Comparisons_).slice(I - NumMerged, NumMerged),
-          Comparisons_[I].BB, Phi_, TLI, AA);
-      NumMerged = 1;
-    }
-  }
-  mergeComparisons(makeArrayRef(Comparisons_)
-                       .slice(Comparisons_.size() - NumMerged, NumMerged),
-                   nullptr, Phi_, TLI, AA);
-
-  return true;
-}
-
-void BCECmpChain::mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
-                                   BasicBlock *const NextBBInChain,
-                                   PHINode &Phi,
-                                   const TargetLibraryInfo *const TLI,
-                                   AliasAnalysis *AA) {
-  assert(!Comparisons.empty());
-  const auto &FirstComparison = *Comparisons.begin();
-  BasicBlock *const BB = FirstComparison.BB;
-  LLVMContext &Context = BB->getContext();
-
-  if (Comparisons.size() >= 2) {
     // If there is one block that requires splitting, we do it now, i.e.
     // just before we know we will collapse the chain. The instructions
     // can be executed before any of the instructions in the chain.
-    auto C = std::find_if(Comparisons.begin(), Comparisons.end(),
-                          [](const BCECmpBlock &B) { return B.RequireSplit; });
-    if (C != Comparisons.end())
-      C->split(EntryBlock_, AA);
+    const auto ToSplit =
+        std::find_if(Comparisons.begin(), Comparisons.end(),
+                     [](const BCECmpBlock &B) { return B.RequireSplit; });
+    if (ToSplit != Comparisons.end()) {
+      LLVM_DEBUG(dbgs() << "Splitting non_BCE work to header\n");
+      ToSplit->split(BB, AA);
+    }
 
-    LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons\n");
-    const auto TotalSize =
-        std::accumulate(Comparisons.begin(), Comparisons.end(), 0,
-                        [](int Size, const BCECmpBlock &C) {
-                          return Size + C.SizeBits();
-                        }) /
-        8;
+    const unsigned TotalSizeBits = std::accumulate(
+        Comparisons.begin(), Comparisons.end(), 0u,
+        [](int Size, const BCECmpBlock &C) { return Size + C.SizeBits(); });
 
-    // Incoming edges do not need to be updated, and both GEPs are already
-    // computing the right address, we just need to:
-    //   - replace the two loads and the icmp with the memcmp
-    //   - update the branch
-    //   - update the incoming values in the phi.
-    FirstComparison.BranchI->eraseFromParent();
-    FirstComparison.CmpI->eraseFromParent();
-    FirstComparison.Lhs().LoadI->eraseFromParent();
-    FirstComparison.Rhs().LoadI->eraseFromParent();
-
-    IRBuilder<> Builder(BB);
+    // Create memcmp() == 0.
     const auto &DL = Phi.getModule()->getDataLayout();
     Value *const MemCmpCall = emitMemCmp(
-        FirstComparison.Lhs().GEP, FirstComparison.Rhs().GEP,
-        ConstantInt::get(DL.getIntPtrType(Context), TotalSize),
-        Builder, DL, TLI);
-    Value *const MemCmpIsZero = Builder.CreateICmpEQ(
+        Lhs, Rhs,
+        ConstantInt::get(DL.getIntPtrType(Context), TotalSizeBits / 8), Builder,
+        DL, TLI);
+    IsEqual = Builder.CreateICmpEQ(
         MemCmpCall, ConstantInt::get(Type::getInt32Ty(Context), 0));
+  }
 
-    // Add a branch to the next basic block in the chain.
-    if (NextBBInChain) {
-      Builder.CreateCondBr(MemCmpIsZero, NextBBInChain, Phi.getParent());
-      Phi.addIncoming(ConstantInt::getFalse(Context), BB);
-    } else {
-      Builder.CreateBr(Phi.getParent());
-      Phi.addIncoming(MemCmpIsZero, BB);
-    }
-
-    // Delete merged blocks.
-    for (size_t I = 1; I < Comparisons.size(); ++I) {
-      BasicBlock *CBB = Comparisons[I].BB;
-      CBB->replaceAllUsesWith(BB);
-      CBB->eraseFromParent();
-    }
+  BasicBlock *const PhiBB = Phi.getParent();
+  // Add a branch to the next basic block in the chain.
+  if (NextCmpBlock == PhiBB) {
+    // Continue to phi, passing it the comparison result.
+    Builder.CreateBr(Phi.getParent());
+    Phi.addIncoming(IsEqual, BB);
   } else {
-    assert(Comparisons.size() == 1);
-    // There are no blocks to merge, but we still need to update the branches.
-    LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n");
-    if (NextBBInChain) {
-      if (FirstComparison.BranchI->isConditional()) {
-        LLVM_DEBUG(dbgs() << "conditional -> conditional\n");
-        // Just update the "true" target, the "false" target should already be
-        // the phi block.
-        assert(FirstComparison.BranchI->getSuccessor(1) == Phi.getParent());
-        FirstComparison.BranchI->setSuccessor(0, NextBBInChain);
-        Phi.addIncoming(ConstantInt::getFalse(Context), BB);
-      } else {
-        LLVM_DEBUG(dbgs() << "unconditional -> conditional\n");
-        // Replace the unconditional branch by a conditional one.
-        FirstComparison.BranchI->eraseFromParent();
-        IRBuilder<> Builder(BB);
-        Builder.CreateCondBr(FirstComparison.CmpI, NextBBInChain,
-                             Phi.getParent());
-        Phi.addIncoming(FirstComparison.CmpI, BB);
-      }
+    // Continue to next block if equal, exit to phi else.
+    Builder.CreateCondBr(IsEqual, NextCmpBlock, PhiBB);
+    Phi.addIncoming(ConstantInt::getFalse(Context), BB);
+  }
+  return BB;
+}
+
+bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI,
+                           AliasAnalysis *AA) {
+  assert(Comparisons_.size() >= 2 && "simplifying trivial BCECmpChain");
+  // First pass to check if there is at least one merge. If not, we don't do
+  // anything and we keep analysis passes intact.
+  const auto AtLeastOneMerged = [this]() {
+    for (size_t I = 1; I < Comparisons_.size(); ++I) {
+      if (IsContiguous(Comparisons_[I - 1], Comparisons_[I]))
+        return true;
+    }
+    return false;
+  };
+  if (!AtLeastOneMerged())
+    return false;
+
+  // Effectively merge blocks. We go in the reverse direction from the phi block
+  // so that the next block is always available to branch to.
+  const auto mergeRange = [this, TLI, AA](int I, int Num, BasicBlock *Next) {
+    return mergeComparisons(makeArrayRef(Comparisons_).slice(I, Num), Next,
+                            Phi_, TLI, AA);
+  };
+  int NumMerged = 1;
+  BasicBlock *NextCmpBlock = Phi_.getParent();
+  for (int I = static_cast<int>(Comparisons_.size()) - 2; I >= 0; --I) {
+    if (IsContiguous(Comparisons_[I], Comparisons_[I + 1])) {
+      ++NumMerged;
     } else {
-      if (FirstComparison.BranchI->isConditional()) {
-        LLVM_DEBUG(dbgs() << "conditional -> unconditional\n");
-        // Replace the conditional branch by an unconditional one.
-        FirstComparison.BranchI->eraseFromParent();
-        IRBuilder<> Builder(BB);
-        Builder.CreateBr(Phi.getParent());
-        Phi.addIncoming(FirstComparison.CmpI, BB);
-      } else {
-        LLVM_DEBUG(dbgs() << "unconditional -> unconditional\n");
-        Phi.addIncoming(FirstComparison.CmpI, BB);
-      }
+      NextCmpBlock = mergeRange(I + 1, NumMerged, NextCmpBlock);
+      NumMerged = 1;
     }
   }
+  NextCmpBlock = mergeRange(0, NumMerged, NextCmpBlock);
+
+  // Replace the original cmp chain with the new cmp chain by pointing all
+  // predecessors of EntryBlock_ to NextCmpBlock instead. This makes all cmp
+  // blocks in the old chain unreachable.
+  for (BasicBlock *Pred : predecessors(EntryBlock_)) {
+    Pred->getTerminator()->replaceUsesOfWith(EntryBlock_, NextCmpBlock);
+  }
+  EntryBlock_ = nullptr;
+
+  // Delete merged blocks. This also removes incoming values in phi.
+  SmallVector<BasicBlock *, 16> DeadBlocks;
+  for (auto &Cmp : Comparisons_) {
+    DeadBlocks.push_back(Cmp.BB);
+  }
+  DeleteDeadBlocks(DeadBlocks);
+
+  Comparisons_.clear();
+  return true;
 }
 
 std::vector<BasicBlock *> getOrderedBlocks(PHINode &Phi,
diff --git a/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll b/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
index c1e8107..298ce90 100644
--- a/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
+++ b/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
@@ -7,7 +7,7 @@
 
 define zeroext i1 @opeq1(
 ; PPC64LE-LABEL: opeq1:
-; PPC64LE:       # %bb.0: # %entry
+; PPC64LE:       # %bb.0: # %"entry+land.rhs.i"
 ; PPC64LE-NEXT:    ld 3, 0(3)
 ; PPC64LE-NEXT:    ld 4, 0(4)
 ; PPC64LE-NEXT:    xor 3, 3, 4
diff --git a/llvm/test/CodeGen/X86/memcmp-mergeexpand.ll b/llvm/test/CodeGen/X86/memcmp-mergeexpand.ll
index 785ba40..0be463d 100644
--- a/llvm/test/CodeGen/X86/memcmp-mergeexpand.ll
+++ b/llvm/test/CodeGen/X86/memcmp-mergeexpand.ll
@@ -8,7 +8,7 @@
 
 define zeroext i1 @opeq1(
 ; X86-LABEL: opeq1:
-; X86:       # %bb.0: # %entry
+; X86:       # %bb.0: # %"entry+land.rhs.i"
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl (%ecx), %edx
@@ -20,7 +20,7 @@
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: opeq1:
-; X64:       # %bb.0: # %entry
+; X64:       # %bb.0: # %"entry+land.rhs.i"
 ; X64-NEXT:    movq (%rdi), %rax
 ; X64-NEXT:    cmpq (%rsi), %rax
 ; X64-NEXT:    sete %al
diff --git a/llvm/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll b/llvm/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll
index fa4af66..00c70fb 100644
--- a/llvm/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll
+++ b/llvm/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll
@@ -5,19 +5,18 @@
 
 define zeroext i1 @opeq1(
 ; X86-LABEL: @opeq1(
-; X86-NEXT:  entry:
+; X86-NEXT:  "entry+land.rhs.i+land.rhs.i.2+land.rhs.i.3":
 ; X86-NEXT:    [[PTR:%.*]] = alloca i32
 ; X86-NEXT:    store i32 42, i32* [[PTR]]
-; X86-NEXT:    [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
-; X86-NEXT:    [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
-; X86-NEXT:    [[CSTR:%.*]] = bitcast i32* [[FIRST_I]] to i8*
-; X86-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[FIRST1_I]] to i8*
+; X86-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
+; X86-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
+; X86-NEXT:    [[CSTR:%.*]] = bitcast i32* [[TMP0]] to i8*
+; X86-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[TMP1]] to i8*
 ; X86-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 16)
-; X86-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; X86-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0
 ; X86-NEXT:    br label [[OPEQ1_EXIT:%.*]]
 ; X86:       opeq1.exit:
-; X86-NEXT:    [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[ENTRY:%.*]] ]
-; X86-NEXT:    ret i1 [[TMP1]]
+; X86-NEXT:    ret i1 [[TMP2]]
 ;
   %S* nocapture readonly dereferenceable(16) %a,
   %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 {
diff --git a/llvm/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll b/llvm/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll
index f416fa4..2123b79 100644
--- a/llvm/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll
+++ b/llvm/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll
@@ -3,37 +3,37 @@
 
 %S = type { i32, i32, i32, i32 }
 
-; The entry block is part of the chain. It however can not be merged. We need to make the
-; first comparison block in the chain the new entry block of the function.
+; The entry block is part of the chain. It however can not be merged. We need to
+; make sure that the control flow is still consistent (goes through each of the
+; blocks).
 
 define zeroext i1 @opeq1(
 ; CHECK-LABEL: @opeq1(
-; CHECK-NEXT:    br label [[LAND_RHS_I:%.*]]
-; CHECK:       entry:
-; CHECK-NEXT:    [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 3
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[FIRST_I]], align 4
-; CHECK-NEXT:    [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 2
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[FIRST1_I]], align 4
-; CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq i32 [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    br i1 [[CMP_I]], label [[LAND_RHS_I_3:%.*]], label [[OPEQ1_EXIT:%.*]]
-; CHECK:       land.rhs.i:
-; CHECK-NEXT:    [[SECOND_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 0
-; CHECK-NEXT:    [[SECOND2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 0
-; CHECK-NEXT:    [[CSTR:%.*]] = bitcast i32* [[SECOND_I]] to i8*
-; CHECK-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[SECOND2_I]] to i8*
-; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8)
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[MEMCMP]], 0
-; CHECK-NEXT:    br i1 [[TMP3]], label [[ENTRY:%.*]], label [[OPEQ1_EXIT]]
-; CHECK:       land.rhs.i.3:
-; CHECK-NEXT:    [[FOURTH_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 3
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[FOURTH_I]], align 4
-; CHECK-NEXT:    [[FOURTH2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 3
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[FOURTH2_I]], align 4
-; CHECK-NEXT:    [[CMP5_I:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
+; CHECK-NEXT:  "land.rhs.i+land.rhs.i.2":
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[CSTR:%.*]] = bitcast i32* [[TMP0]] to i8*
+; CHECK-NEXT:    [[CSTR3:%.*]] = bitcast i32* [[TMP1]] to i8*
+; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR3]], i64 8)
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[ENTRY2:%.*]], label [[OPEQ1_EXIT:%.*]]
+; CHECK:       entry2:
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[LAND_RHS_I_31:%.*]], label [[OPEQ1_EXIT]]
+; CHECK:       land.rhs.i.31:
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 3
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 3
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP8]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]]
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[TMP10]], [[TMP11]]
 ; CHECK-NEXT:    br label [[OPEQ1_EXIT]]
 ; CHECK:       opeq1.exit:
-; CHECK-NEXT:    [[TMP6:%.*]] = phi i1 [ false, [[LAND_RHS_I]] ], [ false, [[ENTRY]] ], [ [[CMP5_I]], [[LAND_RHS_I_3]] ]
-; CHECK-NEXT:    ret i1 [[TMP6]]
+; CHECK-NEXT:    [[TMP13:%.*]] = phi i1 [ [[TMP12]], [[LAND_RHS_I_31]] ], [ false, [[ENTRY2]] ], [ false, %"land.rhs.i+land.rhs.i.2" ]
+; CHECK-NEXT:    ret i1 [[TMP13]]
 ;
   %S* nocapture readonly dereferenceable(16) %a,
   %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 {
diff --git a/llvm/test/Transforms/MergeICmps/X86/multiple-blocks-does-work.ll b/llvm/test/Transforms/MergeICmps/X86/multiple-blocks-does-work.ll
index 790c0e9..0a75d3b 100644
--- a/llvm/test/Transforms/MergeICmps/X86/multiple-blocks-does-work.ll
+++ b/llvm/test/Transforms/MergeICmps/X86/multiple-blocks-does-work.ll
@@ -23,18 +23,18 @@
 ; X86-NEXT:    [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4
 ; X86-NEXT:    call void (...) @foo()
 ; X86-NEXT:    [[CMP2_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
-; X86-NEXT:    br i1 [[CMP2_I]], label [[LAND_RHS_I_2:%.*]], label [[OPEQ1_EXIT]]
-; X86:       land.rhs.i.2:
-; X86-NEXT:    [[THIRD_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 2
-; X86-NEXT:    [[THIRD2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 2
-; X86-NEXT:    [[CSTR:%.*]] = bitcast i32* [[THIRD_I]] to i8*
-; X86-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[THIRD2_I]] to i8*
+; X86-NEXT:    br i1 [[CMP2_I]], label %"land.rhs.i.2+land.rhs.i.3", label [[OPEQ1_EXIT]]
+; X86:       "land.rhs.i.2+land.rhs.i.3":
+; X86-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 2
+; X86-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 2
+; X86-NEXT:    [[CSTR:%.*]] = bitcast i32* [[TMP4]] to i8*
+; X86-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[TMP5]] to i8*
 ; X86-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8)
-; X86-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; X86-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[MEMCMP]], 0
 ; X86-NEXT:    br label [[OPEQ1_EXIT]]
 ; X86:       opeq1.exit:
-; X86-NEXT:    [[TMP5:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ false, [[LAND_RHS_I]] ], [ [[TMP4]], [[LAND_RHS_I_2]] ]
-; X86-NEXT:    ret i1 [[TMP5]]
+; X86-NEXT:    [[TMP7:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ false, [[LAND_RHS_I]] ], [ [[TMP6]], %"land.rhs.i.2+land.rhs.i.3" ]
+; X86-NEXT:    ret i1 [[TMP7]]
 ;
   %S* nocapture readonly dereferenceable(16) %a,
   %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 {
diff --git a/llvm/test/Transforms/MergeICmps/X86/pair-int32-int32.ll b/llvm/test/Transforms/MergeICmps/X86/pair-int32-int32.ll
index 13f2f48..0a6a681 100644
--- a/llvm/test/Transforms/MergeICmps/X86/pair-int32-int32.ll
+++ b/llvm/test/Transforms/MergeICmps/X86/pair-int32-int32.ll
@@ -6,17 +6,16 @@
 
 define zeroext i1 @opeq1(
 ; X86-LABEL: @opeq1(
-; X86-NEXT:  entry:
-; X86-NEXT:    [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
-; X86-NEXT:    [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
-; X86-NEXT:    [[CSTR:%.*]] = bitcast i32* [[FIRST_I]] to i8*
-; X86-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[FIRST1_I]] to i8*
+; X86-NEXT:  "entry+land.rhs.i":
+; X86-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
+; X86-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
+; X86-NEXT:    [[CSTR:%.*]] = bitcast i32* [[TMP0]] to i8*
+; X86-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[TMP1]] to i8*
 ; X86-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8)
-; X86-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; X86-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0
 ; X86-NEXT:    br label [[OPEQ1_EXIT:%.*]]
 ; X86:       opeq1.exit:
-; X86-NEXT:    [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[ENTRY:%.*]] ]
-; X86-NEXT:    ret i1 [[TMP1]]
+; X86-NEXT:    ret i1 [[TMP2]]
 ;
 ; X86-NOBUILTIN-LABEL: @opeq1(
 ; X86-NOBUILTIN-NEXT:  entry:
@@ -67,17 +66,15 @@
 ; Same as above, but the two blocks are in inverse order.
 define zeroext i1 @opeq1_inverse(
 ; X86-LABEL: @opeq1_inverse(
-; X86-NEXT:    br label [[LAND_RHS_I:%.*]]
-; X86:       land.rhs.i:
-; X86-NEXT:    [[SECOND_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
-; X86-NEXT:    [[SECOND2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
-; X86-NEXT:    [[CSTR:%.*]] = bitcast i32* [[SECOND_I]] to i8*
-; X86-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[SECOND2_I]] to i8*
+; X86-NEXT:  "land.rhs.i+entry":
+; X86-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
+; X86-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
+; X86-NEXT:    [[CSTR:%.*]] = bitcast i32* [[TMP0]] to i8*
+; X86-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[TMP1]] to i8*
 ; X86-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8)
-; X86-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; X86-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0
 ; X86-NEXT:    br label [[OPEQ1_EXIT:%.*]]
 ; X86:       opeq1.exit:
-; X86-NEXT:    [[TMP2:%.*]] = phi i1 [ [[TMP1]], [[LAND_RHS_I]] ]
 ; X86-NEXT:    ret i1 [[TMP2]]
 ;
 ; X86-NOBUILTIN-LABEL: @opeq1_inverse(
diff --git a/llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll b/llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll
index 91ef9b1..63283ed 100644
--- a/llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll
+++ b/llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll
@@ -8,18 +8,17 @@
 ; We can split %entry and create a memcmp(16 bytes).
 define zeroext i1 @opeq1(
 ; X86-LABEL: @opeq1(
-; X86-NEXT:  entry:
+; X86-NEXT:  "entry+land.rhs.i+land.rhs.i.2+land.rhs.i.3":
 ; X86-NEXT:    call void (...) @foo()
-; X86-NEXT:    [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
-; X86-NEXT:    [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
-; X86-NEXT:    [[CSTR:%.*]] = bitcast i32* [[FIRST_I]] to i8*
-; X86-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[FIRST1_I]] to i8*
+; X86-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
+; X86-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
+; X86-NEXT:    [[CSTR:%.*]] = bitcast i32* [[TMP0]] to i8*
+; X86-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[TMP1]] to i8*
 ; X86-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 16)
-; X86-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; X86-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0
 ; X86-NEXT:    br label [[OPEQ1_EXIT:%.*]]
 ; X86:       opeq1.exit:
-; X86-NEXT:    [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[ENTRY:%.*]] ]
-; X86-NEXT:    ret i1 [[TMP1]]
+; X86-NEXT:    ret i1 [[TMP2]]
 ;
 ; Make sure this call is moved to the beginning of the entry block.
   %S* nocapture readonly dereferenceable(16) %a,