SROA: Allow eliminating addrspacecasted allocas

There is a circular dependency between SROA and InferAddressSpaces
today that requires running both multiple times in order to be able to
eliminate all simple allocas and addrspacecasts. InferAddressSpaces
can't remove addrspacecasts when the casted pointer is written to
memory, and SROA helps move pointers out of memory.

This should avoid inserting new commuting addrspacecasts with GEPs,
since there are unresolved questions about pointer wrapping between
different address spaces.

For now, don't replace volatile operations that don't match the alloca
addrspace, as it would change the address space of the access. It may
be still OK to insert an addrspacecast from the new alloca, but be
more conservative for now.

llvm-svn: 363462
diff --git a/llvm/include/llvm/Analysis/PtrUseVisitor.h b/llvm/include/llvm/Analysis/PtrUseVisitor.h
index dca8718..fbf04c8 100644
--- a/llvm/include/llvm/Analysis/PtrUseVisitor.h
+++ b/llvm/include/llvm/Analysis/PtrUseVisitor.h
@@ -256,6 +256,10 @@
     enqueueUsers(BC);
   }
 
+  void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
+    enqueueUsers(ASC);
+  }
+
   void visitPtrToIntInst(PtrToIntInst &I) {
     PI.setEscaped(&I);
   }
diff --git a/llvm/lib/Analysis/PtrUseVisitor.cpp b/llvm/lib/Analysis/PtrUseVisitor.cpp
index f78446c..9a834ba 100644
--- a/llvm/lib/Analysis/PtrUseVisitor.cpp
+++ b/llvm/lib/Analysis/PtrUseVisitor.cpp
@@ -34,5 +34,11 @@
   if (!IsOffsetKnown)
     return false;
 
-  return GEPI.accumulateConstantOffset(DL, Offset);
+  APInt TmpOffset(DL.getIndexTypeSizeInBits(GEPI.getType()), 0);
+  if (GEPI.accumulateConstantOffset(DL, TmpOffset)) {
+    Offset += TmpOffset.sextOrTrunc(Offset.getBitWidth());
+    return true;
+  }
+
+  return false;
 }
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 790a16d..6454f1e 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -713,6 +713,13 @@
     return Base::visitBitCastInst(BC);
   }
 
+  void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
+    if (ASC.use_empty())
+      return markAsDead(ASC);
+
+    return Base::visitAddrSpaceCastInst(ASC);
+  }
+
   void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
     if (GEPI.use_empty())
       return markAsDead(GEPI);
@@ -776,7 +783,10 @@
     if (!IsOffsetKnown)
       return PI.setAborted(&LI);
 
-    const DataLayout &DL = LI.getModule()->getDataLayout();
+    if (LI.isVolatile() &&
+        LI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
+      return PI.setAborted(&LI);
+
     uint64_t Size = DL.getTypeStoreSize(LI.getType());
     return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
   }
@@ -788,7 +798,10 @@
     if (!IsOffsetKnown)
       return PI.setAborted(&SI);
 
-    const DataLayout &DL = SI.getModule()->getDataLayout();
+    if (SI.isVolatile() &&
+        SI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
+      return PI.setAborted(&SI);
+
     uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
 
     // If this memory access can be shown to *statically* extend outside the
@@ -823,6 +836,11 @@
     if (!IsOffsetKnown)
       return PI.setAborted(&II);
 
+    // Don't replace this with a store with a different address space.  TODO:
+    // Use a store with the casted new alloca?
+    if (II.isVolatile() && II.getDestAddressSpace() != DL.getAllocaAddrSpace())
+      return PI.setAborted(&II);
+
     insertUse(II, Offset, Length ? Length->getLimitedValue()
                                  : AllocSize - Offset.getLimitedValue(),
               (bool)Length);
@@ -842,6 +860,13 @@
     if (!IsOffsetKnown)
       return PI.setAborted(&II);
 
+    // Don't replace this with a load/store with a different address space.
+    // TODO: Use a load/store with the casted new alloca?
+    if (II.isVolatile() &&
+        (II.getDestAddressSpace() != DL.getAllocaAddrSpace() ||
+         II.getSourceAddressSpace() != DL.getAllocaAddrSpace()))
+      return PI.setAborted(&II);
+
     // This side of the transfer is completely out-of-bounds, and so we can
     // nuke the entire transfer. However, we also need to nuke the other side
     // if already added to our partitions.
@@ -949,7 +974,7 @@
         if (!GEP->hasAllZeroIndices())
           return GEP;
       } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
-                 !isa<SelectInst>(I)) {
+                 !isa<SelectInst>(I) && !isa<AddrSpaceCastInst>(I)) {
         return I;
       }
 
@@ -1561,7 +1586,8 @@
   Value *Int8Ptr = nullptr;
   APInt Int8PtrOffset(Offset.getBitWidth(), 0);
 
-  Type *TargetTy = PointerTy->getPointerElementType();
+  PointerType *TargetPtrTy = cast<PointerType>(PointerTy);
+  Type *TargetTy = TargetPtrTy->getElementType();
 
   do {
     // First fold any existing GEPs into the offset.
@@ -1630,8 +1656,11 @@
   Ptr = OffsetPtr;
 
   // On the off chance we were targeting i8*, guard the bitcast here.
-  if (Ptr->getType() != PointerTy)
-    Ptr = IRB.CreateBitCast(Ptr, PointerTy, NamePrefix + "sroa_cast");
+  if (cast<PointerType>(Ptr->getType()) != TargetPtrTy) {
+    Ptr = IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr,
+                                                  TargetPtrTy,
+                                                  NamePrefix + "sroa_cast");
+  }
 
   return Ptr;
 }
@@ -3082,8 +3111,9 @@
         continue;
       }
 
-      assert(isa<BitCastInst>(I) || isa<PHINode>(I) ||
-             isa<SelectInst>(I) || isa<GetElementPtrInst>(I));
+      assert(isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I) ||
+             isa<PHINode>(I) || isa<SelectInst>(I) ||
+             isa<GetElementPtrInst>(I));
       for (User *U : I->users())
         if (Visited.insert(cast<Instruction>(U)).second)
           Uses.push_back(cast<Instruction>(U));
@@ -3384,6 +3414,11 @@
     return false;
   }
 
+  bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
+    enqueueUsers(ASC);
+    return false;
+  }
+
   bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
     enqueueUsers(GEPI);
     return false;
diff --git a/llvm/test/Transforms/SROA/addrspacecast.ll b/llvm/test/Transforms/SROA/addrspacecast.ll
index 4b3b81f..5edef89 100644
--- a/llvm/test/Transforms/SROA/addrspacecast.ll
+++ b/llvm/test/Transforms/SROA/addrspacecast.ll
@@ -10,12 +10,7 @@
 define i64 @alloca_addrspacecast_bitcast(i64 %X) {
 ; CHECK-LABEL: @alloca_addrspacecast_bitcast(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A:%.*]] = alloca [8 x i8]
-; CHECK-NEXT:    [[A_CAST:%.*]] = addrspacecast [8 x i8]* [[A]] to [8 x i8] addrspace(1)*
-; CHECK-NEXT:    [[B:%.*]] = bitcast [8 x i8] addrspace(1)* [[A_CAST]] to i64 addrspace(1)*
-; CHECK-NEXT:    store i64 [[X:%.*]], i64 addrspace(1)* [[B]]
-; CHECK-NEXT:    [[Z:%.*]] = load i64, i64 addrspace(1)* [[B]]
-; CHECK-NEXT:    ret i64 [[Z]]
+; CHECK-NEXT:    ret i64 [[X:%.*]]
 ;
 entry:
   %A = alloca [8 x i8]
@@ -29,12 +24,7 @@
 define i64 @alloca_bitcast_addrspacecast(i64 %X) {
 ; CHECK-LABEL: @alloca_bitcast_addrspacecast(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A:%.*]] = alloca [8 x i8]
-; CHECK-NEXT:    [[A_CAST:%.*]] = bitcast [8 x i8]* [[A]] to i64*
-; CHECK-NEXT:    [[B:%.*]] = addrspacecast i64* [[A_CAST]] to i64 addrspace(1)*
-; CHECK-NEXT:    store i64 [[X:%.*]], i64 addrspace(1)* [[B]]
-; CHECK-NEXT:    [[Z:%.*]] = load i64, i64 addrspace(1)* [[B]]
-; CHECK-NEXT:    ret i64 [[Z]]
+; CHECK-NEXT:    ret i64 [[X:%.*]]
 ;
 entry:
   %A = alloca [8 x i8]
@@ -48,15 +38,7 @@
 define i64 @alloca_addrspacecast_gep(i64 %X) {
 ; CHECK-LABEL: @alloca_addrspacecast_gep(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A_AS0:%.*]] = alloca [256 x i8], align 4
-; CHECK-NEXT:    [[GEPA_AS0:%.*]] = getelementptr [256 x i8], [256 x i8]* [[A_AS0]], i16 0, i16 32
-; CHECK-NEXT:    [[GEPA_AS0_BC:%.*]] = bitcast i8* [[GEPA_AS0]] to i64*
-; CHECK-NEXT:    store i64 [[X:%.*]], i64* [[GEPA_AS0_BC]], align 4
-; CHECK-NEXT:    [[A_AS1:%.*]] = addrspacecast [256 x i8]* [[A_AS0]] to [256 x i8] addrspace(1)*
-; CHECK-NEXT:    [[GEPA_AS1:%.*]] = getelementptr [256 x i8], [256 x i8] addrspace(1)* [[A_AS1]], i16 0, i16 32
-; CHECK-NEXT:    [[GEPA_AS1_BC:%.*]] = bitcast i8 addrspace(1)* [[GEPA_AS1]] to i64 addrspace(1)*
-; CHECK-NEXT:    [[Z:%.*]] = load i64, i64 addrspace(1)* [[GEPA_AS1_BC]], align 4
-; CHECK-NEXT:    ret i64 [[Z]]
+; CHECK-NEXT:    ret i64 [[X:%.*]]
 ;
 entry:
   %A.as0 = alloca [256 x i8], align 4
@@ -76,13 +58,7 @@
 define i64 @alloca_gep_addrspacecast(i64 %X) {
 ; CHECK-LABEL: @alloca_gep_addrspacecast(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A_AS0:%.*]] = alloca [256 x i8], align 4
-; CHECK-NEXT:    [[GEPA_AS0:%.*]] = getelementptr [256 x i8], [256 x i8]* [[A_AS0]], i16 0, i16 32
-; CHECK-NEXT:    [[GEPA_AS0_BC:%.*]] = bitcast i8* [[GEPA_AS0]] to i64*
-; CHECK-NEXT:    store i64 [[X:%.*]], i64* [[GEPA_AS0_BC]], align 4
-; CHECK-NEXT:    [[GEPA_AS1_BC:%.*]] = addrspacecast i64* [[GEPA_AS0_BC]] to i64 addrspace(1)*
-; CHECK-NEXT:    [[Z:%.*]] = load i64, i64 addrspace(1)* [[GEPA_AS1_BC]], align 4
-; CHECK-NEXT:    ret i64 [[Z]]
+; CHECK-NEXT:    ret i64 [[X:%.*]]
 ;
 entry:
   %A.as0 = alloca [256 x i8], align 4
@@ -99,16 +75,7 @@
 define i64 @alloca_gep_addrspacecast_gep(i64 %X) {
 ; CHECK-LABEL: @alloca_gep_addrspacecast_gep(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A_AS0:%.*]] = alloca [256 x i8], align 4
-; CHECK-NEXT:    [[GEPA_AS0:%.*]] = getelementptr [256 x i8], [256 x i8]* [[A_AS0]], i16 0, i16 32
-; CHECK-NEXT:    [[GEPA_AS0_BC:%.*]] = bitcast i8* [[GEPA_AS0]] to i64*
-; CHECK-NEXT:    store i64 [[X:%.*]], i64* [[GEPA_AS0_BC]], align 4
-; CHECK-NEXT:    [[GEPB_AS0:%.*]] = getelementptr [256 x i8], [256 x i8]* [[A_AS0]], i16 0, i16 16
-; CHECK-NEXT:    [[GEPB_AS1:%.*]] = addrspacecast i8* [[GEPB_AS0]] to i8 addrspace(1)*
-; CHECK-NEXT:    [[GEPC_AS1:%.*]] = getelementptr i8, i8 addrspace(1)* [[GEPB_AS1]], i16 16
-; CHECK-NEXT:    [[GEPC_AS1_BC:%.*]] = bitcast i8 addrspace(1)* [[GEPC_AS1]] to i64 addrspace(1)*
-; CHECK-NEXT:    [[Z:%.*]] = load i64, i64 addrspace(1)* [[GEPC_AS1_BC]], align 4
-; CHECK-NEXT:    ret i64 [[Z]]
+; CHECK-NEXT:    ret i64 [[X:%.*]]
 ;
 entry:
   %A.as0 = alloca [256 x i8], align 4
@@ -287,11 +254,6 @@
 
 define void @select_addrspacecast(i1 %a, i1 %b) {
 ; CHECK-LABEL: @select_addrspacecast(
-; CHECK-NEXT:    [[C:%.*]] = alloca i64, align 8
-; CHECK-NEXT:    [[P_0_C:%.*]] = select i1 undef, i64* [[C]], i64* [[C]]
-; CHECK-NEXT:    [[ASC:%.*]] = addrspacecast i64* [[P_0_C]] to i64 addrspace(1)*
-; CHECK-NEXT:    [[COND_IN:%.*]] = select i1 undef, i64 addrspace(1)* [[ASC]], i64 addrspace(1)* [[ASC]]
-; CHECK-NEXT:    [[COND:%.*]] = load i64, i64 addrspace(1)* [[COND_IN]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %c = alloca i64, align 8
@@ -306,9 +268,8 @@
 define void @select_addrspacecast_const_op(i1 %a, i1 %b) {
 ; CHECK-LABEL: @select_addrspacecast_const_op(
 ; CHECK-NEXT:    [[C:%.*]] = alloca i64, align 8
-; CHECK-NEXT:    [[P_0_C:%.*]] = select i1 undef, i64* [[C]], i64* [[C]]
-; CHECK-NEXT:    [[ASC:%.*]] = addrspacecast i64* [[P_0_C]] to i64 addrspace(1)*
-; CHECK-NEXT:    [[COND_IN:%.*]] = select i1 undef, i64 addrspace(1)* [[ASC]], i64 addrspace(1)* null
+; CHECK-NEXT:    [[C_0_ASC_SROA_CAST:%.*]] = addrspacecast i64* [[C]] to i64 addrspace(1)*
+; CHECK-NEXT:    [[COND_IN:%.*]] = select i1 undef, i64 addrspace(1)* [[C_0_ASC_SROA_CAST]], i64 addrspace(1)* null
 ; CHECK-NEXT:    [[COND:%.*]] = load i64, i64 addrspace(1)* [[COND_IN]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -325,11 +286,8 @@
 
 define void @select_addrspacecast_gv(i1 %a, i1 %b) {
 ; CHECK-LABEL: @select_addrspacecast_gv(
-; CHECK-NEXT:    [[C:%.*]] = alloca i64, align 8
-; CHECK-NEXT:    [[P_0_C:%.*]] = select i1 undef, i64* [[C]], i64* [[C]]
-; CHECK-NEXT:    [[ASC:%.*]] = addrspacecast i64* [[P_0_C]] to i64 addrspace(1)*
-; CHECK-NEXT:    [[COND_IN:%.*]] = select i1 undef, i64 addrspace(1)* [[ASC]], i64 addrspace(1)* @gv
-; CHECK-NEXT:    [[COND:%.*]] = load i64, i64 addrspace(1)* [[COND_IN]], align 8
+; CHECK-NEXT:    [[COND_SROA_SPECULATE_LOAD_FALSE:%.*]] = load i64, i64 addrspace(1)* @gv, align 8
+; CHECK-NEXT:    [[COND_SROA_SPECULATED:%.*]] = select i1 undef, i64 undef, i64 [[COND_SROA_SPECULATE_LOAD_FALSE]]
 ; CHECK-NEXT:    ret void
 ;
   %c = alloca i64, align 8
diff --git a/llvm/test/Transforms/SROA/basictest.ll b/llvm/test/Transforms/SROA/basictest.ll
index 2c5829d..498d2c8 100644
--- a/llvm/test/Transforms/SROA/basictest.ll
+++ b/llvm/test/Transforms/SROA/basictest.ll
@@ -65,6 +65,67 @@
   ret i64 %Z
 }
 
+define i64 @test2_addrspacecast(i64 %X) {
+; CHECK-LABEL: @test2_addrspacecast(
+; CHECK-NOT: alloca
+; CHECK: ret i64 %X
+
+entry:
+  %A = alloca [8 x i8]
+  %B = addrspacecast [8 x i8]* %A to i64 addrspace(1)*
+  store i64 %X, i64 addrspace(1)* %B
+  br label %L2
+
+L2:
+  %Z = load i64, i64 addrspace(1)* %B
+  ret i64 %Z
+}
+
+define i64 @test2_addrspacecast_gep(i64 %X, i16 %idx) {
+; CHECK-LABEL: @test2_addrspacecast_gep(
+; CHECK-NOT: alloca
+; CHECK: ret i64 %X
+
+entry:
+  %A = alloca [256 x i8]
+  %B = addrspacecast [256 x i8]* %A to i64 addrspace(1)*
+  %gepA = getelementptr [256 x i8], [256 x i8]* %A, i16 0, i16 32
+  %gepB = getelementptr i64, i64 addrspace(1)* %B, i16 4
+  store i64 %X, i64 addrspace(1)* %gepB, align 1
+  br label %L2
+
+L2:
+  %gepA.bc = bitcast i8* %gepA to i64*
+  %Z = load i64, i64* %gepA.bc, align 1
+  ret i64 %Z
+}
+
+; Avoid crashing when load/storing at different offsets.
+define i64 @test2_addrspacecast_gep_offset(i64 %X) {
+; CHECK-LABEL: @test2_addrspacecast_gep_offset(
+; CHECK: %A.sroa.0 = alloca [10 x i8]
+; CHECK: [[GEP0:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* %A.sroa.0, i16 0, i16 2
+; CHECK-NEXT: [[GEP1:%.*]] = addrspacecast i8* [[GEP0]] to i64 addrspace(1)*
+; CHECK-NEXT: store i64 %X, i64 addrspace(1)* [[GEP1]], align 1
+; CHECK: br
+
+; CHECK: [[BITCAST:%.*]] = bitcast [10 x i8]* %A.sroa.0 to i64*
+; CHECK: %A.sroa.0.0.A.sroa.0.30.Z = load i64, i64* [[BITCAST]], align 1
+; CHECK-NEXT: ret
+entry:
+  %A = alloca [256 x i8]
+  %B = addrspacecast [256 x i8]* %A to i64 addrspace(1)*
+  %gepA = getelementptr [256 x i8], [256 x i8]* %A, i16 0, i16 30
+  %gepB = getelementptr i64, i64 addrspace(1)* %B, i16 4
+  store i64 %X, i64 addrspace(1)* %gepB, align 1
+  br label %L2
+
+L2:
+  %gepA.bc = bitcast i8* %gepA to i64*
+  %Z = load i64, i64* %gepA.bc, align 1
+  ret i64 %Z
+}
+
 define void @test3(i8* %dst, i8* align 8 %src) {
 ; CHECK-LABEL: @test3(
 
@@ -426,6 +487,25 @@
   ret i16 %val
 }
 
+define i16 @test5_multi_addrspace_access() {
+; CHECK-LABEL: @test5_multi_addrspace_access(
+; CHECK-NOT: alloca float
+; CHECK:      %[[cast:.*]] = bitcast float 0.0{{.*}} to i32
+; CHECK-NEXT: %[[shr:.*]] = lshr i32 %[[cast]], 16
+; CHECK-NEXT: %[[trunc:.*]] = trunc i32 %[[shr]] to i16
+; CHECK-NEXT: ret i16 %[[trunc]]
+
+entry:
+  %a = alloca [4 x i8]
+  %fptr = bitcast [4 x i8]* %a to float*
+  %fptr.as1 = addrspacecast float* %fptr to float addrspace(1)*
+  store float 0.0, float addrspace(1)* %fptr.as1
+  %ptr = getelementptr [4 x i8], [4 x i8]* %a, i32 0, i32 2
+  %iptr = bitcast i8* %ptr to i16*
+  %val = load i16, i16* %iptr
+  ret i16 %val
+}
+
 define i32 @test6() {
 ; CHECK-LABEL: @test6(
 ; CHECK: alloca i32
@@ -825,6 +905,27 @@
   ret i32 undef
 }
 
+declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i32, i1) nounwind
+
+define i32 @test19_addrspacecast(%opaque* %x) {
+; This input will cause us to try to compute a natural GEP when rewriting
+; pointers in such a way that we try to GEP through the opaque type. Previously,
+; a check for an unsized type was missing and this crashed. Ensure it behaves
+; reasonably now.
+; CHECK-LABEL: @test19_addrspacecast(
+; CHECK-NOT: alloca
+; CHECK: ret i32 undef
+
+entry:
+  %a = alloca { i64, i8* }
+  %cast1 = addrspacecast %opaque* %x to i8 addrspace(1)*
+  %cast2 = bitcast { i64, i8* }* %a to i8*
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %cast2, i8 addrspace(1)* %cast1, i32 16, i32 1, i1 false)
+  %gep = getelementptr inbounds { i64, i8* }, { i64, i8* }* %a, i32 0, i32 0
+  %val = load i64, i64* %gep
+  ret i32 undef
+}
+
 define i32 @test20() {
 ; Ensure we can track negative offsets (before the beginning of the alloca) and
 ; negative relative offsets from offsets starting past the end of the alloca.
@@ -1154,14 +1255,15 @@
 define void @PR14105_as1({ [16 x i8] } addrspace(1)* %ptr) {
 ; Make sure this the right address space pointer is used for type check.
 ; CHECK-LABEL: @PR14105_as1(
+; CHECK: alloca { [16 x i8] }, align 8
+; CHECK-NEXT: %gep = getelementptr inbounds { [16 x i8] }, { [16 x i8] } addrspace(1)* %ptr, i64 -1
+; CHECK-NEXT: %cast1 = bitcast { [16 x i8] } addrspace(1)* %gep to i8 addrspace(1)*
+; CHECK-NEXT: %cast2 = bitcast { [16 x i8] }* %a to i8*
+; CHECK-NEXT: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 8 %cast1, i8* align 8 %cast2, i32 16, i1 true)
 
 entry:
   %a = alloca { [16 x i8] }, align 8
-; CHECK: alloca [16 x i8], align 8
-
   %gep = getelementptr inbounds { [16 x i8] }, { [16 x i8] } addrspace(1)* %ptr, i64 -1
-; CHECK-NEXT: getelementptr inbounds { [16 x i8] }, { [16 x i8] } addrspace(1)* %ptr, i16 -1, i32 0, i16 0
-
   %cast1 = bitcast { [16 x i8 ] } addrspace(1)* %gep to i8 addrspace(1)*
   %cast2 = bitcast { [16 x i8 ] }* %a to i8*
   call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 8 %cast1, i8* align 8 %cast2, i32 16, i1 true)
diff --git a/llvm/test/Transforms/SROA/phi-and-select.ll b/llvm/test/Transforms/SROA/phi-and-select.ll
index d0904ce..8f04e3d 100644
--- a/llvm/test/Transforms/SROA/phi-and-select.ll
+++ b/llvm/test/Transforms/SROA/phi-and-select.ll
@@ -53,6 +53,56 @@
 	ret i32 %result
 }
 
+; If bitcast isn't considered a safe phi/select use, the alloca
+; remains as an array.
+; FIXME: Why isn't this identical to test2?
+
+; CHECK-LABEL: @test2_bitcast(
+; CHECK: alloca i32
+; CHECK-NEXT: alloca i32
+
+; CHECK: %select = select i1 %cond, i32* %a.sroa.3, i32* %a.sroa.0
+; CHECK-NEXT: %select.bc = bitcast i32* %select to float*
+; CHECK-NEXT: %result = load float, float* %select.bc, align 4
+define float @test2_bitcast() {
+entry:
+  %a = alloca [2 x i32]
+  %a0 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 0
+  %a1 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 1
+  store i32 0, i32* %a0
+  store i32 1, i32* %a1
+  %v0 = load i32, i32* %a0
+  %v1 = load i32, i32* %a1
+  %cond = icmp sle i32 %v0, %v1
+  %select = select i1 %cond, i32* %a1, i32* %a0
+  %select.bc = bitcast i32* %select to float*
+  %result = load float, float* %select.bc
+  ret float %result
+}
+
+; CHECK-LABEL: @test2_addrspacecast(
+; CHECK: alloca i32
+; CHECK-NEXT: alloca i32
+
+; CHECK: %select = select i1 %cond, i32* %a.sroa.3, i32* %a.sroa.0
+; CHECK-NEXT: %select.asc = addrspacecast i32* %select to i32 addrspace(1)*
+; CHECK-NEXT: load i32, i32 addrspace(1)* %select.asc, align 4
+define i32 @test2_addrspacecast() {
+entry:
+  %a = alloca [2 x i32]
+  %a0 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 0
+  %a1 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 1
+  store i32 0, i32* %a0
+  store i32 1, i32* %a1
+  %v0 = load i32, i32* %a0
+  %v1 = load i32, i32* %a1
+  %cond = icmp sle i32 %v0, %v1
+  %select = select i1 %cond, i32* %a1, i32* %a0
+  %select.asc = addrspacecast i32* %select to i32 addrspace(1)*
+  %result = load i32, i32 addrspace(1)* %select.asc
+  ret i32 %result
+}
+
 define i32 @test3(i32 %x) {
 ; CHECK-LABEL: @test3(
 entry: