[ARM] Don't use the Machine Scheduler for cortex-m at minsize

The new cortex-m schedule in rL360768 helps performance, but can increase the
number of high registers used. This, on average, ends up increasing the code
size by a fair amount (because fewer instructions are converted from T2 to
T1). On cortex-m at -Oz, where we are quite size-paranoid, it is better to use
the existing DAG scheduler with the RegPressure scheduling preference (at least
until the issues around T2 vs T1 instructions can be improved).

I have also made sure that the Sched::RegPressure DAG scheduler is always
chosen for MinSize.

The test shows one case where the Machine Scheduler increased the number of
registers used.

Differential Revision: https://reviews.llvm.org/D61882

llvm-svn: 360769
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 94f94d3..643d280 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1184,7 +1184,7 @@
   setStackPointerRegisterToSaveRestore(ARM::SP);
 
   if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
-      !Subtarget->hasVFP2())
+      !Subtarget->hasVFP2() || Subtarget->hasMinSize())
     setSchedulingPreference(Sched::RegPressure);
   else
     setSchedulingPreference(Sched::Hybrid);
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 22652d6..63f6941 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -361,6 +361,13 @@
 }
 
 bool ARMSubtarget::enableMachineScheduler() const {
+  // The MachineScheduler can increase register usage, so we use more high
+  // registers and end up with more T2 instructions that cannot be converted to
+  // T1 instructions. At least until we do better at converting to thumb1
+  // instructions, on cortex-m at Oz where we are size-paranoid, don't use the
+  // Machine scheduler, relying on the DAG register pressure scheduler instead.
+  if (isMClass() && hasMinSize())
+    return false;
   // Enable the MachineScheduler before register allocation for subtargets
   // with the use-misched feature.
   return useMachineScheduler();
diff --git a/llvm/test/CodeGen/Thumb2/m4-sched-regs.ll b/llvm/test/CodeGen/Thumb2/m4-sched-regs.ll
index a83da8a..29952fe 100644
--- a/llvm/test/CodeGen/Thumb2/m4-sched-regs.ll
+++ b/llvm/test/CodeGen/Thumb2/m4-sched-regs.ll
@@ -10,22 +10,20 @@
 define void @test(%struct.a* nocapture %dhcp, i16 zeroext %value) #0 {
 ; CHECK-LABEL: test:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r7, lr}
-; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    ldrh r3, [r0, #20]
-; CHECK-NEXT:    ldr.w lr, [r0, #16]
-; CHECK-NEXT:    lsr.w r12, r1, #8
-; CHECK-NEXT:    adds r2, r3, #1
-; CHECK-NEXT:    strh r2, [r0, #20]
-; CHECK-NEXT:    add.w r2, lr, r3
-; CHECK-NEXT:    strb.w r12, [r2, #240]
 ; CHECK-NEXT:    ldrh r2, [r0, #20]
-; CHECK-NEXT:    ldr.w r12, [r0, #16]
 ; CHECK-NEXT:    adds r3, r2, #1
 ; CHECK-NEXT:    strh r3, [r0, #20]
-; CHECK-NEXT:    add.w r0, r12, r2
+; CHECK-NEXT:    ldr r3, [r0, #16]
+; CHECK-NEXT:    add r2, r3
+; CHECK-NEXT:    lsrs r3, r1, #8
+; CHECK-NEXT:    strb.w r3, [r2, #240]
+; CHECK-NEXT:    ldrh r2, [r0, #20]
+; CHECK-NEXT:    adds r3, r2, #1
+; CHECK-NEXT:    strh r3, [r0, #20]
+; CHECK-NEXT:    ldr r0, [r0, #16]
+; CHECK-NEXT:    add r0, r2
 ; CHECK-NEXT:    strb.w r1, [r0, #240]
-; CHECK-NEXT:    pop {r7, pc}
+; CHECK-NEXT:    bx lr
 entry:
   %shr = lshr i16 %value, 8
   %conv1 = trunc i16 %shr to i8