[ARM] Support inline assembler constraints for MVE.

"To" selects an odd-numbered GPR, and "Te" an even one. There are some
8.1-M instructions whose register fields are one bit too narrow, so they
require registers of a particular parity, without necessarily using
a consecutive even/odd pair.

Also, the constraint letter "t" should select an MVE q-register, when
MVE is present. This didn't need any source changes, but some extra
tests have been added.

Reviewers: dmgreen, samparker, SjoerdMeijer

Subscribers: javed.absar, eraman, kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D60709

llvm-svn: 364331
diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
index 7ed2158..ceab4e7 100644
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -900,6 +900,19 @@
   case 'Q': // A memory address that is a single base register.
     Info.setAllowsMemory();
     return true;
+  case 'T': // Two-character register constraint: "Te" or "To".
+    switch (Name[1]) {
+    default:
+      break;
+    case 'e': // Even general-purpose register
+    case 'o': // Odd general-purpose register
+      Info.setAllowsRegister();
+      Name++; // Consume the second constraint character.
+      return true;
+    }
+    // Unrecognised "T?" constraint: leave the switch instead of falling
+    // through into the 'U' memory-constraint handling below.
+    break;
   case 'U': // a memory reference...
     switch (Name[1]) {
     case 'q': // ...ARMV4 ldrsb
@@ -923,6 +936,7 @@
   std::string R;
   switch (*Constraint) {
   case 'U': // Two-character constraint; add "^" hint for later parsing.
+  case 'T':
     R = std::string("^") + std::string(Constraint, 2);
     Constraint++;
     break;
diff --git a/clang/test/CodeGen/arm-asm.c b/clang/test/CodeGen/arm-asm.c
index bd2fe11..d2ae1ed 100644
--- a/clang/test/CodeGen/arm-asm.c
+++ b/clang/test/CodeGen/arm-asm.c
@@ -6,3 +6,21 @@
     __asm__ volatile ("flds s15, %[k] \n" :: [k] "Uv" (k) : "s15");
     return 0;
 }
+
+// CHECK-LABEL: @even_reg_constraint_Te
+int even_reg_constraint_Te(void) {
+  int acc = 0;
+  // CHECK: vaddv{{.*\^Te}}
+  asm("vaddv.s8 %0, Q0"
+      : "+Te" (acc));
+  return acc;
+}
+
+// CHECK-LABEL: @odd_reg_constraint_To
+int odd_reg_constraint_To(void) {
+  int eacc = 0, oacc = 0;
+  // CHECK: vaddlv{{.*\^To}}
+  asm("vaddlv.s8 %0, %1, Q0"
+      : "+Te" (eacc), "+To" (oacc));
+  return oacc;
+}
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 41316d1..75564f5 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -3795,6 +3795,8 @@
 
 - ``Q``, ``Um``, ``Un``, ``Uq``, ``Us``, ``Ut``, ``Uv``, ``Uy``: Memory address
   operand. Treated the same as operand ``m``, at the moment.
+- ``Te``: An even general-purpose 32-bit integer register: ``r0,r2,...,r12,r14``
+- ``To``: An odd general-purpose 32-bit integer register: ``r1,r3,...,r11``
 
 ARM and ARM's Thumb2 mode:
 
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 8ca947a4..68a6365 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -14002,6 +14002,7 @@
   } else if (Constraint.size() == 2) {
     switch (Constraint[0]) {
     default: break;
+    case 'T': return C_RegisterClass;
     // All 'U+' constraints are addresses.
     case 'U': return C_Memory;
     }
@@ -14047,7 +14048,8 @@
 
 RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
-  if (Constraint.size() == 1) {
+  switch (Constraint.size()) {
+  case 1:
     // GCC ARM Constraint Letters
     switch (Constraint[0]) {
     case 'l': // Low regs or general regs.
@@ -14093,7 +14095,32 @@
         return RCPair(0U, &ARM::QPR_VFP2RegClass);
       break;
     }
+    // Nothing matched: leave the switch rather than falling into the
+    // two-character case, which could index Constraint[1] on a 1-char string.
+    break;
+
+  case 2:
+    // Two-character constraints; only the "T" family is handled here.
+    switch (Constraint[0]) {
+    case 'T':
+      switch (Constraint[1]) {
+      default:
+        break;
+      case 'e': // Even general-purpose register.
+        return RCPair(0U, &ARM::tGPREvenRegClass);
+      case 'o': // Odd general-purpose register.
+        return RCPair(0U, &ARM::tGPROddRegClass);
+      }
+      break;
+    default:
+      break;
+    }
+    break;
+
+  default:
+    break;
   }
+
   if (StringRef("{cc}").equals_lower(Constraint))
     return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
 
diff --git a/llvm/test/CodeGen/ARM/inlineasm.ll b/llvm/test/CodeGen/ARM/inlineasm.ll
index 1ed7f69..d7fb442 100644
--- a/llvm/test/CodeGen/ARM/inlineasm.ll
+++ b/llvm/test/CodeGen/ARM/inlineasm.ll
@@ -48,3 +48,27 @@
 	%0 = tail call <4 x float> asm "vadd.f32 $0, $1, $2", "=t,t,t"(<4 x float> %a, <4 x float> %b)
 	ret <4 x float> %0
 }
+
+define i32 @even-GPR-constraint() {
+entry:
+	; CHECK-LABEL: even-GPR-constraint
+	; CHECK: add [[REG:r1*[0, 2, 4, 6, 8]]], [[REG]], #1
+	; CHECK: add [[REG:r1*[0, 2, 4, 6, 8]]], [[REG]], #2
+	; CHECK: add [[REG:r1*[0, 2, 4, 6, 8]]], [[REG]], #3
+	; CHECK: add [[REG:r1*[0, 2, 4, 6, 8]]], [[REG]], #4
+	%0 = tail call { i32, i32, i32, i32 } asm "add $0, #1\0Aadd $1, #2\0Aadd $2, #3\0Aadd $3, #4\0A", "=^Te,=^Te,=^Te,=^Te,0,1,2,3"(i32 0, i32 0, i32 0, i32 0)
+	%asmresult = extractvalue { i32, i32, i32, i32 } %0, 0
+	ret i32 %asmresult
+}
+
+define i32 @odd-GPR-constraint() {
+entry:
+	; CHECK-LABEL: odd-GPR-constraint
+	; CHECK: add [[REG:r1*[1, 3, 5, 7, 9]]], [[REG]], #1
+	; CHECK: add [[REG:r1*[1, 3, 5, 7, 9]]], [[REG]], #2
+	; CHECK: add [[REG:r1*[1, 3, 5, 7, 9]]], [[REG]], #3
+	; CHECK: add [[REG:r1*[1, 3, 5, 7, 9]]], [[REG]], #4
+	%0 = tail call { i32, i32, i32, i32 } asm "add $0, #1\0Aadd $1, #2\0Aadd $2, #3\0Aadd $3, #4\0A", "=^To,=^To,=^To,=^To,0,1,2,3"(i32 0, i32 0, i32 0, i32 0)
+	%asmresult = extractvalue { i32, i32, i32, i32 } %0, 0
+	ret i32 %asmresult
+}
diff --git a/llvm/test/CodeGen/Thumb2/inlineasm-error-t-toofewregs-mve.ll b/llvm/test/CodeGen/Thumb2/inlineasm-error-t-toofewregs-mve.ll
new file mode 100644
index 0000000..419ff71
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/inlineasm-error-t-toofewregs-mve.ll
@@ -0,0 +1,14 @@
+; RUN: not llc -mtriple=armv8.1-m-eabi -mattr=+mve %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: inline assembly requires more registers than available
+define arm_aapcs_vfpcc <4 x i32> @t-constraint-i32-vectors-too-few-regs(<4 x i32> %a, <4 x i32> %b) {
+entry:
+	%0 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>,
+                         <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
+                       asm "",
+             "=t,=t,=t,=t,=t,=t,=t,=t,=t,=t,t,t"(<4 x i32> %a, <4 x i32> %b)
+	%asmresult = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>,
+                                    <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>,
+                                    <4 x i32>, <4 x i32> } %0, 0
+	ret <4 x i32> %asmresult
+}
diff --git a/llvm/test/CodeGen/Thumb2/inlineasm-mve.ll b/llvm/test/CodeGen/Thumb2/inlineasm-mve.ll
new file mode 100644
index 0000000..a8c0622
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/inlineasm-mve.ll
@@ -0,0 +1,48 @@
+; RUN: llc -mtriple=armv8.1-m-eabi -mattr=+mve %s -o - | FileCheck %s
+
+define i32 @test1(i32 %tmp54) {
+	%tmp56 = tail call i32 asm "uxtb16 $0,$1", "=r,r"( i32 %tmp54 )
+	ret i32 %tmp56
+}
+
+define void @test2() {
+	tail call void asm sideeffect "/* number: ${0:c} */", "i"( i32 1 )
+	ret void
+}
+
+define arm_aapcs_vfpcc <4 x i32> @mve-t-constraint-128bit(<4 x i32>, <4 x i32>) {
+; CHECK-LABEL: mve-t-constraint-128bit
+; CHECK: vadd.i32 q{{[0-7]}}, q{{[0-7]}}, q{{[0-7]}}
+  %ret = tail call <4 x i32>
+         asm "vadd.i32 $0, $1, $2", "=t,t,t"
+         (<4 x i32> %0, <4 x i32> %1)
+  ret <4 x i32> %ret
+}
+
+define i32 @even-GPR-constraint() {
+entry:
+	; CHECK-LABEL: even-GPR-constraint
+	; CHECK: add [[REG:r1*[0, 2, 4, 6, 8]]], [[REG]], #1
+	; CHECK: add [[REG:r1*[0, 2, 4, 6, 8]]], [[REG]], #2
+	; CHECK: add [[REG:r1*[0, 2, 4, 6, 8]]], [[REG]], #3
+	; CHECK: add [[REG:r1*[0, 2, 4, 6, 8]]], [[REG]], #4
+	%0 = tail call { i32, i32, i32, i32 }
+             asm "add $0, #1\0Aadd $1, #2\0Aadd $2, #3\0Aadd $3, #4\0A", "=^Te,=^Te,=^Te,=^Te,0,1,2,3"
+             (i32 0, i32 0, i32 0, i32 0)
+	%asmresult = extractvalue { i32, i32, i32, i32 } %0, 0
+	ret i32 %asmresult
+}
+
+define i32 @odd-GPR-constraint() {
+entry:
+	; CHECK-LABEL: odd-GPR-constraint
+	; CHECK: add [[REG:r1*[1, 3, 5, 7, 9]]], [[REG]], #1
+	; CHECK: add [[REG:r1*[1, 3, 5, 7, 9]]], [[REG]], #2
+	; CHECK: add [[REG:r1*[1, 3, 5, 7, 9]]], [[REG]], #3
+	; CHECK: add [[REG:r1*[1, 3, 5, 7, 9]]], [[REG]], #4
+	%0 = tail call { i32, i32, i32, i32 }
+             asm "add $0, #1\0Aadd $1, #2\0Aadd $2, #3\0Aadd $3, #4\0A", "=^To,=^To,=^To,=^To,0,1,2,3"
+             (i32 0, i32 0, i32 0, i32 0)
+	%asmresult = extractvalue { i32, i32, i32, i32 } %0, 0
+	ret i32 %asmresult
+}