Add ld3r to AArch64 assembler and require consecutive registers in ld2r

PiperOrigin-RevId: 462478843
diff --git a/src/jit/aarch64-assembler.cc b/src/jit/aarch64-assembler.cc
index 7fd9f1a..9a48022 100644
--- a/src/jit/aarch64-assembler.cc
+++ b/src/jit/aarch64-assembler.cc
@@ -459,7 +459,7 @@
 }
 
 void Assembler::ld2r(VRegisterList xs, MemOperand xn) {
-  if (xs.length != 2 || !is_same_shape(xs.vt1, xs.vt2) || xn.offset != 0) {
+  if (xs.length != 2 || !is_same_shape(xs.vt1, xs.vt2) || xn.offset != 0 || !is_consecutive(xs)) {
     error_ = Error::kInvalidOperand;
     return;
   }
@@ -467,6 +467,15 @@
   emit32(0x0D60C000 | q(xs.vt1) | size(xs.vt1) | rn(xn.base) | xs.vt1.code);
 }
 
+void Assembler::ld3r(VRegisterList xs, MemOperand xn) {
+  if (xs.length != 3 || !is_same_shape(xs.vt1, xs.vt2, xs.vt3) || xn.offset != 0 || !is_consecutive(xs)) {
+    error_ = Error::kInvalidOperand;  // Wrong list length, mixed shapes, non-zero offset, or non-consecutive list.
+    return;
+  }
+  // OR the q/size fields of the first register, the base register field and the first register's code into 0x0D40E000.
+  emit32(0x0D40E000 | q(xs.vt1) | size(xs.vt1) | rn(xn.base) | xs.vt1.code);
+}
+
 void Assembler::ldp(DRegister dt1, DRegister dt2, MemOperand xn) {
   if (!imm7_offset_valid(xn.offset, dt1)) {
     error_ = Error::kInvalidOperand;
diff --git a/src/xnnpack/aarch64-assembler.h b/src/xnnpack/aarch64-assembler.h
index 5e280fc..ce7671d 100644
--- a/src/xnnpack/aarch64-assembler.h
+++ b/src/xnnpack/aarch64-assembler.h
@@ -368,6 +368,7 @@
   void ld1(VRegisterList vs, MemOperand xn, int32_t imm);
   void ld1r(VRegisterList xs, MemOperand xn);
   void ld2r(VRegisterList xs, MemOperand xn);
+  void ld3r(VRegisterList xs, MemOperand xn);
   void ldp(DRegister dt1, DRegister dt2, MemOperand xn);
   void ldp(DRegister dt1, DRegister dt2, MemOperand xn, int32_t imm);
   void ldp(QRegister qt1, QRegister qt2, MemOperand xn, int32_t imm);
diff --git a/test/aarch64-assembler.cc b/test/aarch64-assembler.cc
index 2c3bc08..bd80e95 100644
--- a/test/aarch64-assembler.cc
+++ b/test/aarch64-assembler.cc
@@ -178,9 +178,14 @@
 
   CHECK_ENCODING(0x4D60C902, a.ld2r({v2.v4s(), v3.v4s()}, mem[x8]));
   EXPECT_ERROR(Error::kInvalidOperand, a.ld2r({v2.v4s(), v3.v4s()}, mem[x8, 16]));
-  EXPECT_ERROR(Error::kInvalidOperand, a.ld2r({v2.v4s(), v4.v4s()}, mem[x8, 16]));
+  EXPECT_ERROR(Error::kInvalidOperand, a.ld2r({v2.v4s(), v4.v4s()}, mem[x8]));
   EXPECT_ERROR(Error::kInvalidOperand, a.ld2r({v2.v4s(), v3.v8b()}, mem[x8]));
 
+  CHECK_ENCODING(0x4D40E906, a.ld3r({v6.v4s(), v7.v4s(), v8.v4s()}, mem[x8]));
+  EXPECT_ERROR(Error::kInvalidOperand, a.ld3r({v6.v4s(), v7.v4s(), v8.v4s()}, mem[x8, 16]));
+  EXPECT_ERROR(Error::kInvalidOperand, a.ld3r({v6.v4s(), v7.v4s(), v9.v4s()}, mem[x8]));
+  EXPECT_ERROR(Error::kInvalidOperand, a.ld3r({v6.v4s(), v7.v2s(), v8.v4s()}, mem[x8]));
+
   CHECK_ENCODING(0x4EB21E50, a.mov(v16.v16b(), v18.v16b()));
   CHECK_ENCODING(0x0EB21E50, a.mov(v16.v8b(), v18.v8b()));
   EXPECT_ERROR(Error::kInvalidOperand, a.mov(v16.v16b(), v18.v8b()));