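[vm] Add support for the Zalasr extension.

Teach the RISC-V assembler, disassembler and simulator the Zalasr ordered
loads and stores (lb/lh/lw/ld and sb/sh/sw/sd with an ordering suffix), and
use them in LoadAcquire/StoreRelease when the extension is available instead
of a plain access combined with a fence.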

TEST=ci
Change-Id: Ic1a98751ac6b46310e420b44f487720f0dc237c8
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/366162
Commit-Queue: Ryan Macnak <rmacnak@google.com>
Reviewed-by: Alexander Aprelev <aam@google.com>
diff --git a/runtime/vm/compiler/assembler/assembler_riscv.cc b/runtime/vm/compiler/assembler/assembler_riscv.cc
index 40d8c4c..6b8eb2e 100644
--- a/runtime/vm/compiler/assembler/assembler_riscv.cc
+++ b/runtime/vm/compiler/assembler/assembler_riscv.cc
@@ -1634,6 +1634,72 @@
   EmitRType(BSET, shamt, rs1, F3_BSET, rd, OPIMM);
 }
 
+void MicroAssembler::lb(Register rd, Address addr, std::memory_order order) {  // Emits a Zalasr ordered 8-bit load into rd.
+  ASSERT(addr.offset() == 0);  // Only addr.base() is encoded below; no displacement.
+  ASSERT((order == std::memory_order_acquire) ||
+         (order == std::memory_order_acq_rel));  // The only orders accepted for a load.
+  ASSERT(Supports(RV_Zalasr));
+  EmitRType(LOADORDERED, order, ZR, addr.base(), WIDTH8, rd, AMO);  // Loads pass ZR in the rs2 position.
+}
+
+void MicroAssembler::lh(Register rd, Address addr, std::memory_order order) {  // Emits a Zalasr ordered 16-bit load into rd.
+  ASSERT(addr.offset() == 0);  // Only addr.base() is encoded below; no displacement.
+  ASSERT((order == std::memory_order_acquire) ||
+         (order == std::memory_order_acq_rel));  // The only orders accepted for a load.
+  ASSERT(Supports(RV_Zalasr));
+  EmitRType(LOADORDERED, order, ZR, addr.base(), WIDTH16, rd, AMO);  // Loads pass ZR in the rs2 position.
+}
+
+void MicroAssembler::lw(Register rd, Address addr, std::memory_order order) {  // Emits a Zalasr ordered 32-bit load into rd.
+  ASSERT(addr.offset() == 0);  // Only addr.base() is encoded below; no displacement.
+  ASSERT((order == std::memory_order_acquire) ||
+         (order == std::memory_order_acq_rel));  // The only orders accepted for a load.
+  ASSERT(Supports(RV_Zalasr));
+  EmitRType(LOADORDERED, order, ZR, addr.base(), WIDTH32, rd, AMO);  // Loads pass ZR in the rs2 position.
+}
+
+void MicroAssembler::sb(Register rs2, Address addr, std::memory_order order) {  // Emits a Zalasr ordered 8-bit store of rs2.
+  ASSERT(addr.offset() == 0);  // Only addr.base() is encoded below; no displacement.
+  ASSERT((order == std::memory_order_release) ||
+         (order == std::memory_order_acq_rel));  // The only orders accepted for a store.
+  ASSERT(Supports(RV_Zalasr));
+  EmitRType(STOREORDERED, order, rs2, addr.base(), WIDTH8, ZR, AMO);  // Stores pass ZR in the rd position.
+}
+
+void MicroAssembler::sh(Register rs2, Address addr, std::memory_order order) {  // Emits a Zalasr ordered 16-bit store of rs2.
+  ASSERT(addr.offset() == 0);  // Only addr.base() is encoded below; no displacement.
+  ASSERT((order == std::memory_order_release) ||
+         (order == std::memory_order_acq_rel));  // The only orders accepted for a store.
+  ASSERT(Supports(RV_Zalasr));
+  EmitRType(STOREORDERED, order, rs2, addr.base(), WIDTH16, ZR, AMO);  // Stores pass ZR in the rd position.
+}
+
+void MicroAssembler::sw(Register rs2, Address addr, std::memory_order order) {  // Emits a Zalasr ordered 32-bit store of rs2.
+  ASSERT(addr.offset() == 0);  // Only addr.base() is encoded below; no displacement.
+  ASSERT((order == std::memory_order_release) ||
+         (order == std::memory_order_acq_rel));  // The only orders accepted for a store.
+  ASSERT(Supports(RV_Zalasr));
+  EmitRType(STOREORDERED, order, rs2, addr.base(), WIDTH32, ZR, AMO);  // Stores pass ZR in the rd position.
+}
+
+#if XLEN >= 64
+void MicroAssembler::ld(Register rd, Address addr, std::memory_order order) {  // Emits a Zalasr ordered 64-bit load into rd.
+  ASSERT(addr.offset() == 0);  // Only addr.base() is encoded below; no displacement.
+  ASSERT((order == std::memory_order_acquire) ||
+         (order == std::memory_order_acq_rel));  // The only orders accepted for a load.
+  ASSERT(Supports(RV_Zalasr));
+  EmitRType(LOADORDERED, order, ZR, addr.base(), WIDTH64, rd, AMO);  // Loads pass ZR in the rs2 position.
+}
+
+void MicroAssembler::sd(Register rs2, Address addr, std::memory_order order) {  // Emits a Zalasr ordered 64-bit store of rs2.
+  ASSERT(addr.offset() == 0);  // Only addr.base() is encoded below; no displacement.
+  ASSERT((order == std::memory_order_release) ||
+         (order == std::memory_order_acq_rel));  // The only orders accepted for a store.
+  ASSERT(Supports(RV_Zalasr));
+  EmitRType(STOREORDERED, order, rs2, addr.base(), WIDTH64, ZR, AMO);  // Stores pass ZR in the rd position.
+}
+#endif
+
 void MicroAssembler::c_lwsp(Register rd, Address addr) {
   ASSERT(rd != ZR);
   ASSERT(addr.base() == SP);
@@ -2582,8 +2648,31 @@
                             const Address& address,
                             OperandSize size) {
   ASSERT(dst != address.base());
-  Load(dst, address, size);
-  fence(HartEffects::kRead, HartEffects::kMemory);
+
+  if (Supports(RV_Zalasr)) {
+    Address addr = PrepareAtomicOffset(address.base(), address.offset());
+    switch (size) {
+#if XLEN == 64
+      case kEightBytes:
+        ld(dst, addr, std::memory_order_acquire);
+        break;
+#endif
+      case kFourBytes:
+        lw(dst, addr, std::memory_order_acquire);
+        break;
+      case kTwoBytes:
+        lh(dst, addr, std::memory_order_acquire);
+        break;
+      case kByte:
+        lb(dst, addr, std::memory_order_acquire);
+        break;
+      default:
+        UNREACHABLE();
+    }
+  } else {
+    Load(dst, address, size);
+    fence(HartEffects::kRead, HartEffects::kMemory);
+  }
 
   if (FLAG_target_thread_sanitizer) {
     if (address.offset() == 0) {
@@ -2598,8 +2687,33 @@
 void Assembler::StoreRelease(Register src,  // Stores src to address with release ordering.
                              const Address& address,
                              OperandSize size) {
-  fence(HartEffects::kMemory, HartEffects::kWrite);
-  Store(src, address, size);
+  if (Supports(RV_Zalasr)) {  // Prefer a single ordered store when Zalasr is available.
+    Address addr = PrepareAtomicOffset(address.base(), address.offset());  // NOTE(review): this may write TMP2 when the offset is non-zero and nothing here checks src != TMP2 - confirm callers.
+    switch (size) {
+#if XLEN == 64
+      case kEightBytes:
+        sd(src, addr, std::memory_order_release);
+        break;
+#endif
+      case kUnsignedFourBytes:  // Signed and unsigned sizes of one width share a store.
+      case kFourBytes:
+        sw(src, addr, std::memory_order_release);
+        break;
+      case kUnsignedTwoBytes:
+      case kTwoBytes:
+        sh(src, addr, std::memory_order_release);
+        break;
+      case kUnsignedByte:
+      case kByte:
+        sb(src, addr, std::memory_order_release);
+        break;
+      default:
+        UNREACHABLE();  // Any other size (including kEightBytes when XLEN != 64) is not handled.
+    }
+  } else {  // Fallback without Zalasr: a fence followed by a plain store.
+    fence(HartEffects::kMemory, HartEffects::kWrite);
+    Store(src, address, size);
+  }
 }
 
 void Assembler::CompareWithMemoryValue(Register value,
@@ -3165,6 +3279,15 @@
   return Address(TMP2, lo);
 }
 
+Address Assembler::PrepareAtomicOffset(Register base, int32_t offset) {  // Yields an offset-0 address for base + offset.
+  ASSERT(base != TMP2);  // TMP2 is the scratch register written below.
+  if (offset != 0) {
+    AddImmediate(TMP2, base, offset);  // TMP2 = base + offset.
+    return Address(TMP2, 0);
+  }
+  return Address(base, 0);  // Nothing to fold in: use base as is.
+}
+
 void Assembler::Load(Register dest, const Address& address, OperandSize sz) {
   Address addr = PrepareLargeOffset(address.base(), address.offset());
   switch (sz) {
diff --git a/runtime/vm/compiler/assembler/assembler_riscv.h b/runtime/vm/compiler/assembler/assembler_riscv.h
index e16bab1..9abfcc2 100644
--- a/runtime/vm/compiler/assembler/assembler_riscv.h
+++ b/runtime/vm/compiler/assembler/assembler_riscv.h
@@ -603,6 +603,19 @@
   void bset(Register rd, Register rs1, Register rs2);
   void bseti(Register rd, Register rs1, intx_t shamt);
 
+  // ==== Zalasr: Load-acquire, store-release ====
+  void lb(Register rd, Address addr, std::memory_order order);  // Loads: addr offset must be 0; order must be acquire or acq_rel.
+  void lh(Register rd, Address addr, std::memory_order order);
+  void lw(Register rd, Address addr, std::memory_order order);
+  void sb(Register rs2, Address addr, std::memory_order order);  // Stores: addr offset must be 0; order must be release or acq_rel.
+  void sh(Register rs2, Address addr, std::memory_order order);
+  void sw(Register rs2, Address addr, std::memory_order order);
+
+#if XLEN >= 64
+  void ld(Register rd, Address addr, std::memory_order order);  // 64-bit load; same constraints as the loads above.
+  void sd(Register rs2, Address addr, std::memory_order order);  // 64-bit store; same constraints as the stores above.
+#endif
+
   // ==== Dart Simulator Debugging ====
   void SimulatorPrintObject(Register rs1);
 
@@ -1092,6 +1105,7 @@
                         OperandSize sz = kWordBytes) override;
 
   Address PrepareLargeOffset(Register base, int32_t offset);
+  Address PrepareAtomicOffset(Register base, int32_t offset);
   void Load(Register dest,
             const Address& address,
             OperandSize sz = kWordBytes) override;
diff --git a/runtime/vm/compiler/assembler/assembler_riscv_test.cc b/runtime/vm/compiler/assembler/assembler_riscv_test.cc
index 63c3287..fbf6965 100644
--- a/runtime/vm/compiler/assembler/assembler_riscv_test.cc
+++ b/runtime/vm/compiler/assembler/assembler_riscv_test.cc
@@ -7112,6 +7112,124 @@
   EXPECT_EQ(-1, Call(test->entry(), -1));
 }
 
+ASSEMBLER_TEST_GENERATE(LoadByteAcquire, assembler) {
+  __ SetExtensions(RV_GC | RV_Zalasr);  // Declare Zalasr as available to the assembler.
+  __ lb(A0, Address(A1), std::memory_order_acquire);  // A0 = byte at [A1], acquire ordering.
+  __ ret();
+}
+ASSEMBLER_TEST_RUN(LoadByteAcquire, test) {
+  EXPECT_DISASSEMBLY(
+      "3405852f lb.aq a0, (a1)\n"
+      "    8082 ret\n");
+
+  int8_t data = -42;  // Negative, so the expected result checks sign extension.
+  EXPECT_EQ(-42, Call(test->entry(), 0, reinterpret_cast<intx_t>(&data)));  // The second argument is the address loaded through A1.
+}
+
+ASSEMBLER_TEST_GENERATE(LoadHalfwordAcquire, assembler) {
+  __ SetExtensions(RV_GC | RV_Zalasr);  // Declare Zalasr as available to the assembler.
+  __ lh(A0, Address(A1), std::memory_order_acquire);  // A0 = 16-bit value at [A1], acquire ordering.
+  __ ret();
+}
+ASSEMBLER_TEST_RUN(LoadHalfwordAcquire, test) {
+  EXPECT_DISASSEMBLY(
+      "3405952f lh.aq a0, (a1)\n"
+      "    8082 ret\n");
+
+  int16_t data = -42;  // Negative, so the expected result checks sign extension.
+  EXPECT_EQ(-42, Call(test->entry(), 0, reinterpret_cast<intx_t>(&data)));  // The second argument is the address loaded through A1.
+}
+
+ASSEMBLER_TEST_GENERATE(LoadWordAcquire, assembler) {
+  __ SetExtensions(RV_GC | RV_Zalasr);  // Declare Zalasr as available to the assembler.
+  __ lw(A0, Address(A1), std::memory_order_acquire);  // A0 = 32-bit value at [A1], acquire ordering.
+  __ ret();
+}
+ASSEMBLER_TEST_RUN(LoadWordAcquire, test) {
+  EXPECT_DISASSEMBLY(
+      "3405a52f lw.aq a0, (a1)\n"
+      "    8082 ret\n");
+
+  int32_t data = -42;  // Negative, so the expected result checks sign extension.
+  EXPECT_EQ(-42, Call(test->entry(), 0, reinterpret_cast<intx_t>(&data)));  // The second argument is the address loaded through A1.
+}
+
+ASSEMBLER_TEST_GENERATE(StoreByteRelease, assembler) {
+  __ SetExtensions(RV_GC | RV_Zalasr);  // Declare Zalasr as available to the assembler.
+  __ sb(A0, Address(A1), std::memory_order_release);  // Byte at [A1] = A0, release ordering.
+  __ ret();
+}
+ASSEMBLER_TEST_RUN(StoreByteRelease, test) {
+  EXPECT_DISASSEMBLY(
+      "3aa5802f sb.rl a0, (a1)\n"
+      "    8082 ret\n");
+
+  int8_t data = 0;
+  EXPECT_EQ(-42, Call(test->entry(), -42, reinterpret_cast<intx_t>(&data)));  // The code never writes A0, so the first argument comes back.
+  EXPECT_EQ(-42, data);  // The store must have updated the target.
+}
+
+ASSEMBLER_TEST_GENERATE(StoreHalfwordRelease, assembler) {
+  __ SetExtensions(RV_GC | RV_Zalasr);  // Declare Zalasr as available to the assembler.
+  __ sh(A0, Address(A1), std::memory_order_release);  // 16-bit value at [A1] = A0, release ordering.
+  __ ret();
+}
+ASSEMBLER_TEST_RUN(StoreHalfwordRelease, test) {
+  EXPECT_DISASSEMBLY(
+      "3aa5902f sh.rl a0, (a1)\n"
+      "    8082 ret\n");
+
+  int16_t data = 0;
+  EXPECT_EQ(-42, Call(test->entry(), -42, reinterpret_cast<intx_t>(&data)));  // The code never writes A0, so the first argument comes back.
+  EXPECT_EQ(-42, data);  // The store must have updated the target.
+}
+
+ASSEMBLER_TEST_GENERATE(StoreWordRelease, assembler) {
+  __ SetExtensions(RV_GC | RV_Zalasr);  // Declare Zalasr as available to the assembler.
+  __ sw(A0, Address(A1), std::memory_order_release);  // 32-bit value at [A1] = A0, release ordering.
+  __ ret();
+}
+ASSEMBLER_TEST_RUN(StoreWordRelease, test) {
+  EXPECT_DISASSEMBLY(
+      "3aa5a02f sw.rl a0, (a1)\n"
+      "    8082 ret\n");
+
+  int32_t data = 0;
+  EXPECT_EQ(-42, Call(test->entry(), -42, reinterpret_cast<intx_t>(&data)));  // The code never writes A0, so the first argument comes back.
+  EXPECT_EQ(-42, data);  // The store must have updated the target.
+}
+
+#if XLEN >= 64
+ASSEMBLER_TEST_GENERATE(LoadDoubleWordAcquire, assembler) {
+  __ SetExtensions(RV_GC | RV_Zalasr);  // Declare Zalasr as available to the assembler.
+  __ ld(A0, Address(A1), std::memory_order_acquire);  // A0 = 64-bit value at [A1], acquire ordering.
+  __ ret();
+}
+ASSEMBLER_TEST_RUN(LoadDoubleWordAcquire, test) {
+  EXPECT_DISASSEMBLY(
+      "3405b52f ld.aq a0, (a1)\n"
+      "    8082 ret\n");
+
+  int64_t data = -42;
+  EXPECT_EQ(-42, Call(test->entry(), 0, reinterpret_cast<intx_t>(&data)));  // The second argument is the address loaded through A1.
+}
+
+ASSEMBLER_TEST_GENERATE(StoreDoubleWordRelease, assembler) {
+  __ SetExtensions(RV_GC | RV_Zalasr);  // Declare Zalasr as available to the assembler.
+  __ sd(A0, Address(A1), std::memory_order_release);  // 64-bit value at [A1] = A0, release ordering.
+  __ ret();
+}
+ASSEMBLER_TEST_RUN(StoreDoubleWordRelease, test) {
+  EXPECT_DISASSEMBLY(
+      "3aa5b02f sd.rl a0, (a1)\n"
+      "    8082 ret\n");
+
+  int64_t data = 0;
+  EXPECT_EQ(-42, Call(test->entry(), -42, reinterpret_cast<intx_t>(&data)));  // The code never writes A0, so the first argument comes back.
+  EXPECT_EQ(-42, data);  // The store must have updated the target.
+}
+#endif  // XLEN >= 64
+
 ASSEMBLER_TEST_GENERATE(LoadImmediate_MaxInt32, assembler) {
   FLAG_use_compressed_instructions = true;
   __ SetExtensions(RV_GC);
diff --git a/runtime/vm/compiler/assembler/disassembler_riscv.cc b/runtime/vm/compiler/assembler/disassembler_riscv.cc
index 60af634..937b6f6 100644
--- a/runtime/vm/compiler/assembler/disassembler_riscv.cc
+++ b/runtime/vm/compiler/assembler/disassembler_riscv.cc
@@ -79,6 +79,8 @@
   void DisassembleMISCMEM(Instr instr);
   void DisassembleSYSTEM(Instr instr);
   void DisassembleAMO(Instr instr);
+  void DisassembleAMO8(Instr instr);
+  void DisassembleAMO16(Instr instr);
   void DisassembleAMO32(Instr instr);
   void DisassembleAMO64(Instr instr);
   void DisassembleLOADFP(Instr instr);
@@ -1097,6 +1099,12 @@
 
 void RISCVDisassembler::DisassembleAMO(Instr instr) {
   switch (instr.funct3()) {
+    case WIDTH8:
+      DisassembleAMO8(instr);
+      break;
+    case WIDTH16:
+      DisassembleAMO16(instr);
+      break;
     case WIDTH32:
       DisassembleAMO32(instr);
       break;
@@ -1108,6 +1116,32 @@
   }
 }
 
+void RISCVDisassembler::DisassembleAMO8(Instr instr) {
+  // Byte-wide forms under the AMO opcode: only the Zalasr ordered load and
+  // ordered store are printed; every other funct5 is reported as unknown.
+  const auto funct5 = instr.funct5();
+  if (funct5 == LOADORDERED) {
+    Print("lb'order 'rd, ('rs1)", instr, RV_Zalasr);
+  } else if (funct5 == STOREORDERED) {
+    Print("sb'order 'rs2, ('rs1)", instr, RV_Zalasr);
+  } else {
+    UnknownInstruction(instr);
+  }
+}
+
+void RISCVDisassembler::DisassembleAMO16(Instr instr) {
+  // 16-bit forms under the AMO opcode: only the Zalasr ordered load and
+  // ordered store are printed; every other funct5 is reported as unknown.
+  const auto funct5 = instr.funct5();
+  if (funct5 == LOADORDERED) {
+    Print("lh'order 'rd, ('rs1)", instr, RV_Zalasr);
+  } else if (funct5 == STOREORDERED) {
+    Print("sh'order 'rs2, ('rs1)", instr, RV_Zalasr);
+  } else {
+    UnknownInstruction(instr);
+  }
+}
+
 void RISCVDisassembler::DisassembleAMO32(Instr instr) {
   switch (instr.funct5()) {
     case LR:
@@ -1143,6 +1177,12 @@
     case AMOMAXU:
       Print("amomaxu.w'order 'rd, 'rs2, ('rs1)", instr, RV_A);
       break;
+    case LOADORDERED:
+      Print("lw'order 'rd, ('rs1)", instr, RV_Zalasr);
+      break;
+    case STOREORDERED:
+      Print("sw'order 'rs2, ('rs1)", instr, RV_Zalasr);
+      break;
     default:
       UnknownInstruction(instr);
   }
@@ -1184,6 +1224,12 @@
     case AMOMAXU:
       Print("amomaxu.d'order 'rd, 'rs2, ('rs1)", instr, RV_A);
       break;
+    case LOADORDERED:
+      Print("ld'order 'rd, ('rs1)", instr, RV_Zalasr);
+      break;
+    case STOREORDERED:
+      Print("sd'order 'rs2, ('rs1)", instr, RV_Zalasr);
+      break;
 #endif
     default:
       UnknownInstruction(instr);
diff --git a/runtime/vm/constants_riscv.h b/runtime/vm/constants_riscv.h
index 5568475..0b28f70 100644
--- a/runtime/vm/constants_riscv.h
+++ b/runtime/vm/constants_riscv.h
@@ -799,6 +799,8 @@
   REMW = 0b110,
   REMUW = 0b111,
 
+  WIDTH8 = 0b000,
+  WIDTH16 = 0b001,
   WIDTH32 = 0b010,
   WIDTH64 = 0b011,
 
@@ -898,6 +900,8 @@
   AMOMAX = 0b10100,
   AMOMINU = 0b11000,
   AMOMAXU = 0b11100,
+  LOADORDERED = 0b00110,
+  STOREORDERED = 0b00111,
 };
 
 enum Funct2 {
@@ -1597,7 +1601,8 @@
 static constexpr Extension RV_Zbs(8);  // Single-bit instructions
 static constexpr ExtensionSet RV_B = RV_Zba | RV_Zbb | RV_Zbs;
 static constexpr ExtensionSet RV_GCB = RV_GC | RV_B;
-static constexpr Extension RV_Zbc(9);  // Carry-less multiplication
+static constexpr Extension RV_Zbc(9);      // Carry-less multiplication
+static constexpr Extension RV_Zalasr(10);  // Load-acquire, store-release
 
 #undef R
 
diff --git a/runtime/vm/simulator_riscv.cc b/runtime/vm/simulator_riscv.cc
index 566eabc..48a9fab 100644
--- a/runtime/vm/simulator_riscv.cc
+++ b/runtime/vm/simulator_riscv.cc
@@ -2088,6 +2088,12 @@
 
 void Simulator::InterpretAMO(Instr instr) {
   switch (instr.funct3()) {
+    case WIDTH8:
+      InterpretAMO8(instr);
+      break;
+    case WIDTH16:
+      InterpretAMO16(instr);
+      break;
     case WIDTH32:
       InterpretAMO32(instr);
       break;
@@ -2226,6 +2232,56 @@
   set_xreg(instr.rd(), sign_extend(expected));
 }
 
+template <typename type>
+void Simulator::InterpretLOADORDERED(Instr instr) {  // Simulates an ordered load of one 'type'-sized value.
+  uintx_t addr = get_xreg(instr.rs1());  // The address is the value of rs1; there is no displacement.
+  if ((addr & (sizeof(type) - 1)) != 0) {  // The address must be a multiple of sizeof(type).
+    FATAL("Misaligned atomic memory operation");
+  }
+  std::atomic<type>* atomic = reinterpret_cast<std::atomic<type>*>(addr);
+  type value = atomic->load(instr.memory_order());  // NOTE(review): C++ does not allow release or acq_rel as a load order; confirm memory_order() cannot yield those here.
+  set_xreg(instr.rd(), sign_extend(value));  // The loaded value is widened with sign_extend() and written to rd.
+}
+
+template <typename type>
+void Simulator::InterpretSTOREORDERED(Instr instr) {  // Simulates an ordered store of one 'type'-sized value.
+  uintx_t addr = get_xreg(instr.rs1());  // The address is the value of rs1; there is no displacement.
+  if ((addr & (sizeof(type) - 1)) != 0) {  // The address must be a multiple of sizeof(type).
+    FATAL("Misaligned atomic memory operation");
+  }
+  type value = get_xreg(instr.rs2());  // Implicitly converts the register value to 'type'.
+  std::atomic<type>* atomic = reinterpret_cast<std::atomic<type>*>(addr);
+  atomic->store(value, instr.memory_order());  // NOTE(review): C++ does not allow acquire or acq_rel as a store order; confirm memory_order() cannot yield those here.
+}
+
+void Simulator::InterpretAMO8(Instr instr) {
+  // Byte-wide instructions under the AMO opcode: only the ordered load and
+  // the ordered store are executed; any other funct5 is reported as illegal.
+  const auto funct5 = instr.funct5();
+  if (funct5 == LOADORDERED) {
+    InterpretLOADORDERED<int8_t>(instr);
+  } else if (funct5 == STOREORDERED) {
+    InterpretSTOREORDERED<int8_t>(instr);
+  } else {
+    IllegalInstruction(instr);
+  }
+  pc_ += instr.length();
+}
+
+void Simulator::InterpretAMO16(Instr instr) {
+  // 16-bit instructions under the AMO opcode: only the ordered load and
+  // the ordered store are executed; any other funct5 is reported as illegal.
+  const auto funct5 = instr.funct5();
+  if (funct5 == LOADORDERED) {
+    InterpretLOADORDERED<int16_t>(instr);
+  } else if (funct5 == STOREORDERED) {
+    InterpretSTOREORDERED<int16_t>(instr);
+  } else {
+    IllegalInstruction(instr);
+  }
+  pc_ += instr.length();
+}
+
 void Simulator::InterpretAMO32(Instr instr) {
   switch (instr.funct5()) {
     case LR:
@@ -2261,6 +2317,12 @@
     case AMOMAXU:
       InterpretAMOMAX<uint32_t>(instr);
       break;
+    case LOADORDERED:
+      InterpretLOADORDERED<int32_t>(instr);
+      break;
+    case STOREORDERED:
+      InterpretSTOREORDERED<int32_t>(instr);
+      break;
     default:
       IllegalInstruction(instr);
   }
@@ -2303,6 +2365,12 @@
     case AMOMAXU:
       InterpretAMOMAX<uint64_t>(instr);
       break;
+    case LOADORDERED:
+      InterpretLOADORDERED<int64_t>(instr);
+      break;
+    case STOREORDERED:
+      InterpretSTOREORDERED<int64_t>(instr);
+      break;
 #endif  // XLEN >= 64
     default:
       IllegalInstruction(instr);
diff --git a/runtime/vm/simulator_riscv.h b/runtime/vm/simulator_riscv.h
index 81332a2..467a46d 100644
--- a/runtime/vm/simulator_riscv.h
+++ b/runtime/vm/simulator_riscv.h
@@ -225,6 +225,8 @@
   void InterpretEBREAK(Instr instr);
   void InterpretEBREAK(CInstr instr);
   void InterpretAMO(Instr instr);
+  void InterpretAMO8(Instr instr);
+  void InterpretAMO16(Instr instr);
   void InterpretAMO32(Instr instr);
   void InterpretAMO64(Instr instr);
   template <typename type>
@@ -249,6 +251,10 @@
   void InterpretAMOMINU(Instr instr);
   template <typename type>
   void InterpretAMOMAXU(Instr instr);
+  template <typename type>
+  void InterpretLOADORDERED(Instr instr);
+  template <typename type>
+  void InterpretSTOREORDERED(Instr instr);
   void InterpretLOADFP(Instr instr);
   void InterpretSTOREFP(Instr instr);
   void InterpretFMADD(Instr instr);