[CET] emit endbr64 landing pad instructions

Add an optional feature to emit endbr64 landing pads on x64.
When CET IBT is enforced, every indirect jmp/call must land on an
endbr64 instruction or carry a notrack prefix.

The feature is gated behind the build flag v8_enable_cet_ibt
(off by default).

Bug: v8:13355
Change-Id: Ifd350a566b2cb1aa9e5976b797dc658106df826c
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4637222
Reviewed-by: Michael Lippautz <mlippautz@chromium.org>
Commit-Queue: Stephen Röttger <sroettger@google.com>
Reviewed-by: Thibaud Michaud <thibaudm@chromium.org>
Reviewed-by: Andreas Haas <ahaas@chromium.org>
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Reviewed-by: Patrick Thier <pthier@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/main@{#91273}
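
For reference, a minimal standalone sketch (illustrative, not V8 code) of
the raw encodings involved: under IBT, an indirect jmp/call must land on
an endbr64 (F3 0F 1E FA) unless the branch itself carries the notrack
prefix (3E).

  #include <cstdint>
  #include <vector>

  // endbr64: the 4-byte landing pad instruction.
  std::vector<uint8_t> Endbr64() { return {0xF3, 0x0F, 0x1E, 0xFA}; }

  // jmp rax with an optional NOTRACK prefix, mirroring the
  // Assembler::jmp(Register, bool) overload added below.
  std::vector<uint8_t> JmpRax(bool notrack) {
    std::vector<uint8_t> code;
    if (notrack) code.push_back(0x3E);  // NOTRACK (reuses the DS override)
    code.push_back(0xFF);               // opcode FF /4: jmp r/m64
    code.push_back(0xE0);               // ModRM: mod=11, reg=/4, rm=rax
    return code;
  }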
diff --git a/BUILD.gn b/BUILD.gn
index 89d8e05..a41311b 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -1137,6 +1137,9 @@
   if (v8_enable_cet_shadow_stack) {
     defines += [ "V8_ENABLE_CET_SHADOW_STACK" ]
   }
+  if (v8_enable_cet_ibt) {
+    defines += [ "V8_ENABLE_CET_IBT" ]
+  }
   if (v8_enable_wasm_gdb_remote_debugging) {
     defines += [ "V8_ENABLE_WASM_GDB_REMOTE_DEBUGGING" ]
   }
diff --git a/gni/v8.gni b/gni/v8.gni
index a72edb4..72381c7 100644
--- a/gni/v8.gni
+++ b/gni/v8.gni
@@ -155,6 +155,9 @@
   # Change code emission and runtime features to be CET shadow-stack compliant
   # (incomplete and experimental).
   v8_enable_cet_shadow_stack = false
+
+  # Emit CET IBT landing pad instructions in JIT-generated code (experimental).
+  v8_enable_cet_ibt = false
 }
 
 if (v8_use_external_startup_data == "") {
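
To try the feature locally, add v8_enable_cet_ibt = true to the build
directory's args.gn (e.g. via gn args out/x64); per the BUILD.gn hunk
above, this defines V8_ENABLE_CET_IBT for the compile.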
diff --git a/src/baseline/x64/baseline-assembler-x64-inl.h b/src/baseline/x64/baseline-assembler-x64-inl.h
index 554861e..fc552e9 100644
--- a/src/baseline/x64/baseline-assembler-x64-inl.h
+++ b/src/baseline/x64/baseline-assembler-x64-inl.h
@@ -77,7 +77,9 @@
 void BaselineAssembler::Bind(Label* label) { __ bind(label); }
 
 void BaselineAssembler::JumpTarget() {
-  // NOP on x64.
+#ifdef V8_ENABLE_CET_IBT
+  __ endbr64();
+#endif
 }
 
 void BaselineAssembler::Jump(Label* target, Label::Distance distance) {
diff --git a/src/builtins/x64/builtins-x64.cc b/src/builtins/x64/builtins-x64.cc
index a70a824..7c61c13 100644
--- a/src/builtins/x64/builtins-x64.cc
+++ b/src/builtins/x64/builtins-x64.cc
@@ -44,6 +44,8 @@
 #define __ ACCESS_MASM(masm)
 
 void Builtins::Generate_Adaptor(MacroAssembler* masm, Address address) {
+  __ CodeEntry();
+
   __ LoadAddress(kJavaScriptCallExtraArg1Register,
                  ExternalReference::Create(address));
   __ Jump(BUILTIN_CODE(masm->isolate(), AdaptorWithBuiltinExitFrame),
@@ -430,7 +432,7 @@
   // Jump to a faked try block that does the invoke, with a faked catch
   // block that sets the pending exception.
   __ jmp(&invoke);
-  __ bind(&handler_entry);
+  __ BindExceptionHandler(&handler_entry);
 
   // Store the current pc as the handler offset. It's used later to create the
   // handler table.
@@ -3453,6 +3455,11 @@
 void GenerateExceptionHandlingLandingPad(MacroAssembler* masm,
                                          Label* return_promise) {
   int catch_handler = __ pc_offset();
+
+#ifdef V8_ENABLE_CET_IBT
+  __ endbr64();
+#endif
+
   // Restore rsp to free the reserved stack slots for the sections.
   __ leaq(rsp, MemOperand(rbp, StackSwitchFrameConstants::kLastSpillOffset));
 
@@ -3658,6 +3665,9 @@
     SwitchBackAndReturnPromise(masm, r8, rdi, &return_promise);
   }
   __ bind(&suspend);
+#ifdef V8_ENABLE_CET_IBT
+  __ endbr64();
+#endif
 
   __ LeaveFrame(stack_switch ? StackFrame::STACK_SWITCH
                              : StackFrame::JS_TO_WASM);
@@ -3803,6 +3813,9 @@
   LoadJumpBuffer(masm, jmpbuf, true);
   __ Trap();
   __ bind(&resume);
+#ifdef V8_ENABLE_CET_IBT
+  __ endbr64();
+#endif
   __ LeaveFrame(StackFrame::STACK_SWITCH);
   __ ret(0);
 }
@@ -3934,6 +3947,9 @@
   }
   __ Trap();
   __ bind(&suspend);
+#ifdef V8_ENABLE_CET_IBT
+  __ endbr64();
+#endif
   __ LeaveFrame(StackFrame::STACK_SWITCH);
   // Pop receiver + parameter.
   __ ret(2 * kSystemPointerSize);
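
Note on the hunks above: &handler_entry, &suspend, and &resume are
entered indirectly (exception dispatch, and wasm stack switching
resuming via a saved jump-buffer pc, as the surrounding LoadJumpBuffer
code suggests), so each needs its own landing pad under IBT; labels
reached only by direct jumps or fallthrough stay as plain bind()s.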
diff --git a/src/codegen/x64/assembler-x64.cc b/src/codegen/x64/assembler-x64.cc
index 3c65b02..084c1f4 100644
--- a/src/codegen/x64/assembler-x64.cc
+++ b/src/codegen/x64/assembler-x64.cc
@@ -1321,6 +1321,14 @@
   emit(0xF4);
 }
 
+void Assembler::endbr64() {
+  EnsureSpace ensure_space(this);
+  emit(0xF3);
+  emit(0x0f);
+  emit(0x1e);
+  emit(0xfa);
+}
+
 void Assembler::emit_idiv(Register src, int size) {
   EnsureSpace ensure_space(this);
   emit_rex(src, size);
@@ -1588,6 +1596,32 @@
   emitl(code_target_index);
 }
 
+#ifdef V8_ENABLE_CET_IBT
+
+void Assembler::jmp(Register target, bool notrack) {
+  EnsureSpace ensure_space(this);
+  if (notrack) {
+    emit(0x3e);
+  }
+  // Opcode FF/4 r64.
+  emit_optional_rex_32(target);
+  emit(0xFF);
+  emit_modrm(0x4, target);
+}
+
+void Assembler::jmp(Operand src, bool notrack) {
+  EnsureSpace ensure_space(this);
+  if (notrack) {
+    emit(0x3e);
+  }
+  // Opcode FF/4 m64.
+  emit_optional_rex_32(src);
+  emit(0xFF);
+  emit_operand(0x4, src);
+}
+
+#else  // V8_ENABLE_CET_IBT
+
 void Assembler::jmp(Register target) {
   EnsureSpace ensure_space(this);
   // Opcode FF/4 r64.
@@ -1604,6 +1638,8 @@
   emit_operand(0x4, src);
 }
 
+#endif
+
 void Assembler::emit_lea(Register dst, Operand src, int size) {
   EnsureSpace ensure_space(this);
   emit_rex(dst, src, size);
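
Encoding note: the 0x3E byte emitted for notrack is the legacy DS
segment-override prefix, which CET repurposes as the NOTRACK hint on
indirect branches; pre-CET hardware ignores it, so the new encodings
remain backwards compatible.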
diff --git a/src/codegen/x64/assembler-x64.h b/src/codegen/x64/assembler-x64.h
index e4ba454..303b92e 100644
--- a/src/codegen/x64/assembler-x64.h
+++ b/src/codegen/x64/assembler-x64.h
@@ -628,6 +628,7 @@
   void pushq(Immediate value);
   // Push a 32 bit integer, and guarantee that it is actually pushed as a
   // 32 bit value, the normal push will optimize the 8 bit case.
+  static constexpr int kPushq32InstrSize = 5;
   void pushq_imm32(int32_t imm32);
   void pushq(Register src);
   void pushq(Operand src);
@@ -860,6 +861,7 @@
   void ret(int imm16);
   void ud2();
   void setcc(Condition cc, Register reg);
+  void endbr64();
 
   void pblendw(XMMRegister dst, Operand src, uint8_t mask);
   void pblendw(XMMRegister dst, XMMRegister src, uint8_t mask);
@@ -918,8 +920,13 @@
   void jmp(Handle<Code> target, RelocInfo::Mode rmode);
 
   // Jump near absolute indirect (r64)
+#ifdef V8_ENABLE_CET_IBT
+  void jmp(Register adr, bool notrack = false);
+  void jmp(Operand src, bool notrack = false);
+#else
   void jmp(Register adr);
   void jmp(Operand src);
+#endif
 
   // Unconditional jump relative to the current address. Low-level routine,
   // use with caution!
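
kPushq32InstrSize = 5 matches the pushq_imm32 encoding (opcode 68
followed by a 4-byte immediate); it is exported here, presumably so that
callers budgeting slot sizes byte-for-byte (such as the wasm jump table
below) can rely on it.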
diff --git a/src/codegen/x64/macro-assembler-x64.cc b/src/codegen/x64/macro-assembler-x64.cc
index 55b1f3c..6fe1a52 100644
--- a/src/codegen/x64/macro-assembler-x64.cc
+++ b/src/codegen/x64/macro-assembler-x64.cc
@@ -51,6 +51,12 @@
   return Operand(rsp, kPCOnStackSize + index * kSystemPointerSize);
 }
 
+void MacroAssembler::CodeEntry() {
+#ifdef V8_ENABLE_CET_IBT
+  endbr64();
+#endif
+}
+
 void MacroAssembler::Load(Register destination, ExternalReference source) {
   if (root_array_available_ && options().enable_root_relative_access) {
     intptr_t delta = RootRegisterOffsetForExternalReference(isolate(), source);
@@ -2192,7 +2198,12 @@
   cmpq(reg, Immediate(num_labels));
   j(above_equal, &fallthrough);
   leaq(table, MemOperand(&jump_table));
+#ifdef V8_ENABLE_CET_IBT
+  // Add the notrack prefix to disable landing pad enforcement.
+  jmp(MemOperand(table, reg, times_8, 0), /*notrack=*/true);
+#else
   jmp(MemOperand(table, reg, times_8, 0));
+#endif
   // Emit the jump table inline, under the assumption that it's not too big.
   Align(kSystemPointerSize);
   bind(&jump_table);
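
Design note on Switch: the jump-table entries are reachable only through
this single bounds-checked indirect jmp, so marking that one branch
notrack is presumably cheaper than emitting an endbr64 at every case
label; the tradeoff is that this particular branch is left untracked.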
diff --git a/src/codegen/x64/macro-assembler-x64.h b/src/codegen/x64/macro-assembler-x64.h
index 080f23c..394fb2a 100644
--- a/src/codegen/x64/macro-assembler-x64.h
+++ b/src/codegen/x64/macro-assembler-x64.h
@@ -654,13 +654,18 @@
 
   // Control-flow integrity:
 
-  // Define a function entrypoint. This doesn't emit any code for this
-  // architecture, as control-flow integrity is not supported for it.
-  void CodeEntry() {}
+  // Define a function entrypoint which will emit a landing pad instruction if
+  // required by the build config.
+  void CodeEntry();
   // Define an exception handler.
-  void ExceptionHandler() {}
+  void ExceptionHandler() { CodeEntry(); }
   // Define an exception handler and bind a label.
-  void BindExceptionHandler(Label* label) { bind(label); }
+  void BindExceptionHandler(Label* label) { BindJumpTarget(label); }
+  // Bind a jump target and mark it as a valid code entry.
+  void BindJumpTarget(Label* label) {
+    bind(label);
+    CodeEntry();
+  }
 
   // ---------------------------------------------------------------------------
   // Pointer compression support
diff --git a/src/compiler/backend/x64/code-generator-x64.cc b/src/compiler/backend/x64/code-generator-x64.cc
index 121aa47..8d2eee8 100644
--- a/src/compiler/backend/x64/code-generator-x64.cc
+++ b/src/compiler/backend/x64/code-generator-x64.cc
@@ -6827,11 +6827,20 @@
     // target = table + (target - table)
     __ addq(input, kScratchRegister);
     // Jump to the target.
+#ifdef V8_ENABLE_CET_IBT
+    // Add the notrack prefix to disable landing pad enforcement.
+    __ jmp(input, /*notrack=*/true);
+#else
     __ jmp(input);
+#endif
   } else {
     // For non builtins, the value in the table is 'target_address' (8 bytes)
     // jmp [table + index*8]
+#ifdef V8_ENABLE_CET_IBT
+    __ jmp(Operand(kScratchRegister, input, times_8, 0), /*notrack=*/true);
+#else
     __ jmp(Operand(kScratchRegister, input, times_8, 0));
+#endif
   }
 }
 
diff --git a/src/deoptimizer/x64/deoptimizer-x64.cc b/src/deoptimizer/x64/deoptimizer-x64.cc
index 47c8f60..e91e062 100644
--- a/src/deoptimizer/x64/deoptimizer-x64.cc
+++ b/src/deoptimizer/x64/deoptimizer-x64.cc
@@ -21,7 +21,12 @@
 #undef ASSERT_OFFSET
 
 const int Deoptimizer::kEagerDeoptExitSize = 4;
+#ifdef V8_ENABLE_CET_IBT
+// With IBT, the lazy deopt entry has an additional endbr64 instruction.
+const int Deoptimizer::kLazyDeoptExitSize = 8;
+#else
 const int Deoptimizer::kLazyDeoptExitSize = 4;
+#endif
 
 Float32 RegisterValues::GetFloatRegister(unsigned n) const {
   return Float32::FromBits(
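
The size bump is exactly the 4-byte endbr64 in front of the otherwise
unchanged 4-byte exit. A sketch of the accounting (constants mirror the
diff; the static_assert is illustrative):

  constexpr int kEndbr64Size = 4;         // F3 0F 1E FA
  constexpr int kEagerDeoptExitSize = 4;  // unchanged
  constexpr int kLazyDeoptExitSize = kEagerDeoptExitSize + kEndbr64Size;
  static_assert(kLazyDeoptExitSize == 8, "matches the x64 value above");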
diff --git a/src/maglev/x64/maglev-assembler-x64-inl.h b/src/maglev/x64/maglev-assembler-x64-inl.h
index 7864727..c954de6 100644
--- a/src/maglev/x64/maglev-assembler-x64-inl.h
+++ b/src/maglev/x64/maglev-assembler-x64-inl.h
@@ -232,7 +232,9 @@
   detail::PushAllHelper<T...>::PushReverse(this, vals...);
 }
 
-inline void MaglevAssembler::BindJumpTarget(Label* label) { bind(label); }
+inline void MaglevAssembler::BindJumpTarget(Label* label) {
+  MacroAssembler::BindJumpTarget(label);
+}
 
 inline void MaglevAssembler::BindBlock(BasicBlock* block) {
   bind(block->label());
diff --git a/src/maglev/x64/maglev-assembler-x64.cc b/src/maglev/x64/maglev-assembler-x64.cc
index 5e0ba08..e967f34 100644
--- a/src/maglev/x64/maglev-assembler-x64.cc
+++ b/src/maglev/x64/maglev-assembler-x64.cc
@@ -433,10 +433,12 @@
 void MaglevAssembler::Prologue(Graph* graph) {
   DCHECK(!graph->is_osr());
 
+  CodeEntry();
+
   BailoutIfDeoptimized(rbx);
 
   if (graph->has_recursive_calls()) {
-    bind(code_gen_state()->entry_label());
+    BindJumpTarget(code_gen_state()->entry_label());
   }
 
   // Tiering support.
diff --git a/src/regexp/x64/regexp-macro-assembler-x64.cc b/src/regexp/x64/regexp-macro-assembler-x64.cc
index 543caae..54f6a63 100644
--- a/src/regexp/x64/regexp-macro-assembler-x64.cc
+++ b/src/regexp/x64/regexp-macro-assembler-x64.cc
@@ -110,6 +110,7 @@
       backtrack_label_(),
       exit_label_() {
   DCHECK_EQ(0, registers_to_save % 2);
+  __ CodeEntry();
   __ jmp(&entry_label_);   // We'll write the entry code when we know more.
   __ bind(&start_label_);  // And then continue from here.
 }
@@ -170,7 +171,13 @@
   // and jump to location.
   Pop(rbx);
   __ addq(rbx, code_object_pointer());
+#ifdef V8_ENABLE_CET_IBT
+  // TODO(sroettger): This jump needs an endbr64 instruction, but the code is
+  // performance sensitive. Needs more thought on how to do this in a fast way.
+  __ jmp(rbx, /*notrack=*/true);
+#else
   __ jmp(rbx);
+#endif
 }
 
 
@@ -716,6 +723,12 @@
   }
 }
 
+void RegExpMacroAssemblerX64::BindJumpTarget(Label* label) {
+  Bind(label);
+  // TODO(sroettger): There should be an endbr64 instruction here, but it needs
+  // more thought on how to avoid perf regressions.
+}
+
 void RegExpMacroAssemblerX64::Fail() {
   static_assert(FAILURE == 0);  // Return value for failure is zero.
   if (!global()) {
diff --git a/src/regexp/x64/regexp-macro-assembler-x64.h b/src/regexp/x64/regexp-macro-assembler-x64.h
index 85dacfd..bc4fb07 100644
--- a/src/regexp/x64/regexp-macro-assembler-x64.h
+++ b/src/regexp/x64/regexp-macro-assembler-x64.h
@@ -59,6 +59,9 @@
   void CheckPosition(int cp_offset, Label* on_outside_input) override;
   bool CheckSpecialClassRanges(StandardCharacterSet type,
                                Label* on_no_match) override;
+
+  void BindJumpTarget(Label* label) override;
+
   void Fail() override;
   Handle<HeapObject> GetCode(Handle<String> source) override;
   void GoTo(Label* label) override;
diff --git a/src/wasm/jump-table-assembler.cc b/src/wasm/jump-table-assembler.cc
index 95687c5..cf480b9 100644
--- a/src/wasm/jump-table-assembler.cc
+++ b/src/wasm/jump-table-assembler.cc
@@ -75,14 +75,20 @@
 void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                  Address lazy_compile_target) {
   // Use a push, because mov to an extended register takes 6 bytes.
-  pushq_imm32(func_index);            // 5 bytes
-  EmitJumpSlot(lazy_compile_target);  // 5 bytes
+  pushq_imm32(func_index);  // 5 bytes
+  intptr_t displacement =
+      static_cast<intptr_t>(reinterpret_cast<uint8_t*>(lazy_compile_target) -
+                            (pc_ + kNearJmpInstrSize));
+  DCHECK(is_int32(displacement));
+  near_jmp(displacement, RelocInfo::NO_INFO);  // 5 bytes
 }
 
 bool JumpTableAssembler::EmitJumpSlot(Address target) {
-  intptr_t displacement = static_cast<intptr_t>(
-      reinterpret_cast<uint8_t*>(target) - pc_ - kNearJmpInstrSize);
+  intptr_t displacement =
+      static_cast<intptr_t>(reinterpret_cast<uint8_t*>(target) -
+                            (pc_ + kEndbrSize + kNearJmpInstrSize));
   if (!is_int32(displacement)) return false;
+  CodeEntry();                                 // kEndbrSize bytes (0 or 4)
   near_jmp(displacement, RelocInfo::NO_INFO);  // 5 bytes
   return true;
 }
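
The displacement math changes because rel32 in a near jmp is measured
from the first byte after the jmp, and with IBT the jmp now starts
kEndbrSize bytes into the slot. A minimal sketch (names mirror the diff;
not V8 code):

  #include <cstdint>

  constexpr int kEndbrSize = 4;         // 0 when IBT is disabled
  constexpr int kNearJmpInstrSize = 5;  // E9 + rel32

  int64_t SlotDisplacement(uint64_t slot_start, uint64_t target) {
    // rel32 is relative to the end of the jmp, i.e. to
    // slot_start + kEndbrSize + kNearJmpInstrSize.
    return static_cast<int64_t>(target) -
           static_cast<int64_t>(slot_start + kEndbrSize + kNearJmpInstrSize);
  }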
diff --git a/src/wasm/jump-table-assembler.h b/src/wasm/jump-table-assembler.h
index cd153a3..80ea425 100644
--- a/src/wasm/jump-table-assembler.h
+++ b/src/wasm/jump-table-assembler.h
@@ -173,8 +173,13 @@
 // that the instruction containing the call target does not cross cache-line
 // boundaries. The jump table line size has been chosen to satisfy this.
 #if V8_TARGET_ARCH_X64
+#ifdef V8_ENABLE_CET_IBT
+  static constexpr int kEndbrSize = 4;
+#else  // V8_ENABLE_CET_IBT
+  static constexpr int kEndbrSize = 0;
+#endif
   static constexpr int kJumpTableLineSize = 64;
-  static constexpr int kJumpTableSlotSize = 5;
+  static constexpr int kJumpTableSlotSize = 5 + kEndbrSize;
   static constexpr int kFarJumpTableSlotSize = 16;
   static constexpr int kLazyCompileTableSlotSize = 10;
 #elif V8_TARGET_ARCH_IA32
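
With IBT enabled, a regular jump-table slot grows from 5 to 9 bytes
(endbr64 + near jmp), so a 64-byte jump-table line presumably fits 7
slots instead of 12. The lazy-compile slot stays at 10 bytes: as the .cc
hunk above shows, it now emits the near_jmp directly instead of going
through EmitJumpSlot, so it gets no endbr64, likely because those slots
are only reached via direct near jumps.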
diff --git a/test/unittests/assembler/macro-assembler-x64-unittest.cc b/test/unittests/assembler/macro-assembler-x64-unittest.cc
index c7b73aa..8bd8e00 100644
--- a/test/unittests/assembler/macro-assembler-x64-unittest.cc
+++ b/test/unittests/assembler/macro-assembler-x64-unittest.cc
@@ -1226,7 +1226,11 @@
   for (int i = 0; i < kDeoptimizeKindCount; i++) {
     DeoptimizeKind kind = static_cast<DeoptimizeKind>(i);
     Label before_exit;
+
     masm.bind(&before_exit);
+    if (kind == DeoptimizeKind::kLazy) {
+      masm.CodeEntry();
+    }
     Builtin target = Deoptimizer::GetDeoptimizationEntry(kind);
     masm.CallForDeoptimization(target, 42, &before_exit, kind, &before_exit,
                                nullptr);