[regexp] Implement interrupt support in the interpreter

... similar to how we do this in native irregexp code, i.e. handle
interrupts on each backtrack. Unhandlified references into the code
ByteArray and the subject String object are updated after a potential
GC.

Since interrupts may change the subject string's representation, the
interpreter is now called in a loop to handle retries.

Bug: v8:8724
Change-Id: Ic34de8d69ccc56d4656b8ed080c2c168c212ebfc
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1511477
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Peter Marshall <petermarshall@chromium.org>
Cr-Commit-Position: refs/heads/master@{#60187}
diff --git a/src/regexp/interpreter-irregexp.cc b/src/regexp/interpreter-irregexp.cc
index 2138009..e7bf005 100644
--- a/src/regexp/interpreter-irregexp.cc
+++ b/src/regexp/interpreter-irregexp.cc
@@ -147,13 +147,76 @@
   DISALLOW_COPY_AND_ASSIGN(BacktrackStack);
 };
 
+namespace {
+
+// Runs all pending interrupts. Callers must update unhandlified object
+// references after this function completes.
+IrregexpInterpreter::Result HandleInterrupts(Isolate* isolate,
+                                             Handle<String> subject_string) {
+  DisallowHeapAllocation no_gc;
+
+  StackLimitCheck check(isolate);
+  if (check.JsHasOverflowed()) {
+    // A real stack overflow.
+    AllowHeapAllocation yes_gc;
+    isolate->StackOverflow();
+    return IrregexpInterpreter::EXCEPTION;
+  }
+
+  const bool was_one_byte =
+      String::IsOneByteRepresentationUnderneath(*subject_string);
+
+  Object result;
+  {
+    AllowHeapAllocation yes_gc;
+    result = isolate->stack_guard()->HandleInterrupts();
+  }
+
+  if (result->IsException(isolate)) {
+    return IrregexpInterpreter::EXCEPTION;
+  }
+
+  // If we changed between a LATIN1 and a UC16 string, we need to restart
+  // regexp matching with the appropriate template instantiation of RawMatch.
+  if (String::IsOneByteRepresentationUnderneath(*subject_string) !=
+      was_one_byte) {
+    return IrregexpInterpreter::RETRY;
+  }
+
+  return IrregexpInterpreter::SUCCESS;
+}
+
 template <typename Char>
-static IrregexpInterpreter::Result RawMatch(Isolate* isolate,
-                                            const byte* code_base,
-                                            Vector<const Char> subject,
-                                            int* registers, int current,
-                                            uint32_t current_char) {
-  const byte* pc = code_base;
+void UpdateCodeAndSubjectReferences(Isolate* isolate,
+                                    Handle<ByteArray> code_array,
+                                    Handle<String> subject_string,
+                                    const byte** code_base_out,
+                                    const byte** pc_out,
+                                    Vector<const Char>* subject_string_out) {
+  DisallowHeapAllocation no_gc;
+
+  if (*code_base_out != code_array->GetDataStartAddress()) {
+    const intptr_t pc_offset = *pc_out - *code_base_out;
+    DCHECK_GT(pc_offset, 0);
+    *code_base_out = code_array->GetDataStartAddress();
+    *pc_out = *code_base_out + pc_offset;
+  }
+
+  DCHECK(subject_string->IsFlat());
+  *subject_string_out = subject_string->GetCharVector<Char>(no_gc);
+}
+
+template <typename Char>
+IrregexpInterpreter::Result RawMatch(Isolate* isolate,
+                                     Handle<ByteArray> code_array,
+                                     Handle<String> subject_string,
+                                     Vector<const Char> subject, int* registers,
+                                     int current, uint32_t current_char) {
+  DisallowHeapAllocation no_gc;
+
+  const byte* pc = code_array->GetDataStartAddress();
+  const byte* code_base = pc;
+
   // BacktrackStack ensures that the memory allocated for the backtracking stack
   // is returned to the system or cached if there is no stack being cached at
   // the moment.
@@ -228,12 +291,21 @@
         current = *backtrack_sp;
         pc += BC_POP_CP_LENGTH;
         break;
-      BYTECODE(POP_BT)
+        // clang-format off
+      BYTECODE(POP_BT) {
+        IrregexpInterpreter::Result return_code = HandleInterrupts(
+            isolate, subject_string);
+        if (return_code != IrregexpInterpreter::SUCCESS) return return_code;
+
+        UpdateCodeAndSubjectReferences(isolate, code_array, subject_string,
+            &code_base, &pc, &subject);
+
         backtrack_stack_space++;
         --backtrack_sp;
         pc = code_base + *backtrack_sp;
         break;
-      BYTECODE(POP_REGISTER)
+      }
+      BYTECODE(POP_REGISTER)  // clang-format on
         backtrack_stack_space++;
         --backtrack_sp;
         registers[insn >> BYTECODE_SHIFT] = *backtrack_sp;
@@ -585,34 +657,28 @@
   }
 }
 
+}  // namespace
+
+// static
 IrregexpInterpreter::Result IrregexpInterpreter::Match(
-    Isolate* isolate, Handle<ByteArray> code_array, Handle<String> subject,
-    int* registers, int start_position) {
-  DCHECK(subject->IsFlat());
+    Isolate* isolate, Handle<ByteArray> code_array,
+    Handle<String> subject_string, int* registers, int start_position) {
+  DCHECK(subject_string->IsFlat());
 
   DisallowHeapAllocation no_gc;
-  const byte* code_base = code_array->GetDataStartAddress();
   uc16 previous_char = '\n';
-  String::FlatContent subject_content = subject->GetFlatContent(no_gc);
+  String::FlatContent subject_content = subject_string->GetFlatContent(no_gc);
   if (subject_content.IsOneByte()) {
     Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
     if (start_position != 0) previous_char = subject_vector[start_position - 1];
-    return RawMatch(isolate,
-                    code_base,
-                    subject_vector,
-                    registers,
-                    start_position,
-                    previous_char);
+    return RawMatch(isolate, code_array, subject_string, subject_vector,
+                    registers, start_position, previous_char);
   } else {
     DCHECK(subject_content.IsTwoByte());
     Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
     if (start_position != 0) previous_char = subject_vector[start_position - 1];
-    return RawMatch(isolate,
-                    code_base,
-                    subject_vector,
-                    registers,
-                    start_position,
-                    previous_char);
+    return RawMatch(isolate, code_array, subject_string, subject_vector,
+                    registers, start_position, previous_char);
   }
 }
 
diff --git a/src/regexp/interpreter-irregexp.h b/src/regexp/interpreter-irregexp.h
index adbcb32..4536423 100644
--- a/src/regexp/interpreter-irregexp.h
+++ b/src/regexp/interpreter-irregexp.h
@@ -19,8 +19,9 @@
   STATIC_ASSERT(FAILURE == static_cast<int>(RegExpImpl::RE_FAILURE));
   STATIC_ASSERT(SUCCESS == static_cast<int>(RegExpImpl::RE_SUCCESS));
 
+  // The caller is responsible for initializing registers before each call.
   static Result Match(Isolate* isolate, Handle<ByteArray> code_array,
-                      Handle<String> subject, int* registers,
+                      Handle<String> subject_string, int* registers,
                       int start_position);
 };
 
diff --git a/src/regexp/jsregexp.cc b/src/regexp/jsregexp.cc
index f7fbbfb..e53706a 100644
--- a/src/regexp/jsregexp.cc
+++ b/src/regexp/jsregexp.cc
@@ -477,26 +477,40 @@
     int number_of_capture_registers =
         (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
     int32_t* raw_output = &output[number_of_capture_registers];
-    // We do not touch the actual capture result registers until we know there
-    // has been a match so that we can use those capture results to set the
-    // last match info.
-    for (int i = number_of_capture_registers - 1; i >= 0; i--) {
-      raw_output[i] = -1;
-    }
-    Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_one_byte),
-                                 isolate);
 
-    IrregexpInterpreter::Result result = IrregexpInterpreter::Match(
-        isolate, byte_codes, subject, raw_output, index);
-    DCHECK_IMPLIES(result == IrregexpInterpreter::EXCEPTION,
-                   isolate->has_pending_exception());
+    do {
+      // We do not touch the actual capture result registers until we know there
+      // has been a match so that we can use those capture results to set the
+      // last match info.
+      for (int i = number_of_capture_registers - 1; i >= 0; i--) {
+        raw_output[i] = -1;
+      }
+      Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_one_byte),
+                                   isolate);
 
-    if (result == IrregexpInterpreter::SUCCESS) {
-      // Copy capture results to the start of the registers array.
-      MemCopy(output, raw_output,
-              number_of_capture_registers * sizeof(int32_t));
-    }
-    return result;
+      IrregexpInterpreter::Result result = IrregexpInterpreter::Match(
+          isolate, byte_codes, subject, raw_output, index);
+      DCHECK_IMPLIES(result == IrregexpInterpreter::EXCEPTION,
+                     isolate->has_pending_exception());
+
+      switch (result) {
+        case IrregexpInterpreter::SUCCESS:
+          // Copy capture results to the start of the registers array.
+          MemCopy(output, raw_output,
+                  number_of_capture_registers * sizeof(int32_t));
+          return result;
+        case IrregexpInterpreter::EXCEPTION:
+        case IrregexpInterpreter::FAILURE:
+          return result;
+        case IrregexpInterpreter::RETRY:
+          // The string has changed representation, and we must restart the
+          // match.
+          is_one_byte = String::IsOneByteRepresentationUnderneath(*subject);
+          EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte);
+          break;
+      }
+    } while (true);
+    UNREACHABLE();
   }
 }
 
diff --git a/test/cctest/cctest.status b/test/cctest/cctest.status
index 5266dbd..39e5dbe 100644
--- a/test/cctest/cctest.status
+++ b/test/cctest/cctest.status
@@ -413,12 +413,6 @@
 }],  # variant == stress_background_compile
 
 ##############################################################################
-['variant == interpreted_regexp', {
-  # Times out: https://crbug.com/v8/8678
-  'test-api/RegExpInterruption': [SKIP],
-}],  # variant == interpreted_regexp
-
-##############################################################################
 ['variant == no_wasm_traps', {
   'test-accessors/*': [SKIP],
   'test-api-interceptors/*': [SKIP],
@@ -503,7 +497,6 @@
   # Tests that generate code at runtime.
   'codegen-tester/*': [SKIP],
   'test-accessor-assembler/*': [SKIP],
-  'test-api/RegExpInterruption': [SKIP],
   'test-assembler-*': [SKIP],
   'test-basic-block-profiler/*': [SKIP],
   'test-branch-combine/*': [SKIP],
diff --git a/test/cctest/test-api.cc b/test/cctest/test-api.cc
index 474ec37..aef5bc1 100644
--- a/test/cctest/test-api.cc
+++ b/test/cctest/test-api.cc
@@ -15102,81 +15102,6 @@
   }
 }
 
-struct RegExpInterruptionData {
-  v8::base::Atomic32 loop_count;
-  UC16VectorResource* string_resource;
-  v8::Persistent<v8::String> string;
-} regexp_interruption_data;
-
-
-class RegExpInterruptionThread : public v8::base::Thread {
- public:
-  explicit RegExpInterruptionThread(v8::Isolate* isolate)
-      : Thread(Options("TimeoutThread")), isolate_(isolate) {}
-
-  void Run() override {
-    for (v8::base::Relaxed_Store(&regexp_interruption_data.loop_count, 0);
-         v8::base::Relaxed_Load(&regexp_interruption_data.loop_count) < 7;
-         v8::base::Relaxed_AtomicIncrement(&regexp_interruption_data.loop_count,
-                                           1)) {
-      // Wait a bit before requesting GC.
-      v8::base::OS::Sleep(v8::base::TimeDelta::FromMilliseconds(50));
-      reinterpret_cast<i::Isolate*>(isolate_)->stack_guard()->RequestGC();
-    }
-    // Wait a bit before terminating.
-    v8::base::OS::Sleep(v8::base::TimeDelta::FromMilliseconds(50));
-    isolate_->TerminateExecution();
-  }
-
- private:
-  v8::Isolate* isolate_;
-};
-
-
-void RunBeforeGC(v8::Isolate* isolate, v8::GCType type,
-                 v8::GCCallbackFlags flags) {
-  if (v8::base::Relaxed_Load(&regexp_interruption_data.loop_count) != 2) {
-    return;
-  }
-  v8::HandleScope scope(isolate);
-  v8::Local<v8::String> string = v8::Local<v8::String>::New(
-      CcTest::isolate(), regexp_interruption_data.string);
-  string->MakeExternal(regexp_interruption_data.string_resource);
-}
-
-
-// Test that RegExp execution can be interrupted.  Specifically, we test
-// * interrupting with GC
-// * turn the subject string from one-byte internal to two-byte external string
-// * force termination
-TEST(RegExpInterruption) {
-  LocalContext env;
-  v8::HandleScope scope(env->GetIsolate());
-
-  RegExpInterruptionThread timeout_thread(env->GetIsolate());
-
-  env->GetIsolate()->AddGCPrologueCallback(RunBeforeGC);
-  static const char* one_byte_content = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
-  i::uc16* uc16_content = AsciiToTwoByteString(one_byte_content);
-  v8::Local<v8::String> string = v8_str(one_byte_content);
-
-  env->Global()->Set(env.local(), v8_str("a"), string).FromJust();
-  regexp_interruption_data.string.Reset(env->GetIsolate(), string);
-  regexp_interruption_data.string_resource = new UC16VectorResource(
-      i::Vector<const i::uc16>(uc16_content, i::StrLength(one_byte_content)));
-
-  v8::TryCatch try_catch(env->GetIsolate());
-  timeout_thread.Start();
-
-  CompileRun("/((a*)*)*b/.exec(a)");
-  CHECK(try_catch.HasTerminated());
-
-  timeout_thread.Join();
-
-  regexp_interruption_data.string.Reset();
-  i::DeleteArray(uc16_content);
-}
-
 // Test that we cannot set a property on the global object if there
 // is a read-only property in the prototype chain.
 TEST(ReadOnlyPropertyInGlobalProto) {
@@ -22843,6 +22768,179 @@
   isolate->SetFailedAccessCheckCallbackFunction(nullptr);
 }
 
+namespace {
+
+const char kOneByteSubjectString[] = {
+    'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a',
+    'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a',
+    'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', '\0'};
+const uint16_t kTwoByteSubjectString[] = {
+    'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a',
+    'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a',
+    'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', '\0'};
+
+const int kSubjectStringLength = arraysize(kOneByteSubjectString) - 1;
+STATIC_ASSERT(arraysize(kOneByteSubjectString) ==
+              arraysize(kTwoByteSubjectString));
+
+OneByteVectorResource one_byte_string_resource(
+    i::Vector<const char>(&kOneByteSubjectString[0], kSubjectStringLength));
+UC16VectorResource two_byte_string_resource(
+    i::Vector<const i::uc16>(&kTwoByteSubjectString[0], kSubjectStringLength));
+
+class RegExpInterruptTest {
+ public:
+  RegExpInterruptTest()
+      : i_thread(this),
+        env_(),
+        isolate_(env_->GetIsolate()),
+        sem_(0),
+        ran_test_body_(false),
+        ran_to_completion_(false) {}
+
+  void RunTest(v8::InterruptCallback test_body_fn) {
+    v8::HandleScope handle_scope(isolate_);
+
+    i_thread.SetTestBody(test_body_fn);
+    i_thread.Start();
+
+    TestBody();
+
+    i_thread.Join();
+  }
+
+  static void CollectAllGarbage(v8::Isolate* isolate, void* data) {
+    i::Isolate* i_isolate = reinterpret_cast<i::Isolate*>(isolate);
+    i_isolate->heap()->PreciseCollectAllGarbage(
+        i::Heap::kNoGCFlags, i::GarbageCollectionReason::kRuntime);
+  }
+
+  static void MakeSubjectOneByteExternal(v8::Isolate* isolate, void* data) {
+    auto instance = reinterpret_cast<RegExpInterruptTest*>(data);
+
+    v8::HandleScope scope(isolate);
+    v8::Local<v8::String> string =
+        v8::Local<v8::String>::New(isolate, instance->string_handle_);
+    CHECK(string->CanMakeExternal());
+    string->MakeExternal(&one_byte_string_resource);
+  }
+
+  static void MakeSubjectTwoByteExternal(v8::Isolate* isolate, void* data) {
+    auto instance = reinterpret_cast<RegExpInterruptTest*>(data);
+
+    v8::HandleScope scope(isolate);
+    v8::Local<v8::String> string =
+        v8::Local<v8::String>::New(isolate, instance->string_handle_);
+    CHECK(string->CanMakeExternal());
+    string->MakeExternal(&two_byte_string_resource);
+  }
+
+ private:
+  static void SignalSemaphore(v8::Isolate* isolate, void* data) {
+    reinterpret_cast<RegExpInterruptTest*>(data)->sem_.Signal();
+  }
+
+  void CreateTestStrings() {
+    i::Isolate* i_isolate = reinterpret_cast<i::Isolate*>(isolate_);
+
+    // The string must be in old space to support externalization.
+    i::Handle<i::String> i_string =
+        i_isolate->factory()->NewStringFromAsciiChecked(
+            &kOneByteSubjectString[0], i::AllocationType::kOld);
+    v8::Local<v8::String> string = v8::Utils::ToLocal(i_string);
+
+    env_->Global()->Set(env_.local(), v8_str("a"), string).FromJust();
+
+    string_handle_.Reset(env_->GetIsolate(), string);
+  }
+
+  void TestBody() {
+    CHECK(!ran_test_body_.load());
+    CHECK(!ran_to_completion_.load());
+
+    CreateTestStrings();
+
+    v8::TryCatch try_catch(env_->GetIsolate());
+
+    isolate_->RequestInterrupt(&SignalSemaphore, this);
+    CompileRun("/((a*)*)*b/.exec(a)");
+
+    CHECK(try_catch.HasTerminated());
+    CHECK(ran_test_body_.load());
+    CHECK(ran_to_completion_.load());
+  }
+
+  class InterruptThread : public v8::base::Thread {
+   public:
+    explicit InterruptThread(RegExpInterruptTest* test)
+        : Thread(Options("RegExpInterruptTest")), test_(test) {}
+
+    void Run() override {
+      CHECK_NOT_NULL(test_body_fn_);
+
+      // Wait for JS execution to start.
+      test_->sem_.Wait();
+
+      // Sleep for a bit to allow irregexp execution to start up, then run the
+      // test body.
+      v8::base::OS::Sleep(v8::base::TimeDelta::FromMilliseconds(50));
+      test_->isolate_->RequestInterrupt(&RunTestBody, test_);
+      test_->isolate_->RequestInterrupt(&SignalSemaphore, test_);
+
+      // Wait for the scheduled interrupt to signal.
+      test_->sem_.Wait();
+
+      // Sleep again to resume irregexp execution, then terminate.
+      v8::base::OS::Sleep(v8::base::TimeDelta::FromMilliseconds(50));
+      test_->ran_to_completion_.store(true);
+      test_->isolate_->TerminateExecution();
+    }
+
+    static void RunTestBody(v8::Isolate* isolate, void* data) {
+      auto instance = reinterpret_cast<RegExpInterruptTest*>(data);
+      instance->i_thread.test_body_fn_(isolate, data);
+      instance->ran_test_body_.store(true);
+    }
+
+    void SetTestBody(v8::InterruptCallback callback) {
+      test_body_fn_ = callback;
+    }
+
+   private:
+    v8::InterruptCallback test_body_fn_;
+    RegExpInterruptTest* test_;
+  };
+
+  InterruptThread i_thread;
+
+  LocalContext env_;
+  v8::Isolate* isolate_;
+  v8::base::Semaphore sem_;  // Coordinates between main and interrupt threads.
+
+  v8::Persistent<v8::String> string_handle_;
+
+  std::atomic<bool> ran_test_body_;
+  std::atomic<bool> ran_to_completion_;
+};
+
+}  // namespace
+
+TEST(RegExpInterruptAndCollectAllGarbage) {
+  i::FLAG_always_compact = true;  // Move all movable objects on GC.
+  RegExpInterruptTest test;
+  test.RunTest(RegExpInterruptTest::CollectAllGarbage);
+}
+
+TEST(RegExpInterruptAndMakeSubjectOneByteExternal) {
+  RegExpInterruptTest test;
+  test.RunTest(RegExpInterruptTest::MakeSubjectOneByteExternal);
+}
+
+TEST(RegExpInterruptAndMakeSubjectTwoByteExternal) {
+  RegExpInterruptTest test;
+  test.RunTest(RegExpInterruptTest::MakeSubjectTwoByteExternal);
+}
+
 class RequestInterruptTestBase {
  public:
   RequestInterruptTestBase()
diff --git a/test/cctest/test-thread-termination.cc b/test/cctest/test-thread-termination.cc
index 074e516..604fd77 100644
--- a/test/cctest/test-thread-termination.cc
+++ b/test/cctest/test-thread-termination.cc
@@ -866,26 +866,23 @@
 };
 
 TEST(TerminateRegExp) {
-  // The regexp interpreter does not support preemption.
-  if (!i::FLAG_regexp_interpret_all) {
-    i::FLAG_allow_natives_syntax = true;
-    v8::Isolate* isolate = CcTest::isolate();
-    v8::HandleScope scope(isolate);
-    v8::Local<v8::ObjectTemplate> global = CreateGlobalTemplate(
-        isolate, TerminateCurrentThread, DoLoopCancelTerminate);
-    v8::Local<v8::Context> context = v8::Context::New(isolate, nullptr, global);
-    v8::Context::Scope context_scope(context);
-    CHECK(!isolate->IsExecutionTerminating());
-    v8::TryCatch try_catch(isolate);
-    CHECK(!isolate->IsExecutionTerminating());
-    CHECK(!CompileRun("var re = /(x+)+y$/; re.test('x');").IsEmpty());
-    TerminatorSleeperThread terminator(isolate, 100);
-    terminator.Start();
-    CHECK(CompileRun("re.test('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'); fail();")
-              .IsEmpty());
-    CHECK(try_catch.HasCaught());
-    CHECK(isolate->IsExecutionTerminating());
-  }
+  i::FLAG_allow_natives_syntax = true;
+  v8::Isolate* isolate = CcTest::isolate();
+  v8::HandleScope scope(isolate);
+  v8::Local<v8::ObjectTemplate> global = CreateGlobalTemplate(
+      isolate, TerminateCurrentThread, DoLoopCancelTerminate);
+  v8::Local<v8::Context> context = v8::Context::New(isolate, nullptr, global);
+  v8::Context::Scope context_scope(context);
+  CHECK(!isolate->IsExecutionTerminating());
+  v8::TryCatch try_catch(isolate);
+  CHECK(!isolate->IsExecutionTerminating());
+  CHECK(!CompileRun("var re = /(x+)+y$/; re.test('x');").IsEmpty());
+  TerminatorSleeperThread terminator(isolate, 100);
+  terminator.Start();
+  CHECK(CompileRun("re.test('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'); fail();")
+            .IsEmpty());
+  CHECK(try_catch.HasCaught());
+  CHECK(isolate->IsExecutionTerminating());
 }
 
 TEST(TerminateInMicrotask) {