| // Copyright 2016 the V8 project authors. All rights reserved. | 
 | // Use of this source code is governed by a BSD-style license that can be | 
 | // found in the LICENSE file. | 
 |  | 
 | #include "src/regexp/regexp-parser.h" | 
 |  | 
 | #include "src/execution/isolate.h" | 
 | #include "src/objects/string-inl.h" | 
 | #include "src/regexp/regexp-ast.h" | 
 | #include "src/regexp/regexp-macro-assembler.h" | 
 | #include "src/regexp/regexp.h" | 
 | #include "src/strings/char-predicates-inl.h" | 
 | #include "src/utils/ostreams.h" | 
 | #include "src/utils/utils.h" | 
 | #include "src/zone/zone-allocator.h" | 
 | #include "src/zone/zone-list-inl.h" | 
 |  | 
 | #ifdef V8_INTL_SUPPORT | 
 | #include "unicode/uniset.h" | 
 | #include "unicode/unistr.h" | 
 | #include "unicode/usetiter.h" | 
 | #include "unicode/utf16.h"  // For U16_NEXT | 
 | #endif                      // V8_INTL_SUPPORT | 
 |  | 
 | namespace v8 { | 
 | namespace internal { | 
 |  | 
 | namespace { | 
 |  | 
 | // Tells the escape-parsing routines whether they are currently working inside | 
 | // the ClassEscape production (tc39.es/ecma262/#prod-annexB-CharacterEscape). | 
 | enum class InClassEscapeState { | 
 |   kInClass,     // Parsing inside the ClassEscape production. | 
 |   kNotInClass,  // Parsing anywhere else. | 
 | }; | 
 |  | 
 | // Records which production a ClassSetOperand was derived from. | 
 | enum class ClassSetOperandType { | 
 |   kClassSetCharacter, | 
 |   kClassStringDisjunction, | 
 |   kNestedClass, | 
 |   // \ CharacterClassEscape is a special nested class, as we can fold it | 
 |   // directly into another range. | 
 |   kCharacterClassEscape, | 
 |   kClassSetRange | 
 | }; | 
 |  | 
 | // Gathers characters and text elements and turns them into terms. Plain | 
 | // characters are buffered in |characters_|, finished text elements are kept in | 
 | // |text_|, and completed terms are appended to the externally provided | 
 | // |terms_| storage. In unicode mode, surrogates are tracked so that a lead | 
 | // surrogate can pair up with a trail surrogate that follows it. | 
 | class RegExpTextBuilder { | 
 |  public: | 
 |   using SmallRegExpTreeVector = SmallZoneVector<RegExpTree*, 8>; | 
 |  | 
 |   // |terms_storage| is only referenced, not copied; terms get appended to it. | 
 |   RegExpTextBuilder(Zone* zone, SmallRegExpTreeVector* terms_storage, | 
 |                     RegExpFlags flags) | 
 |       : zone_(zone), flags_(flags), terms_(terms_storage), text_(zone) {} | 
 |  | 
 |   // Appending input. | 
 |   void AddCharacter(base::uc16 character); | 
 |   void AddUnicodeCharacter(base::uc32 character); | 
 |   void AddEscapedUnicodeCharacter(base::uc32 character); | 
 |   void AddAtom(RegExpTree* atom); | 
 |   void AddTerm(RegExpTree* term); | 
 |   void AddClassRanges(RegExpClassRanges* cc); | 
 |  | 
 |   // Flushing buffered state and extracting results. | 
 |   void FlushPendingSurrogate(); | 
 |   void FlushText(); | 
 |   RegExpTree* PopLastAtom(); | 
 |   RegExpTree* ToRegExp(); | 
 |  | 
 |  private: | 
 |   // Value of |pending_surrogate_| while no surrogate is being held back. | 
 |   static const base::uc16 kNoPendingSurrogate = 0; | 
 |  | 
 |   void AddLeadSurrogate(base::uc16 lead_surrogate); | 
 |   void AddTrailSurrogate(base::uc16 trail_surrogate); | 
 |   void FlushCharacters(); | 
 |   bool NeedsDesugaringForUnicode(RegExpClassRanges* cc); | 
 |   bool NeedsDesugaringForIgnoreCase(base::uc32 c); | 
 |   void AddClassRangesForDesugaring(base::uc32 c); | 
 |  | 
 |   bool ignore_case() const { return IsIgnoreCase(flags_); } | 
 |   // UnicodeMode is enabled by either of the /u and /v flags, see | 
 |   // https://tc39.es/ecma262/#sec-parsepattern | 
 |   bool IsUnicodeMode() const { | 
 |     return IsUnicodeSets(flags_) || IsUnicode(flags_); | 
 |   } | 
 |   Zone* zone() const { return zone_; } | 
 |  | 
 |   Zone* const zone_; | 
 |   const RegExpFlags flags_; | 
 |   // Buffer of plain characters; allocated on first use. | 
 |   ZoneList<base::uc16>* characters_ = nullptr; | 
 |   // Surrogate that is held back until the next element is seen. | 
 |   base::uc16 pending_surrogate_ = kNoPendingSurrogate; | 
 |   // Destination for completed terms. | 
 |   SmallRegExpTreeVector* terms_; | 
 |   // Text elements collected since the last FlushText(). | 
 |   SmallRegExpTreeVector text_; | 
 | }; | 
 |  | 
 | // Remembers |lead_surrogate| as pending after emitting any surrogate that was | 
 | // pending before it. | 
 | void RegExpTextBuilder::AddLeadSurrogate(base::uc16 lead_surrogate) { | 
 |   DCHECK(unibrow::Utf16::IsLeadSurrogate(lead_surrogate)); | 
 |   // An older pending surrogate can no longer be paired up; emit it first. | 
 |   FlushPendingSurrogate(); | 
 |   // Keep the lead surrogate around in case a trail surrogate comes next. | 
 |   pending_surrogate_ = lead_surrogate; | 
 | } | 
 |  | 
 | // Adds a trail surrogate. If a surrogate is pending, the two are combined into | 
 | // one code point; otherwise the trail surrogate is emitted on its own. | 
 | void RegExpTextBuilder::AddTrailSurrogate(base::uc16 trail_surrogate) { | 
 |   DCHECK(unibrow::Utf16::IsTrailSurrogate(trail_surrogate)); | 
 |  | 
 |   if (pending_surrogate_ == kNoPendingSurrogate) { | 
 |     // Nothing to pair up with: route the lone trail surrogate through the | 
 |     // pending-surrogate flush. | 
 |     pending_surrogate_ = trail_surrogate; | 
 |     FlushPendingSurrogate(); | 
 |     return; | 
 |   } | 
 |  | 
 |   const base::uc16 lead = pending_surrogate_; | 
 |   pending_surrogate_ = kNoPendingSurrogate; | 
 |   DCHECK(unibrow::Utf16::IsLeadSurrogate(lead)); | 
 |   const base::uc32 code_point = | 
 |       unibrow::Utf16::CombineSurrogatePair(lead, trail_surrogate); | 
 |  | 
 |   if (NeedsDesugaringForIgnoreCase(code_point)) { | 
 |     AddClassRangesForDesugaring(code_point); | 
 |     return; | 
 |   } | 
 |  | 
 |   // No desugaring required: add the pair as a two-code-unit atom. | 
 |   ZoneList<base::uc16> pair(2, zone()); | 
 |   pair.Add(lead, zone()); | 
 |   pair.Add(trail_surrogate, zone()); | 
 |   AddAtom(zone()->New<RegExpAtom>(pair.ToConstVector())); | 
 | } | 
 |  | 
 | // Emits the pending surrogate, if there is one, as a single-character class | 
 | // term. | 
 | void RegExpTextBuilder::FlushPendingSurrogate() { | 
 |   if (pending_surrogate_ == kNoPendingSurrogate) return; | 
 |   DCHECK(IsUnicodeMode()); | 
 |   const base::uc32 surrogate = pending_surrogate_; | 
 |   // Clear the pending state before emitting: adding the term flushes text, | 
 |   // which ends up calling back into this function. | 
 |   pending_surrogate_ = kNoPendingSurrogate; | 
 |   AddClassRangesForDesugaring(surrogate); | 
 | } | 
 |  | 
 | // Converts the buffered characters (if any) into a RegExpAtom and appends it | 
 | // to |text_|. | 
 | void RegExpTextBuilder::FlushCharacters() { | 
 |   FlushPendingSurrogate(); | 
 |   if (characters_ == nullptr) return; | 
 |   RegExpTree* buffered = zone()->New<RegExpAtom>(characters_->ToConstVector()); | 
 |   characters_ = nullptr; | 
 |   text_.emplace_back(buffered); | 
 | } | 
 |  | 
 | // Moves everything collected in |text_| into |terms_|: a single element is | 
 | // added as is, several elements are merged into one RegExpText. | 
 | void RegExpTextBuilder::FlushText() { | 
 |   FlushCharacters(); | 
 |   const size_t element_count = text_.size(); | 
 |   if (element_count == 0) return; | 
 |  | 
 |   if (element_count == 1) { | 
 |     terms_->emplace_back(text_.back()); | 
 |   } else { | 
 |     RegExpText* merged = zone()->New<RegExpText>(zone()); | 
 |     for (RegExpTree* element : text_) { | 
 |       element->AppendToText(merged, zone()); | 
 |     } | 
 |     terms_->emplace_back(merged); | 
 |   } | 
 |   text_.clear(); | 
 | } | 
 |  | 
 | // Appends a single code unit to the character buffer. | 
 | void RegExpTextBuilder::AddCharacter(base::uc16 c) { | 
 |   FlushPendingSurrogate(); | 
 |   // The buffer is created on demand. | 
 |   if (!characters_) { | 
 |     characters_ = zone()->New<ZoneList<base::uc16>>(4, zone()); | 
 |   } | 
 |   characters_->Add(c, zone()); | 
 | } | 
 |  | 
 | // Adds a code point. Values above kMaxNonSurrogateCharCode are split into a | 
 | // lead and a trail surrogate; in unicode mode, surrogate code units go through | 
 | // the surrogate handling; everything else is added as a plain character. | 
 | void RegExpTextBuilder::AddUnicodeCharacter(base::uc32 c) { | 
 |   if (c > static_cast<base::uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) { | 
 |     DCHECK(IsUnicodeMode()); | 
 |     AddLeadSurrogate(unibrow::Utf16::LeadSurrogate(c)); | 
 |     AddTrailSurrogate(unibrow::Utf16::TrailSurrogate(c)); | 
 |     return; | 
 |   } | 
 |   if (IsUnicodeMode()) { | 
 |     if (unibrow::Utf16::IsLeadSurrogate(c)) { | 
 |       AddLeadSurrogate(c); | 
 |       return; | 
 |     } | 
 |     if (unibrow::Utf16::IsTrailSurrogate(c)) { | 
 |       AddTrailSurrogate(c); | 
 |       return; | 
 |     } | 
 |   } | 
 |   AddCharacter(static_cast<base::uc16>(c)); | 
 | } | 
 |  | 
 | // Adds a code point that was written as an escape sequence. | 
 | void RegExpTextBuilder::AddEscapedUnicodeCharacter(base::uc32 character) { | 
 |   // Surrogates that come from an escape sequence never pair up: flush before | 
 |   // adding so a preceding lead surrogate stays alone, and flush afterwards so | 
 |   // a following trail surrogate cannot attach to this one. | 
 |   FlushPendingSurrogate(); | 
 |   AddUnicodeCharacter(character); | 
 |   FlushPendingSurrogate(); | 
 | } | 
 |  | 
 | // Adds a character class, either as part of the current text or as a term of | 
 | // its own. | 
 | void RegExpTextBuilder::AddClassRanges(RegExpClassRanges* cr) { | 
 |   if (!NeedsDesugaringForUnicode(cr)) { | 
 |     AddAtom(cr); | 
 |     return; | 
 |   } | 
 |   // With /u or /v, character class needs to be desugared, so it must be a | 
 |   // standalone term instead of being part of a RegExpText. | 
 |   AddTerm(cr); | 
 | } | 
 |  | 
 | // Adds |c| as a term consisting of a character class that contains only |c|. | 
 | void RegExpTextBuilder::AddClassRangesForDesugaring(base::uc32 c) { | 
 |   ZoneList<CharacterRange>* single_range = | 
 |       CharacterRange::List(zone(), CharacterRange::Singleton(c)); | 
 |   AddTerm(zone()->New<RegExpClassRanges>(zone(), single_range)); | 
 | } | 
 |  | 
 | // Appends a text element to |text_|, after converting any buffered characters | 
 | // into an atom so that the order of elements is kept. | 
 | void RegExpTextBuilder::AddAtom(RegExpTree* atom) { | 
 |   DCHECK(atom->IsTextElement()); | 
 |   FlushCharacters(); | 
 |   text_.emplace_back(atom); | 
 | } | 
 |  | 
 | // Appends |term| to |terms_| as a standalone term. All text collected so far | 
 | // is flushed first, so it ends up in front of |term|. | 
 | void RegExpTextBuilder::AddTerm(RegExpTree* term) { | 
 |   DCHECK(term->IsTextElement()); | 
 |   FlushText(); | 
 |   terms_->emplace_back(term); | 
 | } | 
 |  | 
 | // Returns true if |cc| has to be desugared in unicode mode: always when | 
 | // ignoring case, otherwise when the (possibly negated) ranges reach into the | 
 | // non-BMP area or overlap the surrogate area. | 
 | bool RegExpTextBuilder::NeedsDesugaringForUnicode(RegExpClassRanges* cc) { | 
 |   if (!IsUnicodeMode()) return false; | 
 |   // TODO(yangguo): we could be smarter than this. Case-insensitivity does not | 
 |   // necessarily mean that we need to desugar. It's probably nicer to have a | 
 |   // separate pass to figure out unicode desugarings. | 
 |   if (ignore_case()) return true; | 
 |  | 
 |   ZoneList<CharacterRange>* effective = cc->ranges(zone()); | 
 |   CharacterRange::Canonicalize(effective); | 
 |   if (cc->is_negated()) { | 
 |     // Inspect the characters the class actually matches. | 
 |     ZoneList<CharacterRange>* complement = | 
 |         zone()->New<ZoneList<CharacterRange>>(effective->length(), zone()); | 
 |     CharacterRange::Negate(effective, complement, zone()); | 
 |     effective = complement; | 
 |   } | 
 |  | 
 |   const int range_count = effective->length(); | 
 |   for (int index = 0; index < range_count; index++) { | 
 |     const base::uc32 first = effective->at(index).from(); | 
 |     const base::uc32 last = effective->at(index).to(); | 
 |     const bool has_non_bmp = last >= kNonBmpStart; | 
 |     const bool has_lone_surrogate = | 
 |         first <= kTrailSurrogateEnd && last >= kLeadSurrogateStart; | 
 |     if (has_non_bmp || has_lone_surrogate) return true; | 
 |   } | 
 |   return false; | 
 | } | 
 |  | 
 | // Only used for characters that consist of a surrogate pair. Every other | 
 | // character outside of a character class is made case independent during | 
 | // code generation. | 
 | bool RegExpTextBuilder::NeedsDesugaringForIgnoreCase(base::uc32 c) { | 
 | #ifdef V8_INTL_SUPPORT | 
 |   if (!IsUnicodeMode() || !ignore_case()) return false; | 
 |   // Desugar iff the case-insensitive closure of |c| holds more than |c|. | 
 |   icu::UnicodeSet closure(c, c); | 
 |   closure.closeOver(USET_CASE_INSENSITIVE); | 
 |   closure.removeAllStrings(); | 
 |   return closure.size() > 1; | 
 | #else | 
 |   // In the case where ICU is not included, we act as if the unicode flag is | 
 |   // not set, and do not desugar. | 
 |   return false; | 
 | #endif  // V8_INTL_SUPPORT | 
 | } | 
 |  | 
 | // Removes and returns the most recently added atom, or nullptr if neither | 
 | // buffered characters nor text elements are available. With buffered | 
 | // characters, only the last one is returned (as an atom of its own); the | 
 | // characters in front of it are pushed to |text_| as a separate atom. | 
 | RegExpTree* RegExpTextBuilder::PopLastAtom() { | 
 |   FlushPendingSurrogate(); | 
 |  | 
 |   if (characters_ == nullptr) { | 
 |     if (text_.empty()) return nullptr; | 
 |     RegExpTree* last_element = text_.back(); | 
 |     text_.pop_back(); | 
 |     return last_element; | 
 |   } | 
 |  | 
 |   base::Vector<const base::uc16> chars = characters_->ToConstVector(); | 
 |   const int length = chars.length(); | 
 |   if (length > 1) { | 
 |     // Split off everything but the final character. | 
 |     text_.emplace_back( | 
 |         zone()->New<RegExpAtom>(chars.SubVector(0, length - 1))); | 
 |     chars = chars.SubVector(length - 1, length); | 
 |   } | 
 |   characters_ = nullptr; | 
 |   return zone()->New<RegExpAtom>(chars); | 
 | } | 
 |  | 
 | // Flushes pending text and returns a tree for the collected terms: | 
 | // RegExpEmpty for no terms, the term itself for exactly one, and a | 
 | // RegExpAlternative holding all of them otherwise. | 
 | RegExpTree* RegExpTextBuilder::ToRegExp() { | 
 |   FlushText(); | 
 |   switch (terms_->size()) { | 
 |     case 0: | 
 |       return zone()->New<RegExpEmpty>(); | 
 |     case 1: | 
 |       return terms_->back(); | 
 |     default: { | 
 |       ZoneList<RegExpTree*>* term_list = zone()->New<ZoneList<RegExpTree*>>( | 
 |           base::VectorOf(terms_->begin(), terms_->size()), zone()); | 
 |       return zone()->New<RegExpAlternative>(term_list); | 
 |     } | 
 |   } | 
 | } | 
 |  | 
 | // Collects RegExp atoms and assertions and arranges them into lists of terms | 
 | // and alternatives. Text handling is delegated to a RegExpTextBuilder that | 
 | // appends to this builder's |terms_|. | 
 | class RegExpBuilder { | 
 |  public: | 
 |   RegExpBuilder(Zone* zone, RegExpFlags flags) | 
 |       : zone_(zone), | 
 |         flags_(flags), | 
 |         terms_(zone), | 
 |         alternatives_(zone), | 
 |         text_builder_(RegExpTextBuilder{zone, &terms_, flags}) {} | 
 |  | 
 |   // Adding characters. | 
 |   void AddCharacter(base::uc16 character); | 
 |   void AddUnicodeCharacter(base::uc32 character); | 
 |   void AddEscapedUnicodeCharacter(base::uc32 character); | 
 |  | 
 |   // "Adds" an empty expression. Has no effect other than consuming a | 
 |   // quantifier that follows. | 
 |   void AddEmpty(); | 
 |  | 
 |   // Adding tree nodes. | 
 |   void AddClassRanges(RegExpClassRanges* cc); | 
 |   void AddAtom(RegExpTree* tree); | 
 |   void AddTerm(RegExpTree* tree); | 
 |   void AddAssertion(RegExpTree* tree); | 
 |  | 
 |   // Called for '|'. | 
 |   void NewAlternative(); | 
 |   bool AddQuantifierToAtom(int min, int max, int index, | 
 |                            RegExpQuantifier::QuantifierType type); | 
 |   void FlushText(); | 
 |   RegExpTree* ToRegExp(); | 
 |  | 
 |   // Flag accessors. | 
 |   RegExpFlags flags() const { return flags_; } | 
 |   bool ignore_case() const { return IsIgnoreCase(flags_); } | 
 |   bool multiline() const { return IsMultiline(flags_); } | 
 |   bool dotall() const { return IsDotAll(flags_); } | 
 |  | 
 |  private: | 
 |   void FlushTerms(); | 
 |   // UnicodeMode is enabled by either of the /u and /v flags, see | 
 |   // https://tc39.es/ecma262/#sec-parsepattern | 
 |   bool IsUnicodeMode() const { | 
 |     return IsUnicodeSets(flags_) || IsUnicode(flags_); | 
 |   } | 
 |   Zone* zone() const { return zone_; } | 
 |   RegExpTextBuilder& text_builder() { return text_builder_; } | 
 |  | 
 |   Zone* const zone_; | 
 |   bool pending_empty_ = false; | 
 |   const RegExpFlags flags_; | 
 |  | 
 |   using SmallRegExpTreeVector = SmallZoneVector<RegExpTree*, 8>; | 
 |   SmallRegExpTreeVector terms_; | 
 |   SmallRegExpTreeVector alternatives_; | 
 |   // Holds a pointer to |terms_|, so it is declared (and hence initialized) | 
 |   // after it. | 
 |   RegExpTextBuilder text_builder_; | 
 | }; | 
 |  | 
 | // Kind of (sub)expression a parser state belongs to. | 
 | enum SubexpressionType { | 
 |   INITIAL, | 
 |   // All positive values represent captures. | 
 |   CAPTURE, | 
 |   POSITIVE_LOOKAROUND, | 
 |   NEGATIVE_LOOKAROUND, | 
 |   GROUPING | 
 | }; | 
 |  | 
 | // One entry of the parser's stack of states. Each entry owns the builder for | 
 | // the disjunction being parsed and links to the state of the enclosing | 
 | // expression via |previous_state_|. | 
 | class RegExpParserState : public ZoneObject { | 
 |  public: | 
 |   // Pushes a new state on top of |previous_state| (nullptr for the outermost | 
 |   // state). The non-participating capture group intervals of the previous | 
 |   // state are carried over into the new one. | 
 |   RegExpParserState(RegExpParserState* previous_state, | 
 |                     SubexpressionType group_type, | 
 |                     RegExpLookaround::Type lookaround_type, | 
 |                     int disjunction_capture_index, | 
 |                     const ZoneVector<base::uc16>* capture_name, | 
 |                     RegExpFlags flags, Zone* zone) | 
 |       : previous_state_(previous_state), | 
 |         builder_(zone, flags), | 
 |         group_type_(group_type), | 
 |         lookaround_type_(lookaround_type), | 
 |         disjunction_capture_index_(disjunction_capture_index), | 
 |         capture_name_(capture_name), | 
 |         non_participating_capture_group_intervals_(zone) { | 
 |     if (previous_state != nullptr) { | 
 |       non_participating_capture_group_intervals_.insert( | 
 |           non_participating_capture_group_intervals_.begin(), | 
 |           previous_state->non_participating_capture_group_intervals_); | 
 |     } | 
 |   } | 
 |   using IntervalVector = SmallZoneVector<Interval, 1>; | 
 |  | 
 |   // State of the enclosing expression; nullptr if there is none. | 
 |   RegExpParserState* previous_state() const { return previous_state_; } | 
 |   bool IsSubexpression() { return previous_state_ != nullptr; } | 
 |   // The builder that assembles the AST of this (sub)expression. | 
 |   RegExpBuilder* builder() { return &builder_; } | 
 |   // What is being parsed: a parenthesized group or the entire regexp. | 
 |   SubexpressionType group_type() const { return group_type_; } | 
 |   // Lookahead or Lookbehind. | 
 |   RegExpLookaround::Type lookaround_type() const { return lookaround_type_; } | 
 |   // Index in the captures array of the first capture in this sub-expression, | 
 |   // if any. For group type CAPTURE this is also the capture index of the | 
 |   // sub-expression itself. | 
 |   int capture_index() const { return disjunction_capture_index_; } | 
 |   // Name of the current sub-expression for group type CAPTURE; only used for | 
 |   // named captures. | 
 |   const ZoneVector<base::uc16>* capture_name() const { return capture_name_; } | 
 |   const IntervalVector& non_participating_capture_group_intervals() const { | 
 |     return non_participating_capture_group_intervals_; | 
 |   } | 
 |  | 
 |   bool IsNamedCapture() const { return capture_name_ != nullptr; } | 
 |  | 
 |   // Returns true if this state or one of the enclosing states is a capture | 
 |   // group with the given index. | 
 |   bool IsInsideCaptureGroup(int index) const { | 
 |     const RegExpParserState* state = this; | 
 |     while (state != nullptr) { | 
 |       if (state->group_type() == CAPTURE) { | 
 |         const int state_index = state->capture_index(); | 
 |         // Found the capture we were looking for. | 
 |         if (state_index == index) return true; | 
 |         // Abort if index is larger than what has been parsed up till this | 
 |         // state. | 
 |         if (state_index < index) return false; | 
 |       } | 
 |       state = state->previous_state(); | 
 |     } | 
 |     return false; | 
 |   } | 
 |  | 
 |   // Returns true if this state or one of the enclosing states carries a | 
 |   // capture name equal to |name|. | 
 |   bool IsInsideCaptureGroup(const ZoneVector<base::uc16>* name) const { | 
 |     DCHECK_NOT_NULL(name); | 
 |     const RegExpParserState* state = this; | 
 |     while (state != nullptr) { | 
 |       const ZoneVector<base::uc16>* state_name = state->capture_name(); | 
 |       if (state_name != nullptr && *state_name == *name) return true; | 
 |       state = state->previous_state(); | 
 |     } | 
 |     return false; | 
 |   } | 
 |  | 
 |   // Records the captures started so far in this state as non-participating, | 
 |   // i.e. as the closed interval [capture_index() + 1, captures_started]. | 
 |   void NewAlternative(int captures_started) { | 
 |     // Nothing to do if there were no new captures started before the | 
 |     // alternative. | 
 |     if (captures_started == capture_index()) return; | 
 |  | 
 |     // +1 to create a closed interval (capture_index() is exclusive). | 
 |     const int from = capture_index() + 1; | 
 |     const int to = captures_started; | 
 |     DCHECK_LE(from, to); | 
 |  | 
 |     // The last interval is extended if its range grows by exactly 1. | 
 |     const bool extends_last_interval = | 
 |         !non_participating_capture_group_intervals().empty() && | 
 |         non_participating_capture_group_intervals().back().to() + 1 == to; | 
 |     if (!extends_last_interval) { | 
 |       non_participating_capture_group_intervals_.push_back({from, to}); | 
 |       return; | 
 |     } | 
 |     Interval& last = non_participating_capture_group_intervals_.back(); | 
 |     DCHECK(!last.is_empty()); | 
 |     DCHECK_GE(from, last.from()); | 
 |     last = last.Union({from, to}); | 
 |   } | 
 |  | 
 |  private: | 
 |   // The stack of states is implemented as a linked list. | 
 |   RegExpParserState* const previous_state_; | 
 |   // Builder for the stored disjunction. | 
 |   RegExpBuilder builder_; | 
 |   // Type of the stored disjunction (capture, look-ahead or grouping), if any. | 
 |   const SubexpressionType group_type_; | 
 |   // Stored read direction. | 
 |   const RegExpLookaround::Type lookaround_type_; | 
 |   // Capture index of the stored disjunction (if any). | 
 |   const int disjunction_capture_index_; | 
 |   // Stored capture name (if any). | 
 |   const ZoneVector<base::uc16>* const capture_name_; | 
 |   // Intervals [from, to] of (named) capture indices that do not participate | 
 |   // in the current state, i.e. that cannot match. A capture index does not | 
 |   // participate if it was created in a different alternative. | 
 |   IntervalVector non_participating_capture_group_intervals_; | 
 | }; | 
 |  | 
 | // The parser itself, templatized on the code unit type of the input. All | 
 | // members are private; v8::internal::RegExpParser is a friend. | 
 | template <class CharT> | 
 | class RegExpParserImpl final { | 
 |  private: | 
 |   RegExpParserImpl(const CharT* input, int input_length, RegExpFlags flags, | 
 |                    uintptr_t stack_limit, Zone* zone, | 
 |                    const DisallowGarbageCollection& no_gc); | 
 |  | 
 |   bool Parse(RegExpCompileData* result); | 
 |  | 
 |   RegExpTree* ParsePattern(); | 
 |   RegExpTree* ParseDisjunction(); | 
 |   RegExpTree* ParseGroup(); | 
 |  | 
 |   // Parses a quantifier of the form {...,...}; the parsed range is handed | 
 |   // back through the two out parameters. | 
 |   bool ParseIntervalQuantifier(int* min_out, int* max_out); | 
 |  | 
 |   // Checks whether a hexadecimal number with |length| digits follows and, if | 
 |   // so, stores its value. | 
 |   bool ParseHexEscape(int length, base::uc32* value); | 
 |   bool ParseUnicodeEscape(base::uc32* value); | 
 |   bool ParseUnlimitedLengthHexNumber(int max_value, base::uc32* value); | 
 |  | 
 |   bool ParsePropertyClassName(ZoneVector<char>* name_1, | 
 |                               ZoneVector<char>* name_2); | 
 |   bool AddPropertyClassRange(ZoneList<CharacterRange>* add_to_range, | 
 |                              CharacterClassStrings* add_to_strings, bool negate, | 
 |                              const ZoneVector<char>& name_1, | 
 |                              const ZoneVector<char>& name_2); | 
 |  | 
 |   RegExpTree* ParseClassRanges(ZoneList<CharacterRange>* ranges, | 
 |                                bool add_unicode_case_equivalents); | 
 |   // Parses an escape inside a class. | 
 |   // NOTE(review): presumably |is_class_escape| reports whether an escaped | 
 |   // class was added to |ranges| or a single character was passed back through | 
 |   // |char_out| - confirm against the definition. | 
 |   void ParseClassEscape(ZoneList<CharacterRange>* ranges, Zone* zone, | 
 |                         bool add_unicode_case_equivalents, base::uc32* char_out, | 
 |                         bool* is_class_escape); | 
 |   // The return value is true iff parsing succeeded. | 
 |   bool TryParseCharacterClassEscape(base::uc32 next, | 
 |                                     InClassEscapeState in_class_escape_state, | 
 |                                     ZoneList<CharacterRange>* ranges, | 
 |                                     CharacterClassStrings* strings, Zone* zone, | 
 |                                     bool add_unicode_case_equivalents); | 
 |   RegExpTree* ParseClassStringDisjunction(ZoneList<CharacterRange>* ranges, | 
 |                                           CharacterClassStrings* strings); | 
 |   RegExpTree* ParseClassSetOperand(const RegExpBuilder* builder, | 
 |                                    ClassSetOperandType* type_out); | 
 |   RegExpTree* ParseClassSetOperand(const RegExpBuilder* builder, | 
 |                                    ClassSetOperandType* type_out, | 
 |                                    ZoneList<CharacterRange>* ranges, | 
 |                                    CharacterClassStrings* strings, | 
 |                                    base::uc32* character); | 
 |   base::uc32 ParseClassSetCharacter(); | 
 |   // Parses one escaped character and returns it. | 
 |   base::uc32 ParseCharacterEscape(InClassEscapeState in_class_escape_state, | 
 |                                   bool* is_escaped_unicode_character); | 
 |  | 
 |   void AddMaybeSimpleCaseFoldedRange(ZoneList<CharacterRange>* ranges, | 
 |                                      CharacterRange new_range); | 
 |  | 
 |   RegExpTree* ParseClassUnion(const RegExpBuilder* builder, bool is_negated, | 
 |                               RegExpTree* first_operand, | 
 |                               ClassSetOperandType first_operand_type, | 
 |                               ZoneList<CharacterRange>* ranges, | 
 |                               CharacterClassStrings* strings, | 
 |                               base::uc32 first_character); | 
 |   RegExpTree* ParseClassIntersection(const RegExpBuilder* builder, | 
 |                                      bool is_negated, RegExpTree* first_operand, | 
 |                                      ClassSetOperandType first_operand_type); | 
 |   RegExpTree* ParseClassSubtraction(const RegExpBuilder* builder, | 
 |                                     bool is_negated, RegExpTree* first_operand, | 
 |                                     ClassSetOperandType first_operand_type); | 
 |   RegExpTree* ParseCharacterClass(const RegExpBuilder* state); | 
 |  | 
 |   base::uc32 ParseOctalLiteral(); | 
 |  | 
 |   // Attempts to parse the input as a back reference. On success the result is | 
 |   // stored in the output parameter and true is returned. On failure the | 
 |   // characters read are pushed back so that they can be parsed again. | 
 |   bool ParseBackReferenceIndex(int* index_out); | 
 |  | 
 |   RegExpTree* ReportError(RegExpError error); | 
 |   void Advance(); | 
 |   void Advance(int dist); | 
 |   void RewindByOneCodepoint();  // Rewinds to before the previous Advance(). | 
 |   void Reset(int pos); | 
 |  | 
 |   // Tells whether the pattern might be usable as a literal search string. | 
 |   // Only meaningful if the result of the parse is a single atom node. | 
 |   bool simple() const { return simple_; } | 
 |   bool contains_anchor() const { return contains_anchor_; } | 
 |   void set_contains_anchor() { contains_anchor_ = true; } | 
 |   int captures_started() const { return captures_started_; } | 
 |   // Position of the current character: |next_pos_| minus the number of code | 
 |   // units occupied by the current character. | 
 |   int position() const { | 
 |     if (current() != kEndMarker && | 
 |         current() > unibrow::Utf16::kMaxNonSurrogateCharCode) { | 
 |       // The current character was read as a surrogate pair. | 
 |       return next_pos_ - 2; | 
 |     } | 
 |     return next_pos_ - 1; | 
 |   } | 
 |   bool failed() const { return failed_; } | 
 |   RegExpFlags flags() const { return flags_; } | 
 |   // UnicodeMode is enabled by either of the /u and /v flags, see | 
 |   // https://tc39.es/ecma262/#sec-parsepattern. It is also on while | 
 |   // |force_unicode_| is set. | 
 |   bool IsUnicodeMode() const { | 
 |     return force_unicode_ || IsUnicode(flags()) || IsUnicodeSets(flags()); | 
 |   } | 
 |   bool unicode_sets() const { return IsUnicodeSets(flags()); } | 
 |   bool ignore_case() const { return IsIgnoreCase(flags()); } | 
 |  | 
 |   static bool IsSyntaxCharacterOrSlash(base::uc32 c); | 
 |   static bool IsClassSetSyntaxCharacter(base::uc32 c); | 
 |   static bool IsClassSetReservedPunctuator(base::uc32 c); | 
 |   bool IsClassSetReservedDoublePunctuator(base::uc32 c); | 
 |  | 
 |   // Value of |current_| when there is no current character (initially and | 
 |   // after the end of the input has been reached). | 
 |   static const base::uc32 kEndMarker = (1 << 21); | 
 |  | 
 |  private: | 
 |   // Returns the RegExpCapture object for the given 1-based index, allocating | 
 |   // it if necessary. | 
 |   RegExpCapture* GetCapture(int index); | 
 |  | 
 |   // Creates a new named capture at the given index. Has to be called exactly | 
 |   // once per named capture. Fails if a capture with the same name is | 
 |   // encountered. | 
 |   bool CreateNamedCaptureAtIndex(const RegExpParserState* state, int index); | 
 |  | 
 |   // Parses the name of a capture group (?<name>pattern). The name has to | 
 |   // adhere to IdentifierName in the ECMAScript standard. | 
 |   const ZoneVector<base::uc16>* ParseCaptureGroupName(); | 
 |  | 
 |   bool ParseNamedBackReference(RegExpBuilder* builder, | 
 |                                RegExpParserState* state); | 
 |   RegExpParserState* ParseOpenParenthesis(RegExpParserState* state); | 
 |  | 
 |   // Patches the corresponding RegExpCapture objects into all | 
 |   // RegExpBackReferences. This happens after the initial parsing pass so that | 
 |   // references that come before their capture need no special treatment. | 
 |   void PatchNamedBackReferences(); | 
 |  | 
 |   ZoneVector<RegExpCapture*>* GetNamedCaptures(); | 
 |  | 
 |   // Returns true iff the pattern contains named captures. May call | 
 |   // ScanForCaptures to look ahead at the rest of the pattern. | 
 |   bool HasNamedCaptures(InClassEscapeState in_class_escape_state); | 
 |  | 
 |   Zone* zone() const { return zone_; } | 
 |  | 
 |   base::uc32 current() const { return current_; } | 
 |   bool has_more() const { return has_more_; } | 
 |   bool has_next() const { return next_pos_ < input_length(); } | 
 |   base::uc32 Next(); | 
 |   template <bool update_position> | 
 |   base::uc32 ReadNext(); | 
 |   CharT InputAt(int index) const { | 
 |     DCHECK(index >= 0 && index < input_length()); | 
 |     return input_[index]; | 
 |   } | 
 |   int input_length() const { return input_length_; } | 
 |   void ScanForCaptures(InClassEscapeState in_class_escape_state); | 
 |  | 
 |   // Orders RegExpCapture pointers by the names of the captures. | 
 |   struct RegExpCaptureNameLess { | 
 |     bool operator()(const RegExpCapture* lhs, const RegExpCapture* rhs) const { | 
 |       DCHECK_NOT_NULL(lhs); | 
 |       DCHECK_NOT_NULL(rhs); | 
 |       const auto& lhs_name = *lhs->name(); | 
 |       const auto& rhs_name = *rhs->name(); | 
 |       return lhs_name < rhs_name; | 
 |     } | 
 |   }; | 
 |  | 
 |   // Sets |force_unicode_| on the parser for the lifetime of the scope object | 
 |   // and clears it again on destruction. Scopes must not be nested. | 
 |   class ForceUnicodeScope final { | 
 |    public: | 
 |     explicit ForceUnicodeScope(RegExpParserImpl<CharT>* parser) | 
 |         : parser_(parser) { | 
 |       DCHECK(!parser_->force_unicode_); | 
 |       parser_->force_unicode_ = true; | 
 |     } | 
 |     ~ForceUnicodeScope() { | 
 |       DCHECK(parser_->force_unicode_); | 
 |       parser_->force_unicode_ = false; | 
 |     } | 
 |  | 
 |    private: | 
 |     RegExpParserImpl<CharT>* const parser_; | 
 |   }; | 
 |  | 
 |   const DisallowGarbageCollection no_gc_; | 
 |   Zone* const zone_; | 
 |   RegExpError error_ = RegExpError::kNone; | 
 |   int error_pos_ = 0; | 
 |   ZoneList<RegExpCapture*>* captures_; | 
 |   // Maps capture names to a list of capture indices with this name. | 
 |   ZoneMap<RegExpCapture*, ZoneList<int>*, RegExpCaptureNameLess>* | 
 |       named_captures_; | 
 |   ZoneList<RegExpBackReference*>* named_back_references_; | 
 |   const CharT* const input_; | 
 |   const int input_length_; | 
 |   base::uc32 current_; | 
 |   RegExpFlags flags_; | 
 |   // Makes the parser act as if unicode were set. | 
 |   bool force_unicode_ = false; | 
 |   int next_pos_; | 
 |   int captures_started_; | 
 |   // Only valid after we have scanned for captures. | 
 |   int capture_count_; | 
 |   int quantifier_count_; | 
 |   // Only valid after we have scanned for lookbehinds. | 
 |   int lookaround_count_; | 
 |   bool has_more_; | 
 |   bool simple_; | 
 |   bool contains_anchor_; | 
 |   bool is_scanned_for_captures_; | 
 |   // Only valid after we have scanned for captures. | 
 |   bool has_named_captures_; | 
 |   bool failed_; | 
 |   const uintptr_t stack_limit_; | 
 |  | 
 |   friend class v8::internal::RegExpParser; | 
 | }; | 
 |  | 
 | // Sets up the parser for |input| and reads the first character, so that | 
 | // current() is valid as soon as construction is done. | 
 | template <class CharT> | 
 | RegExpParserImpl<CharT>::RegExpParserImpl( | 
 |     const CharT* input, int input_length, RegExpFlags flags, | 
 |     uintptr_t stack_limit, Zone* zone, const DisallowGarbageCollection& no_gc) | 
 |     : zone_(zone), | 
 |       // The capture bookkeeping starts out unallocated. | 
 |       captures_(nullptr), | 
 |       named_captures_(nullptr), | 
 |       named_back_references_(nullptr), | 
 |       input_(input), | 
 |       input_length_(input_length), | 
 |       // No character has been read yet. | 
 |       current_(kEndMarker), | 
 |       flags_(flags), | 
 |       next_pos_(0), | 
 |       captures_started_(0), | 
 |       capture_count_(0), | 
 |       quantifier_count_(0), | 
 |       lookaround_count_(0), | 
 |       has_more_(true), | 
 |       simple_(false), | 
 |       contains_anchor_(false), | 
 |       is_scanned_for_captures_(false), | 
 |       has_named_captures_(false), | 
 |       failed_(false), | 
 |       stack_limit_(stack_limit) { | 
 |   // Load the first character into |current_|. | 
 |   Advance(); | 
 | } | 
 |  | 
 | // One-byte input: reads the code unit at |next_pos_|. |next_pos_| is only | 
 | // moved forward if |update_position| is set. | 
 | template <> | 
 | template <bool update_position> | 
 | inline base::uc32 RegExpParserImpl<uint8_t>::ReadNext() { | 
 |   const base::uc16 unit = InputAt(next_pos_); | 
 |   DCHECK(!unibrow::Utf16::IsLeadSurrogate(unit)); | 
 |   if (update_position) next_pos_ = next_pos_ + 1; | 
 |   return unit; | 
 | } | 
 |  | 
 | // Two-byte input: reads the code point at |next_pos_|. In unicode mode a lead | 
 | // surrogate directly followed by a trail surrogate is returned as one | 
 | // combined code point. |next_pos_| is only moved forward if |update_position| | 
 | // is set. | 
 | template <> | 
 | template <bool update_position> | 
 | inline base::uc32 RegExpParserImpl<base::uc16>::ReadNext() { | 
 |   int cursor = next_pos_; | 
 |   const base::uc16 first_unit = InputAt(cursor); | 
 |   cursor++; | 
 |   base::uc32 code_point = first_unit; | 
 |   // Read the whole surrogate pair in case of unicode mode, if possible. | 
 |   const bool may_be_pair = IsUnicodeMode() && cursor < input_length() && | 
 |                            unibrow::Utf16::IsLeadSurrogate(first_unit); | 
 |   if (may_be_pair) { | 
 |     const base::uc16 second_unit = InputAt(cursor); | 
 |     if (unibrow::Utf16::IsTrailSurrogate(second_unit)) { | 
 |       code_point = | 
 |           unibrow::Utf16::CombineSurrogatePair(first_unit, second_unit); | 
 |       cursor++; | 
 |     } | 
 |   } | 
 |   if (update_position) next_pos_ = cursor; | 
 |   return code_point; | 
 | } | 
 |  | 
 | // Peeks at the character after the current one without consuming it. Returns | 
 | // kEndMarker if there is none. | 
 | template <class CharT> | 
 | base::uc32 RegExpParserImpl<CharT>::Next() { | 
 |   if (!has_next()) return kEndMarker; | 
 |   return ReadNext<false>(); | 
 | } | 
 |  | 
 | // Moves on to the next character. At the end of the input, |current_| becomes | 
 | // kEndMarker. If the stack limit has been exceeded, a stack overflow error is | 
 | // reported instead of reading a character. | 
 | template <class CharT> | 
 | void RegExpParserImpl<CharT>::Advance() { | 
 |   if (!has_next()) { | 
 |     current_ = kEndMarker; | 
 |     // Advance so that position() points to 1-after-the-last-character. This | 
 |     // is important so that Reset() to this position works correctly. | 
 |     next_pos_ = input_length() + 1; | 
 |     has_more_ = false; | 
 |     return; | 
 |   } | 
 |  | 
 |   if (GetCurrentStackPosition() < stack_limit_) { | 
 |     if (v8_flags.correctness_fuzzer_suppressions) { | 
 |       FATAL("Aborting on stack overflow"); | 
 |     } | 
 |     ReportError(RegExpError::kStackOverflow); | 
 |     return; | 
 |   } | 
 |  | 
 |   current_ = ReadNext<true>(); | 
 | } | 
 |  | 
 | // Undoes the most recent Advance(). Does nothing once has_more() is false. | 
 | template <class CharT> | 
 | void RegExpParserImpl<CharT>::RewindByOneCodepoint() { | 
 |   if (!has_more()) return; | 
 |   // A `current` outside the basic multilingual plane was composed of a lead | 
 |   // and a trail surrogate and therefore occupies two code units; anything | 
 |   // else occupies one. | 
 |   const bool spans_two_code_units = | 
 |       current() > unibrow::Utf16::kMaxNonSurrogateCharCode; | 
 |   Advance(spans_two_code_units ? -2 : -1); | 
 | } | 
 |  | 
 | // Repositions the parser so that the character at `pos` is read next, then | 
 | // reads it via Advance(). has_more_ is recomputed from `pos` first. | 
 | template <class CharT> | 
 | void RegExpParserImpl<CharT>::Reset(int pos) { | 
 |   has_more_ = pos < input_length(); | 
 |   next_pos_ = pos; | 
 |   Advance(); | 
 | } | 
 |  | 
 | // Shifts next_pos_ by `dist - 1` code units and then performs one regular | 
 | // Advance(), which supplies the remaining step. `dist` may be negative to | 
 | // rewind. | 
 | template <class CharT> | 
 | void RegExpParserImpl<CharT>::Advance(int dist) { | 
 |   const int skipped_units = dist - 1; | 
 |   next_pos_ += skipped_units; | 
 |   Advance(); | 
 | } | 
 |  | 
 | // static | 
 | // Returns true iff `c` is one of ^ $ \ . * + ? ( ) [ ] { } | or the forward | 
 | // slash. | 
 | template <class CharT> | 
 | bool RegExpParserImpl<CharT>::IsSyntaxCharacterOrSlash(base::uc32 c) { | 
 |   switch (c) { | 
 |     // Brackets of all kinds. | 
 |     case '(': | 
 |     case ')': | 
 |     case '[': | 
 |     case ']': | 
 |     case '{': | 
 |     case '}': | 
 |     // Quantifier characters. | 
 |     case '*': | 
 |     case '+': | 
 |     case '?': | 
 |     // Remaining syntax characters. | 
 |     case '^': | 
 |     case '$': | 
 |     case '\\': | 
 |     case '.': | 
 |     case '|': | 
 |     // The slash. | 
 |     case '/': | 
 |       return true; | 
 |     default: | 
 |       return false; | 
 |   } | 
 | } | 
 |  | 
 | // static | 
 | // Returns true iff `c` is one of ( ) [ ] { } / - \ or the vertical bar. | 
 | template <class CharT> | 
 | bool RegExpParserImpl<CharT>::IsClassSetSyntaxCharacter(base::uc32 c) { | 
 |   switch (c) { | 
 |     // Brackets of all kinds. | 
 |     case '(': | 
 |     case ')': | 
 |     case '[': | 
 |     case ']': | 
 |     case '{': | 
 |     case '}': | 
 |     // Remaining characters. | 
 |     case '/': | 
 |     case '-': | 
 |     case '\\': | 
 |     case '|': | 
 |       return true; | 
 |     default: | 
 |       return false; | 
 |   } | 
 | } | 
 |  | 
 | // static | 
 | // Returns true iff `c` is one of & - ! # % , : ; < = > @ ` ~ | 
 | template <class CharT> | 
 | bool RegExpParserImpl<CharT>::IsClassSetReservedPunctuator(base::uc32 c) { | 
 |   switch (c) { | 
 |     case '!': | 
 |     case '#': | 
 |     case '%': | 
 |     case '&': | 
 |     case ',': | 
 |     case '-': | 
 |     case ':': | 
 |     case ';': | 
 |     case '<': | 
 |     case '=': | 
 |     case '>': | 
 |     case '@': | 
 |     case '`': | 
 |     case '~': | 
 |       return true; | 
 |     default: | 
 |       return false; | 
 |   } | 
 | } | 
 |  | 
 | // Returns true iff `c` is one of the punctuators listed below and the next | 
 | // input character (as reported by Next()) is that same character, e.g. `&&`. | 
 | // Next() is only consulted when `c` is one of the listed punctuators. | 
 | template <class CharT> | 
 | bool RegExpParserImpl<CharT>::IsClassSetReservedDoublePunctuator(base::uc32 c) { | 
 |   switch (c) { | 
 |     case '&': | 
 |     case '!': | 
 |     case '#': | 
 |     case '$': | 
 |     case '%': | 
 |     case '*': | 
 |     case '+': | 
 |     case ',': | 
 |     case '.': | 
 |     case ':': | 
 |     case ';': | 
 |     case '<': | 
 |     case '=': | 
 |     case '>': | 
 |     case '?': | 
 |     case '@': | 
 |     case '^': | 
 |     case '`': | 
 |     case '~': | 
 |       // The punctuator is "doubled" when it is immediately repeated. | 
 |       return Next() == c; | 
 |     default: | 
 |       return false; | 
 |   } | 
 | } | 
 |  | 
 | template <class CharT> | 
 | RegExpTree* RegExpParserImpl<CharT>::ReportError(RegExpError error) { | 
 |   if (failed_) return nullptr;  // Do not overwrite any existing error. | 
 |   failed_ = true; | 
 |   error_ = error; | 
 |   error_pos_ = position(); | 
 |   // Zip to the end to make sure no more input is read. | 
 |   current_ = kEndMarker; | 
 |   next_pos_ = input_length(); | 
 |   has_more_ = false; | 
 |   return nullptr; | 
 | } | 
 |  | 
 | // Error-propagation helper. It is written as the last "argument" of a call, | 
 | // without a preceding comma: `Foo(arg CHECK_FAILED);`. The expansion closes | 
 | // the call's argument list, returns nullptr from the enclosing function if | 
 | // failed_ is set, and opens a `((void)0` expression that the call's own | 
 | // trailing `);` then terminates. | 
 | #define CHECK_FAILED /**/);    \ | 
 |   if (failed_) return nullptr; \ | 
 |   ((void)0 | 
 |  | 
 | // Pattern :: | 
 | //   Disjunction | 
 | // | 
 | // Parses the whole pattern and patches named back references. Returns | 
 | // nullptr if either step fails. | 
 | template <class CharT> | 
 | RegExpTree* RegExpParserImpl<CharT>::ParsePattern() { | 
 |   RegExpTree* const tree = ParseDisjunction(CHECK_FAILED); | 
 |   PatchNamedBackReferences(CHECK_FAILED); | 
 |   DCHECK(!has_more()); | 
 |   // A literal string atom that is exactly as long as the input is identical | 
 |   // to the input, which makes the pattern "simple". | 
 |   const bool is_whole_input_atom = | 
 |       tree->IsAtom() && tree->AsAtom()->length() == input_length(); | 
 |   if (is_whole_input_atom) simple_ = true; | 
 |   return tree; | 
 | } | 
 |  | 
 | // Disjunction :: | 
 | //   Alternative | 
 | //   Alternative | Disjunction | 
 | // Alternative :: | 
 | //   [empty] | 
 | //   Term Alternative | 
 | // Term :: | 
 | //   Assertion | 
 | //   Atom | 
 | //   Atom Quantifier | 
 | // | 
 | // Parses a complete disjunction, including all nested groups. Nesting is | 
 | // handled iteratively: every '(' pushes a new RegExpParserState (linked to | 
 | // its predecessor) and every ')' pops back to the previous one. | 
 | // | 
 | // Each loop iteration has two phases. The first switch parses one term; it | 
 | // uses `continue` for terms that are not followed by quantifier parsing | 
 | // (assertions, '|', group openers) and `break` for terms that may be | 
 | // quantified. The second switch then parses an optional quantifier. | 
 | // | 
 | // Returns the tree for the outermost disjunction, or nullptr after an error | 
 | // has been reported. | 
 | template <class CharT> | 
 | RegExpTree* RegExpParserImpl<CharT>::ParseDisjunction() { | 
 |   // Used to store current state while parsing subexpressions. | 
 |   RegExpParserState initial_state(nullptr, INITIAL, RegExpLookaround::LOOKAHEAD, | 
 |                                   0, nullptr, flags(), zone()); | 
 |   RegExpParserState* state = &initial_state; | 
 |   // Cache the builder in a local variable for quick access. | 
 |   RegExpBuilder* builder = initial_state.builder(); | 
 |   while (true) { | 
 |     switch (current()) { | 
 |       case kEndMarker: | 
 |         if (failed()) return nullptr;  // E.g. the initial Advance failed. | 
 |         if (state->IsSubexpression()) { | 
 |           // Inside a parenthesized group when hitting end of input. | 
 |           return ReportError(RegExpError::kUnterminatedGroup); | 
 |         } | 
 |         DCHECK_EQ(INITIAL, state->group_type()); | 
 |         // Parsing completed successfully. | 
 |         return builder->ToRegExp(); | 
 |       case ')': { | 
 |         if (!state->IsSubexpression()) { | 
 |           return ReportError(RegExpError::kUnmatchedParen); | 
 |         } | 
 |         DCHECK_NE(INITIAL, state->group_type()); | 
 |  | 
 |         Advance(); | 
 |         // End disjunction parsing and convert builder content to new single | 
 |         // regexp atom. | 
 |         RegExpTree* body = builder->ToRegExp(); | 
 |  | 
 |         int end_capture_index = captures_started(); | 
 |  | 
 |         int capture_index = state->capture_index(); | 
 |         SubexpressionType group_type = state->group_type(); | 
 |  | 
 |         // Build result of subexpression. | 
 |         if (group_type == CAPTURE) { | 
 |           if (state->IsNamedCapture()) { | 
 |             CreateNamedCaptureAtIndex(state, capture_index CHECK_FAILED); | 
 |           } | 
 |           RegExpCapture* capture = GetCapture(capture_index); | 
 |           capture->set_body(body); | 
 |           body = capture; | 
 |         } else if (group_type == GROUPING) { | 
 |           body = zone()->template New<RegExpGroup>(body, builder->flags()); | 
 |         } else { | 
 |           DCHECK(group_type == POSITIVE_LOOKAROUND || | 
 |                  group_type == NEGATIVE_LOOKAROUND); | 
 |           bool is_positive = (group_type == POSITIVE_LOOKAROUND); | 
 |           body = zone()->template New<RegExpLookaround>( | 
 |               body, is_positive, end_capture_index - capture_index, | 
 |               capture_index, state->lookaround_type(), lookaround_count_); | 
 |           lookaround_count_++; | 
 |         } | 
 |  | 
 |         // Restore previous state. | 
 |         state = state->previous_state(); | 
 |         builder = state->builder(); | 
 |         // Re-sync the parser-level flags with the enclosing builder. They were | 
 |         // switched to the group's (possibly modifier-adjusted) flags when the | 
 |         // group was opened; without this, modifiers such as (?i:...) would | 
 |         // stay in effect in the parser-level flags after the group closes. | 
 |         flags_ = builder->flags(); | 
 |  | 
 |         builder->AddAtom(body); | 
 |         // For compatibility with JSC and ES3, we allow quantifiers after | 
 |         // lookaheads, and break in all cases. | 
 |         break; | 
 |       } | 
 |       case '|': { | 
 |         Advance(); | 
 |         state->NewAlternative(captures_started()); | 
 |         builder->NewAlternative(); | 
 |         continue; | 
 |       } | 
 |       case '*': | 
 |       case '+': | 
 |       case '?': | 
 |         // A quantifier at the position of a term has nothing to apply to. | 
 |         return ReportError(RegExpError::kNothingToRepeat); | 
 |       case '^': { | 
 |         Advance(); | 
 |         builder->AddAssertion(zone()->template New<RegExpAssertion>( | 
 |             builder->multiline() ? RegExpAssertion::Type::START_OF_LINE | 
 |                                  : RegExpAssertion::Type::START_OF_INPUT)); | 
 |         set_contains_anchor(); | 
 |         continue; | 
 |       } | 
 |       case '$': { | 
 |         Advance(); | 
 |         RegExpAssertion::Type assertion_type = | 
 |             builder->multiline() ? RegExpAssertion::Type::END_OF_LINE | 
 |                                  : RegExpAssertion::Type::END_OF_INPUT; | 
 |         builder->AddAssertion( | 
 |             zone()->template New<RegExpAssertion>(assertion_type)); | 
 |         continue; | 
 |       } | 
 |       case '.': { | 
 |         Advance(); | 
 |         ZoneList<CharacterRange>* ranges = | 
 |             zone()->template New<ZoneList<CharacterRange>>(2, zone()); | 
 |  | 
 |         if (builder->dotall()) { | 
 |           // Everything. | 
 |           CharacterRange::AddClassEscape(StandardCharacterSet::kEverything, | 
 |                                          ranges, false, zone()); | 
 |         } else { | 
 |           // Everything except \x0A, \x0D, \u2028 and \u2029. | 
 |           CharacterRange::AddClassEscape( | 
 |               StandardCharacterSet::kNotLineTerminator, ranges, false, zone()); | 
 |         } | 
 |  | 
 |         RegExpClassRanges* cc = | 
 |             zone()->template New<RegExpClassRanges>(zone(), ranges); | 
 |         builder->AddClassRanges(cc); | 
 |         break; | 
 |       } | 
 |       case '(': { | 
 |         // Push a new state for the group; from here on terms are added to the | 
 |         // group's own builder and the group's flags are in effect. | 
 |         state = ParseOpenParenthesis(state CHECK_FAILED); | 
 |         builder = state->builder(); | 
 |         flags_ = builder->flags(); | 
 |         continue; | 
 |       } | 
 |       case '[': { | 
 |         RegExpTree* cc = ParseCharacterClass(builder CHECK_FAILED); | 
 |         if (cc->IsClassRanges()) { | 
 |           builder->AddClassRanges(cc->AsClassRanges()); | 
 |         } else { | 
 |           DCHECK(cc->IsClassSetExpression()); | 
 |           builder->AddTerm(cc); | 
 |         } | 
 |         break; | 
 |       } | 
 |       // Atom :: | 
 |       //   \ AtomEscape | 
 |       case '\\': | 
 |         switch (Next()) { | 
 |           case kEndMarker: | 
 |             return ReportError(RegExpError::kEscapeAtEndOfPattern); | 
 |           // AtomEscape :: | 
 |           //   [+UnicodeMode] DecimalEscape | 
 |           //   [~UnicodeMode] DecimalEscape but only if the CapturingGroupNumber | 
 |           //                  of DecimalEscape is ≤ NcapturingParens | 
 |           //   CharacterEscape (some cases of this mixed in too) | 
 |           // | 
 |           // TODO(jgruber): It may make sense to disentangle all the different | 
 |           // cases and make the structure mirror the spec, e.g. for AtomEscape: | 
 |           // | 
 |           //  if (TryParseDecimalEscape(...)) return; | 
 |           //  if (TryParseCharacterClassEscape(...)) return; | 
 |           //  if (TryParseCharacterEscape(...)) return; | 
 |           //  if (TryParseGroupName(...)) return; | 
 |           case '1': | 
 |           case '2': | 
 |           case '3': | 
 |           case '4': | 
 |           case '5': | 
 |           case '6': | 
 |           case '7': | 
 |           case '8': | 
 |           case '9': { | 
 |             int index = 0; | 
 |             const bool is_backref = | 
 |                 ParseBackReferenceIndex(&index CHECK_FAILED); | 
 |             if (is_backref) { | 
 |               if (state->IsInsideCaptureGroup(index)) { | 
 |                 // The back reference is inside the capture group it refers to. | 
 |                 // Nothing can possibly have been captured yet, so we use empty | 
 |                 // instead. This ensures that, when checking a back reference, | 
 |                 // the capture registers of the referenced capture are either | 
 |                 // both set or both cleared. | 
 |                 builder->AddEmpty(); | 
 |               } else { | 
 |                 RegExpCapture* capture = GetCapture(index); | 
 |                 RegExpTree* atom = | 
 |                     zone()->template New<RegExpBackReference>(capture, zone()); | 
 |                 builder->AddAtom(atom); | 
 |               } | 
 |               break; | 
 |             } | 
 |             // With /u and /v, no identity escapes except for syntax characters | 
 |             // are allowed. Otherwise, all identity escapes are allowed. | 
 |             if (IsUnicodeMode()) { | 
 |               return ReportError(RegExpError::kInvalidEscape); | 
 |             } | 
 |             // '8' and '9' are not octal digits, so they are added as literal | 
 |             // characters; the remaining digits fall through to the octal case. | 
 |             base::uc32 first_digit = Next(); | 
 |             if (first_digit == '8' || first_digit == '9') { | 
 |               builder->AddCharacter(first_digit); | 
 |               Advance(2); | 
 |               break; | 
 |             } | 
 |             [[fallthrough]]; | 
 |           } | 
 |           case '0': { | 
 |             Advance(); | 
 |             if (IsUnicodeMode() && Next() >= '0' && Next() <= '9') { | 
 |               // Decimal escape with leading 0 are not parsed as octal. | 
 |               return ReportError(RegExpError::kInvalidDecimalEscape); | 
 |             } | 
 |             base::uc32 octal = ParseOctalLiteral(); | 
 |             builder->AddCharacter(octal); | 
 |             break; | 
 |           } | 
 |           case 'b': | 
 |             Advance(2); | 
 |             builder->AddAssertion(zone()->template New<RegExpAssertion>( | 
 |                 RegExpAssertion::Type::BOUNDARY)); | 
 |             continue; | 
 |           case 'B': | 
 |             Advance(2); | 
 |             builder->AddAssertion(zone()->template New<RegExpAssertion>( | 
 |                 RegExpAssertion::Type::NON_BOUNDARY)); | 
 |             continue; | 
 |           // AtomEscape :: | 
 |           //   CharacterClassEscape | 
 |           case 'd': | 
 |           case 'D': | 
 |           case 's': | 
 |           case 'S': | 
 |           case 'w': | 
 |           case 'W': { | 
 |             base::uc32 next = Next(); | 
 |             ZoneList<CharacterRange>* ranges = | 
 |                 zone()->template New<ZoneList<CharacterRange>>(2, zone()); | 
 |             bool add_unicode_case_equivalents = | 
 |                 IsUnicodeMode() && ignore_case(); | 
 |             bool parsed_character_class_escape = TryParseCharacterClassEscape( | 
 |                 next, InClassEscapeState::kNotInClass, ranges, nullptr, zone(), | 
 |                 add_unicode_case_equivalents CHECK_FAILED); | 
 |  | 
 |             if (parsed_character_class_escape) { | 
 |               RegExpClassRanges* cc = | 
 |                   zone()->template New<RegExpClassRanges>(zone(), ranges); | 
 |               builder->AddClassRanges(cc); | 
 |             } else { | 
 |               CHECK(!IsUnicodeMode()); | 
 |               Advance(2); | 
 |               builder->AddCharacter(next);  // IdentityEscape. | 
 |             } | 
 |             break; | 
 |           } | 
 |           case 'p': | 
 |           case 'P': { | 
 |             base::uc32 next = Next(); | 
 |             ZoneList<CharacterRange>* ranges = | 
 |                 zone()->template New<ZoneList<CharacterRange>>(2, zone()); | 
 |             // A strings container is only supplied with the /v flag. | 
 |             CharacterClassStrings* strings = nullptr; | 
 |             if (unicode_sets()) { | 
 |               strings = zone()->template New<CharacterClassStrings>(zone()); | 
 |             } | 
 |             bool add_unicode_case_equivalents = ignore_case(); | 
 |             bool parsed_character_class_escape = TryParseCharacterClassEscape( | 
 |                 next, InClassEscapeState::kNotInClass, ranges, strings, zone(), | 
 |                 add_unicode_case_equivalents CHECK_FAILED); | 
 |  | 
 |             if (parsed_character_class_escape) { | 
 |               if (unicode_sets()) { | 
 |                 RegExpClassSetOperand* op = | 
 |                     zone()->template New<RegExpClassSetOperand>(ranges, | 
 |                                                                 strings); | 
 |                 builder->AddTerm(op); | 
 |               } else { | 
 |                 RegExpClassRanges* cc = | 
 |                     zone()->template New<RegExpClassRanges>(zone(), ranges); | 
 |                 builder->AddClassRanges(cc); | 
 |               } | 
 |             } else { | 
 |               CHECK(!IsUnicodeMode()); | 
 |               Advance(2); | 
 |               builder->AddCharacter(next);  // IdentityEscape. | 
 |             } | 
 |             break; | 
 |           } | 
 |           // AtomEscape :: | 
 |           //   k GroupName | 
 |           case 'k': { | 
 |             // Either an identity escape or a named back-reference.  The two | 
 |             // interpretations are mutually exclusive: '\k' is interpreted as | 
 |             // an identity escape for non-Unicode patterns without named | 
 |             // capture groups, and as the beginning of a named back-reference | 
 |             // in all other cases. | 
 |             const bool has_named_captures = | 
 |                 HasNamedCaptures(InClassEscapeState::kNotInClass CHECK_FAILED); | 
 |             if (IsUnicodeMode() || has_named_captures) { | 
 |               Advance(2); | 
 |               ParseNamedBackReference(builder, state CHECK_FAILED); | 
 |               break; | 
 |             } | 
 |           } | 
 |             [[fallthrough]]; | 
 |           // AtomEscape :: | 
 |           //   CharacterEscape | 
 |           default: { | 
 |             bool is_escaped_unicode_character = false; | 
 |             base::uc32 c = ParseCharacterEscape( | 
 |                 InClassEscapeState::kNotInClass, | 
 |                 &is_escaped_unicode_character CHECK_FAILED); | 
 |             if (is_escaped_unicode_character) { | 
 |               builder->AddEscapedUnicodeCharacter(c); | 
 |             } else { | 
 |               builder->AddCharacter(c); | 
 |             } | 
 |             break; | 
 |           } | 
 |         } | 
 |         break; | 
 |       case '{': { | 
 |         // A complete interval quantifier here has nothing to repeat; an | 
 |         // incomplete one is handled like a lone bracket below. | 
 |         int dummy; | 
 |         bool parsed = ParseIntervalQuantifier(&dummy, &dummy CHECK_FAILED); | 
 |         if (parsed) return ReportError(RegExpError::kNothingToRepeat); | 
 |         [[fallthrough]]; | 
 |       } | 
 |       case '}': | 
 |       case ']': | 
 |         if (IsUnicodeMode()) { | 
 |           return ReportError(RegExpError::kLoneQuantifierBrackets); | 
 |         } | 
 |         [[fallthrough]]; | 
 |       default: | 
 |         builder->AddUnicodeCharacter(current()); | 
 |         Advance(); | 
 |         break; | 
 |     }  // end switch(current()) | 
 |  | 
 |     // Second phase: an optional quantifier for the term parsed above. | 
 |     int min; | 
 |     int max; | 
 |     switch (current()) { | 
 |       // QuantifierPrefix :: | 
 |       //   * | 
 |       //   + | 
 |       //   ? | 
 |       //   { | 
 |       case '*': | 
 |         min = 0; | 
 |         max = RegExpTree::kInfinity; | 
 |         Advance(); | 
 |         break; | 
 |       case '+': | 
 |         min = 1; | 
 |         max = RegExpTree::kInfinity; | 
 |         Advance(); | 
 |         break; | 
 |       case '?': | 
 |         min = 0; | 
 |         max = 1; | 
 |         Advance(); | 
 |         break; | 
 |       case '{': | 
 |         if (ParseIntervalQuantifier(&min, &max)) { | 
 |           if (max < min) { | 
 |             return ReportError(RegExpError::kRangeOutOfOrder); | 
 |           } | 
 |           break; | 
 |         } else if (IsUnicodeMode()) { | 
 |           // Incomplete quantifiers are not allowed. | 
 |           return ReportError(RegExpError::kIncompleteQuantifier); | 
 |         } | 
 |         continue; | 
 |       default: | 
 |         continue; | 
 |     } | 
 |     // Optional suffix selecting the quantifier type. | 
 |     RegExpQuantifier::QuantifierType quantifier_type = RegExpQuantifier::GREEDY; | 
 |     if (current() == '?') { | 
 |       quantifier_type = RegExpQuantifier::NON_GREEDY; | 
 |       Advance(); | 
 |     } else if (v8_flags.regexp_possessive_quantifier && current() == '+') { | 
 |       // v8_flags.regexp_possessive_quantifier is a debug-only flag. | 
 |       quantifier_type = RegExpQuantifier::POSSESSIVE; | 
 |       Advance(); | 
 |     } | 
 |     if (!builder->AddQuantifierToAtom(min, max, quantifier_count_, | 
 |                                       quantifier_type)) { | 
 |       return ReportError(RegExpError::kInvalidQuantifier); | 
 |     } | 
 |     ++quantifier_count_; | 
 |   } | 
 | } | 
 |  | 
 | // Consumes a group opener and returns a new parser state for the group's | 
 | // body, linked to `state` as its previous state. Handles plain capture | 
 | // groups '(', and after '(?': non-capturing groups ':', lookaheads '=' / '!', | 
 | // lookbehinds '<=' / '<!', named captures '<name>' and modifier characters | 
 | // ('i', 'm', 's' and '-') in front of ':'. | 
 | // | 
 | // On any error, the error is reported and nullptr is returned. | 
 | template <class CharT> | 
 | RegExpParserState* RegExpParserImpl<CharT>::ParseOpenParenthesis( | 
 |     RegExpParserState* state) { | 
 |   RegExpLookaround::Type lookaround_type = state->lookaround_type(); | 
 |   bool is_named_capture = false; | 
 |   const ZoneVector<base::uc16>* capture_name = nullptr; | 
 |   SubexpressionType subexpr_type = CAPTURE; | 
 |   // The new group starts out with the flags of the enclosing builder. | 
 |   RegExpFlags flags = state->builder()->flags(); | 
 |   // True while modifier characters are being consumed; keeps the loop below | 
 |   // running until the terminating ':' is seen. | 
 |   bool parsing_modifiers = false; | 
 |   // True while modifiers switch flags on; becomes false once '-' was seen. | 
 |   bool modifiers_polarity = true; | 
 |   // Every modifier flag seen so far, used to detect repeated flags. | 
 |   RegExpFlags modifiers; | 
 |   Advance(); | 
 |   if (current() == '?') { | 
 |     do { | 
 |       // Decide based on the character following current(). | 
 |       base::uc32 next = Next(); | 
 |       switch (next) { | 
 |         case '-': | 
 |           if (!v8_flags.js_regexp_modifiers) { | 
 |             ReportError(RegExpError::kInvalidGroup); | 
 |             return nullptr; | 
 |           } | 
 |           Advance(); | 
 |           parsing_modifiers = true; | 
 |           // Only a single dash is allowed. | 
 |           if (modifiers_polarity == false) { | 
 |             ReportError(RegExpError::kMultipleFlagDashes); | 
 |             return nullptr; | 
 |           } | 
 |           modifiers_polarity = false; | 
 |           break; | 
 |         case 'm': | 
 |         case 'i': | 
 |         case 's': { | 
 |           if (!v8_flags.js_regexp_modifiers) { | 
 |             ReportError(RegExpError::kInvalidGroup); | 
 |             return nullptr; | 
 |           } | 
 |           Advance(); | 
 |           parsing_modifiers = true; | 
 |           RegExpFlag flag = TryRegExpFlagFromChar(next).value(); | 
 |           // A flag may appear only once, on either side of the dash. | 
 |           if ((modifiers & flag) != 0) { | 
 |             ReportError(RegExpError::kRepeatedFlag); | 
 |             return nullptr; | 
 |           } | 
 |           modifiers |= flag; | 
 |           flags.set(flag, modifiers_polarity); | 
 |           break; | 
 |         } | 
 |         case ':': | 
 |           Advance(2); | 
 |           // Ends the modifier list (if any) and therefore the loop. | 
 |           parsing_modifiers = false; | 
 |           subexpr_type = GROUPING; | 
 |           break; | 
 |         case '=': | 
 |           Advance(2); | 
 |           // Modifiers may only be followed by ':'. | 
 |           if (parsing_modifiers) { | 
 |             DCHECK(v8_flags.js_regexp_modifiers); | 
 |             ReportError(RegExpError::kInvalidGroup); | 
 |             return nullptr; | 
 |           } | 
 |           lookaround_type = RegExpLookaround::LOOKAHEAD; | 
 |           subexpr_type = POSITIVE_LOOKAROUND; | 
 |           break; | 
 |         case '!': | 
 |           Advance(2); | 
 |           // Modifiers may only be followed by ':'. | 
 |           if (parsing_modifiers) { | 
 |             DCHECK(v8_flags.js_regexp_modifiers); | 
 |             ReportError(RegExpError::kInvalidGroup); | 
 |             return nullptr; | 
 |           } | 
 |           lookaround_type = RegExpLookaround::LOOKAHEAD; | 
 |           subexpr_type = NEGATIVE_LOOKAROUND; | 
 |           break; | 
 |         case '<': | 
 |           Advance(); | 
 |           // Modifiers may only be followed by ':'. | 
 |           if (parsing_modifiers) { | 
 |             DCHECK(v8_flags.js_regexp_modifiers); | 
 |             ReportError(RegExpError::kInvalidGroup); | 
 |             return nullptr; | 
 |           } | 
 |           // current() is now '<'; the following character distinguishes a | 
 |           // lookbehind from a named capture. | 
 |           if (Next() == '=') { | 
 |             Advance(2); | 
 |             lookaround_type = RegExpLookaround::LOOKBEHIND; | 
 |             subexpr_type = POSITIVE_LOOKAROUND; | 
 |             break; | 
 |           } else if (Next() == '!') { | 
 |             Advance(2); | 
 |             lookaround_type = RegExpLookaround::LOOKBEHIND; | 
 |             subexpr_type = NEGATIVE_LOOKAROUND; | 
 |             break; | 
 |           } | 
 |           is_named_capture = true; | 
 |           has_named_captures_ = true; | 
 |           // Step past '<'; the name itself is parsed further below. | 
 |           Advance(); | 
 |           break; | 
 |         default: | 
 |           ReportError(RegExpError::kInvalidGroup); | 
 |           return nullptr; | 
 |       } | 
 |     } while (parsing_modifiers); | 
 |   } | 
 |   if (modifiers_polarity == false) { | 
 |     // We encountered a dash. | 
 |     // A dash without any modifier flag at all is an error. | 
 |     if (modifiers == 0) { | 
 |       ReportError(RegExpError::kInvalidFlagGroup); | 
 |       return nullptr; | 
 |     } | 
 |   } | 
 |   if (subexpr_type == CAPTURE) { | 
 |     if (captures_started_ >= RegExpMacroAssembler::kMaxCaptures) { | 
 |       ReportError(RegExpError::kTooManyCaptures); | 
 |       return nullptr; | 
 |     } | 
 |     captures_started_++; | 
 |  | 
 |     if (is_named_capture) { | 
 |       capture_name = ParseCaptureGroupName(CHECK_FAILED); | 
 |     } | 
 |   } | 
 |   // Store current state and begin new disjunction parsing. | 
 |   return zone()->template New<RegExpParserState>( | 
 |       state, subexpr_type, lookaround_type, captures_started_, capture_name, | 
 |       flags, zone()); | 
 | } | 
 |  | 
 | // In order to know whether an escape is a backreference or not we have to scan | 
 | // the entire regexp and find the number of capturing parentheses.  However we | 
 | // don't want to scan the regexp twice unless it is necessary.  This mini-parser | 
 | // is called when needed.  It can see the difference between capturing and | 
 | // noncapturing parentheses and can skip character classes and backslash-escaped | 
 | // characters. | 
 | // | 
 | // Important: The scanner has to be in a consistent state when calling | 
 | // ScanForCaptures, e.g. not in the middle of an escape sequence '\[' or while | 
 | // parsing a nested class. | 
 | // | 
 | // The scan runs from the current position to the end of the input and then | 
 | // resets the parser to where it started. It stores the total in | 
 | // capture_count_, may set has_named_captures_, and sets | 
 | // is_scanned_for_captures_. | 
 | template <class CharT> | 
 | void RegExpParserImpl<CharT>::ScanForCaptures( | 
 |     InClassEscapeState in_class_escape_state) { | 
 |   DCHECK(!is_scanned_for_captures_); | 
 |   const int saved_position = position(); | 
 |   // Start with captures started previous to current position | 
 |   int capture_count = captures_started(); | 
 |   // When we start inside a character class, skip everything inside the class. | 
 |   if (in_class_escape_state == InClassEscapeState::kInClass) { | 
 |     // \k is always invalid within a class in unicode mode, thus we should never | 
 |     // call ScanForCaptures within a class. | 
 |     DCHECK(!IsUnicodeMode()); | 
 |     int c; | 
 |     while ((c = current()) != kEndMarker) { | 
 |       Advance(); | 
 |       if (c == '\\') { | 
 |         // Skip the escaped character. | 
 |         Advance(); | 
 |       } else { | 
 |         if (c == ']') break; | 
 |       } | 
 |     } | 
 |   } | 
 |   // Add count of captures after this position. | 
 |   int n; | 
 |   while ((n = current()) != kEndMarker) { | 
 |     Advance(); | 
 |     switch (n) { | 
 |       case '\\': | 
 |         // Skip the escaped character. | 
 |         Advance(); | 
 |         break; | 
 |       case '[': { | 
 |         // Skip the whole character class, including nested classes with /v. | 
 |         int class_nest_level = 0; | 
 |         int c; | 
 |         while ((c = current()) != kEndMarker) { | 
 |           Advance(); | 
 |           if (c == '\\') { | 
 |             Advance(); | 
 |           } else if (c == '[') { | 
 |             // With /v, '[' inside a class is treated as a nested class. | 
 |             // Without /v, '[' is a normal character. | 
 |             if (unicode_sets()) class_nest_level++; | 
 |           } else if (c == ']') { | 
 |             if (class_nest_level == 0) break; | 
 |             class_nest_level--; | 
 |           } | 
 |         } | 
 |         break; | 
 |       } | 
 |       case '(': | 
 |         if (current() == '?') { | 
 |           // At this point we could be in | 
 |           // * a non-capturing group '(?:', | 
 |           // * a lookbehind assertion '(?<=' '(?<!' | 
 |           // * or a named capture '(?<'. | 
 |           // | 
 |           // Of these, only named captures are capturing groups. | 
 |  | 
 |           Advance(); | 
 |           if (current() != '<') break; | 
 |  | 
 |           Advance(); | 
 |           if (current() == '=' || current() == '!') break; | 
 |  | 
 |           // Found a possible named capture. It could turn out to be a syntax | 
 |           // error (e.g. an unterminated or invalid name), but that distinction | 
 |           // does not matter for our purposes. | 
 |           has_named_captures_ = true; | 
 |         } | 
 |         capture_count++; | 
 |         break; | 
 |     } | 
 |   } | 
 |   capture_count_ = capture_count; | 
 |   is_scanned_for_captures_ = true; | 
 |   Reset(saved_position); | 
 | } | 
 |  | 
 | // Attempts to parse a backslash followed by a decimal number as a back | 
 | // reference. Expects current() at the backslash with a digit 1-9 following. | 
 | // On success the digits are consumed, *index_out receives the number and true | 
 | // is returned. Otherwise the parser is reset to its starting position, | 
 | // *index_out is left untouched and false is returned. | 
 | template <class CharT> | 
 | bool RegExpParserImpl<CharT>::ParseBackReferenceIndex(int* index_out) { | 
 |   DCHECK_EQ('\\', current()); | 
 |   DCHECK('1' <= Next() && Next() <= '9'); | 
 |   const int saved_pos = position(); | 
 |   int parsed = Next() - '0'; | 
 |   Advance(2); | 
 |   // Try to parse a decimal literal that is no greater than the total number | 
 |   // of left capturing parentheses in the input. Give up as soon as the value | 
 |   // exceeds the maximum number of captures. | 
 |   for (base::uc32 digit = current(); IsDecimalDigit(digit); | 
 |        digit = current()) { | 
 |     parsed = 10 * parsed + (digit - '0'); | 
 |     if (parsed > RegExpMacroAssembler::kMaxCaptures) { | 
 |       Reset(saved_pos); | 
 |       return false; | 
 |     } | 
 |     Advance(); | 
 |   } | 
 |   // A number within the groups started so far is accepted directly. A larger | 
 |   // one is checked against the total capture count, which is obtained by | 
 |   // scanning ahead unless that scan has already been done. | 
 |   const bool beyond_started_captures = parsed > captures_started(); | 
 |   if (beyond_started_captures) { | 
 |     if (!is_scanned_for_captures_) { | 
 |       ScanForCaptures(InClassEscapeState::kNotInClass); | 
 |     } | 
 |     if (parsed > capture_count_) { | 
 |       Reset(saved_pos); | 
 |       return false; | 
 |     } | 
 |   } | 
 |   *index_out = parsed; | 
 |   return true; | 
 | } | 
 |  | 
 | namespace { | 
 |  | 
 | // Appends `code_unit` to `v`. A value above kMaxNonSurrogateCharCode is | 
 | // appended as a lead surrogate followed by a trail surrogate; any other value | 
 | // is appended as a single element. | 
 | void push_code_unit(ZoneVector<base::uc16>* v, uint32_t code_unit) { | 
 |   if (code_unit > unibrow::Utf16::kMaxNonSurrogateCharCode) { | 
 |     v->push_back(unibrow::Utf16::LeadSurrogate(code_unit)); | 
 |     v->push_back(unibrow::Utf16::TrailSurrogate(code_unit)); | 
 |     return; | 
 |   } | 
 |   v->push_back(code_unit); | 
 | } | 
 |  | 
 | }  // namespace | 
 |  | 
 | // Parses a capture group name up to and including the terminating '>'. | 
 | // Returns the name as a zone-allocated vector of UTF-16 code units. Reports | 
 | // an error and returns nullptr for an invalid \u escape, a stray backslash, | 
 | // or a character that is not a valid identifier start / part. | 
 | template <class CharT> | 
 | const ZoneVector<base::uc16>* RegExpParserImpl<CharT>::ParseCaptureGroupName() { | 
 |   // Due to special Advance requirements (see the next comment), rewind by one | 
 |   // such that names starting with a surrogate pair are parsed correctly for | 
 |   // patterns where the unicode flag is unset. | 
 |   // | 
 |   // Note that we use this odd pattern of rewinding the last advance in order | 
 |   // to adhere to the common parser behavior of expecting `current` to point at | 
 |   // the first candidate character for a function (e.g. when entering ParseFoo, | 
 |   // `current` should point at the first character of Foo). | 
 |   RewindByOneCodepoint(); | 
 |  | 
 |   ZoneVector<base::uc16>* name = | 
 |       zone()->template New<ZoneVector<base::uc16>>(zone()); | 
 |  | 
 |   { | 
 |     // Advance behavior inside this function is tricky since | 
 |     // RegExpIdentifierName explicitly enables unicode (in spec terms, sets +U) | 
 |     // and thus allows surrogate pairs and \u{}-style escapes even in | 
 |     // non-unicode patterns. Therefore Advance within the capture group name | 
 |     // has to force-enable unicode, and outside the name revert to default | 
 |     // behavior. | 
 |     ForceUnicodeScope force_unicode(this); | 
 |  | 
 |     // True until the first character of the name has been accepted. | 
 |     bool at_start = true; | 
 |     while (true) { | 
 |       Advance(); | 
 |       base::uc32 c = current(); | 
 |  | 
 |       // Convert unicode escapes. | 
 |       if (c == '\\' && Next() == 'u') { | 
 |         Advance(2); | 
 |         if (!ParseUnicodeEscape(&c)) { | 
 |           ReportError(RegExpError::kInvalidUnicodeEscape); | 
 |           return nullptr; | 
 |         } | 
 |         // Step back by one so that the Advance() at the top of the loop | 
 |         // continues with the right character. | 
 |         RewindByOneCodepoint(); | 
 |       } | 
 |  | 
 |       // The backslash char is misclassified as both ID_Start and ID_Continue. | 
 |       if (c == '\\') { | 
 |         ReportError(RegExpError::kInvalidCaptureGroupName); | 
 |         return nullptr; | 
 |       } | 
 |  | 
 |       if (at_start) { | 
 |         if (!IsIdentifierStart(c)) { | 
 |           ReportError(RegExpError::kInvalidCaptureGroupName); | 
 |           return nullptr; | 
 |         } | 
 |         push_code_unit(name, c); | 
 |         at_start = false; | 
 |       } else { | 
 |         if (c == '>') { | 
 |           // End of the name. | 
 |           break; | 
 |         } else if (IsIdentifierPart(c)) { | 
 |           push_code_unit(name, c); | 
 |         } else { | 
 |           ReportError(RegExpError::kInvalidCaptureGroupName); | 
 |           return nullptr; | 
 |         } | 
 |       } | 
 |     } | 
 |   } | 
 |  | 
 |   // This final advance goes back into the state of pointing at the next | 
 |   // relevant char, which the rest of the parser expects. See also the previous | 
 |   // comments in this function. | 
 |   Advance(); | 
 |   return name; | 
 | } | 
 |  | 
 | template <class CharT> | 
 | bool RegExpParserImpl<CharT>::CreateNamedCaptureAtIndex( | 
 |     const RegExpParserState* state, int index) { | 
 |   const ZoneVector<base::uc16>* name = state->capture_name(); | 
 |   const auto& non_participating_capture_group_intervals = | 
 |       state->non_participating_capture_group_intervals(); | 
 |   DCHECK(0 < index && index <= captures_started_); | 
 |   DCHECK_NOT_NULL(name); | 
 |  | 
 |   RegExpCapture* capture = GetCapture(index); | 
 |   DCHECK_NULL(capture->name()); | 
 |  | 
 |   capture->set_name(name); | 
 |  | 
 |   if (named_captures_ == nullptr) { | 
 |     named_captures_ = zone_->template New< | 
 |         ZoneMap<RegExpCapture*, ZoneList<int>*, RegExpCaptureNameLess>>(zone()); | 
 |   } else { | 
 |     // Check for duplicates and bail if we find any. | 
 |     const auto& named_capture_it = named_captures_->find(capture); | 
 |     if (named_capture_it != named_captures_->end()) { | 
 |       if (v8_flags.js_regexp_duplicate_named_groups) { | 
 |         ZoneList<int>* named_capture_indices = named_capture_it->second; | 
 |         DCHECK_NOT_NULL(named_capture_indices); | 
 |         DCHECK(!named_capture_indices->is_empty()); | 
 |         for (int named_index : *named_capture_indices) { | 
 |           bool is_duplicate = true; | 
 |           for (Interval interval : non_participating_capture_group_intervals) { | 
 |             DCHECK(!interval.is_empty()); | 
 |             // We can stop as soon as we are inside one non-participating | 
 |             // interval. There can't be a non-participating and participating | 
 |             // interval, as intervals are never decreasing. | 
 |             if (interval.Contains(named_index)) { | 
 |               is_duplicate = false; | 
 |               break; | 
 |             } | 
 |             // Intervals are ordered strictly increasing, so we can stop early | 
 |             // when the current interval is past the current index. | 
 |             if (named_index <= interval.from()) { | 
 |               break; | 
 |             } | 
 |           } | 
 |           if (is_duplicate) { | 
 |             ReportError(RegExpError::kDuplicateCaptureGroupName); | 
 |             return false; | 
 |           } | 
 |         } | 
 |       } else { | 
 |         ReportError(RegExpError::kDuplicateCaptureGroupName); | 
 |         return false; | 
 |       } | 
 |     } | 
 |   } | 
 |   if (v8_flags.js_regexp_duplicate_named_groups) { | 
 |     // Check for nested named captures. This is necessary to find duplicate | 
 |     // named captures within the same disjunct. | 
 |     RegExpParserState* parent_state = state->previous_state(); | 
 |     if (parent_state && parent_state->IsInsideCaptureGroup(name)) { | 
 |       ReportError(RegExpError::kDuplicateCaptureGroupName); | 
 |       return false; | 
 |     } | 
 |   } | 
 |  | 
 |   auto entry = named_captures_->try_emplace( | 
 |       capture, zone()->template New<ZoneList<int>>(1, zone())); | 
 |   entry.first->second->Add(index, zone()); | 
 |   return true; | 
 | } | 
 |  | 
 | template <class CharT> | 
 | bool RegExpParserImpl<CharT>::ParseNamedBackReference( | 
 |     RegExpBuilder* builder, RegExpParserState* state) { | 
 |   // The parser is assumed to be on the '<' in \k<name>. | 
 |   if (current() != '<') { | 
 |     ReportError(RegExpError::kInvalidNamedReference); | 
 |     return false; | 
 |   } | 
 |  | 
 |   Advance(); | 
 |   const ZoneVector<base::uc16>* name = ParseCaptureGroupName(); | 
 |   if (name == nullptr) { | 
 |     return false; | 
 |   } | 
 |  | 
 |   if (state->IsInsideCaptureGroup(name)) { | 
 |     builder->AddEmpty(); | 
 |   } else { | 
 |     RegExpBackReference* atom = | 
 |         zone()->template New<RegExpBackReference>(zone()); | 
 |     atom->set_name(name); | 
 |  | 
 |     builder->AddAtom(atom); | 
 |  | 
 |     if (named_back_references_ == nullptr) { | 
 |       named_back_references_ = | 
 |           zone()->template New<ZoneList<RegExpBackReference*>>(1, zone()); | 
 |     } | 
 |     named_back_references_->Add(atom, zone()); | 
 |   } | 
 |  | 
 |   return true; | 
 | } | 
 |  | 
 | template <class CharT> | 
 | void RegExpParserImpl<CharT>::PatchNamedBackReferences() { | 
 |   if (named_back_references_ == nullptr) return; | 
 |  | 
 |   if (named_captures_ == nullptr) { | 
 |     ReportError(RegExpError::kInvalidNamedCaptureReference); | 
 |     return; | 
 |   } | 
 |  | 
 |   // Look up and patch the actual capture for each named back reference. | 
 |  | 
 |   for (int i = 0; i < named_back_references_->length(); i++) { | 
 |     RegExpBackReference* ref = named_back_references_->at(i); | 
 |  | 
 |     // Capture used to search the named_captures_ by name, index of the | 
 |     // capture is never used. | 
 |     static const int kInvalidIndex = 0; | 
 |     RegExpCapture* search_capture = | 
 |         zone()->template New<RegExpCapture>(kInvalidIndex); | 
 |     DCHECK_NULL(search_capture->name()); | 
 |     search_capture->set_name(ref->name()); | 
 |  | 
 |     const auto& capture_it = named_captures_->find(search_capture); | 
 |     if (capture_it == named_captures_->end()) { | 
 |       ReportError(RegExpError::kInvalidNamedCaptureReference); | 
 |       return; | 
 |     } | 
 |  | 
 |     DCHECK_IMPLIES(!v8_flags.js_regexp_duplicate_named_groups, | 
 |                    capture_it->second->length() == 1); | 
 |     for (int index : *capture_it->second) { | 
 |       ref->add_capture(GetCapture(index), zone()); | 
 |     } | 
 |   } | 
 | } | 
 |  | 
 | template <class CharT> | 
 | RegExpCapture* RegExpParserImpl<CharT>::GetCapture(int index) { | 
 |   // The index for the capture groups are one-based. Its index in the list is | 
 |   // zero-based. | 
 |   const int known_captures = | 
 |       is_scanned_for_captures_ ? capture_count_ : captures_started_; | 
 |   SBXCHECK(index >= 1 && index <= known_captures); | 
 |   if (captures_ == nullptr) { | 
 |     captures_ = | 
 |         zone()->template New<ZoneList<RegExpCapture*>>(known_captures, zone()); | 
 |   } | 
 |   while (captures_->length() < known_captures) { | 
 |     captures_->Add(zone()->template New<RegExpCapture>(captures_->length() + 1), | 
 |                    zone()); | 
 |   } | 
 |   return captures_->at(index - 1); | 
 | } | 
 |  | 
 | template <class CharT> | 
 | ZoneVector<RegExpCapture*>* RegExpParserImpl<CharT>::GetNamedCaptures() { | 
 |   if (named_captures_ == nullptr) { | 
 |     return nullptr; | 
 |   } | 
 |   DCHECK(!named_captures_->empty()); | 
 |  | 
 |   ZoneVector<RegExpCapture*>* flattened_named_captures = | 
 |       zone()->template New<ZoneVector<RegExpCapture*>>(zone()); | 
 |   for (auto capture : *named_captures_) { | 
 |     DCHECK_IMPLIES(!v8_flags.js_regexp_duplicate_named_groups, | 
 |                    capture.second->length() == 1); | 
 |     for (int index : *capture.second) { | 
 |       flattened_named_captures->push_back(GetCapture(index)); | 
 |     } | 
 |   } | 
 |   return flattened_named_captures; | 
 | } | 
 |  | 
 | template <class CharT> | 
 | bool RegExpParserImpl<CharT>::HasNamedCaptures( | 
 |     InClassEscapeState in_class_escape_state) { | 
 |   if (has_named_captures_ || is_scanned_for_captures_) { | 
 |     return has_named_captures_; | 
 |   } | 
 |  | 
 |   ScanForCaptures(in_class_escape_state); | 
 |   DCHECK(is_scanned_for_captures_); | 
 |   return has_named_captures_; | 
 | } | 
 |  | 
 | // QuantifierPrefix :: | 
 | //   { DecimalDigits } | 
 | //   { DecimalDigits , } | 
 | //   { DecimalDigits , DecimalDigits } | 
 | // | 
 | // Returns true if parsing succeeds, and set the min_out and max_out | 
 | // values. Values are truncated to RegExpTree::kInfinity if they overflow. | 
 | template <class CharT> | 
 | bool RegExpParserImpl<CharT>::ParseIntervalQuantifier(int* min_out, | 
 |                                                       int* max_out) { | 
 |   DCHECK_EQ(current(), '{'); | 
 |   int start = position(); | 
 |   Advance(); | 
 |   int min = 0; | 
 |   if (!IsDecimalDigit(current())) { | 
 |     Reset(start); | 
 |     return false; | 
 |   } | 
 |   while (IsDecimalDigit(current())) { | 
 |     int next = current() - '0'; | 
 |     if (min > (RegExpTree::kInfinity - next) / 10) { | 
 |       // Overflow. Skip past remaining decimal digits and return -1. | 
 |       do { | 
 |         Advance(); | 
 |       } while (IsDecimalDigit(current())); | 
 |       min = RegExpTree::kInfinity; | 
 |       break; | 
 |     } | 
 |     min = 10 * min + next; | 
 |     Advance(); | 
 |   } | 
 |   int max = 0; | 
 |   if (current() == '}') { | 
 |     max = min; | 
 |     Advance(); | 
 |   } else if (current() == ',') { | 
 |     Advance(); | 
 |     if (current() == '}') { | 
 |       max = RegExpTree::kInfinity; | 
 |       Advance(); | 
 |     } else { | 
 |       while (IsDecimalDigit(current())) { | 
 |         int next = current() - '0'; | 
 |         if (max > (RegExpTree::kInfinity - next) / 10) { | 
 |           do { | 
 |             Advance(); | 
 |           } while (IsDecimalDigit(current())); | 
 |           max = RegExpTree::kInfinity; | 
 |           break; | 
 |         } | 
 |         max = 10 * max + next; | 
 |         Advance(); | 
 |       } | 
 |       if (current() != '}') { | 
 |         Reset(start); | 
 |         return false; | 
 |       } | 
 |       Advance(); | 
 |     } | 
 |   } else { | 
 |     Reset(start); | 
 |     return false; | 
 |   } | 
 |   *min_out = min; | 
 |   *max_out = max; | 
 |   return true; | 
 | } | 
 |  | 
 | template <class CharT> | 
 | base::uc32 RegExpParserImpl<CharT>::ParseOctalLiteral() { | 
 |   DCHECK(('0' <= current() && current() <= '7') || !has_more()); | 
 |   // For compatibility with some other browsers (not all), we parse | 
 |   // up to three octal digits with a value below 256. | 
 |   // ES#prod-annexB-LegacyOctalEscapeSequence | 
 |   base::uc32 value = current() - '0'; | 
 |   Advance(); | 
 |   if ('0' <= current() && current() <= '7') { | 
 |     value = value * 8 + current() - '0'; | 
 |     Advance(); | 
 |     if (value < 32 && '0' <= current() && current() <= '7') { | 
 |       value = value * 8 + current() - '0'; | 
 |       Advance(); | 
 |     } | 
 |   } | 
 |   return value; | 
 | } | 
 |  | 
 | template <class CharT> | 
 | bool RegExpParserImpl<CharT>::ParseHexEscape(int length, base::uc32* value) { | 
 |   int start = position(); | 
 |   base::uc32 val = 0; | 
 |   for (int i = 0; i < length; ++i) { | 
 |     base::uc32 c = current(); | 
 |     int d = base::HexValue(c); | 
 |     if (d < 0) { | 
 |       Reset(start); | 
 |       return false; | 
 |     } | 
 |     val = val * 16 + d; | 
 |     Advance(); | 
 |   } | 
 |   *value = val; | 
 |   return true; | 
 | } | 
 |  | 
 | // This parses RegExpUnicodeEscapeSequence as described in ECMA262. | 
 | template <class CharT> | 
 | bool RegExpParserImpl<CharT>::ParseUnicodeEscape(base::uc32* value) { | 
 |   // Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are | 
 |   // allowed). In the latter case, the number of hex digits between { } is | 
 |   // arbitrary. \ and u have already been read. | 
 |   if (current() == '{' && IsUnicodeMode()) { | 
 |     int start = position(); | 
 |     Advance(); | 
 |     if (ParseUnlimitedLengthHexNumber(0x10FFFF, value)) { | 
 |       if (current() == '}') { | 
 |         Advance(); | 
 |         return true; | 
 |       } | 
 |     } | 
 |     Reset(start); | 
 |     return false; | 
 |   } | 
 |   // \u but no {, or \u{...} escapes not allowed. | 
 |   bool result = ParseHexEscape(4, value); | 
 |   if (result && IsUnicodeMode() && unibrow::Utf16::IsLeadSurrogate(*value) && | 
 |       current() == '\\') { | 
 |     // Attempt to read trail surrogate. | 
 |     int start = position(); | 
 |     if (Next() == 'u') { | 
 |       Advance(2); | 
 |       base::uc32 trail; | 
 |       if (ParseHexEscape(4, &trail) && | 
 |           unibrow::Utf16::IsTrailSurrogate(trail)) { | 
 |         *value = unibrow::Utf16::CombineSurrogatePair( | 
 |             static_cast<base::uc16>(*value), static_cast<base::uc16>(trail)); | 
 |         return true; | 
 |       } | 
 |     } | 
 |     Reset(start); | 
 |   } | 
 |   return result; | 
 | } | 
 |  | 
 | #ifdef V8_INTL_SUPPORT | 
 |  | 
 | namespace { | 
 |  | 
 | bool IsExactPropertyAlias(const char* property_name, UProperty property) { | 
 |   const char* short_name = u_getPropertyName(property, U_SHORT_PROPERTY_NAME); | 
 |   if (short_name != nullptr && strcmp(property_name, short_name) == 0) | 
 |     return true; | 
 |   for (int i = 0;; i++) { | 
 |     const char* long_name = u_getPropertyName( | 
 |         property, static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); | 
 |     if (long_name == nullptr) break; | 
 |     if (strcmp(property_name, long_name) == 0) return true; | 
 |   } | 
 |   return false; | 
 | } | 
 |  | 
 | bool IsExactPropertyValueAlias(const char* property_value_name, | 
 |                                UProperty property, int32_t property_value) { | 
 |   const char* short_name = | 
 |       u_getPropertyValueName(property, property_value, U_SHORT_PROPERTY_NAME); | 
 |   if (short_name != nullptr && strcmp(property_value_name, short_name) == 0) { | 
 |     return true; | 
 |   } | 
 |   for (int i = 0;; i++) { | 
 |     const char* long_name = u_getPropertyValueName( | 
 |         property, property_value, | 
 |         static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); | 
 |     if (long_name == nullptr) break; | 
 |     if (strcmp(property_value_name, long_name) == 0) return true; | 
 |   } | 
 |   return false; | 
 | } | 
 |  | 
 | void ExtractStringsFromUnicodeSet(const icu::UnicodeSet& set, | 
 |                                   CharacterClassStrings* strings, | 
 |                                   RegExpFlags flags, Zone* zone) { | 
 |   DCHECK(set.hasStrings()); | 
 |   DCHECK(IsUnicodeSets(flags)); | 
 |   DCHECK_NOT_NULL(strings); | 
 |  | 
 |   RegExpTextBuilder::SmallRegExpTreeVector string_storage(zone); | 
 |   RegExpTextBuilder string_builder(zone, &string_storage, flags); | 
 |   const bool needs_case_folding = IsIgnoreCase(flags); | 
 |   icu::UnicodeSetIterator iter(set); | 
 |   iter.skipToStrings(); | 
 |   while (iter.next()) { | 
 |     const icu::UnicodeString& s = iter.getString(); | 
 |     const char16_t* p = s.getBuffer(); | 
 |     int32_t length = s.length(); | 
 |     ZoneList<base::uc32>* string = | 
 |         zone->template New<ZoneList<base::uc32>>(length, zone); | 
 |     for (int32_t i = 0; i < length;) { | 
 |       UChar32 c; | 
 |       U16_NEXT(p, i, length, c); | 
 |       string_builder.AddUnicodeCharacter(c); | 
 |       if (needs_case_folding) { | 
 |         c = u_foldCase(c, U_FOLD_CASE_DEFAULT); | 
 |       } | 
 |       string->Add(c, zone); | 
 |     } | 
 |     strings->emplace(string->ToVector(), string_builder.ToRegExp()); | 
 |     string_storage.clear(); | 
 |   } | 
 | } | 
 |  | 
 | bool LookupPropertyValueName(UProperty property, | 
 |                              const char* property_value_name, bool negate, | 
 |                              ZoneList<CharacterRange>* result_ranges, | 
 |                              CharacterClassStrings* result_strings, | 
 |                              RegExpFlags flags, Zone* zone) { | 
 |   UProperty property_for_lookup = property; | 
 |   if (property_for_lookup == UCHAR_SCRIPT_EXTENSIONS) { | 
 |     // For the property Script_Extensions, we have to do the property value | 
 |     // name lookup as if the property is Script. | 
 |     property_for_lookup = UCHAR_SCRIPT; | 
 |   } | 
 |   int32_t property_value = | 
 |       u_getPropertyValueEnum(property_for_lookup, property_value_name); | 
 |   if (property_value == UCHAR_INVALID_CODE) return false; | 
 |  | 
 |   // We require the property name to match exactly to one of the property value | 
 |   // aliases. However, u_getPropertyValueEnum uses loose matching. | 
 |   if (!IsExactPropertyValueAlias(property_value_name, property_for_lookup, | 
 |                                  property_value)) { | 
 |     return false; | 
 |   } | 
 |  | 
 |   UErrorCode ec = U_ZERO_ERROR; | 
 |   icu::UnicodeSet set; | 
 |   set.applyIntPropertyValue(property, property_value, ec); | 
 |   bool success = ec == U_ZERO_ERROR && !set.isEmpty(); | 
 |  | 
 |   if (success) { | 
 |     if (set.hasStrings()) { | 
 |       ExtractStringsFromUnicodeSet(set, result_strings, flags, zone); | 
 |     } | 
 |     const bool needs_case_folding = IsUnicodeSets(flags) && IsIgnoreCase(flags); | 
 |     if (needs_case_folding) set.closeOver(USET_SIMPLE_CASE_INSENSITIVE); | 
 |     set.removeAllStrings(); | 
 |     if (negate) set.complement(); | 
 |     for (int i = 0; i < set.getRangeCount(); i++) { | 
 |       result_ranges->Add( | 
 |           CharacterRange::Range(set.getRangeStart(i), set.getRangeEnd(i)), | 
 |           zone); | 
 |     } | 
 |   } | 
 |   return success; | 
 | } | 
 |  | 
 | template <size_t N> | 
 | inline bool NameEquals(const char* name, const char (&literal)[N]) { | 
 |   return strncmp(name, literal, N + 1) == 0; | 
 | } | 
 |  | 
 | bool LookupSpecialPropertyValueName(const char* name, | 
 |                                     ZoneList<CharacterRange>* result, | 
 |                                     bool negate, RegExpFlags flags, | 
 |                                     Zone* zone) { | 
 |   if (NameEquals(name, "Any")) { | 
 |     if (negate) { | 
 |       // Leave the list of character ranges empty, since the negation of 'Any' | 
 |       // is the empty set. | 
 |     } else { | 
 |       result->Add(CharacterRange::Everything(), zone); | 
 |     } | 
 |   } else if (NameEquals(name, "ASCII")) { | 
 |     result->Add(negate ? CharacterRange::Range(0x80, String::kMaxCodePoint) | 
 |                        : CharacterRange::Range(0x0, 0x7F), | 
 |                 zone); | 
 |   } else if (NameEquals(name, "Assigned")) { | 
 |     return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned", | 
 |                                    !negate, result, nullptr, flags, zone); | 
 |   } else { | 
 |     return false; | 
 |   } | 
 |   return true; | 
 | } | 
 |  | 
 | // Explicitly allowlist supported binary properties. The spec forbids supporting | 
 | // properties outside of this set to ensure interoperability. | 
 | bool IsSupportedBinaryProperty(UProperty property, bool unicode_sets) { | 
 |   switch (property) { | 
 |     case UCHAR_ALPHABETIC: | 
 |     // 'Any' is not supported by ICU. See LookupSpecialPropertyValueName. | 
 |     // 'ASCII' is not supported by ICU. See LookupSpecialPropertyValueName. | 
 |     case UCHAR_ASCII_HEX_DIGIT: | 
 |     // 'Assigned' is not supported by ICU. See LookupSpecialPropertyValueName. | 
 |     case UCHAR_BIDI_CONTROL: | 
 |     case UCHAR_BIDI_MIRRORED: | 
 |     case UCHAR_CASE_IGNORABLE: | 
 |     case UCHAR_CASED: | 
 |     case UCHAR_CHANGES_WHEN_CASEFOLDED: | 
 |     case UCHAR_CHANGES_WHEN_CASEMAPPED: | 
 |     case UCHAR_CHANGES_WHEN_LOWERCASED: | 
 |     case UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED: | 
 |     case UCHAR_CHANGES_WHEN_TITLECASED: | 
 |     case UCHAR_CHANGES_WHEN_UPPERCASED: | 
 |     case UCHAR_DASH: | 
 |     case UCHAR_DEFAULT_IGNORABLE_CODE_POINT: | 
 |     case UCHAR_DEPRECATED: | 
 |     case UCHAR_DIACRITIC: | 
 |     case UCHAR_EMOJI: | 
 |     case UCHAR_EMOJI_COMPONENT: | 
 |     case UCHAR_EMOJI_MODIFIER_BASE: | 
 |     case UCHAR_EMOJI_MODIFIER: | 
 |     case UCHAR_EMOJI_PRESENTATION: | 
 |     case UCHAR_EXTENDED_PICTOGRAPHIC: | 
 |     case UCHAR_EXTENDER: | 
 |     case UCHAR_GRAPHEME_BASE: | 
 |     case UCHAR_GRAPHEME_EXTEND: | 
 |     case UCHAR_HEX_DIGIT: | 
 |     case UCHAR_ID_CONTINUE: | 
 |     case UCHAR_ID_START: | 
 |     case UCHAR_IDEOGRAPHIC: | 
 |     case UCHAR_IDS_BINARY_OPERATOR: | 
 |     case UCHAR_IDS_TRINARY_OPERATOR: | 
 |     case UCHAR_JOIN_CONTROL: | 
 |     case UCHAR_LOGICAL_ORDER_EXCEPTION: | 
 |     case UCHAR_LOWERCASE: | 
 |     case UCHAR_MATH: | 
 |     case UCHAR_NONCHARACTER_CODE_POINT: | 
 |     case UCHAR_PATTERN_SYNTAX: | 
 |     case UCHAR_PATTERN_WHITE_SPACE: | 
 |     case UCHAR_QUOTATION_MARK: | 
 |     case UCHAR_RADICAL: | 
 |     case UCHAR_REGIONAL_INDICATOR: | 
 |     case UCHAR_S_TERM: | 
 |     case UCHAR_SOFT_DOTTED: | 
 |     case UCHAR_TERMINAL_PUNCTUATION: | 
 |     case UCHAR_UNIFIED_IDEOGRAPH: | 
 |     case UCHAR_UPPERCASE: | 
 |     case UCHAR_VARIATION_SELECTOR: | 
 |     case UCHAR_WHITE_SPACE: | 
 |     case UCHAR_XID_CONTINUE: | 
 |     case UCHAR_XID_START: | 
 |       return true; | 
 |     case UCHAR_BASIC_EMOJI: | 
 |     case UCHAR_EMOJI_KEYCAP_SEQUENCE: | 
 |     case UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE: | 
 |     case UCHAR_RGI_EMOJI_FLAG_SEQUENCE: | 
 |     case UCHAR_RGI_EMOJI_TAG_SEQUENCE: | 
 |     case UCHAR_RGI_EMOJI_ZWJ_SEQUENCE: | 
 |     case UCHAR_RGI_EMOJI: | 
 |       return unicode_sets; | 
 |     default: | 
 |       break; | 
 |   } | 
 |   return false; | 
 | } | 
 |  | 
 | bool IsBinaryPropertyOfStrings(UProperty property) { | 
 |   switch (property) { | 
 |     case UCHAR_BASIC_EMOJI: | 
 |     case UCHAR_EMOJI_KEYCAP_SEQUENCE: | 
 |     case UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE: | 
 |     case UCHAR_RGI_EMOJI_FLAG_SEQUENCE: | 
 |     case UCHAR_RGI_EMOJI_TAG_SEQUENCE: | 
 |     case UCHAR_RGI_EMOJI_ZWJ_SEQUENCE: | 
 |     case UCHAR_RGI_EMOJI: | 
 |       return true; | 
 |     default: | 
 |       break; | 
 |   } | 
 |   return false; | 
 | } | 
 |  | 
 | bool IsUnicodePropertyValueCharacter(char c) { | 
 |   // https://tc39.github.io/proposal-regexp-unicode-property-escapes/ | 
 |   // | 
 |   // Note that using this to validate each parsed char is quite conservative. | 
 |   // A possible alternative solution would be to only ensure the parsed | 
 |   // property name/value candidate string does not contain '\0' characters and | 
 |   // let ICU lookups trigger the final failure. | 
 |   if ('a' <= c && c <= 'z') return true; | 
 |   if ('A' <= c && c <= 'Z') return true; | 
 |   if ('0' <= c && c <= '9') return true; | 
 |   return (c == '_'); | 
 | } | 
 |  | 
 | }  // namespace | 
 |  | 
 | template <class CharT> | 
 | bool RegExpParserImpl<CharT>::ParsePropertyClassName(ZoneVector<char>* name_1, | 
 |                                                      ZoneVector<char>* name_2) { | 
 |   DCHECK(name_1->empty()); | 
 |   DCHECK(name_2->empty()); | 
 |   // Parse the property class as follows: | 
 |   // - In \p{name}, 'name' is interpreted | 
 |   //   - either as a general category property value name. | 
 |   //   - or as a binary property name. | 
 |   // - In \p{name=value}, 'name' is interpreted as an enumerated property name, | 
 |   //   and 'value' is interpreted as one of the available property value names. | 
 |   // - Aliases in PropertyAlias.txt and PropertyValueAlias.txt can be used. | 
 |   // - Loose matching is not applied. | 
 |   if (current() == '{') { | 
 |     // Parse \p{[PropertyName=]PropertyNameValue} | 
 |     for (Advance(); current() != '}' && current() != '='; Advance()) { | 
 |       if (!IsUnicodePropertyValueCharacter(current())) return false; | 
 |       if (!has_next()) return false; | 
 |       name_1->push_back(static_cast<char>(current())); | 
 |     } | 
 |     if (current() == '=') { | 
 |       for (Advance(); current() != '}'; Advance()) { | 
 |         if (!IsUnicodePropertyValueCharacter(current())) return false; | 
 |         if (!has_next()) return false; | 
 |         name_2->push_back(static_cast<char>(current())); | 
 |       } | 
 |       name_2->push_back(0);  // null-terminate string. | 
 |     } | 
 |   } else { | 
 |     return false; | 
 |   } | 
 |   Advance(); | 
 |   name_1->push_back(0);  // null-terminate string. | 
 |  | 
 |   DCHECK(name_1->size() - 1 == std::strlen(name_1->data())); | 
 |   DCHECK(name_2->empty() || name_2->size() - 1 == std::strlen(name_2->data())); | 
 |   return true; | 
 | } | 
 |  | 
 | template <class CharT> | 
 | bool RegExpParserImpl<CharT>::AddPropertyClassRange( | 
 |     ZoneList<CharacterRange>* add_to_ranges, | 
 |     CharacterClassStrings* add_to_strings, bool negate, | 
 |     const ZoneVector<char>& name_1, const ZoneVector<char>& name_2) { | 
 |   if (name_2.empty()) { | 
 |     // First attempt to interpret as general category property value name. | 
 |     const char* name = name_1.data(); | 
 |     if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate, | 
 |                                 add_to_ranges, add_to_strings, flags(), | 
 |                                 zone())) { | 
 |       return true; | 
 |     } | 
 |     // Interpret "Any", "ASCII", and "Assigned". | 
 |     if (LookupSpecialPropertyValueName(name, add_to_ranges, negate, flags(), | 
 |                                        zone())) { | 
 |       return true; | 
 |     } | 
 |     // Then attempt to interpret as binary property name with value name 'Y'. | 
 |     UProperty property = u_getPropertyEnum(name); | 
 |     if (!IsSupportedBinaryProperty(property, unicode_sets())) return false; | 
 |     if (!IsExactPropertyAlias(name, property)) return false; | 
 |     // Negation of properties with strings is not allowed. | 
 |     // See | 
 |     // https://tc39.es/ecma262/#sec-static-semantics-maycontainstrings | 
 |     if (negate && IsBinaryPropertyOfStrings(property)) return false; | 
 |     if (unicode_sets()) { | 
 |       // In /v mode we can't simple lookup the "false" binary property values, | 
 |       // as the spec requires us to perform case folding before calculating the | 
 |       // complement. | 
 |       // See https://tc39.es/ecma262/#sec-compiletocharset | 
 |       // UnicodePropertyValueExpression :: LoneUnicodePropertyNameOrValue | 
 |       return LookupPropertyValueName(property, "Y", negate, add_to_ranges, | 
 |                                      add_to_strings, flags(), zone()); | 
 |     } else { | 
 |       return LookupPropertyValueName(property, negate ? "N" : "Y", false, | 
 |                                      add_to_ranges, add_to_strings, flags(), | 
 |                                      zone()); | 
 |     } | 
 |   } else { | 
 |     // Both property name and value name are specified. Attempt to interpret | 
 |     // the property name as enumerated property. | 
 |     const char* property_name = name_1.data(); | 
 |     const char* value_name = name_2.data(); | 
 |     UProperty property = u_getPropertyEnum(property_name); | 
 |     if (!IsExactPropertyAlias(property_name, property)) return false; | 
 |     if (property == UCHAR_GENERAL_CATEGORY) { | 
 |       // We want to allow aggregate value names such as "Letter". | 
 |       property = UCHAR_GENERAL_CATEGORY_MASK; | 
 |     } else if (property != UCHAR_SCRIPT && | 
 |                property != UCHAR_SCRIPT_EXTENSIONS) { | 
 |       return false; | 
 |     } | 
 |     return LookupPropertyValueName(property, value_name, negate, add_to_ranges, | 
 |                                    add_to_strings, flags(), zone()); | 
 |   } | 
 | } | 
 |  | 
 | #else  // V8_INTL_SUPPORT | 
 |  | 
 | template <class CharT> | 
 | bool RegExpParserImpl<CharT>::ParsePropertyClassName(ZoneVector<char>* name_1, | 
 |                                                      ZoneVector<char>* name_2) { | 
 |   return false; | 
 | } | 
 |  | 
 | template <class CharT> | 
 | bool RegExpParserImpl<CharT>::AddPropertyClassRange( | 
 |     ZoneList<CharacterRange>* add_to_ranges, | 
 |     CharacterClassStrings* add_to_strings, bool negate, | 
 |     const ZoneVector<char>& name_1, const ZoneVector<char>& name_2) { | 
 |   return false; | 
 | } | 
 |  | 
 | #endif  // V8_INTL_SUPPORT | 
 |  | 
 | template <class CharT> | 
 | bool RegExpParserImpl<CharT>::ParseUnlimitedLengthHexNumber(int max_value, | 
 |                                                             base::uc32* value) { | 
 |   base::uc32 x = 0; | 
 |   int d = base::HexValue(current()); | 
 |   if (d < 0) { | 
 |     return false; | 
 |   } | 
 |   while (d >= 0) { | 
 |     x = x * 16 + d; | 
 |     if (x > static_cast<base::uc32>(max_value)) { | 
 |       return false; | 
 |     } | 
 |     Advance(); | 
 |     d = base::HexValue(current()); | 
 |   } | 
 |   *value = x; | 
 |   return true; | 
 | } | 
 |  | 
 | // https://tc39.es/ecma262/#prod-CharacterEscape | 
 | // Parses a CharacterEscape. On entry current() is the backslash, which is | 
 | // stepped over before the escape itself is examined. Returns the code point | 
 | // the escape denotes; after an error has been reported the result is 0. | 
 | // |in_class_escape_state| says whether the escape occurs inside a character | 
 | // class. |*is_escaped_unicode_character| is set to true only when a \u escape | 
 | // was parsed successfully and is left untouched otherwise. | 
 | template <class CharT> | 
 | base::uc32 RegExpParserImpl<CharT>::ParseCharacterEscape( | 
 |     InClassEscapeState in_class_escape_state, | 
 |     bool* is_escaped_unicode_character) { | 
 |   DCHECK_EQ('\\', current()); | 
 |   DCHECK(has_next()); | 
 |  | 
 |   // Step over the backslash; |escape| selects the kind of escape. | 
 |   Advance(); | 
 |   const base::uc32 escape = current(); | 
 |   const bool in_class = in_class_escape_state == InClassEscapeState::kInClass; | 
 |  | 
 |   // Consumes the current character and yields |value| as the parse result. | 
 |   const auto consume_as = [this](base::uc32 value) { | 
 |     Advance(); | 
 |     return value; | 
 |   }; | 
 |  | 
 |   switch (escape) { | 
 |     // CharacterEscape :: | 
 |     //   ControlEscape :: one of | 
 |     //     f n r t v | 
 |     case 'f': | 
 |       return consume_as('\f'); | 
 |     case 'n': | 
 |       return consume_as('\n'); | 
 |     case 'r': | 
 |       return consume_as('\r'); | 
 |     case 't': | 
 |       return consume_as('\t'); | 
 |     case 'v': | 
 |       return consume_as('\v'); | 
 |     // CharacterEscape :: | 
 |     //   c ControlLetter | 
 |     case 'c': { | 
 |       const base::uc32 control = Next(); | 
 |       // Clearing the bit in which 'A' and 'a' differ folds ASCII lower case | 
 |       // letters onto upper case ones. | 
 |       const base::uc32 folded = control & ~('A' ^ 'a'); | 
 |       if ('A' <= folded && folded <= 'Z') { | 
 |         // Skip 'c' and the letter. Control letters are mapped to ASCII | 
 |         // control characters in the range 0x00-0x1F. | 
 |         Advance(2); | 
 |         return control & 0x1F; | 
 |       } | 
 |       if (IsUnicodeMode()) { | 
 |         // With /u and /v, invalid escapes are not treated as identity escapes. | 
 |         ReportError(RegExpError::kInvalidUnicodeEscape); | 
 |         return 0; | 
 |       } | 
 |       // Annex B (ES#prod-annexB-ClassControlLetter): inside a character class, | 
 |       // and only without /u or /v, digits and underscore are accepted as | 
 |       // control characters as well. | 
 |       const bool is_class_control_letter = | 
 |           in_class && (('0' <= control && control <= '9') || control == '_'); | 
 |       if (is_class_control_letter) { | 
 |         Advance(2); | 
 |         return control & 0x1F; | 
 |       } | 
 |       // We match JSC in reading the backslash as a literal character instead | 
 |       // of as starting an escape. Nothing further is consumed on this path. | 
 |       return '\\'; | 
 |     } | 
 |     // CharacterEscape :: | 
 |     //   0 [lookahead ∉ DecimalDigit] | 
 |     //   [~UnicodeMode] LegacyOctalEscapeSequence | 
 |     case '0': | 
 |       // \0 denotes NUL unless another decimal digit follows. | 
 |       if (!('0' <= Next() && Next() <= '9')) { | 
 |         return consume_as(0); | 
 |       } | 
 |       [[fallthrough]]; | 
 |     case '1': | 
 |     case '2': | 
 |     case '3': | 
 |     case '4': | 
 |     case '5': | 
 |     case '6': | 
 |     case '7': | 
 |       // ES#prod-annexB-LegacyOctalEscapeSequence: for compatibility, a | 
 |       // decimal escape that isn't a back reference (and therefore either \0 | 
 |       // or not valid according to the specification) is read as a 1..3 digit | 
 |       // octal character code. With /u or /v it is an error instead. | 
 |       if (!IsUnicodeMode()) return ParseOctalLiteral(); | 
 |       ReportError(RegExpError::kInvalidDecimalEscape); | 
 |       return 0; | 
 |     // CharacterEscape :: | 
 |     //   HexEscapeSequence | 
 |     case 'x': { | 
 |       Advance(); | 
 |       base::uc32 hex_value; | 
 |       if (ParseHexEscape(2, &hex_value)) return hex_value; | 
 |       // Without /u or /v, a \x that is not followed by a two-digit | 
 |       // hexadecimal is treated as an identity escape. With /u or /v, invalid | 
 |       // escapes are not treated as identity escapes. | 
 |       if (!IsUnicodeMode()) return 'x'; | 
 |       ReportError(RegExpError::kInvalidEscape); | 
 |       return 0; | 
 |     } | 
 |     // CharacterEscape :: | 
 |     //   RegExpUnicodeEscapeSequence [?UnicodeMode] | 
 |     case 'u': { | 
 |       Advance(); | 
 |       base::uc32 unicode_value; | 
 |       if (ParseUnicodeEscape(&unicode_value)) { | 
 |         *is_escaped_unicode_character = true; | 
 |         return unicode_value; | 
 |       } | 
 |       // Without /u or /v, a \u that ParseUnicodeEscape rejects is treated as | 
 |       // an identity escape. With /u or /v, invalid escapes are not treated as | 
 |       // identity escapes. | 
 |       if (!IsUnicodeMode()) return 'u'; | 
 |       ReportError(RegExpError::kInvalidUnicodeEscape); | 
 |       return 0; | 
 |     } | 
 |     default: | 
 |       break; | 
 |   } | 
 |  | 
 |   // CharacterEscape :: | 
 |   //   IdentityEscape[?UnicodeMode, ?N] | 
 |   // | 
 |   // * With /u, no identity escapes except for syntax characters are | 
 |   //   allowed. | 
 |   // * With /v, no identity escapes except for syntax characters and | 
 |   //   ClassSetReservedPunctuators (if within a class) are allowed. | 
 |   // * Without /u or /v: | 
 |   //   * '\c' is not an IdentityEscape. | 
 |   //   * '\k' is not an IdentityEscape when named captures exist. | 
 |   //   * Otherwise, all identity escapes are allowed. | 
 |   if (unicode_sets() && in_class && IsClassSetReservedPunctuator(escape)) { | 
 |     return consume_as(escape); | 
 |   } | 
 |   if (IsUnicodeMode()) { | 
 |     if (IsSyntaxCharacterOrSlash(escape)) return consume_as(escape); | 
 |     ReportError(RegExpError::kInvalidEscape); | 
 |     return 0; | 
 |   } | 
 |   DCHECK(!IsUnicodeMode()); | 
 |   if (escape == 'c') { | 
 |     ReportError(RegExpError::kInvalidEscape); | 
 |     return 0; | 
 |   } | 
 |   // Note: It's important to Advance before the HasNamedCaptures call s.t. we | 
 |   // don't start scanning in the middle of an escape. | 
 |   Advance(); | 
 |   const bool is_rejected_k = | 
 |       escape == 'k' && HasNamedCaptures(in_class_escape_state); | 
 |   if (is_rejected_k) { | 
 |     ReportError(RegExpError::kInvalidEscape); | 
 |     return 0; | 
 |   } | 
 |   return escape; | 
 | } | 
 |  | 
 | // https://tc39.es/ecma262/#prod-ClassRanges | 
 | // Parses the contents of a character class up to, but not including, the | 
 | // closing ']' and appends what was parsed to |ranges|. Returns nullptr on | 
 | // success; on a syntax error the result of ReportError is returned. | 
 | template <class CharT> | 
 | RegExpTree* RegExpParserImpl<CharT>::ParseClassRanges( | 
 |     ZoneList<CharacterRange>* ranges, bool add_unicode_case_equivalents) { | 
 |   // Appends a range consisting of the single character |c|. | 
 |   const auto add_singleton = [this, ranges](base::uc32 c) { | 
 |     ranges->Add(CharacterRange::Singleton(c), zone()); | 
 |   }; | 
 |  | 
 |   base::uc32 lower, upper; | 
 |   bool lower_is_class, upper_is_class; | 
 |   while (has_more() && current() != ']') { | 
 |     ParseClassEscape(ranges, zone(), add_unicode_case_equivalents, &lower, | 
 |                      &lower_is_class CHECK_FAILED); | 
 |     if (current() != '-') { | 
 |       // A lone ClassAtom. A class escape has already added its ranges, so | 
 |       // only a plain character still needs to be recorded. | 
 |       if (!lower_is_class) add_singleton(lower); | 
 |       continue; | 
 |     } | 
 |  | 
 |     // Step over the '-'. | 
 |     Advance(); | 
 |     if (!has_more()) { | 
 |       // If we reach the end we break out of the loop and let the | 
 |       // following code report an error. | 
 |       break; | 
 |     } | 
 |     if (current() == ']') { | 
 |       // A '-' directly before the closing bracket is a literal. | 
 |       if (!lower_is_class) add_singleton(lower); | 
 |       add_singleton('-'); | 
 |       break; | 
 |     } | 
 |  | 
 |     ParseClassEscape(ranges, zone(), add_unicode_case_equivalents, &upper, | 
 |                      &upper_is_class CHECK_FAILED); | 
 |     if (lower_is_class || upper_is_class) { | 
 |       // Either end is an escaped character class. Treat the '-' verbatim. | 
 |       if (IsUnicodeMode()) { | 
 |         // ES2015 21.2.2.15.1 step 1. | 
 |         return ReportError(RegExpError::kInvalidCharacterClass); | 
 |       } | 
 |       if (!lower_is_class) add_singleton(lower); | 
 |       add_singleton('-'); | 
 |       if (!upper_is_class) add_singleton(upper); | 
 |       continue; | 
 |     } | 
 |  | 
 |     // ES2015 21.2.2.15.1 step 6. | 
 |     if (lower > upper) { | 
 |       return ReportError(RegExpError::kOutOfOrderCharacterClass); | 
 |     } | 
 |     ranges->Add(CharacterRange::Range(lower, upper), zone()); | 
 |   } | 
 |   return nullptr; | 
 | } | 
 |  | 
 | // https://tc39.es/ecma262/#prod-ClassEscape | 
 | // Parses one atom of a character class. If the atom is a character class | 
 | // escape, |*is_class_escape| is set to true and |*char_out| is not written; | 
 | // otherwise |*is_class_escape| is false and |*char_out| receives the | 
 | // character. On an escape at the end of the pattern an error is reported and | 
 | // |*char_out| is not written. | 
 | template <class CharT> | 
 | void RegExpParserImpl<CharT>::ParseClassEscape( | 
 |     ZoneList<CharacterRange>* ranges, Zone* zone, | 
 |     bool add_unicode_case_equivalents, base::uc32* char_out, | 
 |     bool* is_class_escape) { | 
 |   *is_class_escape = false; | 
 |  | 
 |   const base::uc32 first = current(); | 
 |   if (first != '\\') { | 
 |     // Not a ClassEscape: the character stands for itself. | 
 |     Advance(); | 
 |     *char_out = first; | 
 |     return; | 
 |   } | 
 |  | 
 |   const base::uc32 next = Next(); | 
 |   if (next == kEndMarker) { | 
 |     ReportError(RegExpError::kEscapeAtEndOfPattern); | 
 |     return; | 
 |   } | 
 |   if (next == 'b') { | 
 |     // Inside a class, \b denotes the backspace character. | 
 |     *char_out = '\b'; | 
 |     Advance(2); | 
 |     return; | 
 |   } | 
 |   if (next == '-' && IsUnicodeMode()) { | 
 |     // In unicode mode \- is accepted here and yields '-'. Otherwise it is | 
 |     // left to the generic escape handling below. | 
 |     *char_out = next; | 
 |     Advance(2); | 
 |     return; | 
 |   } | 
 |  | 
 |   if (TryParseCharacterClassEscape(next, InClassEscapeState::kInClass, ranges, | 
 |                                    nullptr, zone, | 
 |                                    add_unicode_case_equivalents)) { | 
 |     *is_class_escape = true; | 
 |     return; | 
 |   } | 
 |  | 
 |   // The out-parameter is required by the callee but not needed here. | 
 |   bool ignored_is_unicode_escape = false; | 
 |   *char_out = ParseCharacterEscape(InClassEscapeState::kInClass, | 
 |                                    &ignored_is_unicode_escape); | 
 | } | 
 |  | 
 | // https://tc39.es/ecma262/#prod-CharacterClassEscape | 
 | // Attempts to parse the escape starting at current() (a backslash followed by | 
 | // |next|) as a CharacterClassEscape. Returns false, consuming nothing, when | 
 | // the escape is not one. Returns true when it has been handled: either its | 
 | // contents were added to |ranges|/|strings|, or an error was reported for an | 
 | // invalid property name. | 
 | template <class CharT> | 
 | bool RegExpParserImpl<CharT>::TryParseCharacterClassEscape( | 
 |     base::uc32 next, InClassEscapeState in_class_escape_state, | 
 |     ZoneList<CharacterRange>* ranges, CharacterClassStrings* strings, | 
 |     Zone* zone, bool add_unicode_case_equivalents) { | 
 |   DCHECK_EQ(current(), '\\'); | 
 |   DCHECK_EQ(Next(), next); | 
 |  | 
 |   // \d \D \s \S \w \W | 
 |   const bool is_standard_set = next == 'd' || next == 'D' || next == 's' || | 
 |                                next == 'S' || next == 'w' || next == 'W'; | 
 |   if (is_standard_set) { | 
 |     CharacterRange::AddClassEscape(static_cast<StandardCharacterSet>(next), | 
 |                                    ranges, add_unicode_case_equivalents, zone); | 
 |     Advance(2); | 
 |     return true; | 
 |   } | 
 |  | 
 |   // \p{...} and \P{...} are only recognized in unicode mode. | 
 |   const bool is_property_escape = next == 'p' || next == 'P'; | 
 |   if (!is_property_escape || !IsUnicodeMode()) return false; | 
 |  | 
 |   const bool negate = next == 'P'; | 
 |   Advance(2); | 
 |   ZoneVector<char> first_name(zone); | 
 |   ZoneVector<char> second_name(zone); | 
 |   const bool added = | 
 |       ParsePropertyClassName(&first_name, &second_name) && | 
 |       AddPropertyClassRange(ranges, strings, negate, first_name, second_name); | 
 |   if (!added) { | 
 |     // The error code depends on whether we are inside a character class. | 
 |     const bool in_class = | 
 |         in_class_escape_state == InClassEscapeState::kInClass; | 
 |     ReportError(in_class ? RegExpError::kInvalidClassPropertyName | 
 |                          : RegExpError::kInvalidPropertyName); | 
 |   } | 
 |   return true; | 
 | } | 
 |  | 
 | namespace { | 
 |  | 
 | // Records one class string. A |normalized_string| of length 1 is added to | 
 | // |ranges| as a single character; a string of any other length is added to | 
 | // |strings|, keyed by its contents and mapped to |regexp_string|. | 
 | void AddClassString(ZoneList<base::uc32>* normalized_string, | 
 |                     RegExpTree* regexp_string, ZoneList<CharacterRange>* ranges, | 
 |                     CharacterClassStrings* strings, Zone* zone) { | 
 |   if (normalized_string->length() != 1) { | 
 |     strings->emplace(normalized_string->ToVector(), regexp_string); | 
 |     return; | 
 |   } | 
 |   const base::uc32 only_character = normalized_string->at(0); | 
 |   ranges->Add(CharacterRange::Singleton(only_character), zone); | 
 | } | 
 |  | 
 | }  // namespace | 
 |  | 
 | // https://tc39.es/ecma262/#prod-ClassStringDisjunction | 
 | // Parses \q{...}. Each '|'-separated alternative is handed to AddClassString, | 
 | // which adds it to |ranges| or |strings|. |ranges| is canonicalized before | 
 | // returning. Returns nullptr on success. | 
 | template <class CharT> | 
 | RegExpTree* RegExpParserImpl<CharT>::ParseClassStringDisjunction( | 
 |     ZoneList<CharacterRange>* ranges, CharacterClassStrings* strings) { | 
 |   DCHECK(unicode_sets()); | 
 |   DCHECK_EQ(current(), '\\'); | 
 |   DCHECK_EQ(Next(), 'q'); | 
 |   Advance(2); | 
 |   if (current() != '{') { | 
 |     // Identity escape of 'q' is not allowed in unicode mode. | 
 |     return ReportError(RegExpError::kInvalidEscape); | 
 |   } | 
 |   Advance(); | 
 |  | 
 |   // The alternative currently being parsed is kept both as a list of | 
 |   // characters and as regexp text. | 
 |   ZoneList<base::uc32>* alternative = | 
 |       zone()->template New<ZoneList<base::uc32>>(4, zone()); | 
 |   RegExpTextBuilder::SmallRegExpTreeVector alternative_terms(zone()); | 
 |   RegExpTextBuilder alternative_builder(zone(), &alternative_terms, flags()); | 
 |  | 
 |   while (has_more() && current() != '}') { | 
 |     if (current() != '|') { | 
 |       const base::uc32 parsed = ParseClassSetCharacter(CHECK_FAILED); | 
 |       base::uc32 normalized = parsed; | 
 |       if (ignore_case()) { | 
 | #ifdef V8_INTL_SUPPORT | 
 |         normalized = u_foldCase(parsed, U_FOLD_CASE_DEFAULT); | 
 | #else | 
 |         normalized = AsciiAlphaToLower(parsed); | 
 | #endif | 
 |       } | 
 |       alternative->Add(normalized, zone()); | 
 |       alternative_builder.AddUnicodeCharacter(normalized); | 
 |       continue; | 
 |     } | 
 |  | 
 |     // A '|' ends the current alternative: record it and start a fresh one. | 
 |     AddClassString(alternative, alternative_builder.ToRegExp(), ranges, | 
 |                    strings, zone()); | 
 |     alternative = zone()->template New<ZoneList<base::uc32>>(4, zone()); | 
 |     alternative_terms.clear(); | 
 |     Advance(); | 
 |   } | 
 |  | 
 |   // Record the last alternative. | 
 |   AddClassString(alternative, alternative_builder.ToRegExp(), ranges, strings, | 
 |                  zone()); | 
 |   CharacterRange::Canonicalize(ranges); | 
 |  | 
 |   // We don't need to handle missing closing '}' here. | 
 |   // If the character class is correctly closed, ParseClassSetCharacter will | 
 |   // report an error. | 
 |   Advance(); | 
 |   return nullptr; | 
 | } | 
 |  | 
 | // https://tc39.es/ecma262/#prod-ClassSetOperand | 
 | // Overload that always produces a tree on success. The tree returned depends | 
 | // on |*type_out|: | 
 | //  * kNestedClass: RegExpClassSetExpression | 
 | //  * For all other types: RegExpClassSetOperand | 
 | template <class CharT> | 
 | RegExpTree* RegExpParserImpl<CharT>::ParseClassSetOperand( | 
 |     const RegExpBuilder* builder, ClassSetOperandType* type_out) { | 
 |   ZoneList<CharacterRange>* operand_ranges = | 
 |       zone()->template New<ZoneList<CharacterRange>>(1, zone()); | 
 |   CharacterClassStrings* operand_strings = | 
 |       zone()->template New<CharacterClassStrings>(zone()); | 
 |   base::uc32 single_character; | 
 |   RegExpTree* parsed = | 
 |       ParseClassSetOperand(builder, type_out, operand_ranges, operand_strings, | 
 |                            &single_character CHECK_FAILED); | 
 |   const ClassSetOperandType type = *type_out; | 
 |  | 
 |   // Only a nested class is returned as a tree by the overload called above. | 
 |   DCHECK_IMPLIES(type != ClassSetOperandType::kNestedClass, parsed == nullptr); | 
 |   // A single character is reported via |single_character| only. | 
 |   DCHECK_IMPLIES(type == ClassSetOperandType::kClassSetCharacter, | 
 |                  operand_ranges->is_empty()); | 
 |   DCHECK_IMPLIES(type == ClassSetOperandType::kClassSetCharacter, | 
 |                  operand_strings->empty()); | 
 |   // A nested class leaves the scratch containers untouched. | 
 |   DCHECK_IMPLIES(type == ClassSetOperandType::kNestedClass, | 
 |                  operand_ranges->is_empty()); | 
 |   DCHECK_IMPLIES(type == ClassSetOperandType::kNestedClass, | 
 |                  operand_strings->empty()); | 
 |   DCHECK_IMPLIES(type == ClassSetOperandType::kNestedClass, | 
 |                  parsed->IsClassSetExpression()); | 
 |   // ClassSetRange is only used within ClassSetUnion(). | 
 |   DCHECK_NE(type, ClassSetOperandType::kClassSetRange); | 
 |   // There are no restrictions for kCharacterClassEscape. | 
 |   // CharacterClassEscape includes \p{}, which can contain ranges, strings or | 
 |   // both and \P{}, which could contain nothing (i.e. \P{Any}). | 
 |  | 
 |   if (parsed != nullptr) return parsed; | 
 |  | 
 |   // No tree was produced, so wrap what was collected into an operand. A | 
 |   // single character has to be added to the ranges first. | 
 |   if (type == ClassSetOperandType::kClassSetCharacter) { | 
 |     AddMaybeSimpleCaseFoldedRange(operand_ranges, | 
 |                                   CharacterRange::Singleton(single_character)); | 
 |   } | 
 |   return zone()->template New<RegExpClassSetOperand>(operand_ranges, | 
 |                                                      operand_strings); | 
 | } | 
 |  | 
 | // https://tc39.es/ecma262/#prod-ClassSetOperand | 
 | // Parses one operand and reports its kind through |type_out|: | 
 | //  * kNestedClass: a tree of type RegExpClassSetExpression is returned and | 
 | //    |ranges|/|strings| are not modified. | 
 | //  * kClassSetCharacter: |character| is set and nullptr is returned. | 
 | //  * All other types: |ranges|/|strings| are modified and nullptr is | 
 | //    returned. | 
 | template <class CharT> | 
 | RegExpTree* RegExpParserImpl<CharT>::ParseClassSetOperand( | 
 |     const RegExpBuilder* builder, ClassSetOperandType* type_out, | 
 |     ZoneList<CharacterRange>* ranges, CharacterClassStrings* strings, | 
 |     base::uc32* character) { | 
 |   DCHECK(unicode_sets()); | 
 |   const base::uc32 first = current(); | 
 |  | 
 |   if (first == '[') { | 
 |     *type_out = ClassSetOperandType::kNestedClass; | 
 |     return ParseCharacterClass(builder); | 
 |   } | 
 |  | 
 |   if (first == '\\') { | 
 |     const base::uc32 next = Next(); | 
 |     if (next == 'q') { | 
 |       // \q{...} | 
 |       *type_out = ClassSetOperandType::kClassStringDisjunction; | 
 |       ParseClassStringDisjunction(ranges, strings CHECK_FAILED); | 
 |       return nullptr; | 
 |     } | 
 |     const bool fold_case = ignore_case(); | 
 |     if (TryParseCharacterClassEscape(next, InClassEscapeState::kInClass, | 
 |                                      ranges, strings, zone(), fold_case)) { | 
 |       *type_out = ClassSetOperandType::kCharacterClassEscape; | 
 |       return nullptr; | 
 |     } | 
 |     // Any other escape is parsed as a ClassSetCharacter below. | 
 |   } | 
 |  | 
 |   // The type is recorded before parsing, so it is set even if parsing fails. | 
 |   *type_out = ClassSetOperandType::kClassSetCharacter; | 
 |   const base::uc32 parsed = ParseClassSetCharacter(CHECK_FAILED); | 
 |   *character = parsed; | 
 |   return nullptr; | 
 | } | 
 |  | 
 | // Parses a single character inside a class in unicode sets mode and returns | 
 | // it. After an error has been reported the result is 0. | 
 | template <class CharT> | 
 | base::uc32 RegExpParserImpl<CharT>::ParseClassSetCharacter() { | 
 |   DCHECK(unicode_sets()); | 
 |   const base::uc32 first = current(); | 
 |  | 
 |   if (first != '\\') { | 
 |     // An unescaped character is taken literally unless it is reserved. | 
 |     if (IsClassSetSyntaxCharacter(first)) { | 
 |       ReportError(RegExpError::kInvalidCharacterInClass); | 
 |       return 0; | 
 |     } | 
 |     if (IsClassSetReservedDoublePunctuator(first)) { | 
 |       ReportError(RegExpError::kInvalidClassSetOperation); | 
 |       return 0; | 
 |     } | 
 |     Advance(); | 
 |     return first; | 
 |   } | 
 |  | 
 |   const base::uc32 next = Next(); | 
 |   if (next == 'b') { | 
 |     // \b denotes the backspace character here. | 
 |     Advance(2); | 
 |     return '\b'; | 
 |   } | 
 |   if (next == kEndMarker) { | 
 |     ReportError(RegExpError::kEscapeAtEndOfPattern); | 
 |     return 0; | 
 |   } | 
 |  | 
 |   // The out-parameter is required by the callee but not needed here. | 
 |   bool ignored_is_unicode_escape = false; | 
 |   return ParseCharacterEscape(InClassEscapeState::kInClass, | 
 |                               &ignored_is_unicode_escape); | 
 | } | 
 |  | 
 | namespace { | 
 |  | 
 | // Returns whether |operand|, which was parsed as an operand of kind |type|, | 
 | // may contain strings. |operand| is not inspected for single characters and | 
 | // character ranges. | 
 | bool MayContainStrings(ClassSetOperandType type, RegExpTree* operand) { | 
 |   switch (type) { | 
 |     case ClassSetOperandType::kNestedClass: | 
 |       // A nested class is either plain class ranges, which hold no strings, | 
 |       // or a class set expression that knows the answer. | 
 |       return !operand->IsClassRanges() && | 
 |              operand->AsClassSetExpression()->may_contain_strings(); | 
 |     case ClassSetOperandType::kClassStringDisjunction: | 
 |     case ClassSetOperandType::kCharacterClassEscape: | 
 |       return operand->AsClassSetOperand()->has_strings(); | 
 |     case ClassSetOperandType::kClassSetRange: | 
 |     case ClassSetOperandType::kClassSetCharacter: | 
 |       return false; | 
 |   } | 
 | } | 
 |  | 
 | }  // namespace | 
 |  | 
 | // Adds |new_range| to |ranges| and canonicalizes |ranges|. When case is | 
 | // ignored, the unicode case equivalents of |new_range| are added as well. | 
 | template <class CharT> | 
 | void RegExpParserImpl<CharT>::AddMaybeSimpleCaseFoldedRange( | 
 |     ZoneList<CharacterRange>* ranges, CharacterRange new_range) { | 
 |   DCHECK(unicode_sets()); | 
 |   if (!ignore_case()) { | 
 |     ranges->Add(new_range, zone()); | 
 |   } else { | 
 |     // Compute the equivalents on a scratch list that holds only |new_range|, | 
 |     // then append the result to |ranges|. | 
 |     ZoneList<CharacterRange>* with_equivalents = | 
 |         zone()->template New<ZoneList<CharacterRange>>(2, zone()); | 
 |     with_equivalents->Add(new_range, zone()); | 
 |     CharacterRange::AddUnicodeCaseEquivalents(with_equivalents, zone()); | 
 |     ranges->AddAll(*with_equivalents, zone()); | 
 |   } | 
 |   CharacterRange::Canonicalize(ranges); | 
 | } | 
 |  | 
 | // https://tc39.es/ecma262/#prod-ClassUnion | 
 | // Parses the rest of a class union, up to and including the closing ']'. The | 
 | // first operand has already been parsed by the caller and is passed either as | 
 | // a tree (|first_operand|) or through |ranges|/|strings|/|character|, as | 
 | // described by |first_operand_type|. | 
 | template <class CharT> | 
 | RegExpTree* RegExpParserImpl<CharT>::ParseClassUnion( | 
 |     const RegExpBuilder* builder, bool is_negated, RegExpTree* first_operand, | 
 |     ClassSetOperandType first_operand_type, ZoneList<CharacterRange>* ranges, | 
 |     CharacterClassStrings* strings, base::uc32 character) { | 
 |   DCHECK(unicode_sets()); | 
 |   ZoneList<RegExpTree*>* operands = | 
 |       zone()->template New<ZoneList<RegExpTree*>>(2, zone()); | 
 |  | 
 |   // Either the lhs values were added to |ranges|/|strings| (in which case | 
 |   // |first_operand| is nullptr), or the lhs was evaluated to a tree and passed | 
 |   // as |first_operand| (in which case |ranges| and |strings| are empty). Only | 
 |   // a tree has to be added to |operands| here. | 
 |   const bool has_first_tree = first_operand != nullptr; | 
 |   bool may_contain_strings = | 
 |       has_first_tree && MayContainStrings(first_operand_type, first_operand); | 
 |   if (has_first_tree) operands->Add(first_operand, zone()); | 
 |  | 
 |   ClassSetOperandType last_type = first_operand_type; | 
 |   while (has_more() && current() != ']') { | 
 |     if (current() != '-') { | 
 |       // Another operand follows. A pending ClassSetCharacter is not the start | 
 |       // of a range, so it is added as a single character now. | 
 |       if (last_type == ClassSetOperandType::kClassSetCharacter) { | 
 |         AddMaybeSimpleCaseFoldedRange(ranges, | 
 |                                       CharacterRange::Singleton(character)); | 
 |       } | 
 |       RegExpTree* operand = ParseClassSetOperand( | 
 |           builder, &last_type, ranges, strings, &character CHECK_FAILED); | 
 |       if (operand == nullptr) continue; | 
 |  | 
 |       if (MayContainStrings(last_type, operand)) may_contain_strings = true; | 
 |       // Add the range we started building as operand and reset the current | 
 |       // range. | 
 |       if (!(ranges->is_empty() && strings->empty())) { | 
 |         if (!strings->empty()) may_contain_strings = true; | 
 |         operands->Add( | 
 |             zone()->template New<RegExpClassSetOperand>(ranges, strings), | 
 |             zone()); | 
 |         ranges = zone()->template New<ZoneList<CharacterRange>>(2, zone()); | 
 |         strings = zone()->template New<CharacterClassStrings>(zone()); | 
 |       } | 
 |       operands->Add(operand, zone()); | 
 |       continue; | 
 |     } | 
 |  | 
 |     // Mix of ClassSetRange and ClassSubtraction is not allowed. | 
 |     if (Next() == '-') { | 
 |       return ReportError(RegExpError::kInvalidClassSetOperation); | 
 |     } | 
 |     Advance(); | 
 |     if (!has_more()) { | 
 |       // If we reach the end we break out of the loop and let the | 
 |       // following code report an error. | 
 |       break; | 
 |     } | 
 |     // If the lhs and rhs around '-' are both ClassSetCharacters, they | 
 |     // represent a character range. | 
 |     // In case one of them is not a ClassSetCharacter, it is a syntax error, | 
 |     // as '-' can not be used unescaped within a class with /v. | 
 |     // See | 
 |     // https://tc39.es/ecma262/#prod-ClassSetRange | 
 |     if (last_type != ClassSetOperandType::kClassSetCharacter) { | 
 |       return ReportError(RegExpError::kInvalidCharacterClass); | 
 |     } | 
 |     const base::uc32 range_start = character; | 
 |     ParseClassSetOperand(builder, &last_type, ranges, strings, | 
 |                          &character CHECK_FAILED); | 
 |     if (last_type != ClassSetOperandType::kClassSetCharacter) { | 
 |       return ReportError(RegExpError::kInvalidCharacterClass); | 
 |     } | 
 |     if (range_start > character) { | 
 |       return ReportError(RegExpError::kOutOfOrderCharacterClass); | 
 |     } | 
 |     AddMaybeSimpleCaseFoldedRange( | 
 |         ranges, CharacterRange::Range(range_start, character)); | 
 |     last_type = ClassSetOperandType::kClassSetRange; | 
 |   } | 
 |  | 
 |   if (!has_more()) { | 
 |     return ReportError(RegExpError::kUnterminatedCharacterClass); | 
 |   } | 
 |  | 
 |   // A trailing ClassSetCharacter has not been added to |ranges| yet. | 
 |   if (last_type == ClassSetOperandType::kClassSetCharacter) { | 
 |     AddMaybeSimpleCaseFoldedRange(ranges, CharacterRange::Singleton(character)); | 
 |   } | 
 |  | 
 |   // Add the range we started building as operand. | 
 |   if (!(ranges->is_empty() && strings->empty())) { | 
 |     if (!strings->empty()) may_contain_strings = true; | 
 |     operands->Add(zone()->template New<RegExpClassSetOperand>(ranges, strings), | 
 |                   zone()); | 
 |   } | 
 |  | 
 |   DCHECK_EQ(current(), ']'); | 
 |   Advance(); | 
 |  | 
 |   if (is_negated && may_contain_strings) { | 
 |     return ReportError(RegExpError::kNegatedCharacterClassWithStrings); | 
 |   } | 
 |  | 
 |   if (operands->is_empty()) { | 
 |     // Return empty expression if no operands were added (e.g. [\P{Any}] | 
 |     // produces an empty range). | 
 |     DCHECK(ranges->is_empty()); | 
 |     DCHECK(strings->empty()); | 
 |     return RegExpClassSetExpression::Empty(zone(), is_negated); | 
 |   } | 
 |  | 
 |   return zone()->template New<RegExpClassSetExpression>( | 
 |       RegExpClassSetExpression::OperationType::kUnion, is_negated, | 
 |       may_contain_strings, operands); | 
 | } | 
 |  | 
 | // https://tc39.es/ecma262/#prod-ClassIntersection | 
 | // Parses the rest of a class intersection, starting at the first "&&" and | 
 | // ending after the closing ']'. |first_operand| is the operand that was | 
 | // parsed before the first "&&". | 
 | template <class CharT> | 
 | RegExpTree* RegExpParserImpl<CharT>::ParseClassIntersection( | 
 |     const RegExpBuilder* builder, bool is_negated, RegExpTree* first_operand, | 
 |     ClassSetOperandType first_operand_type) { | 
 |   DCHECK(unicode_sets()); | 
 |   DCHECK(current() == '&' && Next() == '&'); | 
 |   ZoneList<RegExpTree*>* operands = | 
 |       zone()->template New<ZoneList<RegExpTree*>>(2, zone()); | 
 |   operands->Add(first_operand, zone()); | 
 |   // The result may contain strings only while every operand seen so far may. | 
 |   bool may_contain_strings = | 
 |       MayContainStrings(first_operand_type, first_operand); | 
 |  | 
 |   while (has_more() && current() != ']') { | 
 |     const bool at_operator = current() == '&' && Next() == '&'; | 
 |     if (!at_operator) { | 
 |       return ReportError(RegExpError::kInvalidClassSetOperation); | 
 |     } | 
 |     Advance(2); | 
 |     // [lookahead ≠ &] | 
 |     if (current() == '&') { | 
 |       return ReportError(RegExpError::kInvalidCharacterInClass); | 
 |     } | 
 |  | 
 |     ClassSetOperandType operand_type; | 
 |     RegExpTree* operand = | 
 |         ParseClassSetOperand(builder, &operand_type CHECK_FAILED); | 
 |     if (!MayContainStrings(operand_type, operand)) may_contain_strings = false; | 
 |     operands->Add(operand, zone()); | 
 |   } | 
 |  | 
 |   if (!has_more()) { | 
 |     return ReportError(RegExpError::kUnterminatedCharacterClass); | 
 |   } | 
 |   if (is_negated && may_contain_strings) { | 
 |     return ReportError(RegExpError::kNegatedCharacterClassWithStrings); | 
 |   } | 
 |   DCHECK_EQ(current(), ']'); | 
 |   Advance(); | 
 |   return zone()->template New<RegExpClassSetExpression>( | 
 |       RegExpClassSetExpression::OperationType::kIntersection, is_negated, | 
 |       may_contain_strings, operands); | 
 | } | 
 |  | 
 | // https://tc39.es/ecma262/#prod-ClassSubtraction | 
 | // Parses the rest of a class subtraction, starting at the first "--" and | 
 | // ending after the closing ']'. |first_operand| is the operand that was | 
 | // parsed before the first "--"; it alone decides whether the result may | 
 | // contain strings. | 
 | template <class CharT> | 
 | RegExpTree* RegExpParserImpl<CharT>::ParseClassSubtraction( | 
 |     const RegExpBuilder* builder, bool is_negated, RegExpTree* first_operand, | 
 |     ClassSetOperandType first_operand_type) { | 
 |   DCHECK(unicode_sets()); | 
 |   DCHECK(current() == '-' && Next() == '-'); | 
 |   const bool may_contain_strings = | 
 |       MayContainStrings(first_operand_type, first_operand); | 
 |   // This is rejected up front, before any further operand is parsed. | 
 |   if (is_negated && may_contain_strings) { | 
 |     return ReportError(RegExpError::kNegatedCharacterClassWithStrings); | 
 |   } | 
 |  | 
 |   ZoneList<RegExpTree*>* operands = | 
 |       zone()->template New<ZoneList<RegExpTree*>>(2, zone()); | 
 |   operands->Add(first_operand, zone()); | 
 |   while (has_more() && current() != ']') { | 
 |     const bool at_operator = current() == '-' && Next() == '-'; | 
 |     if (!at_operator) { | 
 |       return ReportError(RegExpError::kInvalidClassSetOperation); | 
 |     } | 
 |     Advance(2); | 
 |     ClassSetOperandType ignored_type;  // Required by the callee, not used. | 
 |     RegExpTree* subtrahend = | 
 |         ParseClassSetOperand(builder, &ignored_type CHECK_FAILED); | 
 |     operands->Add(subtrahend, zone()); | 
 |   } | 
 |  | 
 |   if (!has_more()) { | 
 |     return ReportError(RegExpError::kUnterminatedCharacterClass); | 
 |   } | 
 |   DCHECK_EQ(current(), ']'); | 
 |   Advance(); | 
 |   return zone()->template New<RegExpClassSetExpression>( | 
 |       RegExpClassSetExpression::OperationType::kSubtraction, is_negated, | 
 |       may_contain_strings, operands); | 
 | } | 
 |  | 
 | // https://tc39.es/ecma262/#prod-CharacterClass | 
 | // Parses a complete character class, from the opening '[' through the closing | 
 | // ']'. Outside unicode sets mode the result is a RegExpClassRanges. In | 
 | // unicode sets mode the first operand is parsed here and the rest is | 
 | // delegated to the subtraction, intersection or union parser. | 
 | template <class CharT> | 
 | RegExpTree* RegExpParserImpl<CharT>::ParseCharacterClass( | 
 |     const RegExpBuilder* builder) { | 
 |   DCHECK_EQ(current(), '['); | 
 |   Advance(); | 
 |   const bool is_negated = current() == '^'; | 
 |   if (is_negated) Advance(); | 
 |  | 
 |   ZoneList<CharacterRange>* ranges = | 
 |       zone()->template New<ZoneList<CharacterRange>>(2, zone()); | 
 |  | 
 |   // Builds the RegExpClassRanges result from the current |ranges|. | 
 |   const auto make_class_ranges = [&]() -> RegExpTree* { | 
 |     RegExpClassRanges::ClassRangesFlags class_ranges_flags; | 
 |     if (is_negated) class_ranges_flags = RegExpClassRanges::NEGATED; | 
 |     return zone()->template New<RegExpClassRanges>(zone(), ranges, | 
 |                                                    class_ranges_flags); | 
 |   }; | 
 |  | 
 |   if (current() == ']') { | 
 |     // The class has no contents. | 
 |     Advance(); | 
 |     if (unicode_sets()) { | 
 |       return RegExpClassSetExpression::Empty(zone(), is_negated); | 
 |     } | 
 |     return make_class_ranges(); | 
 |   } | 
 |  | 
 |   if (!unicode_sets()) { | 
 |     const bool add_unicode_case_equivalents = IsUnicodeMode() && ignore_case(); | 
 |     ParseClassRanges(ranges, add_unicode_case_equivalents CHECK_FAILED); | 
 |     if (!has_more()) { | 
 |       return ReportError(RegExpError::kUnterminatedCharacterClass); | 
 |     } | 
 |     DCHECK_EQ(current(), ']'); | 
 |     Advance(); | 
 |     return make_class_ranges(); | 
 |   } | 
 |  | 
 |   // Unicode sets mode: parse the first operand, then let the character after | 
 |   // it decide which kind of class set expression this is. | 
 |   ClassSetOperandType operand_type; | 
 |   CharacterClassStrings* strings = | 
 |       zone()->template New<CharacterClassStrings>(zone()); | 
 |   base::uc32 character; | 
 |   RegExpTree* operand = ParseClassSetOperand( | 
 |       builder, &operand_type, ranges, strings, &character CHECK_FAILED); | 
 |  | 
 |   const base::uc32 following = current(); | 
 |   const bool is_set_operator = | 
 |       (following == '-' || following == '&') && Next() == following; | 
 |   if (!is_set_operator) { | 
 |     // Everything else, including ClassSetRange, is handled in | 
 |     // ParseClassUnion(). | 
 |     return ParseClassUnion(builder, is_negated, operand, operand_type, ranges, | 
 |                            strings, character); | 
 |   } | 
 |  | 
 |   // Subtraction and intersection take their first operand as a tree, so | 
 |   // create one if parsing only filled in |ranges|/|strings|/|character|. | 
 |   if (operand == nullptr) { | 
 |     if (operand_type == ClassSetOperandType::kClassSetCharacter) { | 
 |       AddMaybeSimpleCaseFoldedRange(ranges, | 
 |                                     CharacterRange::Singleton(character)); | 
 |     } | 
 |     operand = zone()->template New<RegExpClassSetOperand>(ranges, strings); | 
 |   } | 
 |   if (following == '-') { | 
 |     return ParseClassSubtraction(builder, is_negated, operand, operand_type); | 
 |   } | 
 |   return ParseClassIntersection(builder, is_negated, operand, operand_type); | 
 | } | 
 |  | 
 | #undef CHECK_FAILED | 
 |  | 
 | // Parses the pattern and stores the outcome in |result|. On failure only the | 
 | // error and its position are written and false is returned. On success the | 
 | // tree and the properties collected while parsing are written and true is | 
 | // returned. | 
 | template <class CharT> | 
 | bool RegExpParserImpl<CharT>::Parse(RegExpCompileData* result) { | 
 |   DCHECK_NOT_NULL(result); | 
 |   RegExpTree* pattern = ParsePattern(); | 
 |  | 
 |   if (!failed()) { | 
 |     DCHECK_NOT_NULL(pattern); | 
 |     DCHECK_EQ(error_, RegExpError::kNone); | 
 |     if (v8_flags.trace_regexp_parser) { | 
 |       // Print the parsed tree to stdout. | 
 |       StdoutStream os; | 
 |       pattern->Print(os, zone()); | 
 |       os << "\n"; | 
 |     } | 
 |  | 
 |     const int capture_count = captures_started(); | 
 |     result->tree = pattern; | 
 |     result->simple = pattern->IsAtom() && simple() && capture_count == 0; | 
 |     result->contains_anchor = contains_anchor(); | 
 |     result->capture_count = capture_count; | 
 |     result->named_captures = GetNamedCaptures(); | 
 |     return true; | 
 |   } | 
 |  | 
 |   DCHECK_NULL(pattern); | 
 |   DCHECK_NE(error_, RegExpError::kNone); | 
 |   result->error = error_; | 
 |   result->error_pos = error_pos_; | 
 |   return false; | 
 | } | 
 |  | 
 | // Forwards to the text builder's FlushText(). | 
 | void RegExpBuilder::FlushText() { | 
 |   text_builder().FlushText(); | 
 | } | 
 |  | 
 | // Hands the 16-bit character |c| to the text builder and clears the | 
 | // pending-empty marker. | 
 | void RegExpBuilder::AddCharacter(base::uc16 c) { | 
 |   text_builder().AddCharacter(c); | 
 |   pending_empty_ = false; | 
 | } | 
 |  | 
 | // Hands the 32-bit character |c| to the text builder and clears the | 
 | // pending-empty marker. | 
 | void RegExpBuilder::AddUnicodeCharacter(base::uc32 c) { | 
 |   text_builder().AddUnicodeCharacter(c); | 
 |   pending_empty_ = false; | 
 | } | 
 |  | 
 | // Hands |character| to the text builder's AddEscapedUnicodeCharacter() and | 
 | // clears the pending-empty marker. | 
 | void RegExpBuilder::AddEscapedUnicodeCharacter(base::uc32 character) { | 
 |   text_builder().AddEscapedUnicodeCharacter(character); | 
 |   pending_empty_ = false; | 
 | } | 
 |  | 
 | // Records an empty term: sets the pending-empty marker and asks the text | 
 | // builder to flush its pending surrogate. | 
 | void RegExpBuilder::AddEmpty() { | 
 |   pending_empty_ = true; | 
 |   text_builder().FlushPendingSurrogate(); | 
 | } | 
 |  | 
 | // Hands the class ranges |cc| to the text builder and clears the | 
 | // pending-empty marker. | 
 | void RegExpBuilder::AddClassRanges(RegExpClassRanges* cc) { | 
 |   text_builder().AddClassRanges(cc); | 
 |   pending_empty_ = false; | 
 | } | 
 |  | 
 | // Adds |term| as an atom. An empty term is recorded via AddEmpty(). A text | 
 | // element goes to the text builder. Anything else is appended to the term | 
 | // list after the pending text has been flushed. | 
 | void RegExpBuilder::AddAtom(RegExpTree* term) { | 
 |   if (term->IsEmpty()) { | 
 |     AddEmpty(); | 
 |     return; | 
 |   } | 
 |   pending_empty_ = false; | 
 |   if (!term->IsTextElement()) { | 
 |     FlushText(); | 
 |     terms_.emplace_back(term); | 
 |     return; | 
 |   } | 
 |   text_builder().AddAtom(term); | 
 | } | 
 |  | 
 | void RegExpBuilder::AddTerm(RegExpTree* term) { | 
 |   DCHECK(!term->IsEmpty()); | 
 |   pending_empty_ = false; | 
 |   if (term->IsTextElement()) { | 
 |     text_builder().AddTerm(term); | 
 |   } else { | 
 |     FlushText(); | 
 |     terms_.emplace_back(term); | 
 |   } | 
 | } | 
 |  | 
 | void RegExpBuilder::AddAssertion(RegExpTree* assert) { | 
 |   FlushText(); | 
 |   pending_empty_ = false; | 
 |   terms_.emplace_back(assert); | 
 | } | 
 |  | 
 | void RegExpBuilder::NewAlternative() { FlushTerms(); } | 
 |  | 
 | void RegExpBuilder::FlushTerms() { | 
 |   FlushText(); | 
 |   size_t num_terms = terms_.size(); | 
 |   RegExpTree* alternative; | 
 |   if (num_terms == 0) { | 
 |     alternative = zone()->New<RegExpEmpty>(); | 
 |   } else if (num_terms == 1) { | 
 |     alternative = terms_.back(); | 
 |   } else { | 
 |     alternative = | 
 |         zone()->New<RegExpAlternative>(zone()->New<ZoneList<RegExpTree*>>( | 
 |             base::VectorOf(terms_.begin(), terms_.size()), zone())); | 
 |   } | 
 |   alternatives_.emplace_back(alternative); | 
 |   terms_.clear(); | 
 | } | 
 |  | 
 | RegExpTree* RegExpBuilder::ToRegExp() { | 
 |   FlushTerms(); | 
 |   size_t num_alternatives = alternatives_.size(); | 
 |   if (num_alternatives == 0) return zone()->New<RegExpEmpty>(); | 
 |   if (num_alternatives == 1) return alternatives_.back(); | 
 |   return zone()->New<RegExpDisjunction>(zone()->New<ZoneList<RegExpTree*>>( | 
 |       base::VectorOf(alternatives_.begin(), alternatives_.size()), zone())); | 
 | } | 
 |  | 
 | bool RegExpBuilder::AddQuantifierToAtom( | 
 |     int min, int max, int index, | 
 |     RegExpQuantifier::QuantifierType quantifier_type) { | 
 |   if (pending_empty_) { | 
 |     pending_empty_ = false; | 
 |     return true; | 
 |   } | 
 |   RegExpTree* atom = text_builder().PopLastAtom(); | 
 |   if (atom != nullptr) { | 
 |     FlushText(); | 
 |   } else if (!terms_.empty()) { | 
 |     atom = terms_.back(); | 
 |     terms_.pop_back(); | 
 |     if (atom->IsLookaround()) { | 
 |       // With /u or /v, lookarounds are not quantifiable. | 
 |       if (IsUnicodeMode()) return false; | 
 |       // Lookbehinds are not quantifiable. | 
 |       if (atom->AsLookaround()->type() == RegExpLookaround::LOOKBEHIND) { | 
 |         return false; | 
 |       } | 
 |     } | 
 |     if (atom->max_match() == 0) { | 
 |       // Guaranteed to only match an empty string. | 
 |       if (min == 0) { | 
 |         return true; | 
 |       } | 
 |       terms_.emplace_back(atom); | 
 |       return true; | 
 |     } | 
 |   } else { | 
 |     // Only call immediately after adding an atom or character! | 
 |     UNREACHABLE(); | 
 |   } | 
 |   terms_.emplace_back( | 
 |       zone()->New<RegExpQuantifier>(min, max, quantifier_type, index, atom)); | 
 |   return true; | 
 | } | 
 |  | 
 | template class RegExpParserImpl<uint8_t>; | 
 | template class RegExpParserImpl<base::uc16>; | 
 |  | 
 | }  // namespace | 
 |  | 
 | // static | 
 | bool RegExpParser::ParseRegExpFromHeapString(Isolate* isolate, Zone* zone, | 
 |                                              DirectHandle<String> input, | 
 |                                              RegExpFlags flags, | 
 |                                              RegExpCompileData* result) { | 
 |   DisallowGarbageCollection no_gc; | 
 |   uintptr_t stack_limit = isolate->stack_guard()->real_climit(); | 
 |   String::FlatContent content = input->GetFlatContent(no_gc); | 
 |   if (content.IsOneByte()) { | 
 |     base::Vector<const uint8_t> v = content.ToOneByteVector(); | 
 |     return RegExpParserImpl<uint8_t>{v.begin(),   v.length(), flags, | 
 |                                      stack_limit, zone,       no_gc} | 
 |         .Parse(result); | 
 |   } else { | 
 |     base::Vector<const base::uc16> v = content.ToUC16Vector(); | 
 |     return RegExpParserImpl<base::uc16>{v.begin(),   v.length(), flags, | 
 |                                         stack_limit, zone,       no_gc} | 
 |         .Parse(result); | 
 |   } | 
 | } | 
 |  | 
 | // static | 
 | template <class CharT> | 
 | bool RegExpParser::VerifyRegExpSyntax(Zone* zone, uintptr_t stack_limit, | 
 |                                       const CharT* input, int input_length, | 
 |                                       RegExpFlags flags, | 
 |                                       RegExpCompileData* result, | 
 |                                       const DisallowGarbageCollection& no_gc) { | 
 |   return RegExpParserImpl<CharT>{input,       input_length, flags, | 
 |                                  stack_limit, zone,         no_gc} | 
 |       .Parse(result); | 
 | } | 
 |  | 
 | template bool RegExpParser::VerifyRegExpSyntax<uint8_t>( | 
 |     Zone*, uintptr_t, const uint8_t*, int, RegExpFlags, RegExpCompileData*, | 
 |     const DisallowGarbageCollection&); | 
 | template bool RegExpParser::VerifyRegExpSyntax<base::uc16>( | 
 |     Zone*, uintptr_t, const base::uc16*, int, RegExpFlags, RegExpCompileData*, | 
 |     const DisallowGarbageCollection&); | 
 |  | 
 | }  // namespace internal | 
 | }  // namespace v8 |