// blob: 52e7ee6582a231fc94175dfbf27d287412b90c7b [file] [log] [blame]
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_OBJECTS_JS_REGEXP_H_
#define V8_OBJECTS_JS_REGEXP_H_
#include "include/v8-regexp.h"
#include "src/objects/contexts.h"
#include "src/objects/js-array.h"
#include "src/regexp/regexp-flags.h"
#include "torque-generated/bit-fields.h"
// Has to be the last include (doesn't have include guards):
#include "src/objects/object-macros.h"
namespace v8 {
namespace internal {
#include "torque-generated/src/objects/js-regexp-tq.inc"
// Regular expressions
// The regular expression holds a single reference to a FixedArray in
// the kDataOffset field.
// The FixedArray contains the following data (the k*Index constants declared
// in the class give the exact slot layout):
//  - tag : type of regexp implementation (not compiled yet, atom, irregexp
//    or experimental)
//  - reference to the original source string
//  - reference to the original flag string
// If it is an atom regexp
//  - a reference to a literal string to search for
// If it is an irregexp regexp:
//  - code for Latin1 inputs, or a Smi marker equal to kUninitializedValue.
//  - code for UC16 inputs, or a Smi marker equal to kUninitializedValue.
//  - bytecode (a ByteArray) for Latin1 inputs, or the same Smi marker.
//  - bytecode (a ByteArray) for UC16 inputs, or the same Smi marker.
//  - max number of registers used by irregexp implementations.
//  - number of captures in the compiled regexp.
//  - the capture name map (names of named capture groups and their indices).
//  - the number of ticks left until tier-up.
//  - the backtrack limit, or kNoBacktrackLimit.
// An experimental regexp currently uses a data array of the same format as an
// irregexp regexp (see kExperimentalDataSize).
class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
 public:
  // The value stored in the tag slot (kTagIndex) of the data array.
  enum Type {
    NOT_COMPILED,  // Initial value. No data array has been set yet.
    ATOM,          // A simple string match.
    IRREGEXP,      // Compiled with Irregexp (code or bytecode).
    EXPERIMENTAL,  // Compiled to use the experimental linear time engine.
  };
  DEFINE_TORQUE_GENERATED_JS_REG_EXP_FLAGS()

  // When `backtrack_limit` is left at kNoBacktrackLimit, no backtrack limit
  // is applied.
  V8_EXPORT_PRIVATE static MaybeHandle<JSRegExp> New(
      Isolate* isolate, Handle<String> source, Flags flags,
      uint32_t backtrack_limit = kNoBacktrackLimit);

  // Two overloads: one takes already-decoded `flags`, the other takes the
  // flags as a string.
  static MaybeHandle<JSRegExp> Initialize(
      Handle<JSRegExp> regexp, Handle<String> source, Flags flags,
      uint32_t backtrack_limit = kNoBacktrackLimit);
  static MaybeHandle<JSRegExp> Initialize(Handle<JSRegExp> regexp,
                                          Handle<String> source,
                                          Handle<String> flags_string);

  DECL_ACCESSORS(last_index, Tagged<Object>)

  // Instance fields accessors.
  inline Tagged<String> source() const;
  inline Flags flags() const;

  // Data array field accessors.

  inline Type type_tag() const;
  inline Tagged<String> atom_pattern() const;
  // This could be a Smi kUninitializedValue or InstructionStream.
  // NOTE(review): set_code() below accepts a Handle<Code>; confirm which
  // object type this comment should name.
  V8_EXPORT_PRIVATE Tagged<Object> code(IsolateForSandbox isolate,
                                        bool is_latin1) const;
  // Counterpart of code(). The selector is named `is_latin1` like in code(),
  // bytecode(), code_index() and bytecode_index().
  // NOTE(review): presumably the slot is chosen via code_index(is_latin1);
  // confirm against the out-of-line definition.
  V8_EXPORT_PRIVATE void set_code(bool is_latin1, Handle<Code> code);
  // This could be a Smi kUninitializedValue or ByteArray.
  V8_EXPORT_PRIVATE Tagged<Object> bytecode(bool is_latin1) const;

  // Sets the bytecode as well as initializing trampoline slots to the
  // RegExpInterpreterTrampoline.
  void set_bytecode_and_trampoline(Isolate* isolate,
                                   Handle<ByteArray> bytecode);

  inline int max_register_count() const;
  // Number of captures (without the match itself).
  inline int capture_count() const;
  inline Tagged<Object> capture_name_map();
  inline void set_capture_name_map(Handle<FixedArray> capture_name_map);
  uint32_t backtrack_limit() const;

  // Conversions between the JSRegExp flag types and the RegExpFlag(s) types.
  // These are plain casts; the static_asserts below check that the
  // enumerator values of both families agree.
  static constexpr Flag AsJSRegExpFlag(RegExpFlag f) {
    return static_cast<Flag>(f);
  }
  static constexpr Flags AsJSRegExpFlags(RegExpFlags f) {
    return Flags{static_cast<int>(f)};
  }
  static constexpr RegExpFlags AsRegExpFlags(Flags f) {
    return RegExpFlags{static_cast<int>(f)};
  }

  // Maps a flag character to its RegExpFlag. Returns an empty optional if
  // TryRegExpFlagFromChar() does not recognize `c`, or if `c` denotes the
  // kLinear flag while v8_flags.enable_experimental_regexp_engine is off.
  static base::Optional<RegExpFlag> FlagFromChar(char c) {
    base::Optional<RegExpFlag> f = TryRegExpFlagFromChar(c);
    if (!f.has_value()) return f;
    if (f.value() == RegExpFlag::kLinear &&
        !v8_flags.enable_experimental_regexp_engine) {
      return {};
    }
    return f;
  }

  // The flag values of JSRegExp, the public v8::RegExp API and RegExpFlag
  // must stay in sync, since the conversions above are simple casts.
  static_assert(static_cast<int>(kNone) == v8::RegExp::kNone);
#define V(_, Camel, ...)                                             \
  static_assert(static_cast<int>(k##Camel) == v8::RegExp::k##Camel); \
  static_assert(static_cast<int>(k##Camel) ==                        \
                static_cast<int>(RegExpFlag::k##Camel));
  REGEXP_FLAG_LIST(V)
#undef V
  static_assert(kFlagCount == v8::RegExp::kFlagCount);
  static_assert(kFlagCount == kRegExpFlagCount);

  static base::Optional<Flags> FlagsFromString(Isolate* isolate,
                                               Handle<String> flags);

  V8_EXPORT_PRIVATE static Handle<String> StringFromFlags(Isolate* isolate,
                                                          Flags flags);

  inline Tagged<String> EscapedPattern();

  // Tier-up helpers (defined out of line); see the comment on
  // kIrregexpTicksUntilTierUpIndex for the tick-counting scheme.
  bool CanTierUp();
  bool MarkedForTierUp();
  void ResetLastTierUpTick();
  void TierUpTick();
  void MarkTierUpForNextExec();
  bool ShouldProduceBytecode();

  inline bool HasCompiledCode() const;
  inline void DiscardCompiledCodeForSerialization();

  // Only IRREGEXP and EXPERIMENTAL regexps support captures.
  static constexpr bool TypeSupportsCaptures(Type t) {
    return t == IRREGEXP || t == EXPERIMENTAL;
  }

  // Each capture (including the match itself) needs two registers.
  static constexpr int RegistersForCaptureCount(int count) {
    return (count + 1) * 2;
  }
  // Inverse of RegistersForCaptureCount(). `register_count` must be even and
  // at least 2 (the registers of the match itself).
  static constexpr int CaptureCountForRegisters(int register_count) {
    DCHECK_EQ(register_count % 2, 0);
    DCHECK_GE(register_count, 2);
    return (register_count - 2) / 2;
  }

  // Data array index of the code slot for the given subject encoding.
  static constexpr int code_index(bool is_latin1) {
    return is_latin1 ? kIrregexpLatin1CodeIndex : kIrregexpUC16CodeIndex;
  }

  // Data array index of the bytecode slot for the given subject encoding.
  static constexpr int bytecode_index(bool is_latin1) {
    return is_latin1 ? kIrregexpLatin1BytecodeIndex
                     : kIrregexpUC16BytecodeIndex;
  }

  // Dispatched behavior.
  DECL_PRINTER(JSRegExp)
  DECL_VERIFIER(JSRegExp)

  /* This is already an in-object field. */
  // TODO(v8:8944): improve handling of in-object fields
  static constexpr int kLastIndexOffset = kHeaderSize;

  // The initial value of the last_index field on a new JSRegExp instance.
  static constexpr int kInitialLastIndexValue = 0;

  // Indices in the data array.
  static constexpr int kTagIndex = 0;
  static constexpr int kSourceIndex = kTagIndex + 1;
  static constexpr int kFlagsIndex = kSourceIndex + 1;
  static constexpr int kFirstTypeSpecificIndex = kFlagsIndex + 1;
  static constexpr int kMinDataArrayLength = kFirstTypeSpecificIndex;

  // The data fields are used in different ways depending on the
  // value of the tag.

  // Atom regexps (literal strings).
  static constexpr int kAtomPatternIndex = kFirstTypeSpecificIndex;
  static constexpr int kAtomDataSize = kAtomPatternIndex + 1;

  // An InstructionStream object or a Smi marker value equal to
  // kUninitializedValue.
  static constexpr int kIrregexpLatin1CodeIndex = kFirstTypeSpecificIndex;
  static constexpr int kIrregexpUC16CodeIndex = kIrregexpLatin1CodeIndex + 1;
  // A ByteArray object or a Smi marker value equal to kUninitializedValue.
  static constexpr int kIrregexpLatin1BytecodeIndex =
      kIrregexpUC16CodeIndex + 1;
  static constexpr int kIrregexpUC16BytecodeIndex =
      kIrregexpLatin1BytecodeIndex + 1;
  // Maximal number of registers used by either Latin1 or UC16.
  // Only used to check that there is enough stack space
  static constexpr int kIrregexpMaxRegisterCountIndex =
      kIrregexpUC16BytecodeIndex + 1;
  // Number of captures in the compiled regexp.
  static constexpr int kIrregexpCaptureCountIndex =
      kIrregexpMaxRegisterCountIndex + 1;
  // Maps names of named capture groups (at indices 2i) to their corresponding
  // (1-based) capture group indices (at indices 2i + 1).
  static constexpr int kIrregexpCaptureNameMapIndex =
      kIrregexpCaptureCountIndex + 1;
  // Tier-up ticks are set to the value of the tier-up ticks flag. The value is
  // decremented on each execution of the bytecode, so that the tier-up
  // happens once the ticks reach zero.
  // This value is ignored if the regexp-tier-up flag isn't turned on.
  static constexpr int kIrregexpTicksUntilTierUpIndex =
      kIrregexpCaptureNameMapIndex + 1;
  // A smi containing either the backtracking limit or kNoBacktrackLimit.
  // TODO(jgruber): If needed, this limit could be packed into other fields
  // above to save space.
  static constexpr int kIrregexpBacktrackLimit =
      kIrregexpTicksUntilTierUpIndex + 1;
  static constexpr int kIrregexpDataSize = kIrregexpBacktrackLimit + 1;

  // TODO(mbid,v8:10765): At the moment the EXPERIMENTAL data array conforms
  // to the format of an IRREGEXP data array, with most fields set to some
  // default/uninitialized value. This is because EXPERIMENTAL and IRREGEXP
  // regexps take the same code path in `RegExpExecInternal`, which reads off
  // various fields from the data array. `RegExpExecInternal` should probably
  // distinguish between EXPERIMENTAL and IRREGEXP, and then we can get rid of
  // all the IRREGEXP only fields.
  static constexpr int kExperimentalDataSize = kIrregexpDataSize;

  // In-object fields.
  static constexpr int kLastIndexFieldIndex = 0;
  static constexpr int kInObjectFieldCount = 1;

  // The actual object size including in-object fields.
  static constexpr int Size() {
    return kHeaderSize + kInObjectFieldCount * kTaggedSize;
  }

  // Descriptor array index to important methods in the prototype.
  static constexpr int kExecFunctionDescriptorIndex = 1;
  static constexpr int kSymbolMatchFunctionDescriptorIndex = 15;
  static constexpr int kSymbolMatchAllFunctionDescriptorIndex = 16;
  static constexpr int kSymbolReplaceFunctionDescriptorIndex = 17;
  static constexpr int kSymbolSearchFunctionDescriptorIndex = 18;
  static constexpr int kSymbolSplitFunctionDescriptorIndex = 19;

  // The uninitialized value for a regexp code object.
  static constexpr int kUninitializedValue = -1;

  // If the backtrack limit is set to this marker value, no limit is applied.
  static constexpr uint32_t kNoBacktrackLimit = 0;

  // The heuristic value for the length of the subject string for which we
  // tier-up to the compiler immediately, instead of using the interpreter.
  static constexpr int kTierUpForSubjectLengthValue = 1000;

  // Maximum number of captures allowed.
  static constexpr int kMaxCaptures = 1 << 16;

 private:
  // Buffer with room for kFlagCount + 1 chars.
  using FlagsBuffer = base::EmbeddedVector<char, kFlagCount + 1>;
  inline static const char* FlagsToString(Flags flags, FlagsBuffer* out_buffer);

  // Raw access to the slot at `index` (one of the k*Index constants above)
  // of the data array.
  inline Tagged<Object> DataAt(int index) const;
  inline void SetDataAt(int index, Tagged<Object> value);

  TQ_OBJECT_CONSTRUCTORS(JSRegExp)
};
// NOTE(review): presumably provides the operator overloads for the
// JSRegExp::Flags type; the macro is defined outside this file, so confirm
// there which operators it generates.
DEFINE_OPERATORS_FOR_FLAGS(JSRegExp::Flags)
// A JSRegExpResult is an ordinary JSArray that was created from a dedicated
// initial map. That map reserves in-object property slots (e.g. for the
// "index" and "input" properties assigned by RegExp.prototype.exec), which
// makes creating RegExp exec results faster.
// The class itself only provides the constants needed while creating such a
// result; once created, the result must be treated as a JSArray in all
// regards.
class JSRegExpResult
    : public TorqueGeneratedJSRegExpResult<JSRegExpResult, JSArray> {
 public:
  // TODO(joshualitt): We would like to add printers and verifiers to
  // JSRegExpResult, and maybe JSRegExpResultIndices, but both have the same
  // instance type as JSArray.

  // In-object property indices. Each index is defined relative to the
  // previous one, so the slots are consecutive starting at 0.
  static constexpr int kIndexIndex = 0;
  static constexpr int kInputIndex = kIndexIndex + 1;
  static constexpr int kGroupsIndex = kInputIndex + 1;

  // Private internal only fields.
  static constexpr int kNamesIndex = kGroupsIndex + 1;
  static constexpr int kRegExpInputIndex = kNamesIndex + 1;
  static constexpr int kRegExpLastIndex = kRegExpInputIndex + 1;

  // Total number of in-object properties (public and internal).
  static constexpr int kInObjectPropertyCount = kRegExpLastIndex + 1;

  // Context slot that holds the initial map for these results.
  static constexpr int kMapIndexInContext = Context::REGEXP_RESULT_MAP_INDEX;

  TQ_OBJECT_CONSTRUCTORS(JSRegExpResult)
};
// A JSRegExpResult with one additional in-object property (kIndicesIndex)
// placed directly after the in-object properties of JSRegExpResult.
class JSRegExpResultWithIndices
    : public TorqueGeneratedJSRegExpResultWithIndices<JSRegExpResultWithIndices,
                                                      JSRegExpResult> {
 public:
  // Pins the parent's in-object property count: the additional property of
  // this class occupies the slot right after the parent's properties, so any
  // change to JSRegExpResult's layout must be reviewed here as well.
  static_assert(
      JSRegExpResult::kInObjectPropertyCount == 6,
      "JSRegExpResultWithIndices appends its in-object property right after "
      "those of JSRegExpResult; revisit this class when JSRegExpResult's "
      "in-object property count changes");
  // Derived from the parent instead of repeating the literal values (6 and 7).
  static constexpr int kIndicesIndex = JSRegExpResult::kInObjectPropertyCount;
  static constexpr int kInObjectPropertyCount = kIndicesIndex + 1;

  TQ_OBJECT_CONSTRUCTORS(JSRegExpResultWithIndices)
};
// A JSRegExpResultIndices is an ordinary JSArray that was created from a
// dedicated initial map. That map reserves an in-object property slot for the
// "groups" property, as assigned by RegExp.prototype.exec, which makes
// creating RegExp exec results faster.
// The class itself only provides the constants needed while creating such a
// result; once created, the result must be treated as a JSArray in all
// regards.
class JSRegExpResultIndices
    : public TorqueGeneratedJSRegExpResultIndices<JSRegExpResultIndices,
                                                  JSArray> {
 public:
  // In-object property layout: a single slot, holding the groups.
  static constexpr int kGroupsIndex = 0;
  static constexpr int kInObjectPropertyCount = kGroupsIndex + 1;

  // Descriptor index of groups.
  static constexpr int kGroupsDescriptorIndex = 1;

  // Builds the indices object from `match_info` and `maybe_names`.
  // NOTE(review): presumably `maybe_names` carries the capture names when
  // the regexp has named groups; confirm against the definition.
  static Handle<JSRegExpResultIndices> BuildIndices(
      Isolate* isolate, Handle<RegExpMatchInfo> match_info,
      Handle<Object> maybe_names);

  TQ_OBJECT_CONSTRUCTORS(JSRegExpResultIndices)
};
} // namespace internal
} // namespace v8
#include "src/objects/object-macros-undef.h"
#endif // V8_OBJECTS_JS_REGEXP_H_