blob: e2cf2028d9da87fc66af6efb856e83fc865fef85 [file] [log] [blame]
// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/v8.h"
#include "src/arguments.h"
#include "src/jsregexp-inl.h"
#include "src/jsregexp.h"
#include "src/runtime/runtime-utils.h"
#include "src/string-builder.h"
#include "src/string-search.h"
namespace v8 {
namespace internal {
class CompiledReplacement {
public:
explicit CompiledReplacement(Zone* zone)
: parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
// Return whether the replacement is simple.
bool Compile(Handle<String> replacement, int capture_count,
int subject_length);
// Use Apply only if Compile returned false.
void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
int32_t* match);
// Number of distinct parts of the replacement pattern.
int parts() { return parts_.length(); }
Zone* zone() const { return zone_; }
private:
enum PartType {
SUBJECT_PREFIX = 1,
SUBJECT_SUFFIX,
SUBJECT_CAPTURE,
REPLACEMENT_SUBSTRING,
REPLACEMENT_STRING,
NUMBER_OF_PART_TYPES
};
struct ReplacementPart {
static inline ReplacementPart SubjectMatch() {
return ReplacementPart(SUBJECT_CAPTURE, 0);
}
static inline ReplacementPart SubjectCapture(int capture_index) {
return ReplacementPart(SUBJECT_CAPTURE, capture_index);
}
static inline ReplacementPart SubjectPrefix() {
return ReplacementPart(SUBJECT_PREFIX, 0);
}
static inline ReplacementPart SubjectSuffix(int subject_length) {
return ReplacementPart(SUBJECT_SUFFIX, subject_length);
}
static inline ReplacementPart ReplacementString() {
return ReplacementPart(REPLACEMENT_STRING, 0);
}
static inline ReplacementPart ReplacementSubString(int from, int to) {
DCHECK(from >= 0);
DCHECK(to > from);
return ReplacementPart(-from, to);
}
// If tag <= 0 then it is the negation of a start index of a substring of
// the replacement pattern, otherwise it's a value from PartType.
ReplacementPart(int tag, int data) : tag(tag), data(data) {
// Must be non-positive or a PartType value.
DCHECK(tag < NUMBER_OF_PART_TYPES);
}
// Either a value of PartType or a non-positive number that is
// the negation of an index into the replacement string.
int tag;
// The data value's interpretation depends on the value of tag:
// tag == SUBJECT_PREFIX ||
// tag == SUBJECT_SUFFIX: data is unused.
// tag == SUBJECT_CAPTURE: data is the number of the capture.
// tag == REPLACEMENT_SUBSTRING ||
// tag == REPLACEMENT_STRING: data is index into array of substrings
// of the replacement string.
// tag <= 0: Temporary representation of the substring of the replacement
// string ranging over -tag .. data.
// Is replaced by REPLACEMENT_{SUB,}STRING when we create the
// substring objects.
int data;
};
template <typename Char>
bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
Vector<Char> characters, int capture_count,
int subject_length, Zone* zone) {
int length = characters.length();
int last = 0;
for (int i = 0; i < length; i++) {
Char c = characters[i];
if (c == '$') {
int next_index = i + 1;
if (next_index == length) { // No next character!
break;
}
Char c2 = characters[next_index];
switch (c2) {
case '$':
if (i > last) {
// There is a substring before. Include the first "$".
parts->Add(
ReplacementPart::ReplacementSubString(last, next_index),
zone);
last = next_index + 1; // Continue after the second "$".
} else {
// Let the next substring start with the second "$".
last = next_index;
}
i = next_index;
break;
case '`':
if (i > last) {
parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
}
parts->Add(ReplacementPart::SubjectPrefix(), zone);
i = next_index;
last = i + 1;
break;
case '\'':
if (i > last) {
parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
}
parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone);
i = next_index;
last = i + 1;
break;
case '&':
if (i > last) {
parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
}
parts->Add(ReplacementPart::SubjectMatch(), zone);
i = next_index;
last = i + 1;
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9': {
int capture_ref = c2 - '0';
if (capture_ref > capture_count) {
i = next_index;
continue;
}
int second_digit_index = next_index + 1;
if (second_digit_index < length) {
// Peek ahead to see if we have two digits.
Char c3 = characters[second_digit_index];
if ('0' <= c3 && c3 <= '9') { // Double digits.
int double_digit_ref = capture_ref * 10 + c3 - '0';
if (double_digit_ref <= capture_count) {
next_index = second_digit_index;
capture_ref = double_digit_ref;
}
}
}
if (capture_ref > 0) {
if (i > last) {
parts->Add(ReplacementPart::ReplacementSubString(last, i),
zone);
}
DCHECK(capture_ref <= capture_count);
parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone);
last = next_index + 1;
}
i = next_index;
break;
}
default:
i = next_index;
break;
}
}
}
if (length > last) {
if (last == 0) {
// Replacement is simple. Do not use Apply to do the replacement.
return true;
} else {
parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
}
}
return false;
}
ZoneList<ReplacementPart> parts_;
ZoneList<Handle<String> > replacement_substrings_;
Zone* zone_;
};
bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count,
int subject_length) {
{
DisallowHeapAllocation no_gc;
String::FlatContent content = replacement->GetFlatContent();
DCHECK(content.IsFlat());
bool simple = false;
if (content.IsOneByte()) {
simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
capture_count, subject_length, zone());
} else {
DCHECK(content.IsTwoByte());
simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
capture_count, subject_length, zone());
}
if (simple) return true;
}
Isolate* isolate = replacement->GetIsolate();
// Find substrings of replacement string and create them as String objects.
int substring_index = 0;
for (int i = 0, n = parts_.length(); i < n; i++) {
int tag = parts_[i].tag;
if (tag <= 0) { // A replacement string slice.
int from = -tag;
int to = parts_[i].data;
replacement_substrings_.Add(
isolate->factory()->NewSubString(replacement, from, to), zone());
parts_[i].tag = REPLACEMENT_SUBSTRING;
parts_[i].data = substring_index;
substring_index++;
} else if (tag == REPLACEMENT_STRING) {
replacement_substrings_.Add(replacement, zone());
parts_[i].data = substring_index;
substring_index++;
}
}
return false;
}
void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
int match_from, int match_to, int32_t* match) {
DCHECK_LT(0, parts_.length());
for (int i = 0, n = parts_.length(); i < n; i++) {
ReplacementPart part = parts_[i];
switch (part.tag) {
case SUBJECT_PREFIX:
if (match_from > 0) builder->AddSubjectSlice(0, match_from);
break;
case SUBJECT_SUFFIX: {
int subject_length = part.data;
if (match_to < subject_length) {
builder->AddSubjectSlice(match_to, subject_length);
}
break;
}
case SUBJECT_CAPTURE: {
int capture = part.data;
int from = match[capture * 2];
int to = match[capture * 2 + 1];
if (from >= 0 && to > from) {
builder->AddSubjectSlice(from, to);
}
break;
}
case REPLACEMENT_SUBSTRING:
case REPLACEMENT_STRING:
builder->AddString(replacement_substrings_[part.data]);
break;
default:
UNREACHABLE();
}
}
}
void FindOneByteStringIndices(Vector<const uint8_t> subject, char pattern,
ZoneList<int>* indices, unsigned int limit,
Zone* zone) {
DCHECK(limit > 0);
// Collect indices of pattern in subject using memchr.
// Stop after finding at most limit values.
const uint8_t* subject_start = subject.start();
const uint8_t* subject_end = subject_start + subject.length();
const uint8_t* pos = subject_start;
while (limit > 0) {
pos = reinterpret_cast<const uint8_t*>(
memchr(pos, pattern, subject_end - pos));
if (pos == NULL) return;
indices->Add(static_cast<int>(pos - subject_start), zone);
pos++;
limit--;
}
}
void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
ZoneList<int>* indices, unsigned int limit,
Zone* zone) {
DCHECK(limit > 0);
const uc16* subject_start = subject.start();
const uc16* subject_end = subject_start + subject.length();
for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
if (*pos == pattern) {
indices->Add(static_cast<int>(pos - subject_start), zone);
limit--;
}
}
}
template <typename SubjectChar, typename PatternChar>
void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
Vector<const PatternChar> pattern,
ZoneList<int>* indices, unsigned int limit, Zone* zone) {
DCHECK(limit > 0);
// Collect indices of pattern in subject.
// Stop after finding at most limit values.
int pattern_length = pattern.length();
int index = 0;
StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
while (limit > 0) {
index = search.Search(subject, index);
if (index < 0) return;
indices->Add(index, zone);
index += pattern_length;
limit--;
}
}
void FindStringIndicesDispatch(Isolate* isolate, String* subject,
String* pattern, ZoneList<int>* indices,
unsigned int limit, Zone* zone) {
{
DisallowHeapAllocation no_gc;
String::FlatContent subject_content = subject->GetFlatContent();
String::FlatContent pattern_content = pattern->GetFlatContent();
DCHECK(subject_content.IsFlat());
DCHECK(pattern_content.IsFlat());
if (subject_content.IsOneByte()) {
Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
if (pattern_content.IsOneByte()) {
Vector<const uint8_t> pattern_vector =
pattern_content.ToOneByteVector();
if (pattern_vector.length() == 1) {
FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
limit, zone);
} else {
FindStringIndices(isolate, subject_vector, pattern_vector, indices,
limit, zone);
}
} else {
FindStringIndices(isolate, subject_vector,
pattern_content.ToUC16Vector(), indices, limit, zone);
}
} else {
Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
if (pattern_content.IsOneByte()) {
Vector<const uint8_t> pattern_vector =
pattern_content.ToOneByteVector();
if (pattern_vector.length() == 1) {
FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
limit, zone);
} else {
FindStringIndices(isolate, subject_vector, pattern_vector, indices,
limit, zone);
}
} else {
Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
if (pattern_vector.length() == 1) {
FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
limit, zone);
} else {
FindStringIndices(isolate, subject_vector, pattern_vector, indices,
limit, zone);
}
}
}
}
}
template <typename ResultSeqString>
MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString(
Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
Handle<String> replacement, Handle<JSArray> last_match_info) {
DCHECK(subject->IsFlat());
DCHECK(replacement->IsFlat());
ZoneScope zone_scope(isolate->runtime_zone());
ZoneList<int> indices(8, zone_scope.zone());
DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
String* pattern =
String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
int subject_len = subject->length();
int pattern_len = pattern->length();
int replacement_len = replacement->length();
FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff,
zone_scope.zone());
int matches = indices.length();
if (matches == 0) return *subject;
// Detect integer overflow.
int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
static_cast<int64_t>(pattern_len)) *
static_cast<int64_t>(matches) +
static_cast<int64_t>(subject_len);
int result_len;
if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
STATIC_ASSERT(String::kMaxLength < kMaxInt);
result_len = kMaxInt; // Provoke exception.
} else {
result_len = static_cast<int>(result_len_64);
}
int subject_pos = 0;
int result_pos = 0;
MaybeHandle<SeqString> maybe_res;
if (ResultSeqString::kHasOneByteEncoding) {
maybe_res = isolate->factory()->NewRawOneByteString(result_len);
} else {
maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
}
Handle<SeqString> untyped_res;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
for (int i = 0; i < matches; i++) {
// Copy non-matched subject content.
if (subject_pos < indices.at(i)) {
String::WriteToFlat(*subject, result->GetChars() + result_pos,
subject_pos, indices.at(i));
result_pos += indices.at(i) - subject_pos;
}
// Replace match.
if (replacement_len > 0) {
String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0,
replacement_len);
result_pos += replacement_len;
}
subject_pos = indices.at(i) + pattern_len;
}
// Add remaining subject content at the end.
if (subject_pos < subject_len) {
String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos,
subject_len);
}
int32_t match_indices[] = {indices.at(matches - 1),
indices.at(matches - 1) + pattern_len};
RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices);
return *result;
}
MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString(
Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
Handle<String> replacement, Handle<JSArray> last_match_info) {
DCHECK(subject->IsFlat());
DCHECK(replacement->IsFlat());
int capture_count = regexp->CaptureCount();
int subject_length = subject->length();
// CompiledReplacement uses zone allocation.
ZoneScope zone_scope(isolate->runtime_zone());
CompiledReplacement compiled_replacement(zone_scope.zone());
bool simple_replace =
compiled_replacement.Compile(replacement, capture_count, subject_length);
// Shortcut for simple non-regexp global replacements
if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) {
if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
isolate, subject, regexp, replacement, last_match_info);
} else {
return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
isolate, subject, regexp, replacement, last_match_info);
}
}
RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
if (global_cache.HasException()) return isolate->heap()->exception();
int32_t* current_match = global_cache.FetchNext();
if (current_match == NULL) {
if (global_cache.HasException()) return isolate->heap()->exception();
return *subject;
}
// Guessing the number of parts that the final result string is built
// from. Global regexps can match any number of times, so we guess
// conservatively.
int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
// Number of parts added by compiled replacement plus preceeding
// string and possibly suffix after last match. It is possible for
// all components to use two elements when encoded as two smis.
const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
int prev = 0;
do {
builder.EnsureCapacity(parts_added_per_loop);
int start = current_match[0];
int end = current_match[1];
if (prev < start) {
builder.AddSubjectSlice(prev, start);
}
if (simple_replace) {
builder.AddString(replacement);
} else {
compiled_replacement.Apply(&builder, start, end, current_match);
}
prev = end;
current_match = global_cache.FetchNext();
} while (current_match != NULL);
if (global_cache.HasException()) return isolate->heap()->exception();
if (prev < subject_length) {
builder.EnsureCapacity(2);
builder.AddSubjectSlice(prev, subject_length);
}
RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
global_cache.LastSuccessfulMatch());
Handle<String> result;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, result, builder.ToString());
return *result;
}
template <typename ResultSeqString>
MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(
Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
Handle<JSArray> last_match_info) {
DCHECK(subject->IsFlat());
// Shortcut for simple non-regexp global replacements
if (regexp->TypeTag() == JSRegExp::ATOM) {
Handle<String> empty_string = isolate->factory()->empty_string();
if (subject->IsOneByteRepresentation()) {
return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
isolate, subject, regexp, empty_string, last_match_info);
} else {
return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
isolate, subject, regexp, empty_string, last_match_info);
}
}
RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
if (global_cache.HasException()) return isolate->heap()->exception();
int32_t* current_match = global_cache.FetchNext();
if (current_match == NULL) {
if (global_cache.HasException()) return isolate->heap()->exception();
return *subject;
}
int start = current_match[0];
int end = current_match[1];
int capture_count = regexp->CaptureCount();
int subject_length = subject->length();
int new_length = subject_length - (end - start);
if (new_length == 0) return isolate->heap()->empty_string();
Handle<ResultSeqString> answer;
if (ResultSeqString::kHasOneByteEncoding) {
answer = Handle<ResultSeqString>::cast(
isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
} else {
answer = Handle<ResultSeqString>::cast(
isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
}
int prev = 0;
int position = 0;
do {
start = current_match[0];
end = current_match[1];
if (prev < start) {
// Add substring subject[prev;start] to answer string.
String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);
position += start - prev;
}
prev = end;
current_match = global_cache.FetchNext();
} while (current_match != NULL);
if (global_cache.HasException()) return isolate->heap()->exception();
RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
global_cache.LastSuccessfulMatch());
if (prev < subject_length) {
// Add substring subject[prev;length] to answer string.
String::WriteToFlat(*subject, answer->GetChars() + position, prev,
subject_length);
position += subject_length - prev;
}
if (position == 0) return isolate->heap()->empty_string();
// Shorten string and fill
int string_size = ResultSeqString::SizeFor(position);
int allocated_string_size = ResultSeqString::SizeFor(new_length);
int delta = allocated_string_size - string_size;
answer->set_length(position);
if (delta == 0) return *answer;
Address end_of_string = answer->address() + string_size;
Heap* heap = isolate->heap();
// The trimming is performed on a newly allocated object, which is on a
// fresly allocated page or on an already swept page. Hence, the sweeper
// thread can not get confused with the filler creation. No synchronization
// needed.
heap->CreateFillerObjectAt(end_of_string, delta);
heap->AdjustLiveBytes(answer->address(), -delta, Heap::FROM_MUTATOR);
return *answer;
}
RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) {
HandleScope scope(isolate);
DCHECK(args.length() == 4);
CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
RUNTIME_ASSERT(regexp->GetFlags().is_global());
RUNTIME_ASSERT(last_match_info->HasFastObjectElements());
subject = String::Flatten(subject);
if (replacement->length() == 0) {
if (subject->HasOnlyOneByteChars()) {
return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
isolate, subject, regexp, last_match_info);
} else {
return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
isolate, subject, regexp, last_match_info);
}
}
replacement = String::Flatten(replacement);
return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,
replacement, last_match_info);
}
RUNTIME_FUNCTION(Runtime_StringSplit) {
HandleScope handle_scope(isolate);
DCHECK(args.length() == 3);
CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
RUNTIME_ASSERT(limit > 0);
int subject_length = subject->length();
int pattern_length = pattern->length();
RUNTIME_ASSERT(pattern_length > 0);
if (limit == 0xffffffffu) {
Handle<Object> cached_answer(
RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
isolate);
if (*cached_answer != Smi::FromInt(0)) {
// The cache FixedArray is a COW-array and can therefore be reused.
Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
Handle<FixedArray>::cast(cached_answer));
return *result;
}
}
// The limit can be very large (0xffffffffu), but since the pattern
// isn't empty, we can never create more parts than ~half the length
// of the subject.
subject = String::Flatten(subject);
pattern = String::Flatten(pattern);
static const int kMaxInitialListCapacity = 16;
ZoneScope zone_scope(isolate->runtime_zone());
// Find (up to limit) indices of separator and end-of-string in subject
int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit);
ZoneList<int> indices(initial_capacity, zone_scope.zone());
FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit,
zone_scope.zone());
if (static_cast<uint32_t>(indices.length()) < limit) {
indices.Add(subject_length, zone_scope.zone());
}
// The list indices now contains the end of each part to create.
// Create JSArray of substrings separated by separator.
int part_count = indices.length();
Handle<JSArray> result = isolate->factory()->NewJSArray(part_count);
JSObject::EnsureCanContainHeapObjectElements(result);
result->set_length(Smi::FromInt(part_count));
DCHECK(result->HasFastObjectElements());
if (part_count == 1 && indices.at(0) == subject_length) {
FixedArray::cast(result->elements())->set(0, *subject);
return *result;
}
Handle<FixedArray> elements(FixedArray::cast(result->elements()));
int part_start = 0;
for (int i = 0; i < part_count; i++) {
HandleScope local_loop_handle(isolate);
int part_end = indices.at(i);
Handle<String> substring =
isolate->factory()->NewProperSubString(subject, part_start, part_end);
elements->set(i, *substring);
part_start = part_end + pattern_length;
}
if (limit == 0xffffffffu) {
if (result->HasFastObjectElements()) {
RegExpResultsCache::Enter(isolate, subject, pattern, elements,
RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
}
}
return *result;
}
RUNTIME_FUNCTION(Runtime_RegExpExecRT) {
HandleScope scope(isolate);
DCHECK(args.length() == 4);
CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
CONVERT_INT32_ARG_CHECKED(index, 2);
CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
// Due to the way the JS calls are constructed this must be less than the
// length of a string, i.e. it is always a Smi. We check anyway for security.
RUNTIME_ASSERT(index >= 0);
RUNTIME_ASSERT(index <= subject->length());
isolate->counters()->regexp_entry_runtime()->Increment();
Handle<Object> result;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, result,
RegExpImpl::Exec(regexp, subject, index, last_match_info));
return *result;
}
RUNTIME_FUNCTION(Runtime_RegExpConstructResult) {
HandleScope handle_scope(isolate);
DCHECK(args.length() == 3);
CONVERT_SMI_ARG_CHECKED(size, 0);
RUNTIME_ASSERT(size >= 0 && size <= FixedArray::kMaxLength);
CONVERT_ARG_HANDLE_CHECKED(Object, index, 1);
CONVERT_ARG_HANDLE_CHECKED(Object, input, 2);
Handle<FixedArray> elements = isolate->factory()->NewFixedArray(size);
Handle<Map> regexp_map(isolate->native_context()->regexp_result_map());
Handle<JSObject> object =
isolate->factory()->NewJSObjectFromMap(regexp_map, NOT_TENURED, false);
Handle<JSArray> array = Handle<JSArray>::cast(object);
array->set_elements(*elements);
array->set_length(Smi::FromInt(size));
// Write in-object properties after the length of the array.
array->InObjectPropertyAtPut(JSRegExpResult::kIndexIndex, *index);
array->InObjectPropertyAtPut(JSRegExpResult::kInputIndex, *input);
return *array;
}
static JSRegExp::Flags RegExpFlagsFromString(Handle<String> flags,
bool* success) {
uint32_t value = JSRegExp::NONE;
int length = flags->length();
// A longer flags string cannot be valid.
if (length > 5) return JSRegExp::Flags(0);
for (int i = 0; i < length; i++) {
uint32_t flag = JSRegExp::NONE;
switch (flags->Get(i)) {
case 'g':
flag = JSRegExp::GLOBAL;
break;
case 'i':
flag = JSRegExp::IGNORE_CASE;
break;
case 'm':
flag = JSRegExp::MULTILINE;
break;
case 'u':
if (!FLAG_harmony_unicode) return JSRegExp::Flags(0);
flag = JSRegExp::UNICODE_ESCAPES;
break;
case 'y':
if (!FLAG_harmony_regexps) return JSRegExp::Flags(0);
flag = JSRegExp::STICKY;
break;
default:
return JSRegExp::Flags(0);
}
// Duplicate flag.
if (value & flag) return JSRegExp::Flags(0);
value |= flag;
}
*success = true;
return JSRegExp::Flags(value);
}
RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
HandleScope scope(isolate);
DCHECK(args.length() == 3);
CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
CONVERT_ARG_HANDLE_CHECKED(String, flags_string, 2);
Factory* factory = isolate->factory();
// If source is the empty string we set it to "(?:)" instead as
// suggested by ECMA-262, 5th, section 15.10.4.1.
if (source->length() == 0) source = factory->query_colon_string();
bool success = false;
JSRegExp::Flags flags = RegExpFlagsFromString(flags_string, &success);
if (!success) {
Handle<FixedArray> element = factory->NewFixedArray(1);
element->set(0, *flags_string);
Handle<JSArray> args = factory->NewJSArrayWithElements(element);
THROW_NEW_ERROR_RETURN_FAILURE(
isolate, NewSyntaxError("invalid_regexp_flags", args));
}
Handle<Object> global = factory->ToBoolean(flags.is_global());
Handle<Object> ignore_case = factory->ToBoolean(flags.is_ignore_case());
Handle<Object> multiline = factory->ToBoolean(flags.is_multiline());
Handle<Object> sticky = factory->ToBoolean(flags.is_sticky());
Handle<Object> unicode = factory->ToBoolean(flags.is_unicode());
Map* map = regexp->map();
Object* constructor = map->constructor();
if (!FLAG_harmony_regexps && !FLAG_harmony_unicode &&
constructor->IsJSFunction() &&
JSFunction::cast(constructor)->initial_map() == map) {
// If we still have the original map, set in-object properties directly.
// Both true and false are immovable immortal objects so no need for write
// barrier.
regexp->InObjectPropertyAtPut(JSRegExp::kGlobalFieldIndex, *global,
SKIP_WRITE_BARRIER);
regexp->InObjectPropertyAtPut(JSRegExp::kIgnoreCaseFieldIndex, *ignore_case,
SKIP_WRITE_BARRIER);
regexp->InObjectPropertyAtPut(JSRegExp::kMultilineFieldIndex, *multiline,
SKIP_WRITE_BARRIER);
regexp->InObjectPropertyAtPut(JSRegExp::kLastIndexFieldIndex,
Smi::FromInt(0), SKIP_WRITE_BARRIER);
} else {
// Map has changed, so use generic, but slower, method. We also end here if
// the --harmony-regexp flag is set, because the initial map does not have
// space for the 'sticky' flag, since it is from the snapshot, but must work
// both with and without --harmony-regexp. When sticky comes out from under
// the flag, we will be able to use the fast initial map.
PropertyAttributes final =
static_cast<PropertyAttributes>(READ_ONLY | DONT_ENUM | DONT_DELETE);
PropertyAttributes writable =
static_cast<PropertyAttributes>(DONT_ENUM | DONT_DELETE);
Handle<Object> zero(Smi::FromInt(0), isolate);
JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->global_string(),
global, final).Check();
JSObject::SetOwnPropertyIgnoreAttributes(
regexp, factory->ignore_case_string(), ignore_case, final).Check();
JSObject::SetOwnPropertyIgnoreAttributes(
regexp, factory->multiline_string(), multiline, final).Check();
if (FLAG_harmony_regexps) {
JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->sticky_string(),
sticky, final).Check();
}
if (FLAG_harmony_unicode) {
JSObject::SetOwnPropertyIgnoreAttributes(
regexp, factory->unicode_string(), unicode, final).Check();
}
JSObject::SetOwnPropertyIgnoreAttributes(
regexp, factory->last_index_string(), zero, writable).Check();
}
Handle<Object> result;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, result, RegExpImpl::Compile(regexp, source, flags));
return *result;
}
RUNTIME_FUNCTION(Runtime_MaterializeRegExpLiteral) {
HandleScope scope(isolate);
DCHECK(args.length() == 4);
CONVERT_ARG_HANDLE_CHECKED(FixedArray, literals, 0);
CONVERT_SMI_ARG_CHECKED(index, 1);
CONVERT_ARG_HANDLE_CHECKED(String, pattern, 2);
CONVERT_ARG_HANDLE_CHECKED(String, flags, 3);
// Get the RegExp function from the context in the literals array.
// This is the RegExp function from the context in which the
// function was created. We do not use the RegExp function from the
// current native context because this might be the RegExp function
// from another context which we should not have access to.
Handle<JSFunction> constructor = Handle<JSFunction>(
JSFunction::NativeContextFromLiterals(*literals)->regexp_function());
// Compute the regular expression literal.
Handle<Object> regexp;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, regexp,
RegExpImpl::CreateRegExpLiteral(constructor, pattern, flags));
literals->set(index, *regexp);
return *regexp;
}
// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
// separate last match info. See comment on that function.
template <bool has_capture>
static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
Handle<JSRegExp> regexp,
Handle<JSArray> last_match_array,
Handle<JSArray> result_array) {
DCHECK(subject->IsFlat());
DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
int capture_count = regexp->CaptureCount();
int subject_length = subject->length();
static const int kMinLengthToCache = 0x1000;
if (subject_length > kMinLengthToCache) {
Handle<Object> cached_answer(
RegExpResultsCache::Lookup(isolate->heap(), *subject, regexp->data(),
RegExpResultsCache::REGEXP_MULTIPLE_INDICES),
isolate);
if (*cached_answer != Smi::FromInt(0)) {
Handle<FixedArray> cached_fixed_array =
Handle<FixedArray>(FixedArray::cast(*cached_answer));
// The cache FixedArray is a COW-array and can therefore be reused.
JSArray::SetContent(result_array, cached_fixed_array);
// The actual length of the result array is stored in the last element of
// the backing store (the backing FixedArray may have a larger capacity).
Object* cached_fixed_array_last_element =
cached_fixed_array->get(cached_fixed_array->length() - 1);
Smi* js_array_length = Smi::cast(cached_fixed_array_last_element);
result_array->set_length(js_array_length);
RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
NULL);
return *result_array;
}
}
RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
if (global_cache.HasException()) return isolate->heap()->exception();
// Ensured in Runtime_RegExpExecMultiple.
DCHECK(result_array->HasFastObjectElements());
Handle<FixedArray> result_elements(
FixedArray::cast(result_array->elements()));
if (result_elements->length() < 16) {
result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
}
FixedArrayBuilder builder(result_elements);
// Position to search from.
int match_start = -1;
int match_end = 0;
bool first = true;
// Two smis before and after the match, for very long strings.
static const int kMaxBuilderEntriesPerRegExpMatch = 5;
while (true) {
int32_t* current_match = global_cache.FetchNext();
if (current_match == NULL) break;
match_start = current_match[0];
builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
if (match_end < match_start) {
ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
match_start);
}
match_end = current_match[1];
{
// Avoid accumulating new handles inside loop.
HandleScope temp_scope(isolate);
Handle<String> match;
if (!first) {
match = isolate->factory()->NewProperSubString(subject, match_start,
match_end);
} else {
match =
isolate->factory()->NewSubString(subject, match_start, match_end);
first = false;
}
if (has_capture) {
// Arguments array to replace function is match, captures, index and
// subject, i.e., 3 + capture count in total.
Handle<FixedArray> elements =
isolate->factory()->NewFixedArray(3 + capture_count);
elements->set(0, *match);
for (int i = 1; i <= capture_count; i++) {
int start = current_match[i * 2];
if (start >= 0) {
int end = current_match[i * 2 + 1];
DCHECK(start <= end);
Handle<String> substring =
isolate->factory()->NewSubString(subject, start, end);
elements->set(i, *substring);
} else {
DCHECK(current_match[i * 2 + 1] < 0);
elements->set(i, isolate->heap()->undefined_value());
}
}
elements->set(capture_count + 1, Smi::FromInt(match_start));
elements->set(capture_count + 2, *subject);
builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
} else {
builder.Add(*match);
}
}
}
if (global_cache.HasException()) return isolate->heap()->exception();
if (match_start >= 0) {
// Finished matching, with at least one match.
if (match_end < subject_length) {
ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
subject_length);
}
RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
NULL);
if (subject_length > kMinLengthToCache) {
// Store the length of the result array into the last element of the
// backing FixedArray.
builder.EnsureCapacity(1);
Handle<FixedArray> fixed_array = builder.array();
fixed_array->set(fixed_array->length() - 1,
Smi::FromInt(builder.length()));
// Cache the result and turn the FixedArray into a COW array.
RegExpResultsCache::Enter(isolate, subject,
handle(regexp->data(), isolate), fixed_array,
RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
}
return *builder.ToJSArray(result_array);
} else {
return isolate->heap()->null_value(); // No matches at all.
}
}
// This is only called for StringReplaceGlobalRegExpWithFunction. This sets
// lastMatchInfoOverride to maintain the last match info, so we don't need to
// set any other last match array info.
RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
HandleScope handles(isolate);
DCHECK(args.length() == 4);
CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2);
CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
RUNTIME_ASSERT(last_match_info->HasFastObjectElements());
RUNTIME_ASSERT(result_array->HasFastObjectElements());
subject = String::Flatten(subject);
RUNTIME_ASSERT(regexp->GetFlags().is_global());
if (regexp->CaptureCount() == 0) {
return SearchRegExpMultiple<false>(isolate, subject, regexp,
last_match_info, result_array);
} else {
return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info,
result_array);
}
}
RUNTIME_FUNCTION(RuntimeReference_RegExpConstructResult) {
SealHandleScope shs(isolate);
return __RT_impl_Runtime_RegExpConstructResult(args, isolate);
}
RUNTIME_FUNCTION(RuntimeReference_RegExpExec) {
SealHandleScope shs(isolate);
return __RT_impl_Runtime_RegExpExecRT(args, isolate);
}
RUNTIME_FUNCTION(RuntimeReference_IsRegExp) {
SealHandleScope shs(isolate);
DCHECK(args.length() == 1);
CONVERT_ARG_CHECKED(Object, obj, 0);
return isolate->heap()->ToBoolean(obj->IsJSRegExp());
}
}
} // namespace v8::internal