blob: 33a56893299f2cde60844af94356b301f17283a1 [file] [log] [blame]
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include 'src/builtins/builtins-regexp-gen.h'
namespace regexp {
// External builtins, runtime functions and CSA macros used by the replace
// implementation below.

// Called below as SubString(string, from, to) to extract the parts of the
// subject before and after a match.
extern builtin SubString(implicit context: Context)(String, Smi, Smi): String;
// Runs a regexp against the subject string. Callers in this file treat a Null
// result as "no matches" and a FixedArray result as the list of match entries.
extern runtime RegExpExecMultiple(
implicit context: Context)(JSRegExp, String, RegExpMatchInfo): Null
|FixedArray;
// Runtime fallback used whenever the fast paths in this file do not apply.
extern transitioning runtime RegExpReplaceRT(
Context, JSReceiver, String, Object): String;
// Called below with the (partially rewritten) matches array, the number of
// entries to use, and the subject string to build the final result string.
extern transitioning runtime StringBuilderConcat(
implicit context: Context)(FixedArray, Smi, String): String;
// Used below for a non-global regexp combined with a callable replacement.
extern transitioning runtime StringReplaceNonGlobalRegExpWithFunction(
implicit context: Context)(String, JSRegExp, Callable): String;
// Used below for a global regexp combined with a plain replacement string.
extern transitioning macro
RegExpBuiltinsAssembler::RegExpReplaceGlobalSimpleString(
implicit context: Context)(JSRegExp, String, RegExpData, String):
String;
// Walks {matchesElements} and, for every matched substring found, calls
// {replaceFn}(match, position, string) and stores the stringified result back
// into the same slot. {matchesCapacity} is the length of the {matchesElements}
// FixedArray, which may have holes at its end. Returns the index of the first
// hole, or {matchesCapacity} if no hole was encountered.
transitioning macro RegExpReplaceCallableNoExplicitCaptures(
    implicit context: Context)(matchesElements: FixedArray,
    matchesCapacity: intptr, string: String, replaceFn: Callable): intptr {
  // Position in the subject that is passed to {replaceFn} for the next match.
  let position: Smi = 0;
  for (let index: intptr = 0; index < matchesCapacity; index++) {
    typeswitch (matchesElements.objects[index]) {
      // A smi entry describes a slice of the subject string.
      case (slice: Smi): {
        // A slice is encoded in either one or two smis; a positive value
        // means the single smi form (see
        // ReplacementStringBuilder::AddSubjectSlice()).
        if (slice > 0) {
          // Single smi form: two bit fields packed into one value, see
          // StringBuilderSubstringLength::encode() and
          // StringBuilderSubstringPosition::encode(). The next match starts
          // at the sum of both fields.
          const packed: intptr = Convert<intptr>(slice);
          const upperField: intptr = packed >> 11;
          const lowerField: intptr = packed & 0x7FF;
          position = Convert<Smi>(upperField + lowerField);
        } else {
          // Two smi form: this entry holds the negated length and the
          // following entry holds the start, so consume that entry as well.
          const sliceStart: Smi =
              UnsafeCast<Smi>(matchesElements.objects[++index]);
          position = sliceStart - slice;
        }
      }
      // A string entry is the matched substring; hand it to the replace
      // function and overwrite the entry with the replacement.
      case (matched: String): {
        const callResult: JSAny =
            Call(context, replaceFn, Undefined, matched, position, string);
        const replacementString: String = ToString_Inline(callResult);
        matchesElements.objects[index] = replacementString;
        position += matched.length_smi;
      }
      case (TheHole): deferred {
        // The first hole marks the end of the used entries.
        return index;
      }
      case (Object): deferred {
        unreachable;
      }
    }
  }
  return matchesCapacity;
}
// Walks {matchesElements} and, for every JSArray entry, calls {replaceFn} with
// the array's contents as arguments and stores the stringified result back
// into the same slot. Entries that are not JSArrays are left untouched.
// {matchesCapacity} is the length of the {matchesElements} FixedArray, which
// may have holes at its end. Returns the index of the first hole, or
// {matchesCapacity} if no hole was encountered.
transitioning macro RegExpReplaceCallableWithExplicitCaptures(
    implicit context: Context)(matchesElements: FixedArray,
    matchesCapacity: intptr, replaceFn: Callable): intptr {
  for (let index: intptr = 0; index < matchesCapacity; index++) {
    const entry: Object = matchesElements.objects[index];
    if (entry == TheHole) {
      // The first hole marks the end of the used entries.
      return index;
    }

    const captures = Cast<JSArray>(entry) otherwise continue;

    // Reflect.apply() spreads the JSArray into the arguments of {replaceFn}.
    // TODO(jgruber): Remove indirection through Call->ReflectApply.
    const callResult: JSAny = Call(
        context, GetReflectApply(), Undefined, replaceFn, Undefined, captures);

    // Replace the entry with the string produced by the callback.
    matchesElements.objects[index] = ToString_Inline(callResult);
  }
  return matchesCapacity;
}
// Replace implementation for a global fast regexp with a callable replacement:
// collects all matches via RegExpExecMultiple, lets {replaceFn} rewrite each
// match entry in place, and concatenates the result with StringBuilderConcat.
// Returns {string} itself when there are no matches.
transitioning macro RegExpReplaceFastGlobalCallable(
    implicit context: Context)(regexp: FastJSRegExp, string: String,
    replaceFn: Callable): String {
  // lastIndex is reset both before and after collecting the matches.
  regexp.lastIndex = 0;
  const execResult: Null|FixedArray =
      RegExpExecMultiple(regexp, string, GetRegExpLastMatchInfo());
  regexp.lastIndex = 0;

  // Nothing matched: the subject string is the result.
  if (execResult == Null) return string;

  const matches: FixedArray = UnsafeCast<FixedArray>(execResult);

  // The array ends in holes and its real length is no longer known. That is
  // fine because the helpers below scan it from the front and stop at the
  // first hole.
  const capacitySmi: Smi = Cast<Smi>(matches.length) otherwise unreachable;
  const capacity: intptr = Convert<intptr>(capacitySmi);

  // Load the last match info again, since it might have changed.
  const captureRegisters: Smi =
      GetRegExpLastMatchInfo().number_of_capture_registers;

  // Exactly two capture registers means the regexp has no explicit captures,
  // only the implicit capture of the whole match. That case allows a simpler
  // and faster helper.
  const usedLength: intptr = (captureRegisters == 2) ?
      RegExpReplaceCallableNoExplicitCaptures(
          matches, capacity, string, replaceFn) :
      RegExpReplaceCallableWithExplicitCaptures(matches, capacity, replaceFn);

  return StringBuilderConcat(matches, Convert<Smi>(usedLength), string);
}
// Replace implementation for a fast regexp and a plain replacement string.
// For a global regexp, lastIndex is reset to 0 and the work is delegated to
// RegExpReplaceGlobalSimpleString. Otherwise a single match is executed and
// the result is assembled as prefix + {replaceString} + suffix; {string} is
// returned unchanged when the regexp does not match.
transitioning macro RegExpReplaceFastString(
    implicit context: Context)(regexp: JSRegExp, string: String,
    replaceString: String): String {
  // The fast path is reached only if {receiver} is an unmodified JSRegExp
  // instance, {replace_value} is non-callable, and ToString({replace_value})
  // does not contain '$', i.e. we're doing a simple string replacement.
  let result: String = kEmptyString;
  const replaceLength: Smi = replaceString.length_smi;
  const fastRegexp = UnsafeCast<FastJSRegExp>(regexp);
  const global: bool = fastRegexp.global;

  if (global) {
    fastRegexp.lastIndex = 0;

    // Load the regexp data that the global replace helper operates on.
    const data: RegExpData =
        UnsafeCast<RegExpData>(LoadTrustedPointerFromObject(
            fastRegexp, kJSRegExpRegExpDataOffset,
            kRegExpDataIndirectPointerTag));
    return RegExpReplaceGlobalSimpleString(regexp, string, data, replaceString);
  }

  dcheck(!global);

  // Non-global: execute once and bail out with the subject if nothing matched.
  const match: RegExpMatchInfo =
      RegExpPrototypeExecBodyWithoutResultFast(regexp, string)
      otherwise return string;
  const matchStart: Smi = match.GetStartOfCapture(0);
  const matchEnd: Smi = match.GetEndOfCapture(0);

  // TODO(jgruber): We could skip many of the checks that using SubString
  // here entails.
  result = result + SubString(string, 0, matchStart);

  // Skip the concatenation entirely for an empty replacement.
  if (replaceLength != 0) result = result + replaceString;

  return result + SubString(string, matchEnd, string.length_smi);
}
// Dispatches a replace on a fast regexp according to the kind of
// {replaceValue}: callables go to the callable helpers, everything else is
// converted to a string and handled by RegExpReplaceFastString, unless the
// regexp is no longer fast or the string contains '$', in which case the
// runtime takes over.
transitioning builtin RegExpReplace(
    implicit context: Context)(regexp: FastJSRegExp, string: String,
    replaceValue: JSAny): String {
  // TODO(pwong): Remove dcheck when all callers (StringPrototypeReplace) are
  // from Torque.
  dcheck(Is<FastJSRegExp>(regexp));

  // 2. Is {replace_value} callable?
  typeswitch (replaceValue) {
    case (replaceFn: Callable): {
      if (regexp.global) {
        return RegExpReplaceFastGlobalCallable(regexp, string, replaceFn);
      }
      return StringReplaceNonGlobalRegExpWithFunction(
          string, regexp, replaceFn);
    }
    case (JSAny): {
      const plainRegexp: JSRegExp = regexp;
      const replaceString: String = ToString_Inline(replaceValue);

      try {
        // The ToString(replaceValue) call above may have changed the shape
        // of the RegExp object, so verify that it still qualifies for the
        // fast path and fall back to the runtime if it does not.
        const fastRegexp = Cast<FastJSRegExp>(plainRegexp) otherwise SlowPath;

        // A '$' anywhere in the replacement string also requires the runtime.
        const dollarIndex = StringIndexOf(
            replaceString, SingleCharacterStringConstant('$'), 0);
        if (dollarIndex != -1) goto SlowPath;

        return RegExpReplaceFastString(fastRegexp, string, replaceString);
      } label SlowPath deferred {
        return RegExpReplaceRT(context, plainRegexp, string, replaceString);
      }
    }
  }
}
// Use counter id, bound to the C++ constant named in the 'generates' clause.
// RegExpPrototypeReplace increments it when the receiver is not a fast regexp.
const kRegExpReplaceCalledOnSlowRegExp: constexpr int31
generates 'v8::Isolate::kRegExpReplaceCalledOnSlowRegExp';
// JavaScript entry point for RegExp.prototype[@@replace]. Throws a TypeError
// if the receiver is not a JSReceiver, converts the first argument to a
// string, and then either takes the fast path through RegExpReplace or falls
// back to the runtime (bumping a use counter) for non-fast receivers.
transitioning javascript builtin RegExpPrototypeReplace(
    js-implicit context: NativeContext, receiver: JSAny)(...arguments): JSAny {
  const methodName: constexpr string = 'RegExp.prototype.@@replace';

  // Overview of how a call is routed (this builtin plus RegExpReplace):
  //
  //   receiver is not a fast regexp  -> CallRuntime(RegExpReplace)
  //   replace value is callable:
  //     global regexp                -> RegExpReplaceFastGlobalCallable()
  //                                     (called 'fast-path' but contains
  //                                     several runtime calls)
  //     non-global regexp            -> CallRuntime(
  //                                       StringReplaceNonGlobalRegExpWithFunction)
  //   replace value is not callable:
  //     replacement contains "$"     -> CallRuntime(RegExpReplace)
  //     otherwise                    -> RegExpReplaceFastString()

  const subjectArg: JSAny = arguments[0];
  const replaceValue: JSAny = arguments[1];

  // Let rx be the this value.
  // If Type(rx) is not Object, throw a TypeError exception.
  const rx = Cast<JSReceiver>(receiver)
      otherwise ThrowTypeError(
      MessageTemplate::kIncompatibleMethodReceiver, methodName, receiver);

  // Let S be ? ToString(string).
  const subject = ToString_Inline(subjectArg);

  // Fast-path checks: 1. Is the {receiver} an unmodified JSRegExp instance?
  try {
    const fastRx: FastJSRegExp = Cast<FastJSRegExp>(rx) otherwise SlowRegExp;
    return RegExpReplace(fastRx, subject, replaceValue);
  } label SlowRegExp deferred {
    // Record that replace was invoked on a non-fast regexp, then let the
    // runtime handle it.
    IncrementUseCounter(context, SmiConstant(kRegExpReplaceCalledOnSlowRegExp));
    return RegExpReplaceRT(context, rx, subject, replaceValue);
  }
}
}