| // Copyright 2019 the V8 project authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include 'src/builtins/builtins-regexp-gen.h' |
| |
| namespace regexp { |
| |
| extern builtin SubString(implicit context: Context)(String, Smi, Smi): String; |
| |
| extern runtime RegExpExecMultiple( |
| implicit context: Context)(JSRegExp, String, RegExpMatchInfo): Null |
| |FixedArray; |
| extern transitioning runtime RegExpReplaceRT( |
| Context, JSReceiver, String, Object): String; |
| extern transitioning runtime StringBuilderConcat( |
| implicit context: Context)(FixedArray, Smi, String): String; |
| extern transitioning runtime StringReplaceNonGlobalRegExpWithFunction( |
| implicit context: Context)(String, JSRegExp, Callable): String; |
| extern transitioning macro |
| RegExpBuiltinsAssembler::RegExpReplaceGlobalSimpleString( |
| implicit context: Context)(JSRegExp, String, RegExpData, String): |
| String; |
| |
| // matchesCapacity is the length of the matchesElements FixedArray, and |
| // matchesElements is allowed to contain holes at the end. |
| transitioning macro RegExpReplaceCallableNoExplicitCaptures( |
| implicit context: Context)(matchesElements: FixedArray, |
| matchesCapacity: intptr, string: String, replaceFn: Callable): intptr { |
| let matchStart: Smi = 0; |
| for (let i: intptr = 0; i < matchesCapacity; i++) { |
| typeswitch (matchesElements.objects[i]) { |
| // Element represents a slice. |
| case (elSmi: Smi): { |
| // The slice's match start and end is either encoded as one or two |
| // smis. A positive smi indicates a single smi encoding (see |
| // ReplacementStringBuilder::AddSubjectSlice()). |
| if (elSmi > 0) { |
| // For single smi encoding, see |
| // StringBuilderSubstringLength::encode() and |
| // StringBuilderSubstringPosition::encode(). |
| const elInt: intptr = Convert<intptr>(elSmi); |
| const newMatchStart: intptr = (elInt >> 11) + (elInt & 0x7FF); |
| matchStart = Convert<Smi>(newMatchStart); |
| } else { |
| // For two smi encoding, the length is negative followed by the |
| // match start. |
| const nextEl: Smi = UnsafeCast<Smi>(matchesElements.objects[++i]); |
| matchStart = nextEl - elSmi; |
| } |
| } |
| // Element represents the matched substring, which is then passed to the |
| // replace function. |
| case (elString: String): { |
| const replacementObj: JSAny = |
| Call(context, replaceFn, Undefined, elString, matchStart, string); |
| const replacement: String = ToString_Inline(replacementObj); |
| matchesElements.objects[i] = replacement; |
| matchStart += elString.length_smi; |
| } |
| case (TheHole): deferred { |
| // No more elements. |
| return i; |
| } |
| case (Object): deferred { |
| unreachable; |
| } |
| } |
| } |
| return matchesCapacity; |
| } |
| |
| // matchesCapacity is the length of the matchesElements FixedArray, and |
| // matchesElements is allowed to contain holes at the end. |
| transitioning macro RegExpReplaceCallableWithExplicitCaptures( |
| implicit context: Context)(matchesElements: FixedArray, |
| matchesCapacity: intptr, replaceFn: Callable): intptr { |
| for (let i: intptr = 0; i < matchesCapacity; i++) { |
| if (matchesElements.objects[i] == TheHole) { |
| // No more elements. |
| return i; |
| } |
| const elArray = |
| Cast<JSArray>(matchesElements.objects[i]) otherwise continue; |
| |
| // The JSArray is expanded into the function args by Reflect.apply(). |
| // TODO(jgruber): Remove indirection through Call->ReflectApply. |
| const replacementObj: JSAny = Call( |
| context, GetReflectApply(), Undefined, replaceFn, Undefined, elArray); |
| |
| // Overwrite the i'th element in the results with the string |
| // we got back from the callback function. |
| matchesElements.objects[i] = ToString_Inline(replacementObj); |
| } |
| return matchesCapacity; |
| } |
| |
| transitioning macro RegExpReplaceFastGlobalCallable( |
| implicit context: Context)(regexp: FastJSRegExp, string: String, |
| replaceFn: Callable): String { |
| regexp.lastIndex = 0; |
| |
| const result: Null|FixedArray = |
| RegExpExecMultiple(regexp, string, GetRegExpLastMatchInfo()); |
| |
| regexp.lastIndex = 0; |
| |
| // If no matches, return the subject string. |
| if (result == Null) return string; |
| |
| const matches: FixedArray = UnsafeCast<FixedArray>(result); |
| // The FixedArray will contain holes at the end and we've lost the information |
| // of its real length. This is OK because the users iterate it from the |
| // beginning. |
| const matchesCapacity: Smi = Cast<Smi>(matches.length) otherwise unreachable; |
| const matchesCapacityInt: intptr = Convert<intptr>(matchesCapacity); |
| |
| // Reload last match info since it might have changed. |
| const nofCaptures: Smi = GetRegExpLastMatchInfo().number_of_capture_registers; |
| |
| // If the number of captures is two then there are no explicit captures in |
| // the regexp, just the implicit capture that captures the whole match. In |
| // this case we can simplify quite a bit and end up with something faster. |
| let matchesLength: intptr; |
| if (nofCaptures == 2) { |
| matchesLength = RegExpReplaceCallableNoExplicitCaptures( |
| matches, matchesCapacityInt, string, replaceFn); |
| } else { |
| matchesLength = RegExpReplaceCallableWithExplicitCaptures( |
| matches, matchesCapacityInt, replaceFn); |
| } |
| |
| return StringBuilderConcat(matches, Convert<Smi>(matchesLength), string); |
| } |
| |
| transitioning macro RegExpReplaceFastString( |
| implicit context: Context)(regexp: JSRegExp, string: String, |
| replaceString: String): String { |
| // The fast path is reached only if {receiver} is an unmodified JSRegExp |
| // instance, {replace_value} is non-callable, and ToString({replace_value}) |
| // does not contain '$', i.e. we're doing a simple string replacement. |
| let result: String = kEmptyString; |
| let unicode: bool = false; |
| const replaceLength: Smi = replaceString.length_smi; |
| const fastRegexp = UnsafeCast<FastJSRegExp>(regexp); |
| const global: bool = fastRegexp.global; |
| |
| if (global) { |
| unicode = fastRegexp.unicode || fastRegexp.unicodeSets; |
| fastRegexp.lastIndex = 0; |
| |
| const data: RegExpData = |
| UnsafeCast<RegExpData>(LoadTrustedPointerFromObject( |
| fastRegexp, kJSRegExpRegExpDataOffset, |
| kRegExpDataIndirectPointerTag)); |
| return RegExpReplaceGlobalSimpleString(regexp, string, data, replaceString); |
| } |
| |
| dcheck(!global); |
| |
| const match: RegExpMatchInfo = |
| RegExpPrototypeExecBodyWithoutResultFast(regexp, string) |
| otherwise return string; |
| const matchStart: Smi = match.GetStartOfCapture(0); |
| const matchEnd: Smi = match.GetEndOfCapture(0); |
| |
| // TODO(jgruber): We could skip many of the checks that using SubString |
| // here entails. |
| result = result + SubString(string, 0, matchStart); |
| |
| if (replaceLength != 0) result = result + replaceString; |
| |
| return result + SubString(string, matchEnd, string.length_smi); |
| } |
| |
| transitioning builtin RegExpReplace( |
| implicit context: Context)(regexp: FastJSRegExp, string: String, |
| replaceValue: JSAny): String { |
| // TODO(pwong): Remove dcheck when all callers (StringPrototypeReplace) are |
| // from Torque. |
| dcheck(Is<FastJSRegExp>(regexp)); |
| |
| // 2. Is {replace_value} callable? |
| typeswitch (replaceValue) { |
| case (replaceFn: Callable): { |
| return regexp.global ? |
| RegExpReplaceFastGlobalCallable(regexp, string, replaceFn) : |
| StringReplaceNonGlobalRegExpWithFunction(string, regexp, replaceFn); |
| } |
| case (JSAny): { |
| const stableRegexp: JSRegExp = regexp; |
| const replaceString: String = ToString_Inline(replaceValue); |
| |
| try { |
| // ToString(replaceValue) could potentially change the shape of the |
| // RegExp object. Recheck that we are still on the fast path and bail |
| // to runtime otherwise. |
| const fastRegexp = Cast<FastJSRegExp>(stableRegexp) otherwise Runtime; |
| if (StringIndexOf( |
| replaceString, SingleCharacterStringConstant('$'), 0) != -1) { |
| goto Runtime; |
| } |
| |
| return RegExpReplaceFastString(fastRegexp, string, replaceString); |
| } label Runtime deferred { |
| return RegExpReplaceRT(context, stableRegexp, string, replaceString); |
| } |
| } |
| } |
| } |
| |
| const kRegExpReplaceCalledOnSlowRegExp: constexpr int31 |
| generates 'v8::Isolate::kRegExpReplaceCalledOnSlowRegExp'; |
| |
| transitioning javascript builtin RegExpPrototypeReplace( |
| js-implicit context: NativeContext, receiver: JSAny)(...arguments): JSAny { |
| const methodName: constexpr string = 'RegExp.prototype.@@replace'; |
| |
| // RegExpPrototypeReplace is a bit of a beast - a summary of dispatch logic: |
| // |
| // if (!IsFastRegExp(receiver)) CallRuntime(RegExpReplace) |
| // if (IsCallable(replace)) { |
| // if (IsGlobal(receiver)) { |
| // // Called 'fast-path' but contains several runtime calls. |
| // RegExpReplaceFastGlobalCallable() |
| // } else { |
| // CallRuntime(StringReplaceNonGlobalRegExpWithFunction) |
| // } |
| // } else { |
| // if (replace.contains("$")) { |
| // CallRuntime(RegExpReplace) |
| // } else { |
| // RegExpReplaceFastString() |
| // } |
| // } |
| |
| const string: JSAny = arguments[0]; |
| const replaceValue: JSAny = arguments[1]; |
| |
| // Let rx be the this value. |
| // If Type(rx) is not Object, throw a TypeError exception. |
| const rx = Cast<JSReceiver>(receiver) |
| otherwise ThrowTypeError( |
| MessageTemplate::kIncompatibleMethodReceiver, methodName, receiver); |
| |
| // Let S be ? ToString(string). |
| const s = ToString_Inline(string); |
| |
| // Fast-path checks: 1. Is the {receiver} an unmodified JSRegExp instance? |
| try { |
| const fastRx: FastJSRegExp = Cast<FastJSRegExp>(rx) otherwise Runtime; |
| return RegExpReplace(fastRx, s, replaceValue); |
| } label Runtime deferred { |
| IncrementUseCounter(context, SmiConstant(kRegExpReplaceCalledOnSlowRegExp)); |
| return RegExpReplaceRT(context, rx, s, replaceValue); |
| } |
| } |
| } |