blob: 97f899f2344780dccff64a3fae71292756b5bd91 [file] [log] [blame]
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include 'src/builtins/builtins-string-gen.h'
@abstract
@reserveBitsInInstanceType(7)
@cppObjectLayoutDefinition
extern class String extends Name {
  // Reinterprets the instance type stored in this string's map as a
  // StringInstanceType bitfield.
  macro StringInstanceType(): StringInstanceType {
    const rawInstanceType: uint16 = Convert<uint16>(this.map.instance_type);
    return %RawDownCast<StringInstanceType>(rawInstanceType);
  }

  // Reads the is_not_internalized bit of this string's instance type.
  macro IsNotInternalized(): bool {
    const instanceType: StringInstanceType = this.StringInstanceType();
    return instanceType.is_not_internalized;
  }

  // Keep this in sync with the C++ String::IsOneByteRepresentationUnderneath.
  //
  // Walks through cons and sliced wrappers until it reaches a string whose
  // own is_one_byte bit can be reported. Thin strings are answered directly
  // from their own instance type.
  macro IsOneByteRepresentationUnderneath(): bool {
    let current: String = this;
    while (true) {
      typeswitch (current) {
        case (cons: ConsString): {
          // Only flat cons strings are expected here; continue with the
          // first part.
          dcheck(cons.IsFlat());
          current = cons.first;
        }
        case (thin: ThinString): {
          // Internalized strings can't change representation.
          dcheck(
              thin.StringInstanceType().is_one_byte ==
              thin.actual.StringInstanceType().is_one_byte);
          return thin.StringInstanceType().is_one_byte;
        }
        case (slice: SlicedString): {
          // A slice has the representation of the string it points into.
          current = slice.parent;
        }
        case (String): {
          // Any remaining string kind answers from its own instance type.
          return current.StringInstanceType().is_one_byte;
        }
      }
    }
    VerifiedUnreachable();
  }

  // Number of characters in the string.
  const length: int32;
}
// Tag naming the storage representation of a string. There is one tag per
// string family declared in this file (Seq, Cons, External, Sliced, Thin).
// It is the type of the `representation` bits in StringInstanceType.
// Declared extern, so the numeric values are defined outside this file.
extern enum StringRepresentationTag extends uint32 {
kSeqStringTag,
kConsStringTag,
kExternalStringTag,
kSlicedStringTag,
kThinStringTag
}
// Bitfield view of a string's instance type, backed by a uint16. The fields
// declared here occupy 7 bits in total (3 + 1 + 1 + 1 + 1).
bitfield struct StringInstanceType extends uint16 {
// Which storage representation the string uses.
representation: StringRepresentationTag: 3 bit;
// Selects between the one-byte (char8) and two-byte (char16) code paths in
// this file.
is_one_byte: bool: 1 bit;
// Checked by the external string GetChars() macros to decide how the
// character pointer is obtained.
is_uncached: bool: 1 bit;
// Read by String::IsNotInternalized().
is_not_internalized: bool: 1 bit;
// Not read anywhere in this file.
is_shared: bool: 1 bit;
}
@doNotGenerateCast
@cppObjectLayoutDefinition
extern class ConsString extends String {
  // Corresponds to String::IsFlat() in the C++ runtime.
  // A cons string is treated as flat when its second part is empty.
  macro IsFlat(): bool {
    const secondLength: int32 = this.second.length;
    return secondLength == 0;
  }

  // Reports the is_one_byte bit of this cons string's own instance type.
  macro IsOneByteRepresentation(): bool {
    const instanceType: StringInstanceType = this.StringInstanceType();
    return instanceType.is_one_byte;
  }

  // The two parts of the cons string.
  first: String;
  second: String;
}
// Abstract base class of the external string classes. The characters are
// reached through ExternalPointer fields; see the GetChars() macros on the
// one-byte and two-byte subclasses.
@abstract
@doNotGenerateCast
@cppObjectLayoutDefinition
extern class ExternalString extends String {
resource: ExternalPointer;
// WARNING: This field is missing for uncached external strings.
resource_data: ExternalPointer;
}
// Externally implemented accessors for external strings. The `operator`
// declarations make them usable with field syntax (`s.resource_ptr`,
// `s.resource_data_ptr`).
extern operator '.resource_ptr' macro LoadExternalStringResourcePtr(
ExternalString): RawPtr;
// `.resource_data_ptr` is overloaded on the static string type, so the
// returned pointer is untyped for a plain ExternalString and char8/char16
// typed for the one-byte/two-byte subclasses.
extern operator '.resource_data_ptr' macro LoadExternalStringResourceDataPtr(
ExternalString): RawPtr;
extern operator '.resource_data_ptr' macro LoadExternalStringResourceDataPtr(
ExternalOneByteString): RawPtr<char8>;
extern operator '.resource_data_ptr' macro LoadExternalStringResourceDataPtr(
ExternalTwoByteString): RawPtr<char16>;
// Externally implemented character-pointer getters. In this file they are
// called by GetChars() only when the string is uncached.
extern macro ExternalOneByteStringGetChars(ExternalOneByteString):
RawPtr<char8>;
extern macro ExternalTwoByteStringGetChars(ExternalTwoByteString):
RawPtr<char16>;
@doNotGenerateCast
@cppObjectLayoutDefinition
extern class ExternalOneByteString extends ExternalString {
  // Returns a raw pointer to this string's one-byte characters.
  macro GetChars(): RawPtr<char8> {
    const isUncached: bool = this.StringInstanceType().is_uncached;
    if (!isUncached) {
      // Cached strings can read the data pointer directly.
      return this.resource_data_ptr;
    }
    // Uncached strings do not have the resource_data field, so the pointer
    // is obtained through the extern helper.
    return ExternalOneByteStringGetChars(this);
  }
}
@doNotGenerateCast
@cppObjectLayoutDefinition
extern class ExternalTwoByteString extends ExternalString {
  // Returns a raw pointer to this string's two-byte characters.
  macro GetChars(): RawPtr<char16> {
    const isUncached: bool = this.StringInstanceType().is_uncached;
    if (!isUncached) {
      // Cached strings can read the data pointer directly.
      return this.resource_data_ptr;
    }
    // Uncached strings do not have the resource_data field, so the pointer
    // is obtained through the extern helper.
    return ExternalTwoByteStringGetChars(this);
  }
}
// Class declaration for internalized strings. It adds no fields or macros of
// its own to String.
@doNotGenerateCast
@cppObjectLayoutDefinition
extern class InternalizedString extends String {}
// Abstract common base of SeqOneByteString and SeqTwoByteString. It adds no
// fields or macros of its own to String.
@abstract
@doNotGenerateCast
@cppObjectLayoutDefinition
extern class SeqString extends String {}
// Sequential string holding its characters inline as char8 values.
@doNotGenerateCast
@cppObjectLayoutDefinition
extern class SeqOneByteString extends SeqString {
// Inline character array; its element count is the `length` field.
const chars[length]: char8;
}
// Sequential string holding its characters inline as char16 values.
@doNotGenerateCast
@cppObjectLayoutDefinition
extern class SeqTwoByteString extends SeqString {
// Inline character array; its element count is the `length` field.
const chars[length]: char16;
}
// String that refers to a range of another string. StringToSlice resolves it
// by adding `offset` to its running offset and continuing with `parent`.
@doNotGenerateCast
@cppObjectLayoutDefinition
extern class SlicedString extends String {
// The string this slice points into.
parent: String;
// Start position of the slice within `parent`.
offset: Smi;
}
// String that forwards to another string. Flatten() and StringToSlice()
// handle a ThinString by continuing with `actual`.
@doNotGenerateCast
@cppObjectLayoutDefinition
extern class ThinString extends String {
// The string this thin string stands for.
actual: String;
}
// A direct string can be accessed directly through CSA without going into the
// C++ runtime. See also: ToDirectStringAssembler.
// Declared here only as a subtype of String, with no members of its own.
type DirectString extends String;
// Allocates a SeqOneByteString with `length` characters and fills `chars`
// from the `content` iterator. The new string gets the empty hash field.
// `length` must be non-zero and no larger than kStringMaxLength; this is only
// verified by the dcheck.
macro AllocateNonEmptySeqOneByteString<Iterator: type>(
    length: uint32, content: Iterator): SeqOneByteString {
  dcheck(length != 0 && length <= kStringMaxLength);
  // The length field is an int32, so convert the unsigned count once.
  const signedLength: int32 = Signed(length);
  return new (ClearPadding) SeqOneByteString{
    map: kSeqOneByteStringMap,
    raw_hash_field: kNameEmptyHashField,
    length: signedLength,
    chars: ...content
  };
}
// Allocates a SeqTwoByteString with `length` characters and fills `chars`
// from the `content` iterator. The new string gets the empty hash field.
// `length` must be non-zero and no larger than kStringMaxLength; this is only
// verified by the dcheck.
macro AllocateNonEmptySeqTwoByteString<Iterator: type>(
    length: uint32, content: Iterator): SeqTwoByteString {
  dcheck(length > 0 && length <= kStringMaxLength);
  // The length field is an int32, so convert the unsigned count once.
  const signedLength: int32 = Signed(length);
  return new (ClearPadding) SeqTwoByteString{
    map: kSeqTwoByteStringMap,
    raw_hash_field: kNameEmptyHashField,
    length: signedLength,
    chars: ...content
  };
}
// Convenience overload: allocates a non-empty SeqOneByteString of `length`
// characters, passing an UninitializedIterator as the content source.
// The same `length` preconditions as the iterator overload apply.
macro AllocateNonEmptySeqOneByteString(length: uint32): SeqOneByteString {
return AllocateNonEmptySeqOneByteString(length, UninitializedIterator{});
}
// Convenience overload: allocates a non-empty SeqTwoByteString of `length`
// characters, passing an UninitializedIterator as the content source.
// The same `length` preconditions as the iterator overload apply.
macro AllocateNonEmptySeqTwoByteString(length: uint32): SeqTwoByteString {
return AllocateNonEmptySeqTwoByteString(length, UninitializedIterator{});
}
// Like AllocateNonEmptySeqOneByteString, but also accepts a zero length, in
// which case kEmptyString is returned and nothing is allocated.
macro AllocateSeqOneByteString<Iterator: type>(
    length: uint32, content: Iterator): SeqOneByteString|EmptyString {
  if (length != 0) {
    return AllocateNonEmptySeqOneByteString(length, content);
  }
  return kEmptyString;
}
// Like AllocateNonEmptySeqTwoByteString, but also accepts a zero length, in
// which case kEmptyString is returned and nothing is allocated.
macro AllocateSeqTwoByteString<Iterator: type>(
    length: uint32, content: Iterator): SeqTwoByteString|EmptyString {
  if (length != 0) {
    return AllocateNonEmptySeqTwoByteString(length, content);
  }
  return kEmptyString;
}
// Exported overload: allocates a SeqOneByteString of `length` characters,
// passing an UninitializedIterator as the content source. Returns
// kEmptyString when `length` is 0 (see the iterator overload).
@export
macro AllocateSeqOneByteString(length: uint32): SeqOneByteString|
EmptyString {
return AllocateSeqOneByteString(length, UninitializedIterator{});
}
// Exported overload: allocates a SeqTwoByteString of `length` characters,
// passing an UninitializedIterator as the content source. Returns
// kEmptyString when `length` is 0 (see the iterator overload).
@export
macro AllocateSeqTwoByteString(length: uint32): SeqTwoByteString|
EmptyString {
return AllocateSeqTwoByteString(length, UninitializedIterator{});
}
// Externally implemented helpers that write a string's characters into a raw
// one-byte / two-byte buffer. StringSlowFlatten calls them with (source
// string, destination pointer, 0, source length).
// NOTE(review): the two int32 arguments are presumably a start offset and a
// character count - confirm against the C++ implementation.
extern macro StringWriteToFlatOneByte(
String, RawPtr<char8>, int32, int32): void;
extern macro StringWriteToFlatTwoByte(
String, RawPtr<char16>, int32, int32): void;
// Corresponds to String::SlowFlatten in the C++ runtime.
// Copies the characters of `cons` into a newly allocated sequential string,
// then rewrites the cons string in place so that `first` is the flat copy and
// `second` is the empty string. Returns the flat string.
builtin StringSlowFlatten(cons: ConsString): String {
// TurboFan can create cons strings with empty first parts.
// Shadow the parameter so the loop below can step to a nested cons string.
let cons = cons;
while (cons.first.length == 0) {
// We do not want to call this function recursively. Therefore we call
// String::Flatten only in those cases where String::SlowFlatten is not
// called again.
try {
// If the second part is not a cons string, or is an already flat one,
// Flatten() returns without calling back into this builtin.
const second = Cast<ConsString>(cons.second) otherwise FoundFlatString;
if (second.IsFlat()) goto FoundFlatString;
// Otherwise continue with the nested, non-flat cons string.
cons = second;
} label FoundFlatString {
return Flatten(cons.second);
}
}
// Allocate a sequential string with the same width as the cons string and
// write the characters into it.
let flat: String;
if (cons.IsOneByteRepresentation()) {
const allocated = AllocateNonEmptySeqOneByteString(Unsigned(cons.length));
// The raw start pointer is taken directly in the call expression and is not
// stored in a local.
StringWriteToFlatOneByte(
cons, (&allocated.chars).GCUnsafeStartPointer(), 0, cons.length);
flat = allocated;
} else {
const allocated = UnsafeCast<SeqTwoByteString>(
AllocateNonEmptySeqTwoByteString(Unsigned(cons.length)));
StringWriteToFlatTwoByte(
cons, (&allocated.chars).GCUnsafeStartPointer(), 0, cons.length);
flat = allocated;
}
// Make the cons string flat: an empty second part is what IsFlat() tests.
cons.first = flat;
cons.second = kEmptyString;
return flat;
}
// Corresponds to String::Flatten in the C++ runtime.
// Cons strings are flattened via the ConsString overload, thin strings yield
// the string they forward to, and every other string is returned unchanged.
macro Flatten(string: String): String {
  typeswitch (string) {
    case (consString: ConsString): {
      return Flatten(consString);
    }
    case (thin: ThinString): {
      // The forwarded-to string is expected never to be a cons string.
      dcheck(!Is<ConsString>(thin.actual));
      return thin.actual;
    }
    case (String): {
      return string;
    }
  }
}
// Flattens a cons string. An already flat cons string simply yields its first
// part; anything else goes through the StringSlowFlatten builtin.
macro Flatten(cons: ConsString): String {
  if (!cons.IsFlat()) {
    return StringSlowFlatten(cons);
  }
  return cons.first;
}
// Get a slice to the string data, flatten only if unavoidable for this.
// This macro never returns normally: it always leaves through OneByte or
// TwoByte, passing a const slice over the characters of `string`.
macro StringToSlice(string: String): never labels OneByte(ConstSlice<char8>),
    TwoByte(ConstSlice<char16>) {
  // The slice length is fixed by the input string. Unwrapping below only
  // changes which backing string is read and where the slice starts.
  const sliceLength = Convert<intptr>(string.length);
  let current: String = string;
  let start: intptr = 0;
  while (true) {
    typeswitch (current) {
      case (seq: SeqOneByteString): {
        goto OneByte(
            Subslice(&seq.chars, start, sliceLength) otherwise unreachable);
      }
      case (seq: SeqTwoByteString): {
        goto TwoByte(
            Subslice(&seq.chars, start, sliceLength) otherwise unreachable);
      }
      case (thin: ThinString): {
        current = thin.actual;
      }
      case (cons: ConsString): {
        current = Flatten(cons);
      }
      case (sliced: SlicedString): {
        // Accumulate the slice's start offset and continue with its parent.
        start = start + Convert<intptr>(sliced.offset);
        current = sliced.parent;
      }
      case (ext: ExternalOneByteString): {
        const backingLength = Convert<intptr>(ext.length);
        const backing = torque_internal::unsafe::NewOffHeapConstSlice(
            ext.GetChars(), backingLength);
        goto OneByte(
            Subslice(backing, start, sliceLength) otherwise unreachable);
      }
      case (ext: ExternalTwoByteString): {
        const backingLength = Convert<intptr>(ext.length);
        const backing = torque_internal::unsafe::NewOffHeapConstSlice(
            ext.GetChars(), backingLength);
        goto TwoByte(
            Subslice(backing, start, sliceLength) otherwise unreachable);
      }
      case (String): {
        unreachable;
      }
    }
  }
  VerifiedUnreachable();
}
// Dispatch on the slice type of two different strings.
// Both strings are converted with StringToSlice, and `f` is invoked through
// Call() with the matching pair of slice types (four combinations in total).
macro TwoStringsToSlices<Result: type, Functor: type>(
    s1: String, s2: String, f: Functor): Result {
  try {
    StringToSlice(s1) otherwise LhsOneByte, LhsTwoByte;
  } label LhsOneByte(lhs: ConstSlice<char8>) {
    try {
      StringToSlice(s2) otherwise RhsOneByte, RhsTwoByte;
    } label RhsOneByte(rhs: ConstSlice<char8>) {
      return Call(f, lhs, rhs);
    } label RhsTwoByte(rhs: ConstSlice<char16>) {
      return Call(f, lhs, rhs);
    }
  } label LhsTwoByte(lhs: ConstSlice<char16>) {
    try {
      StringToSlice(s2) otherwise RhsOneByte, RhsTwoByte;
    } label RhsOneByte(rhs: ConstSlice<char8>) {
      return Call(f, lhs, rhs);
    } label RhsTwoByte(rhs: ConstSlice<char16>) {
      return Call(f, lhs, rhs);
    }
  }
}
// Compile-time check that the maximum string length is smaller than the
// largest Smi value. Has no runtime effect beyond the static assertion.
macro StaticAssertStringLengthFitsSmi(): void {
// Evaluated as a constexpr so the comparison is done at compile time.
const kMaxStringLengthFitsSmi: constexpr bool =
kStringMaxLengthUintptr < kSmiMaxValue;
static_assert(kMaxStringLengthFitsSmi);
}
// Substring search primitives implemented in StringBuiltinsAssembler, one per
// combination of subject and search-string character width. As used by the
// AbstractStringIndexOf overloads in this file, the arguments are (subject
// pointer, subject length, search pointer, search length, start index).
// NOTE(review): the intptr result is presumably the match index or -1 when
// nothing is found - confirm against the C++ implementation.
extern macro StringBuiltinsAssembler::SearchOneByteStringInTwoByteString(
RawPtr<char16>, intptr, RawPtr<char8>, intptr, intptr): intptr;
extern macro StringBuiltinsAssembler::SearchOneByteStringInOneByteString(
RawPtr<char8>, intptr, RawPtr<char8>, intptr, intptr): intptr;
extern macro StringBuiltinsAssembler::SearchTwoByteStringInTwoByteString(
RawPtr<char16>, intptr, RawPtr<char16>, intptr, intptr): intptr;
extern macro StringBuiltinsAssembler::SearchTwoByteStringInOneByteString(
RawPtr<char8>, intptr, RawPtr<char16>, intptr, intptr): intptr;
// Variant without a search-length argument. In this file it is called only
// when the search string has length 1, with (subject pointer, subject length,
// search pointer, start index).
extern macro StringBuiltinsAssembler::SearchOneByteInOneByteString(
RawPtr<char8>, intptr, RawPtr<char8>, intptr): intptr;
// Overload for a two-byte subject and a one-byte search string.
// Forwards all arguments unchanged to SearchOneByteStringInTwoByteString and
// returns its result.
macro AbstractStringIndexOf(
subject: RawPtr<char16>, subjectLen: intptr, search: RawPtr<char8>,
searchLen: intptr, fromIndex: intptr): intptr {
return SearchOneByteStringInTwoByteString(
subject, subjectLen, search, searchLen, fromIndex);
}
// Overload for a one-byte subject and a one-byte search string.
// A search string of exactly one character is routed to the dedicated
// single-character search; all other lengths use the general search.
macro AbstractStringIndexOf(
    subject: RawPtr<char8>, subjectLen: intptr, search: RawPtr<char8>,
    searchLen: intptr, fromIndex: intptr): intptr {
  if (searchLen == 1) {
    return SearchOneByteInOneByteString(subject, subjectLen, search, fromIndex);
  } else {
    return SearchOneByteStringInOneByteString(
        subject, subjectLen, search, searchLen, fromIndex);
  }
}
// Overload for a two-byte subject and a two-byte search string.
// Forwards all arguments unchanged to SearchTwoByteStringInTwoByteString and
// returns its result.
macro AbstractStringIndexOf(
subject: RawPtr<char16>, subjectLen: intptr, search: RawPtr<char16>,
searchLen: intptr, fromIndex: intptr): intptr {
return SearchTwoByteStringInTwoByteString(
subject, subjectLen, search, searchLen, fromIndex);
}
// Overload for a one-byte subject and a two-byte search string.
// Forwards all arguments unchanged to SearchTwoByteStringInOneByteString and
// returns its result.
macro AbstractStringIndexOf(
subject: RawPtr<char8>, subjectLen: intptr, search: RawPtr<char16>,
searchLen: intptr, fromIndex: intptr): intptr {
return SearchTwoByteStringInOneByteString(
subject, subjectLen, search, searchLen, fromIndex);
}
// Functor passed to TwoStringsToSlices by AbstractStringIndexOf. It carries
// the start index so that the Call() overload taking this struct can run the
// search once both strings have been turned into slices.
struct AbstractStringIndexOfFunctor {
// Index in the subject string at which the search starts.
fromIndex: Smi;
}
// Ideally, this would be a method of AbstractStringIndexOfFunctor, but
// currently methods don't support templates.
// Runs the raw-pointer AbstractStringIndexOf overload that matches the two
// slice element types and returns the resulting index as a Smi.
macro Call<A: type, B: type>(
    self: AbstractStringIndexOfFunctor, string: ConstSlice<A>,
    searchStr: ConstSlice<B>): Smi {
  const start: intptr = Convert<intptr>(self.fromIndex);
  // The raw start pointers are taken directly in the call expression.
  const matchIndex: intptr = AbstractStringIndexOf(
      string.GCUnsafeStartPointer(), string.length,
      searchStr.GCUnsafeStartPointer(), searchStr.length, start);
  return Convert<Smi>(matchIndex);
}
// Searches `string` for `searchString`, starting at `fromIndex`, and returns
// the result as a Smi. Returns `fromIndex` itself for an empty search string
// that starts within bounds, and -1 when the search string cannot fit in the
// remainder of `string`.
macro AbstractStringIndexOf(
    implicit context: Context)(string: String, searchString: String,
    fromIndex: Smi): Smi {
  const needleLength = searchString.length_intptr;
  const haystackLength = string.length_intptr;
  const start = SmiUntag(fromIndex);

  // Special case the empty string.
  if (needleLength == 0 && start <= haystackLength) {
    return fromIndex;
  }

  // Don't bother to search if the searchString would go past the end
  // of the string. This is actually necessary because of runtime
  // checks.
  if (start + needleLength > haystackLength) {
    return -1;
  }

  // Dispatch on the character width of both strings and run the search.
  const functor = AbstractStringIndexOfFunctor{fromIndex: fromIndex};
  return TwoStringsToSlices<Smi>(string, searchString, functor);
}
// Builtin wrapper around AbstractStringIndexOf. A negative `start` is clamped
// to 0 before searching.
builtin StringIndexOf(s: String, searchString: String, start: Smi): Smi {
  const clampedStart: Smi = SmiMax(start, 0);
  return AbstractStringIndexOf(s, searchString, clampedStart);
}