blob: 56a4c3a4aede6df395858301ee233e725f7e6cb0 [file] [log] [blame]
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_INTL_SUPPORT
#error Internationalization is expected to be enabled.
#endif // V8_INTL_SUPPORT
#include "src/objects/js-segment-iterator.h"
#include <map>
#include <memory>
#include <string>
#include "src/execution/isolate.h"
#include "src/heap/factory.h"
#include "src/objects/intl-objects.h"
#include "src/objects/js-segment-iterator-inl.h"
#include "src/objects/js-segments.h"
#include "src/objects/managed-inl.h"
#include "src/objects/objects-inl.h"
#include "unicode/brkiter.h"
namespace v8 {
namespace internal {
// Returns the string form of this iterator's granularity, as produced by
// JSSegmenter::GetGranularityString.
Handle<String> JSSegmentIterator::GranularityAsString(Isolate* isolate) const {
  const auto kind = granularity();
  return JSSegmenter::GetGranularityString(isolate, kind);
}
// ecma402 #sec-createsegmentiterator
//
// Allocates a JSSegmentIterator for |input_string| with the given
// |granularity|. The iterator works on its own clone of |break_iterator| and
// on its own copy of the text that |break_iterator| currently refers to.
MaybeHandle<JSSegmentIterator> JSSegmentIterator::Create(
    Isolate* isolate, Handle<String> input_string,
    icu::BreakIterator* break_iterator, JSSegmenter::Granularity granularity) {
  // icu::BreakIterator keeps its iteration position internally, so a single
  // instance cannot be shared across multiple calls to
  // JSSegmentIterator::Create and JSSegments::Containing. Cloning gives this
  // iterator a private instance and also settles the ownership question.
  icu::BreakIterator* cloned_iterator = break_iterator->clone();
  DCHECK_NOT_NULL(cloned_iterator);

  // 5. Set iterator.[[IteratedStringNextSegmentCodeUnitIndex]] to 0.
  cloned_iterator->first();
  Handle<Managed<icu::BreakIterator>> managed_iterator =
      Managed<icu::BreakIterator>::FromRawPtr(isolate, 0, cloned_iterator);

  // Extract the iterated text into a separate UnicodeString, wrap it in a
  // Managed<> as well, and point the clone at that copy.
  icu::UnicodeString* text_copy = new icu::UnicodeString();
  cloned_iterator->getText().getText(*text_copy);
  Handle<Managed<icu::UnicodeString>> managed_text =
      Managed<icu::UnicodeString>::FromRawPtr(isolate, 0, text_copy);
  cloned_iterator->setText(*text_copy);

  // Every field value is available now, so the result object can be
  // allocated and filled in without further allocation.
  Handle<Map> iterator_map(
      isolate->native_context()->intl_segment_iterator_map(), isolate);
  Handle<JSObject> object =
      isolate->factory()->NewJSObjectFromMap(iterator_map);
  DisallowGarbageCollection no_gc;
  Handle<JSSegmentIterator> iterator =
      Handle<JSSegmentIterator>::cast(object);
  iterator->set_flags(0);
  iterator->set_granularity(granularity);
  iterator->set_icu_break_iterator(*managed_iterator);
  iterator->set_raw_string(*input_string);
  iterator->set_unicode_string(*managed_text);
  return iterator;
}
// ecma402 #sec-%segmentiteratorprototype%.next
//
// Advances |segment_iterator|'s ICU break iterator by one boundary. Returns
// an iterator result holding a segment data object for the span between the
// previous and the new boundary, or a "done" result (value undefined) once
// ICU reports icu::BreakIterator::DONE.
MaybeHandle<JSReceiver> JSSegmentIterator::Next(
    Isolate* isolate, Handle<JSSegmentIterator> segment_iterator) {
  // Ideas for future performance work, roughly ordered by difficulty:
  // - Add a fast path for grapheme segmentation of one-byte strings that
  //   skips calling into ICU entirely.
  // - On entry to this function, make a batch of ICU calls and stash the
  //   results, so the next couple of invocations can be served from a
  //   (Torque?) builtin without calling into C++.
  // - Teach the compiler to escape-analyze the JSSegmentDataObject and avoid
  //   allocating it when possible.

  // TODO(v8:14681): We StackCheck here to break execution in the event of an
  // interrupt. Ordinarily in JS loops, this stack check should already be
  // occurring, however some loops implemented within CodeStubAssembler and
  // Torque builtins do not currently implement these checks. Preferable
  // solutions, which would also benefit other iterators implemented in C++,
  // include:
  //   1) Performing the stack check in CEntry, which would provide a
  //      solution for all methods implemented in C++.
  //
  //   2) Rewriting the loop to include an outer loop, which performs periodic
  //      stack checks every N loop bodies (where N is some arbitrary
  //      heuristic selected to allow short loop counts to run with few
  //      interruptions).
  STACK_CHECK(isolate, MaybeHandle<JSReceiver>());

  Factory* factory = isolate->factory();
  icu::BreakIterator* breaker = segment_iterator->icu_break_iterator()->raw();

  // 5. Let startIndex be iterator.[[IteratedStringNextSegmentCodeUnitIndex]].
  const int32_t segment_start = breaker->current();
  // 6. Let endIndex be ! FindBoundary(segmenter, string, startIndex, after).
  const int32_t segment_end = breaker->next();

  // 7. If endIndex is not finite, then
  if (segment_end == icu::BreakIterator::DONE) {
    // a. Return ! CreateIterResultObject(undefined, true).
    return factory->NewJSIteratorResult(factory->undefined_value(), true);
  }

  // 8. Set iterator.[[IteratedStringNextSegmentCodeUnitIndex]] to endIndex.
  //    (Nothing to store here: the position lives in the ICU iterator, which
  //    was advanced by the next() call above.)
  // 9. Let segmentData be ! CreateSegmentDataObject(segmenter, string,
  //    startIndex, endIndex).
  Handle<JSSegmentDataObject> segment_data;
  const auto kind = segment_iterator->granularity();
  const bool single_unit_grapheme =
      kind == JSSegmenter::Granularity::GRAPHEME &&
      segment_start == segment_end - 1;
  if (!single_unit_grapheme) {
    // General path: let JSSegments build the segment data object.
    ASSIGN_RETURN_ON_EXCEPTION(
        isolate, segment_data,
        JSSegments::CreateSegmentDataObject(
            isolate, kind, breaker,
            handle(segment_iterator->raw_string(), isolate),
            *segment_iterator->unicode_string()->raw(), segment_start,
            segment_end),
        JSReceiver);
  } else {
    // Fast path: use a cached single-character segment string and skip
    // avoidable handle creations. This stays in its own scope so that the
    // no-allocation scope below ends before the iterator result is allocated.
    const uint16_t code_unit =
        segment_iterator->raw_string()->Get(segment_start);
    const bool in_latin1_range = code_unit <= unibrow::Latin1::kMaxChar;
    const bool index_fits_smi = Smi::IsValid(segment_start);

    // Everything that may allocate must happen before the segment data object
    // itself is allocated (see the write barrier note below).
    Handle<String> wide_segment;
    if (!in_latin1_range) {
      wide_segment = factory->LookupSingleCharacterStringFromCode(code_unit);
    }
    Handle<Object> boxed_index;
    if (!index_fits_smi) boxed_index = factory->NewHeapNumber(segment_start);

    Handle<Map> data_map(
        isolate->native_context()->intl_segment_data_object_map(), isolate);
    segment_data = Handle<JSSegmentDataObject>::cast(
        factory->NewJSObjectFromMap(data_map));
    Tagged<JSSegmentDataObject> data = *segment_data;
    DisallowHeapAllocation no_gc;
    // We can skip write barriers because {segment_data} is the last object
    // that was allocated.
    if (in_latin1_range) {
      data->set_segment(
          String::cast(
              factory->single_character_string_table()->get(code_unit)),
          SKIP_WRITE_BARRIER);
    } else {
      data->set_segment(*wide_segment, SKIP_WRITE_BARRIER);
    }
    data->set_index(
        index_fits_smi ? Smi::FromInt(segment_start) : *boxed_index,
        SKIP_WRITE_BARRIER);
    data->set_input(segment_iterator->raw_string(), SKIP_WRITE_BARRIER);
  }

  // 10. Return ! CreateIterResultObject(segmentData, false).
  return factory->NewJSIteratorResult(segment_data, false);
}
} // namespace internal
} // namespace v8