blob: a0b2e9659d13c29a146b74f03458525b430beb96 [file] [log] [blame]
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_INTL_SUPPORT
#error Internationalization is expected to be enabled.
#endif // V8_INTL_SUPPORT
#include "src/objects/js-segments.h"
#include <map>
#include <memory>
#include <string>
#include "src/execution/isolate.h"
#include "src/heap/factory.h"
#include "src/objects/intl-objects.h"
#include "src/objects/js-segment-iterator-inl.h"
#include "src/objects/js-segmenter-inl.h"
#include "src/objects/js-segments-inl.h"
#include "src/objects/managed-inl.h"
#include "src/objects/objects-inl.h"
#include "unicode/brkiter.h"
namespace v8 {
namespace internal {
// ecma402 #sec-createsegmentsobject
//
// Creates the JSSegments object for |string| using |segmenter|'s settings.
// A clone of the segmenter's ICU break iterator has the string's text set on
// it; the clone, the segmenter's granularity, the original string, and the
// icu::UnicodeString returned by Intl::SetTextToBreakIterator are all stored
// on the new object.
MaybeHandle<JSSegments> JSSegments::Create(Isolate* isolate,
                                           Handle<JSSegmenter> segmenter,
                                           Handle<String> string) {
  // The text is set on a clone rather than on the segmenter's own iterator.
  icu::BreakIterator* cloned_iterator =
      segmenter->icu_break_iterator()->raw()->clone();
  DCHECK_NOT_NULL(cloned_iterator);

  Handle<Managed<icu::UnicodeString>> managed_text =
      Intl::SetTextToBreakIterator(isolate, string, cloned_iterator);
  Handle<Managed<icu::BreakIterator>> managed_iterator =
      Managed<icu::BreakIterator>::FromRawPtr(isolate, 0, cloned_iterator);

  // 1. Let internalSlotsList be « [[SegmentsSegmenter]], [[SegmentsString]] ».
  // 2. Let segments be ! ObjectCreate(%Segments.prototype%, internalSlotsList).
  Handle<Map> segments_map(isolate->native_context()->intl_segments_map(),
                           isolate);
  Handle<JSSegments> segments = Handle<JSSegments>::cast(
      isolate->factory()->NewJSObjectFromMap(segments_map));
  segments->set_flags(0);

  // 3. Set segments.[[SegmentsSegmenter]] to segmenter.
  // Represented here by the cloned iterator plus the segmenter's granularity.
  segments->set_icu_break_iterator(*managed_iterator);
  segments->set_granularity(segmenter->granularity());

  // 4. Set segments.[[SegmentsString]] to string.
  // Both the original V8 string and the ICU text are kept.
  segments->set_raw_string(*string);
  segments->set_unicode_string(*managed_text);

  // 5. Return segments.
  return segments;
}
// ecma402 #sec-%segmentsprototype%.containing
//
// Returns the segment data object for the segment that contains code unit
// index |n_double|, or undefined when the index is negative or not less than
// the length of the stored ICU text.
MaybeHandle<Object> JSSegments::Containing(Isolate* isolate,
                                           Handle<JSSegments> segments,
                                           double n_double) {
  const icu::UnicodeString* text = segments->unicode_string()->raw();

  // 5. Let len be the length of string.
  int32_t len = text->length();

  // 7. If n < 0 or n ≥ len, return undefined.
  if (n_double < 0 || n_double >= len) {
    return isolate->factory()->undefined_value();
  }

  // n may point to the surrogate tail- adjust it back to the lead.
  int32_t n = text->getChar32Start(static_cast<int32_t>(n_double));

  icu::BreakIterator* break_iterator = segments->icu_break_iterator()->raw();

  // 8. Let startIndex be ! FindBoundary(segmenter, string, n, before).
  // If n is itself a boundary it is the start; otherwise step back to the
  // boundary preceding it.
  int32_t start_index = n;
  if (!break_iterator->isBoundary(n)) {
    start_index = break_iterator->preceding(n);
  }

  // 9. Let endIndex be ! FindBoundary(segmenter, string, n, after).
  // This is the last iterator call before the data object is built, so the
  // rule status read there comes from this position.
  int32_t end_index = break_iterator->following(n);

  // 10. Return ! CreateSegmentDataObject(segmenter, string, startIndex,
  // endIndex).
  Handle<String> input_string = handle(segments->raw_string(), isolate);
  return CreateSegmentDataObject(isolate, segments->granularity(),
                                 break_iterator, input_string, *text,
                                 start_index, end_index);
}
namespace {

// Returns true when the break iterator's current rule status lies in one of
// the half-open ICU word-status ranges for numbers, letters, kana, or
// ideographs.
bool CurrentSegmentIsWordLike(icu::BreakIterator* break_iterator) {
  const int32_t status = break_iterator->getRuleStatus();
  // Half-open range test: lower <= status < limit.
  const auto in_range = [status](int32_t lower, int32_t limit) {
    return status >= lower && status < limit;
  };
  return in_range(UBRK_WORD_NUMBER, UBRK_WORD_NUMBER_LIMIT) ||
         in_range(UBRK_WORD_LETTER, UBRK_WORD_LETTER_LIMIT) ||
         in_range(UBRK_WORD_KANA, UBRK_WORD_KANA_LIMIT) ||
         in_range(UBRK_WORD_IDEO, UBRK_WORD_IDEO_LIMIT);
}

}  // namespace
// ecma402 #sec-createsegmentdataobject
//
// Builds the segment data object for the range [start_index, end_index) of
// |unicode_string|. The result stores the substring ("segment"), the start
// index ("index") and |input_string| ("input"). For WORD granularity a
// different map is used and an additional is_word_like field is filled in
// from |break_iterator|'s current rule status.
//
// NOTE(review): |break_iterator| is only consulted for its current rule
// status, so callers presumably must have positioned it on this segment
// before calling (Containing() calls following(n) last) - confirm for other
// callers.
MaybeHandle<JSSegmentDataObject> JSSegments::CreateSegmentDataObject(
Isolate* isolate, JSSegmenter::Granularity granularity,
icu::BreakIterator* break_iterator, Handle<String> input_string,
const icu::UnicodeString& unicode_string, int32_t start_index,
int32_t end_index) {
Factory* factory = isolate->factory();
// 1. Let len be the length of string.
// 2. Assert: startIndex ≥ 0.
DCHECK_GE(start_index, 0);
// 3. Assert: endIndex ≤ len.
DCHECK_LE(end_index, unicode_string.length());
// 4. Assert: startIndex < endIndex.
DCHECK_LT(start_index, end_index);
// 5. Let result be ! ObjectCreate(%ObjectPrototype%).
// WORD granularity selects the "wordlike" map; the is_word_like store below
// is performed only in that case.
Handle<Map> map(
granularity == JSSegmenter::Granularity::WORD
? isolate->native_context()->intl_segment_data_object_wordlike_map()
: isolate->native_context()->intl_segment_data_object_map(),
isolate);
Handle<JSSegmentDataObject> result =
Handle<JSSegmentDataObject>::cast(factory->NewJSObjectFromMap(map));
// 6. Let segment be the String value equal to the substring of string
// consisting of the code units at indices startIndex (inclusive) through
// endIndex (exclusive).
// The substring is taken from the ICU text, not from |input_string|. A failure
// in Intl::ToString is handed to ASSIGN_RETURN_ON_EXCEPTION.
Handle<String> segment;
ASSIGN_RETURN_ON_EXCEPTION(
isolate, segment,
Intl::ToString(isolate, unicode_string, start_index, end_index),
JSSegmentDataObject);
Handle<Object> index = factory->NewNumberFromInt(start_index);
// 7. Perform ! CreateDataPropertyOrThrow(result, "segment", segment).
// The result object, the substring and the index number are all created above
// this point; from here on the object is accessed through the raw |raw|
// reference inside the DisallowGarbageCollection scope, so keep new factory
// calls out of the section below.
DisallowGarbageCollection no_gc;
Tagged<JSSegmentDataObject> raw = JSSegmentDataObject::cast(*result);
raw->set_segment(*segment);
// 8. Perform ! CreateDataPropertyOrThrow(result, "index", startIndex).
raw->set_index(*index);
// 9. Perform ! CreateDataPropertyOrThrow(result, "input", string).
raw->set_input(*input_string);
// 10. Let granularity be segmenter.[[SegmenterGranularity]].
// 11. If granularity is "word", then
if (granularity == JSSegmenter::Granularity::WORD) {
// a. Let isWordLike be a Boolean value indicating whether the segment in
// string is "word-like" according to locale segmenter.[[Locale]].
// Derived from the iterator's current rule status (see
// CurrentSegmentIsWordLike).
Handle<Boolean> is_word_like =
factory->ToBoolean(CurrentSegmentIsWordLike(break_iterator));
// b. Perform ! CreateDataPropertyOrThrow(result, "isWordLike", isWordLike).
JSSegmentDataObjectWithIsWordLike::cast(raw)->set_is_word_like(
*is_word_like);
}
return result;
}
// Returns this object's granularity converted to a string by
// JSSegmenter::GetGranularityString.
Handle<String> JSSegments::GranularityAsString(Isolate* isolate) const {
  const auto current_granularity = granularity();
  return JSSegmenter::GetGranularityString(isolate, current_granularity);
}
} // namespace internal
} // namespace v8