| // Copyright 2018 the V8 project authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef V8_INTL_SUPPORT |
| #error Internationalization is expected to be enabled. |
| #endif // V8_INTL_SUPPORT |
| |
| #include "src/objects/js-segment-iterator.h" |
| |
| #include <map> |
| #include <memory> |
| #include <string> |
| |
| #include "src/execution/isolate.h" |
| #include "src/heap/factory.h" |
| #include "src/objects/intl-objects.h" |
| #include "src/objects/js-segment-iterator-inl.h" |
| #include "src/objects/managed.h" |
| #include "src/objects/objects-inl.h" |
| #include "unicode/brkiter.h" |
| |
| namespace v8 { |
| namespace internal { |
| |
| MaybeHandle<String> JSSegmentIterator::GetSegment(Isolate* isolate, |
| int32_t start, |
| int32_t end) const { |
| return Intl::ToString(isolate, *(unicode_string().raw()), start, end); |
| } |
| |
| Handle<String> JSSegmentIterator::GranularityAsString() const { |
| switch (granularity()) { |
| case JSSegmenter::Granularity::GRAPHEME: |
| return GetReadOnlyRoots().grapheme_string_handle(); |
| case JSSegmenter::Granularity::WORD: |
| return GetReadOnlyRoots().word_string_handle(); |
| case JSSegmenter::Granularity::SENTENCE: |
| return GetReadOnlyRoots().sentence_string_handle(); |
| } |
| UNREACHABLE(); |
| } |
| |
| MaybeHandle<JSSegmentIterator> JSSegmentIterator::Create( |
| Isolate* isolate, icu::BreakIterator* break_iterator, |
| JSSegmenter::Granularity granularity, Handle<String> text) { |
| CHECK_NOT_NULL(break_iterator); |
| // 1. Let iterator be ObjectCreate(%SegmentIteratorPrototype%). |
| Handle<Map> map = Handle<Map>( |
| isolate->native_context()->intl_segment_iterator_map(), isolate); |
| |
| Handle<Managed<icu::BreakIterator>> managed_break_iterator = |
| Managed<icu::BreakIterator>::FromRawPtr(isolate, 0, break_iterator); |
| Handle<Managed<icu::UnicodeString>> unicode_string = |
| Intl::SetTextToBreakIterator(isolate, text, break_iterator); |
| |
| // Now all properties are ready, so we can allocate the result object. |
| Handle<JSObject> result = isolate->factory()->NewJSObjectFromMap(map); |
| DisallowHeapAllocation no_gc; |
| Handle<JSSegmentIterator> segment_iterator = |
| Handle<JSSegmentIterator>::cast(result); |
| |
| segment_iterator->set_flags(0); |
| segment_iterator->set_granularity(granularity); |
| // 2. Let iterator.[[SegmentIteratorSegmenter]] be segmenter. |
| segment_iterator->set_icu_break_iterator(*managed_break_iterator); |
| |
| // 3. Let iterator.[[SegmentIteratorString]] be string. |
| segment_iterator->set_unicode_string(*unicode_string); |
| |
| // 4. Let iterator.[[SegmentIteratorIndex]] be 0. |
| // step 4 is stored inside break_iterator. |
| |
| // 5. Let iterator.[[SegmentIteratorBreakType]] be undefined. |
| segment_iterator->set_is_break_type_set(false); |
| |
| return segment_iterator; |
| } |
| |
| // ecma402 #sec-segment-iterator-prototype-breakType |
| Handle<Object> JSSegmentIterator::BreakType() const { |
| if (!is_break_type_set()) { |
| return GetReadOnlyRoots().undefined_value_handle(); |
| } |
| icu::BreakIterator* break_iterator = icu_break_iterator().raw(); |
| int32_t rule_status = break_iterator->getRuleStatus(); |
| switch (granularity()) { |
| case JSSegmenter::Granularity::GRAPHEME: |
| return GetReadOnlyRoots().undefined_value_handle(); |
| case JSSegmenter::Granularity::WORD: |
| if (rule_status >= UBRK_WORD_NONE && rule_status < UBRK_WORD_NONE_LIMIT) { |
| // "words" that do not fit into any of other categories. Includes spaces |
| // and most punctuation. |
| return GetReadOnlyRoots().none_string_handle(); |
| } |
| if ((rule_status >= UBRK_WORD_NUMBER && |
| rule_status < UBRK_WORD_NUMBER_LIMIT) || |
| (rule_status >= UBRK_WORD_LETTER && |
| rule_status < UBRK_WORD_LETTER_LIMIT) || |
| (rule_status >= UBRK_WORD_KANA && |
| rule_status < UBRK_WORD_KANA_LIMIT) || |
| (rule_status >= UBRK_WORD_IDEO && |
| rule_status < UBRK_WORD_IDEO_LIMIT)) { |
| // words that appear to be numbers, letters, kana characters, |
| // ideographic characters, etc |
| return GetReadOnlyRoots().word_string_handle(); |
| } |
| return GetReadOnlyRoots().undefined_value_handle(); |
| case JSSegmenter::Granularity::SENTENCE: |
| if (rule_status >= UBRK_SENTENCE_TERM && |
| rule_status < UBRK_SENTENCE_TERM_LIMIT) { |
| // sentences ending with a sentence terminator ('.', '?', '!', etc.) |
| // character, possibly followed by a hard separator (CR, LF, PS, etc.) |
| return GetReadOnlyRoots().term_string_handle(); |
| } |
| if ((rule_status >= UBRK_SENTENCE_SEP && |
| rule_status < UBRK_SENTENCE_SEP_LIMIT)) { |
| // sentences that do not contain an ending sentence terminator ('.', |
| // '?', '!', etc.) character, but are ended only by a hard separator |
| // (CR, LF, PS, etc.) hard, or mandatory line breaks |
| return GetReadOnlyRoots().sep_string_handle(); |
| } |
| return GetReadOnlyRoots().undefined_value_handle(); |
| } |
| UNREACHABLE(); |
| } |
| |
| // ecma402 #sec-segment-iterator-prototype-index |
| Handle<Object> JSSegmentIterator::Index( |
| Isolate* isolate, Handle<JSSegmentIterator> segment_iterator) { |
| icu::BreakIterator* icu_break_iterator = |
| segment_iterator->icu_break_iterator().raw(); |
| CHECK_NOT_NULL(icu_break_iterator); |
| return isolate->factory()->NewNumberFromInt(icu_break_iterator->current()); |
| } |
| |
| // ecma402 #sec-segment-iterator-prototype-next |
| MaybeHandle<JSReceiver> JSSegmentIterator::Next( |
| Isolate* isolate, Handle<JSSegmentIterator> segment_iterator) { |
| Factory* factory = isolate->factory(); |
| icu::BreakIterator* icu_break_iterator = |
| segment_iterator->icu_break_iterator().raw(); |
| // 3. Let _previousIndex be iterator.[[SegmentIteratorIndex]]. |
| int32_t prev = icu_break_iterator->current(); |
| // 4. Let done be AdvanceSegmentIterator(iterator, forwards). |
| int32_t index = icu_break_iterator->next(); |
| segment_iterator->set_is_break_type_set(true); |
| if (index == icu::BreakIterator::DONE) { |
| // 5. If done is true, return CreateIterResultObject(undefined, true). |
| return factory->NewJSIteratorResult(isolate->factory()->undefined_value(), |
| true); |
| } |
| // 6. Let newIndex be iterator.[[SegmentIteratorIndex]]. |
| Handle<Object> new_index = factory->NewNumberFromInt(index); |
| |
| // 8. Let segment be the substring of string from previousIndex to |
| // newIndex, inclusive of previousIndex and exclusive of newIndex. |
| Handle<String> segment; |
| ASSIGN_RETURN_ON_EXCEPTION(isolate, segment, |
| segment_iterator->GetSegment(isolate, prev, index), |
| JSReceiver); |
| |
| // 9. Let breakType be iterator.[[SegmentIteratorBreakType]]. |
| Handle<Object> break_type = segment_iterator->BreakType(); |
| |
| // 10. Let result be ! ObjectCreate(%ObjectPrototype%). |
| Handle<JSObject> result = factory->NewJSObject(isolate->object_function()); |
| |
| // 11. Perform ! CreateDataProperty(result "segment", segment). |
| CHECK(JSReceiver::CreateDataProperty(isolate, result, |
| factory->segment_string(), segment, |
| Just(kDontThrow)) |
| .FromJust()); |
| |
| // 12. Perform ! CreateDataProperty(result, "breakType", breakType). |
| CHECK(JSReceiver::CreateDataProperty(isolate, result, |
| factory->breakType_string(), break_type, |
| Just(kDontThrow)) |
| .FromJust()); |
| |
| // 13. Perform ! CreateDataProperty(result, "index", newIndex). |
| CHECK(JSReceiver::CreateDataProperty(isolate, result, factory->index_string(), |
| new_index, Just(kDontThrow)) |
| .FromJust()); |
| |
| // 14. Return CreateIterResultObject(result, false). |
| return factory->NewJSIteratorResult(result, false); |
| } |
| |
| // ecma402 #sec-segment-iterator-prototype-following |
| Maybe<bool> JSSegmentIterator::Following( |
| Isolate* isolate, Handle<JSSegmentIterator> segment_iterator, |
| Handle<Object> from_obj) { |
| Factory* factory = isolate->factory(); |
| icu::BreakIterator* icu_break_iterator = |
| segment_iterator->icu_break_iterator().raw(); |
| // 3. If from is not undefined, |
| if (!from_obj->IsUndefined()) { |
| // a. Let from be ? ToIndex(from). |
| uint32_t from; |
| Handle<Object> index; |
| ASSIGN_RETURN_ON_EXCEPTION_VALUE( |
| isolate, index, |
| Object::ToIndex(isolate, from_obj, MessageTemplate::kInvalidIndex), |
| Nothing<bool>()); |
| if (!index->ToArrayIndex(&from)) { |
| THROW_NEW_ERROR_RETURN_VALUE( |
| isolate, |
| NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange, |
| factory->from_string(), |
| factory->NewStringFromStaticChars("following"), index), |
| Nothing<bool>()); |
| } |
| // b. Let length be the length of iterator.[[SegmentIteratorString]]. |
| uint32_t length = |
| static_cast<uint32_t>(icu_break_iterator->getText().getLength()); |
| |
| // c. If from ≥ length, throw a RangeError exception. |
| if (from >= length) { |
| THROW_NEW_ERROR_RETURN_VALUE( |
| isolate, |
| NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange, |
| factory->from_string(), |
| factory->NewStringFromStaticChars("following"), |
| from_obj), |
| Nothing<bool>()); |
| } |
| |
| // d. Let iterator.[[SegmentIteratorPosition]] be from. |
| segment_iterator->set_is_break_type_set(true); |
| icu_break_iterator->following(from); |
| return Just(false); |
| } |
| // 4. return AdvanceSegmentIterator(iterator, forward). |
| // 4. .... or if direction is backwards and position is 0, return true. |
| // 4. If direction is forwards and position is the length of string ... return |
| // true. |
| segment_iterator->set_is_break_type_set(true); |
| return Just(icu_break_iterator->next() == icu::BreakIterator::DONE); |
| } |
| |
| // ecma402 #sec-segment-iterator-prototype-preceding |
| Maybe<bool> JSSegmentIterator::Preceding( |
| Isolate* isolate, Handle<JSSegmentIterator> segment_iterator, |
| Handle<Object> from_obj) { |
| Factory* factory = isolate->factory(); |
| icu::BreakIterator* icu_break_iterator = |
| segment_iterator->icu_break_iterator().raw(); |
| // 3. If from is not undefined, |
| if (!from_obj->IsUndefined()) { |
| // a. Let from be ? ToIndex(from). |
| uint32_t from; |
| Handle<Object> index; |
| ASSIGN_RETURN_ON_EXCEPTION_VALUE( |
| isolate, index, |
| Object::ToIndex(isolate, from_obj, MessageTemplate::kInvalidIndex), |
| Nothing<bool>()); |
| |
| if (!index->ToArrayIndex(&from)) { |
| THROW_NEW_ERROR_RETURN_VALUE( |
| isolate, |
| NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange, |
| factory->from_string(), |
| factory->NewStringFromStaticChars("preceding"), index), |
| Nothing<bool>()); |
| } |
| // b. Let length be the length of iterator.[[SegmentIteratorString]]. |
| uint32_t length = |
| static_cast<uint32_t>(icu_break_iterator->getText().getLength()); |
| // c. If from > length or from = 0, throw a RangeError exception. |
| if (from > length || from == 0) { |
| THROW_NEW_ERROR_RETURN_VALUE( |
| isolate, |
| NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange, |
| factory->from_string(), |
| factory->NewStringFromStaticChars("preceding"), |
| from_obj), |
| Nothing<bool>()); |
| } |
| // d. Let iterator.[[SegmentIteratorIndex]] be from. |
| segment_iterator->set_is_break_type_set(true); |
| icu_break_iterator->preceding(from); |
| return Just(false); |
| } |
| // 4. return AdvanceSegmentIterator(iterator, backwards). |
| // 4. .... or if direction is backwards and position is 0, return true. |
| segment_iterator->set_is_break_type_set(true); |
| return Just(icu_break_iterator->previous() == icu::BreakIterator::DONE); |
| } |
| |
| } // namespace internal |
| } // namespace v8 |