blob: b365f482733aebfcf088e66d2f57bbf678746d90 [file] [log] [blame] [edit]
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_INTL_SUPPORT
#error Internationalization is expected to be enabled.
#endif // V8_INTL_SUPPORT
#include "src/objects/js-break-iterator.h"
#include "src/objects/intl-objects.h"
#include "src/objects/js-break-iterator-inl.h"
#include "src/objects/managed-inl.h"
#include "src/objects/option-utils.h"
#include "unicode/brkiter.h"
namespace v8 {
namespace internal {
namespace {

// The break iterator flavours this file knows about. JSV8BreakIterator::New
// maps the "type" option onto one of these, and GetType() recovers the value
// from an existing ICU break iterator.
enum class Type {
  CHARACTER,
  WORD,
  SENTENCE,
  LINE,
};

}  // anonymous namespace
// Builds a JSV8BreakIterator from `locales` and `options_obj`.
//
// The steps run in this order: canonicalize the requested locales, coerce the
// options into an object, read the "localeMatcher" option, resolve the locale
// against GetAvailableLocales(), read the "type" option (defaulting to
// Type::WORD), create the matching ICU break iterator, and finally allocate
// the JS object from `map`. `service` is forwarded to Object::ToObject and to
// the option helpers.
//
// Returns an empty MaybeHandle when one of the Maybe-returning helpers yields
// Nothing, and throws a RangeError (kIcuError) when locale resolution or the
// ICU break iterator construction fails.
MaybeHandle<JSV8BreakIterator> JSV8BreakIterator::New(
    Isolate* isolate, DirectHandle<Map> map, Handle<Object> locales,
    Handle<Object> options_obj, const char* service) {
  Factory* factory = isolate->factory();

  // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales).
  Maybe<std::vector<std::string>> canonicalized_locales =
      Intl::CanonicalizeLocaleList(isolate, locales);
  MAYBE_RETURN(canonicalized_locales, MaybeHandle<JSV8BreakIterator>());
  std::vector<std::string> requested_locales =
      canonicalized_locales.FromJust();

  // An undefined options argument is replaced by an empty object without a
  // prototype; anything else goes through ToObject.
  Handle<JSReceiver> options;
  if (!IsUndefined(*options_obj, isolate)) {
    ASSIGN_RETURN_ON_EXCEPTION(isolate, options,
                               Object::ToObject(isolate, options_obj, service));
  } else {
    options = factory->NewJSObjectWithNullProto();
  }

  // Read the locale matcher option and resolve the locale with it.
  Maybe<Intl::MatcherOption> locale_matcher =
      Intl::GetLocaleMatcher(isolate, options, service);
  MAYBE_RETURN(locale_matcher, MaybeHandle<JSV8BreakIterator>());

  Maybe<Intl::ResolvedLocale> resolution =
      Intl::ResolveLocale(isolate, JSV8BreakIterator::GetAvailableLocales(),
                          requested_locales, locale_matcher.FromJust(), {});
  if (resolution.IsNothing()) {
    THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kIcuError));
  }
  Intl::ResolvedLocale resolved_locale = resolution.FromJust();

  // Read the "type" option; the fallback is Type::WORD.
  Maybe<Type> type_option = GetStringOption<Type>(
      isolate, options, "type", service,
      {"word", "character", "sentence", "line"},
      {Type::WORD, Type::CHARACTER, Type::SENTENCE, Type::LINE}, Type::WORD);
  MAYBE_RETURN(type_option, MaybeHandle<JSV8BreakIterator>());
  const Type requested_type = type_option.FromJust();

  const icu::Locale& icu_locale = resolved_locale.icu_locale;
  DCHECK(!icu_locale.isBogus());

  // Create the ICU break iterator for the resolved locale and requested type.
  // The per-type use counters are bumped before ICU is asked to create the
  // iterator, i.e. regardless of whether creation succeeds.
  UErrorCode icu_status = U_ZERO_ERROR;
  std::unique_ptr<icu::BreakIterator> icu_iterator;
  switch (requested_type) {
    case Type::CHARACTER:
      icu_iterator.reset(
          icu::BreakIterator::createCharacterInstance(icu_locale, icu_status));
      break;
    case Type::SENTENCE:
      icu_iterator.reset(
          icu::BreakIterator::createSentenceInstance(icu_locale, icu_status));
      break;
    case Type::LINE:
      isolate->CountUsage(
          v8::Isolate::UseCounterFeature::kBreakIteratorTypeLine);
      icu_iterator.reset(
          icu::BreakIterator::createLineInstance(icu_locale, icu_status));
      break;
    default:
      // Type::WORD ends up here.
      isolate->CountUsage(
          v8::Isolate::UseCounterFeature::kBreakIteratorTypeWord);
      icu_iterator.reset(
          icu::BreakIterator::createWordInstance(icu_locale, icu_status));
      break;
  }

  // Both a failing status and a null iterator count as an ICU error.
  if (!icu_iterator || U_FAILURE(icu_status)) {
    THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kIcuError));
  }
  isolate->CountUsage(v8::Isolate::UseCounterFeature::kBreakIterator);

  // Wrap the native objects. The unicode string slot starts out wrapping a
  // null pointer; AdoptText() stores the real text holder later.
  DirectHandle<Managed<icu::BreakIterator>> managed_iterator =
      Managed<icu::BreakIterator>::From(isolate, 0, std::move(icu_iterator));
  DirectHandle<Managed<icu::UnicodeString>> managed_text =
      Managed<icu::UnicodeString>::From(isolate, 0, nullptr);
  DirectHandle<String> locale_string =
      factory->NewStringFromAsciiChecked(resolved_locale.locale.c_str());

  // Everything the object needs exists now, so allocate it and fill in the
  // fields without allowing a GC in between.
  Handle<JSV8BreakIterator> holder =
      Cast<JSV8BreakIterator>(factory->NewFastOrSlowJSObjectFromMap(map));
  DisallowGarbageCollection no_gc;
  holder->set_locale(*locale_string);
  holder->set_break_iterator(*managed_iterator);
  holder->set_unicode_string(*managed_text);
  return holder;
}
namespace {
// Recovers the Type of an existing ICU break iterator.
//
// The type is not stored on the JSV8BreakIterator object in order to save
// memory; callers normally know which type they asked for, and the value is
// only needed when resolvedOptions() is called. This routine is therefore
// allowed to be slow: it clones the iterator (the clone is needed because
// setText() is called on it), feeds the clone the probe string "He is." and
// looks at the first boundary that next() reports:
//   1 -> after "H"       : character
//   2 -> after "He"      : word
//   3 -> after "He "     : line
//   6 -> after "He is."  : sentence
// Any other value is treated as unreachable.
Type GetType(icu::BreakIterator* break_iterator) {
  std::unique_ptr<icu::BreakIterator> probe_iterator(break_iterator->clone());
  icu::UnicodeString probe_text("He is.");
  probe_iterator->setText(probe_text);

  const int32_t first_boundary = probe_iterator->next();
  if (first_boundary == 1) return Type::CHARACTER;  // After "H"
  if (first_boundary == 2) return Type::WORD;       // After "He"
  if (first_boundary == 3) return Type::LINE;       // After "He "
  if (first_boundary == 6) return Type::SENTENCE;   // After "He is."
  UNREACHABLE();
}
// Maps a Type onto the matching read-only root string handle
// (character / word / sentence / line).
Handle<String> TypeAsString(Isolate* isolate, Type type) {
  ReadOnlyRoots roots(isolate);
  switch (type) {
    case Type::WORD:
      return roots.word_string_handle();
    case Type::CHARACTER:
      return roots.character_string_handle();
    case Type::LINE:
      return roots.line_string_handle();
    case Type::SENTENCE:
      return roots.sentence_string_handle();
  }
  // Every enumerator returns above.
  UNREACHABLE();
}
} // anonymous namespace
// Creates a new plain JS object describing `break_iterator`: its stored locale
// is added under factory->locale_string() and the name of its type (derived
// via GetType() from the underlying ICU iterator) under factory->type_string().
Handle<JSObject> JSV8BreakIterator::ResolvedOptions(
    Isolate* isolate, DirectHandle<JSV8BreakIterator> break_iterator) {
  Factory* factory = isolate->factory();

  icu::BreakIterator* icu_iterator = break_iterator->break_iterator()->raw();
  Handle<String> type_name = TypeAsString(isolate, GetType(icu_iterator));

  Handle<JSObject> options = factory->NewJSObject(isolate->object_function());
  DirectHandle<String> locale_name(break_iterator->locale(), isolate);

  JSObject::AddProperty(isolate, options, factory->locale_string(),
                        locale_name, NONE);
  JSObject::AddProperty(isolate, options, factory->type_string(), type_name,
                        NONE);
  return options;
}
// Points the ICU break iterator held by `break_iterator_holder` at `text` via
// Intl::SetTextToBreakIterator and stores the managed UnicodeString that call
// returns on the holder.
void JSV8BreakIterator::AdoptText(
    Isolate* isolate, DirectHandle<JSV8BreakIterator> break_iterator_holder,
    Handle<String> text) {
  icu::BreakIterator* icu_iterator =
      break_iterator_holder->break_iterator()->raw();
  DCHECK_NOT_NULL(icu_iterator);

  DirectHandle<Managed<icu::UnicodeString>> adopted_text =
      Intl::SetTextToBreakIterator(isolate, text, icu_iterator);
  break_iterator_holder->set_unicode_string(*adopted_text);
}
// Returns the result of the ICU iterator's current() as a JS number.
Handle<Object> JSV8BreakIterator::Current(
    Isolate* isolate, DirectHandle<JSV8BreakIterator> break_iterator) {
  icu::BreakIterator* icu_iterator = break_iterator->break_iterator()->raw();
  const int32_t position = icu_iterator->current();
  return isolate->factory()->NewNumberFromInt(position);
}
// Calls first() on the ICU iterator and returns its result as a JS number.
Handle<Object> JSV8BreakIterator::First(
    Isolate* isolate, DirectHandle<JSV8BreakIterator> break_iterator) {
  icu::BreakIterator* icu_iterator = break_iterator->break_iterator()->raw();
  const int32_t position = icu_iterator->first();
  return isolate->factory()->NewNumberFromInt(position);
}
// Calls next() on the ICU iterator and returns its result as a JS number.
Handle<Object> JSV8BreakIterator::Next(
    Isolate* isolate, DirectHandle<JSV8BreakIterator> break_iterator) {
  icu::BreakIterator* icu_iterator = break_iterator->break_iterator()->raw();
  const int32_t position = icu_iterator->next();
  return isolate->factory()->NewNumberFromInt(position);
}
// Translates the ICU rule status of the iterator's current position into one
// of the root strings none / number / letter / kana / ideo, or unknown when
// the status falls into none of the UBRK_WORD_* ranges.
Tagged<String> JSV8BreakIterator::BreakType(
    Isolate* isolate, DirectHandle<JSV8BreakIterator> break_iterator) {
  const int32_t rule_status =
      break_iterator->break_iterator()->raw()->getRuleStatus();
  ReadOnlyRoots roots(isolate);

  // True when rule_status lies in the half-open range [first, limit).
  const auto status_in = [rule_status](int32_t first, int32_t limit) {
    return first <= rule_status && rule_status < limit;
  };

  // Keep return values in sync with JavaScript BreakType enum.
  if (status_in(UBRK_WORD_NONE, UBRK_WORD_NONE_LIMIT)) {
    return roots.none_string();
  }
  if (status_in(UBRK_WORD_NUMBER, UBRK_WORD_NUMBER_LIMIT)) {
    return roots.number_string();
  }
  if (status_in(UBRK_WORD_LETTER, UBRK_WORD_LETTER_LIMIT)) {
    return roots.letter_string();
  }
  if (status_in(UBRK_WORD_KANA, UBRK_WORD_KANA_LIMIT)) {
    return roots.kana_string();
  }
  if (status_in(UBRK_WORD_IDEO, UBRK_WORD_IDEO_LIMIT)) {
    return roots.ideo_string();
  }
  return roots.unknown_string();
}
// The break iterator has no locale list of its own; it forwards to the set
// returned by Intl::GetAvailableLocales().
const std::set<std::string>& JSV8BreakIterator::GetAvailableLocales() {
  const std::set<std::string>& intl_locales = Intl::GetAvailableLocales();
  return intl_locales;
}
} // namespace internal
} // namespace v8