| // Copyright Joyent, Inc. and other Node contributors. |
| // |
| // Permission is hereby granted, free of charge, to any person obtaining a |
| // copy of this software and associated documentation files (the |
| // "Software"), to deal in the Software without restriction, including |
| // without limitation the rights to use, copy, modify, merge, publish, |
| // distribute, sublicense, and/or sell copies of the Software, and to permit |
| // persons to whom the Software is furnished to do so, subject to the |
| // following conditions: |
| // |
| // The above copyright notice and this permission notice shall be included |
| // in all copies or substantial portions of the Software. |
| // |
| // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN |
| // NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, |
| // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
| // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
| // USE OR OTHER DEALINGS IN THE SOFTWARE. |
| |
| /* |
| * notes: by srl295 |
| * - When in NODE_HAVE_SMALL_ICU mode, ICU is linked against "stub" (null) data |
| * ( stubdata/libicudata.a ) containing nothing, no data, and it's also |
| * linked against a "small" data file which the SMALL_ICUDATA_ENTRY_POINT |
| * macro names. That's the "english+root" data. |
| * |
| * If icu_data_path is non-null, the user has provided a path and we assume |
| * it goes somewhere useful. We set that path in ICU, and exit. |
| * If icu_data_path is null, they haven't set a path and we want the |
| * "english+root" data. We call |
| * udata_setCommonData(SMALL_ICUDATA_ENTRY_POINT,...) |
| * to load up the english+root data. |
| * |
| * - when NOT in NODE_HAVE_SMALL_ICU mode, ICU is linked directly with its full |
| * data. All of the variables and command line options for changing data at |
| * runtime are disabled, as they wouldn't fully override the internal data. |
| * See: http://bugs.icu-project.org/trac/ticket/10924 |
| */ |
| |
| |
| #include "node_i18n.h" |
| |
| #if defined(NODE_HAVE_I18N_SUPPORT) |
| |
| #include "base_object-inl.h" |
| #include "node.h" |
| #include "node_buffer.h" |
| #include "node_errors.h" |
| #include "node_internals.h" |
| #include "util-inl.h" |
| #include "v8.h" |
| |
| #include <unicode/utypes.h> |
| #include <unicode/putil.h> |
| #include <unicode/uchar.h> |
| #include <unicode/uclean.h> |
| #include <unicode/udata.h> |
| #include <unicode/uidna.h> |
| #include <unicode/ucnv.h> |
| #include <unicode/utf8.h> |
| #include <unicode/utf16.h> |
| #include <unicode/timezone.h> |
| #include <unicode/ulocdata.h> |
| #include <unicode/uvernum.h> |
| #include <unicode/uversion.h> |
| #include <unicode/ustring.h> |
| |
| #ifdef NODE_HAVE_SMALL_ICU |
| /* if this is defined, we have a 'secondary' entry point. |
| compare following to utypes.h defs for U_ICUDATA_ENTRY_POINT */ |
| #define SMALL_ICUDATA_ENTRY_POINT \ |
| SMALL_DEF2(U_ICU_VERSION_MAJOR_NUM, U_LIB_SUFFIX_C_NAME) |
| #define SMALL_DEF2(major, suff) SMALL_DEF(major, suff) |
| #ifndef U_LIB_SUFFIX_C_NAME |
| #define SMALL_DEF(major, suff) icusmdt##major##_dat |
| #else |
| #define SMALL_DEF(major, suff) icusmdt##suff##major##_dat |
| #endif |
| |
| extern "C" const char U_DATA_API SMALL_ICUDATA_ENTRY_POINT[]; |
| #endif |
| |
| namespace node { |
| |
| using v8::Context; |
| using v8::FunctionCallbackInfo; |
| using v8::FunctionTemplate; |
| using v8::Int32; |
| using v8::Isolate; |
| using v8::Local; |
| using v8::MaybeLocal; |
| using v8::NewStringType; |
| using v8::Object; |
| using v8::ObjectTemplate; |
| using v8::String; |
| using v8::Uint8Array; |
| using v8::Value; |
| |
| namespace i18n { |
| namespace { |
| |
| template <typename T> |
| MaybeLocal<Object> ToBufferEndian(Environment* env, MaybeStackBuffer<T>* buf) { |
| MaybeLocal<Object> ret = Buffer::New(env, buf); |
| if (ret.IsEmpty()) |
| return ret; |
| |
| static_assert(sizeof(T) == 1 || sizeof(T) == 2, |
| "Currently only one- or two-byte buffers are supported"); |
| if (sizeof(T) > 1 && IsBigEndian()) { |
| SPREAD_BUFFER_ARG(ret.ToLocalChecked(), retbuf); |
| SwapBytes16(retbuf_data, retbuf_length); |
| } |
| |
| return ret; |
| } |
| |
| // One-Shot Converters |
| |
| void CopySourceBuffer(MaybeStackBuffer<UChar>* dest, |
| const char* data, |
| const size_t length, |
| const size_t length_in_chars) { |
| dest->AllocateSufficientStorage(length_in_chars); |
| char* dst = reinterpret_cast<char*>(**dest); |
| memcpy(dst, data, length); |
| if (IsBigEndian()) { |
| SwapBytes16(dst, length); |
| } |
| } |
| |
| typedef MaybeLocal<Object> (*TranscodeFunc)(Environment* env, |
| const char* fromEncoding, |
| const char* toEncoding, |
| const char* source, |
| const size_t source_length, |
| UErrorCode* status); |
| |
| MaybeLocal<Object> Transcode(Environment* env, |
| const char* fromEncoding, |
| const char* toEncoding, |
| const char* source, |
| const size_t source_length, |
| UErrorCode* status) { |
| *status = U_ZERO_ERROR; |
| MaybeLocal<Object> ret; |
| MaybeStackBuffer<char> result; |
| Converter to(toEncoding, "?"); |
| Converter from(fromEncoding); |
| const uint32_t limit = source_length * to.max_char_size(); |
| result.AllocateSufficientStorage(limit); |
| char* target = *result; |
| ucnv_convertEx(to.conv(), from.conv(), &target, target + limit, |
| &source, source + source_length, nullptr, nullptr, |
| nullptr, nullptr, true, true, status); |
| if (U_SUCCESS(*status)) { |
| result.SetLength(target - &result[0]); |
| ret = ToBufferEndian(env, &result); |
| } |
| return ret; |
| } |
| |
| MaybeLocal<Object> TranscodeToUcs2(Environment* env, |
| const char* fromEncoding, |
| const char* toEncoding, |
| const char* source, |
| const size_t source_length, |
| UErrorCode* status) { |
| *status = U_ZERO_ERROR; |
| MaybeLocal<Object> ret; |
| MaybeStackBuffer<UChar> destbuf(source_length); |
| Converter from(fromEncoding); |
| const size_t length_in_chars = source_length * sizeof(UChar); |
| ucnv_toUChars(from.conv(), *destbuf, length_in_chars, |
| source, source_length, status); |
| if (U_SUCCESS(*status)) |
| ret = ToBufferEndian(env, &destbuf); |
| return ret; |
| } |
| |
| MaybeLocal<Object> TranscodeFromUcs2(Environment* env, |
| const char* fromEncoding, |
| const char* toEncoding, |
| const char* source, |
| const size_t source_length, |
| UErrorCode* status) { |
| *status = U_ZERO_ERROR; |
| MaybeStackBuffer<UChar> sourcebuf; |
| MaybeLocal<Object> ret; |
| Converter to(toEncoding, "?"); |
| const size_t length_in_chars = source_length / sizeof(UChar); |
| CopySourceBuffer(&sourcebuf, source, source_length, length_in_chars); |
| MaybeStackBuffer<char> destbuf(length_in_chars); |
| const uint32_t len = ucnv_fromUChars(to.conv(), *destbuf, length_in_chars, |
| *sourcebuf, length_in_chars, status); |
| if (U_SUCCESS(*status)) { |
| destbuf.SetLength(len); |
| ret = ToBufferEndian(env, &destbuf); |
| } |
| return ret; |
| } |
| |
| MaybeLocal<Object> TranscodeUcs2FromUtf8(Environment* env, |
| const char* fromEncoding, |
| const char* toEncoding, |
| const char* source, |
| const size_t source_length, |
| UErrorCode* status) { |
| *status = U_ZERO_ERROR; |
| MaybeStackBuffer<UChar> destbuf; |
| int32_t result_length; |
| u_strFromUTF8(*destbuf, destbuf.capacity(), &result_length, |
| source, source_length, status); |
| MaybeLocal<Object> ret; |
| if (U_SUCCESS(*status)) { |
| destbuf.SetLength(result_length); |
| ret = ToBufferEndian(env, &destbuf); |
| } else if (*status == U_BUFFER_OVERFLOW_ERROR) { |
| *status = U_ZERO_ERROR; |
| destbuf.AllocateSufficientStorage(result_length); |
| u_strFromUTF8(*destbuf, result_length, &result_length, |
| source, source_length, status); |
| if (U_SUCCESS(*status)) { |
| destbuf.SetLength(result_length); |
| ret = ToBufferEndian(env, &destbuf); |
| } |
| } |
| return ret; |
| } |
| |
| MaybeLocal<Object> TranscodeUtf8FromUcs2(Environment* env, |
| const char* fromEncoding, |
| const char* toEncoding, |
| const char* source, |
| const size_t source_length, |
| UErrorCode* status) { |
| *status = U_ZERO_ERROR; |
| MaybeLocal<Object> ret; |
| const size_t length_in_chars = source_length / sizeof(UChar); |
| int32_t result_length; |
| MaybeStackBuffer<UChar> sourcebuf; |
| MaybeStackBuffer<char> destbuf; |
| CopySourceBuffer(&sourcebuf, source, source_length, length_in_chars); |
| u_strToUTF8(*destbuf, destbuf.capacity(), &result_length, |
| *sourcebuf, length_in_chars, status); |
| if (U_SUCCESS(*status)) { |
| destbuf.SetLength(result_length); |
| ret = ToBufferEndian(env, &destbuf); |
| } else if (*status == U_BUFFER_OVERFLOW_ERROR) { |
| *status = U_ZERO_ERROR; |
| destbuf.AllocateSufficientStorage(result_length); |
| u_strToUTF8(*destbuf, result_length, &result_length, *sourcebuf, |
| length_in_chars, status); |
| if (U_SUCCESS(*status)) { |
| destbuf.SetLength(result_length); |
| ret = ToBufferEndian(env, &destbuf); |
| } |
| } |
| return ret; |
| } |
| |
| const char* EncodingName(const enum encoding encoding) { |
| switch (encoding) { |
| case ASCII: return "us-ascii"; |
| case LATIN1: return "iso8859-1"; |
| case UCS2: return "utf16le"; |
| case UTF8: return "utf-8"; |
| default: return nullptr; |
| } |
| } |
| |
| bool SupportedEncoding(const enum encoding encoding) { |
| switch (encoding) { |
| case ASCII: |
| case LATIN1: |
| case UCS2: |
| case UTF8: return true; |
| default: return false; |
| } |
| } |
| |
| void Transcode(const FunctionCallbackInfo<Value>&args) { |
| Environment* env = Environment::GetCurrent(args); |
| Isolate* isolate = env->isolate(); |
| UErrorCode status = U_ZERO_ERROR; |
| MaybeLocal<Object> result; |
| |
| ArrayBufferViewContents<char> input(args[0]); |
| const enum encoding fromEncoding = ParseEncoding(isolate, args[1], BUFFER); |
| const enum encoding toEncoding = ParseEncoding(isolate, args[2], BUFFER); |
| |
| if (SupportedEncoding(fromEncoding) && SupportedEncoding(toEncoding)) { |
| TranscodeFunc tfn = &Transcode; |
| switch (fromEncoding) { |
| case ASCII: |
| case LATIN1: |
| if (toEncoding == UCS2) |
| tfn = &TranscodeToUcs2; |
| break; |
| case UTF8: |
| if (toEncoding == UCS2) |
| tfn = &TranscodeUcs2FromUtf8; |
| break; |
| case UCS2: |
| switch (toEncoding) { |
| case UCS2: |
| tfn = &Transcode; |
| break; |
| case UTF8: |
| tfn = &TranscodeUtf8FromUcs2; |
| break; |
| default: |
| tfn = &TranscodeFromUcs2; |
| } |
| break; |
| default: |
| // This should not happen because of the SupportedEncoding checks |
| ABORT(); |
| } |
| |
| result = tfn(env, EncodingName(fromEncoding), EncodingName(toEncoding), |
| input.data(), input.length(), &status); |
| } else { |
| status = U_ILLEGAL_ARGUMENT_ERROR; |
| } |
| |
| if (result.IsEmpty()) |
| return args.GetReturnValue().Set(status); |
| |
| return args.GetReturnValue().Set(result.ToLocalChecked()); |
| } |
| |
| void ICUErrorName(const FunctionCallbackInfo<Value>& args) { |
| Environment* env = Environment::GetCurrent(args); |
| CHECK(args[0]->IsInt32()); |
| UErrorCode status = static_cast<UErrorCode>(args[0].As<Int32>()->Value()); |
| args.GetReturnValue().Set( |
| String::NewFromUtf8(env->isolate(), |
| u_errorName(status), |
| NewStringType::kNormal).ToLocalChecked()); |
| } |
| |
| } // anonymous namespace |
| |
| Converter::Converter(const char* name, const char* sub) { |
| UErrorCode status = U_ZERO_ERROR; |
| UConverter* conv = ucnv_open(name, &status); |
| CHECK(U_SUCCESS(status)); |
| conv_.reset(conv); |
| set_subst_chars(sub); |
| } |
| |
| Converter::Converter(UConverter* converter, const char* sub) |
| : conv_(converter) { |
| set_subst_chars(sub); |
| } |
| |
| void Converter::set_subst_chars(const char* sub) { |
| CHECK(conv_); |
| UErrorCode status = U_ZERO_ERROR; |
| if (sub != nullptr) { |
| ucnv_setSubstChars(conv_.get(), sub, strlen(sub), &status); |
| CHECK(U_SUCCESS(status)); |
| } |
| } |
| |
| void Converter::reset() { |
| ucnv_reset(conv_.get()); |
| } |
| |
| size_t Converter::min_char_size() const { |
| CHECK(conv_); |
| return ucnv_getMinCharSize(conv_.get()); |
| } |
| |
| size_t Converter::max_char_size() const { |
| CHECK(conv_); |
| return ucnv_getMaxCharSize(conv_.get()); |
| } |
| |
| void ConverterObject::Has(const FunctionCallbackInfo<Value>& args) { |
| Environment* env = Environment::GetCurrent(args); |
| |
| CHECK_GE(args.Length(), 1); |
| Utf8Value label(env->isolate(), args[0]); |
| |
| UErrorCode status = U_ZERO_ERROR; |
| ConverterPointer conv(ucnv_open(*label, &status)); |
| args.GetReturnValue().Set(!!U_SUCCESS(status)); |
| } |
| |
| void ConverterObject::Create(const FunctionCallbackInfo<Value>& args) { |
| Environment* env = Environment::GetCurrent(args); |
| |
| Local<ObjectTemplate> t = env->i18n_converter_template(); |
| Local<Object> obj; |
| if (!t->NewInstance(env->context()).ToLocal(&obj)) return; |
| |
| CHECK_GE(args.Length(), 2); |
| Utf8Value label(env->isolate(), args[0]); |
| int flags = args[1]->Uint32Value(env->context()).ToChecked(); |
| bool fatal = |
| (flags & CONVERTER_FLAGS_FATAL) == CONVERTER_FLAGS_FATAL; |
| |
| UErrorCode status = U_ZERO_ERROR; |
| UConverter* conv = ucnv_open(*label, &status); |
| if (U_FAILURE(status)) |
| return; |
| |
| if (fatal) { |
| status = U_ZERO_ERROR; |
| ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_STOP, |
| nullptr, nullptr, nullptr, &status); |
| } |
| |
| new ConverterObject(env, obj, conv, flags); |
| args.GetReturnValue().Set(obj); |
| } |
| |
| void ConverterObject::Decode(const FunctionCallbackInfo<Value>& args) { |
| Environment* env = Environment::GetCurrent(args); |
| |
| CHECK_GE(args.Length(), 3); // Converter, Buffer, Flags |
| |
| ConverterObject* converter; |
| ASSIGN_OR_RETURN_UNWRAP(&converter, args[0].As<Object>()); |
| ArrayBufferViewContents<char> input(args[1]); |
| int flags = args[2]->Uint32Value(env->context()).ToChecked(); |
| |
| UErrorCode status = U_ZERO_ERROR; |
| MaybeStackBuffer<UChar> result; |
| MaybeLocal<Object> ret; |
| size_t limit = converter->min_char_size() * input.length(); |
| if (limit > 0) |
| result.AllocateSufficientStorage(limit); |
| |
| UBool flush = (flags & CONVERTER_FLAGS_FLUSH) == CONVERTER_FLAGS_FLUSH; |
| auto cleanup = OnScopeLeave([&]() { |
| if (flush) { |
| // Reset the converter state. |
| converter->set_bom_seen(false); |
| converter->reset(); |
| } |
| }); |
| |
| const char* source = input.data(); |
| size_t source_length = input.length(); |
| |
| UChar* target = *result; |
| ucnv_toUnicode(converter->conv(), |
| &target, |
| target + (limit * sizeof(UChar)), |
| &source, |
| source + source_length, |
| nullptr, |
| flush, |
| &status); |
| |
| if (U_SUCCESS(status)) { |
| bool omit_initial_bom = false; |
| if (limit > 0) { |
| result.SetLength(target - &result[0]); |
| if (result.length() > 0 && |
| converter->unicode() && |
| !converter->ignore_bom() && |
| !converter->bom_seen()) { |
| // If the very first result in the stream is a BOM, and we are not |
| // explicitly told to ignore it, then we mark it for discarding. |
| if (result[0] == 0xFEFF) |
| omit_initial_bom = true; |
| converter->set_bom_seen(true); |
| } |
| } |
| ret = ToBufferEndian(env, &result); |
| if (omit_initial_bom && !ret.IsEmpty()) { |
| // Peform `ret = ret.slice(2)`. |
| CHECK(ret.ToLocalChecked()->IsUint8Array()); |
| Local<Uint8Array> orig_ret = ret.ToLocalChecked().As<Uint8Array>(); |
| ret = Buffer::New(env, |
| orig_ret->Buffer(), |
| orig_ret->ByteOffset() + 2, |
| orig_ret->ByteLength() - 2) |
| .FromMaybe(Local<Uint8Array>()); |
| } |
| if (!ret.IsEmpty()) |
| args.GetReturnValue().Set(ret.ToLocalChecked()); |
| return; |
| } |
| |
| args.GetReturnValue().Set(status); |
| } |
| |
| ConverterObject::ConverterObject( |
| Environment* env, |
| Local<Object> wrap, |
| UConverter* converter, |
| int flags, |
| const char* sub) |
| : BaseObject(env, wrap), |
| Converter(converter, sub), |
| flags_(flags) { |
| MakeWeak(); |
| |
| switch (ucnv_getType(converter)) { |
| case UCNV_UTF8: |
| case UCNV_UTF16_BigEndian: |
| case UCNV_UTF16_LittleEndian: |
| flags_ |= CONVERTER_FLAGS_UNICODE; |
| break; |
| default: { |
| // Fall through |
| } |
| } |
| } |
| |
| |
| bool InitializeICUDirectory(const std::string& path) { |
| UErrorCode status = U_ZERO_ERROR; |
| if (path.empty()) { |
| #ifdef NODE_HAVE_SMALL_ICU |
| // install the 'small' data. |
| udata_setCommonData(&SMALL_ICUDATA_ENTRY_POINT, &status); |
| #else // !NODE_HAVE_SMALL_ICU |
| // no small data, so nothing to do. |
| #endif // !NODE_HAVE_SMALL_ICU |
| } else { |
| u_setDataDirectory(path.c_str()); |
| u_init(&status); |
| } |
| return status == U_ZERO_ERROR; |
| } |
| |
| int32_t ToUnicode(MaybeStackBuffer<char>* buf, |
| const char* input, |
| size_t length) { |
| UErrorCode status = U_ZERO_ERROR; |
| uint32_t options = UIDNA_NONTRANSITIONAL_TO_UNICODE; |
| UIDNA* uidna = uidna_openUTS46(options, &status); |
| if (U_FAILURE(status)) |
| return -1; |
| UIDNAInfo info = UIDNA_INFO_INITIALIZER; |
| |
| int32_t len = uidna_nameToUnicodeUTF8(uidna, |
| input, length, |
| **buf, buf->capacity(), |
| &info, |
| &status); |
| |
| // Do not check info.errors like we do with ToASCII since ToUnicode always |
| // returns a string, despite any possible errors that may have occurred. |
| |
| if (status == U_BUFFER_OVERFLOW_ERROR) { |
| status = U_ZERO_ERROR; |
| buf->AllocateSufficientStorage(len); |
| len = uidna_nameToUnicodeUTF8(uidna, |
| input, length, |
| **buf, buf->capacity(), |
| &info, |
| &status); |
| } |
| |
| // info.errors is ignored as UTS #46 ToUnicode always produces a Unicode |
| // string, regardless of whether an error occurred. |
| |
| if (U_FAILURE(status)) { |
| len = -1; |
| buf->SetLength(0); |
| } else { |
| buf->SetLength(len); |
| } |
| |
| uidna_close(uidna); |
| return len; |
| } |
| |
| int32_t ToASCII(MaybeStackBuffer<char>* buf, |
| const char* input, |
| size_t length, |
| enum idna_mode mode) { |
| UErrorCode status = U_ZERO_ERROR; |
| uint32_t options = // CheckHyphens = false; handled later |
| UIDNA_CHECK_BIDI | // CheckBidi = true |
| UIDNA_CHECK_CONTEXTJ | // CheckJoiners = true |
| UIDNA_NONTRANSITIONAL_TO_ASCII; // Nontransitional_Processing |
| if (mode == IDNA_STRICT) { |
| options |= UIDNA_USE_STD3_RULES; // UseSTD3ASCIIRules = beStrict |
| // VerifyDnsLength = beStrict; |
| // handled later |
| } |
| |
| UIDNA* uidna = uidna_openUTS46(options, &status); |
| if (U_FAILURE(status)) |
| return -1; |
| UIDNAInfo info = UIDNA_INFO_INITIALIZER; |
| |
| int32_t len = uidna_nameToASCII_UTF8(uidna, |
| input, length, |
| **buf, buf->capacity(), |
| &info, |
| &status); |
| |
| if (status == U_BUFFER_OVERFLOW_ERROR) { |
| status = U_ZERO_ERROR; |
| buf->AllocateSufficientStorage(len); |
| len = uidna_nameToASCII_UTF8(uidna, |
| input, length, |
| **buf, buf->capacity(), |
| &info, |
| &status); |
| } |
| |
| // In UTS #46 which specifies ToASCII, certain error conditions are |
| // configurable through options, and the WHATWG URL Standard promptly elects |
| // to disable some of them to accommodate for real-world use cases. |
| // Unfortunately, ICU4C's IDNA module does not support disabling some of |
| // these options through `options` above, and thus continues throwing |
| // unnecessary errors. To counter this situation, we just filter out the |
| // errors that may have happened afterwards, before deciding whether to |
| // return an error from this function. |
| |
| // CheckHyphens = false |
| // (Specified in the current UTS #46 draft rev. 18.) |
| // Refs: |
| // - https://github.com/whatwg/url/issues/53 |
| // - https://github.com/whatwg/url/pull/309 |
| // - http://www.unicode.org/review/pri317/ |
| // - http://www.unicode.org/reports/tr46/tr46-18.html |
| // - https://www.icann.org/news/announcement-2000-01-07-en |
| info.errors &= ~UIDNA_ERROR_HYPHEN_3_4; |
| info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN; |
| info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN; |
| |
| if (mode != IDNA_STRICT) { |
| // VerifyDnsLength = beStrict |
| info.errors &= ~UIDNA_ERROR_EMPTY_LABEL; |
| info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG; |
| info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; |
| } |
| |
| if (U_FAILURE(status) || (mode != IDNA_LENIENT && info.errors != 0)) { |
| len = -1; |
| buf->SetLength(0); |
| } else { |
| buf->SetLength(len); |
| } |
| |
| uidna_close(uidna); |
| return len; |
| } |
| |
| static void ToUnicode(const FunctionCallbackInfo<Value>& args) { |
| Environment* env = Environment::GetCurrent(args); |
| CHECK_GE(args.Length(), 1); |
| CHECK(args[0]->IsString()); |
| Utf8Value val(env->isolate(), args[0]); |
| |
| MaybeStackBuffer<char> buf; |
| int32_t len = ToUnicode(&buf, *val, val.length()); |
| |
| if (len < 0) { |
| return THROW_ERR_INVALID_ARG_VALUE(env, "Cannot convert name to Unicode"); |
| } |
| |
| args.GetReturnValue().Set( |
| String::NewFromUtf8(env->isolate(), |
| *buf, |
| NewStringType::kNormal, |
| len).ToLocalChecked()); |
| } |
| |
| static void ToASCII(const FunctionCallbackInfo<Value>& args) { |
| Environment* env = Environment::GetCurrent(args); |
| CHECK_GE(args.Length(), 1); |
| CHECK(args[0]->IsString()); |
| Utf8Value val(env->isolate(), args[0]); |
| // optional arg |
| bool lenient = args[1]->BooleanValue(env->isolate()); |
| enum idna_mode mode = lenient ? IDNA_LENIENT : IDNA_DEFAULT; |
| |
| MaybeStackBuffer<char> buf; |
| int32_t len = ToASCII(&buf, *val, val.length(), mode); |
| |
| if (len < 0) { |
| return THROW_ERR_INVALID_ARG_VALUE(env, "Cannot convert name to ASCII"); |
| } |
| |
| args.GetReturnValue().Set( |
| String::NewFromUtf8(env->isolate(), |
| *buf, |
| NewStringType::kNormal, |
| len).ToLocalChecked()); |
| } |
| |
| // This is similar to wcwidth except that it takes the current unicode |
| // character properties database into consideration, allowing it to |
| // correctly calculate the column widths of things like emoji and |
| // newer wide characters. wcwidth, on the other hand, uses a fixed |
| // algorithm that does not take things like emoji into proper |
| // consideration. |
| // |
| // TODO(TimothyGu): Investigate Cc (C0/C1 control codes). Both VTE (used by |
| // GNOME Terminal) and Konsole don't consider them to be zero-width (see refs |
| // below), and when printed in VTE it is Narrow. However GNOME Terminal doesn't |
| // allow it to be input. Linux's PTY terminal prints control characters as |
| // Narrow rhombi. |
| // |
| // TODO(TimothyGu): Investigate Hangul jamo characters. Medial vowels and final |
| // consonants are 0-width when combined with initial consonants; otherwise they |
| // are technically Wide. But many terminals (including Konsole and |
| // VTE/GLib-based) implement all medials and finals as 0-width. |
| // |
| // Refs: https://eev.ee/blog/2015/09/12/dark-corners-of-unicode/#combining-characters-and-character-width |
| // Refs: https://github.com/GNOME/glib/blob/79e4d4c6be/glib/guniprop.c#L388-L420 |
| // Refs: https://github.com/KDE/konsole/blob/8c6a5d13c0/src/konsole_wcwidth.cpp#L101-L223 |
| static int GetColumnWidth(UChar32 codepoint, |
| bool ambiguous_as_full_width = false) { |
| // UCHAR_EAST_ASIAN_WIDTH is the Unicode property that identifies a |
| // codepoint as being full width, wide, ambiguous, neutral, narrow, |
| // or halfwidth. |
| const int eaw = u_getIntPropertyValue(codepoint, UCHAR_EAST_ASIAN_WIDTH); |
| switch (eaw) { |
| case U_EA_FULLWIDTH: |
| case U_EA_WIDE: |
| return 2; |
| case U_EA_AMBIGUOUS: |
| // See: http://www.unicode.org/reports/tr11/#Ambiguous for details |
| if (ambiguous_as_full_width) { |
| return 2; |
| } |
| // If ambiguous_as_full_width is false: |
| // Fall through |
| case U_EA_NEUTRAL: |
| if (u_hasBinaryProperty(codepoint, UCHAR_EMOJI_PRESENTATION)) { |
| return 2; |
| } |
| // Fall through |
| case U_EA_HALFWIDTH: |
| case U_EA_NARROW: |
| default: |
| const auto zero_width_mask = U_GC_CC_MASK | // C0/C1 control code |
| U_GC_CF_MASK | // Format control character |
| U_GC_ME_MASK | // Enclosing mark |
| U_GC_MN_MASK; // Nonspacing mark |
| if (codepoint != 0x00AD && // SOFT HYPHEN is Cf but not zero-width |
| ((U_MASK(u_charType(codepoint)) & zero_width_mask) || |
| u_hasBinaryProperty(codepoint, UCHAR_EMOJI_MODIFIER))) { |
| return 0; |
| } |
| return 1; |
| } |
| } |
| |
| // Returns the column width for the given String. |
| static void GetStringWidth(const FunctionCallbackInfo<Value>& args) { |
| Environment* env = Environment::GetCurrent(args); |
| CHECK(args[0]->IsString()); |
| |
| bool ambiguous_as_full_width = args[1]->IsTrue(); |
| bool expand_emoji_sequence = !args[2]->IsBoolean() || args[2]->IsTrue(); |
| |
| TwoByteValue value(env->isolate(), args[0]); |
| // reinterpret_cast is required by windows to compile |
| UChar* str = reinterpret_cast<UChar*>(*value); |
| static_assert(sizeof(*str) == sizeof(**value), |
| "sizeof(*str) == sizeof(**value)"); |
| UChar32 c = 0; |
| UChar32 p; |
| size_t n = 0; |
| uint32_t width = 0; |
| |
| while (n < value.length()) { |
| p = c; |
| U16_NEXT(str, n, value.length(), c); |
| // Don't count individual emoji codepoints that occur within an |
| // emoji sequence. This is not necessarily foolproof. Some |
| // environments display emoji sequences in the appropriate |
| // condensed form (as a single emoji glyph), other environments |
| // may not understand an emoji sequence and will display each |
| // individual emoji separately. When this happens, the width |
| // calculated will be off, and there's no reliable way of knowing |
| // in advance if a particular sequence is going to be supported. |
| // The expand_emoji_sequence option allows the caller to skip this |
| // check and count each code within an emoji sequence separately. |
| // https://www.unicode.org/reports/tr51/tr51-16.html#Emoji_ZWJ_Sequences |
| if (!expand_emoji_sequence && |
| n > 0 && p == 0x200d && // 0x200d == ZWJ (zero width joiner) |
| (u_hasBinaryProperty(c, UCHAR_EMOJI_PRESENTATION) || |
| u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER))) { |
| continue; |
| } |
| width += GetColumnWidth(c, ambiguous_as_full_width); |
| } |
| args.GetReturnValue().Set(width); |
| } |
| |
| void Initialize(Local<Object> target, |
| Local<Value> unused, |
| Local<Context> context, |
| void* priv) { |
| Environment* env = Environment::GetCurrent(context); |
| env->SetMethod(target, "toUnicode", ToUnicode); |
| env->SetMethod(target, "toASCII", ToASCII); |
| env->SetMethod(target, "getStringWidth", GetStringWidth); |
| |
| // One-shot converters |
| env->SetMethod(target, "icuErrName", ICUErrorName); |
| env->SetMethod(target, "transcode", Transcode); |
| |
| // ConverterObject |
| { |
| Local<FunctionTemplate> t = FunctionTemplate::New(env->isolate()); |
| t->Inherit(BaseObject::GetConstructorTemplate(env)); |
| t->InstanceTemplate()->SetInternalFieldCount( |
| ConverterObject::kInternalFieldCount); |
| Local<String> converter_string = |
| FIXED_ONE_BYTE_STRING(env->isolate(), "Converter"); |
| t->SetClassName(converter_string); |
| env->set_i18n_converter_template(t->InstanceTemplate()); |
| } |
| |
| env->SetMethod(target, "getConverter", ConverterObject::Create); |
| env->SetMethod(target, "decode", ConverterObject::Decode); |
| env->SetMethod(target, "hasConverter", ConverterObject::Has); |
| } |
| |
| } // namespace i18n |
| } // namespace node |
| |
| NODE_MODULE_CONTEXT_AWARE_INTERNAL(icu, node::i18n::Initialize) |
| |
| #endif // NODE_HAVE_I18N_SUPPORT |