| /* |
| * (C) 1999 Lars Knoll (knoll@kde.org) |
| * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2012 Apple Inc. All rights |
| * reserved. |
| * Copyright (C) 2007-2009 Torch Mobile, Inc. |
| * |
| * This library is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Library General Public |
| * License as published by the Free Software Foundation; either |
| * version 2 of the License, or (at your option) any later version. |
| * |
| * This library is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Library General Public License for more details. |
| * |
| * You should have received a copy of the GNU Library General Public License |
| * along with this library; see the file COPYING.LIB. If not, write to |
| * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| * Boston, MA 02110-1301, USA. |
| */ |
| |
| #include "third_party/blink/renderer/platform/wtf/text/wtf_string.h" |
| |
| #include <locale.h> |
| #include <stdarg.h> |
| #include <algorithm> |
| #include "base/strings/string_util.h" |
| #include "build/build_config.h" |
| #include "third_party/blink/renderer/platform/wtf/ascii_ctype.h" |
| #include "third_party/blink/renderer/platform/wtf/dtoa.h" |
| #include "third_party/blink/renderer/platform/wtf/math_extras.h" |
| #include "third_party/blink/renderer/platform/wtf/text/character_names.h" |
| #include "third_party/blink/renderer/platform/wtf/text/cstring.h" |
| #include "third_party/blink/renderer/platform/wtf/text/unicode.h" |
| #include "third_party/blink/renderer/platform/wtf/text/utf8.h" |
| #include "third_party/blink/renderer/platform/wtf/vector.h" |
| |
| namespace WTF { |
| |
| // Construct a string with UTF-16 data. |
| String::String(const UChar* characters, unsigned length) |
| : impl_(characters ? StringImpl::Create(characters, length) : nullptr) {} |
| |
| // Construct a string with UTF-16 data, from a null-terminated source. |
| String::String(const UChar* str) { |
| if (!str) |
| return; |
| impl_ = StringImpl::Create(str, LengthOfNullTerminatedString(str)); |
| } |
| |
| // Construct a string with latin1 data. |
| String::String(const LChar* characters, unsigned length) |
| : impl_(characters ? StringImpl::Create(characters, length) : nullptr) {} |
| |
| String::String(const char* characters, unsigned length) |
| : impl_(characters |
| ? StringImpl::Create(reinterpret_cast<const LChar*>(characters), |
| length) |
| : nullptr) {} |
| |
| #if defined(ARCH_CPU_64_BITS) |
| String::String(const char* characters, size_t length) |
| : String(characters, SafeCast<unsigned>(length)) {} |
| #endif // defined(ARCH_CPU_64_BITS) |
| |
| void String::append(const StringView& string) { |
| if (string.IsEmpty()) |
| return; |
| if (!impl_) { |
| impl_ = string.ToString().ReleaseImpl(); |
| return; |
| } |
| |
| // FIXME: This is extremely inefficient. So much so that we might want to |
| // take this out of String's API. We can make it better by optimizing the |
| // case where exactly one String is pointing at this StringImpl, but even |
| // then it's going to require a call into the allocator every single time. |
| |
| if (impl_->Is8Bit() && string.Is8Bit()) { |
| LChar* data; |
| CHECK_LE(string.length(), |
| std::numeric_limits<unsigned>::max() - impl_->length()); |
| scoped_refptr<StringImpl> new_impl = StringImpl::CreateUninitialized( |
| impl_->length() + string.length(), data); |
| memcpy(data, impl_->Characters8(), impl_->length() * sizeof(LChar)); |
| memcpy(data + impl_->length(), string.Characters8(), |
| string.length() * sizeof(LChar)); |
| impl_ = std::move(new_impl); |
| return; |
| } |
| |
| UChar* data; |
| CHECK_LE(string.length(), |
| std::numeric_limits<unsigned>::max() - impl_->length()); |
| scoped_refptr<StringImpl> new_impl = |
| StringImpl::CreateUninitialized(impl_->length() + string.length(), data); |
| |
| if (impl_->Is8Bit()) |
| StringImpl::CopyChars(data, impl_->Characters8(), impl_->length()); |
| else |
| StringImpl::CopyChars(data, impl_->Characters16(), impl_->length()); |
| |
| if (string.Is8Bit()) |
| StringImpl::CopyChars(data + impl_->length(), string.Characters8(), |
| string.length()); |
| else |
| StringImpl::CopyChars(data + impl_->length(), string.Characters16(), |
| string.length()); |
| |
| impl_ = std::move(new_impl); |
| } |
| |
| template <typename CharacterType> |
| inline void String::AppendInternal(CharacterType c) { |
| // FIXME: This is extremely inefficient. So much so that we might want to |
| // take this out of String's API. We can make it better by optimizing the |
| // case where exactly one String is pointing at this StringImpl, but even |
| // then it's going to require a call into the allocator every single time. |
| if (!impl_) { |
| impl_ = StringImpl::Create(&c, 1); |
| return; |
| } |
| |
| // FIXME: We should be able to create an 8 bit string via this code path. |
| UChar* data; |
| CHECK_LT(impl_->length(), std::numeric_limits<unsigned>::max()); |
| scoped_refptr<StringImpl> new_impl = |
| StringImpl::CreateUninitialized(impl_->length() + 1, data); |
| if (impl_->Is8Bit()) |
| StringImpl::CopyChars(data, impl_->Characters8(), impl_->length()); |
| else |
| StringImpl::CopyChars(data, impl_->Characters16(), impl_->length()); |
| data[impl_->length()] = c; |
| impl_ = std::move(new_impl); |
| } |
| |
| void String::append(LChar c) { |
| AppendInternal(c); |
| } |
| |
| void String::append(UChar c) { |
| AppendInternal(c); |
| } |
| |
| int CodePointCompare(const String& a, const String& b) { |
| return CodePointCompare(a.Impl(), b.Impl()); |
| } |
| |
| int CodePointCompareIgnoringASCIICase(const String& a, const char* b) { |
| return CodePointCompareIgnoringASCIICase(a.Impl(), |
| reinterpret_cast<const LChar*>(b)); |
| } |
| |
| template <typename CharType> |
| scoped_refptr<StringImpl> InsertInternal(scoped_refptr<StringImpl> impl, |
| const CharType* characters_to_insert, |
| unsigned length_to_insert, |
| unsigned position) { |
| if (!length_to_insert) |
| return impl; |
| |
| DCHECK(characters_to_insert); |
| UChar* data; // FIXME: We should be able to create an 8 bit string here. |
| CHECK_LE(length_to_insert, |
| std::numeric_limits<unsigned>::max() - impl->length()); |
| scoped_refptr<StringImpl> new_impl = |
| StringImpl::CreateUninitialized(impl->length() + length_to_insert, data); |
| |
| if (impl->Is8Bit()) |
| StringImpl::CopyChars(data, impl->Characters8(), position); |
| else |
| StringImpl::CopyChars(data, impl->Characters16(), position); |
| |
| StringImpl::CopyChars(data + position, characters_to_insert, |
| length_to_insert); |
| |
| if (impl->Is8Bit()) |
| StringImpl::CopyChars(data + position + length_to_insert, |
| impl->Characters8() + position, |
| impl->length() - position); |
| else |
| StringImpl::CopyChars(data + position + length_to_insert, |
| impl->Characters16() + position, |
| impl->length() - position); |
| |
| return new_impl; |
| } |
| |
| void String::insert(const StringView& string, unsigned position) { |
| if (string.IsEmpty()) { |
| if (string.IsNull()) |
| return; |
| if (IsNull()) |
| impl_ = string.ToString().ReleaseImpl(); |
| return; |
| } |
| |
| if (position >= length()) { |
| if (string.Is8Bit()) |
| append(string); |
| else |
| append(string); |
| return; |
| } |
| |
| DCHECK(impl_); |
| if (string.Is8Bit()) |
| impl_ = InsertInternal(std::move(impl_), string.Characters8(), |
| string.length(), position); |
| else |
| impl_ = InsertInternal(std::move(impl_), string.Characters16(), |
| string.length(), position); |
| } |
| |
| UChar32 String::CharacterStartingAt(unsigned i) const { |
| if (!impl_ || i >= impl_->length()) |
| return 0; |
| return impl_->CharacterStartingAt(i); |
| } |
| |
| void String::Ensure16Bit() { |
| if (IsNull()) |
| return; |
| if (!Is8Bit()) |
| return; |
| if (unsigned length = this->length()) |
| impl_ = Make16BitFrom8BitSource(impl_->Characters8(), length).ReleaseImpl(); |
| else |
| impl_ = StringImpl::empty16_bit_; |
| } |
| |
| void String::Truncate(unsigned length) { |
| if (impl_) |
| impl_ = impl_->Truncate(length); |
| } |
| |
| void String::Remove(unsigned start, unsigned length_to_remove) { |
| if (impl_) |
| impl_ = impl_->Remove(start, length_to_remove); |
| } |
| |
| String String::Substring(unsigned pos, unsigned len) const { |
| if (!impl_) |
| return String(); |
| return impl_->Substring(pos, len); |
| } |
| |
| String String::DeprecatedLower() const { |
| if (!impl_) |
| return String(); |
| return impl_->LowerUnicode(); |
| } |
| |
| String String::LowerUnicode(const AtomicString& locale_identifier) const { |
| if (!impl_) |
| return String(); |
| return impl_->LowerUnicode(locale_identifier); |
| } |
| |
| String String::UpperUnicode(const AtomicString& locale_identifier) const { |
| if (!impl_) |
| return String(); |
| return impl_->UpperUnicode(locale_identifier); |
| } |
| |
| String String::LowerASCII() const { |
| if (!impl_) |
| return String(); |
| return impl_->LowerASCII(); |
| } |
| |
| String String::UpperASCII() const { |
| if (!impl_) |
| return String(); |
| return impl_->UpperASCII(); |
| } |
| |
| String String::StripWhiteSpace() const { |
| if (!impl_) |
| return String(); |
| return impl_->StripWhiteSpace(); |
| } |
| |
| String String::StripWhiteSpace(IsWhiteSpaceFunctionPtr is_white_space) const { |
| if (!impl_) |
| return String(); |
| return impl_->StripWhiteSpace(is_white_space); |
| } |
| |
| String String::SimplifyWhiteSpace(StripBehavior strip_behavior) const { |
| if (!impl_) |
| return String(); |
| return impl_->SimplifyWhiteSpace(strip_behavior); |
| } |
| |
| String String::SimplifyWhiteSpace(IsWhiteSpaceFunctionPtr is_white_space, |
| StripBehavior strip_behavior) const { |
| if (!impl_) |
| return String(); |
| return impl_->SimplifyWhiteSpace(is_white_space, strip_behavior); |
| } |
| |
| String String::RemoveCharacters(CharacterMatchFunctionPtr find_match) const { |
| if (!impl_) |
| return String(); |
| return impl_->RemoveCharacters(find_match); |
| } |
| |
| String String::FoldCase() const { |
| if (!impl_) |
| return String(); |
| return impl_->FoldCase(); |
| } |
| |
| String String::Format(const char* format, ...) { |
| // vsnprintf is locale sensitive when converting floats to strings |
| // and we need it to always use a decimal point. Double check that |
| // the locale is compatible, and also that it is the default "C" |
| // locale so that we aren't just lucky. Android's locales work |
| // differently so can't check the same way there. |
| DCHECK_EQ(strcmp(localeconv()->decimal_point, "."), 0); |
| #if !defined(OS_ANDROID) |
| DCHECK_EQ(strcmp(setlocale(LC_NUMERIC, NULL), "C"), 0); |
| #endif // !OS_ANDROID |
| |
| va_list args; |
| |
| // TODO(esprehn): base uses 1024, maybe we should use a bigger size too. |
| static const unsigned kDefaultSize = 256; |
| Vector<char, kDefaultSize> buffer(kDefaultSize); |
| |
| va_start(args, format); |
| int length = base::vsnprintf(buffer.data(), buffer.size(), format, args); |
| va_end(args); |
| |
| // TODO(esprehn): This can only happen if there's an encoding error, what's |
| // the locale set to inside blink? Can this happen? We should probably CHECK |
| // instead. |
| if (length < 0) |
| return String(); |
| |
| if (static_cast<unsigned>(length) >= buffer.size()) { |
| // vsnprintf doesn't include the NUL terminator in the length so we need to |
| // add space for it when growing. |
| buffer.Grow(length + 1); |
| |
| // We need to call va_end() and then va_start() each time we use args, as |
| // the contents of args is undefined after the call to vsnprintf according |
| // to http://man.cx/snprintf(3) |
| // |
| // Not calling va_end/va_start here happens to work on lots of systems, but |
| // fails e.g. on 64bit Linux. |
| va_start(args, format); |
| length = base::vsnprintf(buffer.data(), buffer.size(), format, args); |
| va_end(args); |
| } |
| |
| CHECK_LT(static_cast<unsigned>(length), buffer.size()); |
| return String(reinterpret_cast<const LChar*>(buffer.data()), length); |
| } |
| |
| String String::EncodeForDebugging() const { |
| if (IsNull()) |
| return "<null>"; |
| |
| String str; |
| str.append('"'); |
| for (unsigned index = 0; index < length(); ++index) { |
| // Print shorthands for select cases. |
| UChar character = (*impl_)[index]; |
| switch (character) { |
| case '\t': |
| str.append("\\t"); |
| break; |
| case '\n': |
| str.append("\\n"); |
| break; |
| case '\r': |
| str.append("\\r"); |
| break; |
| case '"': |
| str.append("\\\""); |
| break; |
| case '\\': |
| str.append("\\\\"); |
| break; |
| default: |
| if (IsASCIIPrintable(character)) { |
| str.append(static_cast<char>(character)); |
| } else { |
| // Print "\uXXXX" for control or non-ASCII characters. |
| str.append("\\u"); |
| std::stringstream out; |
| out.width(4); |
| out.fill('0'); |
| out.setf(std::ios_base::hex, std::ios_base::basefield); |
| out.setf(std::ios::uppercase); |
| out << character; |
| str.append(out.str().c_str()); |
| } |
| break; |
| } |
| } |
| str.append('"'); |
| return str; |
| } |
| |
| String String::Number(float number) { |
| return Number(static_cast<double>(number)); |
| } |
| |
| String String::Number(double number, unsigned precision) { |
| NumberToStringBuffer buffer; |
| return String(NumberToFixedPrecisionString(number, precision, buffer)); |
| } |
| |
| String String::NumberToStringECMAScript(double number) { |
| NumberToStringBuffer buffer; |
| return String(NumberToString(number, buffer)); |
| } |
| |
| String String::NumberToStringFixedWidth(double number, |
| unsigned decimal_places) { |
| NumberToStringBuffer buffer; |
| return String(NumberToFixedWidthString(number, decimal_places, buffer)); |
| } |
| |
| int String::ToIntStrict(bool* ok) const { |
| if (!impl_) { |
| if (ok) |
| *ok = false; |
| return 0; |
| } |
| return impl_->ToInt(NumberParsingOptions::kStrict, ok); |
| } |
| |
| unsigned String::ToUIntStrict(bool* ok) const { |
| if (!impl_) { |
| if (ok) |
| *ok = false; |
| return 0; |
| } |
| return impl_->ToUInt(NumberParsingOptions::kStrict, ok); |
| } |
| |
| unsigned String::HexToUIntStrict(bool* ok) const { |
| if (!impl_) { |
| if (ok) |
| *ok = false; |
| return 0; |
| } |
| return impl_->HexToUIntStrict(ok); |
| } |
| |
| int64_t String::ToInt64Strict(bool* ok) const { |
| if (!impl_) { |
| if (ok) |
| *ok = false; |
| return 0; |
| } |
| return impl_->ToInt64(NumberParsingOptions::kStrict, ok); |
| } |
| |
| uint64_t String::ToUInt64Strict(bool* ok) const { |
| if (!impl_) { |
| if (ok) |
| *ok = false; |
| return 0; |
| } |
| return impl_->ToUInt64(NumberParsingOptions::kStrict, ok); |
| } |
| |
| int String::ToInt(bool* ok) const { |
| if (!impl_) { |
| if (ok) |
| *ok = false; |
| return 0; |
| } |
| return impl_->ToInt(NumberParsingOptions::kLoose, ok); |
| } |
| |
| unsigned String::ToUInt(bool* ok) const { |
| if (!impl_) { |
| if (ok) |
| *ok = false; |
| return 0; |
| } |
| return impl_->ToUInt(NumberParsingOptions::kLoose, ok); |
| } |
| |
| double String::ToDouble(bool* ok) const { |
| if (!impl_) { |
| if (ok) |
| *ok = false; |
| return 0.0; |
| } |
| return impl_->ToDouble(ok); |
| } |
| |
| float String::ToFloat(bool* ok) const { |
| if (!impl_) { |
| if (ok) |
| *ok = false; |
| return 0.0f; |
| } |
| return impl_->ToFloat(ok); |
| } |
| |
| String String::IsolatedCopy() const { |
| if (!impl_) |
| return String(); |
| return impl_->IsolatedCopy(); |
| } |
| |
| bool String::IsSafeToSendToAnotherThread() const { |
| return !impl_ || impl_->IsSafeToSendToAnotherThread(); |
| } |
| |
| void String::Split(const StringView& separator, |
| bool allow_empty_entries, |
| Vector<String>& result) const { |
| result.clear(); |
| |
| unsigned start_pos = 0; |
| wtf_size_t end_pos; |
| while ((end_pos = Find(separator, start_pos)) != kNotFound) { |
| if (allow_empty_entries || start_pos != end_pos) |
| result.push_back(Substring(start_pos, end_pos - start_pos)); |
| start_pos = end_pos + separator.length(); |
| } |
| if (allow_empty_entries || start_pos != length()) |
| result.push_back(Substring(start_pos)); |
| } |
| |
| void String::Split(UChar separator, |
| bool allow_empty_entries, |
| Vector<String>& result) const { |
| result.clear(); |
| |
| unsigned start_pos = 0; |
| wtf_size_t end_pos; |
| while ((end_pos = find(separator, start_pos)) != kNotFound) { |
| if (allow_empty_entries || start_pos != end_pos) |
| result.push_back(Substring(start_pos, end_pos - start_pos)); |
| start_pos = end_pos + 1; |
| } |
| if (allow_empty_entries || start_pos != length()) |
| result.push_back(Substring(start_pos)); |
| } |
| |
| CString String::Ascii() const { |
| // Printable ASCII characters 32..127 and the null character are |
| // preserved, characters outside of this range are converted to '?'. |
| |
| unsigned length = this->length(); |
| if (!length) { |
| char* character_buffer; |
| return CString::CreateUninitialized(length, character_buffer); |
| } |
| |
| if (this->Is8Bit()) { |
| const LChar* characters = this->Characters8(); |
| |
| char* character_buffer; |
| CString result = CString::CreateUninitialized(length, character_buffer); |
| |
| for (unsigned i = 0; i < length; ++i) { |
| LChar ch = characters[i]; |
| character_buffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch; |
| } |
| |
| return result; |
| } |
| |
| const UChar* characters = this->Characters16(); |
| |
| char* character_buffer; |
| CString result = CString::CreateUninitialized(length, character_buffer); |
| |
| for (unsigned i = 0; i < length; ++i) { |
| UChar ch = characters[i]; |
| character_buffer[i] = |
| ch && (ch < 0x20 || ch > 0x7f) ? '?' : static_cast<char>(ch); |
| } |
| |
| return result; |
| } |
| |
| CString String::Latin1() const { |
| // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are |
| // preserved, characters outside of this range are converted to '?'. |
| |
| unsigned length = this->length(); |
| |
| if (!length) |
| return CString("", 0); |
| |
| if (Is8Bit()) |
| return CString(reinterpret_cast<const char*>(this->Characters8()), length); |
| |
| const UChar* characters = this->Characters16(); |
| |
| char* character_buffer; |
| CString result = CString::CreateUninitialized(length, character_buffer); |
| |
| for (unsigned i = 0; i < length; ++i) { |
| UChar ch = characters[i]; |
| character_buffer[i] = ch > 0xff ? '?' : static_cast<char>(ch); |
| } |
| |
| return result; |
| } |
| |
| // Helper to write a three-byte UTF-8 code point to the buffer, caller must |
| // check room is available. |
| static inline void PutUTF8Triple(char*& buffer, UChar ch) { |
| DCHECK_GE(ch, 0x0800); |
| *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0); |
| *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80); |
| *buffer++ = static_cast<char>((ch & 0x3F) | 0x80); |
| } |
| |
| CString String::Utf8(UTF8ConversionMode mode) const { |
| unsigned length = this->length(); |
| |
| if (!length) |
| return CString("", 0); |
| |
| // Allocate a buffer big enough to hold all the characters |
| // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes). |
| // Optimization ideas, if we find this function is hot: |
| // * We could speculatively create a CStringImpl to contain 'length' |
| // characters, and resize if necessary (i.e. if the buffer contains |
| // non-ascii characters). (Alternatively, scan the buffer first for |
| // ascii characters, so we know this will be sufficient). |
| // * We could allocate a CStringImpl with an appropriate size to |
| // have a good chance of being able to write the string into the |
| // buffer without reallocing (say, 1.5 x length). |
| if (length > std::numeric_limits<unsigned>::max() / 3) |
| return CString(); |
| Vector<char, 1024> buffer_vector(length * 3); |
| |
| char* buffer = buffer_vector.data(); |
| |
| if (Is8Bit()) { |
| const LChar* characters = this->Characters8(); |
| |
| unicode::ConversionResult result = |
| unicode::ConvertLatin1ToUTF8(&characters, characters + length, &buffer, |
| buffer + buffer_vector.size()); |
| // (length * 3) should be sufficient for any conversion |
| DCHECK_NE(result, unicode::kTargetExhausted); |
| } else { |
| const UChar* characters = this->Characters16(); |
| |
| if (mode == kStrictUTF8ConversionReplacingUnpairedSurrogatesWithFFFD) { |
| const UChar* characters_end = characters + length; |
| char* buffer_end = buffer + buffer_vector.size(); |
| while (characters < characters_end) { |
| // Use strict conversion to detect unpaired surrogates. |
| unicode::ConversionResult result = unicode::ConvertUTF16ToUTF8( |
| &characters, characters_end, &buffer, buffer_end, true); |
| DCHECK_NE(result, unicode::kTargetExhausted); |
| // Conversion fails when there is an unpaired surrogate. Put |
| // replacement character (U+FFFD) instead of the unpaired |
| // surrogate. |
| if (result != unicode::kConversionOK) { |
| DCHECK_LE(0xD800, *characters); |
| DCHECK_LE(*characters, 0xDFFF); |
| // There should be room left, since one UChar hasn't been |
| // converted. |
| DCHECK_LE(buffer + 3, buffer_end); |
| PutUTF8Triple(buffer, kReplacementCharacter); |
| ++characters; |
| } |
| } |
| } else { |
| bool strict = mode == kStrictUTF8Conversion; |
| unicode::ConversionResult result = |
| unicode::ConvertUTF16ToUTF8(&characters, characters + length, &buffer, |
| buffer + buffer_vector.size(), strict); |
| // (length * 3) should be sufficient for any conversion |
| DCHECK_NE(result, unicode::kTargetExhausted); |
| |
| // Only produced from strict conversion. |
| if (result == unicode::kSourceIllegal) { |
| DCHECK(strict); |
| return CString(); |
| } |
| |
| // Check for an unconverted high surrogate. |
| if (result == unicode::kSourceExhausted) { |
| if (strict) |
| return CString(); |
| // This should be one unpaired high surrogate. Treat it the same |
| // was as an unpaired high surrogate would have been handled in |
| // the middle of a string with non-strict conversion - which is |
| // to say, simply encode it to UTF-8. |
| DCHECK_EQ(characters + 1, this->Characters16() + length); |
| DCHECK_GE(*characters, 0xD800); |
| DCHECK_LE(*characters, 0xDBFF); |
| // There should be room left, since one UChar hasn't been |
| // converted. |
| DCHECK_LE(buffer + 3, buffer + buffer_vector.size()); |
| PutUTF8Triple(buffer, *characters); |
| } |
| } |
| } |
| |
| return CString(buffer_vector.data(), buffer - buffer_vector.data()); |
| } |
| |
| String String::Make8BitFrom16BitSource(const UChar* source, wtf_size_t length) { |
| if (!length) |
| return g_empty_string; |
| |
| LChar* destination; |
| String result = String::CreateUninitialized(length, destination); |
| |
| CopyLCharsFromUCharSource(destination, source, length); |
| |
| return result; |
| } |
| |
| String String::Make16BitFrom8BitSource(const LChar* source, wtf_size_t length) { |
| if (!length) |
| return g_empty_string16_bit; |
| |
| UChar* destination; |
| String result = String::CreateUninitialized(length, destination); |
| |
| StringImpl::CopyChars(destination, source, length); |
| |
| return result; |
| } |
| |
| String String::FromUTF8(const LChar* string_start, size_t string_length) { |
| wtf_size_t length = SafeCast<wtf_size_t>(string_length); |
| |
| if (!string_start) |
| return String(); |
| |
| if (!length) |
| return g_empty_string; |
| |
| if (CharactersAreAllASCII(string_start, length)) |
| return StringImpl::Create(string_start, length); |
| |
| Vector<UChar, 1024> buffer(length); |
| UChar* buffer_start = buffer.data(); |
| |
| UChar* buffer_current = buffer_start; |
| const char* string_current = reinterpret_cast<const char*>(string_start); |
| if (unicode::ConvertUTF8ToUTF16( |
| &string_current, reinterpret_cast<const char*>(string_start + length), |
| &buffer_current, |
| buffer_current + buffer.size()) != unicode::kConversionOK) |
| return String(); |
| |
| unsigned utf16_length = |
| static_cast<wtf_size_t>(buffer_current - buffer_start); |
| DCHECK_LT(utf16_length, length); |
| return StringImpl::Create(buffer_start, utf16_length); |
| } |
| |
| String String::FromUTF8(const LChar* string) { |
| if (!string) |
| return String(); |
| return FromUTF8(string, strlen(reinterpret_cast<const char*>(string))); |
| } |
| |
| String String::FromUTF8(const CString& s) { |
| return FromUTF8(s.data()); |
| } |
| |
| String String::FromUTF8WithLatin1Fallback(const LChar* string, size_t size) { |
| String utf8 = FromUTF8(string, size); |
| if (!utf8) |
| return String(string, SafeCast<wtf_size_t>(size)); |
| return utf8; |
| } |
| |
| std::ostream& operator<<(std::ostream& out, const String& string) { |
| return out << string.EncodeForDebugging().Utf8().data(); |
| } |
| |
| #ifndef NDEBUG |
| void String::Show() const { |
| DLOG(INFO) << *this; |
| } |
| #endif |
| |
| } // namespace WTF |