// third_party/WebKit/Source/platform/wtf/text/StringImpl.h - chromium/src.git - Git at Google

 /*
  * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
  * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2013 Apple Inc. All rights
  * reserved.
  * Copyright (C) 2009 Google Inc. All rights reserved.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Library General Public License for more details.
  *
  * You should have received a copy of the GNU Library General Public License
  * along with this library; see the file COPYING.LIB.  If not, write to
  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  * Boston, MA 02110-1301, USA.
  *
  */

 #ifndef StringImpl_h
 #define StringImpl_h

 #include <limits.h>
 #include <string.h>

 #include "base/macros.h"
 #include "base/memory/ref_counted.h"
 #include "build/build_config.h"
 #include "platform/wtf/ASCIICType.h"
 #include "platform/wtf/Forward.h"
 #include "platform/wtf/HashMap.h"
 #include "platform/wtf/StringHasher.h"
 #include "platform/wtf/Vector.h"
 #include "platform/wtf/WTFExport.h"
 #include "platform/wtf/text/ASCIIFastPath.h"
 #include "platform/wtf/text/NumberParsingOptions.h"
 #include "platform/wtf/text/Unicode.h"

 #if DCHECK_IS_ON()
 #include "platform/wtf/ThreadRestrictionVerifier.h"
 #endif

 #if defined(OS_MACOSX)
 typedef const struct __CFString* CFStringRef;
 #endif

 #ifdef __OBJC__
 @class NSString;
 #endif

 namespace WTF {

 struct AlreadyHashed;
 template <typename>
 class RetainPtr;

 // Case-sensitivity modes for text matching. Judging by the enumerator names:
 // exact matching, matching that ignores ASCII case only, and Unicode-aware
 // case-insensitive matching (described below). Nothing else in this header
 // takes this enum; it is only re-exported at global scope at the end of the
 // file.
 enum TextCaseSensitivity {
   kTextCaseSensitive,
   kTextCaseASCIIInsensitive,

   // Unicode aware case insensitive matching. Non-ASCII characters might match
   // to ASCII characters. This flag is rarely used to implement web platform
   // features.
   kTextCaseUnicodeInsensitive
 };

 // Option accepted by StringImpl::SimplifyWhiteSpace(); kStripExtraWhiteSpace
 // is the default argument there.
 enum StripBehavior { kStripExtraWhiteSpace, kDoNotStripWhiteSpace };

 // Predicate over a single UChar, accepted by StringImpl::Find(),
 // StringImpl::RemoveCharacters() and the free Find() overloads below.
 typedef bool (*CharacterMatchFunctionPtr)(UChar);
 // Predicate over a single UChar, accepted by StringImpl::StripWhiteSpace()
 // and StringImpl::SimplifyWhiteSpace().
 typedef bool (*IsWhiteSpaceFunctionPtr)(UChar);
 // Table type returned by StringImpl::AllStaticStrings(): maps an unsigned key
 // to a StringImpl*. NOTE(review): the AlreadyHashed hasher suggests the key
 // is the string's hash -- confirm against the implementation.
 typedef HashMap<unsigned, StringImpl*, AlreadyHashed> StaticStringsTable;

 // You can find documentation about this class in this doc:
 // https://docs.google.com/document/d/1kOCUlJdh2WJMJGDf-WoEQhmnjKLaOYRbiHz5TiGJl14/edit?usp=sharing
 //
 // Summary of what this header itself shows:
 //  - Instances are reference counted through AddRef()/Release(); the count
 //    is a plain (non-atomic) unsigned.
 //  - The character data is not a member. Characters8()/Characters16() return
 //    the address immediately following the object (this + 1), and
 //    AllocationSize() sizes an allocation as header plus characters.
 //  - A string is either 8-bit (LChar) or 16-bit (UChar); is8_bit_ records
 //    which.
 //  - The hash is cached lazily in hash_; a stored 0 means "not computed yet".
 class WTF_EXPORT StringImpl {
  private:
   // StringImpls are allocated out of the WTF buffer partition.
   void* operator new(size_t);
   // Placement form: constructs into caller-provided storage.
   void* operator new(size_t, void* ptr) { return ptr; }
   void operator delete(void*);

   // Used to construct static strings, which have a special ref count that can
   // never hit zero.  This means that the static string will never be
   // destroyed, which is important because static strings will be shared
   // across threads & ref-counted in a non-threadsafe manner.
   //
   // This overload builds the empty 8-bit string.
   enum ConstructEmptyStringTag { kConstructEmptyString };
   explicit StringImpl(ConstructEmptyStringTag)
       : ref_count_(1),
         length_(0),
         hash_(0),
         contains_only_ascii_(true),
         needs_ascii_check_(false),
         is_atomic_(false),
         is8_bit_(true),
         is_static_(true) {
     // Ensure that the hash is computed so that AtomicStringHash can call
     // ExistingHash() with impunity. The empty string is special because it
     // is never entered into AtomicString's HashKey, but still needs to
     // compare correctly.
     GetHash();
   }

   // Builds the empty 16-bit string; same as the constructor above except
   // that is8_bit_ is false.
   enum ConstructEmptyString16BitTag { kConstructEmptyString16Bit };
   explicit StringImpl(ConstructEmptyString16BitTag)
       : ref_count_(1),
         length_(0),
         hash_(0),
         contains_only_ascii_(true),
         needs_ascii_check_(false),
         is_atomic_(false),
         is8_bit_(false),
         is_static_(true) {
     GetHash();
   }

   // Initializes the header of a non-static 8-bit string of |length|
   // characters. The character data itself is not written here. |length| must
   // be non-zero (DCHECK). The ASCII-only flag is left to be computed on
   // demand (needs_ascii_check_ is set for any non-zero length).
   // FIXME: there has to be a less hacky way to do this.
   enum Force8Bit { kForce8BitConstructor };
   StringImpl(unsigned length, Force8Bit)
       : ref_count_(1),
         length_(length),
         hash_(0),
         contains_only_ascii_(!length),
         needs_ascii_check_(static_cast<bool>(length)),
         is_atomic_(false),
         is8_bit_(true),
         is_static_(false) {
     DCHECK(length_);
   }

   // Same as above, but for a non-static 16-bit string.
   StringImpl(unsigned length)
       : ref_count_(1),
         length_(length),
         hash_(0),
         contains_only_ascii_(!length),
         needs_ascii_check_(static_cast<bool>(length)),
         is_atomic_(false),
         is8_bit_(false),
         is_static_(false) {
     DCHECK(length_);
   }

   // Initializes the header of a static 8-bit string whose hash has already
   // been computed by the caller. Unlike the two constructors above, a zero
   // |length| is not rejected here.
   enum StaticStringTag { kStaticString };
   StringImpl(unsigned length, unsigned hash, StaticStringTag)
       : ref_count_(1),
         length_(length),
         hash_(hash),
         contains_only_ascii_(!length),
         needs_ascii_check_(static_cast<bool>(length)),
         is_atomic_(false),
         is8_bit_(true),
         is_static_(true) {}

  public:
   REQUIRE_ADOPTION_FOR_REFCOUNTED_TYPE();
   // Shared empty-string instances (8-bit and 16-bit). They are defined
   // outside this header.
   static StringImpl* empty_;
   static StringImpl* empty16_bit_;

   ~StringImpl();

   static void InitStatics();

   // Static-string support. The definitions are not in this header.
   static StringImpl* CreateStatic(const char* string,
                                   unsigned length,
                                   unsigned hash);
   static void ReserveStaticStringsCapacityForSize(unsigned size);
   static void FreezeStaticStrings();
   static const StaticStringsTable& AllStaticStrings();
   static unsigned HighestStaticStringLength() {
     return highest_static_string_length_;
   }

   // Factory functions. The (pointer, length) forms take an explicit
   // character count; the pointer-only forms take no length.
   // NOTE(review): the pointer-only forms presumably expect a null-terminated
   // string -- confirm in the implementation.
   static scoped_refptr<StringImpl> Create(const UChar*, unsigned length);
   static scoped_refptr<StringImpl> Create(const LChar*, unsigned length);
   static scoped_refptr<StringImpl> Create8BitIfPossible(const UChar*,
                                                         unsigned length);
   template <size_t inlineCapacity>
   static scoped_refptr<StringImpl> Create8BitIfPossible(
       const Vector<UChar, inlineCapacity>& vector) {
     return Create8BitIfPossible(vector.data(), vector.size());
   }

   // The |const char*| overloads reinterpret the bytes as LChar.
   ALWAYS_INLINE static scoped_refptr<StringImpl> Create(const char* s,
                                                         unsigned length) {
     return Create(reinterpret_cast<const LChar*>(s), length);
   }
   static scoped_refptr<StringImpl> Create(const LChar*);
   ALWAYS_INLINE static scoped_refptr<StringImpl> Create(const char* s) {
     return Create(reinterpret_cast<const LChar*>(s));
   }

   // Creates a string of |length| characters and hands the character buffer
   // back through |data|. NOTE(review): judging by the name the caller is
   // expected to fill the buffer -- confirm in the implementation.
   static scoped_refptr<StringImpl> CreateUninitialized(unsigned length,
                                                        LChar*& data);
   static scoped_refptr<StringImpl> CreateUninitialized(unsigned length,
                                                        UChar*& data);

   // Number of characters (not bytes); see CharactersSizeInBytes().
   unsigned length() const { return length_; }
   bool Is8Bit() const { return is8_bit_; }

   // Character data accessors. The data starts right after this object
   // (this + 1). Each typed accessor DCHECKs that the string has the matching
   // width; Bytes() returns the same address untyped and does no check.
   ALWAYS_INLINE const LChar* Characters8() const {
     DCHECK(Is8Bit());
     return reinterpret_cast<const LChar*>(this + 1);
   }
   ALWAYS_INLINE const UChar* Characters16() const {
     DCHECK(!Is8Bit());
     return reinterpret_cast<const UChar*>(this + 1);
   }
   ALWAYS_INLINE const void* Bytes() const {
     return reinterpret_cast<const void*>(this + 1);
   }

   // Specialized after the class for LChar (Characters8()) and UChar
   // (Characters16()).
   template <typename CharType>
   ALWAYS_INLINE const CharType* GetCharacters() const;

   // length() multiplied by the size of the character type in use.
   size_t CharactersSizeInBytes() const {
     return length() * (Is8Bit() ? sizeof(LChar) : sizeof(UChar));
   }

   bool IsAtomic() const { return is_atomic_; }
   void SetIsAtomic(bool is_atomic) { is_atomic_ = is_atomic; }

   bool IsStatic() const { return is_static_; }

   // Defined after the class; the answer is computed on first use when
   // needs_ascii_check_ is set.
   bool ContainsOnlyASCII() const;

   bool IsSafeToSendToAnotherThread() const;

   // The hash is cached in hash_, a 24-bit bit-field declared alongside the
   // flag bit-fields. SetHash() DCHECKs that the value equals
   // StringHasher::ComputeHashAndMaskTop8Bits() of the characters and that it
   // is non-zero, because a stored 0 means "no hash yet" (see HasHash()).
   // Must only be called while no hash is stored (DCHECK).
   void SetHash(unsigned hash) const {
     DCHECK(!HasHash());
     // Multiple clients assume that StringHasher is the canonical string
     // hash function.
     DCHECK(hash == (Is8Bit() ? StringHasher::ComputeHashAndMaskTop8Bits(
                                    Characters8(), length_)
                              : StringHasher::ComputeHashAndMaskTop8Bits(
                                    Characters16(), length_)));
     hash_ = hash;
     DCHECK(hash);  // Verify that 0 is a valid sentinel hash value.
   }

   bool HasHash() const { return hash_ != 0; }

   // Returns the cached hash; a hash must already be stored (DCHECK).
   unsigned ExistingHash() const {
     DCHECK(HasHash());
     return hash_;
   }

   // Returns the cached hash, falling back to HashSlowCase() when none is
   // stored yet.
   unsigned GetHash() const {
     if (HasHash())
       return ExistingHash();
     return HashSlowCase();
   }

   // Reference counting. In DCHECK builds every operation on a non-static
   // string is checked against verifier_; static strings skip that check.
   ALWAYS_INLINE bool HasOneRef() const {
 #if DCHECK_IS_ON()
     DCHECK(IsStatic() || verifier_.IsSafeToUse()) << AsciiForDebugging();
 #endif
     return ref_count_ == 1;
   }

   ALWAYS_INLINE void AddRef() const {
 #if DCHECK_IS_ON()
     DCHECK(IsStatic() || verifier_.OnRef(ref_count_)) << AsciiForDebugging();
 #endif
     ++ref_count_;
   }

   // Drops one reference and calls DestroyIfNotStatic() when the count
   // reaches zero.
   ALWAYS_INLINE void Release() const {
 #if DCHECK_IS_ON()
     DCHECK(IsStatic() || verifier_.OnDeref(ref_count_))
         << AsciiForDebugging() << " " << CurrentThread();
 #endif
     if (!--ref_count_)
       DestroyIfNotStatic();
   }

   // Intentionally empty.
   ALWAYS_INLINE void Adopted() const {}

   // Copies |num_characters| characters of the same type with memcpy.
   // FIXME: Does this really belong in StringImpl?
   template <typename T>
   static void CopyChars(T* destination,
                         const T* source,
                         unsigned num_characters) {
     memcpy(destination, source, num_characters * sizeof(T));
   }

   // Widening copy: each LChar is assigned to a UChar slot.
   ALWAYS_INLINE static void CopyChars(UChar* destination,
                                       const LChar* source,
                                       unsigned num_characters) {
     for (unsigned i = 0; i < num_characters; ++i)
       destination[i] = source[i];
   }

   // Some string features, like refcounting and the atomicity flag, are not
   // thread-safe. We achieve thread safety by isolation, giving each thread
   // its own copy of the string.
   scoped_refptr<StringImpl> IsolatedCopy() const;

   scoped_refptr<StringImpl> Substring(unsigned pos,
                                       unsigned len = UINT_MAX) const;

   // Returns the character at index |i| as a UChar, whichever width the
   // string uses. |i| must be less than the length (SECURITY_DCHECK).
   UChar operator[](unsigned i) const {
     SECURITY_DCHECK(i < length_);
     if (Is8Bit())
       return Characters8()[i];
     return Characters16()[i];
   }
   UChar32 CharacterStartingAt(unsigned);

   bool ContainsOnlyWhitespace();

   // Number parsing. |ok| receives the success status.
   int ToInt(NumberParsingOptions, bool* ok) const;
   unsigned ToUInt(NumberParsingOptions, bool* ok) const;
   int64_t ToInt64(NumberParsingOptions, bool* ok) const;
   uint64_t ToUInt64(NumberParsingOptions, bool* ok) const;

   unsigned HexToUIntStrict(bool* ok);

   // FIXME: Like NumberParsingOptions::kStrict, these give false for "ok" when
   // there is trailing garbage.  Like NumberParsingOptions::kLoose, these return
   // the value when there is trailing garbage.  It would be better if these were
   // more consistent with the above functions instead.
   double ToDouble(bool* ok = nullptr);
   float ToFloat(bool* ok = nullptr);

   // Case conversion. Each returns a StringImpl rather than modifying this
   // one in place.
   scoped_refptr<StringImpl> LowerUnicode();
   scoped_refptr<StringImpl> LowerASCII();
   scoped_refptr<StringImpl> UpperUnicode();
   scoped_refptr<StringImpl> UpperASCII();
   scoped_refptr<StringImpl> LowerUnicode(const AtomicString& locale_identifier);
   scoped_refptr<StringImpl> UpperUnicode(const AtomicString& locale_identifier);

   scoped_refptr<StringImpl> Fill(UChar);
   // FIXME: Do we need Fill(char) or can we just do the right thing if UChar is
   // ASCII?
   scoped_refptr<StringImpl> FoldCase();

   scoped_refptr<StringImpl> Truncate(unsigned length);

   scoped_refptr<StringImpl> StripWhiteSpace();
   scoped_refptr<StringImpl> StripWhiteSpace(IsWhiteSpaceFunctionPtr);
   scoped_refptr<StringImpl> SimplifyWhiteSpace(
       StripBehavior = kStripExtraWhiteSpace);
   scoped_refptr<StringImpl> SimplifyWhiteSpace(
       IsWhiteSpaceFunctionPtr,
       StripBehavior = kStripExtraWhiteSpace);

   scoped_refptr<StringImpl> RemoveCharacters(CharacterMatchFunctionPtr);
   template <typename CharType>
   ALWAYS_INLINE scoped_refptr<StringImpl> RemoveCharacters(
       const CharType* characters,
       CharacterMatchFunctionPtr);

   // Remove characters between [start, start+length_to_remove). The range is
   // clamped to the size of the string. Does nothing if start >= length().
   scoped_refptr<StringImpl> Remove(unsigned start,
                                    unsigned length_to_remove = 1);

   // Find characters. The single-character overloads defined later in this
   // header return the index of the match, or kNotFound.
   size_t Find(LChar character, unsigned start = 0);
   size_t Find(char character, unsigned start = 0);
   size_t Find(UChar character, unsigned start = 0);
   size_t Find(CharacterMatchFunctionPtr, unsigned index = 0);

   // Find substrings.
   size_t Find(const StringView&, unsigned index = 0);
   // Unicode aware case insensitive string matching. Non-ASCII characters might
   // match to ASCII characters. This function is rarely used to implement web
   // platform features.
   size_t FindIgnoringCase(const StringView&, unsigned index = 0);
   size_t FindIgnoringASCIICase(const StringView&, unsigned index = 0);

   size_t ReverseFind(UChar, unsigned index = UINT_MAX);
   size_t ReverseFind(const StringView&, unsigned index = UINT_MAX);

   bool StartsWith(UChar) const;
   bool StartsWith(const StringView&) const;
   bool StartsWithIgnoringCase(const StringView&) const;
   bool StartsWithIgnoringASCIICase(const StringView&) const;

   bool EndsWith(UChar) const;
   bool EndsWith(const StringView&) const;
   bool EndsWithIgnoringCase(const StringView&) const;
   bool EndsWithIgnoringASCIICase(const StringView&) const;

   // Replace parts of the string.
   scoped_refptr<StringImpl> Replace(UChar pattern, UChar replacement);
   scoped_refptr<StringImpl> Replace(UChar pattern,
                                     const StringView& replacement);
   scoped_refptr<StringImpl> Replace(const StringView& pattern,
                                     const StringView& replacement);
   scoped_refptr<StringImpl> Replace(unsigned index,
                                     unsigned length_to_replace,
                                     const StringView& replacement);

   scoped_refptr<StringImpl> UpconvertedString();

   // Copy characters from string starting at |start| up until |max_length| or
   // the end of the string is reached. Returns the actual number of characters
   // copied.
   unsigned CopyTo(UChar* buffer, unsigned start, unsigned max_length) const;

   // Append characters from this string into a buffer. Expects the buffer to
   // have the methods:
   //    Append(const UChar*, unsigned length);
   //    Append(const LChar*, unsigned length);
   // StringBuilder and Vector conform to this protocol.
   template <typename BufferType>
   void AppendTo(BufferType&,
                 unsigned start = 0,
                 unsigned length = UINT_MAX) const;

   // Prepend characters from this string into a buffer. Expects the buffer to
   // have the methods:
   //    Prepend(const UChar*, unsigned length);
   //    Prepend(const LChar*, unsigned length);
   // Vector conforms to this protocol.
   template <typename BufferType>
   void PrependTo(BufferType&,
                  unsigned start = 0,
                  unsigned length = UINT_MAX) const;

 #if defined(OS_MACOSX)
   RetainPtr<CFStringRef> CreateCFString();
 #endif
 #ifdef __OBJC__
   operator NSString*();
 #endif

   static const UChar kLatin1CaseFoldTable[256];

  private:
   // Returns the number of bytes needed for the object header plus |length|
   // characters of CharType. CHECKs that the result fits in an unsigned, so
   // the multiplication and addition below cannot overflow.
   template <typename CharType>
   static size_t AllocationSize(unsigned length) {
     CHECK_LE(length,
              ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) /
               sizeof(CharType)));
     return sizeof(StringImpl) + length * sizeof(CharType);
   }

   // Private Replace() overloads taking the replacement as a raw
   // (pointer, length) pair of either character width.
   scoped_refptr<StringImpl> Replace(UChar pattern,
                                     const LChar* replacement,
                                     unsigned replacement_length);
   scoped_refptr<StringImpl> Replace(UChar pattern,
                                     const UChar* replacement,
                                     unsigned replacement_length);

   template <class UCharPredicate>
   scoped_refptr<StringImpl> StripMatchedCharacters(UCharPredicate);
   template <typename CharType, class UCharPredicate>
   scoped_refptr<StringImpl> SimplifyMatchedCharactersToSpace(UCharPredicate,
                                                              StripBehavior);
   // Called by GetHash() when no hash is cached yet.
   NEVER_INLINE unsigned HashSlowCase() const;

   // Called by Release() when the reference count drops to zero.
   void DestroyIfNotStatic() const;
   // Called by ContainsOnlyASCII() while needs_ascii_check_ is set.
   void UpdateContainsOnlyASCII() const;

 #if DCHECK_IS_ON()
   // Used in the DCHECK failure messages of the ref-counting functions.
   std::string AsciiForDebugging() const;
 #endif

   static unsigned highest_static_string_length_;

 #if DCHECK_IS_ON()
   // DCHECKs that the cached hash matches the hash of the characters.
   // NOTE(review): reads Characters8() unconditionally, so this is only valid
   // for 8-bit strings (Characters8() itself DCHECKs Is8Bit()).
   void AssertHashIsCorrect() {
     DCHECK(HasHash());
     DCHECK_EQ(ExistingHash(), StringHasher::ComputeHashAndMaskTop8Bits(
                                   Characters8(), length()));
   }
 #endif

  private:
 #if DCHECK_IS_ON()
   // Consulted by HasOneRef()/AddRef()/Release() for non-static strings.
   mutable ThreadRestrictionVerifier verifier_;
 #endif
   // Reference count; mutable so const strings can be ref'ed and deref'ed.
   mutable unsigned ref_count_;
   // Number of characters.
   const unsigned length_;
   // Cached hash; 0 means "not computed". Only 24 bits are available.
   mutable unsigned hash_ : 24;
   // Cached answer for ContainsOnlyASCII(); meaningful once
   // needs_ascii_check_ is clear.
   mutable unsigned contains_only_ascii_ : 1;
   mutable unsigned needs_ascii_check_ : 1;
   unsigned is_atomic_ : 1;
   // True when the characters are LChar, false when they are UChar.
   const unsigned is8_bit_ : 1;
   const unsigned is_static_ : 1;

   DISALLOW_COPY_AND_ASSIGN(StringImpl);
 };

 // GetCharacters<LChar>() forwards to Characters8(), which DCHECKs that the
 // string is 8-bit.
 template <>
 ALWAYS_INLINE const LChar* StringImpl::GetCharacters<LChar>() const {
   return Characters8();
 }

 // GetCharacters<UChar>() forwards to Characters16(), which DCHECKs that the
 // string is 16-bit.
 template <>
 ALWAYS_INLINE const UChar* StringImpl::GetCharacters<UChar>() const {
   return Characters16();
 }

 // Equality between a StringImpl and another StringImpl or raw characters.
 // The exported overloads are defined outside this header; the inline |char|
 // overloads reinterpret the bytes as LChar and forward to them.
 // NOTE(review): the overloads without a length presumably expect a
 // null-terminated string -- confirm in the implementation.
 WTF_EXPORT bool Equal(const StringImpl*, const StringImpl*);
 WTF_EXPORT bool Equal(const StringImpl*, const LChar*);
 inline bool Equal(const StringImpl* a, const char* b) {
   return Equal(a, reinterpret_cast<const LChar*>(b));
 }
 WTF_EXPORT bool Equal(const StringImpl*, const LChar*, unsigned);
 WTF_EXPORT bool Equal(const StringImpl*, const UChar*, unsigned);
 inline bool Equal(const StringImpl* a, const char* b, unsigned length) {
   return Equal(a, reinterpret_cast<const LChar*>(b), length);
 }
 // Argument-swapped conveniences: forward to the (StringImpl*, LChar*) form.
 inline bool Equal(const LChar* a, StringImpl* b) {
   return Equal(b, a);
 }
 inline bool Equal(const char* a, StringImpl* b) {
   return Equal(b, reinterpret_cast<const LChar*>(a));
 }
 // NOTE(review): the name suggests both arguments must be non-null -- confirm
 // in the implementation.
 WTF_EXPORT bool EqualNonNull(const StringImpl* a, const StringImpl* b);

 // Returns the cached ASCII-only flag, first refreshing it through
 // UpdateContainsOnlyASCII() if a check is still pending.
 ALWAYS_INLINE bool StringImpl::ContainsOnlyASCII() const {
   if (!needs_ascii_check_)
     return contains_only_ascii_;
   UpdateContainsOnlyASCII();
   return contains_only_ascii_;
 }

 // Bytewise comparison of two buffers holding |length| characters of the
 // same type.
 template <typename CharType>
 ALWAYS_INLINE bool Equal(const CharType* a,
                          const CharType* b,
                          unsigned length) {
   const size_t byte_count = length * sizeof(CharType);
   return memcmp(a, b, byte_count) == 0;
 }

 // Compares |length| characters of an 8-bit buffer with a 16-bit buffer,
 // element by element.
 ALWAYS_INLINE bool Equal(const LChar* a, const UChar* b, unsigned length) {
   unsigned i = 0;
   while (i < length && a[i] == b[i])
     ++i;
   // Reaching |length| means no mismatch was found.
   return i == length;
 }

 // Same comparison with the argument widths swapped.
 ALWAYS_INLINE bool Equal(const UChar* a, const LChar* b, unsigned length) {
   const LChar* const narrow = b;
   const UChar* const wide = a;
   return Equal(narrow, wide, length);
 }

 // Unicode aware case insensitive string matching. Non-ASCII characters might
 // match to ASCII characters. These functions are rarely used to implement web
 // platform features.
 // These functions are deprecated. Use EqualIgnoringASCIICase(), or introduce
 // EqualIgnoringUnicodeCase(). See crbug.com/627682
 //
 // Each overload compares |length| characters from both buffers. The exported
 // overloads are defined outside this header.
 WTF_EXPORT bool DeprecatedEqualIgnoringCase(const LChar*,
                                             const LChar*,
                                             unsigned length);
 WTF_EXPORT bool DeprecatedEqualIgnoringCase(const UChar*,
                                             const LChar*,
                                             unsigned length);
 // (LChar, UChar) order: swaps the arguments and forwards to the
 // (UChar, LChar) overload.
 inline bool DeprecatedEqualIgnoringCase(const LChar* a,
                                         const UChar* b,
                                         unsigned length) {
   return DeprecatedEqualIgnoringCase(b, a, length);
 }
 WTF_EXPORT bool DeprecatedEqualIgnoringCase(const UChar*,
                                             const UChar*,
                                             unsigned length);

 // NOTE(review): judging by the name and by the Vector overload defined later
 // in this header, a null string presumably compares equal to an empty one --
 // confirm in the implementation.
 WTF_EXPORT bool EqualIgnoringNullity(StringImpl*, StringImpl*);

 // Compares |length| characters of |a| and |b| after passing each character
 // through ToASCIILower(). Returns true when every pair matches.
 template <typename CharacterTypeA, typename CharacterTypeB>
 inline bool EqualIgnoringASCIICase(const CharacterTypeA* a,
                                    const CharacterTypeB* b,
                                    unsigned length) {
   unsigned position = 0;
   while (position < length &&
          ToASCIILower(a[position]) == ToASCIILower(b[position])) {
     ++position;
   }
   // The scan stops early only on a mismatch.
   return position == length;
 }

 // Defined outside this header. NOTE(review): presumably returns a
 // negative/zero/positive ordering like CodePointCompare(), ignoring ASCII
 // case, with the LChar argument null-terminated -- confirm in the
 // implementation.
 WTF_EXPORT int CodePointCompareIgnoringASCIICase(const StringImpl*,
                                                  const LChar*);

 inline size_t Find(const LChar* characters,
                    unsigned length,
                    LChar match_character,
                    unsigned index = 0) {
   // Some clients rely on being able to pass index >= length.
   if (index >= length)
     return kNotFound;
   const LChar* found = static_cast<const LChar*>(
       memchr(characters + index, match_character, length - index));
   return found ? found - characters : kNotFound;
 }

 // Linear scan of |characters[index, length)| for |match_character|.
 // Returns the index of the first match, or kNotFound (also when
 // index >= length).
 inline size_t Find(const UChar* characters,
                    unsigned length,
                    UChar match_character,
                    unsigned index = 0) {
   for (unsigned position = index; position < length; ++position) {
     if (characters[position] == match_character)
       return position;
   }
   return kNotFound;
 }

 // Searches a 16-bit buffer for an 8-bit character: widens |match_character|
 // to UChar and forwards to the (UChar*, UChar) overload.
 ALWAYS_INLINE size_t Find(const UChar* characters,
                           unsigned length,
                           LChar match_character,
                           unsigned index = 0) {
   return Find(characters, length, static_cast<UChar>(match_character), index);
 }

 // Searches an 8-bit buffer for a 16-bit character. A character with any bit
 // set above the low 8 cannot occur in an 8-bit buffer, so only narrower
 // values are forwarded to the (LChar*, LChar) overload.
 inline size_t Find(const LChar* characters,
                    unsigned length,
                    UChar match_character,
                    unsigned index = 0) {
   const bool fits_in_eight_bits = !(match_character & ~0xFF);
   if (fits_in_eight_bits) {
     return Find(characters, length, static_cast<LChar>(match_character),
                 index);
   }
   return kNotFound;
 }

 // Accepts a plain |char| to match: converts it to LChar and forwards to the
 // Find() overload that takes an LChar for the given buffer type.
 template <typename CharacterType>
 inline size_t Find(const CharacterType* characters,
                    unsigned length,
                    char match_character,
                    unsigned index = 0) {
   return Find(characters, length, static_cast<LChar>(match_character), index);
 }

 // Returns the index of the first character in |characters[index, length)|
 // for which |match_function| returns true, or kNotFound.
 inline size_t Find(const LChar* characters,
                    unsigned length,
                    CharacterMatchFunctionPtr match_function,
                    unsigned index = 0) {
   for (unsigned position = index; position < length; ++position) {
     if (match_function(characters[position]))
       return position;
   }
   return kNotFound;
 }

 // 16-bit counterpart of the predicate search: returns the index of the
 // first character in |characters[index, length)| accepted by
 // |match_function|, or kNotFound.
 inline size_t Find(const UChar* characters,
                    unsigned length,
                    CharacterMatchFunctionPtr match_function,
                    unsigned index = 0) {
   for (unsigned position = index; position < length; ++position) {
     if (match_function(characters[position]))
       return position;
   }
   return kNotFound;
 }

 // Scans backwards from |index| (clamped to the last character) towards the
 // start of the buffer. Returns the index of the first match encountered, or
 // kNotFound when the buffer is empty or holds no match.
 template <typename CharacterType>
 inline size_t ReverseFind(const CharacterType* characters,
                           unsigned length,
                           CharacterType match_character,
                           unsigned index = UINT_MAX) {
   if (!length)
     return kNotFound;
   const unsigned last = index < length ? index : length - 1;
   // |remaining| is one past the candidate position, so the loop can end at
   // zero without wrapping the unsigned counter.
   for (unsigned remaining = last + 1; remaining > 0; --remaining) {
     const unsigned candidate = remaining - 1;
     if (characters[candidate] == match_character)
       return candidate;
   }
   return kNotFound;
 }

 // Reverse search of a 16-bit buffer for an 8-bit character: widens
 // |match_character| to UChar and forwards to the templated ReverseFind().
 ALWAYS_INLINE size_t ReverseFind(const UChar* characters,
                                  unsigned length,
                                  LChar match_character,
                                  unsigned index = UINT_MAX) {
   return ReverseFind(characters, length, static_cast<UChar>(match_character),
                      index);
 }

 // Reverse search of an 8-bit buffer for a 16-bit character. A character
 // with any bit set above the low 8 cannot occur in an 8-bit buffer, so only
 // narrower values are forwarded to the templated ReverseFind().
 inline size_t ReverseFind(const LChar* characters,
                           unsigned length,
                           UChar match_character,
                           unsigned index = UINT_MAX) {
   const bool fits_in_eight_bits = !(match_character & ~0xFF);
   if (fits_in_eight_bits) {
     return ReverseFind(characters, length,
                        static_cast<LChar>(match_character), index);
   }
   return kNotFound;
 }

 // Finds |character| at or after |start|, dispatching to the free Find()
 // overload that matches this string's character width.
 inline size_t StringImpl::Find(LChar character, unsigned start) {
   return Is8Bit() ? WTF::Find(Characters8(), length_, character, start)
                   : WTF::Find(Characters16(), length_, character, start);
 }

 // |char| convenience overload: converts to LChar and forwards to
 // Find(LChar, unsigned).
 ALWAYS_INLINE size_t StringImpl::Find(char character, unsigned start) {
   return Find(static_cast<LChar>(character), start);
 }

 // Finds the 16-bit |character| at or after |start|, dispatching to the free
 // Find() overload that matches this string's character width.
 inline size_t StringImpl::Find(UChar character, unsigned start) {
   return Is8Bit() ? WTF::Find(Characters8(), length_, character, start)
                   : WTF::Find(Characters16(), length_, character, start);
 }

 inline unsigned LengthOfNullTerminatedString(const UChar* string) {
   size_t length = 0;
   while (string[length] != UChar(0))
     ++length;
   CHECK_LE(length, std::numeric_limits<unsigned>::max());
   return static_cast<unsigned>(length);
 }

 // Compares a vector of UChars with a possibly-null StringImpl. A null |b|
 // is equal only to an empty vector; otherwise the lengths must match and
 // the characters are compared using the Equal() overload that fits the
 // width of |b|.
 template <size_t inlineCapacity>
 bool EqualIgnoringNullity(const Vector<UChar, inlineCapacity>& a,
                           StringImpl* b) {
   if (!b)
     return a.size() == 0;
   if (a.size() != b->length())
     return false;
   return b->Is8Bit() ? Equal(a.data(), b->Characters8(), b->length())
                      : Equal(a.data(), b->Characters16(), b->length());
 }

 // Three-way comparison of two character buffers of lengths |l1| and |l2|.
 // Returns -1, 0 or 1. The first differing character within the common
 // prefix decides the result; if there is none, the shorter buffer orders
 // first.
 template <typename CharacterType1, typename CharacterType2>
 static inline int CodePointCompare(unsigned l1,
                                    unsigned l2,
                                    const CharacterType1* c1,
                                    const CharacterType2* c2) {
   const unsigned common_length = l2 < l1 ? l2 : l1;
   for (unsigned i = 0; i < common_length; ++i) {
     if (c1[i] == c2[i])
       continue;
     return c1[i] > c2[i] ? 1 : -1;
   }

   // One buffer is a prefix of the other (or they are identical).
   if (l1 > l2)
     return 1;
   if (l1 < l2)
     return -1;
   return 0;
 }

 // Width-specific helpers for CodePointCompare(StringImpl*, StringImpl*).
 // Each forwards the lengths and character pointers of two non-null strings
 // to the templated CodePointCompare(). The accessors used DCHECK that each
 // string has the expected width.

 // Both strings are 8-bit.
 static inline int CodePointCompare8(const StringImpl* string1,
                                     const StringImpl* string2) {
   return CodePointCompare(string1->length(), string2->length(),
                           string1->Characters8(), string2->Characters8());
 }

 // Both strings are 16-bit.
 static inline int CodePointCompare16(const StringImpl* string1,
                                      const StringImpl* string2) {
   return CodePointCompare(string1->length(), string2->length(),
                           string1->Characters16(), string2->Characters16());
 }

 // |string1| is 8-bit and |string2| is 16-bit.
 static inline int CodePointCompare8To16(const StringImpl* string1,
                                         const StringImpl* string2) {
   return CodePointCompare(string1->length(), string2->length(),
                           string1->Characters8(), string2->Characters16());
 }

 // Three-way comparison of two possibly-null strings; returns -1, 0 or 1.
 // A null string compares equal to an empty string and before any non-empty
 // one. Otherwise the comparison is dispatched on the character widths of
 // the two strings.
 static inline int CodePointCompare(const StringImpl* string1,
                                    const StringImpl* string2) {
   if (!string1) {
     const bool other_has_characters = string2 && string2->length();
     return other_has_characters ? -1 : 0;
   }

   if (!string2)
     return string1->length() ? 1 : 0;

   const bool first_is_narrow = string1->Is8Bit();
   const bool second_is_narrow = string2->Is8Bit();
   if (first_is_narrow && second_is_narrow)
     return CodePointCompare8(string1, string2);
   if (first_is_narrow)
     return CodePointCompare8To16(string1, string2);
   // Only an (8-bit, 16-bit) helper exists, so swap the operands and negate
   // the result for the (16-bit, 8-bit) case.
   if (second_is_narrow)
     return -CodePointCompare8To16(string2, string1);
   return CodePointCompare16(string1, string2);
 }

 // Returns true if |c| counts as a space or newline.
 static inline bool IsSpaceOrNewline(UChar c) {
   // Outside the ASCII range (> 0x7F), defer to the Unicode direction class.
   if (c > 0x7F)
     return WTF::Unicode::Direction(c) == WTF::Unicode::kWhiteSpaceNeutral;
   // Use IsASCIISpace() for the ASCII range.
   // This will include newlines, which aren't included in Unicode DirWS.
   return WTF::IsASCIISpace(c);
 }

 // Returns a new string created from this string's characters, keeping the
 // same character width.
 inline scoped_refptr<StringImpl> StringImpl::IsolatedCopy() const {
   return Is8Bit() ? Create(Characters8(), length_)
                   : Create(Characters16(), length_);
 }

 // Appends up to |length| characters of this string, beginning at |start|, to
 // |result| by calling result.Append(pointer, count) with an LChar or UChar
 // pointer according to Is8Bit(). The count is clamped to the characters
 // available after |start|; nothing is appended when that count is zero.
 //
 // A |start| at or beyond the end of the string appends nothing. Without the
 // explicit guard, |length_ - start| would wrap around for start > length_
 // (unsigned arithmetic), defeating the clamp and letting Append() read past
 // the end of the character data.
 template <typename BufferType>
 inline void StringImpl::AppendTo(BufferType& result,
                                  unsigned start,
                                  unsigned length) const {
   if (start >= length_)
     return;
   const unsigned number_of_characters_to_copy =
       std::min(length, length_ - start);
   if (!number_of_characters_to_copy)
     return;
   if (Is8Bit())
     result.Append(Characters8() + start, number_of_characters_to_copy);
   else
     result.Append(Characters16() + start, number_of_characters_to_copy);
 }

 // Prepends up to |length| characters of this string, beginning at |start|,
 // to |result| by calling result.Prepend(pointer, count) with an LChar or
 // UChar pointer according to Is8Bit(). The count is clamped to the
 // characters available after |start|; nothing is prepended when that count
 // is zero.
 //
 // A |start| at or beyond the end of the string prepends nothing. Without the
 // explicit guard, |length_ - start| would wrap around for start > length_
 // (unsigned arithmetic), defeating the clamp and letting Prepend() read past
 // the end of the character data.
 template <typename BufferType>
 inline void StringImpl::PrependTo(BufferType& result,
                                   unsigned start,
                                   unsigned length) const {
   if (start >= length_)
     return;
   const unsigned number_of_characters_to_copy =
       std::min(length, length_ - start);
   if (!number_of_characters_to_copy)
     return;
   if (Is8Bit())
     result.Prepend(Characters8() + start, number_of_characters_to_copy);
   else
     result.Prepend(Characters16() + start, number_of_characters_to_copy);
 }

 // TODO(rob.buis): possibly find a better place for this function.
 // Turns a UChar32 to uppercase based on |locale_identifier|. Defined outside
 // this header.
 WTF_EXPORT UChar32 ToUpper(UChar32, const AtomicString& locale_identifier);

 // Forward declaration; StringHash is defined outside this header.
 struct StringHash;

 // StringHash is the default hash for StringImpl* and scoped_refptr<StringImpl>
 // The primary DefaultHash template is only declared here; the two
 // specializations below name StringHash as the |Hash| type for those keys.
 template <typename T>
 struct DefaultHash;
 template <>
 struct DefaultHash<StringImpl*> {
   typedef StringHash Hash;
 };
 template <>
 struct DefaultHash<scoped_refptr<StringImpl>> {
   typedef StringHash Hash;
 };

 }  // namespace WTF

 // Make these WTF names usable without qualification at global scope.
 using WTF::StringImpl;
 using WTF::kTextCaseASCIIInsensitive;
 using WTF::kTextCaseUnicodeInsensitive;
 using WTF::kTextCaseSensitive;
 using WTF::TextCaseSensitivity;
 using WTF::Equal;
 using WTF::EqualNonNull;
 using WTF::LengthOfNullTerminatedString;
 using WTF::ReverseFind;

 #endif