// src/objects/string.h - v8/v8.git - Git at Google

 // Copyright 2017 the V8 project authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #ifndef V8_OBJECTS_STRING_H_
 #define V8_OBJECTS_STRING_H_

 #include "src/base/bits.h"
 #include "src/objects/name.h"
 #include "src/unicode-decoder.h"

 // Has to be the last include (doesn't have include guards):
 #include "src/objects/object-macros.h"

 namespace v8 {
 namespace internal {

 class BigInt;

 // Selects whether String::ToCString() may produce output containing embedded
 // NUL characters.
 // NOTE(review): how NULs are treated under DISALLOW_NULLS is decided by the
 // implementation, which is not visible in this header.
 enum AllowNullsFlag { ALLOW_NULLS, DISALLOW_NULLS };
 // Selects the traversal strategy of String::ToCString().  According to that
 // method's documentation, ROBUST_STRING_TRAVERSAL tolerates unexpected data
 // and performs no heap allocation (useful when printing stack traces).
 enum RobustnessFlag { ROBUST_STRING_TRAVERSAL, FAST_STRING_TRAVERSAL };

 // The characteristics of a string are stored in its map.  Retrieving these
 // few bits of information is moderately expensive, involving two memory
 // loads where the second is dependent on the first.  To improve efficiency
 // the shape of the string is given its own class so that it can be retrieved
 // once and used for several string operations.  A StringShape is small enough
 // to be passed by value and is immutable, but be aware that flattening a
 // string can potentially alter its shape.  Also be aware that a GC caused by
 // something else can alter the shape of a string due to ConsString
 // shortcutting.  Keeping these restrictions in mind has proven to be error-
 // prone and so we no longer put StringShapes in variables unless there is a
 // concrete performance benefit at that particular point in the code.
 class StringShape BASE_EMBEDDED {
  public:
   // A shape can be captured from a string, from a map, or directly from an
   // instance type.  All constructors are explicit so that a shape is never
   // created by accidental conversion.
   inline explicit StringShape(const String* s);
   inline explicit StringShape(Map* s);
   inline explicit StringShape(InstanceType t);
   // Representation predicates.
   inline bool IsSequential();
   inline bool IsExternal();
   inline bool IsCons();
   inline bool IsSliced();
   inline bool IsThin();
   inline bool IsIndirect();
   // Combined representation + encoding predicates.
   inline bool IsExternalOneByte();
   inline bool IsExternalTwoByte();
   inline bool IsSequentialOneByte();
   inline bool IsSequentialTwoByte();
   inline bool IsInternalized();
   // Accessors for the individual tag groups.
   inline StringRepresentationTag representation_tag();
   inline uint32_t encoding_tag();
   inline uint32_t full_representation_tag();
   inline bool HasOnlyOneByteChars();
 #ifdef DEBUG
   // DEBUG builds additionally expose the raw type bits and track whether the
   // shape is still considered valid.
   inline uint32_t type() { return type_; }
   inline void invalidate() { valid_ = false; }
   inline bool valid() { return valid_; }
 #else
   // In non-DEBUG builds there is no validity tracking; invalidate() is a
   // no-op so callers need no conditional compilation.
   inline void invalidate() {}
 #endif

  private:
   // Raw type bits this shape was constructed from.
   // NOTE(review): presumably the string's instance type; the constructors
   // that populate it are defined elsewhere.
   uint32_t type_;
 #ifdef DEBUG
   inline void set_valid() { valid_ = true; }
   // DEBUG-only flag toggled by set_valid() / invalidate().
   bool valid_;
 #else
   inline void set_valid() {}
 #endif
 };

 // The String abstract class captures JavaScript string values:
 //
 // Ecma-262:
 //  4.3.16 String Value
 //    A string value is a member of the type String and is a finite
 //    ordered sequence of zero or more 16-bit unsigned integer values.
 //
 // All string values have a length field.
 class String : public Name {
  public:
   enum Encoding { ONE_BYTE_ENCODING, TWO_BYTE_ENCODING };

   // Range object over (part of) a string, exposing begin()/end() iterators.
   class SubStringRange {
    public:
     // |first| defaults to 0 and |length| to -1.
     // NOTE(review): -1 presumably means "up to the end of the string";
     // confirm against the inline definition.
     explicit inline SubStringRange(String* string, int first = 0,
                                    int length = -1);
     // Iterator type; only forward-declared here.
     class iterator;
     inline iterator begin();
     inline iterator end();

    private:
     String* string_;
     int first_;
     int length_;
   };

   // The flat character data of a String, as returned by
   // String::GetFlatContent().
   // Only a flat string has flat content; that content is a contiguous run of
   // either one-byte chars or two-byte UC16 code units.  For a non-flat string
   // the structure carries no content and IsFlat() reports false.
   class FlatContent {
    public:
     // True if the string was flat, i.e. this structure holds content.
     bool IsFlat() const { return IsOneByte() || IsTwoByte(); }
     // True if the content is stored as one-byte chars.
     bool IsOneByte() const { return ONE_BYTE == state_; }
     // True if the content is stored as two-byte chars.
     bool IsTwoByte() const { return TWO_BYTE == state_; }

     // The one-byte content as a vector.  Must only be called when
     // IsOneByte() holds.
     Vector<const uint8_t> ToOneByteVector() const {
       DCHECK_EQ(ONE_BYTE, state_);
       Vector<const uint8_t> one_byte_chars(onebyte_start, length_);
       return one_byte_chars;
     }
     // The two-byte content as a vector.  Must only be called when
     // IsTwoByte() holds.
     Vector<const uc16> ToUC16Vector() const {
       DCHECK_EQ(TWO_BYTE, state_);
       Vector<const uc16> two_byte_chars(twobyte_start, length_);
       return two_byte_chars;
     }

     // Character at position |i|, widened to uc16 regardless of the
     // underlying encoding.  Requires flat content.
     uc16 Get(int i) const {
       DCHECK(i < length_);
       DCHECK(state_ != NON_FLAT);
       return IsOneByte() ? onebyte_start[i] : twobyte_start[i];
     }

     // True if both structures start at the same character address.
     bool UsesSameString(const FlatContent& other) const {
       return other.onebyte_start == onebyte_start;
     }

    private:
     enum State { NON_FLAT, ONE_BYTE, TWO_BYTE };

     // Construction is private; instances are handed out by
     // String::GetFlatContent() (see the friend declarations below).
     explicit FlatContent(const uint8_t* start, int length)
         : onebyte_start(start),
           length_(length),
           state_(ONE_BYTE) {}
     explicit FlatContent(const uc16* start, int length)
         : twobyte_start(start),
           length_(length),
           state_(TWO_BYTE) {}
     // Empty (non-flat) content.
     FlatContent()
         : onebyte_start(nullptr),
           length_(0),
           state_(NON_FLAT) {}

     // Start of the character data; |state_| tells which member is meaningful.
     union {
       const uint8_t* onebyte_start;
       const uc16* twobyte_start;
     };
     int length_;
     State state_;

     friend class String;
     friend class IterableSubString;
   };

   template <typename Char>
   V8_INLINE Vector<const Char> GetCharVector();

   // Get and set the length of the string.
   inline int length() const;
   inline void set_length(int value);

   // Get and set the length of the string using acquire loads and release
   // stores.
   inline int synchronized_length() const;
   inline void synchronized_set_length(int value);

   // Returns whether this string has only one-byte chars, i.e. all of them can
   // be one-byte encoded.  This might be the case even if the string is
   // two-byte.  Such strings may appear when the embedder prefers
   // two-byte external representations even for one-byte data.
   inline bool IsOneByteRepresentation() const;
   inline bool IsTwoByteRepresentation() const;

   // Cons and slices have an encoding flag that may not represent the actual
   // encoding of the underlying string.  This is taken into account here.
   // Requires: this->IsFlat()
   inline bool IsOneByteRepresentationUnderneath();
   inline bool IsTwoByteRepresentationUnderneath();

   // NOTE: this should be considered only a hint.  False negatives are
   // possible.
   inline bool HasOnlyOneByteChars();

   // Set an individual two byte char in the string.
   inline void Set(int index, uint16_t value);
   // Get individual two byte char in the string.  Repeated calls
   // to this method are not efficient unless the string is flat.
   V8_INLINE uint16_t Get(int index);

   // ES6 section 7.1.3.1 ToNumber Applied to the String Type
   static Handle<Object> ToNumber(Isolate* isolate, Handle<String> subject);

   // Flattens the string.  Checks first inline to see if it is
   // necessary.  Does nothing if the string is not a cons string.
   // Flattening allocates a sequential string with the same data as
   // the given string and mutates the cons string to a degenerate
   // form, where the first component is the new sequential string and
   // the second component is the empty string.  If allocation fails,
   // this function returns a failure.  If flattening succeeds, this
   // function returns the sequential string that is now the first
   // component of the cons string.
   //
   // Degenerate cons strings are handled specially by the garbage
   // collector (see IsShortcutCandidate).

   static inline Handle<String> Flatten(Isolate* isolate, Handle<String> string,
                                        PretenureFlag pretenure = NOT_TENURED);

   // Tries to return the content of a flat string as a structure holding either
   // a flat vector of char or of uc16.
   // If the string isn't flat, and therefore doesn't have flat content, the
   // returned structure will report so, and can't provide a vector of either
   // kind.
   FlatContent GetFlatContent();

   // Returns the parent of a sliced string or first part of a flat cons string.
   // Requires: StringShape(this).IsIndirect() && this->IsFlat()
   inline String* GetUnderlying();

   // String relational comparison, implemented according to ES6 section 7.2.11
   // Abstract Relational Comparison (step 5): The comparison of Strings uses a
   // simple lexicographic ordering on sequences of code unit values. There is no
   // attempt to use the more complex, semantically oriented definitions of
   // character or string equality and collating order defined in the Unicode
   // specification. Therefore String values that are canonically equal according
   // to the Unicode standard could test as unequal. In effect this algorithm
   // assumes that both Strings are already in normalized form. Also, note that
   // for strings containing supplementary characters, lexicographic ordering on
   // sequences of UTF-16 code unit values differs from that on sequences of code
   // point values.
   V8_WARN_UNUSED_RESULT static ComparisonResult Compare(Isolate* isolate,
                                                         Handle<String> x,
                                                         Handle<String> y);

   // Perform ES6 21.1.3.8, including checking arguments.
   static Object* IndexOf(Isolate* isolate, Handle<Object> receiver,
                          Handle<Object> search, Handle<Object> position);
   // Perform string match of pattern on subject, starting at start index.
   // Caller must ensure that 0 <= start_index <= sub->length(), as this does not
   // check any arguments.
   static int IndexOf(Isolate* isolate, Handle<String> receiver,
                      Handle<String> search, int start_index);

   static Object* LastIndexOf(Isolate* isolate, Handle<Object> receiver,
                              Handle<Object> search, Handle<Object> position);

   // Abstract view of a single match and its capture groups, in the form
   // GetSubstitution needs to consume it.
   class Match {
    public:
     // The matched text and the text surrounding it.
     virtual Handle<String> GetMatch() = 0;
     virtual Handle<String> GetPrefix() = 0;
     virtual Handle<String> GetSuffix() = 0;

     // State of a named capture:
     //   INVALID   - the name is not specified in the pattern,
     //   UNMATCHED - specified, but not matched in the current string,
     //   MATCHED   - specified and matched.
     enum CaptureState { INVALID, UNMATCHED, MATCHED };

     // Access to numbered and named capture groups.
     virtual int CaptureCount() = 0;
     virtual bool HasNamedCaptures() = 0;
     virtual MaybeHandle<String> GetCapture(int i, bool* capture_exists) = 0;
     virtual MaybeHandle<String> GetNamedCapture(Handle<String> name,
                                                 CaptureState* state) = 0;

     // Virtual so that implementations can be destroyed through a Match*.
     virtual ~Match() = default;
   };

   // ES#sec-getsubstitution
   // GetSubstitution(matched, str, position, captures, replacement)
   // Expand the $-expressions in the string and return a new string with
   // the result.
   // A {start_index} can be passed to specify where to start scanning the
   // replacement string.
   V8_WARN_UNUSED_RESULT static MaybeHandle<String> GetSubstitution(
       Isolate* isolate, Match* match, Handle<String> replacement,
       int start_index = 0);

   // String equality operations.
   inline bool Equals(String* other);
   inline static bool Equals(Isolate* isolate, Handle<String> one,
                             Handle<String> two);
   bool IsUtf8EqualTo(Vector<const char> str, bool allow_prefix_match = false);

   // Dispatches to Is{One,Two}ByteEqualTo.
   template <typename Char>
   bool IsEqualTo(Vector<const Char> str);

   bool IsOneByteEqualTo(Vector<const uint8_t> str);
   bool IsTwoByteEqualTo(Vector<const uc16> str);

   // Return a UTF8 representation of the string.  The string is null
   // terminated but may optionally contain nulls.  Length is returned
   // in length_output if length_output is not a null pointer.  The string
   // should be nearly flat, otherwise the performance of this method may
   // be very slow (quadratic in the length).  Setting robustness_flag to
   // ROBUST_STRING_TRAVERSAL invokes behaviour that is robust.  This means it
   // handles unexpected data without causing assert failures and it does not
   // do any heap allocations.  This is useful when printing stack traces.
   //
   // The first overload converts the |length| characters starting at |offset|;
   // the second takes no range arguments.
   std::unique_ptr<char[]> ToCString(AllowNullsFlag allow_nulls,
                                     RobustnessFlag robustness_flag, int offset,
                                     int length, int* length_output = nullptr);
   std::unique_ptr<char[]> ToCString(
       AllowNullsFlag allow_nulls = DISALLOW_NULLS,
       RobustnessFlag robustness_flag = FAST_STRING_TRAVERSAL,
       int* length_output = nullptr);

   bool ComputeArrayIndex(uint32_t* index);

   // Externalization.
   bool MakeExternal(v8::String::ExternalStringResource* resource);
   bool MakeExternal(v8::String::ExternalOneByteStringResource* resource);
   bool SupportsExternalization();

   // Conversion.
   inline bool AsArrayIndex(uint32_t* index);
   uint32_t inline ToValidIndex(Object* number);

   // Trimming.
   enum TrimMode { kTrim, kTrimStart, kTrimEnd };
   static Handle<String> Trim(Isolate* isolate, Handle<String> string,
                              TrimMode mode);

   DECL_CAST(String)

   void PrintOn(FILE* out);

   // For use during stack traces.  Performs rudimentary sanity check.
   bool LooksValid();

   // Dispatched behavior.
   void StringShortPrint(StringStream* accumulator, bool show_details = true);
   void PrintUC16(std::ostream& os, int start = 0, int end = -1);  // NOLINT
 #if defined(DEBUG) || defined(OBJECT_PRINT)
   char* ToAsciiArray();
 #endif
   DECL_PRINTER(String)
   DECL_VERIFIER(String)

   inline bool IsFlat();

   // Layout description.
   static const int kLengthOffset = Name::kSize;
   static const int kSize = kLengthOffset + kPointerSize;

   // Max char codes.
   static const int32_t kMaxOneByteCharCode = unibrow::Latin1::kMaxChar;
   static const uint32_t kMaxOneByteCharCodeU = unibrow::Latin1::kMaxChar;
   static const int kMaxUtf16CodeUnit = 0xffff;
   static const uint32_t kMaxUtf16CodeUnitU = kMaxUtf16CodeUnit;
   static const uc32 kMaxCodePoint = 0x10ffff;

   // Maximal string length.
   // The max length is different on 32 and 64 bit platforms. Max length for a
   // 32-bit platform is ~268.4M chars. On 64-bit platforms, max length is
   // ~1.073B chars. The limit on 64-bit is so that SeqTwoByteString::kMaxSize
   // can fit in a 32bit int: 2^31 - 1 is the max positive int, minus one bit as
   // each char needs two bytes, subtract 24 bytes for the string header size.

   // See include/v8.h for the definition.
   static const int kMaxLength = v8::String::kMaxLength;
   static_assert(kMaxLength <= (Smi::kMaxValue / 2 - kSize),
                 "Unexpected max String length");

   // Max length for computing hash. For strings longer than this limit the
   // string length is used as the hash value.
   static const int kMaxHashCalcLength = 16383;

   // Limit for truncation in short printing.
   static const int kMaxShortPrintLength = 1024;

   // Support for regular expressions.
   const uc16* GetTwoByteData(unsigned start);

   // Helper function for flattening strings.
   template <typename sinkchar>
   static void WriteToFlat(String* source, sinkchar* sink, int from, int to);

   // Scans |chars| for the first byte above unibrow::Utf8::kMaxOneByteChar
   // (0x7F).  The result is not always the exact index of that byte: when the
   // byte is discovered by the word-at-a-time scan, the index of the start of
   // the aligned word containing it is returned instead.  A result >= |length|
   // means that no such byte was found.
   static inline int NonAsciiStart(const char* chars, int length) {
     const char* const start = chars;
     const char* const limit = chars + length;
     const char* cursor = chars;

     // Only take the word-wise path when the input is at least kIntptrSize
     // bytes long.
     if (length >= kIntptrSize) {
       // Byte-wise prologue: advance until |cursor| is word-aligned.
       for (; !IsAligned(reinterpret_cast<intptr_t>(cursor), sizeof(uintptr_t));
            ++cursor) {
         if (static_cast<uint8_t>(*cursor) > unibrow::Utf8::kMaxOneByteChar) {
           return static_cast<int>(cursor - start);
         }
       }
       // Word-wise scan.  The mask has the top bit of every byte set, so a
       // non-zero AND means at least one byte in the word exceeds 0x7F.
       DCHECK_EQ(unibrow::Utf8::kMaxOneByteChar, 0x7F);
       const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFF * 0x80;
       for (; cursor + sizeof(uintptr_t) <= limit;
            cursor += sizeof(uintptr_t)) {
         const uintptr_t word = *reinterpret_cast<const uintptr_t*>(cursor);
         if (word & non_one_byte_mask) {
           return static_cast<int>(cursor - start);
         }
       }
     }
     // Byte-wise epilogue for whatever is left after the last full word (or
     // for the whole input when it was too short for the word-wise path).
     for (; cursor < limit; ++cursor) {
       if (static_cast<uint8_t>(*cursor) > unibrow::Utf8::kMaxOneByteChar) {
         return static_cast<int>(cursor - start);
       }
     }

     return static_cast<int>(cursor - start);
   }

   // True if NonAsciiStart() finds no byte above 0x7F in the first |length|
   // bytes of |chars|.
   static inline bool IsAscii(const char* chars, int length) {
     const int first_non_ascii = NonAsciiStart(chars, length);
     return first_non_ascii >= length;
   }

   // Overload for unsigned byte buffers; forwards to NonAsciiStart() after
   // reinterpreting the pointer as const char*.
   static inline bool IsAscii(const uint8_t* chars, int length) {
     const char* const bytes = reinterpret_cast<const char*>(chars);
     const int first_non_ascii = NonAsciiStart(bytes, length);
     return first_non_ascii >= length;
   }

   // Returns the index of the first code unit in |chars| that is greater than
   // kMaxOneByteCharCodeU, or the number of code units scanned if there is
   // none.
   static inline int NonOneByteStart(const uc16* chars, int length) {
     int index = 0;
     // Stop at the first code unit that does not fit in one byte.
     while (index < length && chars[index] <= kMaxOneByteCharCodeU) {
       ++index;
     }
     return index;
   }

   // True if NonOneByteStart() finds no code unit above kMaxOneByteCharCodeU
   // in the first |length| code units of |chars|.
   static inline bool IsOneByte(const uc16* chars, int length) {
     const int first_non_one_byte = NonOneByteStart(chars, length);
     return first_non_one_byte >= length;
   }

   template <class Visitor>
   static inline ConsString* VisitFlat(Visitor* visitor, String* string,
                                       int offset = 0);

   static Handle<FixedArray> CalculateLineEnds(Isolate* isolate,
                                               Handle<String> string,
                                               bool include_ending_line);

  private:
   friend class Name;
   friend class StringTableInsertionKey;
   friend class InternalizedStringKey;

   static Handle<String> SlowFlatten(Isolate* isolate, Handle<ConsString> cons,
                                     PretenureFlag tenure);

   // Slow case of String::Equals.  This implementation works on any strings
   // but it is most efficient on strings that are almost flat.
   bool SlowEquals(String* other);

   static bool SlowEquals(Isolate* isolate, Handle<String> one,
                          Handle<String> two);

   // Slow case of AsArrayIndex.
   V8_EXPORT_PRIVATE bool SlowAsArrayIndex(uint32_t* index);

   // Compute and set the hash code.
   uint32_t ComputeAndSetHash(Isolate* isolate);

   DISALLOW_IMPLICIT_CONSTRUCTORS(String);
 };

 // The SeqString abstract class captures sequential string values.
 // It adds no fields of its own: its header size equals String::kSize.
 class SeqString : public String {
  public:
   DECL_CAST(SeqString)

   // Layout description.
   static const int kHeaderSize = String::kSize;

   // Truncate the string in-place if possible and return the result.
   // In case of new_length == 0, the empty string is returned without
   // truncating the original string.
   V8_WARN_UNUSED_RESULT static Handle<String> Truncate(Handle<SeqString> string,
                                                        int new_length);

  private:
   DISALLOW_IMPLICIT_CONSTRUCTORS(SeqString);
 };

 // The SeqOneByteString class captures sequential one-byte string objects.
 // Each character in a SeqOneByteString is a one-byte character.
 class SeqOneByteString : public SeqString {
  public:
   static const bool kHasOneByteEncoding = true;

   // Dispatched behavior.
   inline uint16_t SeqOneByteStringGet(int index);
   inline void SeqOneByteStringSet(int index, uint16_t value);

   // Get the address of the characters in this string.
   inline Address GetCharsAddress();

   inline uint8_t* GetChars();

   // Clear uninitialized padding space. This ensures that the snapshot content
   // is deterministic.
   void clear_padding();

   DECL_CAST(SeqOneByteString)

   // Garbage collection support.  This method is called by the
   // garbage collector to compute the actual size of a SeqOneByteString
   // instance.
   inline int SeqOneByteStringSize(InstanceType instance_type);

   // Computes the size for a SeqOneByteString instance of a given length:
   // the header plus length * kCharSize, rounded by OBJECT_POINTER_ALIGN.
   static int SizeFor(int length) {
     return OBJECT_POINTER_ALIGN(kHeaderSize + length * kCharSize);
   }

   // Maximal memory usage for a single sequential one-byte string.
   static const int kMaxCharsSize = kMaxLength;
   static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize);
   // A maximally sized instance must be able to hold String::kMaxLength chars.
   STATIC_ASSERT((kMaxSize - kHeaderSize) >= String::kMaxLength);

   class BodyDescriptor;
   // No weak fields.
   typedef BodyDescriptor BodyDescriptorWeak;

  private:
   DISALLOW_IMPLICIT_CONSTRUCTORS(SeqOneByteString);
 };

 // The SeqTwoByteString class captures sequential unicode string objects.
 // Each character in a SeqTwoByteString is a two-byte uint16_t.
 class SeqTwoByteString : public SeqString {
  public:
   static const bool kHasOneByteEncoding = false;

   // Dispatched behavior.
   inline uint16_t SeqTwoByteStringGet(int index);
   inline void SeqTwoByteStringSet(int index, uint16_t value);

   // Get the address of the characters in this string.
   inline Address GetCharsAddress();

   inline uc16* GetChars();

   // Clear uninitialized padding space. This ensures that the snapshot content
   // is deterministic.
   void clear_padding();

   // For regexp code.
   const uint16_t* SeqTwoByteStringGetData(unsigned start);

   DECL_CAST(SeqTwoByteString)

   // Garbage collection support.  This method is called by the
   // garbage collector to compute the actual size of a SeqTwoByteString
   // instance.
   inline int SeqTwoByteStringSize(InstanceType instance_type);

   // Computes the size for a SeqTwoByteString instance of a given length:
   // the header plus length * kShortSize, rounded by OBJECT_POINTER_ALIGN.
   static int SizeFor(int length) {
     return OBJECT_POINTER_ALIGN(kHeaderSize + length * kShortSize);
   }

   // Maximal memory usage for a single sequential two-byte string.
   static const int kMaxCharsSize = kMaxLength * 2;
   static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize);
   // A maximally sized instance must be able to hold String::kMaxLength
   // two-byte characters.
   STATIC_ASSERT(static_cast<int>((kMaxSize - kHeaderSize) / sizeof(uint16_t)) >=
                 String::kMaxLength);

   class BodyDescriptor;
   // No weak fields.
   typedef BodyDescriptor BodyDescriptorWeak;

  private:
   DISALLOW_IMPLICIT_CONSTRUCTORS(SeqTwoByteString);
 };

 // The ConsString class describes string values built by using the
 // addition operator on strings.  A ConsString is a pair where the
 // first and second components are pointers to other string values.
 // One or both components of a ConsString can be pointers to other
 // ConsStrings, creating a binary tree of ConsStrings where the leaves
 // are non-ConsString string values.  The string value represented by
 // a ConsString can be obtained by concatenating the leaf string
 // values in a left-to-right depth-first traversal of the tree.
 class ConsString : public String {
  public:
   // First string of the cons cell.
   inline String* first();
   // Doesn't check that the result is a string, even in debug mode.  This is
   // useful during GC where the mark bits confuse the checks.
   inline Object* unchecked_first();
   inline void set_first(Isolate* isolate, String* first,
                         WriteBarrierMode mode = UPDATE_WRITE_BARRIER);

   // Second string of the cons cell.
   inline String* second();
   // Doesn't check that the result is a string, even in debug mode.  This is
   // useful during GC where the mark bits confuse the checks.
   inline Object* unchecked_second();
   inline void set_second(Isolate* isolate, String* second,
                          WriteBarrierMode mode = UPDATE_WRITE_BARRIER);

   // Dispatched behavior.
   V8_EXPORT_PRIVATE uint16_t ConsStringGet(int index);

   DECL_CAST(ConsString)

   // Layout description: two pointer-sized fields placed after the
   // (pointer-size aligned) String header.
   static const int kFirstOffset = POINTER_SIZE_ALIGN(String::kSize);
   static const int kSecondOffset = kFirstOffset + kPointerSize;
   static const int kSize = kSecondOffset + kPointerSize;

   // Minimum length for a cons string.
   static const int kMinLength = 13;

   // NOTE(review): the descriptor range [kFirstOffset, kSecondOffset +
   // kPointerSize) presumably covers both pointer fields; confirm against
   // FixedBodyDescriptor's parameter meaning.
   typedef FixedBodyDescriptor<kFirstOffset, kSecondOffset + kPointerSize, kSize>
       BodyDescriptor;
   // No weak fields.
   typedef BodyDescriptor BodyDescriptorWeak;

   DECL_VERIFIER(ConsString)

  private:
   DISALLOW_IMPLICIT_CONSTRUCTORS(ConsString);
 };

 // The ThinString class describes string objects that are just references
 // to another string object. They are used for in-place internalization when
 // the original string cannot actually be internalized in-place: in these
 // cases, the original string is converted to a ThinString pointing at its
 // internalized version (which is allocated as a new object).
 // In terms of memory layout and most algorithms operating on strings,
 // ThinStrings can be thought of as "one-part cons strings".
 class ThinString : public String {
  public:
   // Actual string that this ThinString refers to.
   inline String* actual() const;
   inline HeapObject* unchecked_actual() const;
   inline void set_actual(String* s,
                          WriteBarrierMode mode = UPDATE_WRITE_BARRIER);

   V8_EXPORT_PRIVATE uint16_t ThinStringGet(int index);

   DECL_CAST(ThinString)
   DECL_VERIFIER(ThinString)

   // Layout description.
   static const int kActualOffset = String::kSize;
   static const int kSize = kActualOffset + kPointerSize;

   typedef FixedBodyDescriptor<kActualOffset, kSize, kSize> BodyDescriptor;
   // No weak fields.
   typedef BodyDescriptor BodyDescriptorWeak;

  private:
   DISALLOW_COPY_AND_ASSIGN(ThinString);
 };

 // The Sliced String class describes strings that are substrings of another
 // sequential string.  The motivation is to save time and memory when creating
 // a substring.  A Sliced String is described as a pointer to the parent,
 // the offset from the start of the parent string and the length.  Using
 // a Sliced String therefore requires unpacking of the parent string and
 // adding the offset to the start address.  Substrings of a Sliced String
 // are not nested, since the double indirection is simplified when creating
 // such a substring.
 // Currently missing features are:
 //  - handling externalized parent strings
 //  - external strings as parent
 //  - truncating sliced string to enable otherwise unneeded parent to be GC'ed.
 class SlicedString : public String {
  public:
   // The string this slice is a substring of.
   inline String* parent();
   inline void set_parent(Isolate* isolate, String* parent,
                          WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
   // Offset of the slice from the start of the parent string.
   inline int offset() const;
   inline void set_offset(int offset);

   // Dispatched behavior.
   V8_EXPORT_PRIVATE uint16_t SlicedStringGet(int index);

   DECL_CAST(SlicedString)

   // Layout description: parent pointer followed by a pointer-sized offset
   // slot, placed after the (pointer-size aligned) String header.
   static const int kParentOffset = POINTER_SIZE_ALIGN(String::kSize);
   static const int kOffsetOffset = kParentOffset + kPointerSize;
   static const int kSize = kOffsetOffset + kPointerSize;

   // Minimum length for a sliced string.
   static const int kMinLength = 13;

   typedef FixedBodyDescriptor<kParentOffset, kOffsetOffset + kPointerSize,
                               kSize>
       BodyDescriptor;
   // No weak fields.
   typedef BodyDescriptor BodyDescriptorWeak;

   DECL_VERIFIER(SlicedString)

  private:
   DISALLOW_IMPLICIT_CONSTRUCTORS(SlicedString);
 };

 // The ExternalString class describes string values that are backed by
 // a string resource that lies outside the V8 heap.  ExternalStrings
 // consist of the length field common to all strings and a pointer to the
 // external resource.  It is important to ensure (externally) that the
 // resource is not deallocated while the ExternalString is live in the
 // V8 heap.
 //
 // The API expects that all ExternalStrings are created through the
 // API.  Therefore, ExternalStrings should not be used internally.
 class ExternalString : public String {
  public:
   DECL_CAST(ExternalString)

   // Layout description.
   // kShortSize and kResourceDataOffset have the same value: a short external
   // string ends right after the resource pointer, exactly where the full
   // layout places its resource-data slot.
   static const int kResourceOffset = POINTER_SIZE_ALIGN(String::kSize);
   static const int kShortSize = kResourceOffset + kPointerSize;
   static const int kResourceDataOffset = kResourceOffset + kPointerSize;
   static const int kSize = kResourceDataOffset + kPointerSize;

   // Return whether external string is short (data pointer is not cached).
   inline bool is_short() const;
   // Size in bytes of the external payload.
   int ExternalPayloadSize() const;

   // Used in the serializer/deserializer.
   inline Address resource_as_address();
   inline void set_address_as_resource(Address address);
   inline uint32_t resource_as_uint32();
   inline void set_uint32_as_resource(uint32_t value);

   // The resource offset must match the constant in Internals.
   STATIC_ASSERT(kResourceOffset == Internals::kStringResourceOffset);

  private:
   DISALLOW_IMPLICIT_CONSTRUCTORS(ExternalString);
 };

 // The ExternalOneByteString class is an external string backed by a
 // one-byte string.
 class ExternalOneByteString : public ExternalString {
  public:
   static const bool kHasOneByteEncoding = true;

   typedef v8::String::ExternalOneByteStringResource Resource;

   // The underlying resource.
   inline const Resource* resource();

   // It is assumed that the previous resource is null. If it is not null, then
   // it is the responsibility of the caller to handle the previous resource.
   inline void SetResource(Isolate* isolate, const Resource* buffer);
   // Used only during serialization.
   inline void set_resource(const Resource* buffer);

   // Update the pointer cache to the external character array.
   // The cached pointer is always valid, as the external character array does
   // not move during lifetime.  Deserialization is the only exception, after
   // which the pointer cache has to be refreshed.
   inline void update_data_cache();

   inline const uint8_t* GetChars();

   // Dispatched behavior.
   inline uint16_t ExternalOneByteStringGet(int index);

   DECL_CAST(ExternalOneByteString)

   class BodyDescriptor;
   // No weak fields.
   typedef BodyDescriptor BodyDescriptorWeak;

  private:
   DISALLOW_IMPLICIT_CONSTRUCTORS(ExternalOneByteString);
 };

 // The ExternalTwoByteString class is an external string backed by a UTF-16
 // encoded string.
 class ExternalTwoByteString : public ExternalString {
  public:
   static const bool kHasOneByteEncoding = false;

   typedef v8::String::ExternalStringResource Resource;

   // The underlying string resource.
   inline const Resource* resource();

   // It is assumed that the previous resource is null. If it is not null, then
   // it is the responsibility of the caller to handle the previous resource.
   inline void SetResource(Isolate* isolate, const Resource* buffer);
   // Used only during serialization.
   inline void set_resource(const Resource* buffer);

   // Update the pointer cache to the external character array.
   // The cached pointer is always valid, as the external character array does
   // not move during lifetime.  Deserialization is the only exception, after
   // which the pointer cache has to be refreshed.
   inline void update_data_cache();

   inline const uint16_t* GetChars();

   // Dispatched behavior.
   inline uint16_t ExternalTwoByteStringGet(int index);

   // For regexp code.
   inline const uint16_t* ExternalTwoByteStringGetData(unsigned start);

   DECL_CAST(ExternalTwoByteString)

   class BodyDescriptor;
   // No weak fields.
   typedef BodyDescriptor BodyDescriptorWeak;

  private:
   DISALLOW_IMPLICIT_CONSTRUCTORS(ExternalTwoByteString);
 };

 // A flat string reader provides random access to the contents of a
 // string independent of the character width of the string.  The handle
 // must be valid as long as the reader is being used.
 class FlatStringReader : public Relocatable {
  public:
   // Reads from the string referenced by the handle |str|.
   FlatStringReader(Isolate* isolate, Handle<String> str);
   // Reads from a raw character vector instead of a heap string.
   FlatStringReader(Isolate* isolate, Vector<const char> input);
   // NOTE(review): presumably the Relocatable hook that refreshes the cached
   // state after a GC; confirm against Relocatable's declaration.
   void PostGarbageCollection();
   // Character at |index|, widened to uc32.
   inline uc32 Get(int index);
   // Character at |index|, returned as |Char|.
   template <typename Char>
   inline Char Get(int index);
   // Number of characters available to the reader.  Does not modify the
   // reader, hence const.
   int length() const { return length_; }

  private:
   String** str_;
   bool is_one_byte_;
   int length_;
   // Start of the character data.
   // NOTE(review): presumably interpreted according to |is_one_byte_|.
   const void* start_;
 };

 // This maintains an off-stack representation of the stack frames required
 // to traverse a ConsString, allowing an entirely iterative and restartable
 // traversal of the entire string.
 class ConsStringIterator {
  public:
   // Creates an iterator with nothing to traverse.  The scalar members have
   // default initializers, so Next() on a default-constructed iterator is
   // well defined and returns nullptr until Reset() is called with a string.
   inline ConsStringIterator() {}
   // Creates an iterator positioned at |offset| within |cons_string|.
   inline explicit ConsStringIterator(ConsString* cons_string, int offset = 0) {
     Reset(cons_string, offset);
   }
   // Restarts the traversal at |offset| within |cons_string|.  Passing
   // nullptr leaves the iterator empty.
   inline void Reset(ConsString* cons_string, int offset = 0) {
     depth_ = 0;
     // Next will always return nullptr.
     if (cons_string == nullptr) return;
     Initialize(cons_string, offset);
   }
   // Returns nullptr when complete.  |*offset_out| is always written; it is
   // set to 0 before the traversal continues.
   inline String* Next(int* offset_out) {
     *offset_out = 0;
     if (depth_ == 0) return nullptr;
     return Continue(offset_out);
   }

  private:
   static const int kStackSize = 32;
   // Use a mask instead of doing modulo operations for stack wrapping.
   static const int kDepthMask = kStackSize - 1;
   static_assert(base::bits::IsPowerOfTwo(kStackSize),
                 "kStackSize must be power of two");
   static inline int OffsetForDepth(int depth);

   inline void PushLeft(ConsString* string);
   inline void PushRight(ConsString* string);
   inline void AdjustMaximumDepth();
   inline void Pop();
   // True once the traversal has gone kStackSize levels deeper than the
   // frame array can represent.
   inline bool StackBlown() { return maximum_depth_ - depth_ == kStackSize; }
   void Initialize(ConsString* cons_string, int offset);
   String* Continue(int* offset_out);
   String* NextLeaf(bool* blew_stack);
   String* Search(int* offset_out);

   // Stack must always contain only frames for which right traversal
   // has not yet been performed.
   // Deliberately left without an initializer; entries are only meaningful
   // below the current depth.
   ConsString* frames_[kStackSize];
   ConsString* root_ = nullptr;
   int depth_ = 0;
   int maximum_depth_ = 0;
   int consumed_ = 0;
   DISALLOW_COPY_AND_ASSIGN(ConsStringIterator);
 };

 // Character-by-character reader over a string, starting at an optional
 // offset.  It embeds a ConsStringIterator.
 // NOTE(review): the iterator is presumably what lets it walk cons strings
 // without flattening them; confirm in the inline definitions.
 class StringCharacterStream {
  public:
   inline explicit StringCharacterStream(String* string, int offset = 0);
   // Returns the next character and advances the stream.
   // NOTE(review): callers presumably must check HasMore() first; confirm in
   // the inline definition.
   inline uint16_t GetNext();
   inline bool HasMore();
   // Re-targets the stream at |string|, starting at |offset|.
   inline void Reset(String* string, int offset = 0);
   // Visitor callbacks receiving a flat run of characters.
   // NOTE(review): presumably invoked through String::VisitFlat; confirm.
   inline void VisitOneByteString(const uint8_t* chars, int length);
   inline void VisitTwoByteString(const uint16_t* chars, int length);

  private:
   ConsStringIterator iter_;
   // Selects which member of the buffer union is in use.
   bool is_one_byte_;
   union {
     const uint8_t* buffer8_;
     const uint16_t* buffer16_;
   };
   // End of the current buffer, stored as a byte pointer for both encodings.
   const uint8_t* end_;
   DISALLOW_COPY_AND_ASSIGN(StringCharacterStream);
 };

 }  // namespace internal
 }  // namespace v8

 #include "src/objects/object-macros-undef.h"

 #endif  // V8_OBJECTS_STRING_H_