| // Copyright 2011 the V8 project authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef V8_PARSING_SCANNER_CHARACTER_STREAMS_H_ |
| #define V8_PARSING_SCANNER_CHARACTER_STREAMS_H_ |
| |
| #include <algorithm> |
| |
| #include "include/v8.h" // for v8::ScriptCompiler |
| #include "src/globals.h" |
| |
| namespace v8 { |
| namespace internal { |
| |
| template <typename T> |
| class Handle; |
| template <typename Char> |
| class CharacterStream; |
| class RuntimeCallStats; |
| class String; |
| |
| // Abstract interface through which source characters are pulled one code |
| // unit at a time. Concrete streams are handed out by the static For() and |
| // ForTesting() factories declared below. |
| class V8_EXPORT_PRIVATE ScannerStream { |
|  public: |
|   // Value reported in place of a code unit once the input is exhausted. |
|   static const uc32 kEndOfInput = -1; |
| |
|   // Factories taking a String handle; the second overload additionally takes |
|   // a start and an end position. |
|   static ScannerStream* For(Isolate* isolate, Handle<String> data); |
|   static ScannerStream* For(Isolate* isolate, Handle<String> data, |
|                             int start_pos, int end_pos); |
|   // Factory taking an external source stream together with its encoding. |
|   static ScannerStream* For(ScriptCompiler::ExternalSourceStream* source_stream, |
|                             ScriptCompiler::StreamedSource::Encoding encoding, |
|                             RuntimeCallStats* stats); |
| |
|   // Test-only factories; the second overload takes an explicit length. |
|   static std::unique_ptr<CharacterStream<uint16_t>> ForTesting( |
|       const char* data); |
|   static std::unique_ptr<CharacterStream<uint16_t>> ForTesting(const char* data, |
|                                                                size_t length); |
| |
|   // Returns true if the stream could access the V8 heap after construction. |
|   virtual bool can_access_heap() = 0; |
| |
|   // Cursor operations every stream has to provide: read the next code unit, |
|   // jump to a position, report the current position, and step back by one. |
|   virtual uc32 Advance() = 0; |
|   virtual void Seek(size_t pos) = 0; |
|   virtual size_t pos() const = 0; |
|   virtual void Back() = 0; |
| |
|   virtual ~ScannerStream() = default; |
| }; |
| |
| // Partial ScannerStream implementation over blocks of code units of type |
| // Char. The cursor moves inside the block [buffer_start_, buffer_end_); |
| // whenever it leaves that block, the subclass-provided ReadBlock() is asked |
| // for new data. |
| template <typename Char> |
| class CharacterStream : public ScannerStream { |
|  public: |
|   // Returns and advances past the next UTF-16 code unit in the input |
|   // stream. If there are no more code units it returns kEndOfInput. |
|   inline uc32 Advance() final { |
|     if (V8_LIKELY(buffer_cursor_ < buffer_end_)) { |
|       return static_cast<uc32>(*(buffer_cursor_++)); |
|     } else if (ReadBlockChecked()) { |
|       return static_cast<uc32>(*(buffer_cursor_++)); |
|     } else { |
|       // Note: currently the following increment is necessary to avoid a |
|       // parser problem! The scanner treats the final kEndOfInput as |
|       // a code unit with a position, and does math relative to that |
|       // position. |
|       buffer_cursor_++; |
|       return kEndOfInput; |
|     } |
|   } |
| |
|   // Skips code units until one satisfies 'check', then returns that code unit |
|   // and leaves the cursor just past it. If the input ends before any code |
|   // unit satisfies 'check', returns kEndOfInput. |
|   // 'check' is invoked with each candidate code unit converted to uc32 and |
|   // its result is used as a boolean. |
|   template <typename FunctionType> |
|   V8_INLINE uc32 AdvanceUntil(FunctionType check) { |
|     while (true) { |
|       auto next_cursor_pos = |
|           std::find_if(buffer_cursor_, buffer_end_, [&check](Char raw_c0) { |
|             uc32 c0 = static_cast<uc32>(raw_c0); |
|             return check(c0); |
|           }); |
| |
|       if (next_cursor_pos == buffer_end_) { |
|         // Nothing matched in the current block; try to load the next one. |
|         buffer_cursor_ = buffer_end_; |
|         if (!ReadBlockChecked()) { |
|           // Same end-of-input convention as Advance(): the cursor is moved |
|           // past the kEndOfInput "position". |
|           buffer_cursor_++; |
|           return kEndOfInput; |
|         } |
|       } else { |
|         buffer_cursor_ = next_cursor_pos + 1; |
|         return static_cast<uc32>(*next_cursor_pos); |
|       } |
|     } |
|   } |
| |
|   // Goes back by one character in the input stream. |
|   // This undoes the most recent Advance(). |
|   inline void Back() final { |
|     // The common case - the previous character lies within |
|     // buffer_start_ .. buffer_end_ - is handled locally. |
|     // Otherwise, a new block is requested. |
|     if (V8_LIKELY(buffer_cursor_ > buffer_start_)) { |
|       buffer_cursor_--; |
|     } else { |
|       ReadBlockAt(pos() - 1); |
|     } |
|   } |
| |
|   // Goes back by two characters in the input stream. (This is the same as |
|   // calling Back() twice. But Back() may - in some instances - do substantial |
|   // work. Back2() guarantees this work will be done only once.) |
|   inline void Back2() { |
|     // Compare distances instead of computing buffer_cursor_ - 2: that |
|     // expression would form a pointer before buffer_start_ (undefined |
|     // behavior) whenever the cursor is fewer than two code units into the |
|     // block. |
|     if (V8_LIKELY(buffer_cursor_ - buffer_start_ >= 2)) { |
|       buffer_cursor_ -= 2; |
|     } else { |
|       ReadBlockAt(pos() - 2); |
|     } |
|   } |
| |
|   // Current position in the stream: the stream position of buffer_start_ |
|   // plus the cursor's offset into the block. |
|   inline size_t pos() const final { |
|     return buffer_pos_ + (buffer_cursor_ - buffer_start_); |
|   } |
| |
|   // Moves the cursor to stream position 'pos'. Positions inside the current |
|   // block are handled by adjusting the cursor; anything else requests a new |
|   // block. |
|   inline void Seek(size_t pos) final { |
|     if (V8_LIKELY(pos >= buffer_pos_ && |
|                   pos < (buffer_pos_ + (buffer_end_ - buffer_start_)))) { |
|       buffer_cursor_ = buffer_start_ + (pos - buffer_pos_); |
|     } else { |
|       ReadBlockAt(pos); |
|     } |
|   } |
| |
|   // Returns true if the stream could access the V8 heap after construction. |
|   // Still pure virtual here; 'override' makes the compiler verify that this |
|   // declaration keeps matching the one in ScannerStream. |
|   bool can_access_heap() override = 0; |
| |
|  protected: |
|   // The pointers are typed on Char so that they match the buffer_*_ members |
|   // for every instantiation, not only CharacterStream<uint16_t>. |
|   CharacterStream(const Char* buffer_start, const Char* buffer_cursor, |
|                   const Char* buffer_end, size_t buffer_pos) |
|       : buffer_start_(buffer_start), |
|         buffer_cursor_(buffer_cursor), |
|         buffer_end_(buffer_end), |
|         buffer_pos_(buffer_pos) {} |
|   // Starts with no block loaded, at stream position 0. |
|   CharacterStream() : CharacterStream(nullptr, nullptr, nullptr, 0) {} |
| |
|   // Calls ReadBlock() and verifies its post-conditions in debug builds. |
|   // Returns what ReadBlock() returned. |
|   bool ReadBlockChecked() { |
|     size_t position = pos(); |
|     USE(position); |
|     bool success = ReadBlock(); |
| |
|     // Post-conditions: 1, We should always be at the right position. |
|     //                  2, Cursor should be inside the buffer. |
|     //                  3, We should have more characters available iff success. |
|     DCHECK_EQ(pos(), position); |
|     DCHECK_LE(buffer_cursor_, buffer_end_); |
|     DCHECK_LE(buffer_start_, buffer_cursor_); |
|     DCHECK_EQ(success, buffer_cursor_ < buffer_end_); |
|     return success; |
|   } |
| |
|   // Repositions the stream at 'new_pos' and requests a block for it. |
|   void ReadBlockAt(size_t new_pos) { |
|     // The callers of this method (Back/Back2/Seek) should handle the easy |
|     // case (seeking within the current buffer), and we should only get here |
|     // if we actually require new data. |
|     // (This is really an efficiency check, not a correctness invariant.) |
|     DCHECK(new_pos < buffer_pos_ || |
|            new_pos >= buffer_pos_ + (buffer_end_ - buffer_start_)); |
| |
|     // Change pos() to point to new_pos. |
|     buffer_pos_ = new_pos; |
|     buffer_cursor_ = buffer_start_; |
|     DCHECK_EQ(pos(), new_pos); |
|     // The result is deliberately ignored: if no data is available the next |
|     // Advance() will try again and report kEndOfInput. |
|     ReadBlockChecked(); |
|   } |
| |
|   // Read more data, and update buffer_*_ to point to it. |
|   // Returns true if more data was available. |
|   // |
|   // ReadBlock() may modify any of the buffer_*_ members, but must ensure that |
|   // the result of pos() remains unaffected. |
|   // |
|   // Examples: |
|   // - a stream could fill a separate buffer. Then buffer_start_ and |
|   //   buffer_cursor_ would point to the beginning of the buffer, and |
|   //   buffer_pos_ would be the old pos(). |
|   // - a stream with existing buffer chunks would set buffer_start_ and |
|   //   buffer_end_ to cover the full chunk, and then buffer_cursor_ would |
|   //   point into the middle of the buffer, while buffer_pos_ would describe |
|   //   the start of the buffer. |
|   virtual bool ReadBlock() = 0; |
| |
|   // Current block is [buffer_start_, buffer_end_); buffer_cursor_ is the next |
|   // code unit to read and buffer_pos_ is the stream position of buffer_start_. |
|   const Char* buffer_start_; |
|   const Char* buffer_cursor_; |
|   const Char* buffer_end_; |
|   size_t buffer_pos_; |
| }; |
| |
| } // namespace internal |
| } // namespace v8 |
| |
| #endif // V8_PARSING_SCANNER_CHARACTER_STREAMS_H_ |