blob: e6bb3d12e136882042523a6fac28d1cf63ae0c8b [file] [log] [blame]
// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_PARSING_SCANNER_CHARACTER_STREAMS_H_
#define V8_PARSING_SCANNER_CHARACTER_STREAMS_H_
#include <algorithm>
#include <memory>
#include "include/v8.h" // for v8::ScriptCompiler
#include "src/globals.h"
namespace v8 {
namespace internal {
// Forward declarations for types that the ScannerStream interface below only
// names in parameter and return types. CharacterStream is defined later in
// this header.
template <typename T>
class Handle;
template <typename Char>
class CharacterStream;
class RuntimeCallStats;
class String;
// Abstract interface for a positioned stream of code units. Instances are
// obtained through the static factory functions; the operations themselves
// are pure virtual and implemented by subclasses (see CharacterStream).
class V8_EXPORT_PRIVATE ScannerStream {
 public:
  // Sentinel value reported by Advance() when no more input is available.
  static const uc32 kEndOfInput = -1;

  // Factories for streams over a heap string: the whole string, or the
  // range given by |start_pos| and |end_pos|.
  // NOTE(review): these return raw pointers; who owns the result is not
  // visible from this header - confirm against the callers.
  static ScannerStream* For(Isolate* isolate, Handle<String> data);
  static ScannerStream* For(Isolate* isolate, Handle<String> data,
                            int start_pos, int end_pos);

  // Factory for a stream backed by an embedder-provided external source
  // stream with the given |encoding|.
  static ScannerStream* For(
      ScriptCompiler::ExternalSourceStream* source_stream,
      ScriptCompiler::StreamedSource::Encoding encoding,
      RuntimeCallStats* stats);

  // Test-only factories over a C string; the second overload takes an
  // explicit |length|.
  static std::unique_ptr<CharacterStream<uint16_t>> ForTesting(
      const char* data);
  static std::unique_ptr<CharacterStream<uint16_t>> ForTesting(
      const char* data, size_t length);

  // Returns true if the stream could access the V8 heap after construction.
  virtual bool can_access_heap() = 0;

  // Returns the next code unit and moves past it, or kEndOfInput.
  virtual uc32 Advance() = 0;

  // Repositions the stream so that pos() == |pos|.
  virtual void Seek(size_t pos) = 0;

  // Current position within the stream.
  virtual size_t pos() const = 0;

  // Steps back by one code unit, undoing the latest Advance().
  virtual void Back() = 0;

  virtual ~ScannerStream() = default;
};
// Buffered ScannerStream over code units of type Char.
//
// The stream exposes a window [buffer_start_, buffer_end_) onto the input.
// buffer_cursor_ points at the next code unit to hand out and buffer_pos_ is
// the stream position that corresponds to buffer_start_. Subclasses supply
// the data by implementing ReadBlock().
template <typename Char>
class CharacterStream : public ScannerStream {
 public:
  // Returns and advances past the next UTF-16 code unit in the input
  // stream. If there are no more code units it returns kEndOfInput.
  inline uc32 Advance() final {
    if (V8_LIKELY(buffer_cursor_ < buffer_end_)) {
      return static_cast<uc32>(*(buffer_cursor_++));
    } else if (ReadBlockChecked()) {
      return static_cast<uc32>(*(buffer_cursor_++));
    } else {
      // Note: currently the following increment is necessary to avoid a
      // parser problem! The scanner treats the final kEndOfInput as
      // a code unit with a position, and does math relative to that
      // position.
      buffer_cursor_++;
      return kEndOfInput;
    }
  }

  // Skips code units until one satisfies |check|, then returns that code
  // unit and advances past it. |check| is invoked with each code unit
  // widened to uc32. If the input ends before a match is found, returns
  // kEndOfInput.
  template <typename FunctionType>
  V8_INLINE uc32 AdvanceUntil(FunctionType check) {
    while (true) {
      auto next_cursor_pos =
          std::find_if(buffer_cursor_, buffer_end_, [&check](Char raw_c0) {
            uc32 c0 = static_cast<uc32>(raw_c0);
            return check(c0);
          });

      if (next_cursor_pos == buffer_end_) {
        // No match in the current block: consume it and fetch the next one.
        buffer_cursor_ = buffer_end_;
        if (!ReadBlockChecked()) {
          // Same end-of-input position bump as in Advance().
          buffer_cursor_++;
          return kEndOfInput;
        }
      } else {
        buffer_cursor_ = next_cursor_pos + 1;
        return static_cast<uc32>(*next_cursor_pos);
      }
    }
  }

  // Go back by one character in the input stream.
  // This undoes the most recent Advance().
  inline void Back() final {
    // The common case - the previous character is within
    // buffer_start_ .. buffer_end_ - will be handled locally.
    // Otherwise, a new block is requested.
    if (V8_LIKELY(buffer_cursor_ > buffer_start_)) {
      buffer_cursor_--;
    } else {
      ReadBlockAt(pos() - 1);
    }
  }

  // Go back by two characters in the input stream. (This is the same as
  // calling Back() twice. But Back() may - in some instances - do substantial
  // work. Back2() guarantees this work will be done only once.)
  inline void Back2() {
    // Compare distances rather than computing buffer_cursor_ - 2, which
    // would form a pointer before the start of the buffer (undefined
    // behaviour) whenever fewer than two code units precede the cursor.
    if (V8_LIKELY(buffer_cursor_ - buffer_start_ >= 2)) {
      buffer_cursor_ -= 2;
    } else {
      ReadBlockAt(pos() - 2);
    }
  }

  // Stream position of the cursor: the position of the buffer start plus the
  // cursor's offset into the buffer.
  inline size_t pos() const final {
    return buffer_pos_ + (buffer_cursor_ - buffer_start_);
  }

  // Moves the cursor to stream position |pos|. Positions inside the current
  // buffer are handled by pointer adjustment; anything else (including the
  // position just past the buffer end) requests a new block.
  inline void Seek(size_t pos) final {
    if (V8_LIKELY(pos >= buffer_pos_ &&
                  pos < (buffer_pos_ + (buffer_end_ - buffer_start_)))) {
      buffer_cursor_ = buffer_start_ + (pos - buffer_pos_);
    } else {
      ReadBlockAt(pos);
    }
  }

  // Returns true if the stream could access the V8 heap after construction.
  // Still pure here; concrete streams must provide it.
  bool can_access_heap() override = 0;

 protected:
  // Initializes the buffer window. |buffer_pos| is the stream position that
  // corresponds to |buffer_start|.
  CharacterStream(const Char* buffer_start, const Char* buffer_cursor,
                  const Char* buffer_end, size_t buffer_pos)
      : buffer_start_(buffer_start),
        buffer_cursor_(buffer_cursor),
        buffer_end_(buffer_end),
        buffer_pos_(buffer_pos) {}

  // Starts with an empty buffer at position 0.
  CharacterStream() : CharacterStream(nullptr, nullptr, nullptr, 0) {}

  // Calls ReadBlock() and, in debug builds, verifies its post-conditions.
  // Returns whether more characters are available.
  bool ReadBlockChecked() {
    size_t position = pos();
    USE(position);
    bool success = ReadBlock();

    // Post-conditions: 1, We should always be at the right position.
    //                  2, Cursor should be inside the buffer.
    //                  3, We should have more characters available iff success.
    DCHECK_EQ(pos(), position);
    DCHECK_LE(buffer_cursor_, buffer_end_);
    DCHECK_LE(buffer_start_, buffer_cursor_);
    DCHECK_EQ(success, buffer_cursor_ < buffer_end_);
    return success;
  }

  // Repositions the stream at |new_pos| and reads the block containing it.
  void ReadBlockAt(size_t new_pos) {
    // The callers of this method (Back/Back2/Seek) should handle the easy
    // case (seeking within the current buffer), and we should only get here
    // if we actually require new data.
    // (This is really an efficiency check, not a correctness invariant.)
    DCHECK(new_pos < buffer_pos_ ||
           new_pos >= buffer_pos_ + (buffer_end_ - buffer_start_));

    // Change pos() to point to new_pos.
    buffer_pos_ = new_pos;
    buffer_cursor_ = buffer_start_;
    DCHECK_EQ(pos(), new_pos);
    ReadBlockChecked();
  }

  // Read more data, and update buffer_*_ to point to it.
  // Returns true if more data was available.
  //
  // ReadBlock() may modify any of the buffer_*_ members, but must ensure that
  // the result of pos() remains unaffected.
  //
  // Examples:
  // - a stream could fill a separate buffer. Then buffer_start_ and
  //   buffer_cursor_ would point to the beginning of the buffer, and
  //   buffer_pos_ would be the old pos().
  // - a stream with existing buffer chunks would set buffer_start_ and
  //   buffer_end_ to cover the full chunk, and then buffer_cursor_ would
  //   point into the middle of the buffer, while buffer_pos_ would describe
  //   the start of the buffer.
  virtual bool ReadBlock() = 0;

  const Char* buffer_start_;   // First code unit of the current block.
  const Char* buffer_cursor_;  // Next code unit to be returned.
  const Char* buffer_end_;     // One past the last code unit of the block.
  size_t buffer_pos_;          // Stream position of buffer_start_.
};
} // namespace internal
} // namespace v8
#endif // V8_PARSING_SCANNER_CHARACTER_STREAMS_H_