blob: 1a93d3e996bfa55f6eb64498a7c06ac3807c33ee [file] [log] [blame] [edit]
/*
* Copyright 2023 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <optional>
#include <ostream>
#include <string_view>
#include <variant>
#include "support/name.h"
#include "support/result.h"
#ifndef parser_lexer_h
#define parser_lexer_h
namespace wasm::WATParser {
struct TextPos {
size_t line;
size_t col;
bool operator==(const TextPos& other) const;
bool operator!=(const TextPos& other) const { return !(*this == other); }
friend std::ostream& operator<<(std::ostream& os, const TextPos& pos);
};
// ======
// Tokens
// ======
struct LParenTok {
bool operator==(const LParenTok&) const { return true; }
friend std::ostream& operator<<(std::ostream&, const LParenTok&);
};
struct RParenTok {
bool operator==(const RParenTok&) const { return true; }
friend std::ostream& operator<<(std::ostream&, const RParenTok&);
};
struct IdTok {
// Whether this ID has `$"..."` format
bool isStr;
// If the ID is a string ID and contains escapes, this is its contents.
std::optional<std::string> str;
bool operator==(const IdTok&) const { return true; }
friend std::ostream& operator<<(std::ostream&, const IdTok&);
};
enum Sign { NoSign, Pos, Neg };
struct IntTok {
uint64_t n;
Sign sign;
bool operator==(const IntTok&) const;
friend std::ostream& operator<<(std::ostream&, const IntTok&);
};
struct FloatTok {
// The payload if we lexed a nan with payload. We cannot store the payload
// directly in `d` because we do not know at this point whether we are parsing
// an f32 or f64 and therefore we do not know what the allowable payloads are.
// No payload with NaN means to use the default payload for the expected float
// width.
std::optional<uint64_t> nanPayload;
double d;
bool operator==(const FloatTok&) const;
friend std::ostream& operator<<(std::ostream&, const FloatTok&);
};
struct StringTok {
// If the string contains escapes, this is its contents.
std::optional<std::string> str;
bool operator==(const StringTok& other) const { return str == other.str; }
friend std::ostream& operator<<(std::ostream&, const StringTok&);
};
struct KeywordTok {
bool operator==(const KeywordTok&) const { return true; }
friend std::ostream& operator<<(std::ostream&, const KeywordTok&);
};
struct Token {
using Data = std::variant<LParenTok,
RParenTok,
IdTok,
IntTok,
FloatTok,
StringTok,
KeywordTok>;
std::string_view span;
Data data;
// ====================
// Token classification
// ====================
bool isLParen() const { return std::get_if<LParenTok>(&data); }
bool isRParen() const { return std::get_if<RParenTok>(&data); }
std::optional<std::string_view> getKeyword() const {
if (std::get_if<KeywordTok>(&data)) {
return span;
}
return {};
}
template<typename T> std::optional<T> getU() const;
template<typename T> std::optional<T> getS() const;
template<typename T> std::optional<T> getI() const;
std::optional<double> getF64() const;
std::optional<float> getF32() const;
std::optional<std::string_view> getString() const;
std::optional<std::string_view> getID() const;
bool operator==(const Token&) const;
friend std::ostream& operator<<(std::ostream& os, const Token&);
};
// ===========
// Annotations
// ===========
struct Annotation {
Name kind;
std::string_view contents;
};
extern Name srcAnnotationKind;
// =====
// Lexer
// =====
struct Lexer {
private:
std::string_view buffer;
size_t index = 0;
std::optional<Token> curr;
std::vector<Annotation> annotations;
public:
Lexer(std::string_view buffer) : buffer(buffer) { setIndex(0); }
size_t getIndex() const { return index; }
void setIndex(size_t i) {
index = i;
advance();
}
bool takeLParen() {
if (!curr || !curr->isLParen()) {
return false;
}
advance();
return true;
}
bool peekLParen() { return Lexer(*this).takeLParen(); }
bool takeRParen() {
if (!curr || !curr->isRParen()) {
return false;
}
advance();
return true;
}
bool peekRParen() { return Lexer(*this).takeRParen(); }
bool takeUntilParen() {
while (true) {
if (!curr) {
return false;
}
if (curr->isLParen() || curr->isRParen()) {
return true;
}
advance();
}
}
std::optional<Name> takeID() {
if (curr) {
if (auto id = curr->getID()) {
advance();
// See comment on takeName.
return Name(std::string(*id));
}
}
return {};
}
std::optional<std::string_view> takeKeyword() {
if (curr) {
if (auto keyword = curr->getKeyword()) {
advance();
return *keyword;
}
}
return {};
}
std::optional<std::string_view> peekKeyword() {
return Lexer(*this).takeKeyword();
}
bool takeKeyword(std::string_view expected) {
if (curr) {
if (auto keyword = curr->getKeyword()) {
if (*keyword == expected) {
advance();
return true;
}
}
}
return false;
}
std::optional<uint64_t> takeOffset() {
using namespace std::string_view_literals;
if (curr) {
if (auto keyword = curr->getKeyword()) {
if (keyword->substr(0, 7) != "offset="sv) {
return {};
}
Lexer subLexer(keyword->substr(7));
if (subLexer.empty()) {
return {};
}
if (auto o = subLexer.curr->getU<uint64_t>()) {
subLexer.advance();
if (subLexer.empty()) {
advance();
return o;
}
}
}
}
return std::nullopt;
}
std::optional<uint32_t> takeAlign() {
using namespace std::string_view_literals;
if (curr) {
if (auto keyword = curr->getKeyword()) {
if (keyword->substr(0, 6) != "align="sv) {
return {};
}
Lexer subLexer(keyword->substr(6));
if (subLexer.empty()) {
return {};
}
if (auto a = subLexer.curr->getU<uint32_t>()) {
subLexer.advance();
if (subLexer.empty()) {
advance();
return a;
}
}
}
}
return {};
}
template<typename T> std::optional<T> takeU() {
if (curr) {
if (auto n = curr->getU<T>()) {
advance();
return n;
}
}
return std::nullopt;
}
template<typename T> std::optional<T> takeI() {
if (curr) {
if (auto n = curr->getI<T>()) {
advance();
return n;
}
}
return std::nullopt;
}
std::optional<uint64_t> takeU64() { return takeU<uint64_t>(); }
std::optional<uint64_t> takeI64() { return takeI<uint64_t>(); }
std::optional<uint32_t> takeU32() { return takeU<uint32_t>(); }
std::optional<uint32_t> takeI32() { return takeI<uint32_t>(); }
std::optional<uint16_t> takeI16() { return takeI<uint16_t>(); }
std::optional<uint8_t> takeU8() { return takeU<uint8_t>(); }
std::optional<uint8_t> takeI8() { return takeI<uint8_t>(); }
std::optional<double> takeF64() {
if (curr) {
if (auto d = curr->getF64()) {
advance();
return d;
}
}
return std::nullopt;
}
std::optional<float> takeF32() {
if (curr) {
if (auto f = curr->getF32()) {
advance();
return f;
}
}
return std::nullopt;
}
std::optional<std::string> takeString() {
if (curr) {
if (auto s = curr->getString()) {
std::string ret(*s);
advance();
return ret;
}
}
return {};
}
std::optional<Name> takeName() {
// TODO: Move this to lexer and validate UTF.
if (auto str = takeString()) {
// Copy to a std::string to make sure we have a null terminator, otherwise
// the `Name` constructor won't work correctly.
// TODO: Update `Name` to use string_view instead of char* and/or to take
// rvalue strings to avoid this extra copy.
return Name(std::string(*str));
}
return {};
}
bool takeSExprStart(std::string_view expected) {
auto original = *this;
if (takeLParen() && takeKeyword(expected)) {
return true;
}
*this = original;
return false;
}
bool peekSExprStart(std::string_view expected) {
auto original = *this;
if (!takeLParen()) {
return false;
}
bool ret = takeKeyword(expected);
*this = original;
return ret;
}
std::string_view next() const { return buffer.substr(index); }
void advance() {
annotations.clear();
skipSpace();
lexToken();
}
bool empty() const { return !curr; }
TextPos position(const char* c) const;
TextPos position(size_t i) const { return position(buffer.data() + i); }
TextPos position(std::string_view span) const {
return position(span.data());
}
TextPos position() const { return position(getPos()); }
size_t getPos() const {
if (curr) {
return getIndex() - curr->span.size();
}
return getIndex();
}
[[nodiscard]] Err err(size_t pos, std::string reason) {
std::stringstream msg;
msg << position(pos) << ": error: " << reason;
return Err{msg.str()};
}
[[nodiscard]] Err err(std::string reason) { return err(getPos(), reason); }
const std::vector<Annotation> getAnnotations() { return annotations; }
std::vector<Annotation> takeAnnotations() { return std::move(annotations); }
void setAnnotations(std::vector<Annotation>&& annotations) {
this->annotations = std::move(annotations);
}
private:
void skipSpace();
void lexToken();
};
} // namespace wasm::WATParser
#endif // parser_lexer_h