| /* |
| * Copyright 2023 WebAssembly Community Group participants |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <optional>
#include <ostream>
#include <sstream>
#include <string>
#include <string_view>
#include <utility>
#include <variant>
#include <vector>
| |
| #include "support/name.h" |
| #include "support/result.h" |
| |
| #ifndef parser_lexer_h |
| #define parser_lexer_h |
| |
| namespace wasm::WATParser { |
| |
// A location in the text being lexed, given as a line and a column.
struct TextPos {
  size_t line;
  size_t col;

  // Writes `pos` to `os`. Defined out of line.
  friend std::ostream& operator<<(std::ostream& os, const TextPos& pos);

  // Equality is defined out of line; inequality is simply its negation.
  bool operator==(const TextPos& other) const;
  bool operator!=(const TextPos& other) const { return !operator==(other); }
};
| |
| // ====== |
| // Tokens |
| // ====== |
| |
// Token payload for a left parenthesis. It has no data members.
struct LParenTok {
  friend std::ostream& operator<<(std::ostream&, const LParenTok&);

  // With no state to compare, any two instances are equal.
  bool operator==(const LParenTok& /*other*/) const { return true; }
};
| |
// Token payload for a right parenthesis. It has no data members.
struct RParenTok {
  friend std::ostream& operator<<(std::ostream&, const RParenTok&);

  // With no state to compare, any two instances are equal.
  bool operator==(const RParenTok& /*other*/) const { return true; }
};
| |
// Token payload for an identifier.
struct IdTok {
  // True when the ID was written in the `$"..."` form.
  bool isStr;

  // For a string ID that contains escapes, the ID's contents. Empty otherwise.
  std::optional<std::string> str;

  friend std::ostream& operator<<(std::ostream&, const IdTok&);

  // Always reports equality; neither `isStr` nor `str` is consulted.
  // NOTE(review): StringTok compares its `str`, so this may be an oversight.
  // Confirm before relying on IdTok equality on its own.
  bool operator==(const IdTok& /*other*/) const { return true; }
};
| |
// Whether an integer token carries a sign and, if so, which one.
enum Sign {
  NoSign,
  Pos,
  Neg,
};

// Token payload for an integer: a 64-bit value plus its sign marker.
struct IntTok {
  uint64_t n;
  Sign sign;

  friend std::ostream& operator<<(std::ostream&, const IntTok&);

  // Defined out of line.
  bool operator==(const IntTok&) const;
};
| |
// Token payload for a floating-point literal.
struct FloatTok {
  // Set when a NaN with an explicit payload was lexed. The payload is kept
  // apart from `d` because, while lexing, it is not yet known whether the
  // value will be read as an f32 or an f64, and so the set of allowable
  // payloads is unknown. A NaN without a payload means "use the default
  // payload for the expected float width".
  std::optional<uint64_t> nanPayload;
  double d;

  friend std::ostream& operator<<(std::ostream&, const FloatTok&);

  // Defined out of line.
  bool operator==(const FloatTok&) const;
};
| |
// Token payload for a string literal.
struct StringTok {
  // For a string that contains escapes, its contents. Empty otherwise.
  std::optional<std::string> str;

  friend std::ostream& operator<<(std::ostream&, const StringTok&);

  // Two string tokens are equal when their optional contents are equal.
  bool operator==(const StringTok& other) const { return other.str == str; }
};
| |
// Token payload for a keyword. It has no data members; Token::getKeyword()
// returns the token's span as the keyword text.
struct KeywordTok {
  friend std::ostream& operator<<(std::ostream&, const KeywordTok&);

  // With no state to compare, any two instances are equal.
  bool operator==(const KeywordTok& /*other*/) const { return true; }
};
| |
| struct Token { |
| using Data = std::variant<LParenTok, |
| RParenTok, |
| IdTok, |
| IntTok, |
| FloatTok, |
| StringTok, |
| KeywordTok>; |
| std::string_view span; |
| Data data; |
| |
| // ==================== |
| // Token classification |
| // ==================== |
| |
| bool isLParen() const { return std::get_if<LParenTok>(&data); } |
| |
| bool isRParen() const { return std::get_if<RParenTok>(&data); } |
| |
| std::optional<std::string_view> getKeyword() const { |
| if (std::get_if<KeywordTok>(&data)) { |
| return span; |
| } |
| return {}; |
| } |
| |
| template<typename T> std::optional<T> getU() const; |
| template<typename T> std::optional<T> getS() const; |
| template<typename T> std::optional<T> getI() const; |
| std::optional<double> getF64() const; |
| std::optional<float> getF32() const; |
| std::optional<std::string_view> getString() const; |
| std::optional<std::string_view> getID() const; |
| |
| bool operator==(const Token&) const; |
| friend std::ostream& operator<<(std::ostream& os, const Token&); |
| }; |
| |
| // =========== |
| // Annotations |
| // =========== |
| |
| struct Annotation { |
| Name kind; |
| std::string_view contents; |
| }; |
| |
| extern Name srcAnnotationKind; |
| |
| // ===== |
| // Lexer |
| // ===== |
| |
| struct Lexer { |
| private: |
| std::string_view buffer; |
| size_t index = 0; |
| std::optional<Token> curr; |
| std::vector<Annotation> annotations; |
| |
| public: |
| Lexer(std::string_view buffer) : buffer(buffer) { setIndex(0); } |
| |
| size_t getIndex() const { return index; } |
| |
| void setIndex(size_t i) { |
| index = i; |
| advance(); |
| } |
| |
| bool takeLParen() { |
| if (!curr || !curr->isLParen()) { |
| return false; |
| } |
| advance(); |
| return true; |
| } |
| |
| bool peekLParen() { return Lexer(*this).takeLParen(); } |
| |
| bool takeRParen() { |
| if (!curr || !curr->isRParen()) { |
| return false; |
| } |
| advance(); |
| return true; |
| } |
| |
| bool peekRParen() { return Lexer(*this).takeRParen(); } |
| |
| bool takeUntilParen() { |
| while (true) { |
| if (!curr) { |
| return false; |
| } |
| if (curr->isLParen() || curr->isRParen()) { |
| return true; |
| } |
| advance(); |
| } |
| } |
| |
| std::optional<Name> takeID() { |
| if (curr) { |
| if (auto id = curr->getID()) { |
| advance(); |
| // See comment on takeName. |
| return Name(std::string(*id)); |
| } |
| } |
| return {}; |
| } |
| |
| std::optional<std::string_view> takeKeyword() { |
| if (curr) { |
| if (auto keyword = curr->getKeyword()) { |
| advance(); |
| return *keyword; |
| } |
| } |
| return {}; |
| } |
| |
| std::optional<std::string_view> peekKeyword() { |
| return Lexer(*this).takeKeyword(); |
| } |
| |
| bool takeKeyword(std::string_view expected) { |
| if (curr) { |
| if (auto keyword = curr->getKeyword()) { |
| if (*keyword == expected) { |
| advance(); |
| return true; |
| } |
| } |
| } |
| return false; |
| } |
| |
| std::optional<uint64_t> takeOffset() { |
| using namespace std::string_view_literals; |
| if (curr) { |
| if (auto keyword = curr->getKeyword()) { |
| if (keyword->substr(0, 7) != "offset="sv) { |
| return {}; |
| } |
| Lexer subLexer(keyword->substr(7)); |
| if (subLexer.empty()) { |
| return {}; |
| } |
| if (auto o = subLexer.curr->getU<uint64_t>()) { |
| subLexer.advance(); |
| if (subLexer.empty()) { |
| advance(); |
| return o; |
| } |
| } |
| } |
| } |
| return std::nullopt; |
| } |
| |
| std::optional<uint32_t> takeAlign() { |
| using namespace std::string_view_literals; |
| if (curr) { |
| if (auto keyword = curr->getKeyword()) { |
| if (keyword->substr(0, 6) != "align="sv) { |
| return {}; |
| } |
| Lexer subLexer(keyword->substr(6)); |
| if (subLexer.empty()) { |
| return {}; |
| } |
| if (auto a = subLexer.curr->getU<uint32_t>()) { |
| subLexer.advance(); |
| if (subLexer.empty()) { |
| advance(); |
| return a; |
| } |
| } |
| } |
| } |
| return {}; |
| } |
| |
| template<typename T> std::optional<T> takeU() { |
| if (curr) { |
| if (auto n = curr->getU<T>()) { |
| advance(); |
| return n; |
| } |
| } |
| return std::nullopt; |
| } |
| |
| template<typename T> std::optional<T> takeI() { |
| if (curr) { |
| if (auto n = curr->getI<T>()) { |
| advance(); |
| return n; |
| } |
| } |
| return std::nullopt; |
| } |
| |
| std::optional<uint64_t> takeU64() { return takeU<uint64_t>(); } |
| |
| std::optional<uint64_t> takeI64() { return takeI<uint64_t>(); } |
| |
| std::optional<uint32_t> takeU32() { return takeU<uint32_t>(); } |
| |
| std::optional<uint32_t> takeI32() { return takeI<uint32_t>(); } |
| |
| std::optional<uint16_t> takeI16() { return takeI<uint16_t>(); } |
| |
| std::optional<uint8_t> takeU8() { return takeU<uint8_t>(); } |
| |
| std::optional<uint8_t> takeI8() { return takeI<uint8_t>(); } |
| |
| std::optional<double> takeF64() { |
| if (curr) { |
| if (auto d = curr->getF64()) { |
| advance(); |
| return d; |
| } |
| } |
| return std::nullopt; |
| } |
| |
| std::optional<float> takeF32() { |
| if (curr) { |
| if (auto f = curr->getF32()) { |
| advance(); |
| return f; |
| } |
| } |
| return std::nullopt; |
| } |
| |
| std::optional<std::string> takeString() { |
| if (curr) { |
| if (auto s = curr->getString()) { |
| std::string ret(*s); |
| advance(); |
| return ret; |
| } |
| } |
| return {}; |
| } |
| |
| std::optional<Name> takeName() { |
| // TODO: Move this to lexer and validate UTF. |
| if (auto str = takeString()) { |
| // Copy to a std::string to make sure we have a null terminator, otherwise |
| // the `Name` constructor won't work correctly. |
| // TODO: Update `Name` to use string_view instead of char* and/or to take |
| // rvalue strings to avoid this extra copy. |
| return Name(std::string(*str)); |
| } |
| return {}; |
| } |
| |
| bool takeSExprStart(std::string_view expected) { |
| auto original = *this; |
| if (takeLParen() && takeKeyword(expected)) { |
| return true; |
| } |
| *this = original; |
| return false; |
| } |
| |
| bool peekSExprStart(std::string_view expected) { |
| auto original = *this; |
| if (!takeLParen()) { |
| return false; |
| } |
| bool ret = takeKeyword(expected); |
| *this = original; |
| return ret; |
| } |
| |
| std::string_view next() const { return buffer.substr(index); } |
| |
| void advance() { |
| annotations.clear(); |
| skipSpace(); |
| lexToken(); |
| } |
| |
| bool empty() const { return !curr; } |
| |
| TextPos position(const char* c) const; |
| TextPos position(size_t i) const { return position(buffer.data() + i); } |
| TextPos position(std::string_view span) const { |
| return position(span.data()); |
| } |
| TextPos position() const { return position(getPos()); } |
| |
| size_t getPos() const { |
| if (curr) { |
| return getIndex() - curr->span.size(); |
| } |
| return getIndex(); |
| } |
| |
| [[nodiscard]] Err err(size_t pos, std::string reason) { |
| std::stringstream msg; |
| msg << position(pos) << ": error: " << reason; |
| return Err{msg.str()}; |
| } |
| |
| [[nodiscard]] Err err(std::string reason) { return err(getPos(), reason); } |
| |
| const std::vector<Annotation> getAnnotations() { return annotations; } |
| std::vector<Annotation> takeAnnotations() { return std::move(annotations); } |
| |
| void setAnnotations(std::vector<Annotation>&& annotations) { |
| this->annotations = std::move(annotations); |
| } |
| |
| private: |
| void skipSpace(); |
| void lexToken(); |
| }; |
| |
| } // namespace wasm::WATParser |
| |
| #endif // parser_lexer_h |