| /* |
| * Copyright (C) 2022 Apple Inc. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
| * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS |
| * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF |
| * THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "config.h" |
| #include "Lexer.h" |
| |
| #include "ConstantValue.h" |
| #include "Token.h" |
| #include <charconv> |
| #include <wtf/FastFloat.h> |
| #include <wtf/SortedArrayMap.h> |
| #include <wtf/dtoa.h> |
| #include <wtf/text/StringHash.h> |
| #include <wtf/text/StringToIntegerConversion.h> |
| #include <wtf/unicode/CharacterNames.h> |
| |
| namespace WGSL { |
| |
| static unsigned isIdentifierStart(char16_t character, std::span<const char16_t> code) |
| { |
| if (character == '_') |
| return 1; |
| |
| unsigned length = 1; |
| if (code.size() > 1 && u_charType(character) == U_SURROGATE) |
| ++length; |
| if (u_stringHasBinaryProperty(code.data(), length, UCHAR_XID_START)) |
| return length; |
| return 0; |
| } |
| |
| static unsigned isIdentifierContinue(char16_t character, std::span<const char16_t> code) |
| { |
| if (auto length = isIdentifierStart(character, code)) |
| return length; |
| |
| unsigned length = 1; |
| if (code.size() > 1 && u_charType(character) == U_SURROGATE) |
| ++length; |
| if (u_stringHasBinaryProperty(code.data(), length, UCHAR_XID_CONTINUE)) |
| return length; |
| return 0; |
| } |
| |
| static unsigned isIdentifierStart(Latin1Character character, std::span<const Latin1Character>) |
| { |
| return isASCIIAlpha(character) || character == '_'; |
| } |
| |
| static unsigned isIdentifierContinue(Latin1Character character, std::span<const Latin1Character>) |
| { |
| return isASCIIAlphanumeric(character) || character == '_'; |
| } |
| |
| template<typename CharacterType> |
| Token Lexer<CharacterType>::makeToken(TokenType type) |
| { |
| return { type, m_tokenStartingPosition, currentTokenLength() }; |
| } |
| |
| template<typename CharacterType> |
| Token Lexer<CharacterType>::makeFloatToken(TokenType type, double floatValue) |
| { |
| return { type, m_tokenStartingPosition, currentTokenLength(), floatValue }; |
| } |
| |
| template<typename CharacterType> |
| Token Lexer<CharacterType>::makeIntegerToken(TokenType type, int64_t integerValue) |
| { |
| return { type, m_tokenStartingPosition, currentTokenLength(), integerValue }; |
| } |
| |
| template<typename CharacterType> |
| Token Lexer<CharacterType>::makeIdentifierToken(String&& identifier) |
| { |
| return { WGSL::TokenType::Identifier, m_tokenStartingPosition, currentTokenLength(), WTF::move(identifier) }; |
| } |
| |
| template<typename T> |
| Vector<Token> Lexer<T>::lex() |
| { |
| Vector<Token> tokens; |
| |
| while (true) { |
| auto token = nextToken(); |
| tokens.append(token); |
| switch (token.type) { |
| case TokenType::GtGtEq: |
| tokens.append(makeToken(TokenType::Placeholder)); |
| [[fallthrough]]; |
| case TokenType::GtGt: |
| case TokenType::GtEq: |
| case TokenType::MinusMinus: |
| tokens.append(makeToken(TokenType::Placeholder)); |
| break; |
| default: |
| break; |
| } |
| |
| if (token.type == TokenType::EndOfFile || token.type == TokenType::Invalid) |
| break; |
| } |
| |
| return tokens; |
| } |
| |
| template <typename T> |
| Token Lexer<T>::nextToken() |
| { |
| if (!skipWhitespaceAndComments()) |
| return makeToken(TokenType::Invalid); |
| |
| m_tokenStartingPosition = m_currentPosition; |
| |
| if (isAtEndOfFile()) |
| return makeToken(TokenType::EndOfFile); |
| |
| switch (m_current) { |
| case '!': |
| shift(); |
| if (m_current == '=') { |
| shift(); |
| return makeToken(TokenType::BangEq); |
| } |
| return makeToken(TokenType::Bang); |
| case '%': |
| shift(); |
| switch (m_current) { |
| case '=': |
| shift(); |
| return makeToken(TokenType::ModuloEq); |
| default: |
| return makeToken(TokenType::Modulo); |
| } |
| case '&': |
| shift(); |
| switch (m_current) { |
| case '&': |
| shift(); |
| return makeToken(TokenType::AndAnd); |
| case '=': |
| shift(); |
| return makeToken(TokenType::AndEq); |
| default: |
| return makeToken(TokenType::And); |
| } |
| case '(': |
| shift(); |
| return makeToken(TokenType::ParenLeft); |
| case ')': |
| shift(); |
| return makeToken(TokenType::ParenRight); |
| case '{': |
| shift(); |
| return makeToken(TokenType::BraceLeft); |
| case '}': |
| shift(); |
| return makeToken(TokenType::BraceRight); |
| case '[': |
| shift(); |
| return makeToken(TokenType::BracketLeft); |
| case ']': |
| shift(); |
| return makeToken(TokenType::BracketRight); |
| case ':': |
| shift(); |
| return makeToken(TokenType::Colon); |
| case ',': |
| shift(); |
| return makeToken(TokenType::Comma); |
| case ';': |
| shift(); |
| return makeToken(TokenType::Semicolon); |
| case '=': |
| shift(); |
| if (m_current == '=') { |
| shift(); |
| return makeToken(TokenType::EqEq); |
| } |
| return makeToken(TokenType::Equal); |
| case '>': |
| shift(); |
| switch (m_current) { |
| case '=': |
| shift(); |
| return makeToken(TokenType::GtEq); |
| case '>': |
| shift(); |
| switch (m_current) { |
| case '=': |
| shift(); |
| return makeToken(TokenType::GtGtEq); |
| default: |
| return makeToken(TokenType::GtGt); |
| } |
| default: |
| return makeToken(TokenType::Gt); |
| } |
| case '<': |
| shift(); |
| switch (m_current) { |
| case '=': |
| shift(); |
| return makeToken(TokenType::LtEq); |
| case '<': |
| shift(); |
| switch (m_current) { |
| case '=': |
| shift(); |
| return makeToken(TokenType::LtLtEq); |
| default: |
| return makeToken(TokenType::LtLt); |
| } |
| default: |
| return makeToken(TokenType::Lt); |
| } |
| case '@': |
| shift(); |
| return makeToken(TokenType::Attribute); |
| case '*': |
| shift(); |
| switch (m_current) { |
| case '=': |
| shift(); |
| return makeToken(TokenType::StarEq); |
| default: |
| return makeToken(TokenType::Star); |
| } |
| case '/': |
| shift(); |
| switch (m_current) { |
| case '=': |
| shift(); |
| return makeToken(TokenType::SlashEq); |
| default: |
| return makeToken(TokenType::Slash); |
| } |
| case '-': |
| shift(); |
| switch (m_current) { |
| case '>': |
| shift(); |
| return makeToken(TokenType::Arrow); |
| case '-': |
| shift(); |
| return makeToken(TokenType::MinusMinus); |
| case '=': |
| shift(); |
| return makeToken(TokenType::MinusEq); |
| default: |
| return makeToken(TokenType::Minus); |
| } |
| case '+': |
| shift(); |
| switch (m_current) { |
| case '+': |
| shift(); |
| return makeToken(TokenType::PlusPlus); |
| case '=': |
| shift(); |
| return makeToken(TokenType::PlusEq); |
| default: |
| return makeToken(TokenType::Plus); |
| } |
| case '^': |
| shift(); |
| switch (m_current) { |
| case '=': |
| shift(); |
| return makeToken(TokenType::XorEq); |
| default: |
| return makeToken(TokenType::Xor); |
| } |
| case '|': |
| shift(); |
| switch (m_current) { |
| case '|': |
| shift(); |
| return makeToken(TokenType::OrOr); |
| case '=': |
| shift(); |
| return makeToken(TokenType::OrEq); |
| default: |
| return makeToken(TokenType::Or); |
| } |
| case '~': |
| shift(); |
| return makeToken(TokenType::Tilde); |
| default: |
| if (isASCIIDigit(m_current) || m_current == '.') |
| return lexNumber(); |
| if (auto consumed = isIdentifierStart(m_current, m_code.span())) { |
| unsigned length = consumed; |
| auto startOfToken = m_code.span(); |
| shift(consumed); |
| while (!isAtEndOfFile()) { |
| auto consumed = isIdentifierContinue(m_current, m_code.span()); |
| if (!consumed) |
| break; |
| length += consumed; |
| shift(consumed); |
| } |
| |
| String view(StringImpl::createWithoutCopying(startOfToken.subspan(0, currentTokenLength()))); |
| |
| static constexpr auto keywordMappings = std::to_array<std::pair<ComparableASCIILiteral, TokenType>>({ |
| { "_"_s, TokenType::Underbar }, |
| |
| #define MAPPING_ENTRY(lexeme, name)\ |
| { #lexeme##_s, TokenType::Keyword##name }, |
| FOREACH_KEYWORD(MAPPING_ENTRY) |
| #undef MAPPING_ENTRY |
| |
| }); |
| static constexpr SortedArrayMap keywords { keywordMappings }; |
| |
| // https://www.w3.org/TR/WGSL/#reserved-words |
| static constexpr auto reservedWords = std::to_array<ComparableASCIILiteral>({ |
| "NULL"_s, |
| "Self"_s, |
| "abstract"_s, |
| "active"_s, |
| "alignas"_s, |
| "alignof"_s, |
| "as"_s, |
| "asm"_s, |
| "asm_fragment"_s, |
| "async"_s, |
| "attribute"_s, |
| "auto"_s, |
| "await"_s, |
| "become"_s, |
| "binding_array"_s, |
| "cast"_s, |
| "catch"_s, |
| "class"_s, |
| "co_await"_s, |
| "co_return"_s, |
| "co_yield"_s, |
| "coherent"_s, |
| "column_major"_s, |
| "common"_s, |
| "compile"_s, |
| "compile_fragment"_s, |
| "concept"_s, |
| "const_cast"_s, |
| "consteval"_s, |
| "constexpr"_s, |
| "constinit"_s, |
| "crate"_s, |
| "debugger"_s, |
| "decltype"_s, |
| "delete"_s, |
| "demote"_s, |
| "demote_to_helper"_s, |
| "do"_s, |
| "dynamic_cast"_s, |
| "enum"_s, |
| "explicit"_s, |
| "export"_s, |
| "extends"_s, |
| "extern"_s, |
| "external"_s, |
| "fallthrough"_s, |
| "filter"_s, |
| "final"_s, |
| "finally"_s, |
| "friend"_s, |
| "from"_s, |
| "fxgroup"_s, |
| "get"_s, |
| "goto"_s, |
| "groupshared"_s, |
| "highp"_s, |
| "impl"_s, |
| "implements"_s, |
| "import"_s, |
| "inline"_s, |
| "instanceof"_s, |
| "interface"_s, |
| "layout"_s, |
| "lowp"_s, |
| "macro"_s, |
| "macro_rules"_s, |
| "match"_s, |
| "mediump"_s, |
| "meta"_s, |
| "mod"_s, |
| "module"_s, |
| "move"_s, |
| "mut"_s, |
| "mutable"_s, |
| "namespace"_s, |
| "new"_s, |
| "nil"_s, |
| "noexcept"_s, |
| "noinline"_s, |
| "nointerpolation"_s, |
| "noperspective"_s, |
| "null"_s, |
| "nullptr"_s, |
| "of"_s, |
| "operator"_s, |
| "package"_s, |
| "packoffset"_s, |
| "partition"_s, |
| "pass"_s, |
| "patch"_s, |
| "pixelfragment"_s, |
| "precise"_s, |
| "precision"_s, |
| "premerge"_s, |
| "priv"_s, |
| "protected"_s, |
| "pub"_s, |
| "public"_s, |
| "readonly"_s, |
| "ref"_s, |
| "regardless"_s, |
| "register"_s, |
| "reinterpret_cast"_s, |
| "require"_s, |
| "resource"_s, |
| "restrict"_s, |
| "self"_s, |
| "set"_s, |
| "shared"_s, |
| "sizeof"_s, |
| "smooth"_s, |
| "snorm"_s, |
| "static"_s, |
| "static_assert"_s, |
| "static_cast"_s, |
| "std"_s, |
| "subroutine"_s, |
| "super"_s, |
| "target"_s, |
| "template"_s, |
| "this"_s, |
| "thread_local"_s, |
| "throw"_s, |
| "trait"_s, |
| "try"_s, |
| "type"_s, |
| "typedef"_s, |
| "typeid"_s, |
| "typename"_s, |
| "typeof"_s, |
| "union"_s, |
| "unless"_s, |
| "unorm"_s, |
| "unsafe"_s, |
| "unsized"_s, |
| "use"_s, |
| "using"_s, |
| "varying"_s, |
| "virtual"_s, |
| "volatile"_s, |
| "wgsl"_s, |
| "where"_s, |
| "with"_s, |
| "writeonly"_s, |
| "yield"_s, |
| }); |
| static constexpr SortedArraySet reservedWordSet { reservedWords }; |
| |
| auto tokenType = keywords.get(view); |
| if (tokenType != TokenType::Invalid) |
| return makeToken(tokenType); |
| |
| if (reservedWordSet.contains(view)) [[unlikely]] |
| return makeToken(TokenType::ReservedWord); |
| |
| |
| if (length >= 2 && startOfToken[0] == '_' && startOfToken[1] == '_') [[unlikely]] |
| return makeToken(TokenType::Invalid); |
| |
| |
| return makeIdentifierToken(WTF::move(view)); |
| } |
| break; |
| } |
| return makeToken(TokenType::Invalid); |
| } |
| |
| template <typename T> |
| T Lexer<T>::shift(unsigned i) |
| { |
| ASSERT(i <= m_code.lengthRemaining()); |
| |
| T last = m_current; |
| // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence. |
| m_current = 0; |
| m_code.advanceBy(i); |
| m_currentPosition.offset += i; |
| m_currentPosition.lineOffset += i; |
| if (m_code.hasCharactersRemaining()) [[likely]] |
| m_current = m_code[0]; |
| return last; |
| } |
| |
| template <typename T> |
| T Lexer<T>::peek(unsigned i) |
| { |
| if (i >= m_code.lengthRemaining()) [[unlikely]] |
| return 0; |
| return m_code[i]; |
| } |
| |
| template <typename T> |
| void Lexer<T>::newLine() |
| { |
| m_currentPosition.line += 1; |
| m_currentPosition.lineOffset = 0; |
| } |
| |
| template <typename T> |
| bool Lexer<T>::skipBlockComments() |
| { |
| ASSERT(peek(0) == '/' && peek(1) == '*'); |
| shift(2); |
| |
| T ch = 0; |
| unsigned depth = 1u; |
| |
| while (!isAtEndOfFile() && (ch = shift())) { |
| if (ch == '/' && peek() == '*') { |
| shift(); |
| depth += 1; |
| } else if (ch == '*' && peek() == '/') { |
| shift(); |
| depth -= 1; |
| if (!depth) { |
| // This block comment is closed, so for a construction like "/* */ */" |
| // there will be a successfully parsed block comment "/* */" |
| // and " */" will be processed separately. |
| return true; |
| } |
| } else if (ch == '\n') |
| newLine(); |
| } |
| |
| return false; |
| } |
| |
| template <typename T> |
| void Lexer<T>::skipLineComment() |
| { |
| ASSERT(peek(0) == '/' && peek(1) == '/'); |
| // Note that in the case of \r\n this makes the comment end on the \r. It is |
| // fine, as the \n after that is simple whitespace. |
| while (!isAtEndOfFile() && peek() != '\n') |
| shift(); |
| } |
| |
| template <typename T> |
| bool Lexer<T>::skipWhitespaceAndComments() |
| { |
| while (!isAtEndOfFile()) { |
| if (isUnicodeCompatibleASCIIWhitespace(m_current)) { |
| if (shift() == '\n') |
| newLine(); |
| } else if (peek(0) == '/') { |
| if (peek(1) == '/') |
| skipLineComment(); |
| else if (peek(1) == '*') { |
| if (!skipBlockComments()) |
| return false; |
| } else |
| break; |
| } else |
| break; |
| } |
| return true; |
| } |
| |
| template <typename T> |
| bool Lexer<T>::isAtEndOfFile() const |
| { |
| if (m_code.atEnd()) { |
| ASSERT(!m_current); |
| return true; |
| } |
| return false; |
| } |
| |
| template <typename T> |
| Token Lexer<T>::lexNumber() |
| { |
| /* Grammar: |
| decimal_int_literal: |
| | /0[iu]?/ |
| | /[1-9][0-9]*[iu]?/ |
| |
| hex_int_literal : |
| | /0[xX][0-9a-fA-F]+[iu]?/ |
| |
| decimal_float_literal: |
| | /0[fh]/` |
| | /[0-9]*\.[0-9]+([eE][+-]?[0-9]+)?[fh]?/ |
| | /[0-9]+\.[0-9]*([eE][+-]?[0-9]+)?[fh]?/ |
| | /[0-9]+[eE][+-]?[0-9]+[fh]?/ |
| | /[1-9][0-9]*[fh]/ |
| |
| hex_float_literal: |
| | /0[xX][0-9a-fA-F]*\.[0-9a-fA-F]+([pP][+-]?[0-9]+[fh]?)?/ |
| | /0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*([pP][+-]?[0-9]+[fh]?)?/ |
| | /0[xX][0-9a-fA-F]+[pP][+-]?[0-9]+[fh]?/ |
| */ |
| |
| /* State machine: |
| Start -> InitZero (0) |
| -> Decimal (1-9) |
| -> FloatFractNoIntegral(.) |
| |
| InitZero -> End (i, u, f, h, ∅) |
| -> Hex (x, X) |
| -> Float (0-9) |
| -> FloatFract(.) |
| -> FloatExponent(e, E) |
| |
| Decimal -> End (i, u, f, h, ∅) |
| -> Decimal (0-9) |
| -> FloatFract(.) |
| -> FloatExponent(e, E) |
| |
| Float -> Float (0-9) |
| -> FloatFract(.) |
| -> FloatExponent(e, E) |
| |
| FloatFractNoIntegral -> FloatFract (0-9) |
| -> End(∅) |
| |
| FloatFract -> FloatFract (0-9) |
| -> FloatExponent(e, E) |
| -> End(f, h, ∅) |
| |
| FloatExponent -> FloatExponentPostSign(+, -) |
| -> FloatExponentNonEmpty(0-9) |
| |
| FloatExponentPostSign -> FloatExponentNonEmpty(0-9) |
| |
| FloatExponentNonEmpty -> FloatExponentNonEmpty(0-9) |
| -> End(f, h, ∅) |
| |
| Hex -> HexNonEmpty(0-9, a-f, A-F) |
| -> HexFloatFractNoIntegral(.) |
| |
| HexNonEmpty -> HexNonEmpty(0-9, a-f, A-F) |
| -> End(i, u, ∅) |
| -> HexFloatFract(.) |
| -> HexFloatExponentRequireSuffix(p, P) |
| |
| HexFloatFractNoIntegral -> HexFloatFract(0-9, a-f, A-F) |
| |
| HexFloatFract -> HexFloatFract(0-9, a-f, A-F) |
| -> HexFloatExponent(p, P) |
| -> End(∅) |
| |
| HexFloatExponent -> HexFloatExponentNonEmpty(0-9) |
| -> HexFloatExponentPostSign(+, -) |
| |
| HexFloatExponentPostSign -> HexFloatExponentNonEmpty(0-9) |
| |
| HexFloatExponentNonEmpty -> HexFloatExponentNonEmpty(0-9) |
| -> End(f, h, ∅) |
| |
| HexFloatExponentRequireSuffix -> HexFloatExponentRequireSuffixNonEmpty(0-9) |
| -> HexFloatExponentRequireSuffixPostSign(+, -) |
| |
| HexFloatExponentRequireSuffixPostSign -> HexFloatExponentRequireSuffixNonEmpty(0-9) |
| |
| HexFloatExponentRequireSuffixNonEmpty -> HexFloatExponentRequireSuffixNonEmpty(0-9) |
| -> End(f, h) |
| */ |
| |
| enum State : uint8_t { |
| Start, |
| |
| InitZero, |
| Decimal, |
| |
| Float, |
| FloatFractNoIntegral, |
| FloatFract, |
| FloatExponent, |
| FloatExponentPostSign, |
| FloatExponentNonEmpty, |
| |
| Hex, |
| HexNonEmpty, |
| HexFloatFractNoIntegral, |
| HexFloatFract, |
| HexFloatExponent, |
| HexFloatExponentPostSign, |
| HexFloatExponentNonEmpty, |
| HexFloatExponentRequireSuffix, |
| HexFloatExponentRequireSuffixPostSign, |
| HexFloatExponentRequireSuffixNonEmpty, |
| |
| End, |
| EndNoShift, |
| }; |
| |
| auto state = Start; |
| char suffix = '\0'; |
| char exponentSign = '\0'; |
| bool isHex = false; |
| auto start = m_code.span(); |
| auto integral = m_code.span(); |
| const T* fract = nullptr; |
| const T* exponent = nullptr; |
| |
| while (true) { |
| switch (state) { |
| case Start: |
| switch (m_current) { |
| case '0': |
| state = InitZero; |
| break; |
| case '.': |
| state = FloatFractNoIntegral; |
| break; |
| default: |
| ASSERT(isASCIIDigit(m_current)); |
| state = Decimal; |
| break; |
| } |
| break; |
| |
| case InitZero: |
| switch (m_current) { |
| case 'i': |
| case 'u': |
| case 'f': |
| case 'h': |
| state = End; |
| suffix = m_current; |
| break; |
| |
| case 'x': |
| case 'X': |
| state = Hex; |
| break; |
| |
| case '.': |
| state = FloatFract; |
| break; |
| |
| case 'e': |
| case 'E': |
| state = FloatExponent; |
| break; |
| |
| default: |
| if (isASCIIDigit(m_current)) |
| state = Float; |
| else |
| state = EndNoShift; |
| } |
| break; |
| |
| case Decimal: |
| switch (m_current) { |
| case 'i': |
| case 'u': |
| case 'f': |
| case 'h': |
| state = End; |
| suffix = m_current; |
| break; |
| |
| case '.': |
| state = FloatFract; |
| break; |
| |
| case 'e': |
| case 'E': |
| state = FloatExponent; |
| break; |
| |
| default: |
| if (!isASCIIDigit(m_current)) |
| state = EndNoShift; |
| } |
| break; |
| |
| case Float: |
| switch (m_current) { |
| case '.': |
| state = FloatFract; |
| break; |
| |
| case 'e': |
| case 'E': |
| state = FloatExponent; |
| break; |
| |
| default: |
| if (!isASCIIDigit(m_current)) |
| return makeToken(TokenType::Invalid); |
| } |
| break; |
| case FloatFractNoIntegral: |
| fract = m_code.position(); |
| if (!isASCIIDigit(m_current)) |
| return makeToken(TokenType::Period); |
| state = FloatFract; |
| break; |
| case FloatFract: |
| if (!fract) |
| fract = m_code.position(); |
| switch (m_current) { |
| case 'f': |
| case 'h': |
| state = End; |
| suffix = m_current; |
| break; |
| |
| case 'e': |
| case 'E': |
| state = FloatExponent; |
| break; |
| |
| default: |
| if (!isASCIIDigit(m_current)) |
| state = EndNoShift; |
| } |
| break; |
| case FloatExponent: |
| exponent = m_code.position(); |
| switch (m_current) { |
| case '+': |
| case '-': |
| exponentSign = m_current; |
| state = FloatExponentPostSign; |
| break; |
| default: |
| if (!isASCIIDigit(m_current)) |
| return makeToken(TokenType::Invalid); |
| state = FloatExponentNonEmpty; |
| } |
| break; |
| case FloatExponentPostSign: |
| if (exponentSign == '+') |
| exponent = m_code.position(); |
| if (!isASCIIDigit(m_current)) |
| return makeToken(TokenType::Invalid); |
| state = FloatExponentNonEmpty; |
| break; |
| case FloatExponentNonEmpty: |
| switch (m_current) { |
| case 'f': |
| case 'h': |
| state = End; |
| suffix = m_current; |
| break; |
| default: |
| if (!isASCIIDigit(m_current)) |
| state = EndNoShift; |
| } |
| break; |
| case Hex: |
| isHex = true; |
| integral = m_code.span(); |
| if (m_current == '.') |
| state = HexFloatFractNoIntegral; |
| else if (isASCIIHexDigit(m_current)) |
| state = HexNonEmpty; |
| else |
| return makeToken(TokenType::Invalid); |
| break; |
| case HexNonEmpty: |
| switch (m_current) { |
| case 'i': |
| case 'u': |
| state = End; |
| suffix = m_current; |
| break; |
| |
| case 'p': |
| case 'P': |
| state = HexFloatExponentRequireSuffix; |
| break; |
| |
| case '.': |
| state = HexFloatFract; |
| break; |
| |
| default: |
| if (!isASCIIHexDigit(m_current)) |
| state = EndNoShift; |
| } |
| break; |
| case HexFloatFractNoIntegral: |
| fract = m_code.position(); |
| if (!isASCIIHexDigit(m_current)) |
| return makeToken(TokenType::Invalid); |
| state = HexFloatFract; |
| break; |
| case HexFloatFract: |
| if (!fract) |
| fract = m_code.position(); |
| if (isASCIIHexDigit(m_current)) |
| break; |
| if (m_current == 'p' || m_current == 'P') |
| state = HexFloatExponent; |
| else |
| state = EndNoShift; |
| break; |
| case HexFloatExponent: |
| exponent = m_code.position(); |
| if (isASCIIDigit(m_current)) |
| state = HexFloatExponentNonEmpty; |
| else if (m_current == '+' || m_current == '-') { |
| exponentSign = m_current; |
| state = HexFloatExponentPostSign; |
| } else |
| return makeToken(TokenType::Invalid); |
| break; |
| case HexFloatExponentPostSign: |
| if (exponentSign == '+') |
| exponent = m_code.position(); |
| if (isASCIIDigit(m_current)) { |
| state = HexFloatExponentNonEmpty; |
| break; |
| } |
| return makeToken(TokenType::Invalid); |
| case HexFloatExponentNonEmpty: |
| if (isASCIIDigit(m_current)) |
| state = HexFloatExponentNonEmpty; |
| else if (m_current == 'f' || m_current == 'h') { |
| state = End; |
| suffix = m_current; |
| } else |
| state = EndNoShift; |
| break; |
| case HexFloatExponentRequireSuffix: |
| exponent = m_code.position(); |
| if (isASCIIDigit(m_current)) |
| state = HexFloatExponentRequireSuffixNonEmpty; |
| else if (m_current == '+' || m_current == '-') { |
| exponentSign = m_current; |
| state = HexFloatExponentRequireSuffixPostSign; |
| } else |
| return makeToken(TokenType::Invalid); |
| break; |
| case HexFloatExponentRequireSuffixPostSign: |
| if (exponentSign == '+') |
| exponent = m_code.position(); |
| if (isASCIIDigit(m_current)) { |
| state = HexFloatExponentRequireSuffixNonEmpty; |
| break; |
| } |
| return makeToken(TokenType::Invalid); |
| case HexFloatExponentRequireSuffixNonEmpty: |
| if (isASCIIDigit(m_current)) |
| state = HexFloatExponentNonEmpty; |
| else if (m_current == 'f' || m_current == 'h') { |
| state = End; |
| suffix = m_current; |
| } else |
| return makeToken(TokenType::Invalid); |
| break; |
| case End: |
| case EndNoShift: |
| RELEASE_ASSERT_NOT_REACHED(); |
| } |
| |
| if (state == EndNoShift) |
| break; |
| shift(); |
| if (state == End) |
| break; |
| } |
| |
| const auto& convert = [&](auto value) -> Token { |
| switch (suffix) { |
| case 'i': { |
| if constexpr (std::is_integral_v<decltype(value)>) { |
| if (auto result = convertInteger<int>(value)) |
| return makeIntegerToken(TokenType::IntegerLiteralSigned, *result); |
| } |
| break; |
| } |
| case 'u': { |
| if constexpr (std::is_integral_v<decltype(value)>) { |
| if (auto result = convertInteger<unsigned>(value)) |
| return makeIntegerToken(TokenType::IntegerLiteralUnsigned, *result); |
| } |
| break; |
| } |
| case 'f': { |
| if (auto result = convertFloat<float>(value)) |
| return makeFloatToken(TokenType::FloatLiteral, *result); |
| break; |
| } |
| case 'h': |
| if (auto result = convertFloat<half>(value)) |
| return makeFloatToken(TokenType::HalfLiteral, *result); |
| break; |
| default: |
| if constexpr (std::is_floating_point_v<decltype(value)>) { |
| if (auto result = convertFloat<double>(value)) |
| return makeFloatToken(TokenType::AbstractFloatLiteral, *result); |
| } else { |
| if (auto result = convertInteger<int64_t>(value)) |
| return makeIntegerToken(TokenType::IntegerLiteral, *result); |
| } |
| } |
| return makeToken(TokenType::Invalid); |
| }; |
| |
| if (!fract && !exponent) { |
| auto base = isHex ? 16 : 10; |
| auto result = WTF::parseInteger<int64_t>(integral, base, WTF::TrailingJunkPolicy::Allow); |
| if (!result) |
| return makeToken(TokenType::Invalid); |
| return convert(result.value()); |
| } |
| |
| if (!isHex) { |
| size_t parsedLength; |
| double result = WTF::parseDouble(integral, parsedLength); |
| return convert(result); |
| } |
| |
| auto length = static_cast<size_t>(m_code.position() - start.data()); |
| if (suffix) |
| length--; |
| Vector<char, 256> buffer(length + 1); |
| for (unsigned i = 0; i < length; ++i) |
| buffer[i] = start[i]; |
| buffer[length] = '\0'; |
| size_t parsedLength; |
| auto maybeResult = stringToDouble(buffer.span(), parsedLength); |
| if (!maybeResult || parsedLength != length) |
| return makeToken(TokenType::Invalid); |
| return convert(*maybeResult); |
| } |
| |
| template class Lexer<Latin1Character>; |
| template class Lexer<char16_t>; |
| |
| } |