Source/WebGPU/WGSL/Lexer.cpp - external/github.com/WebKit/webkit - Git at Google

 /*
  * Copyright (C) 2022 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  */

 #include "config.h"
 #include "Lexer.h"

 #include "ConstantValue.h"
 #include <charconv>
 #include <string>
 #include <wtf/SortedArrayMap.h>
 #include <wtf/dtoa.h>
 #include <wtf/text/StringHash.h>
 #include <wtf/unicode/CharacterNames.h>

 namespace WGSL {

 // Returns how many UTF-16 code units (1 or 2) at the front of `code` form a
 // character that may begin an identifier, or 0 if there is no such character.
 // '_' is always accepted; everything else is decided by the XID_Start property.
 static unsigned isIdentifierStart(UChar character, std::span<const UChar> code)
 {
     if (character == '_')
         return 1;

     // A surrogate with at least one more unit behind it is tested as a two-unit sequence.
     const bool spansTwoUnits = code.size() > 1 && u_charType(character) == U_SURROGATE;
     const unsigned unitCount = spansTwoUnits ? 2 : 1;
     return u_stringHasBinaryProperty(code.data(), unitCount, UCHAR_XID_START) ? unitCount : 0;
 }

 // Returns how many UTF-16 code units (1 or 2) at the front of `code` form a
 // character that may continue an identifier, or 0 if there is no such character.
 static unsigned isIdentifierContinue(UChar character, std::span<const UChar> code)
 {
     // Anything that may start an identifier may also continue one.
     if (const unsigned startUnits = isIdentifierStart(character, code))
         return startUnits;

     // A surrogate with at least one more unit behind it is tested as a two-unit sequence.
     const bool spansTwoUnits = code.size() > 1 && u_charType(character) == U_SURROGATE;
     const unsigned unitCount = spansTwoUnits ? 2 : 1;
     return u_stringHasBinaryProperty(code.data(), unitCount, UCHAR_XID_CONTINUE) ? unitCount : 0;
 }

 // 8-bit overload: returns 1 when `character` is an ASCII letter or '_', otherwise 0.
 // The span argument is unused.
 static unsigned isIdentifierStart(LChar character, std::span<const LChar>)
 {
     if (character == '_')
         return 1;
     return isASCIIAlpha(character) ? 1 : 0;
 }

 // 8-bit overload: returns 1 when `character` is an ASCII letter, an ASCII digit
 // or '_', otherwise 0. The span argument is unused.
 static unsigned isIdentifierContinue(LChar character, std::span<const LChar>)
 {
     if (character == '_')
         return 1;
     return isASCIIAlphanumeric(character) ? 1 : 0;
 }

 // Tokenizes the whole input. The returned vector always ends with either an
 // EndOfFile or an Invalid token, whichever is produced first.
 //
 // ">>=" is followed by two Placeholder tokens and ">>" / ">=" by one, so each of
 // these tokens occupies as many slots as it has source characters (presumably so
 // a later stage can split them in place -- confirm against the parser).
 template <typename T>
 Vector<Token> Lexer<T>::lex()
 {
     Vector<Token> tokens;

     for (;;) {
         auto token = nextToken();
         tokens.append(token);

         unsigned placeholderCount = 0;
         if (token.type == TokenType::GtGtEq)
             placeholderCount = 2;
         else if (token.type == TokenType::GtGt || token.type == TokenType::GtEq)
             placeholderCount = 1;

         for (unsigned i = 0; i < placeholderCount; ++i)
             tokens.append(makeToken(TokenType::Placeholder));

         if (token.type == TokenType::EndOfFile || token.type == TokenType::Invalid)
             return tokens;
     }
 }

 // Scans and returns the next token. Leading whitespace and comments are skipped
 // first. Returns TokenType::Invalid when comment skipping fails, when no token can
 // start at the current character, or when an identifier begins with "__";
 // returns TokenType::EndOfFile once the input is exhausted.
 template <typename T>
 Token Lexer<T>::nextToken()
 {
     if (!skipWhitespaceAndComments())
         return makeToken(TokenType::Invalid);

     m_tokenStartingPosition = m_currentPosition;

     if (isAtEndOfFile())
         return makeToken(TokenType::EndOfFile);

     // Consumes the current character and produces a token of the given type.
     auto single = [&](TokenType type) {
         shift();
         return makeToken(type);
     };

     // Consumes the current character only when it equals `expected`.
     auto accept = [&](char expected) {
         if (m_current != expected)
             return false;
         shift();
         return true;
     };

     switch (m_current) {
     case '!':
         shift();
         return makeToken(accept('=') ? TokenType::BangEq : TokenType::Bang);
     case '%':
         shift();
         return makeToken(accept('=') ? TokenType::ModuloEq : TokenType::Modulo);
     case '&':
         shift();
         if (accept('&'))
             return makeToken(TokenType::AndAnd);
         return makeToken(accept('=') ? TokenType::AndEq : TokenType::And);
     case '(':
         return single(TokenType::ParenLeft);
     case ')':
         return single(TokenType::ParenRight);
     case '{':
         return single(TokenType::BraceLeft);
     case '}':
         return single(TokenType::BraceRight);
     case '[':
         return single(TokenType::BracketLeft);
     case ']':
         return single(TokenType::BracketRight);
     case ':':
         return single(TokenType::Colon);
     case ',':
         return single(TokenType::Comma);
     case ';':
         return single(TokenType::Semicolon);
     case '=':
         shift();
         return makeToken(accept('=') ? TokenType::EqEq : TokenType::Equal);
     case '>':
         shift();
         if (accept('='))
             return makeToken(TokenType::GtEq);
         if (accept('>'))
             return makeToken(accept('=') ? TokenType::GtGtEq : TokenType::GtGt);
         return makeToken(TokenType::Gt);
     case '<':
         shift();
         if (accept('='))
             return makeToken(TokenType::LtEq);
         if (accept('<'))
             return makeToken(accept('=') ? TokenType::LtLtEq : TokenType::LtLt);
         return makeToken(TokenType::Lt);
     case '@':
         return single(TokenType::Attribute);
     case '*':
         shift();
         // FIXME: Report unbalanced block comments, such as "this is an unbalanced comment. */"
         return makeToken(accept('=') ? TokenType::StarEq : TokenType::Star);
     case '/':
         shift();
         return makeToken(accept('=') ? TokenType::SlashEq : TokenType::Slash);
     case '-':
         shift();
         if (accept('>'))
             return makeToken(TokenType::Arrow);
         if (accept('-'))
             return makeToken(TokenType::MinusMinus);
         return makeToken(accept('=') ? TokenType::MinusEq : TokenType::Minus);
     case '+':
         shift();
         if (accept('+'))
             return makeToken(TokenType::PlusPlus);
         return makeToken(accept('=') ? TokenType::PlusEq : TokenType::Plus);
     case '^':
         shift();
         return makeToken(accept('=') ? TokenType::XorEq : TokenType::Xor);
     case '|':
         shift();
         if (accept('|'))
             return makeToken(TokenType::OrOr);
         return makeToken(accept('=') ? TokenType::OrEq : TokenType::Or);
     case '~':
         return single(TokenType::Tilde);
     default:
         break;
     }

     // Not punctuation: either a number (which also covers a lone '.') or an identifier-like word.
     if (isASCIIDigit(m_current) || m_current == '.')
         return lexNumber();

     const unsigned firstUnits = isIdentifierStart(m_current, m_code.span());
     if (!firstUnits)
         return makeToken(TokenType::Invalid);

     auto* startOfToken = m_code.position();
     unsigned length = firstUnits;
     shift(firstUnits);
     for (;;) {
         if (isAtEndOfFile())
             break;
         const unsigned nextUnits = isIdentifierContinue(m_current, m_code.span());
         if (!nextUnits)
             break;
         length += nextUnits;
         shift(nextUnits);
     }

     // FIXME: a trie would be more efficient here, look at JavaScriptCore/KeywordLookupGenerator.py for an example of code autogeneration that produces such a trie.
     String view(StringImpl::createWithoutCopying({ startOfToken, currentTokenLength() }));

     static constexpr std::pair<ComparableASCIILiteral, TokenType> keywordMappings[] {
         { "_", TokenType::Underbar },

 #define MAPPING_ENTRY(lexeme, name)\
         { #lexeme, TokenType::Keyword##name },
 FOREACH_KEYWORD(MAPPING_ENTRY)
 #undef MAPPING_ENTRY

     };
     static constexpr SortedArrayMap keywords { keywordMappings };

     // https://www.w3.org/TR/WGSL/#reserved-words
     static constexpr ComparableASCIILiteral reservedWords[] {
         "NULL",
         "Self",
         "abstract",
         "active",
         "alignas",
         "alignof",
         "as",
         "asm",
         "asm_fragment",
         "async",
         "attribute",
         "auto",
         "await",
         "become",
         "binding_array",
         "cast",
         "catch",
         "class",
         "co_await",
         "co_return",
         "co_yield",
         "coherent",
         "column_major",
         "common",
         "compile",
         "compile_fragment",
         "concept",
         "const_cast",
         "consteval",
         "constexpr",
         "constinit",
         "crate",
         "debugger",
         "decltype",
         "delete",
         "demote",
         "demote_to_helper",
         "do",
         "dynamic_cast",
         "enum",
         "explicit",
         "export",
         "extends",
         "extern",
         "external",
         "fallthrough",
         "filter",
         "final",
         "finally",
         "friend",
         "from",
         "fxgroup",
         "get",
         "goto",
         "groupshared",
         "highp",
         "impl",
         "implements",
         "import",
         "inline",
         "instanceof",
         "interface",
         "layout",
         "lowp",
         "macro",
         "macro_rules",
         "match",
         "mediump",
         "meta",
         "mod",
         "module",
         "move",
         "mut",
         "mutable",
         "namespace",
         "new",
         "nil",
         "noexcept",
         "noinline",
         "nointerpolation",
         "noperspective",
         "null",
         "nullptr",
         "of",
         "operator",
         "package",
         "packoffset",
         "partition",
         "pass",
         "patch",
         "pixelfragment",
         "precise",
         "precision",
         "premerge",
         "priv",
         "protected",
         "pub",
         "public",
         "readonly",
         "ref",
         "regardless",
         "register",
         "reinterpret_cast",
         "require",
         "resource",
         "restrict",
         "self",
         "set",
         "shared",
         "sizeof",
         "smooth",
         "snorm",
         "static",
         "static_assert",
         "static_cast",
         "std",
         "subroutine",
         "super",
         "target",
         "template",
         "this",
         "thread_local",
         "throw",
         "trait",
         "try",
         "type",
         "typedef",
         "typeid",
         "typename",
         "typeof",
         "union",
         "unless",
         "unorm",
         "unsafe",
         "unsized",
         "use",
         "using",
         "varying",
         "virtual",
         "volatile",
         "wgsl",
         "where",
         "with",
         "writeonly",
         "yield",
     };
     static constexpr SortedArraySet reservedWordSet { reservedWords };

     // Keywords take precedence over reserved words, which take precedence over the "__" check.
     const auto keywordType = keywords.get(view);
     if (keywordType != TokenType::Invalid)
         return makeToken(keywordType);

     if (UNLIKELY(reservedWordSet.contains(view)))
         return makeToken(TokenType::ReservedWord);

     // Words starting with two underscores are rejected.
     if (UNLIKELY(length >= 2 && startOfToken[0] == '_' && startOfToken[1] == '_'))
         return makeToken(TokenType::Invalid);

     return makeIdentifierToken(WTFMove(view));
 }

 // Advances the cursor by `i` code units and returns the character that was
 // current before the call. Afterwards m_current is the new front character, or 0
 // when no characters remain. Both the absolute offset and the line offset move by `i`.
 template <typename T>
 T Lexer<T>::shift(unsigned i)
 {
     ASSERT(i <= m_code.lengthRemaining());

     const T previous = m_current;

     m_currentPosition.lineOffset += i;
     m_currentPosition.offset += i;
     m_code.advanceBy(i);

     // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
     m_current = 0;
     if (LIKELY(m_code.hasCharactersRemaining()))
         m_current = m_code[0];

     return previous;
 }

 // Returns the character `i` positions ahead of the cursor without consuming
 // anything, or 0 when that position is past the end of the input.
 template <typename T>
 T Lexer<T>::peek(unsigned i)
 {
     if (LIKELY(i < m_code.lengthRemaining()))
         return m_code[i];
     return 0;
 }

 // Records a line break: moves to the next line and resets the offset within the line.
 template <typename T>
 void Lexer<T>::newLine()
 {
     m_currentPosition.lineOffset = 0;
     ++m_currentPosition.line;
 }

 // Skips a (possibly nested) block comment. The cursor must be on the opening
 // "/*". Returns true once the matching "*/" has been consumed, and false if the
 // input ends, or a 0 character is read, before the comment is closed.
 template <typename T>
 bool Lexer<T>::skipBlockComments()
 {
     ASSERT(peek(0) == '/' && peek(1) == '*');
     shift(2);

     unsigned nesting = 1u;

     while (!isAtEndOfFile()) {
         const T consumed = shift();
         if (!consumed)
             break;

         if (consumed == '\n') {
             newLine();
             continue;
         }

         if (consumed == '/' && peek() == '*') {
             // Nested opener.
             shift();
             ++nesting;
             continue;
         }

         if (consumed == '*' && peek() == '/') {
             shift();
             // Once the outermost comment is closed we stop, so for a construction like
             // "/* */ */" the block comment "/* */" is skipped successfully and the
             // trailing " */" is processed separately.
             if (!--nesting)
                 return true;
         }
     }

     // FIXME: Report unbalanced block comments, such as "/* this is an unbalanced comment."
     return false;
 }

 // Skips a "//" comment. The cursor must be on the leading "//". Consumption stops
 // at the end of input or just before the next '\n', which is left for the caller.
 template <typename T>
 void Lexer<T>::skipLineComment()
 {
     ASSERT(peek(0) == '/' && peek(1) == '/');
     // For a "\r\n" line ending the '\r' is swallowed as part of the comment; the
     // '\n' that follows is then handled as ordinary whitespace by the caller.
     while (!(isAtEndOfFile() || peek() == '\n'))
         shift();
 }

 // Skips any run of whitespace, line comments and block comments, keeping the
 // line counter up to date. Returns false only when a block comment could not be
 // skipped; otherwise returns true with the cursor on the first significant
 // character (or at the end of the input).
 template <typename T>
 bool Lexer<T>::skipWhitespaceAndComments()
 {
     while (!isAtEndOfFile()) {
         if (isUnicodeCompatibleASCIIWhitespace(m_current)) {
             if (shift() == '\n')
                 newLine();
             continue;
         }

         if (peek(0) != '/')
             return true;

         const T following = peek(1);
         if (following == '/') {
             skipLineComment();
             continue;
         }

         // A '/' that does not open a comment belongs to the next token.
         if (following != '*')
             return true;

         if (!skipBlockComments())
             return false;
     }
     return true;
 }

 // Returns true when no input remains. In that state m_current is expected to be 0.
 template <typename T>
 bool Lexer<T>::isAtEndOfFile() const
 {
     if (!m_code.atEnd())
         return false;

     ASSERT(!m_current);
     return true;
 }

 // Lexes a numeric literal starting at the current character. nextToken() calls
 // this when the current character is an ASCII digit or '.'.
 //
 // Returns:
 //  - an integer or float literal token, chosen from the literal's suffix and form;
 //  - TokenType::Period for a '.' that is not followed by a digit;
 //  - TokenType::Invalid when the text does not match the grammar below, when an
 //    integer has more than 19 digits or overflows int64_t, or when the conversion
 //    helper for the requested type rejects the value.
 template <typename T>
 Token Lexer<T>::lexNumber()
 {
     /* Grammar:
     decimal_int_literal:
     | /0[iu]?/
     | /[1-9][0-9]*[iu]?/

     hex_int_literal :
     | /0[xX][0-9a-fA-F]+[iu]?/

     decimal_float_literal:
     | /0[fh]/
     | /[0-9]*\.[0-9]+([eE][+-]?[0-9]+)?[fh]?/
     | /[0-9]+\.[0-9]*([eE][+-]?[0-9]+)?[fh]?/
     | /[0-9]+[eE][+-]?[0-9]+[fh]?/
     | /[1-9][0-9]*[fh]/

     hex_float_literal:
     | /0[xX][0-9a-fA-F]*\.[0-9a-fA-F]+([pP][+-]?[0-9]+[fh]?)?/
     | /0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*([pP][+-]?[0-9]+[fh]?)?/
     | /0[xX][0-9a-fA-F]+[pP][+-]?[0-9]+[fh]?/
     */

     /* State machine:
     Start -> InitZero (0)
           -> Decimal (1-9)
           -> FloatFractNoIntegral(.)

     InitZero -> End (i, u, f, h, ∅)
              -> Hex (x, X)
              -> Float (0-9)
              -> FloatFract(.)
              -> FloatExponent(e, E)

     Decimal -> End (i, u, f, h, ∅)
             -> Decimal (0-9)
             -> FloatFract(.)
             -> FloatExponent(e, E)

     Float -> Float (0-9)
           -> FloatFract(.)
           -> FloatExponent(e, E)

     FloatFractNoIntegral -> FloatFract (0-9)
                          -> End(∅)

     FloatFract -> FloatFract (0-9)
                -> FloatExponent(e, E)
                -> End(f, h, ∅)

     FloatExponent -> FloatExponentPostSign(+, -)
                   -> FloatExponentNonEmpty(0-9)

     FloatExponentPostSign -> FloatExponentNonEmpty(0-9)

     FloatExponentNonEmpty -> FloatExponentNonEmpty(0-9)
                           -> End(f, h, ∅)

     Hex -> HexNonEmpty(0-9, a-f, A-F)
         -> HexFloatFractNoIntegral(.)

     HexNonEmpty -> HexNonEmpty(0-9, a-f, A-F)
                 -> End(i, u, ∅)
                 -> HexFloatFract(.)
                 -> HexFloatExponent(p, P)

     HexFloatFractNoIntegral -> HexFloatFract(0-9, a-f, A-F)

     HexFloatFract -> HexFloatFract(0-9, a-f, A-F)
                   -> HexFloatExponent(p, P)
                   -> End(∅)

     HexFloatExponent -> HexFloatExponentNonEmpty(0-9)
                      -> HexFloatExponentPostSign(+, -)

     HexFloatExponentPostSign -> HexFloatExponentNonEmpty(0-9)

     HexFloatExponentNonEmpty -> HexFloatExponentNonEmpty(0-9)
                              -> End(f, h, ∅)
     */

     enum State : uint8_t {
         Start,

         InitZero,
         Decimal,

         Float,
         FloatFractNoIntegral,
         FloatFract,
         FloatExponent,
         FloatExponentPostSign,
         FloatExponentNonEmpty,

         Hex,
         HexNonEmpty,
         HexFloatFractNoIntegral,
         HexFloatFract,
         HexFloatExponent,
         HexFloatExponentPostSign,
         HexFloatExponentNonEmpty,

         End,
         EndNoShift,
     };

     auto state = Start;
     char suffix = '\0';
     char exponentSign = '\0';
     bool isHex = false;
     // Start of the digits. For hex literals this is moved past the "0x" prefix.
     auto* integral = m_code.position();
     // `fract` and `exponent` are only tested for null below: a non-null value means
     // the literal has a fractional part / an exponent and must be parsed as a float.
     const T* fract = nullptr;
     const T* exponent = nullptr;

     // Each iteration inspects m_current in the current state, picks the next state,
     // and then consumes the character unless the state is EndNoShift.
     while (true) {
         switch (state) {
         case Start:
             switch (m_current) {
             case '0':
                 state = InitZero;
                 break;
             case '.':
                 state = FloatFractNoIntegral;
                 break;
             default:
                 ASSERT(isASCIIDigit(m_current));
                 state = Decimal;
                 break;
             }
             break;

         case InitZero:
             switch (m_current) {
             case 'i':
             case 'u':
             case 'f':
             case 'h':
                 state = End;
                 suffix = m_current;
                 break;

             case 'x':
             case 'X':
                 state = Hex;
                 break;

             case '.':
                 state = FloatFract;
                 break;

             case 'e':
             case 'E':
                 state = FloatExponent;
                 break;

             default:
                 if (isASCIIDigit(m_current))
                     state = Float;
                 else
                     state = EndNoShift;
             }
             break;

         case Decimal:
             switch (m_current) {
             case 'i':
             case 'u':
             case 'f':
             case 'h':
                 state = End;
                 suffix = m_current;
                 break;

             case '.':
                 state = FloatFract;
                 break;

             case 'e':
             case 'E':
                 state = FloatExponent;
                 break;

             default:
                 if (!isASCIIDigit(m_current))
                     state = EndNoShift;
             }
             break;

         case Float:
             // Reached from a leading zero followed by more digits; only valid if a
             // '.' or an exponent eventually follows.
             switch (m_current) {
             case '.':
                 state = FloatFract;
                 break;

             case 'e':
             case 'E':
                 state = FloatExponent;
                 break;

             default:
                 if (!isASCIIDigit(m_current))
                     return makeToken(TokenType::Invalid);
             }
             break;
         case FloatFractNoIntegral:
             fract = m_code.position();
             // The '.' has already been consumed; without a digit it is a Period token.
             if (!isASCIIDigit(m_current))
                 return makeToken(TokenType::Period);
             state = FloatFract;
             break;
         case FloatFract:
             if (!fract)
                 fract = m_code.position();
             switch (m_current) {
             case 'f':
             case 'h':
                 state = End;
                 suffix = m_current;
                 break;

             case 'e':
             case 'E':
                 state = FloatExponent;
                 break;

             default:
                 if (!isASCIIDigit(m_current))
                     state = EndNoShift;
             }
             break;
         case FloatExponent:
             exponent = m_code.position();
             switch (m_current) {
             case '+':
             case '-':
                 exponentSign = m_current;
                 state = FloatExponentPostSign;
                 break;
             default:
                 if (!isASCIIDigit(m_current))
                     return makeToken(TokenType::Invalid);
                 state = FloatExponentNonEmpty;
             }
             break;
         case FloatExponentPostSign:
             if (exponentSign == '+')
                 exponent = m_code.position();
             if (!isASCIIDigit(m_current))
                 return makeToken(TokenType::Invalid);
             state = FloatExponentNonEmpty;
             break;
         case FloatExponentNonEmpty:
             switch (m_current) {
             case 'f':
             case 'h':
                 state = End;
                 suffix = m_current;
                 break;
             default:
                 if (!isASCIIDigit(m_current))
                     state = EndNoShift;
             }
             break;
         case Hex:
             isHex = true;
             integral = m_code.position();
             if (m_current == '.')
                 state = HexFloatFractNoIntegral;
             else if (isASCIIHexDigit(m_current))
                 state = HexNonEmpty;
             else
                 return makeToken(TokenType::Invalid);
             break;
         case HexNonEmpty:
             switch (m_current) {
             case 'i':
             case 'u':
                 state = End;
                 suffix = m_current;
                 break;

             case 'p':
             case 'P':
                 // The [fh] suffix after a hex exponent is optional in the grammar above,
                 // whether or not the literal has a '.', so this shares the exponent states
                 // used after a fractional part. Previously a separate set of states
                 // rejected e.g. "0x1p1" while still accepting "0x1p10".
                 state = HexFloatExponent;
                 break;

             case '.':
                 state = HexFloatFract;
                 break;

             default:
                 if (!isASCIIHexDigit(m_current))
                     state = EndNoShift;
             }
             break;
         case HexFloatFractNoIntegral:
             fract = m_code.position();
             if (!isASCIIHexDigit(m_current))
                 return makeToken(TokenType::Invalid);
             state = HexFloatFract;
             break;
         case HexFloatFract:
             if (!fract)
                 fract = m_code.position();
             if (isASCIIHexDigit(m_current))
                 break;
             if (m_current == 'p' || m_current == 'P')
                 state = HexFloatExponent;
             else
                 state = EndNoShift;
             break;
         case HexFloatExponent:
             exponent = m_code.position();
             if (isASCIIDigit(m_current))
                 state = HexFloatExponentNonEmpty;
             else if (m_current == '+' || m_current == '-') {
                 exponentSign = m_current;
                 state = HexFloatExponentPostSign;
             } else
                 return makeToken(TokenType::Invalid);
             break;
         case HexFloatExponentPostSign:
             if (exponentSign == '+')
                 exponent = m_code.position();
             if (isASCIIDigit(m_current)) {
                 state = HexFloatExponentNonEmpty;
                 break;
             }
             return makeToken(TokenType::Invalid);
         case HexFloatExponentNonEmpty:
             if (isASCIIDigit(m_current))
                 state = HexFloatExponentNonEmpty;
             else if (m_current == 'f' || m_current == 'h') {
                 state = End;
                 suffix = m_current;
             } else
                 state = EndNoShift;
             break;
         case End:
         case EndNoShift:
             RELEASE_ASSERT_NOT_REACHED();
         }

         if (state == EndNoShift)
             break;
         shift();
         if (state == End)
             break;
     }

     // Turns the parsed value (int64_t or double) into a token according to the suffix.
     // An 'i'/'u' suffix on a floating-point value, or a rejected conversion, yields Invalid.
     const auto& convert = [&](auto value) -> Token {
         switch (suffix) {
         case 'i': {
             if constexpr (std::is_integral_v<decltype(value)>) {
                 if (auto result = convertInteger<int>(value))
                     return makeIntegerToken(TokenType::IntegerLiteralSigned, *result);
             }
             break;
         }
         case 'u': {
             if constexpr (std::is_integral_v<decltype(value)>) {
                 if (auto result = convertInteger<unsigned>(value))
                     return makeIntegerToken(TokenType::IntegerLiteralUnsigned, *result);
             }
             break;
         }
         case 'f': {
             if (auto result = convertFloat<float>(value))
                 return makeFloatToken(TokenType::FloatLiteral, *result);
             break;
         }
         case 'h':
             if (auto result = convertFloat<half>(value))
                 return makeFloatToken(TokenType::HalfLiteral, *result);
             break;
         default:
             if constexpr (std::is_floating_point_v<decltype(value)>) {
                 if (auto result = convertFloat<double>(value))
                     return makeFloatToken(TokenType::AbstractFloatLiteral, *result);
             } else {
                 if (auto result = convertInteger<int64_t>(value))
                     return makeIntegerToken(TokenType::IntegerLiteral, *result);
             }
         }
         return makeToken(TokenType::Invalid);
     };

     // One past the last character of the numeric text, i.e. excluding the suffix if any.
     auto* end = m_code.position() - (suffix ? 1 : 0);
     if (!fract && !exponent) {
         // Integer text (possibly carrying a float suffix, e.g. "1f").
         auto length = static_cast<size_t>(end - integral);
         if (length > 19)
             return makeToken(TokenType::Invalid);

         char ascii[20];
         const char* asciiStart;
         const char* asciiEnd;
         if constexpr (sizeof(T) == 1) {
             asciiStart = bitwise_cast<const char*>(integral);
             asciiEnd = bitwise_cast<const char*>(end);
         } else {
             // from_chars needs 8-bit text, so narrow the digits into a local buffer.
             for (unsigned i = 0; i < length; ++i) {
                 auto digit = integral[i];
                 RELEASE_ASSERT(isASCIIHexDigit(digit));
                 ascii[i] = digit;
             }
             ascii[length] = '\0';
             asciiStart = ascii;
             asciiEnd = ascii + length;
         }

         int64_t result;
         auto base = isHex ? 16 : 10;
         auto remaining = std::from_chars(asciiStart, asciiEnd, result, base);
         RELEASE_ASSERT(remaining.ptr == asciiEnd);
         if (remaining.ec == std::errc::result_out_of_range)
             return makeToken(TokenType::Invalid);
         return convert(result);
     }

     if (!isHex) {
         size_t parsedLength;
         double result = parseDouble(std::span { integral, m_code.position() }, parsedLength);
         ASSERT(integral + parsedLength == end);
         return convert(result);
     }

     // Hex floats are parsed with strtod, which needs NUL-terminated 8-bit text. Copy the
     // literal (the "0x" prefix located just before `integral`, up to but excluding any
     // suffix) into a local string. Reinterpreting the source pointer as char* handed
     // strtod raw UTF-16 bytes for 16-bit input and relied on the source buffer being
     // terminated for 8-bit input. Every character accepted by the state machine above
     // is ASCII, so the narrowing below is lossless.
     std::string hexLiteral;
     hexLiteral.reserve(static_cast<size_t>(end - (integral - 2)));
     for (auto* cursor = integral - 2; cursor != end; ++cursor)
         hexLiteral.push_back(static_cast<char>(*cursor));

     char* parseEnd;
     double result = std::strtod(hexLiteral.c_str(), &parseEnd);
     ASSERT(parseEnd == hexLiteral.c_str() + hexLiteral.size());
     return convert(result);
 }

 template class Lexer<LChar>;
 template class Lexer<UChar>;

 }
	/*
	* Copyright (C) 2022 Apple Inc. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	*
	* THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
	* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
	* THE POSSIBILITY OF SUCH DAMAGE.
	*/

	#include "config.h"
	#include "Lexer.h"

	#include "ConstantValue.h"
	#include <charconv>
	#include <wtf/SortedArrayMap.h>
	#include <wtf/dtoa.h>
	#include <wtf/text/StringHash.h>
	#include <wtf/unicode/CharacterNames.h>

	namespace WGSL {

	// Reports how many UTF-16 code units at the front of `code` make up an identifier-start
	// character: 1 for an underscore, 1 or 2 for a sequence carrying the XID_Start property,
	// and 0 when no identifier can begin here.
	// NOTE(review): `character` is presumably the first unit of `code` - confirm at call sites.
	static unsigned isIdentifierStart(UChar character, std::span<const UChar> code)
	{
	    if (character == '_')
	        return 1;

	    // A surrogate that has at least one more unit after it is tested as a two-unit sequence.
	    const bool spansTwoUnits = code.size() > 1 && u_charType(character) == U_SURROGATE;
	    const unsigned unitCount = spansTwoUnits ? 2 : 1;
	    return u_stringHasBinaryProperty(code.data(), unitCount, UCHAR_XID_START) ? unitCount : 0;
	}

	// Reports how many UTF-16 code units at the front of `code` make up an identifier-continue
	// character. Anything accepted by isIdentifierStart is accepted here too; otherwise the
	// sequence must carry the XID_Continue property. Returns 0 when neither applies.
	static unsigned isIdentifierContinue(UChar character, std::span<const UChar> code)
	{
	    const unsigned startLength = isIdentifierStart(character, code);
	    if (startLength)
	        return startLength;

	    // A surrogate that has at least one more unit after it is tested as a two-unit sequence.
	    const bool spansTwoUnits = code.size() > 1 && u_charType(character) == U_SURROGATE;
	    const unsigned unitCount = spansTwoUnits ? 2 : 1;
	    if (!u_stringHasBinaryProperty(code.data(), unitCount, UCHAR_XID_CONTINUE))
	        return 0;
	    return unitCount;
	}

	// 8-bit overload: an identifier may start with an ASCII letter or an underscore.
	// Returns 1 (one character consumed) on a match and 0 otherwise; the span is unused.
	static unsigned isIdentifierStart(LChar character, std::span<const LChar>)
	{
	    return isASCIIAlpha(character) || character == '_';
	}

	// 8-bit overload: an identifier may continue with an ASCII letter, an ASCII digit or an
	// underscore. Returns 1 (one character consumed) on a match and 0 otherwise; the span is unused.
	static unsigned isIdentifierContinue(LChar character, std::span<const LChar>)
	{
	    return isASCIIAlphanumeric(character) || character == '_';
	}

	// Lexes the whole input and returns the resulting token list.
	// Tokens are appended until an EndOfFile or Invalid token is produced; that terminating
	// token is included in the result.
	template <typename T>
	Vector<Token> Lexer<T>::lex()
	{
	    Vector<Token> tokens;

	    while (true) {
	        auto token = nextToken();
	        tokens.append(token);

	        // Tokens that begin with '>' and span several characters are followed by Placeholder
	        // tokens: two after ">>=", one after ">>" and ">=".
	        // NOTE(review): presumably this lets a later stage split them into shorter tokens in place - confirm.
	        switch (token.type) {
	        case TokenType::GtGtEq:
	            tokens.append(makeToken(TokenType::Placeholder));
	            FALLTHROUGH;
	        case TokenType::GtGt:
	        case TokenType::GtEq:
	            tokens.append(makeToken(TokenType::Placeholder));
	            break;
	        default:
	            break;
	        }

	        if (token.type == TokenType::EndOfFile || token.type == TokenType::Invalid)
	            break;
	    }

	    return tokens;
	}

	// Produces the next token from the input.
	//
	// Leading whitespace and comments are skipped first; if that fails (unterminated block
	// comment) an Invalid token is returned. At the end of the input an EndOfFile token is
	// returned. Otherwise the current character selects an operator/punctuation token, a
	// numeric literal (delegated to lexNumber), or an identifier-like word that is classified
	// as a keyword, a reserved word, an invalid "__"-prefixed name, or a plain identifier.
	// Any character that starts none of these yields an Invalid token.
	template <typename T>
	Token Lexer<T>::nextToken()
	{
	    if (!skipWhitespaceAndComments())
	        return makeToken(TokenType::Invalid);

	    m_tokenStartingPosition = m_currentPosition;

	    if (isAtEndOfFile())
	        return makeToken(TokenType::EndOfFile);

	    switch (m_current) {
	    case '!':
	        shift();
	        if (m_current == '=') {
	            shift();
	            return makeToken(TokenType::BangEq);
	        }
	        return makeToken(TokenType::Bang);
	    case '%':
	        shift();
	        switch (m_current) {
	        case '=':
	            shift();
	            return makeToken(TokenType::ModuloEq);
	        default:
	            return makeToken(TokenType::Modulo);
	        }
	    case '&':
	        shift();
	        switch (m_current) {
	        case '&':
	            shift();
	            return makeToken(TokenType::AndAnd);
	        case '=':
	            shift();
	            return makeToken(TokenType::AndEq);
	        default:
	            return makeToken(TokenType::And);
	        }
	    case '(':
	        shift();
	        return makeToken(TokenType::ParenLeft);
	    case ')':
	        shift();
	        return makeToken(TokenType::ParenRight);
	    case '{':
	        shift();
	        return makeToken(TokenType::BraceLeft);
	    case '}':
	        shift();
	        return makeToken(TokenType::BraceRight);
	    case '[':
	        shift();
	        return makeToken(TokenType::BracketLeft);
	    case ']':
	        shift();
	        return makeToken(TokenType::BracketRight);
	    case ':':
	        shift();
	        return makeToken(TokenType::Colon);
	    case ',':
	        shift();
	        return makeToken(TokenType::Comma);
	    case ';':
	        shift();
	        return makeToken(TokenType::Semicolon);
	    case '=':
	        shift();
	        if (m_current == '=') {
	            shift();
	            return makeToken(TokenType::EqEq);
	        }
	        return makeToken(TokenType::Equal);
	    case '>':
	        shift();
	        switch (m_current) {
	        case '=':
	            shift();
	            return makeToken(TokenType::GtEq);
	        case '>':
	            shift();
	            switch (m_current) {
	            case '=':
	                shift();
	                return makeToken(TokenType::GtGtEq);
	            default:
	                return makeToken(TokenType::GtGt);
	            }
	        default:
	            return makeToken(TokenType::Gt);
	        }
	    case '<':
	        shift();
	        switch (m_current) {
	        case '=':
	            shift();
	            return makeToken(TokenType::LtEq);
	        case '<':
	            shift();
	            switch (m_current) {
	            case '=':
	                shift();
	                return makeToken(TokenType::LtLtEq);
	            default:
	                return makeToken(TokenType::LtLt);
	            }
	        default:
	            return makeToken(TokenType::Lt);
	        }
	    case '@':
	        shift();
	        return makeToken(TokenType::Attribute);
	    case '*':
	        shift();
	        switch (m_current) {
	        case '=':
	            shift();
	            return makeToken(TokenType::StarEq);
	        default:
	            // FIXME: Report unbalanced block comments, such as "this is an unbalanced comment. */"
	            return makeToken(TokenType::Star);
	        }
	    case '/':
	        // Comment openers ("//" and "/*") were already consumed by skipWhitespaceAndComments().
	        shift();
	        switch (m_current) {
	        case '=':
	            shift();
	            return makeToken(TokenType::SlashEq);
	        default:
	            return makeToken(TokenType::Slash);
	        }
	    case '-':
	        shift();
	        switch (m_current) {
	        case '>':
	            shift();
	            return makeToken(TokenType::Arrow);
	        case '-':
	            shift();
	            return makeToken(TokenType::MinusMinus);
	        case '=':
	            shift();
	            return makeToken(TokenType::MinusEq);
	        default:
	            return makeToken(TokenType::Minus);
	        }
	    case '+':
	        shift();
	        switch (m_current) {
	        case '+':
	            shift();
	            return makeToken(TokenType::PlusPlus);
	        case '=':
	            shift();
	            return makeToken(TokenType::PlusEq);
	        default:
	            return makeToken(TokenType::Plus);
	        }
	    case '^':
	        shift();
	        switch (m_current) {
	        case '=':
	            shift();
	            return makeToken(TokenType::XorEq);
	        default:
	            return makeToken(TokenType::Xor);
	        }
	    case '|':
	        shift();
	        switch (m_current) {
	        case '|':
	            shift();
	            return makeToken(TokenType::OrOr);
	        case '=':
	            shift();
	            return makeToken(TokenType::OrEq);
	        default:
	            return makeToken(TokenType::Or);
	        }
	    case '~':
	        shift();
	        return makeToken(TokenType::Tilde);
	    default:
	        // A leading '.' is handed to lexNumber too; it returns a Period token when no digit follows.
	        if (isASCIIDigit(m_current) || m_current == '.')
	            return lexNumber();
	        if (auto consumed = isIdentifierStart(m_current, m_code.span())) {
	            // `length` counts the code units of the word; `startOfToken` points at its first unit.
	            unsigned length = consumed;
	            auto* startOfToken = m_code.position();
	            shift(consumed);
	            while (!isAtEndOfFile()) {
	                auto continued = isIdentifierContinue(m_current, m_code.span());
	                if (!continued)
	                    break;
	                length += continued;
	                shift(continued);
	            }

	            // FIXME: a trie would be more efficient here, look at JavaScriptCore/KeywordLookupGenerator.py for an example of code autogeneration that produces such a trie.
	            String view(StringImpl::createWithoutCopying({ startOfToken, currentTokenLength() }));

	            static constexpr std::pair<ComparableASCIILiteral, TokenType> keywordMappings[] {
	                { "_", TokenType::Underbar },

	#define MAPPING_ENTRY(lexeme, name)\
	                { #lexeme, TokenType::Keyword##name },
	                FOREACH_KEYWORD(MAPPING_ENTRY)
	#undef MAPPING_ENTRY

	            };
	            static constexpr SortedArrayMap keywords { keywordMappings };

	            // https://www.w3.org/TR/WGSL/#reserved-words
	            static constexpr ComparableASCIILiteral reservedWords[] {
	                "NULL",
	                "Self",
	                "abstract",
	                "active",
	                "alignas",
	                "alignof",
	                "as",
	                "asm",
	                "asm_fragment",
	                "async",
	                "attribute",
	                "auto",
	                "await",
	                "become",
	                "binding_array",
	                "cast",
	                "catch",
	                "class",
	                "co_await",
	                "co_return",
	                "co_yield",
	                "coherent",
	                "column_major",
	                "common",
	                "compile",
	                "compile_fragment",
	                "concept",
	                "const_cast",
	                "consteval",
	                "constexpr",
	                "constinit",
	                "crate",
	                "debugger",
	                "decltype",
	                "delete",
	                "demote",
	                "demote_to_helper",
	                "do",
	                "dynamic_cast",
	                "enum",
	                "explicit",
	                "export",
	                "extends",
	                "extern",
	                "external",
	                "fallthrough",
	                "filter",
	                "final",
	                "finally",
	                "friend",
	                "from",
	                "fxgroup",
	                "get",
	                "goto",
	                "groupshared",
	                "highp",
	                "impl",
	                "implements",
	                "import",
	                "inline",
	                "instanceof",
	                "interface",
	                "layout",
	                "lowp",
	                "macro",
	                "macro_rules",
	                "match",
	                "mediump",
	                "meta",
	                "mod",
	                "module",
	                "move",
	                "mut",
	                "mutable",
	                "namespace",
	                "new",
	                "nil",
	                "noexcept",
	                "noinline",
	                "nointerpolation",
	                "noperspective",
	                "null",
	                "nullptr",
	                "of",
	                "operator",
	                "package",
	                "packoffset",
	                "partition",
	                "pass",
	                "patch",
	                "pixelfragment",
	                "precise",
	                "precision",
	                "premerge",
	                "priv",
	                "protected",
	                "pub",
	                "public",
	                "readonly",
	                "ref",
	                "regardless",
	                "register",
	                "reinterpret_cast",
	                "require",
	                "resource",
	                "restrict",
	                "self",
	                "set",
	                "shared",
	                "sizeof",
	                "smooth",
	                "snorm",
	                "static",
	                "static_assert",
	                "static_cast",
	                "std",
	                "subroutine",
	                "super",
	                "target",
	                "template",
	                "this",
	                "thread_local",
	                "throw",
	                "trait",
	                "try",
	                "type",
	                "typedef",
	                "typeid",
	                "typename",
	                "typeof",
	                "union",
	                "unless",
	                "unorm",
	                "unsafe",
	                "unsized",
	                "use",
	                "using",
	                "varying",
	                "virtual",
	                "volatile",
	                "wgsl",
	                "where",
	                "with",
	                "writeonly",
	                "yield",
	            };
	            static constexpr SortedArraySet reservedWordSet { reservedWords };

	            // Any lookup result other than Invalid is treated as a keyword (or "_") hit.
	            auto tokenType = keywords.get(view);
	            if (tokenType != TokenType::Invalid)
	                return makeToken(tokenType);

	            if (UNLIKELY(reservedWordSet.contains(view)))
	                return makeToken(TokenType::ReservedWord);

	            // Names that begin with two underscores are rejected. The characters themselves
	            // must be compared here, not the pointer into the source buffer.
	            if (UNLIKELY(length >= 2 && startOfToken[0] == '_' && startOfToken[1] == '_'))
	                return makeToken(TokenType::Invalid);

	            return makeIdentifierToken(WTFMove(view));
	        }
	        break;
	    }
	    return makeToken(TokenType::Invalid);
	}

	// Advances the cursor by `i` code units and returns the character that was current before
	// the move. The offset and line-offset counters advance by the same amount, and m_current is
	// refreshed from the new position (0 when nothing is left).
	template <typename T>
	T Lexer<T>::shift(unsigned i)
	{
	    ASSERT(i <= m_code.lengthRemaining());

	    const T previous = m_current;

	    m_currentPosition.lineOffset += i;
	    m_currentPosition.offset += i;
	    m_code.advanceBy(i);

	    // Zero first and overwrite afterwards: at one point timing showed that the unconditional
	    // store was faster than an if-else sequence.
	    m_current = 0;
	    if (LIKELY(m_code.hasCharactersRemaining()))
	        m_current = m_code[0];

	    return previous;
	}

	// Looks at the character `i` positions ahead of the cursor without consuming anything.
	// Returns 0 when that position lies at or beyond the end of the remaining input.
	template <typename T>
	T Lexer<T>::peek(unsigned i)
	{
	    if (LIKELY(i < m_code.lengthRemaining()))
	        return m_code[i];
	    return 0;
	}

	// Records a line break in the current source position: moves to the next line and
	// restarts the per-line offset at zero.
	template <typename T>
	void Lexer<T>::newLine()
	{
	    ++m_currentPosition.line;
	    m_currentPosition.lineOffset = 0;
	}

	// Consumes a (possibly nested) block comment. The cursor must sit on the opening "/*".
	// Returns true once the outermost comment has been closed, false if the input ends (or a
	// NUL character is consumed) before that happens. Line breaks inside the comment are counted.
	template <typename T>
	bool Lexer<T>::skipBlockComments()
	{
	    ASSERT(peek(0) == '/' && peek(1) == '*');
	    shift(2);

	    unsigned nesting = 1u;

	    while (!isAtEndOfFile()) {
	        const T consumed = shift();
	        if (!consumed)
	            break;

	        if (consumed == '\n') {
	            newLine();
	            continue;
	        }

	        if (consumed == '/' && peek() == '*') {
	            // A nested opener.
	            shift();
	            ++nesting;
	            continue;
	        }

	        if (consumed == '*' && peek() == '/') {
	            shift();
	            // Once the outermost comment closes we stop: for input like "/* */ */" the
	            // leading "/* */" is a complete comment and the trailing " */" is lexed separately.
	            if (!--nesting)
	                return true;
	        }
	    }

	    // FIXME: Report unbalanced block comments, such as "/* this is an unbalanced comment."
	    return false;
	}

	// Consumes a "//" comment up to, but not including, the terminating '\n' (or the end of input).
	template <typename T>
	void Lexer<T>::skipLineComment()
	{
	    ASSERT(peek(0) == '/' && peek(1) == '/');

	    // For a "\r\n" line ending the '\r' is swallowed as part of the comment; that is harmless
	    // because the '\n' left behind is ordinary whitespace.
	    while (true) {
	        if (isAtEndOfFile() || peek() == '\n')
	            return;
	        shift();
	    }
	}

	// Skips any run of whitespace, line comments and block comments in front of the next token.
	// Returns false only when a block comment could not be skipped; true otherwise.
	template <typename T>
	bool Lexer<T>::skipWhitespaceAndComments()
	{
	    while (!isAtEndOfFile()) {
	        if (isUnicodeCompatibleASCIIWhitespace(m_current)) {
	            if (shift() == '\n')
	                newLine();
	            continue;
	        }

	        // Anything other than a '/' is the start of a real token.
	        if (peek(0) != '/')
	            return true;

	        const T following = peek(1);
	        if (following == '/') {
	            skipLineComment();
	            continue;
	        }

	        // A '/' that does not open a comment is left for nextToken().
	        if (following != '*')
	            return true;

	        if (!skipBlockComments())
	            return false;
	    }
	    return true;
	}

	// True when no input remains. At that point m_current is expected to be 0 (checked in debug builds).
	template <typename T>
	bool Lexer<T>::isAtEndOfFile() const
	{
	    if (!m_code.atEnd())
	        return false;

	    ASSERT(!m_current);
	    return true;
	}

	// Lexes a numeric literal starting at the current character, which must be an ASCII digit
	// or '.'.
	//
	// A hand-written state machine (documented below) first validates the literal and records
	// where the integral digits start, whether a fraction or exponent was seen, whether the
	// literal is hexadecimal, and which type suffix (i, u, f, h) ends it, if any. The text is
	// then converted:
	//   - no fraction and no exponent: parsed as a 64-bit integer with std::from_chars;
	//   - decimal float: parsed with parseDouble;
	//   - hexadecimal float: parsed with std::strtod from a private NUL-terminated ASCII copy.
	// The suffix finally selects the token type and the range check applied to the value.
	//
	// Returns a Period token when a lone '.' is not followed by a digit, and an Invalid token
	// for malformed literals or values that do not fit the selected type.
	template <typename T>
	Token Lexer<T>::lexNumber()
	{
	    /* Grammar:
	       decimal_int_literal:
	       | /0[iu]?/
	       | /[1-9][0-9]*[iu]?/

	       hex_int_literal :
	       | /0[xX][0-9a-fA-F]+[iu]?/

	       decimal_float_literal:
	       | /0[fh]/
	       | /[0-9]*\.[0-9]+([eE][+-]?[0-9]+)?[fh]?/
	       | /[0-9]+\.[0-9]*([eE][+-]?[0-9]+)?[fh]?/
	       | /[0-9]+[eE][+-]?[0-9]+[fh]?/
	       | /[1-9][0-9]*[fh]/

	       hex_float_literal:
	       | /0[xX][0-9a-fA-F]*\.[0-9a-fA-F]+([pP][+-]?[0-9]+[fh]?)?/
	       | /0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*([pP][+-]?[0-9]+[fh]?)?/
	       | /0[xX][0-9a-fA-F]+[pP][+-]?[0-9]+[fh]?/
	    */

	    /* State machine:
	       Start -> InitZero (0)
	             -> Decimal (1-9)
	             -> FloatFractNoIntegral(.)

	       InitZero -> End (i, u, f, h, ∅)
	                -> Hex (x, X)
	                -> Float (0-9)
	                -> FloatFract(.)
	                -> FloatExponent(e, E)

	       Decimal -> End (i, u, f, h, ∅)
	               -> Decimal (0-9)
	               -> FloatFract(.)
	               -> FloatExponent(e, E)

	       Float -> Float (0-9)
	             -> FloatFract(.)
	             -> FloatExponent(e, E)

	       FloatFractNoIntegral -> FloatFract (0-9)
	                            -> End(∅)

	       FloatFract -> FloatFract (0-9)
	                  -> FloatExponent(e, E)
	                  -> End(f, h, ∅)

	       FloatExponent -> FloatExponentPostSign(+, -)
	                     -> FloatExponentNonEmpty(0-9)

	       FloatExponentPostSign -> FloatExponentNonEmpty(0-9)

	       FloatExponentNonEmpty -> FloatExponentNonEmpty(0-9)
	                             -> End(f, h, ∅)

	       Hex -> HexNonEmpty(0-9, a-f, A-F)
	           -> HexFloatFractNoIntegral(.)

	       HexNonEmpty -> HexNonEmpty(0-9, a-f, A-F)
	                   -> End(i, u, ∅)
	                   -> HexFloatFract(.)
	                   -> HexFloatExponentRequireSuffix(p, P)

	       HexFloatFractNoIntegral -> HexFloatFract(0-9, a-f, A-F)

	       HexFloatFract -> HexFloatFract(0-9, a-f, A-F)
	                     -> HexFloatExponent(p, P)
	                     -> End(∅)

	       HexFloatExponent -> HexFloatExponentNonEmpty(0-9)
	                        -> HexFloatExponentPostSign(+, -)

	       HexFloatExponentPostSign -> HexFloatExponentNonEmpty(0-9)

	       HexFloatExponentNonEmpty -> HexFloatExponentNonEmpty(0-9)
	                                -> End(f, h, ∅)

	       HexFloatExponentRequireSuffix -> HexFloatExponentRequireSuffixNonEmpty(0-9)
	                                     -> HexFloatExponentRequireSuffixPostSign(+, -)

	       HexFloatExponentRequireSuffixPostSign -> HexFloatExponentRequireSuffixNonEmpty(0-9)

	       HexFloatExponentRequireSuffixNonEmpty -> HexFloatExponentRequireSuffixNonEmpty(0-9)
	                                             -> End(f, h)
	    */

	    enum State : uint8_t {
	        Start,

	        InitZero,
	        Decimal,

	        Float,
	        FloatFractNoIntegral,
	        FloatFract,
	        FloatExponent,
	        FloatExponentPostSign,
	        FloatExponentNonEmpty,

	        Hex,
	        HexNonEmpty,
	        HexFloatFractNoIntegral,
	        HexFloatFract,
	        HexFloatExponent,
	        HexFloatExponentPostSign,
	        HexFloatExponentNonEmpty,
	        HexFloatExponentRequireSuffix,
	        HexFloatExponentRequireSuffixPostSign,
	        HexFloatExponentRequireSuffixNonEmpty,

	        // End: the current character belongs to the literal and is consumed before stopping.
	        // EndNoShift: the current character is not part of the literal and is left in place.
	        End,
	        EndNoShift,
	    };

	    auto state = Start;
	    char suffix = '\0';
	    char exponentSign = '\0';
	    bool isHex = false;
	    // Start of the digits handed to the numeric parsers; moved past "0x" for hex literals.
	    auto* integral = m_code.position();
	    // Non-null once a fractional part / an exponent has been seen.
	    const T* fract = nullptr;
	    const T* exponent = nullptr;

	    // Each iteration inspects m_current in the current state, picks the next state, and then
	    // (unless the literal ended without consuming the character) shifts past it.
	    while (true) {
	        switch (state) {
	        case Start:
	            switch (m_current) {
	            case '0':
	                state = InitZero;
	                break;
	            case '.':
	                state = FloatFractNoIntegral;
	                break;
	            default:
	                ASSERT(isASCIIDigit(m_current));
	                state = Decimal;
	                break;
	            }
	            break;

	        case InitZero:
	            switch (m_current) {
	            case 'i':
	            case 'u':
	            case 'f':
	            case 'h':
	                state = End;
	                suffix = m_current;
	                break;

	            case 'x':
	            case 'X':
	                state = Hex;
	                break;

	            case '.':
	                state = FloatFract;
	                break;

	            case 'e':
	            case 'E':
	                state = FloatExponent;
	                break;

	            default:
	                if (isASCIIDigit(m_current))
	                    state = Float;
	                else
	                    state = EndNoShift;
	            }
	            break;

	        case Decimal:
	            switch (m_current) {
	            case 'i':
	            case 'u':
	            case 'f':
	            case 'h':
	                state = End;
	                suffix = m_current;
	                break;

	            case '.':
	                state = FloatFract;
	                break;

	            case 'e':
	            case 'E':
	                state = FloatExponent;
	                break;

	            default:
	                if (!isASCIIDigit(m_current))
	                    state = EndNoShift;
	            }
	            break;

	        case Float:
	            // Reached for a leading zero followed by more digits; only valid as a float.
	            switch (m_current) {
	            case '.':
	                state = FloatFract;
	                break;

	            case 'e':
	            case 'E':
	                state = FloatExponent;
	                break;

	            default:
	                if (!isASCIIDigit(m_current))
	                    return makeToken(TokenType::Invalid);
	            }
	            break;
	        case FloatFractNoIntegral:
	            fract = m_code.position();
	            // A '.' with no digit after it is not a number at all: it is the Period token.
	            if (!isASCIIDigit(m_current))
	                return makeToken(TokenType::Period);
	            state = FloatFract;
	            break;
	        case FloatFract:
	            if (!fract)
	                fract = m_code.position();
	            switch (m_current) {
	            case 'f':
	            case 'h':
	                state = End;
	                suffix = m_current;
	                break;

	            case 'e':
	            case 'E':
	                state = FloatExponent;
	                break;

	            default:
	                if (!isASCIIDigit(m_current))
	                    state = EndNoShift;
	            }
	            break;
	        case FloatExponent:
	            exponent = m_code.position();
	            switch (m_current) {
	            case '+':
	            case '-':
	                exponentSign = m_current;
	                state = FloatExponentPostSign;
	                break;
	            default:
	                if (!isASCIIDigit(m_current))
	                    return makeToken(TokenType::Invalid);
	                state = FloatExponentNonEmpty;
	            }
	            break;
	        case FloatExponentPostSign:
	            if (exponentSign == '+')
	                exponent = m_code.position();
	            if (!isASCIIDigit(m_current))
	                return makeToken(TokenType::Invalid);
	            state = FloatExponentNonEmpty;
	            break;
	        case FloatExponentNonEmpty:
	            switch (m_current) {
	            case 'f':
	            case 'h':
	                state = End;
	                suffix = m_current;
	                break;
	            default:
	                if (!isASCIIDigit(m_current))
	                    state = EndNoShift;
	            }
	            break;
	        case Hex:
	            isHex = true;
	            // The "0x"/"0X" prefix has been consumed; the digits start here.
	            integral = m_code.position();
	            if (m_current == '.')
	                state = HexFloatFractNoIntegral;
	            else if (isASCIIHexDigit(m_current))
	                state = HexNonEmpty;
	            else
	                return makeToken(TokenType::Invalid);
	            break;
	        case HexNonEmpty:
	            switch (m_current) {
	            case 'i':
	            case 'u':
	                state = End;
	                suffix = m_current;
	                break;

	            case 'p':
	            case 'P':
	                state = HexFloatExponentRequireSuffix;
	                break;

	            case '.':
	                state = HexFloatFract;
	                break;

	            default:
	                if (!isASCIIHexDigit(m_current))
	                    state = EndNoShift;
	            }
	            break;
	        case HexFloatFractNoIntegral:
	            fract = m_code.position();
	            if (!isASCIIHexDigit(m_current))
	                return makeToken(TokenType::Invalid);
	            state = HexFloatFract;
	            break;
	        case HexFloatFract:
	            if (!fract)
	                fract = m_code.position();
	            if (isASCIIHexDigit(m_current))
	                break;
	            if (m_current == 'p' || m_current == 'P')
	                state = HexFloatExponent;
	            else
	                state = EndNoShift;
	            break;
	        case HexFloatExponent:
	            exponent = m_code.position();
	            if (isASCIIDigit(m_current))
	                state = HexFloatExponentNonEmpty;
	            else if (m_current == '+' || m_current == '-') {
	                exponentSign = m_current;
	                state = HexFloatExponentPostSign;
	            } else
	                return makeToken(TokenType::Invalid);
	            break;
	        case HexFloatExponentPostSign:
	            if (exponentSign == '+')
	                exponent = m_code.position();
	            if (isASCIIDigit(m_current)) {
	                state = HexFloatExponentNonEmpty;
	                break;
	            }
	            return makeToken(TokenType::Invalid);
	        case HexFloatExponentNonEmpty:
	            if (isASCIIDigit(m_current))
	                state = HexFloatExponentNonEmpty;
	            else if (m_current == 'f' || m_current == 'h') {
	                state = End;
	                suffix = m_current;
	            } else
	                state = EndNoShift;
	            break;
	        case HexFloatExponentRequireSuffix:
	            exponent = m_code.position();
	            if (isASCIIDigit(m_current))
	                state = HexFloatExponentRequireSuffixNonEmpty;
	            else if (m_current == '+' || m_current == '-') {
	                exponentSign = m_current;
	                state = HexFloatExponentRequireSuffixPostSign;
	            } else
	                return makeToken(TokenType::Invalid);
	            break;
	        case HexFloatExponentRequireSuffixPostSign:
	            if (exponentSign == '+')
	                exponent = m_code.position();
	            if (isASCIIDigit(m_current)) {
	                state = HexFloatExponentRequireSuffixNonEmpty;
	                break;
	            }
	            return makeToken(TokenType::Invalid);
	        case HexFloatExponentRequireSuffixNonEmpty:
	            // NOTE(review): a second exponent digit moves to HexFloatExponentNonEmpty, where the
	            // suffix is optional, whereas the state-machine comment above keeps this state and
	            // requires f/h. Behavior is left as-is; confirm which one is intended.
	            if (isASCIIDigit(m_current))
	                state = HexFloatExponentNonEmpty;
	            else if (m_current == 'f' || m_current == 'h') {
	                state = End;
	                suffix = m_current;
	            } else
	                return makeToken(TokenType::Invalid);
	            break;
	        case End:
	        case EndNoShift:
	            RELEASE_ASSERT_NOT_REACHED();
	        }

	        if (state == EndNoShift)
	            break;
	        shift();
	        if (state == End)
	            break;
	    }

	    // Turns the parsed value into a token according to the suffix. Integer suffixes (i, u) are
	    // only honoured for integral values; without a suffix the value's own kind (integer or
	    // floating point) selects the abstract literal type. A failed conversion yields Invalid.
	    const auto& convert = [&](auto value) -> Token {
	        switch (suffix) {
	        case 'i': {
	            if constexpr (std::is_integral_v<decltype(value)>) {
	                if (auto result = convertInteger<int>(value))
	                    return makeIntegerToken(TokenType::IntegerLiteralSigned, *result);
	            }
	            break;
	        }
	        case 'u': {
	            if constexpr (std::is_integral_v<decltype(value)>) {
	                if (auto result = convertInteger<unsigned>(value))
	                    return makeIntegerToken(TokenType::IntegerLiteralUnsigned, *result);
	            }
	            break;
	        }
	        case 'f': {
	            if (auto result = convertFloat<float>(value))
	                return makeFloatToken(TokenType::FloatLiteral, *result);
	            break;
	        }
	        case 'h':
	            if (auto result = convertFloat<half>(value))
	                return makeFloatToken(TokenType::HalfLiteral, *result);
	            break;
	        default:
	            if constexpr (std::is_floating_point_v<decltype(value)>) {
	                if (auto result = convertFloat<double>(value))
	                    return makeFloatToken(TokenType::AbstractFloatLiteral, *result);
	            } else {
	                if (auto result = convertInteger<int64_t>(value))
	                    return makeIntegerToken(TokenType::IntegerLiteral, *result);
	            }
	        }
	        return makeToken(TokenType::Invalid);
	    };

	    // One past the last character of the numeric text, excluding the suffix if there is one.
	    auto* end = m_code.position() - (suffix ? 1 : 0);
	    if (!fract && !exponent) {
	        // Integer text. More than 19 characters is rejected up front, which also bounds the
	        // copy into the 20-byte buffer below.
	        auto length = static_cast<size_t>(end - integral);
	        if (length > 19)
	            return makeToken(TokenType::Invalid);

	        char ascii[20];
	        const char* asciiStart;
	        const char* asciiEnd;
	        if constexpr (sizeof(T) == 1) {
	            // 8-bit input can be parsed in place.
	            asciiStart = bitwise_cast<const char*>(integral);
	            asciiEnd = bitwise_cast<const char*>(end);
	        } else {
	            // Wider input is narrowed into a local buffer first.
	            for (unsigned i = 0; i < length; ++i) {
	                auto digit = integral[i];
	                RELEASE_ASSERT(isASCIIHexDigit(digit));
	                ascii[i] = digit;
	            }
	            ascii[length] = '\0';
	            asciiStart = ascii;
	            asciiEnd = ascii + length;
	        }

	        int64_t result;
	        auto base = isHex ? 16 : 10;
	        auto remaining = std::from_chars(asciiStart, asciiEnd, result, base);
	        RELEASE_ASSERT(remaining.ptr == asciiEnd);
	        if (remaining.ec == std::errc::result_out_of_range)
	            return makeToken(TokenType::Invalid);
	        return convert(result);
	    }

	    if (!isHex) {
	        size_t parsedLength;
	        double result = parseDouble(std::span { integral, m_code.position() }, parsedLength);
	        ASSERT(integral + parsedLength == end);
	        return convert(result);
	    }

	    // Hexadecimal float. std::strtod needs a NUL-terminated 8-bit string that includes the
	    // "0x" prefix, so the literal is copied into a private buffer. Parsing the source buffer
	    // in place is wrong for 16-bit input and, for 8-bit input, depends on whatever happens
	    // to follow the literal in memory. Every character accepted by the state machine above
	    // is ASCII, so the narrowing cast is lossless.
	    const T* hexLiteral = integral - 2;
	    const auto hexLength = static_cast<size_t>(end - hexLiteral);
	    Vector<char> hexAscii;
	    hexAscii.reserveInitialCapacity(hexLength + 1);
	    for (size_t i = 0; i < hexLength; ++i)
	        hexAscii.append(static_cast<char>(hexLiteral[i]));
	    hexAscii.append('\0');

	    char* parseEnd;
	    double result = std::strtod(hexAscii.data(), &parseEnd);
	    ASSERT(parseEnd == hexAscii.data() + hexLength);
	    return convert(result);
	}

	// Explicit instantiations of the lexer template for the LChar and UChar character types.
	template class Lexer<LChar>;
	template class Lexer<UChar>;

	}