src/parser/lexer.cpp - external/github.com/WebAssembly/binaryen - Git at Google

 /*
  * Copyright 2023 WebAssembly Community Group participants
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include <cassert>
 #include <cctype>
 #include <cmath>
 #include <iostream>
 #include <limits>
 #include <optional>
 #include <sstream>
 #include <variant>

 #include "lexer.h"
 #include "support/bits.h"
 #include "support/string.h"

 using namespace std::string_view_literals;

 namespace wasm::WATParser {

 // The annotation kind that the lexer assigns to `;;@` line annotations.
 Name srcAnnotationKind("src");

 namespace {

 // ================
 // Lexical Analysis
 // ================

 // The result of lexing a token fragment.
 struct LexResult {
   // The prefix of the lexed input that makes up the fragment.
   std::string_view span;
 };

 // Lexing context that accumulates lexed input to produce a token fragment.
 // It tracks how much of a fixed input has been consumed so far; the `take*`
 // methods extend the consumed prefix.
 struct LexCtx {
 private:
   // The input we are lexing.
   std::string_view input;

   // How much of the input we have already lexed.
   size_t lexedSize = 0;

 public:
   explicit LexCtx(std::string_view in) : input(in) {}

   // Return the fragment that has been lexed so far, or nullopt if nothing has
   // been consumed yet.
   std::optional<LexResult> lexed() const {
     if (lexedSize > 0) {
       return {LexResult{input.substr(0, lexedSize)}};
     }
     return {};
   }

   // The next input that has not already been lexed.
   std::string_view next() const { return input.substr(lexedSize); }

   // Get the next character without consuming it. Callers must first check
   // that the context is not `empty()`.
   uint8_t peek() const { return next()[0]; }

   // The size of the unlexed input.
   size_t size() const { return input.size() - lexedSize; }

   // Whether there is no more input.
   bool empty() const { return size() == 0; }

   // Tokens must be separated by spaces or parentheses.
   bool canFinish() const;

   // Whether the unlexed input starts with prefix `sv`. This is a predicate,
   // so it returns `bool` rather than an integer.
   bool startsWith(std::string_view sv) const {
     return next().substr(0, sv.size()) == sv;
   }

   // Consume the next `n` characters.
   void take(size_t n) { lexedSize += n; }

   // Consume an additional lexed fragment.
   void take(const LexResult& res) { lexedSize += res.span.size(); }

   // Consume the prefix and return true if possible. Nothing is consumed when
   // the unlexed input does not start with `sv`.
   bool takePrefix(std::string_view sv) {
     if (startsWith(sv)) {
       take(sv.size());
       return true;
     }
     return false;
   }

   // Consume the rest of the input.
   void takeAll() { lexedSize = input.size(); }
 };

 // Selects what `num` and `hexnum` return: with DisallowOverflow they return
 // the checked result (nullopt if the value overflowed); with IgnoreOverflow
 // they return only the lexed span, ignoring the value.
 enum OverflowBehavior { DisallowOverflow, IgnoreOverflow };

 // Map a decimal digit character to its value, or return nullopt if `c` is
 // not one of '0'..'9'.
 std::optional<int> getDigit(char c) {
   const int value = c - '0';
   if (value < 0 || value > 9) {
     return std::nullopt;
   }
   return value;
 }

 // Map a hexadecimal digit character (either case) to its value in 0..15, or
 // return nullopt if `c` is not a hex digit.
 std::optional<int> getHexDigit(char c) {
   if (c >= 'a' && c <= 'f') {
     return (c - 'a') + 10;
   }
   if (c >= 'A' && c <= 'F') {
     return (c - 'A') + 10;
   }
   if (c >= '0' && c <= '9') {
     return c - '0';
   }
   return std::nullopt;
 }

 // The explicit sign written in front of a number: none, '+', or '-'.
 enum Sign { NoSign, Pos, Neg };

 // The result of lexing an integer token fragment.
 struct LexIntResult : LexResult {
   // The lexed value as 64 raw bits, together with the sign that was written
   // in the source.
   uint64_t n;
   Sign sign;

   // Whether this token is a valid unsigned integer of type T: it must have no
   // explicit sign and its value must fit in T.
   template<typename T> bool isUnsigned() {
     static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>);
     if (sign != NoSign) {
       return false;
     }
     return n <= std::numeric_limits<T>::max();
   }

   // Whether this token is a valid signed integer of type T. Tokens written
   // with '-' are checked against the sign-extended minimum of T (with zero
   // always accepted); all others are checked against the maximum of T.
   template<typename T> bool isSigned() {
     static_assert(std::is_integral_v<T> && std::is_signed_v<T>);
     if (sign != Neg) {
       return n <= uint64_t(std::numeric_limits<T>::max());
     }
     return n == 0 || uint64_t(std::numeric_limits<T>::min()) <= n;
   }
 };

 // Lexing context that accumulates lexed input to produce an integer token
 // fragment. Besides the consumed span it tracks the magnitude of the digits
 // taken so far, the explicit sign, and whether the magnitude overflowed 64
 // bits.
 struct LexIntCtx : LexCtx {
   using LexCtx::take;

 private:
   uint64_t n = 0;
   Sign sign = NoSign;
   bool overflow = false;

   // Append digit `d` (in the given `base`) to the accumulated magnitude and
   // record whether doing so overflows 64 bits. The check happens before the
   // multiplication because a wrapped product is not necessarily smaller than
   // the previous value, so comparing afterwards misses some overflows. On
   // overflow `n` still receives the wrapped value.
   void accumulate(uint64_t base, uint64_t d) {
     constexpr uint64_t max = std::numeric_limits<uint64_t>::max();
     if (n > (max - d) / base) {
       overflow = true;
     }
     n = n * base + d;
   }

 public:
   explicit LexIntCtx(std::string_view in) : LexCtx(in) {}

   // Lex only the underlying span, ignoring the overflow and value. The
   // returned result always has value 0 and no sign.
   std::optional<LexIntResult> lexedRaw() {
     if (auto basic = LexCtx::lexed()) {
       return LexIntResult{*basic, 0, NoSign};
     }
     return {};
   }

   // Return the lexed integer, or nullopt if nothing was lexed or the
   // magnitude overflowed. For tokens written with '-', the returned value is
   // the negation of the magnitude.
   std::optional<LexIntResult> lexed() {
     if (overflow) {
       return {};
     }
     if (auto basic = LexCtx::lexed()) {
       return LexIntResult{*basic, sign == Neg ? -n : n, sign};
     }
     return {};
   }

   // Consume an optional '+' or '-' and remember which one was seen.
   void takeSign() {
     if (takePrefix("+"sv)) {
       sign = Pos;
     } else if (takePrefix("-"sv)) {
       sign = Neg;
     } else {
       sign = NoSign;
     }
   }

   // Consume one decimal digit if present and fold it into the magnitude.
   // Returns whether a digit was consumed.
   bool takeDigit() {
     if (!empty()) {
       if (auto d = getDigit(peek())) {
         take(1);
         accumulate(10, *d);
         return true;
       }
     }
     return false;
   }

   // Consume one hexadecimal digit if present and fold it into the magnitude.
   // Returns whether a digit was consumed.
   bool takeHexdigit() {
     if (!empty()) {
       if (auto h = getHexDigit(peek())) {
         take(1);
         accumulate(16, *h);
         return true;
       }
     }
     return false;
   }

   // Consume a separately lexed integer fragment and adopt its value as the
   // magnitude.
   void take(const LexIntResult& res) {
     LexCtx::take(res);
     n = res.n;
   }
 };

 // The result of lexing a float token fragment.
 struct LexFloatResult : LexResult {
   // The payload if we lexed a nan with payload. We cannot store the payload
   // directly in `d` because we do not know at this point whether we are parsing
   // an f32 or f64 and therefore we do not know what the allowable payloads are.
   // No payload with NaN means to use the default payload for the expected float
   // width.
   std::optional<uint64_t> nanPayload;
   // The lexed value. For NaN tokens this is a NaN with the lexed sign; the
   // payload is applied later.
   double d;
 };

 // Lexing context that accumulates lexed input to produce a float token
 // fragment. The span is converted to a double only when `lexed()` is called.
 struct LexFloatCtx : LexCtx {
   // Set by the lexer when an explicit `nan:0x...` payload was seen.
   std::optional<uint64_t> nanPayload;

   LexFloatCtx(std::string_view in) : LexCtx(in) {}

   // Return the lexed float, or nullopt if nothing has been lexed.
   std::optional<LexFloatResult> lexed() {
     const double posNan = std::copysign(NAN, 1.0);
     const double negNan = std::copysign(NAN, -1.0);
     assert(!std::signbit(posNan) && "expected positive NaN to be positive");
     assert(std::signbit(negNan) && "expected negative NaN to be negative");
     auto basic = LexCtx::lexed();
     if (!basic) {
       return {};
     }
     const std::string_view text = basic->span;
     // strtod does not return NaNs with the expected signs on all platforms, so
     // NaNs are produced here directly.
     // TODO: use starts_with once we have C++20.
     auto hasPrefix = [text](std::string_view prefix) {
       return text.substr(0, prefix.size()) == prefix;
     };
     if (hasPrefix("-nan"sv)) {
       return LexFloatResult{*basic, nanPayload, negNan};
     }
     if (hasPrefix("nan"sv) || hasPrefix("+nan"sv)) {
       return LexFloatResult{*basic, nanPayload, posNan};
     }
     // Do not try to implement fully general and precise float parsing
     // ourselves. Instead, call out to std::strtod to do our parsing. This means
     // we need to strip any underscores since `std::strtod` does not understand
     // them.
     std::string digits;
     digits.reserve(text.size());
     for (char c : text) {
       if (c != '_') {
         digits.push_back(c);
       }
     }
     char* end;
     double d = std::strtod(digits.data(), &end);
     assert(end == digits.data() + digits.size() && "could not parse float");
     return LexFloatResult{*basic, {}, d};
   }
 };

 // The result of lexing a string token fragment.
 struct LexStrResult : LexResult {
   // Allocate a string only if there are escape sequences, otherwise just use
   // the original string_view.
   std::optional<std::string> str;

   // The allocated string when there is one, otherwise the span exactly as it
   // was lexed.
   std::string_view getStr() {
     if (!str) {
       return span;
     }
     return *str;
   }
 };

 // Lexing context that accumulates lexed input to produce a string token
 // fragment, resolving escape sequences on demand.
 struct LexStrCtx : LexCtx {
 private:
   // Used to build a string with resolved escape sequences. Only used when the
   // parsed string contains escape sequences, otherwise we can just use the
   // parsed string directly.
   std::optional<std::stringstream> escapeBuilder;

 public:
   LexStrCtx(std::string_view in) : LexCtx(in) {}

   // Return the lexed string, including the built string if any escape
   // sequence was encountered, or nullopt if nothing has been lexed.
   std::optional<LexStrResult> lexed() {
     auto basic = LexCtx::lexed();
     if (!basic) {
       return {};
     }
     if (!escapeBuilder) {
       return LexStrResult{*basic, {}};
     }
     return LexStrResult{*basic, {escapeBuilder->str()}};
   }

   // Consume one ordinary character, copying it into the built string when we
   // are building one.
   void takeChar() {
     if (escapeBuilder) {
       *escapeBuilder << peek();
     }
     LexCtx::take(1);
   }

   // Start building the escaped string if we have not already, seeding it with
   // everything lexed so far.
   void ensureBuildingEscaped() {
     if (!escapeBuilder) {
       escapeBuilder = std::stringstream{};
       // Drop the opening '"'.
       *escapeBuilder << LexCtx::lexed()->span.substr(1);
     }
   }

   // Append a resolved escape. Requires that the escaped string is already
   // being built.
   void appendEscaped(char c) { *escapeBuilder << c; }

   // Append the encoding of code point `u`, or return false without appending
   // if `u` is a surrogate or is not below 0x110000.
   bool appendUnicode(uint64_t u) {
     const bool isSurrogate = 0xd800 <= u && u < 0xe000;
     const bool isTooLarge = 0x110000 <= u;
     if (isSurrogate || isTooLarge) {
       return false;
     }
     String::writeWTF8CodePoint(*escapeBuilder, u);
     return true;
   }
 };

 // The result of lexing an identifier token fragment.
 struct LexIdResult : LexResult {
   // Whether the identifier was written in the `$"..."` string form.
   bool isStr = false;
   // For string-form identifiers, the string built while resolving escape
   // sequences, if there were any.
   std::optional<std::string> str;
 };

 // Lexing context that accumulates lexed input to produce an identifier token
 // fragment.
 struct LexIdCtx : LexCtx {
   bool isStr = false;
   std::optional<std::string> str;

   LexIdCtx(std::string_view in) : LexCtx(in) {}

   // Return the lexed identifier along with the current `isStr` and `str`, or
   // nullopt if nothing has been lexed.
   std::optional<LexIdResult> lexed() {
     auto basic = LexCtx::lexed();
     if (!basic) {
       return {};
     }
     return LexIdResult{*basic, isStr, str};
   }
 };

 // The result of lexing an annotation: the covered span plus the annotation
 // (kind and contents) extracted from it.
 struct LexAnnotationResult : LexResult {
   Annotation annotation;
 };

 // Lexing context that accumulates lexed input to produce an annotation. The
 // kind and the contents are each tracked as a view starting at a recorded
 // position plus a running size.
 struct LexAnnotationCtx : LexCtx {
   std::string_view kind;
   size_t kindSize = 0;
   std::string_view contents;
   size_t contentsSize = 0;

   explicit LexAnnotationCtx(std::string_view in) : LexCtx(in) {}

   // Record that the kind begins at the current position.
   void startKind() { kind = next(); }

   // Consume `size` characters as part of the kind.
   void takeKind(size_t size) {
     take(size);
     kindSize += size;
   }

   // Use `kind` as the whole kind without consuming any input.
   void setKind(std::string_view kind) {
     kindSize = kind.size();
     this->kind = kind;
   }

   // Record that the contents begin at the current position.
   void startContents() { contents = next(); }

   // Consume `size` characters as part of the contents.
   void takeContents(size_t size) {
     take(size);
     contentsSize += size;
   }

   // Return the lexed annotation, or nullopt if nothing has been lexed.
   std::optional<LexAnnotationResult> lexed() {
     auto basic = LexCtx::lexed();
     if (!basic) {
       return std::nullopt;
     }
     return LexAnnotationResult{
       *basic,
       {Name(kind.substr(0, kindSize)), contents.substr(0, contentsSize)}};
   }
 };

 // Forward declarations of the token lexers that `annotation` uses before they
 // are defined below.
 std::optional<LexResult> idchar(std::string_view);
 std::optional<LexResult> space(std::string_view);
 std::optional<LexResult> keyword(std::string_view);
 std::optional<LexIntResult> integer(std::string_view);
 std::optional<LexFloatResult> float_(std::string_view);
 std::optional<LexStrResult> str(std::string_view);
 std::optional<LexIdResult> ident(std::string_view);

 // annotation ::= ';;@' [^\n]* | '(@'idchar+ annotelem* ')'
 // annotelem  ::= keyword | reserved | uN | sN | fN | string | id
 //              | '(' annotelem* ')' | '(@'idchar+ annotelem* ')'
 //
 // Lex one annotation at the start of `in`. Line annotations get the `src`
 // annotation kind and everything up to the end of the line as contents.
 // Parenthesized annotations get the idchars after '(@' as their kind and
 // everything up to the matching ')' as contents.
 std::optional<LexAnnotationResult> annotation(std::string_view in) {
   LexAnnotationCtx ctx(in);

   // Line annotation.
   if (ctx.takePrefix(";;@"sv)) {
     ctx.setKind(srcAnnotationKind.str);
     ctx.startContents();
     auto lineEnd = ctx.next().find('\n');
     ctx.takeContents(lineEnd == ""sv.npos ? ctx.next().size() : lineEnd);
     return ctx.lexed();
   }

   if (!ctx.takePrefix("(@"sv)) {
     // Not an annotation. Nothing has been lexed, so this yields nullopt.
     return ctx.lexed();
   }

   // The kind is a nonempty run of idchars.
   ctx.startKind();
   size_t kindChars = 0;
   while (idchar(ctx.next())) {
     ctx.takeKind(1);
     ++kindChars;
   }
   if (kindChars == 0) {
     return std::nullopt;
   }

   // The size of the simple token at the start of `rest`, or 0 if there is
   // none. Successfully lexed tokens are never empty, so 0 is unambiguous.
   auto tokenSize = [](std::string_view rest) -> size_t {
     if (auto tok = space(rest)) {
       return tok->span.size();
     }
     if (auto tok = keyword(rest)) {
       return tok->span.size();
     }
     if (auto tok = integer(rest)) {
       return tok->span.size();
     }
     if (auto tok = float_(rest)) {
       return tok->span.size();
     }
     if (auto tok = str(rest)) {
       return tok->span.size();
     }
     if (auto tok = ident(rest)) {
       return tok->span.size();
     }
     return 0;
   };

   // The contents run until the parenthesis that balances the opening '(@'.
   ctx.startContents();
   for (size_t depth = 1;;) {
     if (ctx.empty()) {
       return std::nullopt;
     }
     if (size_t len = tokenSize(ctx.next())) {
       ctx.takeContents(len);
       continue;
     }
     if (ctx.startsWith("(@"sv)) {
       // Nested annotation: it must also have a nonempty kind.
       ctx.takeContents(2);
       if (!idchar(ctx.next())) {
         return std::nullopt;
       }
       do {
         ctx.takeContents(1);
       } while (idchar(ctx.next()));
       ++depth;
       continue;
     }
     if (ctx.startsWith("("sv)) {
       ctx.takeContents(1);
       ++depth;
       continue;
     }
     if (ctx.startsWith(")"sv)) {
       if (--depth == 0) {
         // The final ')' is part of the span but not of the contents.
         ctx.take(1);
         break;
       }
       ctx.takeContents(1);
       continue;
     }
     // Unrecognized token.
     return std::nullopt;
   }
   return ctx.lexed();
 }

 // comment      ::= linecomment | blockcomment
 // linecomment  ::= ';;' linechar* ('\n' | eof)
 // linechar     ::= c:char                      (if c != '\n')
 // blockcomment ::= '(;' blockchar* ';)'
 // blockchar    ::= c:char                      (if c != ';' and c != '(')
 //                | ';'                         (if the next char is not ')')
 //                | '('                         (if the next char is not ';')
 //                | blockcomment
 //
 // Lines starting with ';;@' are not treated as comments here. The newline
 // that ends a line comment is not part of the returned span.
 std::optional<LexResult> comment(std::string_view in) {
   LexCtx ctx(in);
   if (ctx.size() < 2) {
     return {};
   }

   // Line comment
   if (!ctx.startsWith(";;@"sv) && ctx.takePrefix(";;"sv)) {
     auto lineEnd = ctx.next().find('\n');
     if (lineEnd == ""sv.npos) {
       ctx.takeAll();
     } else {
       ctx.take(lineEnd);
     }
     return ctx.lexed();
   }

   if (!ctx.takePrefix("(;"sv)) {
     return {};
   }

   // Block comment (possibly nested!)
   size_t depth = 1;
   while (depth > 0 && ctx.size() >= 2) {
     if (ctx.takePrefix("(;"sv)) {
       ++depth;
       continue;
     }
     if (ctx.takePrefix(";)"sv)) {
       --depth;
       continue;
     }
     ctx.take(1);
   }
   if (depth != 0) {
     // TODO: Add error production for non-terminated block comment.
     return {};
   }
   return ctx.lexed();
 }

 // Lex a single space, newline, carriage return, or tab character.
 std::optional<LexResult> spacechar(std::string_view in) {
   LexCtx ctx(in);
   if (!ctx.empty()) {
     switch (ctx.peek()) {
       case ' ':
       case '\n':
       case '\r':
       case '\t':
         ctx.take(1);
         break;
       default:
         break;
     }
   }
   return ctx.lexed();
 }

 // space  ::= (' ' | format | comment)*
 // format ::= '\t' | '\n' | '\r'
 std::optional<LexResult> space(std::string_view in) {
   LexCtx ctx(in);
   while (ctx.size()) {
     if (auto lexed = spacechar(ctx.next())) {
       ctx.take(*lexed);
     } else if (auto lexed = comment(ctx.next())) {
       ctx.take(*lexed);
     } else {
       break;
     }
   }
   return ctx.lexed();
 }

 // Whether a token may end at the current position, i.e. whether what follows
 // is the end of input, a parenthesis, or the start of some space.
 bool LexCtx::canFinish() const {
   if (empty()) {
     return true;
   }
   if (startsWith("("sv) || startsWith(")"sv)) {
     return true;
   }
   // Logically we want to check for space here. But we don't actually want to
   // parse more than a couple characters of space, so check for individual
   // space chars or comment starts instead.
   return spacechar(next()).has_value() || startsWith(";;"sv);
 }

 // num   ::= d:digit => d
 //         |  n:num '_'? d:digit => 10*n + d
 // digit ::= '0' => 0 | ... | '9' => 9
 //
 // With DisallowOverflow the result carries the value and is nullopt if the
 // value overflowed; with IgnoreOverflow only the span is meaningful.
 std::optional<LexIntResult> num(std::string_view in,
                                 OverflowBehavior overflow = DisallowOverflow) {
   LexIntCtx ctx(in);
   // This also fails on empty input.
   if (!ctx.takeDigit()) {
     return {};
   }
   for (;;) {
     const bool sawUnderscore = ctx.takePrefix("_"sv);
     if (ctx.takeDigit()) {
       continue;
     }
     if (sawUnderscore) {
       // TODO: Add error production for trailing underscore.
       return {};
     }
     break;
   }
   if (overflow == DisallowOverflow) {
     return ctx.lexed();
   }
   return ctx.lexedRaw();
 }

 // hexnum   ::= h:hexdigit => h
 //            | n:hexnum '_'? h:hexdigit => 16*n + h
 // hexdigit ::= d:digit => d
 //            | 'A' => 10 | ... | 'F' => 15
 //            | 'a' => 10 | ... | 'f' => 15
 //
 // With DisallowOverflow the result carries the value and is nullopt if the
 // value overflowed; with IgnoreOverflow only the span is meaningful.
 std::optional<LexIntResult>
 hexnum(std::string_view in, OverflowBehavior overflow = DisallowOverflow) {
   LexIntCtx ctx(in);
   if (!ctx.takeHexdigit()) {
     return {};
   }
   for (;;) {
     const bool sawUnderscore = ctx.takePrefix("_"sv);
     if (ctx.takeHexdigit()) {
       continue;
     }
     if (sawUnderscore) {
       // TODO: Add error production for trailing underscore.
       return {};
     }
     break;
   }
   if (overflow == DisallowOverflow) {
     return ctx.lexed();
   }
   return ctx.lexedRaw();
 }

 // uN ::= n:num         => n (if n < 2^N)
 //      | '0x' n:hexnum => n (if n < 2^N)
 // sN ::= s:sign n:num         => [s]n (if -2^(N-1) <= [s]n < 2^(N-1))
 //      | s:sign '0x' n:hexnum => [s]n (if -2^(N-1) <= [s]n < 2^(N-1))
 // sign ::= {} => + | '+' => + | '-' => -
 //
 // Note: Defer bounds and sign checking until we know what kind of integer we
 // expect.
 std::optional<LexIntResult> integer(std::string_view in) {
   LexIntCtx ctx(in);
   ctx.takeSign();
   // Once we have seen '0x' the digits must be hexadecimal; there is no
   // falling back to decimal.
   const bool isHex = ctx.takePrefix("0x"sv);
   auto magnitude = isHex ? hexnum(ctx.next()) : num(ctx.next());
   if (!magnitude) {
     // TODO: Add error production for unrecognized hexnum.
     return {};
   }
   ctx.take(*magnitude);
   if (!ctx.canFinish()) {
     return {};
   }
   return ctx.lexed();
 }

 // float   ::= p:num '.'?                              => p
 //           | p:num '.' q:frac                        => p + q
 //           | p:num '.'? ('E'|'e') s:sign e:num       => p * 10^([s]e)
 //           | p:num '.' q:frac ('E'|'e') s:sign e:num => (p + q) * 10^([s]e)
 // frac    ::= d:digit                                 => d/10
 //           | d:digit '_'? p:frac                     => (d + p/10) / 10
 //
 // Only the span is determined here; the value is computed later from the
 // span, so overflow of the individual digit runs is ignored.
 std::optional<LexResult> decfloat(std::string_view in) {
   LexCtx ctx(in);
   auto whole = num(ctx.next(), IgnoreOverflow);
   if (!whole) {
     return {};
   }
   ctx.take(*whole);
   // Optional '.' followed by optional frac
   if (ctx.takePrefix("."sv)) {
     if (auto frac = num(ctx.next(), IgnoreOverflow)) {
       ctx.take(*frac);
     }
   }
   const bool hasExponent = ctx.takePrefix("E"sv) || ctx.takePrefix("e"sv);
   if (hasExponent) {
     // Optional sign
     if (!ctx.takePrefix("+"sv)) {
       ctx.takePrefix("-"sv);
     }
     auto exponent = num(ctx.next(), IgnoreOverflow);
     if (!exponent) {
       // TODO: Add error production for missing exponent.
       return {};
     }
     ctx.take(*exponent);
   }
   return ctx.lexed();
 }

 // hexfloat ::= '0x' p:hexnum '.'?                        => p
 //            | '0x' p:hexnum '.' q:hexfrac               => p + q
 //            | '0x' p:hexnum '.'? ('P'|'p') s:sign e:num => p * 2^([s]e)
 //            | '0x' p:hexnum '.' q:hexfrac ('P'|'p') s:sign e:num
 //                   => (p + q) * 2^([s]e)
 // hexfrac ::= h:hexdigit                              => h/16
 //           | h:hexdigit '_'? p:hexfrac               => (h + p/16) / 16
 //
 // Only the span is determined here; the value is computed later from the
 // span, so overflow of the individual digit runs is ignored.
 std::optional<LexResult> hexfloat(std::string_view in) {
   LexCtx ctx(in);
   if (!ctx.takePrefix("0x"sv)) {
     return {};
   }
   auto whole = hexnum(ctx.next(), IgnoreOverflow);
   if (!whole) {
     return {};
   }
   ctx.take(*whole);
   // Optional '.' followed by optional hexfrac
   if (ctx.takePrefix("."sv)) {
     if (auto frac = hexnum(ctx.next(), IgnoreOverflow)) {
       ctx.take(*frac);
     }
   }
   const bool hasExponent = ctx.takePrefix("P"sv) || ctx.takePrefix("p"sv);
   if (hasExponent) {
     // Optional sign
     if (!ctx.takePrefix("+"sv)) {
       ctx.takePrefix("-"sv);
     }
     // The exponent is written in decimal.
     auto exponent = num(ctx.next(), IgnoreOverflow);
     if (!exponent) {
       // TODO: Add error production for missing exponent.
       return {};
     }
     ctx.take(*exponent);
   }
   return ctx.lexed();
 }

 // fN    ::= s:sign z:fNmag => [s]z
 // fNmag ::= z:float        => float_N(z) (if float_N(z) != +/-infinity)
 //         | z:hexfloat     => float_N(z) (if float_N(z) != +/-infinity)
 //         | 'inf'          => infinity
 //         | 'nan'          => nan(2^(signif(N)-1))
 //         | 'nan:0x' n:hexnum => nan(n) (if 1 <= n < 2^signif(N))
 std::optional<LexFloatResult> float_(std::string_view in) {
   LexFloatCtx ctx(in);
   // Optional sign
   ctx.takePrefix("+"sv) || ctx.takePrefix("-"sv);
   if (auto lexed = hexfloat(ctx.next())) {
     ctx.take(*lexed);
   } else if (auto lexed = decfloat(ctx.next())) {
     ctx.take(*lexed);
   } else if (ctx.takePrefix("inf"sv)) {
     // nop
   } else if (ctx.takePrefix("nan"sv)) {
     if (ctx.takePrefix(":0x"sv)) {
       if (auto lexed = hexnum(ctx.next())) {
         ctx.take(*lexed);
         ctx.nanPayload = lexed->n;
       } else {
         // TODO: Add error production for malformed NaN payload.
         return {};
       }
     } else {
       // No explicit payload necessary; we will inject the default payload
       // later.
     }
   } else {
     return {};
   }
   if (ctx.canFinish()) {
     return ctx.lexed();
   }
   return {};
 }

 // idchar ::= '0' | ... | '9'
 //          | 'A' | ... | 'Z'
 //          | 'a' | ... | 'z'
 //          | '!' | '#' | '$' | '%' | '&' | ''' | '*' | '+'
 //          | '-' | '.' | '/' | ':' | '<' | '=' | '>' | '?'
 //          | '@' | '\' | '^' | '_' | '`' | '|' | '~'
 //
 // Lex a single idchar, or return nullopt if `in` does not start with one.
 std::optional<LexResult> idchar(std::string_view in) {
   LexCtx ctx(in);
   if (ctx.empty()) {
     return {};
   }
   const uint8_t c = ctx.peek();
   // All the allowed characters lie in the range '!' to '~', and within that
   // range the vast majority of characters are allowed, so it is significantly
   // faster to check for the disallowed characters instead.
   bool allowed = '!' <= c && c <= '~';
   if (allowed) {
     switch (c) {
       case '"':
       case '(':
       case ')':
       case ',':
       case ';':
       case '[':
       case ']':
       case '{':
       case '}':
         allowed = false;
         break;
       default:
         break;
     }
   }
   if (allowed) {
     ctx.take(1);
   }
   // Nothing was taken for a disallowed character, so this yields nullopt.
   return ctx.lexed();
 }

 // string     ::= '"' (b*:stringelem)* '"'  => concat((b*)*)
 //                    (if |concat((b*)*)| < 2^32)
 // stringelem ::= c:stringchar              => utf8(c)
 //              | '\' n:hexdigit m:hexdigit => 16*n + m
 // stringchar ::= c:char                    => c
 //                    (if c >= U+20 && c != U+7f && c != '"' && c != '\')
 //              | '\t' => \t | '\n' => \n | '\r' => \r
 //              | '\\' => \ | '\"' => " | '\'' => '
 //              | '\u{' n:hexnum '}'        => U+(n)
 //                    (if n < 0xD800 and 0xE000 <= n <= 0x110000)
 std::optional<LexStrResult> str(std::string_view in) {
   LexStrCtx ctx(in);
   if (!ctx.takePrefix("\""sv)) {
     return {};
   }
   while (!ctx.takePrefix("\""sv)) {
     if (ctx.empty()) {
       // TODO: Add error production for unterminated string.
       return {};
     }
     if (ctx.startsWith("\\"sv)) {
       // Escape sequences
       ctx.ensureBuildingEscaped();
       ctx.take(1);
       if (ctx.takePrefix("t"sv)) {
         ctx.appendEscaped('\t');
       } else if (ctx.takePrefix("n"sv)) {
         ctx.appendEscaped('\n');
       } else if (ctx.takePrefix("r"sv)) {
         ctx.appendEscaped('\r');
       } else if (ctx.takePrefix("\\"sv)) {
         ctx.appendEscaped('\\');
       } else if (ctx.takePrefix("\""sv)) {
         ctx.appendEscaped('"');
       } else if (ctx.takePrefix("'"sv)) {
         ctx.appendEscaped('\'');
       } else if (ctx.takePrefix("u{"sv)) {
         auto lexed = hexnum(ctx.next());
         if (!lexed) {
           // TODO: Add error production for malformed unicode escapes.
           return {};
         }
         ctx.take(*lexed);
         if (!ctx.takePrefix("}"sv)) {
           // TODO: Add error production for malformed unicode escapes.
           return {};
         }
         if (!ctx.appendUnicode(lexed->n)) {
           // TODO: Add error production for invalid unicode values.
           return {};
         }
       } else {
         LexIntCtx ictx(ctx.next());
         if (!ictx.takeHexdigit() || !ictx.takeHexdigit()) {
           // TODO: Add error production for unrecognized escape sequence.
           return {};
         }
         auto lexed = *ictx.lexed();
         ctx.take(lexed);
         ctx.appendEscaped(char(lexed.n));
       }
     } else {
       // Normal characters
       if (uint8_t c = ctx.peek(); c >= 0x20 && c != 0x7F) {
         ctx.takeChar();
       } else {
         // TODO: Add error production for unescaped control characters.
         return {};
       }
     }
   }
   return ctx.lexed();
 }

 // id ::= '$' idchar+ | '$' str
 //
 // String-form identifiers must be valid UTF-8. In either form the identifier
 // must be followed by something that can end a token.
 std::optional<LexIdResult> ident(std::string_view in) {
   LexIdCtx ctx(in);
   if (!ctx.takePrefix("$"sv)) {
     return {};
   }
   if (auto quoted = str(ctx.next())) {
     if (!String::isUTF8(quoted->getStr())) {
       return {};
     }
     ctx.isStr = true;
     ctx.str = quoted->str;
     ctx.take(*quoted);
   } else {
     // Plain form: at least one idchar is required.
     size_t count = 0;
     while (auto c = idchar(ctx.next())) {
       ctx.take(*c);
       ++count;
     }
     if (count == 0) {
       return {};
     }
   }
   if (!ctx.canFinish()) {
     return {};
   }
   return ctx.lexed();
 }

 // keyword ::= ( 'a' | ... | 'z' ) idchar* (if literal terminal in grammar)
 // reserved ::= idchar+
 //
 // The "keyword" token we lex here covers both keywords as well as any reserved
 // tokens that match the keyword format. This saves us from having to enumerate
 // all the valid keywords here. These invalid keywords will still produce
 // errors, just at a higher level of the parser.
 std::optional<LexResult> keyword(std::string_view in) {
   LexCtx ctx(in);
   if (ctx.empty()) {
     return {};
   }
   // The first character must be a lowercase ASCII letter.
   const uint8_t first = ctx.peek();
   if (first < 'a' || first > 'z') {
     return {};
   }
   ctx.take(1);
   // Then take as many idchars as follow.
   for (auto c = idchar(ctx.next()); c; c = idchar(ctx.next())) {
     ctx.take(*c);
   }
   return ctx.lexed();
 }

 } // anonymous namespace

 // Advance past any run of space and annotations at the current position,
 // collecting the annotations that are skipped.
 void Lexer::skipSpace() {
   for (;;) {
     // Annotations are tried first on every iteration.
     if (auto annot = annotation(next())) {
       pos += annot->span.size();
       annotations.push_back(annot->annotation);
     } else if (auto blank = space(next())) {
       pos += blank->span.size();
     } else {
       return;
     }
   }
 }

 // Consume a '(' at the current position if there is one and report whether
 // it was consumed.
 bool Lexer::takeLParen() {
   if (!LexCtx(next()).startsWith("("sv)) {
     return false;
   }
   ++pos;
   advance();
   return true;
 }

 // Consume a ')' at the current position if there is one and report whether
 // it was consumed.
 bool Lexer::takeRParen() {
   if (!LexCtx(next()).startsWith(")"sv)) {
     return false;
   }
   ++pos;
   advance();
   return true;
 }

 // Consume a string token if there is one and return its contents without the
 // surrounding quotes and with escape sequences resolved.
 std::optional<std::string> Lexer::takeString() {
   auto result = str(next());
   if (!result) {
     return std::nullopt;
   }
   pos += result->span.size();
   advance();
   if (result->str) {
     return result->str;
   }
   // There were no escapes, so the contents are the span minus its quotes.
   const std::string_view quoted = result->span;
   return std::string(quoted.substr(1, quoted.size() - 2));
 }

 // Consume an identifier token if there is one and return its name without
 // the leading '$' (and without quotes for string-form identifiers).
 std::optional<Name> Lexer::takeID() {
   auto result = ident(next());
   if (!result) {
     return std::nullopt;
   }
   pos += result->span.size();
   advance();
   const std::string_view text = result->span;
   if (result->str) {
     // String form with escapes: use the resolved string.
     return Name(*result->str);
   }
   if (result->isStr) {
     // Remove '$' and quotes.
     return Name(text.substr(2, text.size() - 3));
   }
   // Remove '$'.
   return Name(text.substr(1));
 }

 // Consume a keyword token if there is one and return its text.
 std::optional<std::string_view> Lexer::takeKeyword() {
   auto result = keyword(next());
   if (!result) {
     return std::nullopt;
   }
   pos += result->span.size();
   advance();
   return result->span;
 }

 // Consume the next token only if it is exactly the keyword `expected`.
 // Returns whether it was consumed.
 bool Lexer::takeKeyword(std::string_view expected) {
   auto result = keyword(next());
   if (!result || result->span != expected) {
     return false;
   }
   pos += expected.size();
   advance();
   return true;
 }

 // Consume a token of the form `offset=<u64>` if there is one and return the
 // value. Nothing is consumed on failure.
 std::optional<uint64_t> Lexer::takeOffset() {
   constexpr std::string_view prefix = "offset="sv;
   auto result = keyword(next());
   if (!result || result->span.substr(0, prefix.size()) != prefix) {
     return std::nullopt;
   }
   // Lex the text after the prefix with a separate lexer.
   Lexer subLexer(result->span.substr(prefix.size()));
   auto offset = subLexer.takeU64();
   if (!offset) {
     return std::nullopt;
   }
   pos += result->span.size();
   advance();
   return offset;
 }

 // Consume a token of the form `align=<u32>` if there is one and return the
 // value. The value must have exactly one bit set. Nothing is consumed on
 // failure.
 std::optional<uint32_t> Lexer::takeAlign() {
   constexpr std::string_view prefix = "align="sv;
   auto result = keyword(next());
   if (!result || result->span.substr(0, prefix.size()) != prefix) {
     return std::nullopt;
   }
   // Lex the text after the prefix with a separate lexer.
   Lexer subLexer(result->span.substr(prefix.size()));
   auto align = subLexer.takeU32();
   if (!align || Bits::popCount(*align) != 1) {
     return std::nullopt;
   }
   pos += result->span.size();
   advance();
   return align;
 }

 // Consume an integer token if it is a valid unsigned integer of type T and
 // return its value.
 template<typename T> std::optional<T> Lexer::takeU() {
   static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>);
   auto result = integer(next());
   if (!result || !result->isUnsigned<T>()) {
     // TODO: Add error production for unsigned overflow.
     return std::nullopt;
   }
   pos += result->span.size();
   advance();
   return T(result->n);
 }

 // Consume an integer token if it is a valid signed integer of type T and
 // return its value.
 template<typename T> std::optional<T> Lexer::takeS() {
   static_assert(std::is_integral_v<T> && std::is_signed_v<T>);
   auto result = integer(next());
   if (!result || !result->isSigned<T>()) {
     return std::nullopt;
   }
   pos += result->span.size();
   advance();
   return T(result->n);
 }

 // Consume an integer token if it is valid as either an unsigned integer of
 // type T or a signed integer of the same width, and return its bits as T.
 template<typename T> std::optional<T> Lexer::takeI() {
   static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>);
   auto result = integer(next());
   if (!result) {
     return std::nullopt;
   }
   const bool fits =
     result->isUnsigned<T>() || result->isSigned<std::make_signed_t<T>>();
   if (!fits) {
     return std::nullopt;
   }
   pos += result->span.size();
   advance();
   return T(result->n);
 }

 // Explicit instantiations of takeU, takeS, and takeI for the 64-, 32-, 16-,
 // and 8-bit integer types. The templates are defined in this file, so these
 // instantiations are what this file provides for them.
 template std::optional<uint64_t> Lexer::takeU<uint64_t>();
 template std::optional<int64_t> Lexer::takeS<int64_t>();
 template std::optional<uint64_t> Lexer::takeI<uint64_t>();
 template std::optional<uint32_t> Lexer::takeU<uint32_t>();
 template std::optional<int32_t> Lexer::takeS<int32_t>();
 template std::optional<uint32_t> Lexer::takeI<uint32_t>();
 template std::optional<uint16_t> Lexer::takeU<uint16_t>();
 template std::optional<int16_t> Lexer::takeS<int16_t>();
 template std::optional<uint16_t> Lexer::takeI<uint16_t>();
 template std::optional<uint8_t> Lexer::takeU<uint8_t>();
 template std::optional<int8_t> Lexer::takeS<int8_t>();
 template std::optional<uint8_t> Lexer::takeI<uint8_t>();

 // Consume a float token, or failing that an integer token, and return its
 // value as an f64. NaNs receive their explicit payload, or the default
 // payload if none was written; out-of-range payloads are rejected.
 std::optional<double> Lexer::takeF64() {
   constexpr int signif = 52;
   constexpr uint64_t payloadMask = (1ull << signif) - 1;
   constexpr uint64_t nanDefault = 1ull << (signif - 1);

   if (auto result = float_(next())) {
     double d = result->d;
     if (std::isnan(d)) {
       // Validate and inject payload.
       const uint64_t payload = result->nanPayload.value_or(nanDefault);
       if (payload == 0 || payload > payloadMask) {
         // TODO: Add error production for out-of-bounds payload.
         return std::nullopt;
       }
       uint64_t bits;
       static_assert(sizeof(bits) == sizeof(d));
       memcpy(&bits, &d, sizeof(bits));
       bits = (bits & ~payloadMask) | payload;
       memcpy(&d, &bits, sizeof(bits));
     }
     pos += result->span.size();
     advance();
     return d;
   }

   auto result = integer(next());
   if (!result) {
     return std::nullopt;
   }
   pos += result->span.size();
   advance();
   if (result->sign != Neg) {
     return double(result->n);
   }
   if (result->n == 0) {
     // Preserve the sign of negative zero.
     return -0.0;
   }
   return double(int64_t(result->n));
 }

 // Consume a float token, or failing that an integer token, and return its
 // value as an f32. NaNs receive their explicit payload, or the default
 // payload if none was written; out-of-range payloads are rejected.
 std::optional<float> Lexer::takeF32() {
   constexpr int signif = 23;
   constexpr uint32_t payloadMask = (1u << signif) - 1;
   constexpr uint64_t nanDefault = 1ull << (signif - 1);

   if (auto result = float_(next())) {
     float f = result->d;
     if (std::isnan(f)) {
       // Validate and inject payload.
       const uint64_t payload = result->nanPayload.value_or(nanDefault);
       if (payload == 0 || payload > payloadMask) {
         // TODO: Add error production for out-of-bounds payload.
         return std::nullopt;
       }
       uint32_t bits;
       static_assert(sizeof(bits) == sizeof(f));
       memcpy(&bits, &f, sizeof(bits));
       bits = (bits & ~payloadMask) | payload;
       memcpy(&f, &bits, sizeof(bits));
     }
     pos += result->span.size();
     advance();
     return f;
   }

   auto result = integer(next());
   if (!result) {
     return std::nullopt;
   }
   pos += result->span.size();
   advance();
   if (result->sign != Neg) {
     return float(result->n);
   }
   if (result->n == 0) {
     // Preserve the sign of negative zero.
     return -0.0f;
   }
   return float(int64_t(result->n));
 }

 // Compute the line and column of `c`, which must point into the buffer (or
 // one past its end). Lines are counted from 1 and columns from 0.
 TextPos Lexer::position(const char* c) const {
   assert(size_t(c - buffer.data()) <= buffer.size());
   TextPos where{1, 0};
   const char* cursor = buffer.data();
   while (cursor != c) {
     if (*cursor == '\n') {
       // A newline starts the next line at column zero.
       where.line++;
       where.col = 0;
     } else {
       where.col++;
     }
     ++cursor;
   }
   return where;
 }

 // Two positions are equal when both their lines and their columns match.
 bool TextPos::operator==(const TextPos& other) const {
   if (line != other.line) {
     return false;
   }
   return col == other.col;
 }

 // Print a position as `line:col`.
 std::ostream& operator<<(std::ostream& os, const TextPos& pos) {
   os << pos.line << ":" << pos.col;
   return os;
 }

 } // namespace wasm::WATParser