Source/WebCore/xml/XPathParser.cpp - external/github.com/WebKit/webkit - Git at Google

 /*
  * Copyright 2005 Maksim Orlovich <maksim@kde.org>
  * Copyright (C) 2006, 2013 Apple Inc. All rights reserved.
  * Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

 #include "config.h"
 #include "XPathParser.h"

 #include "XPathEvaluator.h"
 #include "XPathNSResolver.h"
 #include "XPathPath.h"
 #include "XPathStep.h"
 #include <wtf/NeverDestroyed.h>
 #include <wtf/RobinHoodHashMap.h>
 #include <wtf/StdLibExtras.h>
 #include <wtf/text/StringHash.h>

 extern int xpathyyparse(WebCore::XPath::Parser&);

 #include "XPathGrammar.h"

 namespace WebCore {
 namespace XPath {

 struct Parser::Token {
     int type;
     String string;
     Step::Axis axis;
     NumericOp::Opcode numericOpcode;
     EqTestOp::Opcode equalityTestOpcode;

     Token(int type) : type(type) { }
     Token(int type, const String& string) : type(type), string(string) { }
     Token(int type, Step::Axis axis) : type(type), axis(axis) { }
     Token(int type, NumericOp::Opcode opcode) : type(type), numericOpcode(opcode) { }
     Token(int type, EqTestOp::Opcode opcode) : type(type), equalityTestOpcode(opcode) { }
 };

 enum XMLCat { NameStart, NameCont, NotPartOfName };

 static XMLCat charCat(UChar character)
 {
     if (character == '_')
         return NameStart;

     if (character == '.' || character == '-')
         return NameCont;
     unsigned characterTypeMask = U_GET_GC_MASK(character);
     if (characterTypeMask & (U_GC_LU_MASK | U_GC_LL_MASK | U_GC_LO_MASK | U_GC_LT_MASK | U_GC_NL_MASK))
         return NameStart;
     if (characterTypeMask & (U_GC_M_MASK | U_GC_LM_MASK | U_GC_ND_MASK))
         return NameCont;
     return NotPartOfName;
 }

 static MemoryCompactLookupOnlyRobinHoodHashMap<String, Step::Axis> createAxisNamesMap()
 {
     struct AxisName {
         ASCIILiteral name;
         Step::Axis axis;
     };
     const AxisName axisNameList[] = {
         { "ancestor"_s, Step::AncestorAxis },
         { "ancestor-or-self"_s, Step::AncestorOrSelfAxis },
         { "attribute"_s, Step::AttributeAxis },
         { "child"_s, Step::ChildAxis },
         { "descendant"_s, Step::DescendantAxis },
         { "descendant-or-self"_s, Step::DescendantOrSelfAxis },
         { "following"_s, Step::FollowingAxis },
         { "following-sibling"_s, Step::FollowingSiblingAxis },
         { "namespace"_s, Step::NamespaceAxis },
         { "parent"_s, Step::ParentAxis },
         { "preceding"_s, Step::PrecedingAxis },
         { "preceding-sibling"_s, Step::PrecedingSiblingAxis },
         { "self"_s, Step::SelfAxis }
     };
     MemoryCompactLookupOnlyRobinHoodHashMap<String, Step::Axis> map;
     for (auto& axisName : axisNameList)
         map.add(axisName.name, axisName.axis);
     return map;
 }

 static bool parseAxisName(const String& name, Step::Axis& type)
 {
     static NeverDestroyed axisNames = createAxisNamesMap();
     auto it = axisNames.get().find(name);
     if (it == axisNames.get().end())
         return false;
     type = it->value;
     return true;
 }

 // Returns whether the current token can possibly be a binary operator, given
 // the previous token. Necessary to disambiguate some of the operators
 // (* (multiply), div, and, or, mod) in the [32] Operator rule
 // (check http://www.w3.org/TR/xpath#exprlex).
 bool Parser::isBinaryOperatorContext() const
 {
     switch (m_lastTokenType) {
     case 0:
     case '@': case AXISNAME: case '(': case '[': case ',':
     case AND: case OR: case MULOP:
     case '/': case SLASHSLASH: case '|': case PLUS: case MINUS:
     case EQOP: case RELOP:
         return false;
     default:
         return true;
     }
 }

 void Parser::skipWS()
 {
     while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos]))
         ++m_nextPos;
 }

 Parser::Token Parser::makeTokenAndAdvance(int code, int advance)
 {
     m_nextPos += advance;
     return Token(code);
 }

 Parser::Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance)
 {
     m_nextPos += advance;
     return Token(code, val);
 }

 Parser::Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance)
 {
     m_nextPos += advance;
     return Token(code, val);
 }

 // Returns next char if it's there and interesting, 0 otherwise.
 char Parser::peekAheadHelper()
 {
     if (m_nextPos + 1 >= m_data.length())
         return 0;
     UChar next = m_data[m_nextPos + 1];
     if (next >= 0xff)
         return 0;
     return next;
 }

 char Parser::peekCurHelper()
 {
     if (m_nextPos >= m_data.length())
         return 0;
     UChar next = m_data[m_nextPos];
     if (next >= 0xff)
         return 0;
     return next;
 }

 Parser::Token Parser::lexString()
 {
     UChar delimiter = m_data[m_nextPos];
     int startPos = m_nextPos + 1;

     for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) {
         if (m_data[m_nextPos] == delimiter) {
             String value = m_data.substring(startPos, m_nextPos - startPos);
             if (value.isNull())
                 value = emptyString();
             ++m_nextPos; // Consume the char.
             return Token(LITERAL, value);
         }
     }

     // Ouch, went off the end -- report error.
     return Token(XPATH_ERROR);
 }

 Parser::Token Parser::lexNumber()
 {
     int startPos = m_nextPos;
     bool seenDot = false;

     // Go until end or a non-digits character.
     for (; m_nextPos < m_data.length(); ++m_nextPos) {
         UChar aChar = m_data[m_nextPos];
         if (aChar >= 0xff) break;

         if (!isASCIIDigit(aChar)) {
             if (aChar == '.' && !seenDot)
                 seenDot = true;
             else
                 break;
         }
     }

     return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos));
 }

 bool Parser::lexNCName(String& name)
 {
     int startPos = m_nextPos;
     if (m_nextPos >= m_data.length())
         return false;

     if (charCat(m_data[m_nextPos]) != NameStart)
         return false;

     // Keep going until we get a character that's not good for names.
     for (; m_nextPos < m_data.length(); ++m_nextPos)
         if (charCat(m_data[m_nextPos]) == NotPartOfName)
             break;

     name = m_data.substring(startPos, m_nextPos - startPos);
     return true;
 }

 bool Parser::lexQName(String& name)
 {
     String n1;
     if (!lexNCName(n1))
         return false;

     skipWS();

     // If the next character is :, what we just got it the prefix, if not,
     // it's the whole thing.
     if (peekAheadHelper() != ':') {
         name = n1;
         return true;
     }

     String n2;
     if (!lexNCName(n2))
         return false;

     name = n1 + ":" + n2;
     return true;
 }

 inline Parser::Token Parser::nextTokenInternal()
 {
     skipWS();

     if (m_nextPos >= m_data.length())
         return Token(0);

     char code = peekCurHelper();
     switch (code) {
     case '(': case ')': case '[': case ']':
     case '@': case ',': case '|':
         return makeTokenAndAdvance(code);
     case '\'':
     case '\"':
         return lexString();
     case '0': case '1': case '2': case '3': case '4':
     case '5': case '6': case '7': case '8': case '9':
         return lexNumber();
     case '.': {
         char next = peekAheadHelper();
         if (next == '.')
             return makeTokenAndAdvance(DOTDOT, 2);
         if (isASCIIDigit(next))
             return lexNumber();
         return makeTokenAndAdvance('.');
     }
     case '/':
         if (peekAheadHelper() == '/')
             return makeTokenAndAdvance(SLASHSLASH, 2);
         return makeTokenAndAdvance('/');
     case '+':
         return makeTokenAndAdvance(PLUS);
     case '-':
         return makeTokenAndAdvance(MINUS);
     case '=':
         return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
     case '!':
         if (peekAheadHelper() == '=')
             return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);
         return Token(XPATH_ERROR);
     case '<':
         if (peekAheadHelper() == '=')
             return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);
         return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
     case '>':
         if (peekAheadHelper() == '=')
             return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);
         return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
     case '*':
         if (isBinaryOperatorContext())
             return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);
         ++m_nextPos;
         return Token(NAMETEST, "*"_s);
     case '$': { // $ QName
         m_nextPos++;
         String name;
         if (!lexQName(name))
             return Token(XPATH_ERROR);
         return Token(VARIABLEREFERENCE, name);
     }
     }

     String name;
     if (!lexNCName(name))
         return Token(XPATH_ERROR);

     skipWS();
     // If we're in an operator context, check for any operator names
     if (isBinaryOperatorContext()) {
         if (name == "and") //### hash?
             return Token(AND);
         if (name == "or")
             return Token(OR);
         if (name == "mod")
             return Token(MULOP, NumericOp::OP_Mod);
         if (name == "div")
             return Token(MULOP, NumericOp::OP_Div);
     }

     // See whether we are at a :
     if (peekCurHelper() == ':') {
         m_nextPos++;
         // Any chance it's an axis name?
         if (peekCurHelper() == ':') {
             m_nextPos++;

             //It might be an axis name.
             Step::Axis axis;
             if (parseAxisName(name, axis))
                 return Token(AXISNAME, axis);
             // Ugh, :: is only valid in axis names -> error
             return Token(XPATH_ERROR);
         }

         // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest
         skipWS();
         if (peekCurHelper() == '*') {
             m_nextPos++;
             return Token(NAMETEST, name + ":*");
         }

         // Make a full qname.
         String n2;
         if (!lexNCName(n2))
             return Token(XPATH_ERROR);

         name = name + ":" + n2;
     }

     skipWS();

     if (peekCurHelper() == '(') {
         // note: we don't swallow the '(' here!

         // Either node type oor function name.

         if (name == "processing-instruction")
             return Token(PI);
         if (name == "node")
             return Token(NODE);
         if (name == "text")
             return Token(TEXT_);
         if (name == "comment")
             return Token(COMMENT);

         return Token(FUNCTIONNAME, name);
     }

     // At this point, it must be NAMETEST.
     return Token(NAMETEST, name);
 }

 inline Parser::Token Parser::nextToken()
 {
     Token token = nextTokenInternal();
     m_lastTokenType = token.type;
     return token;
 }

 Parser::Parser(const String& statement, RefPtr<XPathNSResolver>&& resolver)
     : m_data(statement)
     , m_resolver(WTFMove(resolver))
 {
 }

 int Parser::lex(YYSTYPE& yylval)
 {
     Token token = nextToken();

     switch (token.type) {
     case AXISNAME:
         yylval.axis = token.axis;
         break;
     case MULOP:
         yylval.numericOpcode = token.numericOpcode;
         break;
     case RELOP:
     case EQOP:
         yylval.equalityTestOpcode = token.equalityTestOpcode;
         break;
     case NODETYPE:
     case FUNCTIONNAME:
     case LITERAL:
     case VARIABLEREFERENCE:
     case NUMBER:
     case NAMETEST:
         yylval.string = token.string.releaseImpl().leakRef();
         break;
     }

     return token.type;
 }

 bool Parser::expandQualifiedName(const String& qualifiedName, AtomString& localName, AtomString& namespaceURI)
 {
     size_t colon = qualifiedName.find(':');
     if (colon != notFound) {
         if (!m_resolver) {
             m_sawNamespaceError = true;
             return false;
         }
         namespaceURI = m_resolver->lookupNamespaceURI(StringView(qualifiedName).left(colon).toAtomString());
         if (namespaceURI.isNull()) {
             m_sawNamespaceError = true;
             return false;
         }
         localName = StringView(qualifiedName).substring(colon + 1).toAtomString();
     } else
         localName = AtomString { qualifiedName };
     return true;
 }

 ExceptionOr<std::unique_ptr<Expression>> Parser::parseStatement(const String& statement, RefPtr<XPathNSResolver>&& resolver)
 {
     Parser parser { statement, WTFMove(resolver) };

     int parseError = xpathyyparse(parser);

     if (parser.m_sawNamespaceError)
         return Exception { NamespaceError };

     if (parseError)
         return Exception { SyntaxError };

     return WTFMove(parser.m_result);
 }

 } }
	/*
	* Copyright 2005 Maksim Orlovich <maksim@kde.org>
	* Copyright (C) 2006, 2013 Apple Inc. All rights reserved.
	* Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org>
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	*
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	*
	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	*/

	#include "config.h"
	#include "XPathParser.h"

	#include "XPathEvaluator.h"
	#include "XPathNSResolver.h"
	#include "XPathPath.h"
	#include "XPathStep.h"
	#include <wtf/NeverDestroyed.h>
	#include <wtf/RobinHoodHashMap.h>
	#include <wtf/StdLibExtras.h>
	#include <wtf/text/StringHash.h>

	extern int xpathyyparse(WebCore::XPath::Parser&);

	#include "XPathGrammar.h"

	namespace WebCore {
	namespace XPath {

	struct Parser::Token {
	int type;
	String string;
	Step::Axis axis;
	NumericOp::Opcode numericOpcode;
	EqTestOp::Opcode equalityTestOpcode;

	Token(int type) : type(type) { }
	Token(int type, const String& string) : type(type), string(string) { }
	Token(int type, Step::Axis axis) : type(type), axis(axis) { }
	Token(int type, NumericOp::Opcode opcode) : type(type), numericOpcode(opcode) { }
	Token(int type, EqTestOp::Opcode opcode) : type(type), equalityTestOpcode(opcode) { }
	};

	enum XMLCat { NameStart, NameCont, NotPartOfName };

	static XMLCat charCat(UChar character)
	{
	if (character == '_')
	return NameStart;

	if (character == '.' \|\| character == '-')
	return NameCont;
	unsigned characterTypeMask = U_GET_GC_MASK(character);
	if (characterTypeMask & (U_GC_LU_MASK \| U_GC_LL_MASK \| U_GC_LO_MASK \| U_GC_LT_MASK \| U_GC_NL_MASK))
	return NameStart;
	if (characterTypeMask & (U_GC_M_MASK \| U_GC_LM_MASK \| U_GC_ND_MASK))
	return NameCont;
	return NotPartOfName;
	}

	static MemoryCompactLookupOnlyRobinHoodHashMap<String, Step::Axis> createAxisNamesMap()
	{
	struct AxisName {
	ASCIILiteral name;
	Step::Axis axis;
	};
	const AxisName axisNameList[] = {
	{ "ancestor"_s, Step::AncestorAxis },
	{ "ancestor-or-self"_s, Step::AncestorOrSelfAxis },
	{ "attribute"_s, Step::AttributeAxis },
	{ "child"_s, Step::ChildAxis },
	{ "descendant"_s, Step::DescendantAxis },
	{ "descendant-or-self"_s, Step::DescendantOrSelfAxis },
	{ "following"_s, Step::FollowingAxis },
	{ "following-sibling"_s, Step::FollowingSiblingAxis },
	{ "namespace"_s, Step::NamespaceAxis },
	{ "parent"_s, Step::ParentAxis },
	{ "preceding"_s, Step::PrecedingAxis },
	{ "preceding-sibling"_s, Step::PrecedingSiblingAxis },
	{ "self"_s, Step::SelfAxis }
	};
	MemoryCompactLookupOnlyRobinHoodHashMap<String, Step::Axis> map;
	for (auto& axisName : axisNameList)
	map.add(axisName.name, axisName.axis);
	return map;
	}

	static bool parseAxisName(const String& name, Step::Axis& type)
	{
	static NeverDestroyed axisNames = createAxisNamesMap();
	auto it = axisNames.get().find(name);
	if (it == axisNames.get().end())
	return false;
	type = it->value;
	return true;
	}

	// Returns whether the current token can possibly be a binary operator, given
	// the previous token. Necessary to disambiguate some of the operators
	// (* (multiply), div, and, or, mod) in the [32] Operator rule
	// (check http://www.w3.org/TR/xpath#exprlex).
	bool Parser::isBinaryOperatorContext() const
	{
	switch (m_lastTokenType) {
	case 0:
	case '@': case AXISNAME: case '(': case '[': case ',':
	case AND: case OR: case MULOP:
	case '/': case SLASHSLASH: case '\|': case PLUS: case MINUS:
	case EQOP: case RELOP:
	return false;
	default:
	return true;
	}
	}

	void Parser::skipWS()
	{
	while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos]))
	++m_nextPos;
	}

	Parser::Token Parser::makeTokenAndAdvance(int code, int advance)
	{
	m_nextPos += advance;
	return Token(code);
	}

	Parser::Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance)
	{
	m_nextPos += advance;
	return Token(code, val);
	}

	Parser::Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance)
	{
	m_nextPos += advance;
	return Token(code, val);
	}

	// Returns next char if it's there and interesting, 0 otherwise.
	char Parser::peekAheadHelper()
	{
	if (m_nextPos + 1 >= m_data.length())
	return 0;
	UChar next = m_data[m_nextPos + 1];
	if (next >= 0xff)
	return 0;
	return next;
	}

	char Parser::peekCurHelper()
	{
	if (m_nextPos >= m_data.length())
	return 0;
	UChar next = m_data[m_nextPos];
	if (next >= 0xff)
	return 0;
	return next;
	}

	Parser::Token Parser::lexString()
	{
	UChar delimiter = m_data[m_nextPos];
	int startPos = m_nextPos + 1;

	for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) {
	if (m_data[m_nextPos] == delimiter) {
	String value = m_data.substring(startPos, m_nextPos - startPos);
	if (value.isNull())
	value = emptyString();
	++m_nextPos; // Consume the char.
	return Token(LITERAL, value);
	}
	}

	// Ouch, went off the end -- report error.
	return Token(XPATH_ERROR);
	}

	Parser::Token Parser::lexNumber()
	{
	int startPos = m_nextPos;
	bool seenDot = false;

	// Go until end or a non-digits character.
	for (; m_nextPos < m_data.length(); ++m_nextPos) {
	UChar aChar = m_data[m_nextPos];
	if (aChar >= 0xff) break;

	if (!isASCIIDigit(aChar)) {
	if (aChar == '.' && !seenDot)
	seenDot = true;
	else
	break;
	}
	}

	return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos));
	}

	bool Parser::lexNCName(String& name)
	{
	int startPos = m_nextPos;
	if (m_nextPos >= m_data.length())
	return false;

	if (charCat(m_data[m_nextPos]) != NameStart)
	return false;

	// Keep going until we get a character that's not good for names.
	for (; m_nextPos < m_data.length(); ++m_nextPos)
	if (charCat(m_data[m_nextPos]) == NotPartOfName)
	break;

	name = m_data.substring(startPos, m_nextPos - startPos);
	return true;
	}

	bool Parser::lexQName(String& name)
	{
	String n1;
	if (!lexNCName(n1))
	return false;

	skipWS();

	// If the next character is :, what we just got it the prefix, if not,
	// it's the whole thing.
	if (peekAheadHelper() != ':') {
	name = n1;
	return true;
	}

	String n2;
	if (!lexNCName(n2))
	return false;

	name = n1 + ":" + n2;
	return true;
	}

	inline Parser::Token Parser::nextTokenInternal()
	{
	skipWS();

	if (m_nextPos >= m_data.length())
	return Token(0);

	char code = peekCurHelper();
	switch (code) {
	case '(': case ')': case '[': case ']':
	case '@': case ',': case '\|':
	return makeTokenAndAdvance(code);
	case '\'':
	case '\"':
	return lexString();
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
	return lexNumber();
	case '.': {
	char next = peekAheadHelper();
	if (next == '.')
	return makeTokenAndAdvance(DOTDOT, 2);
	if (isASCIIDigit(next))
	return lexNumber();
	return makeTokenAndAdvance('.');
	}
	case '/':
	if (peekAheadHelper() == '/')
	return makeTokenAndAdvance(SLASHSLASH, 2);
	return makeTokenAndAdvance('/');
	case '+':
	return makeTokenAndAdvance(PLUS);
	case '-':
	return makeTokenAndAdvance(MINUS);
	case '=':
	return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
	case '!':
	if (peekAheadHelper() == '=')
	return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);
	return Token(XPATH_ERROR);
	case '<':
	if (peekAheadHelper() == '=')
	return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);
	return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
	case '>':
	if (peekAheadHelper() == '=')
	return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);
	return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
	case '*':
	if (isBinaryOperatorContext())
	return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);
	++m_nextPos;
	return Token(NAMETEST, "*"_s);
	case '$': { // $ QName
	m_nextPos++;
	String name;
	if (!lexQName(name))
	return Token(XPATH_ERROR);
	return Token(VARIABLEREFERENCE, name);
	}
	}

	String name;
	if (!lexNCName(name))
	return Token(XPATH_ERROR);

	skipWS();
	// If we're in an operator context, check for any operator names
	if (isBinaryOperatorContext()) {
	if (name == "and") //### hash?
	return Token(AND);
	if (name == "or")
	return Token(OR);
	if (name == "mod")
	return Token(MULOP, NumericOp::OP_Mod);
	if (name == "div")
	return Token(MULOP, NumericOp::OP_Div);
	}

	// See whether we are at a :
	if (peekCurHelper() == ':') {
	m_nextPos++;
	// Any chance it's an axis name?
	if (peekCurHelper() == ':') {
	m_nextPos++;

	//It might be an axis name.
	Step::Axis axis;
	if (parseAxisName(name, axis))
	return Token(AXISNAME, axis);
	// Ugh, :: is only valid in axis names -> error
	return Token(XPATH_ERROR);
	}

	// Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest
	skipWS();
	if (peekCurHelper() == '*') {
	m_nextPos++;
	return Token(NAMETEST, name + ":*");
	}

	// Make a full qname.
	String n2;
	if (!lexNCName(n2))
	return Token(XPATH_ERROR);

	name = name + ":" + n2;
	}

	skipWS();

	if (peekCurHelper() == '(') {
	// note: we don't swallow the '(' here!

	// Either node type oor function name.

	if (name == "processing-instruction")
	return Token(PI);
	if (name == "node")
	return Token(NODE);
	if (name == "text")
	return Token(TEXT_);
	if (name == "comment")
	return Token(COMMENT);

	return Token(FUNCTIONNAME, name);
	}

	// At this point, it must be NAMETEST.
	return Token(NAMETEST, name);
	}

	inline Parser::Token Parser::nextToken()
	{
	Token token = nextTokenInternal();
	m_lastTokenType = token.type;
	return token;
	}

	Parser::Parser(const String& statement, RefPtr<XPathNSResolver>&& resolver)
	: m_data(statement)
	, m_resolver(WTFMove(resolver))
	{
	}

	int Parser::lex(YYSTYPE& yylval)
	{
	Token token = nextToken();

	switch (token.type) {
	case AXISNAME:
	yylval.axis = token.axis;
	break;
	case MULOP:
	yylval.numericOpcode = token.numericOpcode;
	break;
	case RELOP:
	case EQOP:
	yylval.equalityTestOpcode = token.equalityTestOpcode;
	break;
	case NODETYPE:
	case FUNCTIONNAME:
	case LITERAL:
	case VARIABLEREFERENCE:
	case NUMBER:
	case NAMETEST:
	yylval.string = token.string.releaseImpl().leakRef();
	break;
	}

	return token.type;
	}

	bool Parser::expandQualifiedName(const String& qualifiedName, AtomString& localName, AtomString& namespaceURI)
	{
	size_t colon = qualifiedName.find(':');
	if (colon != notFound) {
	if (!m_resolver) {
	m_sawNamespaceError = true;
	return false;
	}
	namespaceURI = m_resolver->lookupNamespaceURI(StringView(qualifiedName).left(colon).toAtomString());
	if (namespaceURI.isNull()) {
	m_sawNamespaceError = true;
	return false;
	}
	localName = StringView(qualifiedName).substring(colon + 1).toAtomString();
	} else
	localName = AtomString { qualifiedName };
	return true;
	}

	ExceptionOr<std::unique_ptr<Expression>> Parser::parseStatement(const String& statement, RefPtr<XPathNSResolver>&& resolver)
	{
	Parser parser { statement, WTFMove(resolver) };

	int parseError = xpathyyparse(parser);

	if (parser.m_sawNamespaceError)
	return Exception { NamespaceError };

	if (parseError)
	return Exception { SyntaxError };

	return WTFMove(parser.m_result);
	}

	} }