|  | /* | 
|  | * Copyright 2005 Maksim Orlovich <maksim@kde.org> | 
|  | * Copyright (C) 2006, 2013 Apple Inc. All rights reserved. | 
|  | * Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org> | 
|  | * | 
|  | * Redistribution and use in source and binary forms, with or without | 
|  | * modification, are permitted provided that the following conditions | 
|  | * are met: | 
|  | * | 
|  | * 1. Redistributions of source code must retain the above copyright | 
|  | *    notice, this list of conditions and the following disclaimer. | 
|  | * 2. Redistributions in binary form must reproduce the above copyright | 
|  | *    notice, this list of conditions and the following disclaimer in the | 
|  | *    documentation and/or other materials provided with the distribution. | 
|  | * | 
|  | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | 
|  | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | 
|  | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | 
|  | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | 
|  | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | 
|  | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 
|  | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 
|  | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
|  | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | 
|  | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
|  | */ | 
|  |  | 
|  | #include "config.h" | 
|  | #include "XPathParser.h" | 
|  |  | 
|  | #include "XPathEvaluator.h" | 
|  | #include "XPathNSResolver.h" | 
|  | #include "XPathPath.h" | 
|  | #include "XPathStep.h" | 
|  | #include <wtf/NeverDestroyed.h> | 
|  | #include <wtf/RobinHoodHashMap.h> | 
|  | #include <wtf/StdLibExtras.h> | 
|  | #include <wtf/text/StringHash.h> | 
|  |  | 
|  | extern int xpathyyparse(WebCore::XPath::Parser&); | 
|  |  | 
|  | #include "XPathGrammar.h" | 
|  |  | 
|  | namespace WebCore { | 
|  | namespace XPath { | 
|  |  | 
|  | struct Parser::Token { | 
|  | int type; | 
|  | String string; | 
|  | Step::Axis axis; | 
|  | NumericOp::Opcode numericOpcode; | 
|  | EqTestOp::Opcode equalityTestOpcode; | 
|  |  | 
|  | Token(int type) : type(type) { } | 
|  | Token(int type, const String& string) : type(type), string(string) { } | 
|  | Token(int type, Step::Axis axis) : type(type), axis(axis) { } | 
|  | Token(int type, NumericOp::Opcode opcode) : type(type), numericOpcode(opcode) { } | 
|  | Token(int type, EqTestOp::Opcode opcode) : type(type), equalityTestOpcode(opcode) { } | 
|  | }; | 
|  |  | 
|  | enum XMLCat { NameStart, NameCont, NotPartOfName }; | 
|  |  | 
|  | static XMLCat charCat(UChar character) | 
|  | { | 
|  | if (character == '_') | 
|  | return NameStart; | 
|  |  | 
|  | if (character == '.' || character == '-') | 
|  | return NameCont; | 
|  | unsigned characterTypeMask = U_GET_GC_MASK(character); | 
|  | if (characterTypeMask & (U_GC_LU_MASK | U_GC_LL_MASK | U_GC_LO_MASK | U_GC_LT_MASK | U_GC_NL_MASK)) | 
|  | return NameStart; | 
|  | if (characterTypeMask & (U_GC_M_MASK | U_GC_LM_MASK | U_GC_ND_MASK)) | 
|  | return NameCont; | 
|  | return NotPartOfName; | 
|  | } | 
|  |  | 
|  | static MemoryCompactLookupOnlyRobinHoodHashMap<String, Step::Axis> createAxisNamesMap() | 
|  | { | 
|  | struct AxisName { | 
|  | ASCIILiteral name; | 
|  | Step::Axis axis; | 
|  | }; | 
|  | const AxisName axisNameList[] = { | 
|  | { "ancestor"_s, Step::AncestorAxis }, | 
|  | { "ancestor-or-self"_s, Step::AncestorOrSelfAxis }, | 
|  | { "attribute"_s, Step::AttributeAxis }, | 
|  | { "child"_s, Step::ChildAxis }, | 
|  | { "descendant"_s, Step::DescendantAxis }, | 
|  | { "descendant-or-self"_s, Step::DescendantOrSelfAxis }, | 
|  | { "following"_s, Step::FollowingAxis }, | 
|  | { "following-sibling"_s, Step::FollowingSiblingAxis }, | 
|  | { "namespace"_s, Step::NamespaceAxis }, | 
|  | { "parent"_s, Step::ParentAxis }, | 
|  | { "preceding"_s, Step::PrecedingAxis }, | 
|  | { "preceding-sibling"_s, Step::PrecedingSiblingAxis }, | 
|  | { "self"_s, Step::SelfAxis } | 
|  | }; | 
|  | MemoryCompactLookupOnlyRobinHoodHashMap<String, Step::Axis> map; | 
|  | for (auto& axisName : axisNameList) | 
|  | map.add(axisName.name, axisName.axis); | 
|  | return map; | 
|  | } | 
|  |  | 
|  | static bool parseAxisName(const String& name, Step::Axis& type) | 
|  | { | 
|  | static NeverDestroyed axisNames = createAxisNamesMap(); | 
|  | auto it = axisNames.get().find(name); | 
|  | if (it == axisNames.get().end()) | 
|  | return false; | 
|  | type = it->value; | 
|  | return true; | 
|  | } | 
|  |  | 
|  | // Returns whether the current token can possibly be a binary operator, given | 
|  | // the previous token. Necessary to disambiguate some of the operators | 
|  | // (* (multiply), div, and, or, mod) in the [32] Operator rule | 
|  | // (check http://www.w3.org/TR/xpath#exprlex). | 
|  | bool Parser::isBinaryOperatorContext() const | 
|  | { | 
|  | switch (m_lastTokenType) { | 
|  | case 0: | 
|  | case '@': case AXISNAME: case '(': case '[': case ',': | 
|  | case AND: case OR: case MULOP: | 
|  | case '/': case SLASHSLASH: case '|': case PLUS: case MINUS: | 
|  | case EQOP: case RELOP: | 
|  | return false; | 
|  | default: | 
|  | return true; | 
|  | } | 
|  | } | 
|  |  | 
|  | void Parser::skipWS() | 
|  | { | 
|  | while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos])) | 
|  | ++m_nextPos; | 
|  | } | 
|  |  | 
|  | Parser::Token Parser::makeTokenAndAdvance(int code, int advance) | 
|  | { | 
|  | m_nextPos += advance; | 
|  | return Token(code); | 
|  | } | 
|  |  | 
|  | Parser::Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance) | 
|  | { | 
|  | m_nextPos += advance; | 
|  | return Token(code, val); | 
|  | } | 
|  |  | 
|  | Parser::Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance) | 
|  | { | 
|  | m_nextPos += advance; | 
|  | return Token(code, val); | 
|  | } | 
|  |  | 
|  | // Returns next char if it's there and interesting, 0 otherwise. | 
|  | char Parser::peekAheadHelper() | 
|  | { | 
|  | if (m_nextPos + 1 >= m_data.length()) | 
|  | return 0; | 
|  | UChar next = m_data[m_nextPos + 1]; | 
|  | if (next >= 0xff) | 
|  | return 0; | 
|  | return next; | 
|  | } | 
|  |  | 
|  | char Parser::peekCurHelper() | 
|  | { | 
|  | if (m_nextPos >= m_data.length()) | 
|  | return 0; | 
|  | UChar next = m_data[m_nextPos]; | 
|  | if (next >= 0xff) | 
|  | return 0; | 
|  | return next; | 
|  | } | 
|  |  | 
|  | Parser::Token Parser::lexString() | 
|  | { | 
|  | UChar delimiter = m_data[m_nextPos]; | 
|  | int startPos = m_nextPos + 1; | 
|  |  | 
|  | for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) { | 
|  | if (m_data[m_nextPos] == delimiter) { | 
|  | String value = m_data.substring(startPos, m_nextPos - startPos); | 
|  | if (value.isNull()) | 
|  | value = emptyString(); | 
|  | ++m_nextPos; // Consume the char. | 
|  | return Token(LITERAL, value); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Ouch, went off the end -- report error. | 
|  | return Token(XPATH_ERROR); | 
|  | } | 
|  |  | 
|  | Parser::Token Parser::lexNumber() | 
|  | { | 
|  | int startPos = m_nextPos; | 
|  | bool seenDot = false; | 
|  |  | 
|  | // Go until end or a non-digits character. | 
|  | for (; m_nextPos < m_data.length(); ++m_nextPos) { | 
|  | UChar aChar = m_data[m_nextPos]; | 
|  | if (aChar >= 0xff) break; | 
|  |  | 
|  | if (!isASCIIDigit(aChar)) { | 
|  | if (aChar == '.' && !seenDot) | 
|  | seenDot = true; | 
|  | else | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos)); | 
|  | } | 
|  |  | 
|  | bool Parser::lexNCName(String& name) | 
|  | { | 
|  | int startPos = m_nextPos; | 
|  | if (m_nextPos >= m_data.length()) | 
|  | return false; | 
|  |  | 
|  | if (charCat(m_data[m_nextPos]) != NameStart) | 
|  | return false; | 
|  |  | 
|  | // Keep going until we get a character that's not good for names. | 
|  | for (; m_nextPos < m_data.length(); ++m_nextPos) | 
|  | if (charCat(m_data[m_nextPos]) == NotPartOfName) | 
|  | break; | 
|  |  | 
|  | name = m_data.substring(startPos, m_nextPos - startPos); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | bool Parser::lexQName(String& name) | 
|  | { | 
|  | String n1; | 
|  | if (!lexNCName(n1)) | 
|  | return false; | 
|  |  | 
|  | skipWS(); | 
|  |  | 
|  | // If the next character is :, what we just got it the prefix, if not, | 
|  | // it's the whole thing. | 
|  | if (peekAheadHelper() != ':') { | 
|  | name = n1; | 
|  | return true; | 
|  | } | 
|  |  | 
|  | String n2; | 
|  | if (!lexNCName(n2)) | 
|  | return false; | 
|  |  | 
|  | name = n1 + ":" + n2; | 
|  | return true; | 
|  | } | 
|  |  | 
|  | inline Parser::Token Parser::nextTokenInternal() | 
|  | { | 
|  | skipWS(); | 
|  |  | 
|  | if (m_nextPos >= m_data.length()) | 
|  | return Token(0); | 
|  |  | 
|  | char code = peekCurHelper(); | 
|  | switch (code) { | 
|  | case '(': case ')': case '[': case ']': | 
|  | case '@': case ',': case '|': | 
|  | return makeTokenAndAdvance(code); | 
|  | case '\'': | 
|  | case '\"': | 
|  | return lexString(); | 
|  | case '0': case '1': case '2': case '3': case '4': | 
|  | case '5': case '6': case '7': case '8': case '9': | 
|  | return lexNumber(); | 
|  | case '.': { | 
|  | char next = peekAheadHelper(); | 
|  | if (next == '.') | 
|  | return makeTokenAndAdvance(DOTDOT, 2); | 
|  | if (isASCIIDigit(next)) | 
|  | return lexNumber(); | 
|  | return makeTokenAndAdvance('.'); | 
|  | } | 
|  | case '/': | 
|  | if (peekAheadHelper() == '/') | 
|  | return makeTokenAndAdvance(SLASHSLASH, 2); | 
|  | return makeTokenAndAdvance('/'); | 
|  | case '+': | 
|  | return makeTokenAndAdvance(PLUS); | 
|  | case '-': | 
|  | return makeTokenAndAdvance(MINUS); | 
|  | case '=': | 
|  | return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ); | 
|  | case '!': | 
|  | if (peekAheadHelper() == '=') | 
|  | return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2); | 
|  | return Token(XPATH_ERROR); | 
|  | case '<': | 
|  | if (peekAheadHelper() == '=') | 
|  | return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2); | 
|  | return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT); | 
|  | case '>': | 
|  | if (peekAheadHelper() == '=') | 
|  | return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2); | 
|  | return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT); | 
|  | case '*': | 
|  | if (isBinaryOperatorContext()) | 
|  | return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul); | 
|  | ++m_nextPos; | 
|  | return Token(NAMETEST, "*"_s); | 
|  | case '$': { // $ QName | 
|  | m_nextPos++; | 
|  | String name; | 
|  | if (!lexQName(name)) | 
|  | return Token(XPATH_ERROR); | 
|  | return Token(VARIABLEREFERENCE, name); | 
|  | } | 
|  | } | 
|  |  | 
|  | String name; | 
|  | if (!lexNCName(name)) | 
|  | return Token(XPATH_ERROR); | 
|  |  | 
|  | skipWS(); | 
|  | // If we're in an operator context, check for any operator names | 
|  | if (isBinaryOperatorContext()) { | 
|  | if (name == "and") //### hash? | 
|  | return Token(AND); | 
|  | if (name == "or") | 
|  | return Token(OR); | 
|  | if (name == "mod") | 
|  | return Token(MULOP, NumericOp::OP_Mod); | 
|  | if (name == "div") | 
|  | return Token(MULOP, NumericOp::OP_Div); | 
|  | } | 
|  |  | 
|  | // See whether we are at a : | 
|  | if (peekCurHelper() == ':') { | 
|  | m_nextPos++; | 
|  | // Any chance it's an axis name? | 
|  | if (peekCurHelper() == ':') { | 
|  | m_nextPos++; | 
|  |  | 
|  | //It might be an axis name. | 
|  | Step::Axis axis; | 
|  | if (parseAxisName(name, axis)) | 
|  | return Token(AXISNAME, axis); | 
|  | // Ugh, :: is only valid in axis names -> error | 
|  | return Token(XPATH_ERROR); | 
|  | } | 
|  |  | 
|  | // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest | 
|  | skipWS(); | 
|  | if (peekCurHelper() == '*') { | 
|  | m_nextPos++; | 
|  | return Token(NAMETEST, name + ":*"); | 
|  | } | 
|  |  | 
|  | // Make a full qname. | 
|  | String n2; | 
|  | if (!lexNCName(n2)) | 
|  | return Token(XPATH_ERROR); | 
|  |  | 
|  | name = name + ":" + n2; | 
|  | } | 
|  |  | 
|  | skipWS(); | 
|  |  | 
|  | if (peekCurHelper() == '(') { | 
|  | // note: we don't swallow the '(' here! | 
|  |  | 
|  | // Either node type oor function name. | 
|  |  | 
|  | if (name == "processing-instruction") | 
|  | return Token(PI); | 
|  | if (name == "node") | 
|  | return Token(NODE); | 
|  | if (name == "text") | 
|  | return Token(TEXT_); | 
|  | if (name == "comment") | 
|  | return Token(COMMENT); | 
|  |  | 
|  | return Token(FUNCTIONNAME, name); | 
|  | } | 
|  |  | 
|  | // At this point, it must be NAMETEST. | 
|  | return Token(NAMETEST, name); | 
|  | } | 
|  |  | 
|  | inline Parser::Token Parser::nextToken() | 
|  | { | 
|  | Token token = nextTokenInternal(); | 
|  | m_lastTokenType = token.type; | 
|  | return token; | 
|  | } | 
|  |  | 
|  | Parser::Parser(const String& statement, RefPtr<XPathNSResolver>&& resolver) | 
|  | : m_data(statement) | 
|  | , m_resolver(WTFMove(resolver)) | 
|  | { | 
|  | } | 
|  |  | 
|  | int Parser::lex(YYSTYPE& yylval) | 
|  | { | 
|  | Token token = nextToken(); | 
|  |  | 
|  | switch (token.type) { | 
|  | case AXISNAME: | 
|  | yylval.axis = token.axis; | 
|  | break; | 
|  | case MULOP: | 
|  | yylval.numericOpcode = token.numericOpcode; | 
|  | break; | 
|  | case RELOP: | 
|  | case EQOP: | 
|  | yylval.equalityTestOpcode = token.equalityTestOpcode; | 
|  | break; | 
|  | case NODETYPE: | 
|  | case FUNCTIONNAME: | 
|  | case LITERAL: | 
|  | case VARIABLEREFERENCE: | 
|  | case NUMBER: | 
|  | case NAMETEST: | 
|  | yylval.string = token.string.releaseImpl().leakRef(); | 
|  | break; | 
|  | } | 
|  |  | 
|  | return token.type; | 
|  | } | 
|  |  | 
|  | bool Parser::expandQualifiedName(const String& qualifiedName, AtomString& localName, AtomString& namespaceURI) | 
|  | { | 
|  | size_t colon = qualifiedName.find(':'); | 
|  | if (colon != notFound) { | 
|  | if (!m_resolver) { | 
|  | m_sawNamespaceError = true; | 
|  | return false; | 
|  | } | 
|  | namespaceURI = m_resolver->lookupNamespaceURI(StringView(qualifiedName).left(colon).toAtomString()); | 
|  | if (namespaceURI.isNull()) { | 
|  | m_sawNamespaceError = true; | 
|  | return false; | 
|  | } | 
|  | localName = StringView(qualifiedName).substring(colon + 1).toAtomString(); | 
|  | } else | 
|  | localName = AtomString { qualifiedName }; | 
|  | return true; | 
|  | } | 
|  |  | 
|  | ExceptionOr<std::unique_ptr<Expression>> Parser::parseStatement(const String& statement, RefPtr<XPathNSResolver>&& resolver) | 
|  | { | 
|  | Parser parser { statement, WTFMove(resolver) }; | 
|  |  | 
|  | int parseError = xpathyyparse(parser); | 
|  |  | 
|  | if (parser.m_sawNamespaceError) | 
|  | return Exception { NamespaceError }; | 
|  |  | 
|  | if (parseError) | 
|  | return Exception { SyntaxError }; | 
|  |  | 
|  | return WTFMove(parser.m_result); | 
|  | } | 
|  |  | 
|  | } } |