blob: 283a8718c5a6f2bff84230c9508551b1c101d6e2 [file] [log] [blame]
/*
* Copyright (C) 2019 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "UserAgentStringParser.h"
#include "RFC7230.h"
#include "UserAgentStringData.h"
#include <optional>
#include <wtf/ASCIICType.h>
#include <wtf/StdLibExtras.h>
#include <wtf/WeakPtr.h>
#include <wtf/text/MakeString.h>
#include <wtf/text/StringCommon.h>
#include <wtf/text/StringImpl.h>
#include <wtf/text/WTFString.h>
/*
* GRAMMAR:
* https://www.rfc-editor.org/rfc/rfc9110#name-user-agent
* User-Agent = product *( RWS ( product / comment ) )
* product = token ["/" product-version]
* product-version = token
* token = 1*tchar
* tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA ; any VCHAR, except delimiters
* RWS = 1*( SP / HTAB ); required whitespace
* comment = "(" *( ctext / quoted-pair / comment ) ")"
* ctext = HTAB / SP / %x21-27 / %x2A-5B / %x5D-7E / obs-text
* quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
* obs-text = %x80-FF
* HTAB = <ASCII horizontal tab %x09, aka '\t'>
* SP = <ASCII space, i.e. " ">
* VCHAR = <any visible US-ASCII character>
*
* REFERENCE:
* https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/User-Agent#syntax
*
* NOTE:
* User agent strings come in many different forms, but most browsers conform to a common pattern.
* This class is attempting to determine attributes about the user agent by expecting common forms
* of user agent strings. There is a link below that contains a list of these strings grouped
* by platform, browser, layout engine, etc. I tried to pick the most frequent ones to parse out
* viable information.
*
* https://explore.whatismybrowser.com/useragents/explore/
*
* Some user agent strings, while valid grammatically, list their info in odd locations.
* This parser will not be able to pick out the correct information from those.
*/
namespace WebCore {
// Stores the string to parse and starts out with a freshly created, empty result object.
UserAgentStringParser::UserAgentStringParser(const String& userAgentString)
    : m_userAgentString(userAgentString)
    , data(UserAgentStringData::create())
{
}
// Factory: heap-allocates a parser for |userAgentString| and hands ownership to the returned Ref.
Ref<UserAgentStringParser> UserAgentStringParser::create(const String& userAgentString)
{
    auto* parser = new UserAgentStringParser(userAgentString);
    return adoptRef(*parser);
}
// Parses the stored user agent string according to
//     User-Agent = product *( RWS ( product / comment ) )
// and, on success, fills in and returns the UserAgentStringData.
//
// Returns an empty optional when the string is empty or does not match the grammar.
//
// NOTE(review): only |data| is reset here; pos, start, malformed and segments are left
// as they are, so calling parse() a second time on the same instance is presumably not
// supported - confirm against callers.
std::optional<Ref<UserAgentStringData>> UserAgentStringParser::parse()
{
    data = UserAgentStringData::create();

    if (atEnd())
        return { };

    // The string must begin with a product. Check the result right away: if the failure
    // happens exactly at the end of the input (e.g. "Mozilla/"), the loop below never
    // runs and would otherwise not get a chance to report it.
    consumeProduct();
    if (malformed)
        return { };

    while (!atEnd()) {
        // Every further product or comment must be preceded by whitespace.
        if (!isTabOrSpace(peek()))
            return { };
        consumeRWS();

        start = pos;
        if (peek() == '(')
            consumeComment();
        else
            consumeProduct();

        if (malformed)
            return { };
    }

    populateUserAgentData();
    return data;
}
void UserAgentStringParser::consumeProduct()
{
consumeToken();
if (malformed)
return;
auto product = Product { .name = getSubstring(), .version = { } };
if (!atEnd() && peek() == '/') {
increment();
start = pos;
consumeToken();
if (malformed)
return;
product.version = getSubstring();
}
start = pos;
segments.append(product);
}
// Advances past a run of spaces and horizontal tabs (possibly empty).
void UserAgentStringParser::consumeRWS()
{
    while (!atEnd()) {
        if (!isTabOrSpace(peek()))
            break;
        increment();
    }
}
void UserAgentStringParser::consumeComment()
{
ASSERT(peek() == '(');
increment(); // pass first '('
start = pos;
if (atEnd()) {
malformed = true;
return;
}
auto c = peek();
while (!atEnd() && c != ')') {
if (c == '(')
consumeComment();
else if (c == '\\')
consumeQuotedPair();
else if (RFC7230::isCommentText(c))
increment();
if (malformed)
return;
c = peek();
}
if (atEnd()) {
malformed = true;
return;
}
auto s = getSubstring();
if (!s.isEmpty()) {
auto comment = Comment { .parts = s.split(';') };
segments.append(comment);
}
increment();
start = pos;
// malformed user agent string
}
// Consumes `token = 1*tchar`. Flags the input as malformed, without advancing, when the
// current character is not a token character.
void UserAgentStringParser::consumeToken()
{
    if (!RFC7230::isTokenCharacter(peek())) {
        malformed = true;
        return;
    }

    // The first character is known to be valid; take it and then any that follow.
    increment();
    while (!atEnd() && RFC7230::isTokenCharacter(peek()))
        increment();
}
// Consumes a quoted-pair: a backslash followed by one permitted octet. Must be called
// with the current position on the backslash. Flags the input as malformed when the
// character after the backslash is not accepted by RFC7230::isQuotedPairSecondOctet.
void UserAgentStringParser::consumeQuotedPair()
{
    ASSERT(peek() == '\\');
    increment(); // pass '\'

    if (!RFC7230::isQuotedPairSecondOctet(peek())) {
        malformed = true;
        return;
    }

    increment();
}
inline char16_t UserAgentStringParser::peek()
{
return m_userAgentString[this->pos];
}
inline void UserAgentStringParser::increment()
{
this->pos++;
}
inline bool UserAgentStringParser::atEnd()
{
return this->pos >= this->m_userAgentString.length();
}
// Returns the text between |start| (inclusive) and the current position (exclusive).
inline String UserAgentStringParser::getSubstring()
{
    const auto length = pos - start;
    return m_userAgentString.substring(start, length);
}
// Scratch state used by populateUserAgentData(): records which browser product tokens
// appeared in the user agent string and the version attached to each of them.
struct BrowsersSeen {
    // One flag per browser; set when its product token was encountered.
    bool brave : 1 = false;
    bool firefox : 1 = false;
    bool chrome : 1 = false;
    bool safari : 1 = false;
    bool opera : 1 = false;
    bool edge : 1 = false;

    // Version text of the corresponding product token.
    String braveVersion;
    String firefoxVersion;
    String chromeVersion;
    String safariVersion;
    String operaVersion;
    String edgeVersion;
};
// Derives the mobile flag, platform, browser name and browser version from the parsed
// |segments| and stores them in |data|.
//
// Product segments are matched by name to note which browsers are mentioned; comment
// segments are scanned part by part for well-known platform markers. Because many
// browsers list tokens of other browsers, the final browser is chosen afterwards from
// the combination of tokens seen.
void UserAgentStringParser::populateUserAgentData()
{
    BrowsersSeen browsersSeen;
    auto weakThis = WeakPtr { *this };
    bool linuxSeen { false };

    for (const auto& segment : segments) {
        WTF::switchOn(segment, [&browsersSeen, weakThis](const Product& p) {
            if (p.name == "Mobile") {
                weakThis->data->mobile = true;
                return;
            }
            if (p.name == "Brave") {
                browsersSeen.braveVersion = p.version;
                browsersSeen.brave = true;
                return;
            }
            // Firefox for iOS identifies itself with the "FxiOS" token (capital F).
            if (p.name == "Firefox" || p.name == "FxiOS") {
                browsersSeen.firefoxVersion = p.version;
                browsersSeen.firefox = true;
                return;
            }
            if (p.name == "Chrome") {
                browsersSeen.chromeVersion = p.version;
                browsersSeen.chrome = true;
                return;
            }
            if (p.name == "Safari") {
                // Must go into safariVersion; writing firefoxVersion here would both lose
                // the Safari version and clobber a Firefox version seen earlier.
                browsersSeen.safariVersion = p.version;
                browsersSeen.safari = true;
                return;
            }
            if (p.name == "OPR") {
                browsersSeen.operaVersion = p.version;
                browsersSeen.opera = true;
                return;
            }
            if (p.name.contains("Edg")) {
                browsersSeen.edgeVersion = p.version;
                browsersSeen.edge = true;
                return;
            }
        }, [weakThis, &linuxSeen](const Comment& c) {
            // The first part that names a platform decides it for this comment.
            for (const auto& part : c.parts) {
                if (part.contains("Windows")) {
                    weakThis->data->platform = "Windows"_s;
                    return;
                }
                if (part == "Macintosh") {
                    weakThis->data->platform = "macOS"_s;
                    return;
                }
                if (part == "iPhone" || part == "iPad") {
                    weakThis->data->platform = "iOS"_s;
                    return;
                }
                if (part.contains("Android")) {
                    weakThis->data->platform = "Android"_s;
                    return;
                }
                if (part.contains("Linux")) {
                    // Only remember it and keep scanning: Android user agents list
                    // "Linux" before "Android" in the same comment, and stopping here
                    // would misreport them as Linux.
                    linuxSeen = true;
                    continue;
                }
                if (part.contains("CrOS")) {
                    weakThis->data->platform = "ChromeOS"_s;
                    return;
                }
            }
        });
    }

    // Android user agents sometimes list both Linux and Android, but Linux user agents
    // do not list Android, so Linux is only used when nothing more specific was found.
    if (linuxSeen && data->platform.isEmpty())
        data->platform = "Linux"_s;

    // Both Chrome and Firefox sometimes list Safari in their user agent strings, so it
    // is only Safari when neither of them is present.
    if (browsersSeen.safari && !browsersSeen.chrome && !browsersSeen.firefox) {
        data->browserName = "Safari"_s;
        data->browserVersion = browsersSeen.safariVersion;
        return;
    }

    // No other browser typically lists Firefox. If a Chrome token is present as well,
    // the Chrome handling below overrides this choice.
    if (browsersSeen.firefox) {
        data->browserName = "Firefox"_s;
        data->browserVersion = browsersSeen.firefoxVersion;
    }

    // Chrome-based browsers typically list Chrome in addition to their own token, so the
    // more specific token wins when there is one.
    if (browsersSeen.chrome) {
        if (browsersSeen.edge) {
            data->browserName = "Microsoft Edge"_s;
            data->browserVersion = browsersSeen.edgeVersion;
        } else if (browsersSeen.brave) {
            data->browserName = "Brave"_s;
            data->browserVersion = browsersSeen.braveVersion;
        } else if (browsersSeen.opera) {
            data->browserName = "Opera"_s;
            data->browserVersion = browsersSeen.operaVersion;
        } else {
            data->browserName = "Google Chrome"_s;
            data->browserVersion = browsersSeen.chromeVersion;
        }
    }
}
};