|  | // Copyright 2016 The Chromium Authors | 
|  | // Use of this source code is governed by a BSD-style license that can be | 
|  | // found in the LICENSE file. | 
|  |  | 
|  | #include "components/link_header_util/link_header_util.h" | 
|  |  | 
|  | #include <algorithm> | 
|  | #include <string> | 
|  | #include <string_view> | 
|  | #include <unordered_map> | 
|  |  | 
|  | #include "base/strings/string_util.h" | 
|  | #include "net/http/http_util.h" | 
|  |  | 
|  | namespace link_header_util { | 
|  |  | 
|  | namespace { | 
|  |  | 
|  | // A variation of base::StringTokenizer and net::HttpUtil::ValuesIterator. | 
|  | // Takes the parsing of StringTokenizer and adds support for quoted strings that | 
|  | // are quoted by matching <> (and does not support escaping in those strings). | 
|  | // Also has the behavior of ValuesIterator where it strips whitespace from all | 
|  | // values and only outputs non-empty values. | 
|  | // Only supports ',' as separator and supports "" and <> as quote chars. | 
|  | class ValueTokenizer { | 
|  | public: | 
|  | ValueTokenizer(std::string::const_iterator begin, | 
|  | std::string::const_iterator end) | 
|  | : token_begin_(begin), token_end_(begin), end_(end) {} | 
|  |  | 
|  | std::string::const_iterator token_begin() const { return token_begin_; } | 
|  | std::string::const_iterator token_end() const { return token_end_; } | 
|  |  | 
|  | bool GetNext() { | 
|  | while (GetNextInternal()) { | 
|  | net::HttpUtil::TrimLWS(&token_begin_, &token_end_); | 
|  |  | 
|  | // Only return non-empty values. | 
|  | if (token_begin_ != token_end_) | 
|  | return true; | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | private: | 
|  | // Updates token_begin_ and token_end_ to point to the (possibly empty) next | 
|  | // token. Returns false if end-of-string was reached first. | 
|  | bool GetNextInternal() { | 
|  | // First time this is called token_end_ points to the first character in the | 
|  | // input. Every other time token_end_ points to the delimiter at the end of | 
|  | // the last returned token (which could be the end of the string). | 
|  |  | 
|  | // End of string, return false. | 
|  | if (token_end_ == end_) | 
|  | return false; | 
|  |  | 
|  | // Skip past the delimiter. | 
|  | if (*token_end_ == ',') | 
|  | ++token_end_; | 
|  |  | 
|  | // Make token_begin_ point to the beginning of the next token, and search | 
|  | // for the end of the token in token_end_. | 
|  | token_begin_ = token_end_; | 
|  |  | 
|  | // Set to true if we're currently inside a quoted string. | 
|  | bool in_quote = false; | 
|  | // Set to true if we're currently inside a quoted string, and have just | 
|  | // encountered an escape character. In this case a closing quote will be | 
|  | // ignored. | 
|  | bool in_escape = false; | 
|  | // If currently in a quoted string, this is the character that (when not | 
|  | // escaped) indicates the end of the string. | 
|  | char quote_close_char = '\0'; | 
|  | // If currently in a quoted string, this is set to true if it is possible to | 
|  | // escape the closing quote using '\'. | 
|  | bool quote_allows_escape = false; | 
|  |  | 
|  | while (token_end_ != end_) { | 
|  | char c = *token_end_; | 
|  | if (in_quote) { | 
|  | if (in_escape) { | 
|  | in_escape = false; | 
|  | } else if (quote_allows_escape && c == '\\') { | 
|  | in_escape = true; | 
|  | } else if (c == quote_close_char) { | 
|  | in_quote = false; | 
|  | } | 
|  | } else { | 
|  | if (c == ',') | 
|  | break; | 
|  | if (c == '"' || c == '<') { | 
|  | in_quote = true; | 
|  | quote_close_char = (c == '<' ? '>' : c); | 
|  | quote_allows_escape = (c != '<'); | 
|  | } | 
|  | } | 
|  | ++token_end_; | 
|  | } | 
|  | return true; | 
|  | } | 
|  |  | 
|  | std::string::const_iterator token_begin_; | 
|  | std::string::const_iterator token_end_; | 
|  | std::string::const_iterator end_; | 
|  | }; | 
|  |  | 
|  | // Parses the URL part of a Link header. When successful, returns the URL and | 
|  | // sets `params_string` to include the portion of the header after the | 
|  | // '>' character at the end of the URL. | 
|  | std::optional<std::string> ExtractURL(std::string_view header, | 
|  | std::string_view& params_string) { | 
|  | // Extract the URL part (everything between '<' and first '>' character). | 
|  | // ParseLinkHeaderValue() ensures `header` is non-empty, so no need to check | 
|  | // for that. | 
|  | if (header.front() != '<') { | 
|  | return std::nullopt; | 
|  | } | 
|  |  | 
|  | size_t url_begin = 1; | 
|  | size_t url_end = header.find('>'); | 
|  |  | 
|  | // Fail if we did not find a '>'. | 
|  | if (url_end == std::string_view::npos) { | 
|  | return std::nullopt; | 
|  | } | 
|  |  | 
|  | // Skip the '>' at the end of the URL. | 
|  | params_string = header.substr(url_end + 1); | 
|  |  | 
|  | // Trim whitespace around the URL, and copy to a string. | 
|  | return std::string( | 
|  | net::HttpUtil::TrimLWS(header.substr(url_begin, url_end - url_begin))); | 
|  | } | 
|  |  | 
|  | }  // namespace | 
|  |  | 
|  | std::vector<StringIteratorPair> SplitLinkHeader(const std::string& header) { | 
|  | std::vector<StringIteratorPair> values; | 
|  | ValueTokenizer tokenizer(header.begin(), header.end()); | 
|  | while (tokenizer.GetNext()) { | 
|  | values.push_back( | 
|  | StringIteratorPair(tokenizer.token_begin(), tokenizer.token_end())); | 
|  | } | 
|  | return values; | 
|  | } | 
|  |  | 
|  | // Parses one link in a link header into its url and parameters. | 
|  | // A link is of the form "<some-url>; param1=value1; param2=value2". | 
|  | // Returns nullopt if parsing the link failed, returns the URL as a string on | 
|  | // success. This method is more lenient than the RFC. It doesn't fail on things | 
|  | // like invalid characters in the URL, and also doesn't verify that certain | 
|  | // parameters should or shouldn't be quoted strings. | 
|  | // | 
|  | // If a parameter occurs more than once in the link, only the first value is | 
|  | // returned in params as this is the required behavior for all attributes chrome | 
|  | // currently cares about in link headers. | 
|  | std::optional<std::string> ParseLinkHeaderValue( | 
|  | std::string_view header, | 
|  | std::unordered_map<std::string, std::optional<std::string>>& params) { | 
|  | // Can't parse an empty string. | 
|  | if (header.empty()) { | 
|  | return std::nullopt; | 
|  | } | 
|  |  | 
|  | // Extract the URL part (everything between '<' and first '>' character). | 
|  | std::string_view params_string; | 
|  | auto url = ExtractURL(header, params_string); | 
|  | if (!url) { | 
|  | return std::nullopt; | 
|  | } | 
|  |  | 
|  | // Trim any remaining whitespace, and make sure there is a ';' separating | 
|  | // parameters from the URL. | 
|  | params_string = net::HttpUtil::TrimLWS(params_string); | 
|  | if (!params_string.empty() && params_string.front() != ';') { | 
|  | return std::nullopt; | 
|  | } | 
|  |  | 
|  | // Parse all the parameters. | 
|  | net::HttpUtil::NameValuePairsIterator params_iterator( | 
|  | params_string, /*delimiter=*/';', | 
|  | net::HttpUtil::NameValuePairsIterator::Values::NOT_REQUIRED, | 
|  | net::HttpUtil::NameValuePairsIterator::Quotes::STRICT_QUOTES); | 
|  | while (params_iterator.GetNext()) { | 
|  | if (!net::HttpUtil::IsParmName(params_iterator.name())) { | 
|  | return std::nullopt; | 
|  | } | 
|  | std::string name = base::ToLowerASCII(params_iterator.name()); | 
|  | if (!params_iterator.value_is_quoted() && params_iterator.value().empty()) { | 
|  | params.emplace(std::move(name), std::nullopt); | 
|  | } else { | 
|  | params.emplace(std::move(name), params_iterator.value()); | 
|  | } | 
|  | } | 
|  | if (!params_iterator.valid()) { | 
|  | return std::nullopt; | 
|  | } | 
|  | return url; | 
|  | } | 
|  |  | 
|  | std::optional<std::string> ParseLinkHeaderValue( | 
|  | const StringIteratorPair& string_iterator_pair, | 
|  | std::unordered_map<std::string, std::optional<std::string>>& params) { | 
|  | return ParseLinkHeaderValue( | 
|  | std::string_view(string_iterator_pair.first, string_iterator_pair.second), | 
|  | params); | 
|  | } | 
|  |  | 
|  | }  // namespace link_header_util |