blob: 4200c56f7d17e72086b8d6b68dfb30034c200231 [file] [log] [blame]
/*
* Copyright (C) 2010 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "third_party/blink/renderer/core/html/parser/html_parser_idioms.h"
#include "third_party/blink/renderer/core/html_names.h"
#include "third_party/blink/renderer/platform/wtf/math_extras.h"
#include "third_party/blink/renderer/platform/wtf/text/atomic_string.h"
#include "third_party/blink/renderer/platform/wtf/text/parsing_utilities.h"
#include "third_party/blink/renderer/platform/wtf/text/string_hash.h"
#include "third_party/blink/renderer/platform/wtf/text/string_to_number.h"
#include "third_party/blink/renderer/platform/wtf/text/text_encoding.h"
#include <limits>
namespace blink {
using namespace html_names;
// Removes HTML whitespace from both ends of |string|. The code units of
// |string| are supplied separately as |characters| / |length| so that the same
// logic serves 8-bit and 16-bit strings.
//
// Returns |string| unchanged when it has no surrounding whitespace. When every
// character is whitespace, returns |string| itself if it is null and the
// string held by g_empty_atom otherwise. In all other cases returns the
// substring spanning the first through last non-whitespace characters.
template <typename CharType>
static String StripLeadingAndTrailingHTMLSpaces(String string,
                                                const CharType* characters,
                                                unsigned length) {
  // Length of the whitespace run at the front.
  unsigned leading = 0;
  while (leading < length && !IsNotHTMLSpace<CharType>(characters[leading]))
    ++leading;

  // Whitespace only: there is nothing to keep.
  if (leading == length) {
    if (string.IsNull())
      return string;
    return g_empty_atom.GetString();
  }

  // Length of the whitespace run at the back. A non-space character is known
  // to exist, so this scan stops before consuming the whole input.
  unsigned trailing = 0;
  while (trailing < length &&
         !IsNotHTMLSpace<CharType>(characters[length - trailing - 1]))
    ++trailing;

  DCHECK_LT(leading + trailing, length);

  // Avoid creating a substring when the input is already trimmed.
  if (leading == 0 && trailing == 0)
    return string;

  const unsigned kept = length - (leading + trailing);
  return string.Substring(leading, kept);
}
// Public entry point: strips HTML whitespace from both ends of |string|.
// A zero-length input yields |string| itself when it is null, otherwise the
// string held by g_empty_atom. Non-empty input is dispatched to the templated
// helper according to the string's character width.
String StripLeadingAndTrailingHTMLSpaces(const String& string) {
  const unsigned length = string.length();
  if (length == 0) {
    if (string.IsNull())
      return string;
    return g_empty_atom.GetString();
  }

  if (!string.Is8Bit()) {
    return StripLeadingAndTrailingHTMLSpaces<UChar>(
        string, string.Characters16(), length);
  }
  return StripLeadingAndTrailingHTMLSpaces<LChar>(string, string.Characters8(),
                                                  length);
}
// Serializes |number| for a number-type value. Zero is special-cased because
// Decimal's own string conversion appends an exponent (e.g. "0e-18"); the
// sign of a negative zero is kept.
String SerializeForNumberType(const Decimal& number) {
  if (!number.IsZero())
    return number.ToString();

  const char* zero_text = number.IsNegative() ? "-0" : "0";
  return zero_text;
}
// Serializes |number| using the ECMAScript number-to-string conversion, which
// HTML5 designates as "the best representation of the number n as a floating
// point number".
String SerializeForNumberType(double number) {
  String serialized = String::NumberToStringECMAScript(number);
  return serialized;
}
// Parses |string| as a floating-point number
// (http://www.whatwg.org/specs/web-apps/current-work/#floating-point-numbers)
// into a Decimal. Returns |fallback_value| when the text does not start with
// '-', '.' or an ASCII digit, when the parsed Decimal is not finite, or when
// it lies outside the range of a finite IEEE 754 double. A zero result is
// always returned as +0.
Decimal ParseToDecimalForNumberType(const String& string,
                                    const Decimal& fallback_value) {
  // Leading '+' and whitespace are not valid here, so only these first
  // characters are accepted (see also ParseToDoubleForNumberType).
  const UChar lead = string[0];
  const bool starts_like_number =
      lead == '-' || lead == '.' || IsASCIIDigit(lead);
  if (!starts_like_number)
    return fallback_value;

  const Decimal parsed = Decimal::FromString(string);
  if (!parsed.IsFinite())
    return fallback_value;

  // Numbers must be representable as finite double-precision values.
  const Decimal limit = Decimal::FromDouble(std::numeric_limits<double>::max());
  if (parsed < -limit || parsed > limit)
    return fallback_value;

  // Collapse -0 into +0.
  if (parsed.IsZero())
    return Decimal(0);
  return parsed;
}
// Validates a double produced by a string-to-number conversion.
//
// |value| is the converted number, |valid| says whether the conversion
// succeeded, and |fallback_value| is returned in every rejected case.
// Returns |fallback_value| when |valid| is false or |value| is NaN or
// infinite; otherwise returns |value| with -0 normalized to +0.
static double CheckDoubleValue(double value,
                               bool valid,
                               double fallback_value) {
  // NaN and infinity may be reported as valid by the string conversion, but
  // they are not valid here.
  if (!valid || !std::isfinite(value))
    return fallback_value;
  // No separate range check is required: every finite double already lies in
  // [-numeric_limits<double>::max(), numeric_limits<double>::max()].
  // The following expression converts -0 to +0.
  return value ? value : 0;
}
double ParseToDoubleForNumberType(const String& string, double fallback_value) {
// http://www.whatwg.org/specs/web-apps/current-work/#floating-point-numbers
// String::toDouble() accepts leading + and whitespace characters, which are
// not valid here.
UChar first_character = string[0];
if (first_character != '-' && first_character != '.' &&
!IsASCIIDigit(first_character))
return fallback_value;
if (string.EndsWith('.'))
return fallback_value;
bool valid = false;
double value = string.ToDouble(&valid);
return CheckDoubleValue(value, valid, fallback_value);
}
// Parses a signed integer from the range [position, end) after skipping
// leading HTML whitespace. A leading '+' and trailing non-numeric characters
// are accepted. On success stores the result in |value| and returns true; on
// failure returns false and leaves |value| untouched.
template <typename CharacterType>
static bool ParseHTMLIntegerInternal(const CharacterType* position,
                                     const CharacterType* end,
                                     int& value) {
  // Step 4: skip whitespace.
  SkipWhile<CharacterType, IsHTMLSpace<CharacterType>>(position, end);

  // Step 5: nothing left to parse is an error.
  if (position == end)
    return false;
  DCHECK_LT(position, end);

  WTF::NumberParsingOptions options(
      WTF::NumberParsingOptions::kAcceptTrailingGarbage |
      WTF::NumberParsingOptions::kAcceptLeadingPlus);
  bool parsed_ok;
  const int parsed =
      CharactersToInt(position, end - position, options, &parsed_ok);
  if (!parsed_ok)
    return false;

  value = parsed;
  return true;
}
// http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers
bool ParseHTMLInteger(const String& input, int& value) {
// Step 1
// Step 2
unsigned length = input.length();
if (!length || input.Is8Bit()) {
const LChar* start = input.Characters8();
return ParseHTMLIntegerInternal(start, start + length, value);
}
const UChar* start = input.Characters16();
return ParseHTMLIntegerInternal(start, start + length, value);
}
// Implements
// https://html.spec.whatwg.org/multipage/infrastructure.html#rules-for-parsing-non-negative-integers
// To support integers >= 2^31, the signed-integer rules [1] are folded into
// this function instead of being delegated to; "Step N" below refers to [1].
//
// [1]
// https://html.spec.whatwg.org/multipage/infrastructure.html#rules-for-parsing-integers
//
// Returns the parsing result code. |value| is written only when the result is
// kSuccess.
template <typename CharacterType>
static WTF::NumberParsingResult ParseHTMLNonNegativeIntegerInternal(
    const CharacterType* position,
    const CharacterType* end,
    unsigned& value) {
  // Step 4: Skip whitespace.
  SkipWhile<CharacterType, IsHTMLSpace<CharacterType>>(position, end);

  // Step 5: If position is past the end of input, return an error.
  if (position == end)
    return WTF::NumberParsingResult::kError;
  DCHECK_LT(position, end);

  // Accept trailing garbage, a leading '+', and "-0".
  WTF::NumberParsingOptions options(
      WTF::NumberParsingOptions::kAcceptTrailingGarbage |
      WTF::NumberParsingOptions::kAcceptLeadingPlus |
      WTF::NumberParsingOptions::kAcceptMinusZeroForUnsigned);
  WTF::NumberParsingResult outcome;
  const unsigned parsed =
      CharactersToUInt(position, end - position, options, &outcome);
  if (outcome != WTF::NumberParsingResult::kSuccess)
    return outcome;

  value = parsed;
  return outcome;
}
// String front end for the templated non-negative integer parser: reports
// kError for empty input, otherwise forwards the string's characters
// according to its character width.
static WTF::NumberParsingResult ParseHTMLNonNegativeIntegerInternal(
    const String& input,
    unsigned& value) {
  const unsigned length = input.length();
  if (!length)
    return WTF::NumberParsingResult::kError;

  if (!input.Is8Bit()) {
    const UChar* begin = input.Characters16();
    return ParseHTMLNonNegativeIntegerInternal(begin, begin + length, value);
  }
  const LChar* begin = input.Characters8();
  return ParseHTMLNonNegativeIntegerInternal(begin, begin + length, value);
}
// https://html.spec.whatwg.org/multipage/infrastructure.html#rules-for-parsing-non-negative-integers
bool ParseHTMLNonNegativeInteger(const String& input, unsigned& value) {
return ParseHTMLNonNegativeIntegerInternal(input, value) ==
WTF::NumberParsingResult::kSuccess;
}
bool ParseHTMLClampedNonNegativeInteger(const String& input,
unsigned min,
unsigned max,
unsigned& value) {
unsigned parsed_value;
switch (ParseHTMLNonNegativeIntegerInternal(input, parsed_value)) {
case WTF::NumberParsingResult::kError:
return false;
case WTF::NumberParsingResult::kOverflowMin:
NOTREACHED() << input;
return false;
case WTF::NumberParsingResult::kOverflowMax:
value = max;
return true;
case WTF::NumberParsingResult::kSuccess:
value = std::max(min, std::min(parsed_value, max));
return true;
}
return false;
}
// True for characters that separate entries in a list of numbers: a comma, a
// semicolon, or HTML whitespace.
template <typename CharacterType>
static bool IsSpaceOrDelimiter(CharacterType c) {
  if (c == ',' || c == ';')
    return true;
  return IsHTMLSpace(c);
}
// True for characters that are neither a list separator nor a possible first
// character of a number (an ASCII digit, '.' or '-').
template <typename CharacterType>
static bool IsNotSpaceDelimiterOrNumberStart(CharacterType c) {
  return !IsSpaceOrDelimiter(c) && !IsASCIIDigit(c) && c != '.' && c != '-';
}
// Splits [position, end) into separator-delimited tokens and converts each to
// a double. A token that cannot be converted, or whose value is rejected by
// CheckDoubleValue(), contributes 0 to the result.
template <typename CharacterType>
static Vector<double> ParseHTMLListOfFloatingPointNumbersInternal(
    const CharacterType* position,
    const CharacterType* end) {
  Vector<double> numbers;

  // Drop separators preceding the first token.
  SkipWhile<CharacterType, IsSpaceOrDelimiter>(position, end);
  while (position < end) {
    // Discard leading characters that cannot begin a number.
    SkipWhile<CharacterType, IsNotSpaceDelimiterOrNumberStart>(position, end);

    // The token extends up to the next separator.
    const CharacterType* token_start = position;
    SkipUntil<CharacterType, IsSpaceOrDelimiter>(position, end);

    size_t consumed = 0;
    const double raw =
        CharactersToDouble(token_start, position - token_start, consumed);
    const bool converted = consumed != 0;
    numbers.push_back(CheckDoubleValue(raw, converted, 0));

    // Move on to the start of the next token.
    SkipWhile<CharacterType, IsSpaceOrDelimiter>(position, end);
  }
  return numbers;
}
// https://html.spec.whatwg.org/multipage/infrastructure.html#rules-for-parsing-a-list-of-floating-point-numbers
// Parses |input| into a list of doubles. Empty input and 8-bit strings use the
// 8-bit character path; all other strings use the 16-bit path.
Vector<double> ParseHTMLListOfFloatingPointNumbers(const String& input) {
  const unsigned length = input.length();
  if (length && !input.Is8Bit()) {
    const UChar* begin = input.Characters16();
    return ParseHTMLListOfFloatingPointNumbersInternal(begin, begin + length);
  }
  const LChar* begin = input.Characters8();
  return ParseHTMLListOfFloatingPointNumbersInternal(begin, begin + length);
}
// Keyword searched for by ExtractCharset(), and its length without the
// terminating NUL.
static const char kCharsetString[] = "charset";
static const size_t kCharsetLength = sizeof(kCharsetString) - 1;
// https://html.spec.whatwg.org/multipage/infrastructure.html#extracting-character-encodings-from-meta-elements
// Looks for "charset" (ASCII case-insensitively) followed by '=' in |value|
// and returns the text that follows it. The text may be wrapped in single or
// double quotes; unquoted text ends at a character <= ' ', a quote or ';'.
// Returns an empty string when no charset can be extracted, including when an
// opening quote is never closed.
String ExtractCharset(const String& value) {
  const unsigned length = value.length();

  // Advances |index| past every character whose code is <= ' '.
  const auto skip_spaces = [&value, length](wtf_size_t index) {
    while (index < length && value[index] <= ' ')
      ++index;
    return index;
  };

  wtf_size_t pos = 0;
  while (pos < length) {
    pos = value.FindIgnoringASCIICase(kCharsetString, pos);
    if (pos == kNotFound)
      break;
    pos += kCharsetLength;

    // Optional whitespace, then a mandatory '='. Without it, resume the
    // search after this occurrence of the keyword.
    pos = skip_spaces(pos);
    if (value[pos] != '=')
      continue;
    ++pos;
    pos = skip_spaces(pos);

    // Remember the opening quote, if there is one, and step over it.
    char quote_mark = 0;
    if (pos < length && (value[pos] == '"' || value[pos] == '\'')) {
      quote_mark = static_cast<char>(value[pos++]);
      DCHECK(!(quote_mark & 0x80));
    }

    if (pos == length)
      break;

    unsigned end = pos;
    if (quote_mark) {
      // Quoted: run up to the matching quote.
      while (end < length && value[end] != quote_mark)
        ++end;
      if (end == length)
        break;  // Close quote not found.
    } else {
      // Unquoted: run up to whitespace, a quote character or ';'.
      while (end < length && value[end] > ' ' && value[end] != '"' &&
             value[end] != '\'' && value[end] != ';')
        ++end;
    }
    return value.Substring(pos, end - pos);
  }
  return "";
}
// Tracks which attribute supplied the charset candidate while
// EncodingFromMetaAttributes() walks an attribute list.
enum class MetaAttribute {
// No charset candidate has been found; this is the initial state.
kNone,
// The candidate came from a `charset` attribute.
kCharset,
// The candidate is a non-empty charset extracted from a `content` attribute;
// it is honored only if an http-equiv "content-type" attribute is also seen.
kPragma,
};
// Determines the text encoding declared by a list of meta attributes.
//
// A `charset` attribute is always honored and, once seen, causes later
// `content` attributes to be ignored. A charset extracted from a `content`
// attribute is honored only if the list also has an `http-equiv` attribute
// equal (ignoring case) to "content-type". The chosen charset has surrounding
// HTML whitespace stripped. Returns a default-constructed TextEncoding when no
// usable declaration is present.
WTF::TextEncoding EncodingFromMetaAttributes(
    const HTMLAttributeList& attributes) {
  String charset;
  MetaAttribute mode = MetaAttribute::kNone;
  bool saw_charset_attribute = false;
  bool saw_content_type_pragma = false;

  for (const auto& html_attribute : attributes) {
    const String& name = html_attribute.first;
    const AtomicString attribute_text(html_attribute.second);

    if (ThreadSafeMatch(name, kHttpEquivAttr)) {
      if (DeprecatedEqualIgnoringCase(attribute_text, "content-type"))
        saw_content_type_pragma = true;
      continue;
    }

    if (ThreadSafeMatch(name, kCharsetAttr)) {
      saw_charset_attribute = true;
      charset = attribute_text;
      mode = MetaAttribute::kCharset;
      continue;
    }

    // `content` is consulted only while no `charset` attribute has been seen.
    if (saw_charset_attribute || !ThreadSafeMatch(name, kContentAttr))
      continue;
    charset = ExtractCharset(attribute_text);
    if (charset.length())
      mode = MetaAttribute::kPragma;
  }

  const bool from_charset_attribute = mode == MetaAttribute::kCharset;
  const bool from_pragma =
      mode == MetaAttribute::kPragma && saw_content_type_pragma;
  if (!from_charset_attribute && !from_pragma)
    return WTF::TextEncoding();
  return WTF::TextEncoding(StripLeadingAndTrailingHTMLSpaces(charset));
}
// Compares two string implementations by content. Identical pointers are
// equal; differing hashes mean unequal; otherwise the contents are compared.
// Unless the pointers are identical, both are dereferenced.
static bool ThreadSafeEqual(const StringImpl* a, const StringImpl* b) {
  const bool same_object = a == b;
  return same_object || (a->GetHash() == b->GetHash() && EqualNonNull(a, b));
}
bool ThreadSafeMatch(const QualifiedName& a, const QualifiedName& b) {
return ThreadSafeEqual(a.LocalName().Impl(), b.LocalName().Impl());
}
bool ThreadSafeMatch(const String& local_name, const QualifiedName& q_name) {
return ThreadSafeEqual(local_name.Impl(), q_name.LocalName().Impl());
}
// Looks up the static StringImpl whose contents equal the |length| characters
// at |characters|. Returns nullptr when there is none.
template <typename CharType>
inline StringImpl* FindStringIfStatic(const CharType* characters,
                                      unsigned length) {
  // Input longer than the longest static string cannot match, so hashing it
  // would be wasted work.
  if (length > StringImpl::HighestStaticStringLength())
    return nullptr;

  // ComputeHashAndMaskTop8Bits is the function StringImpl's hash uses.
  const unsigned hash =
      StringHasher::ComputeHashAndMaskTop8Bits(characters, length);
  const WTF::StaticStringsTable& table = StringImpl::AllStaticStrings();
  DCHECK(!table.IsEmpty());

  // Arbitrary strings can share a hash with a known identifier (e.g. "bvvfg"
  // collides with "script"), so a hash hit must be confirmed by comparing
  // contents. Collisions between static strings themselves are guarded
  // against by assertions where static strings are created.
  const WTF::StaticStringsTable::const_iterator entry = table.find(hash);
  if (entry != table.end() && Equal(entry->value, characters, length))
    return entry->value;
  return nullptr;
}
// Returns a String for the |size| 8-bit characters at |characters|, reusing a
// matching static string when one exists and creating a new String otherwise.
String AttemptStaticStringCreation(const LChar* characters, wtf_size_t size) {
  if (StringImpl* static_impl = FindStringIfStatic(characters, size))
    return String(static_impl);
  return String(characters, size);
}
// Returns a String for the |size| 16-bit characters at |characters|, reusing a
// matching static string when one exists. Otherwise |width| picks how the new
// string is built: kLikely8Bit uses StringImpl::Create8BitIfPossible,
// kForce8Bit uses String::Make8BitFrom16BitSource, and any other value
// constructs the String directly from the 16-bit characters.
String AttemptStaticStringCreation(const UChar* characters,
                                   wtf_size_t size,
                                   CharacterWidth width) {
  if (StringImpl* static_impl = FindStringIfStatic(characters, size))
    return String(static_impl);

  if (width == kLikely8Bit)
    return String(StringImpl::Create8BitIfPossible(characters, size));
  if (width == kForce8Bit)
    return String::Make8BitFrom16BitSource(characters, size);
  return String(characters, size);
}
} // namespace blink