| /* |
| * Copyright (C) 2008 Apple Inc. All Rights Reserved. |
| * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ |
| * Copyright (C) 2010 Google, Inc. All Rights Reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "third_party/blink/renderer/core/html/parser/html_tokenizer.h" |
| |
| #include "third_party/blink/renderer/core/html/parser/html_entity_parser.h" |
| #include "third_party/blink/renderer/core/html/parser/html_parser_idioms.h" |
| #include "third_party/blink/renderer/core/html/parser/html_tree_builder.h" |
| #include "third_party/blink/renderer/core/html/parser/markup_tokenizer_inlines.h" |
| #include "third_party/blink/renderer/core/html_names.h" |
| #include "third_party/blink/renderer/core/html_tokenizer_names.h" |
| #include "third_party/blink/renderer/platform/wtf/text/ascii_ctype.h" |
| #include "third_party/blink/renderer/platform/wtf/text/unicode.h" |
| |
| namespace blink { |
| |
// Applies V to every integer in [0, 127] in ascending order, producing a
// comma-separated list `V(0), V(1), ..., V(127),`. The expansion ends with a
// trailing comma, so it is meant to be used inside a braced initializer.
// clang-format off
#define INT_0_TO_127_LIST(V)                                                  \
  V(0),   V(1),   V(2),   V(3),   V(4),   V(5),   V(6),   V(7),               \
  V(8),   V(9),   V(10),  V(11),  V(12),  V(13),  V(14),  V(15),              \
  V(16),  V(17),  V(18),  V(19),  V(20),  V(21),  V(22),  V(23),              \
  V(24),  V(25),  V(26),  V(27),  V(28),  V(29),  V(30),  V(31),              \
  V(32),  V(33),  V(34),  V(35),  V(36),  V(37),  V(38),  V(39),              \
  V(40),  V(41),  V(42),  V(43),  V(44),  V(45),  V(46),  V(47),              \
  V(48),  V(49),  V(50),  V(51),  V(52),  V(53),  V(54),  V(55),              \
  V(56),  V(57),  V(58),  V(59),  V(60),  V(61),  V(62),  V(63),              \
  V(64),  V(65),  V(66),  V(67),  V(68),  V(69),  V(70),  V(71),              \
  V(72),  V(73),  V(74),  V(75),  V(76),  V(77),  V(78),  V(79),              \
  V(80),  V(81),  V(82),  V(83),  V(84),  V(85),  V(86),  V(87),              \
  V(88),  V(89),  V(90),  V(91),  V(92),  V(93),  V(94),  V(95),              \
  V(96),  V(97),  V(98),  V(99),  V(100), V(101), V(102), V(103),             \
  V(104), V(105), V(106), V(107), V(108), V(109), V(110), V(111),             \
  V(112), V(113), V(114), V(115), V(116), V(117), V(118), V(119),             \
  V(120), V(121), V(122), V(123), V(124), V(125), V(126), V(127),
// clang-format on
| |
// Per-character classification bits used by the tokenizer's fast scanning
// loops. Each base flag occupies one bit of a uint8_t; CreateScanFlags()
// assigns at most one base flag to an ASCII character, and the compound
// values are masks that test for several classes at once.
enum class ScanFlags : uint8_t {
  // Base flags (one bit each).
  kNullCharacter = 0x01,            // '\0'
  kNewlineOrCarriageReturn = 0x02,  // '\n', '\r'
  kWhitespaceNotNewline = 0x04,     // ' ', '\x09', '\x0C'
  kAmpersand = 0x08,                // '&'
  kOpenTag = 0x10,                  // '<'
  kSlashAndCloseTag = 0x20,         // '/', '>'
  kEqual = 0x40,                    // '='
  kQuotes = 0x80,                   // '"', '\''

  // Compound masks built from the base flags above.
  kWhitespace = kNewlineOrCarriageReturn | kWhitespaceNotNewline,
  kCharacterTokenSpecial =
      kNullCharacter | kNewlineOrCarriageReturn | kAmpersand | kOpenTag,
  kNullOrNewline = kNullCharacter | kNewlineOrCarriageReturn,
  kRCDATASpecial = kNullCharacter | kAmpersand | kOpenTag,
  // Characters that end the tag-name scanning loop.
  kTagNameSpecial = kNullCharacter | kWhitespace | kSlashAndCloseTag,
  // Characters that end the attribute-name scanning loop.
  kAttributeNameSpecial = kNullCharacter | kWhitespace | kSlashAndCloseTag |
                          kEqual | kOpenTag | kQuotes,
};
| |
| static constexpr uint8_t CreateScanFlags(UChar cc) { |
| #define SCAN_FLAG(flag) static_cast<uint8_t>(ScanFlags::flag) |
| DCHECK(!(cc & ~0x7F)); // IsASCII |
| uint8_t scan_flag = 0; |
| if (cc == '\0') |
| scan_flag = SCAN_FLAG(kNullCharacter); |
| else if (cc == '\n' || cc == '\r') |
| scan_flag = SCAN_FLAG(kNewlineOrCarriageReturn); |
| else if (cc == ' ' || cc == '\x09' || cc == '\x0C') |
| scan_flag = SCAN_FLAG(kWhitespaceNotNewline); |
| else if (cc == '&') |
| scan_flag = SCAN_FLAG(kAmpersand); |
| else if (cc == '<') |
| scan_flag = SCAN_FLAG(kOpenTag); |
| else if (cc == '/' || cc == '>') |
| scan_flag = SCAN_FLAG(kSlashAndCloseTag); |
| else if (cc == '=') |
| scan_flag = SCAN_FLAG(kEqual); |
| else if (cc == '"' || cc == '\'') |
| scan_flag = SCAN_FLAG(kQuotes); |
| return scan_flag; |
| #undef SCAN_FLAG |
| } |
| |
| // Table of precomputed scan flags for the first 128 ASCII characters. |
| static constexpr const uint8_t character_scan_flags_[128] = { |
| INT_0_TO_127_LIST(CreateScanFlags)}; |
| |
| static inline UChar ToLowerCase(UChar cc) { |
| DCHECK(IsASCIIAlpha(cc)); |
| return cc | 0x20; |
| } |
| |
| static inline bool CheckScanFlag(UChar cc, ScanFlags flag) { |
| return IsASCII(cc) && |
| (character_scan_flags_[cc] & static_cast<uint8_t>(flag)); |
| } |
| |
| static inline UChar ToLowerCaseIfAlpha(UChar cc) { |
| return cc | (IsASCIIUpper(cc) ? 0x20 : 0); |
| } |
| |
| static inline bool VectorEqualsString(const LiteralBuffer<LChar, 32>& vector, |
| const String& string) { |
| if (vector.size() != string.length()) |
| return false; |
| |
| if (!string.length()) |
| return true; |
| |
| return Equal(string.Impl(), vector.data(), vector.size()); |
| } |
| |
// Shorthand wrappers that bind HTMLTokenizer as the first argument of the
// state-machine helper macros (BEGIN_STATE, RECONSUME_IN, ADVANCE_TO, ...).
// Those helpers are defined outside this file (presumably in
// markup_tokenizer_inlines.h).
#define HTML_BEGIN_STATE(state_name) BEGIN_STATE(HTMLTokenizer, state_name)
#define HTML_BEGIN_STATE_NOLABEL(state_name) \
  BEGIN_STATE_NOLABEL(HTMLTokenizer, state_name)
#define HTML_RECONSUME_IN(state_name) RECONSUME_IN(HTMLTokenizer, state_name)
#define HTML_ADVANCE_TO(state_name) ADVANCE_TO(HTMLTokenizer, state_name)
#define HTML_ADVANCE_PAST_NON_NEWLINE_TO(state_name) \
  ADVANCE_PAST_NON_NEWLINE_TO(HTMLTokenizer, state_name)
#define HTML_CONSUME(state_name) CONSUME(HTMLTokenizer, state_name)
#define HTML_CONSUME_NON_NEWLINE(state_name) \
  CONSUME_NON_NEWLINE(HTMLTokenizer, state_name)
#define HTML_SWITCH_TO(state_name) SWITCH_TO(HTMLTokenizer, state_name)
| |
| HTMLTokenizer::HTMLTokenizer(const HTMLParserOptions& options) |
| : input_stream_preprocessor_(this), options_(options) { |
| Reset(); |
| } |
| |
| HTMLTokenizer::~HTMLTokenizer() = default; |
| |
| void HTMLTokenizer::Reset() { |
| state_ = HTMLTokenizer::kDataState; |
| token_ = nullptr; |
| force_null_character_replacement_ = false; |
| should_allow_cdata_ = false; |
| additional_allowed_character_ = '\0'; |
| } |
| |
| inline bool HTMLTokenizer::ProcessEntity(SegmentedString& source) { |
| bool not_enough_characters = false; |
| DecodedHTMLEntity decoded_entity; |
| bool success = |
| ConsumeHTMLEntity(source, decoded_entity, not_enough_characters); |
| if (not_enough_characters) |
| return false; |
| if (!success) { |
| DCHECK(decoded_entity.IsEmpty()); |
| BufferCharacter('&'); |
| } else { |
| for (unsigned i = 0; i < decoded_entity.length; ++i) |
| BufferCharacter(decoded_entity.data[i]); |
| } |
| return true; |
| } |
| |
| bool HTMLTokenizer::FlushBufferedEndTag(SegmentedString& source) { |
| DCHECK(token_->GetType() == HTMLToken::kCharacter || |
| token_->GetType() == HTMLToken::kUninitialized); |
| source.AdvanceAndUpdateLineNumber(); |
| if (token_->GetType() == HTMLToken::kCharacter) |
| return true; |
| token_->BeginEndTag(buffered_end_tag_name_); |
| buffered_end_tag_name_.clear(); |
| appropriate_end_tag_name_.clear(); |
| temporary_buffer_.clear(); |
| return false; |
| } |
| |
// Records `state_name` as the current state, flushes the buffered end tag,
// and then:
//  - returns true from the enclosing function if FlushBufferedEndTag() did;
//  - returns HaveBufferedCharacterToken() if no further character can be
//    peeked from `source`;
//  - otherwise jumps to the `state_name` label.
// Expects `source` and `cc` to be in scope at the expansion site.
#define FLUSH_AND_ADVANCE_TO(state_name)                    \
  do {                                                      \
    state_ = HTMLTokenizer::state_name;                     \
    if (FlushBufferedEndTag(source)) {                      \
      return true;                                          \
    }                                                       \
    if (source.IsEmpty() ||                                 \
        !input_stream_preprocessor_.Peek(source, cc)) {     \
      return HaveBufferedCharacterToken();                  \
    }                                                       \
    goto state_name;                                        \
  } while (false)
| |
| bool HTMLTokenizer::FlushEmitAndResumeIn(SegmentedString& source, |
| HTMLTokenizer::State state) { |
| state_ = state; |
| FlushBufferedEndTag(source); |
| return true; |
| } |
| |
| bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
| // If we have a token in progress, then we're supposed to be called back |
| // with the same token so we can finish it. |
| DCHECK(!token_ || token_ == &token || |
| token.GetType() == HTMLToken::kUninitialized); |
| token_ = &token; |
| |
| if (!buffered_end_tag_name_.IsEmpty() && !IsEndTagBufferingState(state_)) { |
| // FIXME: This should call FlushBufferedEndTag(). |
| // We started an end tag during our last iteration. |
| token_->BeginEndTag(buffered_end_tag_name_); |
| buffered_end_tag_name_.clear(); |
| appropriate_end_tag_name_.clear(); |
| temporary_buffer_.clear(); |
| if (state_ == HTMLTokenizer::kDataState) { |
| // We're back in the data state, so we must be done with the tag. |
| return true; |
| } |
| } |
| |
| UChar cc; |
| if (source.IsEmpty() || !input_stream_preprocessor_.Peek(source, cc)) |
| return HaveBufferedCharacterToken(); |
| |
| // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0 |
| switch (state_) { |
| HTML_BEGIN_STATE(kDataState) { |
| if (cc == '&') |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kCharacterReferenceInDataState); |
| else if (cc == '<') { |
| if (token_->GetType() == HTMLToken::kCharacter) { |
| // We have a bunch of character tokens queued up that we |
| // are emitting lazily here. |
| return true; |
| } |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kTagOpenState); |
| } else if (cc == kEndOfFileMarker) |
| return EmitEndOfFile(source); |
| else { |
| return EmitData(source, cc); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kCharacterReferenceInDataState) { |
| if (!ProcessEntity(source)) |
| return HaveBufferedCharacterToken(); |
| HTML_SWITCH_TO(kDataState); |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kRCDATAState) { |
| while (!CheckScanFlag(cc, ScanFlags::kRCDATASpecial)) { |
| BufferCharacter(cc); |
| if (!input_stream_preprocessor_.Advance(source, cc)) |
| return HaveBufferedCharacterToken(); |
| } |
| if (cc == '&') |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kCharacterReferenceInRCDATAState); |
| else if (cc == '<') |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kRCDATALessThanSignState); |
| else if (cc == kEndOfFileMarker) |
| return EmitEndOfFile(source); |
| else |
| NOTREACHED(); |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kCharacterReferenceInRCDATAState) { |
| if (!ProcessEntity(source)) |
| return HaveBufferedCharacterToken(); |
| HTML_SWITCH_TO(kRCDATAState); |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kRAWTEXTState) { |
| if (cc == '<') |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kRAWTEXTLessThanSignState); |
| else if (cc == kEndOfFileMarker) |
| return EmitEndOfFile(source); |
| else { |
| BufferCharacter(cc); |
| HTML_CONSUME(kRAWTEXTState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kScriptDataState) { |
| if (cc == '<') |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kScriptDataLessThanSignState); |
| else if (cc == kEndOfFileMarker) |
| return EmitEndOfFile(source); |
| else { |
| BufferCharacter(cc); |
| HTML_CONSUME(kScriptDataState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE_NOLABEL(kPLAINTEXTState) { |
| if (cc == kEndOfFileMarker) |
| return EmitEndOfFile(source); |
| return EmitPLAINTEXT(source, cc); |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kTagOpenState) { |
| if (cc == '!') { |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kMarkupDeclarationOpenState); |
| } else if (cc == '/') { |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kEndTagOpenState); |
| } else if (IsASCIIAlpha(cc)) { |
| token_->BeginStartTag(ToLowerCase(cc)); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kTagNameState); |
| } else if (cc == '?') { |
| ParseError(); |
| // The spec consumes the current character before switching |
| // to the bogus comment state, but it's easier to implement |
| // if we reconsume the current character. |
| HTML_RECONSUME_IN(kBogusCommentState); |
| } else { |
| ParseError(); |
| BufferCharacter('<'); |
| HTML_RECONSUME_IN(kDataState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kEndTagOpenState) { |
| if (IsASCIIAlpha(cc)) { |
| token_->BeginEndTag(static_cast<LChar>(ToLowerCase(cc))); |
| appropriate_end_tag_name_.clear(); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kTagNameState); |
| } else if (cc == '>') { |
| ParseError(); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kDataState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| BufferCharacter('<'); |
| BufferCharacter('/'); |
| HTML_RECONSUME_IN(kDataState); |
| } else { |
| ParseError(); |
| HTML_RECONSUME_IN(kBogusCommentState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kTagNameState) { |
| while (!CheckScanFlag(cc, ScanFlags::kTagNameSpecial)) { |
| token_->AppendToName(ToLowerCaseIfAlpha(cc)); |
| if (!input_stream_preprocessor_.AdvancePastNonNewline(source, cc)) |
| return HaveBufferedCharacterToken(); |
| } |
| if (cc == '/') { |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kSelfClosingStartTagState); |
| } else if (cc == '>') { |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| HTML_RECONSUME_IN(kDataState); |
| } else { |
| DCHECK(IsTokenizerWhitespace(cc)); |
| HTML_ADVANCE_TO(kBeforeAttributeNameState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kRCDATALessThanSignState) { |
| if (cc == '/') { |
| temporary_buffer_.clear(); |
| DCHECK(buffered_end_tag_name_.IsEmpty()); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kRCDATAEndTagOpenState); |
| } else { |
| BufferCharacter('<'); |
| HTML_RECONSUME_IN(kRCDATAState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kRCDATAEndTagOpenState) { |
| if (IsASCIIAlpha(cc)) { |
| temporary_buffer_.AddChar(static_cast<LChar>(cc)); |
| AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kRCDATAEndTagNameState); |
| } else { |
| BufferCharacter('<'); |
| BufferCharacter('/'); |
| HTML_RECONSUME_IN(kRCDATAState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kRCDATAEndTagNameState) { |
| if (IsASCIIAlpha(cc)) { |
| temporary_buffer_.AddChar(static_cast<LChar>(cc)); |
| AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); |
| HTML_CONSUME_NON_NEWLINE(kRCDATAEndTagNameState); |
| } else { |
| if (IsTokenizerWhitespace(cc)) { |
| if (IsAppropriateEndTag()) { |
| temporary_buffer_.AddChar(static_cast<LChar>(cc)); |
| FLUSH_AND_ADVANCE_TO(kBeforeAttributeNameState); |
| } |
| } else if (cc == '/') { |
| if (IsAppropriateEndTag()) { |
| temporary_buffer_.AddChar(static_cast<LChar>(cc)); |
| FLUSH_AND_ADVANCE_TO(kSelfClosingStartTagState); |
| } |
| } else if (cc == '>') { |
| if (IsAppropriateEndTag()) { |
| temporary_buffer_.AddChar(static_cast<LChar>(cc)); |
| return FlushEmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| } |
| } |
| BufferCharacter('<'); |
| BufferCharacter('/'); |
| token_->AppendToCharacter(temporary_buffer_); |
| buffered_end_tag_name_.clear(); |
| temporary_buffer_.clear(); |
| HTML_RECONSUME_IN(kRCDATAState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kRAWTEXTLessThanSignState) { |
| if (cc == '/') { |
| temporary_buffer_.clear(); |
| DCHECK(buffered_end_tag_name_.IsEmpty()); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kRAWTEXTEndTagOpenState); |
| } else { |
| BufferCharacter('<'); |
| HTML_RECONSUME_IN(kRAWTEXTState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kRAWTEXTEndTagOpenState) { |
| if (IsASCIIAlpha(cc)) { |
| temporary_buffer_.AddChar(static_cast<LChar>(cc)); |
| AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kRAWTEXTEndTagNameState); |
| } else { |
| BufferCharacter('<'); |
| BufferCharacter('/'); |
| HTML_RECONSUME_IN(kRAWTEXTState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kRAWTEXTEndTagNameState) { |
| if (IsASCIIAlpha(cc)) { |
| temporary_buffer_.AddChar(static_cast<LChar>(cc)); |
| AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); |
| HTML_CONSUME_NON_NEWLINE(kRAWTEXTEndTagNameState); |
| } else { |
| if (IsTokenizerWhitespace(cc)) { |
| if (IsAppropriateEndTag()) { |
| temporary_buffer_.AddChar(static_cast<LChar>(cc)); |
| FLUSH_AND_ADVANCE_TO(kBeforeAttributeNameState); |
| } |
| } else if (cc == '/') { |
| if (IsAppropriateEndTag()) { |
| temporary_buffer_.AddChar(static_cast<LChar>(cc)); |
| FLUSH_AND_ADVANCE_TO(kSelfClosingStartTagState); |
| } |
| } else if (cc == '>') { |
| if (IsAppropriateEndTag()) { |
| temporary_buffer_.AddChar(static_cast<LChar>(cc)); |
| return FlushEmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| } |
| } |
| BufferCharacter('<'); |
| BufferCharacter('/'); |
| token_->AppendToCharacter(temporary_buffer_); |
| buffered_end_tag_name_.clear(); |
| temporary_buffer_.clear(); |
| HTML_RECONSUME_IN(kRAWTEXTState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kScriptDataLessThanSignState) { |
| if (cc == '/') { |
| temporary_buffer_.clear(); |
| DCHECK(buffered_end_tag_name_.IsEmpty()); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kScriptDataEndTagOpenState); |
| } else if (cc == '!') { |
| BufferCharacter('<'); |
| BufferCharacter('!'); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kScriptDataEscapeStartState); |
| } else { |
| BufferCharacter('<'); |
| HTML_RECONSUME_IN(kScriptDataState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kScriptDataEndTagOpenState) { |
| if (IsASCIIAlpha(cc)) { |
| temporary_buffer_.AddChar(static_cast<LChar>(cc)); |
| AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kScriptDataEndTagNameState); |
| } else { |
| BufferCharacter('<'); |
| BufferCharacter('/'); |
| HTML_RECONSUME_IN(kScriptDataState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kScriptDataEndTagNameState) { |
| if (IsASCIIAlpha(cc)) { |
| temporary_buffer_.AddChar(static_cast<LChar>(cc)); |
| AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); |
| HTML_CONSUME_NON_NEWLINE(kScriptDataEndTagNameState); |
| } else { |
| if (IsTokenizerWhitespace(cc)) { |
| if (IsAppropriateEndTag()) { |
| temporary_buffer_.AddChar(static_cast<LChar>(cc)); |
| FLUSH_AND_ADVANCE_TO(kBeforeAttributeNameState); |
| } |
| } else if (cc == '/') { |
| if (IsAppropriateEndTag()) { |
| temporary_buffer_.AddChar(static_cast<LChar>(cc)); |
| FLUSH_AND_ADVANCE_TO(kSelfClosingStartTagState); |
| } |
| } else if (cc == '>') { |
| if (IsAppropriateEndTag()) { |
| temporary_buffer_.AddChar(static_cast<LChar>(cc)); |
| return FlushEmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| } |
| } |
| BufferCharacter('<'); |
| BufferCharacter('/'); |
| token_->AppendToCharacter(temporary_buffer_); |
| buffered_end_tag_name_.clear(); |
| temporary_buffer_.clear(); |
| HTML_RECONSUME_IN(kScriptDataState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kScriptDataEscapeStartState) { |
| if (cc == '-') { |
| BufferCharacter(cc); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kScriptDataEscapeStartDashState); |
| } else |
| HTML_RECONSUME_IN(kScriptDataState); |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kScriptDataEscapeStartDashState) { |
| if (cc == '-') { |
| BufferCharacter(cc); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kScriptDataEscapedDashDashState); |
| } else |
| HTML_RECONSUME_IN(kScriptDataState); |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kScriptDataEscapedState) { |
| if (cc == '-') { |
| BufferCharacter(cc); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kScriptDataEscapedDashState); |
| } else if (cc == '<') |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kScriptDataEscapedLessThanSignState); |
| else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| HTML_RECONSUME_IN(kDataState); |
| } else { |
| BufferCharacter(cc); |
| HTML_CONSUME(kScriptDataEscapedState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kScriptDataEscapedDashState) { |
| if (cc == '-') { |
| BufferCharacter(cc); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kScriptDataEscapedDashDashState); |
| } else if (cc == '<') |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kScriptDataEscapedLessThanSignState); |
| else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| HTML_RECONSUME_IN(kDataState); |
| } else { |
| BufferCharacter(cc); |
| HTML_ADVANCE_TO(kScriptDataEscapedState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kScriptDataEscapedDashDashState) { |
| if (cc == '-') { |
| BufferCharacter(cc); |
| HTML_CONSUME_NON_NEWLINE(kScriptDataEscapedDashDashState); |
| } else if (cc == '<') |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kScriptDataEscapedLessThanSignState); |
| else if (cc == '>') { |
| BufferCharacter(cc); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kScriptDataState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| HTML_RECONSUME_IN(kDataState); |
| } else { |
| BufferCharacter(cc); |
| HTML_ADVANCE_TO(kScriptDataEscapedState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kScriptDataEscapedLessThanSignState) { |
| if (cc == '/') { |
| temporary_buffer_.clear(); |
| DCHECK(buffered_end_tag_name_.IsEmpty()); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kScriptDataEscapedEndTagOpenState); |
| } else if (IsASCIIAlpha(cc)) { |
| BufferCharacter('<'); |
| BufferCharacter(cc); |
| temporary_buffer_.clear(); |
| temporary_buffer_.AddChar(static_cast<LChar>(ToLowerCase(cc))); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kScriptDataDoubleEscapeStartState); |
| } else { |
| BufferCharacter('<'); |
| HTML_RECONSUME_IN(kScriptDataEscapedState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kScriptDataEscapedEndTagOpenState) { |
| if (IsASCIIAlpha(cc)) { |
| temporary_buffer_.AddChar(static_cast<LChar>(cc)); |
| AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kScriptDataEscapedEndTagNameState); |
| } else { |
| BufferCharacter('<'); |
| BufferCharacter('/'); |
| HTML_RECONSUME_IN(kScriptDataEscapedState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kScriptDataEscapedEndTagNameState) { |
| if (IsASCIIAlpha(cc)) { |
| temporary_buffer_.AddChar(static_cast<LChar>(cc)); |
| AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); |
| HTML_CONSUME_NON_NEWLINE(kScriptDataEscapedEndTagNameState); |
| } else { |
| if (IsTokenizerWhitespace(cc)) { |
| if (IsAppropriateEndTag()) { |
| temporary_buffer_.AddChar(static_cast<LChar>(cc)); |
| FLUSH_AND_ADVANCE_TO(kBeforeAttributeNameState); |
| } |
| } else if (cc == '/') { |
| if (IsAppropriateEndTag()) { |
| temporary_buffer_.AddChar(static_cast<LChar>(cc)); |
| FLUSH_AND_ADVANCE_TO(kSelfClosingStartTagState); |
| } |
| } else if (cc == '>') { |
| if (IsAppropriateEndTag()) { |
| temporary_buffer_.AddChar(static_cast<LChar>(cc)); |
| return FlushEmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| } |
| } |
| BufferCharacter('<'); |
| BufferCharacter('/'); |
| token_->AppendToCharacter(temporary_buffer_); |
| buffered_end_tag_name_.clear(); |
| temporary_buffer_.clear(); |
| HTML_RECONSUME_IN(kScriptDataEscapedState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kScriptDataDoubleEscapeStartState) { |
| if (IsTokenizerWhitespace(cc) || cc == '/' || cc == '>') { |
| BufferCharacter(cc); |
| if (TemporaryBufferIs(html_names::kScriptTag.LocalName())) |
| HTML_ADVANCE_TO(kScriptDataDoubleEscapedState); |
| else |
| HTML_ADVANCE_TO(kScriptDataEscapedState); |
| } else if (IsASCIIAlpha(cc)) { |
| BufferCharacter(cc); |
| temporary_buffer_.AddChar(static_cast<LChar>(ToLowerCase(cc))); |
| HTML_CONSUME_NON_NEWLINE(kScriptDataDoubleEscapeStartState); |
| } else |
| HTML_RECONSUME_IN(kScriptDataEscapedState); |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kScriptDataDoubleEscapedState) { |
| if (cc == '-') { |
| BufferCharacter(cc); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kScriptDataDoubleEscapedDashState); |
| } else if (cc == '<') { |
| BufferCharacter(cc); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO( |
| kScriptDataDoubleEscapedLessThanSignState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| HTML_RECONSUME_IN(kDataState); |
| } else { |
| BufferCharacter(cc); |
| HTML_CONSUME(kScriptDataDoubleEscapedState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kScriptDataDoubleEscapedDashState) { |
| if (cc == '-') { |
| BufferCharacter(cc); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kScriptDataDoubleEscapedDashDashState); |
| } else if (cc == '<') { |
| BufferCharacter(cc); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO( |
| kScriptDataDoubleEscapedLessThanSignState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| HTML_RECONSUME_IN(kDataState); |
| } else { |
| BufferCharacter(cc); |
| HTML_ADVANCE_TO(kScriptDataDoubleEscapedState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kScriptDataDoubleEscapedDashDashState) { |
| if (cc == '-') { |
| BufferCharacter(cc); |
| HTML_CONSUME_NON_NEWLINE(kScriptDataDoubleEscapedDashDashState); |
| } else if (cc == '<') { |
| BufferCharacter(cc); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO( |
| kScriptDataDoubleEscapedLessThanSignState); |
| } else if (cc == '>') { |
| BufferCharacter(cc); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kScriptDataState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| HTML_RECONSUME_IN(kDataState); |
| } else { |
| BufferCharacter(cc); |
| HTML_ADVANCE_TO(kScriptDataDoubleEscapedState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kScriptDataDoubleEscapedLessThanSignState) { |
| if (cc == '/') { |
| BufferCharacter(cc); |
| temporary_buffer_.clear(); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kScriptDataDoubleEscapeEndState); |
| } else |
| HTML_RECONSUME_IN(kScriptDataDoubleEscapedState); |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kScriptDataDoubleEscapeEndState) { |
| if (IsTokenizerWhitespace(cc) || cc == '/' || cc == '>') { |
| BufferCharacter(cc); |
| if (TemporaryBufferIs(html_names::kScriptTag.LocalName())) |
| HTML_ADVANCE_TO(kScriptDataEscapedState); |
| else |
| HTML_ADVANCE_TO(kScriptDataDoubleEscapedState); |
| } else if (IsASCIIAlpha(cc)) { |
| BufferCharacter(cc); |
| temporary_buffer_.AddChar(static_cast<LChar>(ToLowerCase(cc))); |
| HTML_CONSUME_NON_NEWLINE(kScriptDataDoubleEscapeEndState); |
| } else |
| HTML_RECONSUME_IN(kScriptDataDoubleEscapedState); |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kBeforeAttributeNameState) { |
| if (!SkipWhitespaces(source, cc)) |
| return HaveBufferedCharacterToken(); |
| if (cc == '/') { |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kSelfClosingStartTagState); |
| } else if (cc == '>') { |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| HTML_RECONSUME_IN(kDataState); |
| } else { |
| if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') |
| ParseError(); |
| token_->AddNewAttribute(); |
| token_->BeginAttributeName(source.NumberOfCharactersConsumed()); |
| token_->AppendToAttributeName(ToLowerCaseIfAlpha(cc)); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kAttributeNameState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kAttributeNameState) { |
| while (!CheckScanFlag(cc, ScanFlags::kAttributeNameSpecial)) { |
| token_->AppendToAttributeName(ToLowerCaseIfAlpha(cc)); |
| if (!input_stream_preprocessor_.AdvancePastNonNewline(source, cc)) |
| return HaveBufferedCharacterToken(); |
| } |
| if (IsTokenizerWhitespace(cc)) { |
| token_->EndAttributeName(source.NumberOfCharactersConsumed()); |
| HTML_ADVANCE_TO(kAfterAttributeNameState); |
| } else if (cc == '/') { |
| token_->EndAttributeName(source.NumberOfCharactersConsumed()); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kSelfClosingStartTagState); |
| } else if (cc == '=') { |
| token_->EndAttributeName(source.NumberOfCharactersConsumed()); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kBeforeAttributeValueState); |
| } else if (cc == '>') { |
| token_->EndAttributeName(source.NumberOfCharactersConsumed()); |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| token_->EndAttributeName(source.NumberOfCharactersConsumed()); |
| HTML_RECONSUME_IN(kDataState); |
| } else { |
| DCHECK(cc == '"' || cc == '\'' || cc == '<' || cc == '='); |
| ParseError(); |
| token_->AppendToAttributeName(ToLowerCaseIfAlpha(cc)); |
| HTML_CONSUME_NON_NEWLINE(kAttributeNameState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kAfterAttributeNameState) { |
| if (!SkipWhitespaces(source, cc)) |
| return HaveBufferedCharacterToken(); |
| if (cc == '/') { |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kSelfClosingStartTagState); |
| } else if (cc == '=') { |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kBeforeAttributeValueState); |
| } else if (cc == '>') { |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| HTML_RECONSUME_IN(kDataState); |
| } else { |
| if (cc == '"' || cc == '\'' || cc == '<') |
| ParseError(); |
| token_->AddNewAttribute(); |
| token_->BeginAttributeName(source.NumberOfCharactersConsumed()); |
| token_->AppendToAttributeName(ToLowerCaseIfAlpha(cc)); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kAttributeNameState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kBeforeAttributeValueState) { |
| if (IsTokenizerWhitespace(cc)) |
| HTML_CONSUME(kBeforeAttributeValueState); |
| else if (cc == '"') { |
| token_->BeginAttributeValue(source.NumberOfCharactersConsumed() + 1); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kAttributeValueDoubleQuotedState); |
| } else if (cc == '&') { |
| token_->BeginAttributeValue(source.NumberOfCharactersConsumed()); |
| HTML_RECONSUME_IN(kAttributeValueUnquotedState); |
| } else if (cc == '\'') { |
| token_->BeginAttributeValue(source.NumberOfCharactersConsumed() + 1); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kAttributeValueSingleQuotedState); |
| } else if (cc == '>') { |
| ParseError(); |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| HTML_RECONSUME_IN(kDataState); |
| } else { |
| if (cc == '<' || cc == '=' || cc == '`') |
| ParseError(); |
| token_->BeginAttributeValue(source.NumberOfCharactersConsumed()); |
| token_->AppendToAttributeValue(cc); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kAttributeValueUnquotedState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kAttributeValueDoubleQuotedState) { |
| if (cc == '"') { |
| token_->EndAttributeValue(source.NumberOfCharactersConsumed()); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kAfterAttributeValueQuotedState); |
| } else if (cc == '&') { |
| additional_allowed_character_ = '"'; |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO( |
| kCharacterReferenceInAttributeValueState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| token_->EndAttributeValue(source.NumberOfCharactersConsumed()); |
| HTML_RECONSUME_IN(kDataState); |
| } else { |
| token_->AppendToAttributeValue(cc); |
| HTML_CONSUME(kAttributeValueDoubleQuotedState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kAttributeValueSingleQuotedState) { |
| if (cc == '\'') { |
| token_->EndAttributeValue(source.NumberOfCharactersConsumed()); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kAfterAttributeValueQuotedState); |
| } else if (cc == '&') { |
| additional_allowed_character_ = '\''; |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO( |
| kCharacterReferenceInAttributeValueState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| token_->EndAttributeValue(source.NumberOfCharactersConsumed()); |
| HTML_RECONSUME_IN(kDataState); |
| } else { |
| token_->AppendToAttributeValue(cc); |
| HTML_CONSUME(kAttributeValueSingleQuotedState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kAttributeValueUnquotedState) { |
| if (IsTokenizerWhitespace(cc)) { |
| token_->EndAttributeValue(source.NumberOfCharactersConsumed()); |
| HTML_ADVANCE_TO(kBeforeAttributeNameState); |
| } else if (cc == '&') { |
| additional_allowed_character_ = '>'; |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO( |
| kCharacterReferenceInAttributeValueState); |
| } else if (cc == '>') { |
| token_->EndAttributeValue(source.NumberOfCharactersConsumed()); |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| token_->EndAttributeValue(source.NumberOfCharactersConsumed()); |
| HTML_RECONSUME_IN(kDataState); |
| } else { |
| if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`') |
| ParseError(); |
| token_->AppendToAttributeValue(cc); |
| HTML_CONSUME_NON_NEWLINE(kAttributeValueUnquotedState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kCharacterReferenceInAttributeValueState) { |
| bool not_enough_characters = false; |
| DecodedHTMLEntity decoded_entity; |
| bool success = |
| ConsumeHTMLEntity(source, decoded_entity, not_enough_characters, |
| additional_allowed_character_); |
| if (not_enough_characters) |
| return HaveBufferedCharacterToken(); |
| if (!success) { |
| DCHECK(decoded_entity.IsEmpty()); |
| token_->AppendToAttributeValue('&'); |
| } else { |
| for (unsigned i = 0; i < decoded_entity.length; ++i) |
| token_->AppendToAttributeValue(decoded_entity.data[i]); |
| } |
| // We're supposed to switch back to the attribute value state that |
| // we were in when we were switched into this state. Rather than |
| // keeping track of this explicitly, we observe that the previous |
| // state can be determined by additional_allowed_character_. |
| if (additional_allowed_character_ == '"') |
| HTML_SWITCH_TO(kAttributeValueDoubleQuotedState); |
| else if (additional_allowed_character_ == '\'') |
| HTML_SWITCH_TO(kAttributeValueSingleQuotedState); |
| else if (additional_allowed_character_ == '>') |
| HTML_SWITCH_TO(kAttributeValueUnquotedState); |
| else |
| NOTREACHED(); |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kAfterAttributeValueQuotedState) { |
| if (IsTokenizerWhitespace(cc)) |
| HTML_ADVANCE_TO(kBeforeAttributeNameState); |
| else if (cc == '/') |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kSelfClosingStartTagState); |
| else if (cc == '>') |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| HTML_RECONSUME_IN(kDataState); |
| } else { |
| ParseError(); |
| HTML_RECONSUME_IN(kBeforeAttributeNameState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kSelfClosingStartTagState) { |
| if (cc == '>') { |
| token_->SetSelfClosing(); |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| HTML_RECONSUME_IN(kDataState); |
| } else { |
| ParseError(); |
| HTML_RECONSUME_IN(kBeforeAttributeNameState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kBogusCommentState) { |
| token_->BeginComment(); |
| HTML_RECONSUME_IN(kContinueBogusCommentState); |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kContinueBogusCommentState) { |
| if (cc == '>') |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| else if (cc == kEndOfFileMarker) |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| else { |
| token_->AppendToComment(cc); |
| HTML_CONSUME(kContinueBogusCommentState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kMarkupDeclarationOpenState) { |
| if (cc == '-') { |
| SegmentedString::LookAheadResult result = |
| source.LookAhead(html_tokenizer_names::kDashDash); |
| if (result == SegmentedString::kDidMatch) { |
| source.AdvanceAndASSERT('-'); |
| source.AdvanceAndASSERT('-'); |
| token_->BeginComment(); |
| HTML_SWITCH_TO(kCommentStartState); |
| } else if (result == SegmentedString::kNotEnoughCharacters) |
| return HaveBufferedCharacterToken(); |
| } else if (cc == 'D' || cc == 'd') { |
| SegmentedString::LookAheadResult result = |
| source.LookAheadIgnoringCase(html_tokenizer_names::kDoctype); |
| if (result == SegmentedString::kDidMatch) { |
| AdvanceStringAndASSERTIgnoringCase(source, "doctype"); |
| HTML_SWITCH_TO(kDOCTYPEState); |
| } else if (result == SegmentedString::kNotEnoughCharacters) |
| return HaveBufferedCharacterToken(); |
| } else if (cc == '[' && ShouldAllowCDATA()) { |
| SegmentedString::LookAheadResult result = |
| source.LookAhead(html_tokenizer_names::kCdata); |
| if (result == SegmentedString::kDidMatch) { |
| AdvanceStringAndASSERT(source, "[CDATA["); |
| HTML_SWITCH_TO(kCDATASectionState); |
| } else if (result == SegmentedString::kNotEnoughCharacters) |
| return HaveBufferedCharacterToken(); |
| } |
| ParseError(); |
| HTML_RECONSUME_IN(kBogusCommentState); |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kCommentStartState) { |
| if (cc == '-') |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kCommentStartDashState); |
| else if (cc == '>') { |
| ParseError(); |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| } else { |
| token_->AppendToComment(cc); |
| HTML_ADVANCE_TO(kCommentState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kCommentStartDashState) { |
| if (cc == '-') |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kCommentEndState); |
| else if (cc == '>') { |
| ParseError(); |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| } else { |
| token_->AppendToComment('-'); |
| token_->AppendToComment(cc); |
| HTML_ADVANCE_TO(kCommentState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kCommentState) { |
| if (cc == '-') |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kCommentEndDashState); |
| else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| } else { |
| token_->AppendToComment(cc); |
| HTML_CONSUME(kCommentState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kCommentEndDashState) { |
| if (cc == '-') |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kCommentEndState); |
| else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| } else { |
| token_->AppendToComment('-'); |
| token_->AppendToComment(cc); |
| HTML_ADVANCE_TO(kCommentState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kCommentEndState) { |
| if (cc == '>') |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| else if (cc == '!') { |
| ParseError(); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kCommentEndBangState); |
| } else if (cc == '-') { |
| ParseError(); |
| token_->AppendToComment('-'); |
| HTML_CONSUME_NON_NEWLINE(kCommentEndState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| } else { |
| ParseError(); |
| token_->AppendToComment('-'); |
| token_->AppendToComment('-'); |
| token_->AppendToComment(cc); |
| HTML_ADVANCE_TO(kCommentState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kCommentEndBangState) { |
| if (cc == '-') { |
| token_->AppendToComment('-'); |
| token_->AppendToComment('-'); |
| token_->AppendToComment('!'); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kCommentEndDashState); |
| } else if (cc == '>') |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| } else { |
| token_->AppendToComment('-'); |
| token_->AppendToComment('-'); |
| token_->AppendToComment('!'); |
| token_->AppendToComment(cc); |
| HTML_ADVANCE_TO(kCommentState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kDOCTYPEState) { |
| if (IsTokenizerWhitespace(cc)) |
| HTML_ADVANCE_TO(kBeforeDOCTYPENameState); |
| else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| token_->BeginDOCTYPE(); |
| token_->SetForceQuirks(); |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| } else { |
| ParseError(); |
| HTML_RECONSUME_IN(kBeforeDOCTYPENameState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kBeforeDOCTYPENameState) { |
| if (!SkipWhitespaces(source, cc)) |
| return HaveBufferedCharacterToken(); |
| if (cc == '>') { |
| ParseError(); |
| token_->BeginDOCTYPE(); |
| token_->SetForceQuirks(); |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| token_->BeginDOCTYPE(); |
| token_->SetForceQuirks(); |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| } else { |
| token_->BeginDOCTYPE(ToLowerCaseIfAlpha(cc)); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kDOCTYPENameState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kDOCTYPENameState) { |
| if (IsTokenizerWhitespace(cc)) { |
| HTML_ADVANCE_TO(kAfterDOCTYPENameState); |
| } else if (cc == '>') { |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| token_->SetForceQuirks(); |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| } else { |
| token_->AppendToName(ToLowerCaseIfAlpha(cc)); |
| HTML_CONSUME_NON_NEWLINE(kDOCTYPENameState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kAfterDOCTYPENameState) { |
| if (!SkipWhitespaces(source, cc)) |
| return HaveBufferedCharacterToken(); |
| if (cc == '>') |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| token_->SetForceQuirks(); |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| } else { |
| if (cc == 'P' || cc == 'p') { |
| SegmentedString::LookAheadResult result = |
| source.LookAheadIgnoringCase(html_tokenizer_names::kPublic); |
| if (result == SegmentedString::kDidMatch) { |
| AdvanceStringAndASSERTIgnoringCase(source, "public"); |
| HTML_SWITCH_TO(kAfterDOCTYPEPublicKeywordState); |
| } else if (result == SegmentedString::kNotEnoughCharacters) |
| return HaveBufferedCharacterToken(); |
| } else if (cc == 'S' || cc == 's') { |
| SegmentedString::LookAheadResult result = |
| source.LookAheadIgnoringCase(html_tokenizer_names::kSystem); |
| if (result == SegmentedString::kDidMatch) { |
| AdvanceStringAndASSERTIgnoringCase(source, "system"); |
| HTML_SWITCH_TO(kAfterDOCTYPESystemKeywordState); |
| } else if (result == SegmentedString::kNotEnoughCharacters) |
| return HaveBufferedCharacterToken(); |
| } |
| ParseError(); |
| token_->SetForceQuirks(); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kBogusDOCTYPEState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kAfterDOCTYPEPublicKeywordState) { |
| if (IsTokenizerWhitespace(cc)) |
| HTML_ADVANCE_TO(kBeforeDOCTYPEPublicIdentifierState); |
| else if (cc == '"') { |
| ParseError(); |
| token_->SetPublicIdentifierToEmptyString(); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO( |
| kDOCTYPEPublicIdentifierDoubleQuotedState); |
| } else if (cc == '\'') { |
| ParseError(); |
| token_->SetPublicIdentifierToEmptyString(); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO( |
| kDOCTYPEPublicIdentifierSingleQuotedState); |
| } else if (cc == '>') { |
| ParseError(); |
| token_->SetForceQuirks(); |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| token_->SetForceQuirks(); |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| } else { |
| ParseError(); |
| token_->SetForceQuirks(); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kBogusDOCTYPEState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kBeforeDOCTYPEPublicIdentifierState) { |
| if (!SkipWhitespaces(source, cc)) |
| return HaveBufferedCharacterToken(); |
| if (cc == '"') { |
| token_->SetPublicIdentifierToEmptyString(); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO( |
| kDOCTYPEPublicIdentifierDoubleQuotedState); |
| } else if (cc == '\'') { |
| token_->SetPublicIdentifierToEmptyString(); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO( |
| kDOCTYPEPublicIdentifierSingleQuotedState); |
| } else if (cc == '>') { |
| ParseError(); |
| token_->SetForceQuirks(); |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| token_->SetForceQuirks(); |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| } else { |
| ParseError(); |
| token_->SetForceQuirks(); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kBogusDOCTYPEState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kDOCTYPEPublicIdentifierDoubleQuotedState) { |
| if (cc == '"') |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kAfterDOCTYPEPublicIdentifierState); |
| else if (cc == '>') { |
| ParseError(); |
| token_->SetForceQuirks(); |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| token_->SetForceQuirks(); |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| } else { |
| token_->AppendToPublicIdentifier(cc); |
| HTML_CONSUME(kDOCTYPEPublicIdentifierDoubleQuotedState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kDOCTYPEPublicIdentifierSingleQuotedState) { |
| if (cc == '\'') |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kAfterDOCTYPEPublicIdentifierState); |
| else if (cc == '>') { |
| ParseError(); |
| token_->SetForceQuirks(); |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| token_->SetForceQuirks(); |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| } else { |
| token_->AppendToPublicIdentifier(cc); |
| HTML_CONSUME(kDOCTYPEPublicIdentifierSingleQuotedState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kAfterDOCTYPEPublicIdentifierState) { |
| if (IsTokenizerWhitespace(cc)) |
| HTML_ADVANCE_TO(kBetweenDOCTYPEPublicAndSystemIdentifiersState); |
| else if (cc == '>') |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| else if (cc == '"') { |
| ParseError(); |
| token_->SetSystemIdentifierToEmptyString(); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO( |
| kDOCTYPESystemIdentifierDoubleQuotedState); |
| } else if (cc == '\'') { |
| ParseError(); |
| token_->SetSystemIdentifierToEmptyString(); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO( |
| kDOCTYPESystemIdentifierSingleQuotedState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| token_->SetForceQuirks(); |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| } else { |
| ParseError(); |
| token_->SetForceQuirks(); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kBogusDOCTYPEState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kBetweenDOCTYPEPublicAndSystemIdentifiersState) { |
| if (!SkipWhitespaces(source, cc)) |
| return HaveBufferedCharacterToken(); |
| if (cc == '>') |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| else if (cc == '"') { |
| token_->SetSystemIdentifierToEmptyString(); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO( |
| kDOCTYPESystemIdentifierDoubleQuotedState); |
| } else if (cc == '\'') { |
| token_->SetSystemIdentifierToEmptyString(); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO( |
| kDOCTYPESystemIdentifierSingleQuotedState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| token_->SetForceQuirks(); |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| } else { |
| ParseError(); |
| token_->SetForceQuirks(); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kBogusDOCTYPEState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kAfterDOCTYPESystemKeywordState) { |
| if (IsTokenizerWhitespace(cc)) |
| HTML_ADVANCE_TO(kBeforeDOCTYPESystemIdentifierState); |
| else if (cc == '"') { |
| ParseError(); |
| token_->SetSystemIdentifierToEmptyString(); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO( |
| kDOCTYPESystemIdentifierDoubleQuotedState); |
| } else if (cc == '\'') { |
| ParseError(); |
| token_->SetSystemIdentifierToEmptyString(); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO( |
| kDOCTYPESystemIdentifierSingleQuotedState); |
| } else if (cc == '>') { |
| ParseError(); |
| token_->SetForceQuirks(); |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| token_->SetForceQuirks(); |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| } else { |
| ParseError(); |
| token_->SetForceQuirks(); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kBogusDOCTYPEState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kBeforeDOCTYPESystemIdentifierState) { |
| if (!SkipWhitespaces(source, cc)) |
| return HaveBufferedCharacterToken(); |
| if (cc == '"') { |
| token_->SetSystemIdentifierToEmptyString(); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO( |
| kDOCTYPESystemIdentifierDoubleQuotedState); |
| } else if (cc == '\'') { |
| token_->SetSystemIdentifierToEmptyString(); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO( |
| kDOCTYPESystemIdentifierSingleQuotedState); |
| } else if (cc == '>') { |
| ParseError(); |
| token_->SetForceQuirks(); |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| token_->SetForceQuirks(); |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| } else { |
| ParseError(); |
| token_->SetForceQuirks(); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kBogusDOCTYPEState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kDOCTYPESystemIdentifierDoubleQuotedState) { |
| if (cc == '"') |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kAfterDOCTYPESystemIdentifierState); |
| else if (cc == '>') { |
| ParseError(); |
| token_->SetForceQuirks(); |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| token_->SetForceQuirks(); |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| } else { |
| token_->AppendToSystemIdentifier(cc); |
| HTML_CONSUME(kDOCTYPESystemIdentifierDoubleQuotedState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kDOCTYPESystemIdentifierSingleQuotedState) { |
| if (cc == '\'') |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kAfterDOCTYPESystemIdentifierState); |
| else if (cc == '>') { |
| ParseError(); |
| token_->SetForceQuirks(); |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| token_->SetForceQuirks(); |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| } else { |
| token_->AppendToSystemIdentifier(cc); |
| HTML_CONSUME(kDOCTYPESystemIdentifierSingleQuotedState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kAfterDOCTYPESystemIdentifierState) { |
| if (!SkipWhitespaces(source, cc)) |
| return HaveBufferedCharacterToken(); |
| if (cc == '>') |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| token_->SetForceQuirks(); |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| } else { |
| ParseError(); |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kBogusDOCTYPEState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kBogusDOCTYPEState) { |
| if (cc == '>') |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| else if (cc == kEndOfFileMarker) |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| HTML_CONSUME(kBogusDOCTYPEState); |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kCDATASectionState) { |
| if (cc == ']') |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kCDATASectionBracketState); |
| else if (cc == kEndOfFileMarker) |
| HTML_RECONSUME_IN(kDataState); |
| else { |
| BufferCharacter(cc); |
| HTML_CONSUME(kCDATASectionState); |
| } |
| } |
| END_STATE() |
| |
| HTML_BEGIN_STATE(kCDATASectionBracketState) { |
| if (cc == ']') |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kCDATASectionEndState); |
| else { |
| BufferCharacter(']'); |
| HTML_RECONSUME_IN(kCDATASectionState); |
| } |
| } |
| |
| HTML_BEGIN_STATE(kCDATASectionEndState) { |
| if (cc == ']') { |
| BufferCharacter(']'); |
| HTML_CONSUME_NON_NEWLINE(kCDATASectionEndState); |
| } else if (cc == '>') { |
| HTML_ADVANCE_PAST_NON_NEWLINE_TO(kDataState); |
| } else { |
| BufferCharacter(']'); |
| BufferCharacter(']'); |
| HTML_RECONSUME_IN(kCDATASectionState); |
| } |
| } |
| END_STATE() |
| } |
| |
| NOTREACHED(); |
| return false; |
| } |
| |
| bool HTMLTokenizer::SkipWhitespaces(SegmentedString& source, UChar& cc) { |
| // The character `cc` is usually not a whitespace, so we check it here |
| // first, before calling the helper. |
| if (!CheckScanFlag(cc, ScanFlags::kWhitespace)) |
| return true; |
| return SkipWhitespacesHelper(source, cc); |
| } |
| |
| bool HTMLTokenizer::SkipWhitespacesHelper(SegmentedString& source, UChar& cc) { |
| DCHECK(!source.IsEmpty()); |
| DCHECK(IsTokenizerWhitespace(cc)); |
| cc = source.CurrentChar(); |
| while (true) { |
| while (CheckScanFlag(cc, ScanFlags::kWhitespaceNotNewline)) { |
| cc = source.AdvancePastNonNewline(); |
| } |
| switch (cc) { |
| case '\n': |
| cc = source.AdvancePastNewlineAndUpdateLineNumber(); |
| break; |
| case '\r': |
| if (!input_stream_preprocessor_.AdvancePastCarriageReturn(source, cc)) |
| return false; |
| break; |
| case '\0': |
| if (!input_stream_preprocessor_.ProcessNullCharacter(source, cc)) |
| return false; |
| if (cc == kEndOfFileMarker) |
| return true; |
| break; |
| default: |
| return true; |
| } |
| } |
| } |
| |
| bool HTMLTokenizer::EmitData(SegmentedString& source, UChar cc) { |
| token_->EnsureIsCharacterToken(); |
| if (cc == '\n') // We could be pointing to '\r'. |
| cc = source.CurrentChar(); |
| while (true) { |
| while (!CheckScanFlag(cc, ScanFlags::kCharacterTokenSpecial)) { |
| token_->AppendToCharacter(cc); |
| cc = source.AdvancePastNonNewline(); |
| } |
| switch (cc) { |
| case '&': |
| state_ = kCharacterReferenceInDataState; |
| source.AdvanceAndASSERT('&'); |
| if (!ProcessEntity(source)) |
| return true; |
| state_ = kDataState; |
| if (source.IsEmpty()) |
| return true; |
| cc = source.CurrentChar(); |
| break; |
| case '\n': |
| token_->AppendToCharacter(cc); |
| cc = source.AdvancePastNewlineAndUpdateLineNumber(); |
| break; |
| case '\r': |
| token_->AppendToCharacter('\n'); // Canonize newline. |
| if (!input_stream_preprocessor_.AdvancePastCarriageReturn(source, cc)) |
| return true; |
| break; |
| case '<': |
| return true; |
| case '\0': |
| if (!input_stream_preprocessor_.ProcessNullCharacter(source, cc)) |
| return true; |
| if (cc == kEndOfFileMarker) |
| return EmitEndOfFile(source); |
| break; |
| default: |
| NOTREACHED(); |
| break; |
| } |
| } |
| } |
| |
| bool HTMLTokenizer::EmitPLAINTEXT(SegmentedString& source, UChar cc) { |
| token_->EnsureIsCharacterToken(); |
| if (cc == '\n') // We could be pointing to '\r'. |
| cc = source.CurrentChar(); |
| while (true) { |
| while (!CheckScanFlag(cc, ScanFlags::kNullOrNewline)) { |
| token_->AppendToCharacter(cc); |
| cc = source.AdvancePastNonNewline(); |
| } |
| switch (cc) { |
| case '\n': |
| token_->AppendToCharacter(cc); |
| cc = source.AdvancePastNewlineAndUpdateLineNumber(); |
| break; |
| case '\r': |
| token_->AppendToCharacter('\n'); // Canonize newline. |
| if (!input_stream_preprocessor_.AdvancePastCarriageReturn(source, cc)) |
| return true; |
| break; |
| case '\0': |
| if (!input_stream_preprocessor_.ProcessNullCharacter(source, cc)) |
| return true; |
| if (cc == kEndOfFileMarker) |
| return EmitEndOfFile(source); |
| break; |
| default: |
| NOTREACHED(); |
| break; |
| } |
| } |
| } |
| |
| String HTMLTokenizer::BufferedCharacters() const { |
| // FIXME: Add a DCHECK about state_. |
| StringBuilder characters; |
| characters.ReserveCapacity(NumberOfBufferedCharacters()); |
| characters.Append('<'); |
| characters.Append('/'); |
| characters.Append(temporary_buffer_.data(), temporary_buffer_.size()); |
| return characters.ToString(); |
| } |
| |
| void HTMLTokenizer::UpdateStateFor(const String& tag_name) { |
| if (ThreadSafeMatch(tag_name, html_names::kTextareaTag) || |
| ThreadSafeMatch(tag_name, html_names::kTitleTag)) |
| SetState(HTMLTokenizer::kRCDATAState); |
| else if (ThreadSafeMatch(tag_name, html_names::kPlaintextTag)) |
| SetState(HTMLTokenizer::kPLAINTEXTState); |
| else if (ThreadSafeMatch(tag_name, html_names::kScriptTag)) |
| SetState(HTMLTokenizer::kScriptDataState); |
| else if (ThreadSafeMatch(tag_name, html_names::kStyleTag) || |
| ThreadSafeMatch(tag_name, html_names::kIFrameTag) || |
| ThreadSafeMatch(tag_name, html_names::kXmpTag) || |
| ThreadSafeMatch(tag_name, html_names::kNoembedTag) || |
| ThreadSafeMatch(tag_name, html_names::kNoframesTag) || |
| (ThreadSafeMatch(tag_name, html_names::kNoscriptTag) && |
| options_.scripting_flag)) |
| SetState(HTMLTokenizer::kRAWTEXTState); |
| } |
| |
| inline bool HTMLTokenizer::TemporaryBufferIs(const String& expected_string) { |
| return VectorEqualsString(temporary_buffer_, expected_string); |
| } |
| |
| inline void HTMLTokenizer::AddToPossibleEndTag(LChar cc) { |
| DCHECK(IsEndTagBufferingState(state_)); |
| buffered_end_tag_name_.AddChar(cc); |
| } |
| |
| inline bool HTMLTokenizer::IsAppropriateEndTag() { |
| if (buffered_end_tag_name_.size() != appropriate_end_tag_name_.size()) |
| return false; |
| |
| wtf_size_t num_characters = buffered_end_tag_name_.size(); |
| |
| for (wtf_size_t i = 0; i < num_characters; i++) { |
| if (buffered_end_tag_name_[i] != appropriate_end_tag_name_[i]) |
| return false; |
| } |
| |
| return true; |
| } |
| |
| inline void HTMLTokenizer::ParseError() { |
| #if DCHECK_IS_ON() |
| DVLOG(1) << "Not implemented."; |
| #endif |
| } |
| |
| } // namespace blink |