// blob: 7b9d2573cd6881d5ab61e4dd5a7f7953efabd07c
/*
* Copyright (C) 2010 Google, Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "third_party/blink/renderer/core/html/parser/html_document_parser.h"
#include <memory>
#include <utility>
#include "base/feature_list.h"
#include "base/metrics/histogram_functions.h"
#include "base/numerics/safe_conversions.h"
#include "base/stl_util.h"
#include "base/strings/strcat.h"
#include "third_party/blink/public/common/features.h"
#include "third_party/blink/public/common/loader/loading_behavior_flag.h"
#include "third_party/blink/public/platform/platform.h"
#include "third_party/blink/public/platform/task_type.h"
#include "third_party/blink/renderer/core/css/media_values_cached.h"
#include "third_party/blink/renderer/core/css/style_engine.h"
#include "third_party/blink/renderer/core/dom/document_fragment.h"
#include "third_party/blink/renderer/core/dom/element.h"
#include "third_party/blink/renderer/core/frame/local_frame.h"
#include "third_party/blink/renderer/core/frame/settings.h"
#include "third_party/blink/renderer/core/html/html_document.h"
#include "third_party/blink/renderer/core/html/html_meta_element.h"
#include "third_party/blink/renderer/core/html/nesting_level_incrementer.h"
#include "third_party/blink/renderer/core/html/parser/atomic_html_token.h"
#include "third_party/blink/renderer/core/html/parser/background_html_scanner.h"
#include "third_party/blink/renderer/core/html/parser/html_parser_metrics.h"
#include "third_party/blink/renderer/core/html/parser/html_preload_scanner.h"
#include "third_party/blink/renderer/core/html/parser/html_resource_preloader.h"
#include "third_party/blink/renderer/core/html/parser/html_tree_builder.h"
#include "third_party/blink/renderer/core/html_names.h"
#include "third_party/blink/renderer/core/inspector/inspector_trace_events.h"
#include "third_party/blink/renderer/core/loader/document_loader.h"
#include "third_party/blink/renderer/core/loader/prefetched_signed_exchange_manager.h"
#include "third_party/blink/renderer/core/loader/preload_helper.h"
#include "third_party/blink/renderer/core/probe/core_probes.h"
#include "third_party/blink/renderer/core/script/html_parser_script_runner.h"
#include "third_party/blink/renderer/platform/bindings/runtime_call_stats.h"
#include "third_party/blink/renderer/platform/bindings/v8_per_isolate_data.h"
#include "third_party/blink/renderer/platform/heap/garbage_collected.h"
#include "third_party/blink/renderer/platform/heap/persistent.h"
#include "third_party/blink/renderer/platform/instrumentation/tracing/trace_event.h"
#include "third_party/blink/renderer/platform/loader/fetch/resource_fetcher.h"
#include "third_party/blink/renderer/platform/runtime_enabled_features.h"
#include "third_party/blink/renderer/platform/scheduler/public/cooperative_scheduling_manager.h"
#include "third_party/blink/renderer/platform/scheduler/public/post_cross_thread_task.h"
#include "third_party/blink/renderer/platform/scheduler/public/thread.h"
#include "third_party/blink/renderer/platform/scheduler/public/thread_scheduler.h"
#include "third_party/blink/renderer/platform/wtf/cross_thread_copier_base.h"
#include "third_party/blink/renderer/platform/wtf/cross_thread_functional.h"
#include "third_party/blink/renderer/platform/wtf/shared_buffer.h"
namespace blink {
// Default upper bound on the number of tokens the foreground HTML parser
// attempts to process in a single pump. A smaller bound generally gives an
// earlier first paint; a larger bound postpones first paint but brings it
// closer to the final page. Applies when no Finch-provided value exists.
constexpr int kDefaultMaxTokenizationBudget{250};
// Default for how many yields may occur while the default budget stays in
// effect; once the parser has yielded more often than this, a larger budget
// is used.
constexpr int kNumYieldsWithDefaultBudget{2};
// Forward declarations of the scope helpers that HTMLDocumentParserState names
// as friends; they are defined after that class.
class EndIfDelayedForbiddenScope;
class ShouldCompleteScope;
class AttemptToEndForbiddenScope;
// Reports whether features::kThreadedPreloadScanner is enabled.
bool ThreadedPreloadScannerEnabled() {
  // Looking the feature up for each parser regresses some micro benchmarks,
  // so the answer is computed once and reused on later calls.
  static const bool is_enabled{
      base::FeatureList::IsEnabled(features::kThreadedPreloadScanner)};
  return is_enabled;
}
// Reports whether features::kTimedHTMLParserBudget is enabled.
bool TimedParserBudgetEnabled() {
  // Looking the feature up for each parser regresses some micro benchmarks,
  // so the answer is computed once and reused on later calls.
  static const bool is_enabled{
      base::FeatureList::IsEnabled(features::kTimedHTMLParserBudget)};
  return is_enabled;
}
// Reports whether features::kPrecompileInlineScripts is enabled.
bool PrecompileInlineScriptsEnabled() {
  // Looking the feature up for each parser regresses some micro benchmarks,
  // so the answer is computed once and reused on later calls.
  static const bool is_enabled{
      base::FeatureList::IsEnabled(features::kPrecompileInlineScripts)};
  return is_enabled;
}
// Returns the thread on which preload scanning runs. The thread is held in a
// static local, so every call hands back the same Thread. Callers must only
// use this when the threaded preload scanner feature is enabled (DCHECKed).
Thread* GetPreloadScannerThread() {
DCHECK(ThreadedPreloadScannerEnabled());
// The preload scanner relies on parsing CSS, which requires creating garbage
// collected objects. This means the thread the scanning runs on must be GC
// enabled.
DEFINE_STATIC_LOCAL(
std::unique_ptr<Thread>, preload_scanner_thread,
(Thread::CreateThread(
ThreadCreationParams(ThreadType::kPreloadScannerThread)
.SetSupportsGC(true))));
return preload_scanner_thread.get();
}
// Determines how preloads will be processed when available in the background.
// It is important to process preloads quickly so the request can be started as
// soon as possible. An experiment will be run to pick the best option which
// will then be hard coded.
enum class PreloadProcessingMode {
// Preloads will be processed once the posted task is run. This is also the
// mode reported by GetPreloadProcessingMode() when the threaded preload
// scanner is disabled.
kNone,
// Preloads will be checked each iteration of the parser and dispatched
// immediately.
kImmediate,
// The parser will yield if there are pending preloads so the task can be run.
kYield,
};
// Resolves the preload processing mode. When the threaded preload scanner is
// disabled the answer is always kNone; otherwise it is read from the
// "preload-processing-mode" parameter of features::kThreadedPreloadScanner,
// whose default is kImmediate.
PreloadProcessingMode GetPreloadProcessingMode() {
  if (!ThreadedPreloadScannerEnabled())
    return PreloadProcessingMode::kNone;

  // Strings accepted by the feature parameter and the modes they select.
  static const base::FeatureParam<PreloadProcessingMode>::Option
      kModeOptions[] = {
          {PreloadProcessingMode::kNone, "none"},
          {PreloadProcessingMode::kImmediate, "immediate"},
          {PreloadProcessingMode::kYield, "yield"},
      };
  static const base::FeatureParam<PreloadProcessingMode> kModeParam{
      &features::kThreadedPreloadScanner, "preload-processing-mode",
      PreloadProcessingMode::kImmediate, &kModeOptions};
  return kModeParam.Get();
}
// Returns true when |document| has settings and those settings enable HTML
// preload scanning. A document without settings yields false.
bool IsPreloadScanningEnabled(Document* document) {
  auto* settings = document->GetSettings();
  if (!settings)
    return false;
  return settings->GetDoHtmlPreloadScanning();
}
// Returns the default time budget for one parser pump, taken from the
// "default-parser-budget" parameter of features::kTimedHTMLParserBudget
// (10 ms when the parameter is not supplied).
base::TimeDelta GetDefaultTimedBudget() {
  static const base::FeatureParam<base::TimeDelta> kBudgetParam{
      &features::kTimedHTMLParserBudget, "default-parser-budget",
      base::Milliseconds(10)};
  // Read the parameter a single time so its string is not parsed repeatedly.
  static const base::TimeDelta kCachedBudget = kBudgetParam.Get();
  return kCachedBudget;
}
// Chooses the time budget for the next pump based on how often the parser has
// already yielded: the default budget while |times_yielded| does not exceed
// the "num-yields-with-default-budget" parameter, and the
// "long-parser-budget" parameter (500 ms by default) afterwards.
base::TimeDelta GetTimedBudget(int times_yielded) {
  static const base::FeatureParam<int> kDefaultBudgetYieldsParam{
      &features::kTimedHTMLParserBudget, "num-yields-with-default-budget",
      kNumYieldsWithDefaultBudget};
  // Read each parameter a single time so its string is not parsed repeatedly.
  static const int kDefaultBudgetYields = kDefaultBudgetYieldsParam.Get();

  static const base::FeatureParam<base::TimeDelta> kExtendedBudgetParam{
      &features::kTimedHTMLParserBudget, "long-parser-budget",
      base::Milliseconds(500)};
  static const base::TimeDelta kExtendedBudget = kExtendedBudgetParam.Get();

  const bool within_default_phase = times_yielded <= kDefaultBudgetYields;
  return within_default_phase ? GetDefaultTimedBudget() : kExtendedBudget;
}
// This class encapsulates the internal state needed for synchronous foreground
// HTML parsing (e.g. if HTMLDocumentParser::PumpTokenizer yields, this class
// tracks what should be done after the pump completes.)
//
// The Enter*/Exit* mutators are private and are driven by the friend scope
// classes declared below.
class HTMLDocumentParserState
    : public GarbageCollected<HTMLDocumentParserState> {
  friend EndIfDelayedForbiddenScope;
  friend ShouldCompleteScope;
  friend AttemptToEndForbiddenScope;

 public:
  // Keeps track of whether the parser needs to complete tokenization work,
  // optionally followed by EndIfDelayed.
  enum class DeferredParserState {
    // Indicates that a tokenizer pump has either completed or hasn't been
    // scheduled.
    kNotScheduled = 0,  // Enforce ordering in this enum.
    // Indicates that a tokenizer pump is scheduled and hasn't completed yet.
    kScheduled = 1,
    // Indicates that a tokenizer pump, followed by EndIfDelayed, is scheduled.
    kScheduledWithEndIfDelayed = 2
  };

  enum class MetaCSPTokenState {
    // If we've seen a meta CSP token in an upcoming HTML chunk, then we need to
    // defer any preloads until we've added the CSP token to the document and
    // applied the Content Security Policy.
    kSeen = 0,
    // Indicates that there is no meta CSP token in the upcoming chunk.
    kNotSeen = 1,
    // Indicates that we've added the CSP token to the document and we can now
    // fetch preloads.
    kProcessed = 2,
    // Indicates that it's too late to apply a Content-Security policy (because
    // we've exited the header section.)
    kUnenforceable = 3,
  };

  // Starts in the "not scheduled" / "no meta CSP seen" state for the given
  // synchronization |mode|; the preload processing mode is sampled once here.
  explicit HTMLDocumentParserState(ParserSynchronizationPolicy mode)
      : state_(DeferredParserState::kNotScheduled),
        meta_csp_state_(MetaCSPTokenState::kNotSeen),
        mode_(mode),
        preload_processing_mode_(GetPreloadProcessingMode()) {}

  // Nothing to trace: this class holds no traced members.
  void Trace(Visitor* v) const {}

  // Updates the deferred pump state. Moving to kScheduled is not expected
  // while ShouldComplete() is true (DCHECKed).
  void SetState(DeferredParserState state) {
    DCHECK(!(state == DeferredParserState::kScheduled && ShouldComplete()));
    state_ = state;
  }
  DeferredParserState GetState() const { return state_; }
  // True for kScheduled and kScheduledWithEndIfDelayed; relies on the enum's
  // numeric ordering.
  bool IsScheduled() const { return state_ >= DeferredParserState::kScheduled; }

  // Returns a stable string for the current deferred state (used for tracing).
  const char* GetStateAsString() const {
    switch (state_) {
      case DeferredParserState::kNotScheduled:
        return "not_scheduled";
      case DeferredParserState::kScheduled:
        return "scheduled";
      case DeferredParserState::kScheduledWithEndIfDelayed:
        return "scheduled_with_end_if_delayed";
    }
    // Every enumerator is handled above. This return only guards against
    // flowing off the end of a non-void function (undefined behavior) should
    // |state_| ever hold a value outside the enum.
    return "unknown";
  }

  bool NeedsLinkHeaderPreloadsDispatch() const {
    return needs_link_header_dispatch_;
  }
  void DispatchedLinkHeaderPreloads() { needs_link_header_dispatch_ = false; }

  bool SeenFirstByte() const { return have_seen_first_byte_; }
  void MarkSeenFirstByte() { have_seen_first_byte_ = true; }

  bool EndWasDelayed() const { return end_was_delayed_; }
  void SetEndWasDelayed(bool new_value) { end_was_delayed_ = new_value; }

  bool AddedPendingParserBlockingStylesheet() const {
    return added_pending_parser_blocking_stylesheet_;
  }
  void SetAddedPendingParserBlockingStylesheet(bool new_value) {
    added_pending_parser_blocking_stylesheet_ = new_value;
  }

  bool WaitingForStylesheets() const { return is_waiting_for_stylesheets_; }
  void SetWaitingForStylesheets(bool new_value) {
    is_waiting_for_stylesheets_ = new_value;
  }

  // Keeps track of whether Document::Finish has been called whilst parsing.
  // ShouldAttemptToEndOnEOF() means that the parser should close when there's
  // no more input.
  bool ShouldAttemptToEndOnEOF() const { return should_attempt_to_end_on_eof_; }
  void SetAttemptToEndOnEOF() {
    // Should only ever call ::Finish once.
    DCHECK(!should_attempt_to_end_on_eof_);
    // This method should only be called from ::Finish.
    should_attempt_to_end_on_eof_ = true;
  }

  // True unless at least one EndIfDelayedForbiddenScope is currently active.
  bool ShouldEndIfDelayed() const { return end_if_delayed_forbidden_ == 0; }
  // True while a ShouldCompleteScope is active, or whenever the parser is not
  // in kAllowDeferredParsing mode.
  bool ShouldComplete() const {
    return should_complete_ || GetMode() != kAllowDeferredParsing;
  }
  bool IsSynchronous() const {
    return mode_ == ParserSynchronizationPolicy::kForceSynchronousParsing;
  }
  ParserSynchronizationPolicy GetMode() const { return mode_; }

  // Yield bookkeeping.
  void MarkYield() { times_yielded_++; }
  int TimesYielded() const { return times_yielded_; }

  // Returns an incrementer bound to the pump-session nesting counter.
  NestingLevelIncrementer ScopedPumpSession() {
    return NestingLevelIncrementer(pump_session_nesting_level_);
  }
  bool InPumpSession() const { return pump_session_nesting_level_; }
  bool InNestedPumpSession() const { return pump_session_nesting_level_ > 1; }

  // Records whether a meta CSP tag was seen. Ignored once the header has been
  // exited (kUnenforceable is sticky).
  void SetSeenCSPMetaTag(const bool seen) {
    if (meta_csp_state_ == MetaCSPTokenState::kUnenforceable)
      return;
    if (seen)
      meta_csp_state_ = MetaCSPTokenState::kSeen;
    else
      meta_csp_state_ = MetaCSPTokenState::kNotSeen;
  }
  void SetExitedHeader() {
    meta_csp_state_ = MetaCSPTokenState::kUnenforceable;
  }
  bool HaveExitedHeader() const {
    return meta_csp_state_ == MetaCSPTokenState::kUnenforceable;
  }

  bool ShouldYieldForPreloads() const {
    return preload_processing_mode_ == PreloadProcessingMode::kYield;
  }
  bool ShouldProcessPreloads() const {
    return preload_processing_mode_ == PreloadProcessingMode::kImmediate;
  }

 private:
  // Counter-based so that EndIfDelayedForbiddenScope instances may nest.
  void EnterEndIfDelayedForbidden() { end_if_delayed_forbidden_++; }
  void ExitEndIfDelayedForbidden() {
    DCHECK(end_if_delayed_forbidden_);
    end_if_delayed_forbidden_--;
  }
  // Clears the end-on-EOF request; it must currently be set (DCHECKed).
  void EnterAttemptToEndForbidden() {
    DCHECK(should_attempt_to_end_on_eof_);
    should_attempt_to_end_on_eof_ = false;
  }
  // Counter-based so that ShouldCompleteScope instances may nest.
  void EnterShouldComplete() { should_complete_++; }
  void ExitShouldComplete() {
    DCHECK(should_complete_);
    should_complete_--;
  }

  DeferredParserState state_;
  MetaCSPTokenState meta_csp_state_;
  ParserSynchronizationPolicy mode_;
  PreloadProcessingMode preload_processing_mode_;
  unsigned end_if_delayed_forbidden_ = 0;
  unsigned should_complete_ = 0;
  unsigned times_yielded_ = 0;
  unsigned pump_session_nesting_level_ = 0;
  // Set to true if Document::Finish has been called and we're operating
  // asynchronously.
  bool should_attempt_to_end_on_eof_ = false;
  bool needs_link_header_dispatch_ = true;
  bool have_seen_first_byte_ = false;
  bool end_was_delayed_ = false;
  bool added_pending_parser_blocking_stylesheet_ = false;
  bool is_waiting_for_stylesheets_ = false;
};
class EndIfDelayedForbiddenScope {
STACK_ALLOCATED();
public:
explicit EndIfDelayedForbiddenScope(HTMLDocumentParserState* state)
: state_(state) {
state_->EnterEndIfDelayedForbidden();
}
~EndIfDelayedForbiddenScope() { state_->ExitEndIfDelayedForbidden(); }
private:
HTMLDocumentParserState* state_;
};
// Stack-only helper that calls EnterAttemptToEndForbidden() on the given state
// when constructed.
// NOTE(review): unlike the sibling scope classes there is no destructor, so
// nothing is undone when the scope ends -- confirm this is intentional.
class AttemptToEndForbiddenScope {
  STACK_ALLOCATED();

 public:
  explicit AttemptToEndForbiddenScope(HTMLDocumentParserState* state)
      : parser_state_(state) {
    parser_state_->EnterAttemptToEndForbidden();
  }

 private:
  HTMLDocumentParserState* parser_state_;
};
class ShouldCompleteScope {
STACK_ALLOCATED();
public:
explicit ShouldCompleteScope(HTMLDocumentParserState* state) : state_(state) {
state_->EnterShouldComplete();
}
~ShouldCompleteScope() { state_->ExitShouldComplete(); }
private:
HTMLDocumentParserState* state_;
};
class FetchBatchScope {
STACK_ALLOCATED();
public:
explicit FetchBatchScope(HTMLDocumentParser* parser) : parser_(parser) {
parser_->StartFetchBatch();
}
~FetchBatchScope() { parser_->EndFetchBatch(); }
private:
HTMLDocumentParser* const parser_;
};
// This is a direct transcription of step 4 from:
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
//
// Picks the initial tokenizer state for fragment parsing from the tag of
// |context_element|. A null context element, or a tag not listed below, maps
// to the data state. When |report_errors| is false, the RAWTEXT and
// script-data cases return the PLAINTEXT state instead.
static HTMLTokenizer::State TokenizerStateForContextElement(
    Element* context_element,
    bool report_errors,
    const HTMLParserOptions& options) {
  if (!context_element)
    return HTMLTokenizer::kDataState;

  const QualifiedName& tag = context_element->TagQName();
  const auto is_tag = [&tag](const auto& candidate) {
    return tag.Matches(candidate);
  };

  if (is_tag(html_names::kTitleTag) || is_tag(html_names::kTextareaTag))
    return HTMLTokenizer::kRCDATAState;

  // <noscript> only counts as raw text when scripting is enabled.
  const bool is_raw_text_element =
      is_tag(html_names::kStyleTag) || is_tag(html_names::kXmpTag) ||
      is_tag(html_names::kIFrameTag) || is_tag(html_names::kNoembedTag) ||
      (is_tag(html_names::kNoscriptTag) && options.scripting_flag) ||
      is_tag(html_names::kNoframesTag);
  if (is_raw_text_element) {
    if (report_errors)
      return HTMLTokenizer::kRAWTEXTState;
    return HTMLTokenizer::kPLAINTEXTState;
  }

  if (is_tag(html_names::kScriptTag)) {
    if (report_errors)
      return HTMLTokenizer::kScriptDataState;
    return HTMLTokenizer::kPLAINTEXTState;
  }

  if (is_tag(html_names::kPlaintextTag))
    return HTMLTokenizer::kPLAINTEXTState;

  return HTMLTokenizer::kDataState;
}
class ScopedYieldTimer {
public:
// This object is created at the start of a block of parsing, and will
// report the time since the last block yielded if known.
ScopedYieldTimer(std::unique_ptr<base::ElapsedTimer>* timer,
HTMLParserMetrics* metrics_reporter)
: timer_(timer), reporting_metrics_(metrics_reporter) {
if (!reporting_metrics_ || !(*timer_))
return;
metrics_reporter->AddYieldInterval((*timer_)->Elapsed());
timer_->reset();
}
// The destructor creates a new timer, which will keep track of time until
// the next block starts.
~ScopedYieldTimer() {
if (reporting_metrics_)
*timer_ = std::make_unique<base::ElapsedTimer>();
}
private:
std::unique_ptr<base::ElapsedTimer>* timer_;
bool reporting_metrics_;
};
// Document-parser constructor: delegates to the common constructor with
// scripting content allowed, then attaches a script runner and a tree builder
// for |document|.
HTMLDocumentParser::HTMLDocumentParser(HTMLDocument& document,
                                       ParserSynchronizationPolicy sync_policy,
                                       ParserPrefetchPolicy prefetch_policy)
    : HTMLDocumentParser(document,
                         kAllowScriptingContent,
                         sync_policy,
                         prefetch_policy) {
  script_runner_ =
      HTMLParserScriptRunner::Create(ReentryPermit(), &document, this);

  // Declarative shadow DOM is allowed for the document parser unless it has
  // been explicitly disabled.
  const auto shadow_root_allow_state =
      document.GetDeclarativeShadowRootAllowState();
  const bool allow_declarative_shadow_roots =
      shadow_root_allow_state !=
      Document::DeclarativeShadowRootAllowState::kDeny;

  tree_builder_ = MakeGarbageCollected<HTMLTreeBuilder>(
      this, document, kAllowScriptingContent, options_,
      allow_declarative_shadow_roots);
}
// Fragment-parser constructor: always parses synchronously, creates no script
// runner, and seeds the tokenizer state from |context_element|.
HTMLDocumentParser::HTMLDocumentParser(
    DocumentFragment* fragment,
    Element* context_element,
    ParserContentPolicy parser_content_policy,
    ParserPrefetchPolicy parser_prefetch_policy)
    : HTMLDocumentParser(fragment->GetDocument(),
                         parser_content_policy,
                         kForceSynchronousParsing,
                         parser_prefetch_policy) {
  // Declarative shadow DOM is allowed for the fragment parser only when it
  // has been explicitly enabled.
  const auto shadow_root_allow_state =
      fragment->GetDocument().GetDeclarativeShadowRootAllowState();
  const bool allow_declarative_shadow_roots =
      shadow_root_allow_state ==
      Document::DeclarativeShadowRootAllowState::kAllow;

  // No script_runner_ in fragment parser.
  tree_builder_ = MakeGarbageCollected<HTMLTreeBuilder>(
      this, fragment, context_element, parser_content_policy, options_,
      allow_declarative_shadow_roots);

  // For now document fragment parsing never reports errors.
  constexpr bool kReportErrors = false;
  const HTMLTokenizer::State initial_state = TokenizerStateForContextElement(
      context_element, kReportErrors, options_);
  tokenizer_->SetState(initial_state);
}
// Common constructor shared by the document and fragment constructors. Sets up
// the token/tokenizer pair, the task runner and scheduler (both null unless
// the corresponding sync policy applies), optional metrics reporters, and --
// when the policies and document allow it -- the resource preloader.
HTMLDocumentParser::HTMLDocumentParser(Document& document,
                                       ParserContentPolicy content_policy,
                                       ParserSynchronizationPolicy sync_policy,
                                       ParserPrefetchPolicy prefetch_policy)
    : ScriptableDocumentParser(document, content_policy),
      options_(&document),
      token_(std::make_unique<HTMLToken>()),
      tokenizer_(std::make_unique<HTMLTokenizer>(options_)),
      loading_task_runner_(sync_policy == kForceSynchronousParsing
                               ? nullptr
                               : document.GetTaskRunner(TaskType::kNetworking)),
      task_runner_state_(
          MakeGarbageCollected<HTMLDocumentParserState>(sync_policy)),
      scheduler_(sync_policy == kAllowDeferredParsing
                     ? Thread::Current()->Scheduler()
                     : nullptr) {
  const bool is_synchronous = task_runner_state_->IsSynchronous();

  // Make sure the preload scanner thread will be ready when needed.
  if (ThreadedPreloadScannerEnabled() && !is_synchronous)
    GetPreloadScannerThread();

  // Report metrics for async document parsing or forced synchronous parsing.
  // The document must be outermost main frame to meet UKM requirements, and
  // must have a high resolution clock for high quality data.
  const bool wants_parser_metrics = sync_policy == kAllowDeferredParsing &&
                                    document.IsInOutermostMainFrame() &&
                                    base::TimeTicks::IsHighResolution();
  if (wants_parser_metrics) {
    metrics_reporter_ = std::make_unique<HTMLParserMetrics>(
        document.UkmSourceID(), document.UkmRecorder());
  }

  if (GetDocument()->IsInOutermostMainFrame() && !is_synchronous) {
    tokenizer_metrics_reporter_ =
        std::make_unique<HTMLTokenizerMetricsReporter>(tokenizer_.get());
  }

  // A preloader is created only when all of the following hold:
  // - the content is not clipboard content (scripting/plugins disallowed),
  // - the document is attached to a frame (likely the prefetched resources
  //   will be loaded soon) or is for no-state prefetch (made specifically for
  //   running the preloader),
  // - prefetching is allowed by |prefetch_policy|.
  const bool wants_preloader =
      content_policy != kDisallowScriptingAndPluginContent &&
      (document.GetFrame() || document.IsPrefetchOnly()) &&
      prefetch_policy == kAllowPrefetching;
  if (wants_preloader)
    preloader_ = MakeGarbageCollected<HTMLResourcePreloader>(document);
}
// Defaulted destructor: no explicit teardown is performed here.
HTMLDocumentParser::~HTMLDocumentParser() = default;
// Traces the members visited below, then forwards to both base classes.
void HTMLDocumentParser::Trace(Visitor* visitor) const {
visitor->Trace(reentry_permit_);
visitor->Trace(tree_builder_);
visitor->Trace(script_runner_);
visitor->Trace(preloader_);
visitor->Trace(task_runner_state_);
ScriptableDocumentParser::Trace(visitor);
HTMLParserScriptRunnerHost::Trace(visitor);
}
// Test-only accessor: true while a deferred tokenizer pump is scheduled.
bool HTMLDocumentParser::HasPendingWorkScheduledForTesting() const {
return task_runner_state_->IsScheduled();
}
// Detaches the parser: flushes fetch batches, deschedules any pending pump,
// detaches the base parser, script runner and tree builder, and then releases
// the scanners, the tokenizer metrics reporter, the tokenizer and the token.
// The release order at the end matters; see the comments below.
void HTMLDocumentParser::Detach() {
// Unwind any nested batch operations before being detached
FlushFetchBatch();
// Deschedule any pending tokenizer pumps.
task_runner_state_->SetState(
HTMLDocumentParserState::DeferredParserState::kNotScheduled);
DocumentParser::Detach();
if (script_runner_)
script_runner_->Detach();
if (tree_builder_)
tree_builder_->Detach();
// FIXME: It seems wrong that we would have a preload scanner here. Yet during
// fast/dom/HTMLScriptElement/script-load-events.html we do.
preload_scanner_.reset();
insertion_preload_scanner_.reset();
background_script_scanner_.Reset();
background_scanner_.Reset();
// `tokenizer_metrics_reporter_` has a reference to `tokenizer_`, so it is
// released before the tokenizer.
tokenizer_metrics_reporter_.reset();
// Oilpan: It is important to clear token_ to deallocate backing memory of
// HTMLToken::data_ and let the allocator reuse the memory for
// HTMLToken::data_ of a next HTMLDocumentParser. We need to clear
// tokenizer_ first because tokenizer_ has a raw pointer to token_.
// TODO(masonf): We can probably move tokenizer_ and token_ into the
// HTMLDocumentParser itself, instead of having them as Members.
tokenizer_.reset();
token_.reset();
}
// Stops parsing via the base class and marks any scheduled tokenizer pump as
// no longer scheduled.
void HTMLDocumentParser::StopParsing() {
DocumentParser::StopParsing();
task_runner_state_->SetState(
HTMLDocumentParserState::DeferredParserState::kNotScheduled);
}
// This kicks off "Once the user agent stops parsing" as described by:
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#the-end
//
// Performs a final tokenizer pump (skipped for prefetch-only documents), then
// moves the document to the interactive ready state when a script runner
// exists and attempts to run deferred scripts and end. The parser can become
// stopped or detached part-way through, so those conditions are re-checked
// between steps.
void HTMLDocumentParser::PrepareToStopParsing() {
TRACE_EVENT1("blink", "HTMLDocumentParser::PrepareToStopParsing", "parser",
(void*)this);
DCHECK(!HasInsertionPoint());
// If we've already been detached, e.g. in
// WebFrameTest.SwapMainFrameWhileLoading, bail out.
if (IsDetached())
return;
DCHECK(tokenizer_);
// NOTE: This pump should only ever emit buffered character tokens.
if (!GetDocument()->IsPrefetchOnly()) {
// For the duration of this pump, ShouldComplete() reports true and
// ShouldEndIfDelayed() reports false.
ShouldCompleteScope should_complete(task_runner_state_);
EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
PumpTokenizerIfPossible();
}
if (IsStopped())
return;
DocumentParser::PrepareToStopParsing();
// We will not have a scriptRunner when parsing a DocumentFragment.
if (script_runner_)
GetDocument()->SetReadyState(Document::kInteractive);
// Setting the ready state above can fire mutation event and detach us from
// underneath. In that case, just bail out.
if (IsDetached())
return;
if (script_runner_)
script_runner_->RecordMetricsAtParseEnd();
AttemptToRunDeferredScriptsAndEnd();
}
// The parser is paused while it waits for scripts or for stylesheets.
bool HTMLDocumentParser::IsPaused() const {
return IsWaitingForScripts() || task_runner_state_->WaitingForStylesheets();
}
// Forwards to the tree builder, which knows whether a fragment is being
// parsed.
bool HTMLDocumentParser::IsParsingFragment() const {
return tree_builder_->IsParsingFragment();
}
void HTMLDocumentParser::DeferredPumpTokenizerIfPossible() {
// This method is called asynchronously, continues building the HTML document.
// If we're scheduled for a tokenizer pump, then document should be attached
// and the parser should not be stopped, but sometimes a script completes
// loading (so we schedule a pump) but the Document is stopped in the meantime
// (e.g. fast/parser/iframe-onload-document-close-with-external-script.html).
DCHECK(task_runner_state_->GetState() ==
HTMLDocumentParserState::DeferredParserState::kNotScheduled ||
!IsDetached());
TRACE_EVENT2("blink", "HTMLDocumentParser::DeferredPumpTokenizerIfPossible",
"parser", (void*)this, "state",
task_runner_state_->GetStateAsString());
// This method is called when the post task is executed, marking the end of
// a yield. Report the yielded time.
DCHECK(yield_timer_);
if (metrics_reporter_) {
metrics_reporter_->AddYieldInterval(yield_timer_->Elapsed());
}
yield_timer_.reset();
bool should_call_delay_end =
task_runner_state_->GetState() ==
HTMLDocumentParserState::DeferredParserState::kScheduledWithEndIfDelayed;
if (task_runner_state_->IsScheduled()) {
task_runner_state_->SetState(
HTMLDocumentParserState::DeferredParserState::kNotScheduled);
if (should_call_delay_end) {
EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
PumpTokenizerIfPossible();
EndIfDelayed();
} else {
PumpTokenizerIfPossible();
}
}
}
// Called synchronously: builds the HTML document up to the current budget and
// then decides what happens next -- schedule another pump (if the pump
// yielded), attempt to end (if Finish was requested earlier), end if delayed,
// or schedule a deferred end.
void HTMLDocumentParser::PumpTokenizerIfPossible() {
  TRACE_EVENT1("blink", "HTMLDocumentParser::PumpTokenizerIfPossible", "parser",
               static_cast<void*>(this));
  CheckIfBlockingStylesheetAdded();

  const bool may_pump =
      !IsStopped() &&
      (!IsPaused() || task_runner_state_->ShouldEndIfDelayed());
  const bool yielded = may_pump && PumpTokenizer();

  if (yielded) {
    DCHECK(!task_runner_state_->ShouldComplete());
    SchedulePumpTokenizer();
    return;
  }

  if (task_runner_state_->ShouldAttemptToEndOnEOF()) {
    // ::Finish has been previously called and we've just finished
    // asynchronously parsing everything.
    if (metrics_reporter_)
      metrics_reporter_->ReportMetricsAtParseEnd();
    AttemptToEnd();
    return;
  }

  if (!task_runner_state_->ShouldEndIfDelayed())
    return;

  // We did not exceed the budget, or parsed everything there was to parse;
  // check whether the document should be completed now or later.
  const bool end_now =
      task_runner_state_->ShouldComplete() || IsStopped() || IsStopping();
  if (!end_now) {
    ScheduleEndIfDelayed();
    return;
  }
  if (metrics_reporter_)
    metrics_reporter_->ReportMetricsAtParseEnd();
  EndIfDelayed();
}
void HTMLDocumentParser::RunScriptsForPausedTreeBuilder() {
TRACE_EVENT1("blink", "HTMLDocumentParser::RunScriptsForPausedTreeBuilder",
"parser", (void*)this);
DCHECK(ScriptingContentIsAllowed(GetParserContentPolicy()));
TextPosition script_start_position = TextPosition::BelowRangePosition();
Element* script_element =
tree_builder_->TakeScriptToProcess(script_start_position);
// We will not have a scriptRunner when parsing a DocumentFragment.
if (script_runner_)
script_runner_->ProcessScriptElement(script_element, script_start_position);
CheckIfBlockingStylesheetAdded();
}
// Decides whether the pump loop may consume another token. If the tree builder
// holds a parser-blocking script, it is run first and the time spent doing so
// is added to |time_executing_script|. Returns kNoTokens when the parser is
// stopped or paused, kHaveTokensAfterScript when a script was just run, and
// kHaveTokens otherwise.
HTMLDocumentParser::NextTokenStatus HTMLDocumentParser::CanTakeNextToken(
    base::TimeDelta& time_executing_script) {
  if (IsStopped())
    return kNoTokens;

  NextTokenStatus status = kHaveTokens;
  // If we're paused waiting for a script, we try to execute scripts before
  // continuing.
  if (tree_builder_->HasParserBlockingScript()) {
    base::ElapsedTimer script_timer;
    RunScriptsForPausedTreeBuilder();
    time_executing_script += script_timer.Elapsed();
    status = kHaveTokensAfterScript;
  }

  // Running the script may have stopped or paused the parser.
  const bool blocked = IsStopped() || IsPaused();
  return blocked ? kNoTokens : status;
}
// Puts the tokenizer into the PLAINTEXT state.
void HTMLDocumentParser::ForcePlaintextForTextDocument() {
tokenizer_->SetState(HTMLTokenizer::kPLAINTEXTState);
}
// Runs the tokenize / tree-construction loop over the queued input until the
// input runs out, the parser stops or pauses, or the loop decides to yield.
// Returns true when the loop ended because it decided to yield, and false
// otherwise (including when the parser is stopped or parsing a fragment).
bool HTMLDocumentParser::PumpTokenizer() {
DCHECK(!GetDocument()->IsPrefetchOnly());
DCHECK(!IsStopped());
DCHECK(tokenizer_);
DCHECK(token_);
NestingLevelIncrementer session = task_runner_state_->ScopedPumpSession();
// If we're in kForceSynchronousParsing, always run until all available input
// is consumed. The same applies when completion was requested or when this
// pump is nested inside another pump session.
bool should_run_until_completion = task_runner_state_->ShouldComplete() ||
task_runner_state_->IsSynchronous() ||
task_runner_state_->InNestedPumpSession();
bool is_tracing;
TRACE_EVENT_CATEGORY_GROUP_ENABLED("blink", &is_tracing);
// Only assigned and only read when |is_tracing| is true.
unsigned starting_bytes;
if (is_tracing) {
starting_bytes = input_.length();
TRACE_EVENT_BEGIN2("blink", "HTMLDocumentParser::PumpTokenizer",
"should_complete", should_run_until_completion,
"bytes_queued", starting_bytes);
}
// We tell the InspectorInstrumentation about every pump, even if we end up
// pumping nothing. It can filter out empty pumps itself.
// FIXME: input_.Current().length() is only accurate if we end up parsing the
// whole buffer in this pump. We should pass how much we parsed as part of
// DidWriteHTML instead of WillWriteHTML.
probe::ParseHTML probe(GetDocument(), this);
FetchBatchScope fetch_batch(this);
bool should_yield = false;
// If we've yielded more than kNumYieldsWithDefaultBudget times, then set the
// budget to a very large number, to attempt to consume all available tokens
// in one go. This heuristic is intended to allow a quick first contentful
// paint, followed by a larger rendering lifecycle that processes the
// remainder of the page.
// NOTE(review): 1e7 is a double literal, so the conditional expression has
// type double and is converted back to int on initialization.
int budget =
(task_runner_state_->TimesYielded() <= kNumYieldsWithDefaultBudget)
? kDefaultMaxTokenizationBudget
: 1e7;
// The time-based budget is only consulted when the timed parser budget
// feature is enabled.
base::TimeDelta timed_budget;
if (TimedParserBudgetEnabled())
timed_budget = GetTimedBudget(task_runner_state_->TimesYielded());
base::ElapsedTimer chunk_parsing_timer;
unsigned tokens_parsed = 0;
base::TimeDelta time_executing_script;
while (!should_yield) {
if (task_runner_state_->ShouldProcessPreloads())
FlushPendingPreloads();
const auto next_token_status = CanTakeNextToken(time_executing_script);
if (next_token_status == kNoTokens) {
// No tokens left to process in this pump, so break
break;
} else if (next_token_status == kHaveTokensAfterScript &&
task_runner_state_->HaveExitedHeader()) {
// Just executed a parser-blocking script in the body. We'd probably like
// to yield at some point soon, especially if we're in "extended budget"
// mode. So reduce the budget back to at most the default.
budget = std::min(budget, kDefaultMaxTokenizationBudget);
if (TimedParserBudgetEnabled()) {
timed_budget = std::min(timed_budget, chunk_parsing_timer.Elapsed() +
GetDefaultTimedBudget());
}
}
{
RUNTIME_CALL_TIMER_SCOPE(
V8PerIsolateData::MainThreadIsolate(),
RuntimeCallStats::CounterId::kHTMLTokenizerNextToken);
if (tokenizer_metrics_reporter_)
tokenizer_metrics_reporter_->WillProcessNextToken(input_.Current());
// The tokenizer could not produce a token from the current input; leave the
// loop.
if (!tokenizer_->NextToken(input_.Current(), Token()))
break;
budget--;
tokens_parsed++;
}
ConstructTreeFromHTMLToken();
if (!should_run_until_completion && !IsPaused()) {
DCHECK_EQ(task_runner_state_->GetMode(), kAllowDeferredParsing);
if (TimedParserBudgetEnabled())
should_yield = chunk_parsing_timer.Elapsed() >= timed_budget;
else
should_yield = budget <= 0;
should_yield |= scheduler_->ShouldYieldForHighPriorityWork();
// Budget and priority based yields only apply once the header has been
// exited.
should_yield &= task_runner_state_->HaveExitedHeader();
// Yield for preloads even if we haven't exited the header, since they
// should be dispatched as soon as possible.
if (task_runner_state_->ShouldYieldForPreloads())
should_yield |= HasPendingPreloads();
} else {
should_yield = false;
}
DCHECK(IsStopped() || Token().IsUninitialized());
}
if (is_tracing) {
TRACE_EVENT_END2("blink", "HTMLDocumentParser::PumpTokenizer",
"parsed_tokens", tokens_parsed, "parsed_bytes",
starting_bytes - input_.length());
}
const bool is_stopped_or_parsing_fragment =
IsStopped() || IsParsingFragment();
if (!is_stopped_or_parsing_fragment) {
// There should only be PendingText left since the tree-builder always
// flushes the task queue before returning. In case that ever changes,
// crash.
tree_builder_->Flush(kFlushAlways);
CHECK(!IsStopped());
}
// The reported chunk time excludes the time spent executing scripts.
if (tokens_parsed && metrics_reporter_) {
metrics_reporter_->AddChunk(
chunk_parsing_timer.Elapsed() - time_executing_script, tokens_parsed);
}
if (is_stopped_or_parsing_fragment)
return false;
if (IsPaused()) {
DCHECK_EQ(tokenizer_->GetState(), HTMLTokenizer::kDataState);
// While paused, scan the remaining input for preloads, unless there is no
// preloader or a background scanner exists.
if (preloader_ && !background_scanner_) {
if (!preload_scanner_) {
preload_scanner_ = CreatePreloadScanner(
TokenPreloadScanner::ScannerType::kMainDocument);
preload_scanner_->AppendToEnd(input_.Current());
}
ScanAndPreload(preload_scanner_.get());
}
}
// should_run_until_completion implies that we should not yield
CHECK(!should_run_until_completion || !should_yield);
if (should_yield)
task_runner_state_->MarkYield();
return should_yield;
}
// Queues a DeferredPumpTokenizerIfPossible() task on |loading_task_runner_|
// and marks the parser state as kScheduled. Does nothing when the state
// already reports a scheduled pump.
void HTMLDocumentParser::SchedulePumpTokenizer() {
  TRACE_EVENT0("blink", "HTMLDocumentParser::SchedulePumpTokenizer");
  DCHECK(!IsStopped());
  DCHECK(!task_runner_state_->InPumpSession());
  DCHECK(!task_runner_state_->ShouldComplete());

  // Skip posting when a pump is already queued.
  if (!task_runner_state_->IsScheduled()) {
    loading_task_runner_->PostTask(
        FROM_HERE,
        WTF::Bind(&HTMLDocumentParser::DeferredPumpTokenizerIfPossible,
                  WrapPersistent(this)));
    task_runner_state_->SetState(
        HTMLDocumentParserState::DeferredParserState::kScheduled);
    // Replace |yield_timer_| with a timer that starts counting from now.
    yield_timer_ = std::make_unique<base::ElapsedTimer>();
  }
}
// Ensures a deferred pump task is queued and sets the parser state to
// kScheduledWithEndIfDelayed, so that the queued pump is the variant that
// calls EndIfDelayed afterwards.
void HTMLDocumentParser::ScheduleEndIfDelayed() {
  TRACE_EVENT0("blink", "HTMLDocumentParser::ScheduleEndIfDelayed");
  DCHECK(!IsStopped());
  DCHECK(!task_runner_state_->InPumpSession());
  DCHECK(!task_runner_state_->ShouldComplete());

  // A new task is only posted when none is pending.
  const bool pump_already_queued = task_runner_state_->IsScheduled();
  if (!pump_already_queued) {
    loading_task_runner_->PostTask(
        FROM_HERE,
        WTF::Bind(&HTMLDocumentParser::DeferredPumpTokenizerIfPossible,
                  WrapPersistent(this)));
    yield_timer_ = std::make_unique<base::ElapsedTimer>();
  }

  // Whether the pump was queued just now or earlier, upgrade it to the kind
  // that ends the document after pumping.
  task_runner_state_->SetState(
      HTMLDocumentParserState::DeferredParserState::kScheduledWithEndIfDelayed);
}
// Converts the current tokenizer token into an AtomicHTMLToken, clears the
// tokenizer token, and hands the atomic token to the tree builder.
void HTMLDocumentParser::ConstructTreeFromHTMLToken() {
  DCHECK(!GetDocument()->IsPrefetchOnly());
  AtomicHTMLToken atomic_token(Token());

  // Once the document has a body, record that the header has been exited.
  // The flag is only ever set here, never cleared.
  if (!task_runner_state_->HaveExitedHeader() && GetDocument()->body())
    task_runner_state_->SetExitedHeader();

  // ConstructTree() may synchronously re-enter the parser, so the shared
  // token must be cleared before the tree builder runs.
  Token().Clear();

  if (tokenizer_metrics_reporter_) {
    tokenizer_metrics_reporter_->WillConstructTreeFromToken(atomic_token,
                                                            input_.Current());
  }

  tree_builder_->ConstructTree(&atomic_token);
  CheckIfBlockingStylesheetAdded();
}
// Returns true when the input stream has an insertion point, or when this
// parser was created by script and has not yet seen the end of file.
bool HTMLDocumentParser::HasInsertionPoint() {
  if (input_.HasInsertionPoint())
    return true;

  // FIXME: The WasCreatedByScript() case might not be fully correct. Our model
  // of the EOF character differs slightly from the one in the spec: we treat
  // network-sourced and script-sourced input streams uniformly, whereas the
  // spec treats them differently.
  return WasCreatedByScript() && !input_.HaveSeenEndOfFile();
}
// Inserts |source| at the current insertion point of the input stream and
// pumps the tokenizer over it before returning. Does nothing when the parser
// is stopped or |source| is empty.
void HTMLDocumentParser::insert(const String& source) {
  // No need to do any processing if the supplied text is empty.
  if (IsStopped() || source.IsEmpty())
    return;

  TRACE_EVENT2("blink", "HTMLDocumentParser::insert", "source_length",
               source.length(), "parser", static_cast<void*>(this));

  // |source| is known to be non-empty here because of the early return above,
  // so only the reporter itself needs to be checked.
  if (tokenizer_metrics_reporter_)
    tokenizer_metrics_reporter_->OnDocumentWrite(input_.Current());

  SegmentedString excluded_line_number_source(source);
  excluded_line_number_source.SetExcludeLineNumbers();
  input_.InsertAtCurrentInsertionPoint(excluded_line_number_source);

  // Pump the tokenizer to build the document from the given insert point.
  // Should process everything available and not defer anything.
  ShouldCompleteScope should_complete(task_runner_state_);
  EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
  // Call EndIfDelayed manually at the end to maintain preload behaviour.
  PumpTokenizerIfPossible();

  if (IsPaused()) {
    // Check the document.write() output with a separate preload scanner as
    // the main scanner can't deal with insertions.
    if (!insertion_preload_scanner_) {
      insertion_preload_scanner_ =
          CreatePreloadScanner(TokenPreloadScanner::ScannerType::kInsertion);
    }
    insertion_preload_scanner_->AppendToEnd(source);
    // Scanning dispatches preloads, which requires a preloader.
    if (preloader_)
      ScanAndPreload(insertion_preload_scanner_.get());
  }

  EndIfDelayed();
}
// Feeds a decoded chunk of markup into the parser. The chunk is offered to the
// background scanners (ScanInBackground), optionally to the main-thread
// preload scanner, and is then appended to |input_|. Unless this call is
// nested inside a pump session or the parser is preloading, FinishAppend() is
// invoked to process the new data.
//
// Does nothing if the parser is already stopped. For prefetch-only documents
// the chunk is only scanned for preloads and is never appended to |input_|.
void HTMLDocumentParser::Append(const String& input_source) {
TRACE_EVENT2("blink", "HTMLDocumentParser::append", "size",
input_source.length(), "parser", (void*)this);
if (IsStopped())
return;
const SegmentedString source(input_source);
ScanInBackground(input_source);
// Lazily create the main-thread preload scanner. This only happens when no
// background scanner or main-thread scanner exists yet, a preloader is
// present and the document URL is valid.
if (!background_scanner_ && !preload_scanner_ && preloader_ &&
GetDocument()->Url().IsValid() &&
(!task_runner_state_->IsSynchronous() ||
GetDocument()->IsPrefetchOnly() || IsPaused())) {
// If we're operating with a budget, we need to create a preload scanner to
// make sure that parser-blocking Javascript requests are dispatched in
// plenty of time, which prevents unnecessary delays.
// When parsing without a budget (e.g. for HTML fragment parsing), it's
// additional overhead to scan the string unless the parser's already
// paused whilst executing a script.
preload_scanner_ =
CreatePreloadScanner(TokenPreloadScanner::ScannerType::kMainDocument);
}
if (GetDocument()->IsPrefetchOnly()) {
if (preload_scanner_) {
preload_scanner_->AppendToEnd(source);
// TODO(Richard.Townsend@arm.com): add test coverage of this branch.
// The crash in crbug.com/1166786 indicates that text documents are being
// speculatively prefetched.
ScanAndPreload(preload_scanner_.get());
}
// Return after the preload scanner, do not actually parse the document.
return;
}
if (preload_scanner_) {
// The scanner always receives the data; whether it is scanned right away
// depends on the condition below.
preload_scanner_->AppendToEnd(source);
if (task_runner_state_->GetMode() == kAllowDeferredParsing &&
(IsPaused() || !task_runner_state_->SeenFirstByte())) {
// Should scan and preload if the parser's paused waiting for a resource,
// or if we're starting a document for the first time (we want to at least
// prefetch anything that's in the <head> section).
ScanAndPreload(preload_scanner_.get());
}
}
if (tokenizer_metrics_reporter_)
tokenizer_metrics_reporter_->WillAppend(input_source);
input_.AppendToEnd(source);
// Must stay after the SeenFirstByte() check above, which relies on the flag
// still being unset for the first chunk.
task_runner_state_->MarkSeenFirstByte();
// Add input_source.length() to "file size" metric.
if (metrics_reporter_)
metrics_reporter_->AddInput(input_source.length());
if (task_runner_state_->InPumpSession()) {
// We've gotten data off the network in a nested write. We don't want to
// consume any more of the input stream now. Do not worry. We'll consume
// this data in a less-nested write().
return;
}
// If we are preloading, FinishAppend() will be called later in
// CommitPreloadedData().
if (IsPreloading())
return;
FinishAppend();
}
// Arranges for newly appended data to be tokenized: either by scheduling a
// deferred pump or by pumping immediately.
void HTMLDocumentParser::FinishAppend() {
  // A deferred pump is used when deferred parsing is allowed and the parser
  // is not required to run to completion. Scheduling gives paint a chance to
  // happen, and devtools somehow depends on it for js loads.
  const bool defer_pump =
      task_runner_state_->GetMode() ==
          ParserSynchronizationPolicy::kAllowDeferredParsing &&
      !task_runner_state_->ShouldComplete();
  if (defer_pump) {
    SchedulePumpTokenizer();
    return;
  }
  PumpTokenizerIfPossible();
}
// Leaves preloading mode. If data has been appended in the meantime and the
// parser has not been stopped, runs the FinishAppend() step that Append()
// skipped while preloading.
void HTMLDocumentParser::CommitPreloadedData() {
  if (IsPreloading()) {
    SetIsPreloading(false);
    const bool seen_first_byte = task_runner_state_->SeenFirstByte();
    if (seen_first_byte && !IsStopped())
      FinishAppend();
  }
}
// Final step of parsing: tells the tree builder that parsing is finished,
// drops the preloader, and stops the underlying DocumentParser.
void HTMLDocumentParser::end() {
  DCHECK(!IsDetached());

  // Informs the rest of the engine that parsing is really finished (and
  // deletes this).
  tree_builder_->Finished();

  // All preloads should be done by now, so the preloader can be released.
  preloader_ = nullptr;

  DocumentParser::StopParsing();
}
// Runs the scripts that were waiting for parsing to finish and, if the script
// runner reports success (or there is no script runner), ends parsing.
void HTMLDocumentParser::AttemptToRunDeferredScriptsAndEnd() {
  DCHECK(IsStopping());
  DCHECK(!HasInsertionPoint());

  // A false result from the script runner means end() must not be called yet.
  const bool ready_to_end =
      !script_runner_ || script_runner_->ExecuteScriptsWaitingForParsing();
  if (ready_to_end)
    end();
}
// Returns true while ending the document has to wait: during a pump session,
// while paused, while a script is executing, or while a pump is scheduled.
bool HTMLDocumentParser::ShouldDelayEnd() const {
  if (task_runner_state_->InPumpSession())
    return true;
  if (IsPaused())
    return true;
  if (IsExecutingScript())
    return true;
  return task_runner_state_->IsScheduled();
}
// Tries to stop parsing after Finish() has signalled that no more data will
// arrive. If ending has to wait (for example on an external script that is
// still loading), records that the end was delayed instead.
void HTMLDocumentParser::AttemptToEnd() {
  TRACE_EVENT1("blink", "HTMLDocumentParser::AttemptToEnd", "parser",
               static_cast<void*>(this));
  DCHECK(task_runner_state_->ShouldAttemptToEndOnEOF());
  AttemptToEndForbiddenScope should_not_attempt_to_end(task_runner_state_);

  // We should only be in this state once after calling Finish.
  if (!ShouldDelayEnd()) {
    PrepareToStopParsing();
    return;
  }

  // Work is still pending; future control flow should pass to EndIfDelayed.
  task_runner_state_->SetEndWasDelayed(true);
}
// Completes an end that AttemptToEnd() previously delayed, provided nothing
// requires delaying it any longer.
void HTMLDocumentParser::EndIfDelayed() {
  TRACE_EVENT1("blink", "HTMLDocumentParser::EndIfDelayed", "parser",
               static_cast<void*>(this));
  ShouldCompleteScope should_complete(task_runner_state_);
  EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);

  // A detached parser has nothing left to end.
  if (IsDetached())
    return;

  // Nothing to do unless an end was delayed earlier.
  if (!task_runner_state_->EndWasDelayed())
    return;

  // Something still requires the end to wait.
  if (ShouldDelayEnd())
    return;

  task_runner_state_->SetEndWasDelayed(false);
  PrepareToStopParsing();
}
// Called when no more data will arrive: flushes the decoder, marks the input
// stream as ended, and attempts to end the document unless a scheduled pump
// will take care of that later.
void HTMLDocumentParser::Finish() {
  ShouldCompleteScope should_complete(task_runner_state_);
  EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);

  Flush();
  if (IsDetached())
    return;

  // Nothing more will come off the network, so tell the input stream it has
  // reached the end of file. Finish() can be called more than once if the
  // first call does not reach end(), hence the guard.
  if (!input_.HaveSeenEndOfFile())
    input_.MarkEndOfFile();

  // If there's any deferred work remaining, signal that we want to end the
  // document once all of it is complete.
  task_runner_state_->SetAttemptToEndOnEOF();

  // When a pump is scheduled the end attempt is skipped here, except for
  // prefetch-only documents, which always attempt to end immediately.
  if (!task_runner_state_->IsScheduled() || GetDocument()->IsPrefetchOnly())
    AttemptToEnd();
}
// Returns whether the script runner is currently executing a script. A parser
// without a script runner never is.
bool HTMLDocumentParser::IsExecutingScript() const {
  return script_runner_ && script_runner_->IsExecutingScript();
}
// Returns the current line of the input string being consumed.
OrdinalNumber HTMLDocumentParser::LineNumber() const {
  const SegmentedString& current_input = input_.Current();
  return current_input.CurrentLine();
}
// Returns the current line and column of the input string being consumed,
// packaged as a TextPosition.
TextPosition HTMLDocumentParser::GetTextPosition() const {
  const SegmentedString& current_input = input_.Current();
  return TextPosition(current_input.CurrentLine(),
                      current_input.CurrentColumn());
}
// Returns true while the parser has to wait on a parser-blocking script held
// by the tree builder or the script runner, or while the reentry permit's
// pause flag is set. Always false for fragment parsing.
bool HTMLDocumentParser::IsWaitingForScripts() const {
  if (IsParsingFragment()) {
    // HTMLTreeBuilder may have a parser blocking script element, but we
    // ignore it during fragment parsing.
    DCHECK(!(tree_builder_->HasParserBlockingScript() ||
             (script_runner_ && script_runner_->HasParserBlockingScript()) ||
             reentry_permit_->ParserPauseFlag()));
    return false;
  }

  // When the tree builder encounters a </script> tag, it returns to the
  // HTMLDocumentParser, where the script is transferred from the tree builder
  // to the script runner. The script runner holds the script until it is
  // loaded and run. For that whole time the parser counts as "waiting for a
  // script", which makes it run the preload scanner and delays completion of
  // parsing.
  const bool builder_blocked = tree_builder_->HasParserBlockingScript();
  const bool runner_blocked =
      script_runner_ && script_runner_->HasParserBlockingScript();

  // The parser is paused while the script runner holds a blocking script, so
  // both objects should never hold one at the same time.
  DCHECK(!(builder_blocked && runner_blocked));

  // If either object has a blocking script, the parser should be paused.
  if (builder_blocked || runner_blocked)
    return true;
  return reentry_permit_->ParserPauseFlag();
}
// Restarts tokenization after a parser-blocking script has completed, either
// by scheduling a deferred pump or by pumping synchronously.
void HTMLDocumentParser::ResumeParsingAfterPause() {
  TRACE_EVENT1("blink", "HTMLDocumentParser::ResumeParsingAfterPause", "parser",
               static_cast<void*>(this));
  DCHECK(!IsExecutingScript());
  DCHECK(!IsPaused());

  CheckIfBlockingStylesheetAdded();
  // IsPaused() is consulted again here even though it was asserted false
  // above; presumably the stylesheet check can put the parser back into a
  // paused state -- confirm against IsPaused().
  if (IsStopped() || IsPaused() || IsDetached())
    return;

  DCHECK(tokenizer_);
  insertion_preload_scanner_.reset();

  const bool can_defer =
      task_runner_state_->GetMode() == kAllowDeferredParsing &&
      !task_runner_state_->ShouldComplete() &&
      !task_runner_state_->InPumpSession();
  if (can_defer) {
    SchedulePumpTokenizer();
    return;
  }

  // Pump synchronously, with the should-complete scope held for the duration
  // of the pump.
  ShouldCompleteScope should_complete(task_runner_state_);
  PumpTokenizerIfPossible();
}
// If a main-thread preload scanner exists, feeds it the current input string
// and scans it for preloads. Otherwise does nothing beyond tracing.
void HTMLDocumentParser::AppendCurrentInputStreamToPreloadScannerAndScan() {
  TRACE_EVENT1(
      "blink",
      "HTMLDocumentParser::AppendCurrentInputStreamToPreloadScannerAndScan",
      "parser", static_cast<void*>(this));
  if (!preload_scanner_)
    return;

  // A preload scanner is only expected to exist alongside a preloader.
  DCHECK(preloader_);
  preload_scanner_->AppendToEnd(input_.Current());
  ScanAndPreload(preload_scanner_.get());
}
// Notification that a script the parser was waiting on has loaded. Runs the
// scripts waiting for load and resumes parsing, or, if the parser is already
// stopping, runs the deferred scripts and ends.
void HTMLDocumentParser::NotifyScriptLoaded() {
  TRACE_EVENT1("blink", "HTMLDocumentParser::NotifyScriptLoaded", "parser",
               static_cast<void*>(this));
  DCHECK(script_runner_);
  DCHECK(!IsExecutingScript());

  scheduler::CooperativeSchedulingManager::AllowedStackScope
      allowed_stack_scope(scheduler::CooperativeSchedulingManager::Instance());

  if (IsStopped())
    return;

  if (!IsStopping()) {
    script_runner_->ExecuteScriptsWaitingForLoad();
    // Only resume if running the scripts did not leave the parser paused.
    if (!IsPaused())
      ResumeParsingAfterPause();
    return;
  }

  AttemptToRunDeferredScriptsAndEnd();
}
// Called once the document is ready to execute scripts again. Clears the
// waiting-for-stylesheets flag, runs scripts that were blocked on resources,
// and resumes parsing (or finishes up if the parser is stopping).
void HTMLDocumentParser::ExecuteScriptsWaitingForResources() {
  TRACE_EVENT0("blink",
               "HTMLDocumentParser::ExecuteScriptsWaitingForResources");
  if (IsStopped())
    return;

  DCHECK(GetDocument()->IsScriptExecutionReady());
  if (task_runner_state_->WaitingForStylesheets())
    task_runner_state_->SetWaitingForStylesheets(false);

  if (!IsStopping()) {
    // Document only calls this when the Document owns the DocumentParser, so
    // this will not be called in the DocumentFragment case and a script
    // runner must exist.
    DCHECK(script_runner_);
    script_runner_->ExecuteScriptsWaitingForResources();
    // Only resume if running the scripts did not leave the parser paused.
    if (!IsPaused())
      ResumeParsingAfterPause();
    return;
  }

  AttemptToRunDeferredScriptsAndEnd();
}
// Records that a pending parser-blocking stylesheet has been added.
void HTMLDocumentParser::DidAddPendingParserBlockingStylesheet() {
  // In-body CSS doesn't block painting, but the parser needs to pause so that
  // the DOM doesn't gain elements whose style may depend on that CSS.
  // A stylesheet can be added and removed while a single token is being
  // parsed, so parsing is not blocked here. Only the "added" state is tracked,
  // in case the sheet does persist beyond that token.
  task_runner_state_->SetAddedPendingParserBlockingStylesheet(true);
}
// Clears the record that a pending parser-blocking stylesheet was added.
void HTMLDocumentParser::DidLoadAllPendingParserBlockingStylesheets() {
  // Only the stylesheet flag is toggled here (mostly for synchronous sheets).
  // The document also calls ExecuteScriptsWaitingForResources(), and that is
  // where the parser restarts. Resuming here as well would attempt to resume
  // twice, which could cause state machine issues.
  task_runner_state_->SetAddedPendingParserBlockingStylesheet(false);
}
// If a pending parser-blocking stylesheet has been recorded as added, consumes
// that record and marks the parser as waiting for stylesheets.
void HTMLDocumentParser::CheckIfBlockingStylesheetAdded() {
  if (!task_runner_state_->AddedPendingParserBlockingStylesheet())
    return;

  task_runner_state_->SetAddedPendingParserBlockingStylesheet(false);
  task_runner_state_->SetWaitingForStylesheets(true);
}
// Parses |source| into |fragment| using a temporary parser constructed with
// |context_element| and |parser_content_policy|. The parser is fed the whole
// string, finished, and detached before returning.
void HTMLDocumentParser::ParseDocumentFragment(
    const String& source,
    DocumentFragment* fragment,
    Element* context_element,
    ParserContentPolicy parser_content_policy) {
  auto* fragment_parser = MakeGarbageCollected<HTMLDocumentParser>(
      fragment, context_element, parser_content_policy);

  fragment_parser->Append(source);
  fragment_parser->Finish();

  // Detaching lets ~DocumentParser assert that the parser was detached before
  // it is destroyed.
  fragment_parser->Detach();
}
// Forwards |length| raw bytes starting at |data| to
// DecodedDataDocumentParser::AppendBytes. Empty input and input arriving
// after the parser has stopped are dropped. Must be called on the main thread.
void HTMLDocumentParser::AppendBytes(const char* data, size_t length) {
  TRACE_EVENT2("blink", "HTMLDocumentParser::appendBytes", "size",
               static_cast<unsigned>(length), "parser",
               static_cast<void*>(this));
  DCHECK(Thread::MainThread()->IsCurrentThread());

  if (length == 0 || IsStopped())
    return;

  DecodedDataDocumentParser::AppendBytes(data, length);
}
// Flushes the base DecodedDataDocumentParser, unless the parser is detached
// or still needs a decoder.
void HTMLDocumentParser::Flush() {
  TRACE_EVENT1("blink", "HTMLDocumentParser::Flush", "parser",
               static_cast<void*>(this));

  if (IsDetached())
    return;

  // If we've got no decoder, we never received any data, so there is nothing
  // to flush.
  if (NeedsDecoder())
    return;

  DecodedDataDocumentParser::Flush();
}
// Hands ownership of |decoder| to the base DecodedDataDocumentParser.
// |decoder| must not be null.
void HTMLDocumentParser::SetDecoder(
    std::unique_ptr<TextResourceDecoder> decoder) {
  DCHECK(decoder);
  DecodedDataDocumentParser::SetDecoder(std::move(decoder));
}
void HTMLDocumentParser::DocumentElementAvailable() {
TRACE_EVENT0("blink,loading", "HTMLDocumentParser::DocumentElementAvailable");
Document* document = GetDocument();
DCHECK(document);
DCHECK(document->documentElement());
Element* documentElement = GetDocument()->documentElement();
if (documentElement->hasAttribute(u"\u26A1") ||
documentElement->hasAttribute("amp") ||
documentElement->hasAttribute("i-amphtml-layout")) {
// The DocumentLoader fetches a main resource and handles the result.
// But it may not be available if JavaScript appends HTML to the page later
// in the page's lifetime. This can happen both from in-page JavaScript and
// from extensions. See example callstacks linked from crbug.com/931330.
if (document->Loader()) {
document->Loader()->DidObserveLoadingBehavior(
kLoadingBehaviorAmpDocumentLoaded);
}
}
if (preloader_)
FetchQueuedPreloads();
}
// Creates a main-thread HTMLPreloadScanner of the requested |scanner_type| for
// this parser's document. In DCHECK builds, also verifies that a main document
// scanner is only requested in situations where one is expected.
std::unique_ptr<HTMLPreloadScanner> HTMLDocumentParser::CreatePreloadScanner(
    TokenPreloadScanner::ScannerType scanner_type) {
#if DCHECK_IS_ON()
  const bool for_main_document =
      scanner_type == TokenPreloadScanner::ScannerType::kMainDocument;
  if (for_main_document) {
    // A main document scanner should never be created if scanning is already
    // happening in the background.
    DCHECK(!background_scanner_);

    // With background scanning enabled, the main document scanner is only
    // used when the parser is paused, for prefetch documents, or when preload
    // scanning is disabled in tests (HTMLPreloadScanner handles that setting
    // internally).
    DCHECK(!ThreadedPreloadScannerEnabled() || IsPaused() ||
           GetDocument()->IsPrefetchOnly() ||
           !IsPreloadScanningEnabled(GetDocument()));
  }
#endif
  return HTMLPreloadScanner::Create(*GetDocument(), options_, scanner_type);
}
// Runs |scanner| against the document's valid base element URL, processes the
// resulting preload data, and records the elapsed time in the
// "Blink.ScanAndPreloadTime" histogram family. Requires a preloader.
void HTMLDocumentParser::ScanAndPreload(HTMLPreloadScanner* scanner) {
  TRACE_EVENT0("blink", "HTMLDocumentParser::ScanAndPreload");
  DCHECK(preloader_);

  base::ElapsedTimer timer;
  auto scanned_data = scanner->Scan(GetDocument()->ValidBaseElementURL());
  ProcessPreloadData(std::move(scanned_data));

  // The suffix is computed after processing, as in the timed region's end.
  const std::string histogram_name =
      base::StrCat({"Blink.ScanAndPreloadTime", GetPreloadHistogramSuffix()});
  base::UmaHistogramTimes(histogram_name, timer.Elapsed());
}
// Consumes one batch of preload scanner output:
//  - passes every entry of |preload_data->meta_ch_values| to
//    HTMLMetaElement::ProcessMetaCH;
//  - when the document has a loader and deferred parsing is allowed,
//    optionally updates the viewport and dispatches Link header preloads;
//  - forwards |preload_data->has_csp_meta_tag| to the task runner state;
//  - moves the scanned requests into |queued_preloads_| and calls
//    FetchQueuedPreloads().
void HTMLDocumentParser::ProcessPreloadData(
std::unique_ptr<PendingPreloadData> preload_data) {
for (const auto& value : preload_data->meta_ch_values) {
HTMLMetaElement::ProcessMetaCH(*GetDocument(), value.value, value.type,
value.is_doc_preloader);
}
// NOTE(review): FetchBatchScope presumably brackets the rest of this function
// with StartFetchBatch()/EndFetchBatch() -- confirm against its definition.
FetchBatchScope fetch_batch(this);
// Make sure that the viewport is up-to-date, so that the correct viewport
// dimensions will be fed to the preload scanner.
if (GetDocument()->Loader() &&
task_runner_state_->GetMode() == kAllowDeferredParsing) {
if (preload_data->viewport.has_value()) {
GetDocument()->GetStyleEngine().UpdateViewport();
}
// Link header preloads are dispatched while the state reports that they are
// still needed; DispatchedLinkHeaderPreloads() is called once done.
if (task_runner_state_->NeedsLinkHeaderPreloadsDispatch()) {
{
// The extra braces limit this trace event to the dispatch call.
TRACE_EVENT0("blink", "HTMLDocumentParser::DispatchLinkHeaderPreloads");
GetDocument()->Loader()->DispatchLinkHeaderPreloads(
base::OptionalOrNullptr(preload_data->viewport),
PreloadHelper::kOnlyLoadMedia);
}
if (GetDocument()->Loader()->GetPrefetchedSignedExchangeManager()) {
TRACE_EVENT0("blink",
"HTMLDocumentParser::DispatchSignedExchangeManager");
// Link header preloads for prefetched signed exchanges won't be started
// until StartPrefetchedLinkHeaderPreloads() is called. See the header
// comment of PrefetchedSignedExchangeManager.
GetDocument()
->Loader()
->GetPrefetchedSignedExchangeManager()
->StartPrefetchedLinkHeaderPreloads();
}
task_runner_state_->DispatchedLinkHeaderPreloads();
}
}
task_runner_state_->SetSeenCSPMetaTag(preload_data->has_csp_meta_tag);
// Ownership of each request moves into |queued_preloads_|.
for (auto& request : preload_data->requests) {
queued_preloads_.push_back(std::move(request));
}
FetchQueuedPreloads();
}
// Hands all queued preload requests to the preloader and records how long
// that took in the "Blink.FetchQueuedPreloadsTime" histogram family. Nothing
// is recorded when the queue is empty. Requires a preloader.
void HTMLDocumentParser::FetchQueuedPreloads() {
  DCHECK(preloader_);
  TRACE_EVENT0("blink", "HTMLDocumentParser::FetchQueuedPreloads");

  if (queued_preloads_.IsEmpty())
    return;

  base::ElapsedTimer timer;
  preloader_->TakeAndPreload(queued_preloads_);
  base::UmaHistogramTimes(base::StrCat({"Blink.FetchQueuedPreloadsTime",
                                        GetPreloadHistogramSuffix()}),
                          timer.Elapsed());
}
// Builds the suffix appended to preload timing histogram names. It has two
// parts: ".MainFrame" or ".Subframe" depending on whether the document is in
// the outermost main frame, then ".NonInitial" or ".Initial" depending on
// whether the first byte has already been seen.
std::string HTMLDocumentParser::GetPreloadHistogramSuffix() {
  // A missing document is reported as a subframe.
  const char* frame_part =
      (GetDocument() && GetDocument()->IsInOutermostMainFrame()) ? ".MainFrame"
                                                                 : ".Subframe";
  const char* phase_part =
      task_runner_state_->SeenFirstByte() ? ".NonInitial" : ".Initial";
  return base::StrCat({frame_part, phase_part});
}
// Forwards |source| to a background scanner. Nothing is scanned for
// synchronous parsers or when the document URL is invalid.
//
// When threaded preload scanning applies, |source| goes to
// |background_scanner_| (created on first use) and the results are delivered
// through AddPreloadDataOnBackgroundThread(). Otherwise, if inline script
// precompilation is enabled, |source| goes to |background_script_scanner_|
// (also created on first use).
void HTMLDocumentParser::ScanInBackground(const String& source) {
if (task_runner_state_->IsSynchronous() || !GetDocument()->Url().IsValid())
return;
if (ThreadedPreloadScannerEnabled() && preloader_ &&
// TODO(crbug.com/1329535): Support scanning prefetch documents in the
// background.
!GetDocument()->IsPrefetchOnly() &&
IsPreloadScanningEnabled(GetDocument())) {
// The background scanner should never be created if a main thread scanner
// is already available.
DCHECK(!preload_scanner_);
if (!background_scanner_) {
background_scanner_ = HTMLPreloadScanner::CreateBackground(
this, options_, GetPreloadScannerThread()->GetTaskRunner());
}
// The scan is issued as an async call on |background_scanner_|. The bound
// callback carries a cross-thread persistent handle to this parser and the
// kInternalLoading task runner, which AddPreloadDataOnBackgroundThread()
// receives as its first argument.
background_scanner_.AsyncCall(&HTMLPreloadScanner::ScanInBackground)
.WithArgs(
source, GetDocument()->ValidBaseElementURL(),
CrossThreadBindRepeating(
&HTMLDocumentParser::AddPreloadDataOnBackgroundThread,
WrapCrossThreadPersistent(this),
GetDocument()->GetTaskRunner(TaskType::kInternalLoading)));
return;
}
if (!PrecompileInlineScriptsEnabled())
return;
// The script scanner path is only reached when no background preload scanner
// exists.
DCHECK(!background_scanner_);
if (!background_script_scanner_) {
background_script_scanner_ = BackgroundHTMLScanner::Create(options_, this);
}
background_script_scanner_.AsyncCall(&BackgroundHTMLScanner::Scan)
.WithArgs(source);
}
// Runs off the main thread. Appends |preload_data| to the locked pending
// queue and, if the queue was empty beforehand, posts FlushPendingPreloads()
// to |task_runner|.
void HTMLDocumentParser::AddPreloadDataOnBackgroundThread(
    scoped_refptr<base::SequencedTaskRunner> task_runner,
    std::unique_ptr<PendingPreloadData> preload_data) {
  DCHECK(!IsMainThread());

  bool queue_was_empty = false;
  {
    base::AutoLock lock(pending_preload_lock_);
    queue_was_empty = pending_preload_data_.IsEmpty();
    pending_preload_data_.push_back(std::move(preload_data));
  }

  // A non-empty queue means a flush task has already been posted, and that
  // task will consume the new data as well.
  if (!queue_was_empty)
    return;

  PostCrossThreadTask(
      *task_runner, FROM_HERE,
      CrossThreadBindOnce(&HTMLDocumentParser::FlushPendingPreloads,
                          WrapCrossThreadPersistent(this)));
}
void HTMLDocumentParser::FlushPendingPreloads() {
DCHECK(IsMainThread());
if (!ThreadedPreloadScannerEnabled())
return;
if (IsDetached() || !preloader_)
return;
// Do this in a loop in case more preloads are added in the background.
while (HasPendingPreloads()) {
Vector<std::unique_ptr<PendingPreloadData>> preload_data;
{
base::AutoLock lock(pending_preload_lock_);
preload_data = std::move(pending_preload_data_);
}
for (auto& preload : preload_data)
ProcessPreloadData(std::move(preload));
}
}
// Opens a batch on the document's fetcher and counts it as pending.
void HTMLDocumentParser::StartFetchBatch() {
  GetDocument()->Fetcher()->StartBatch();
  ++pending_batch_operations_;
}
// Closes one pending batch on the document's fetcher. Does nothing when the
// parser is detached or no batch is pending.
void HTMLDocumentParser::EndFetchBatch() {
  if (IsDetached())
    return;

  if (pending_batch_operations_ > 0) {
    --pending_batch_operations_;
    GetDocument()->Fetcher()->EndBatch();
  }
}
void HTMLDocumentParser::FlushFetchBatch() {
if (!IsDetached() && pending_batch_operations_ > 0) {
ResourceFetcher* fetcher = GetDocument()->Fetcher();
while (pending_batch_operations_ > 0) {
pending_batch_operations_--;
fetcher->EndBatch();
}
}
}
} // namespace blink