| /* |
| * Copyright (C) 2010 Google, Inc. All Rights Reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "third_party/blink/renderer/core/html/parser/html_document_parser.h" |
| |
| #include <memory> |
| #include <utility> |
| |
| #include "base/feature_list.h" |
| #include "base/metrics/histogram_functions.h" |
| #include "base/numerics/safe_conversions.h" |
| #include "base/stl_util.h" |
| #include "base/strings/strcat.h" |
| #include "third_party/blink/public/common/features.h" |
| #include "third_party/blink/public/common/loader/loading_behavior_flag.h" |
| #include "third_party/blink/public/platform/platform.h" |
| #include "third_party/blink/public/platform/task_type.h" |
| #include "third_party/blink/renderer/core/css/media_values_cached.h" |
| #include "third_party/blink/renderer/core/css/style_engine.h" |
| #include "third_party/blink/renderer/core/dom/document_fragment.h" |
| #include "third_party/blink/renderer/core/dom/element.h" |
| #include "third_party/blink/renderer/core/frame/local_frame.h" |
| #include "third_party/blink/renderer/core/frame/settings.h" |
| #include "third_party/blink/renderer/core/html/html_document.h" |
| #include "third_party/blink/renderer/core/html/html_meta_element.h" |
| #include "third_party/blink/renderer/core/html/nesting_level_incrementer.h" |
| #include "third_party/blink/renderer/core/html/parser/atomic_html_token.h" |
| #include "third_party/blink/renderer/core/html/parser/background_html_scanner.h" |
| #include "third_party/blink/renderer/core/html/parser/html_parser_metrics.h" |
| #include "third_party/blink/renderer/core/html/parser/html_preload_scanner.h" |
| #include "third_party/blink/renderer/core/html/parser/html_resource_preloader.h" |
| #include "third_party/blink/renderer/core/html/parser/html_tree_builder.h" |
| #include "third_party/blink/renderer/core/html_names.h" |
| #include "third_party/blink/renderer/core/inspector/inspector_trace_events.h" |
| #include "third_party/blink/renderer/core/loader/document_loader.h" |
| #include "third_party/blink/renderer/core/loader/prefetched_signed_exchange_manager.h" |
| #include "third_party/blink/renderer/core/loader/preload_helper.h" |
| #include "third_party/blink/renderer/core/probe/core_probes.h" |
| #include "third_party/blink/renderer/core/script/html_parser_script_runner.h" |
| #include "third_party/blink/renderer/platform/bindings/runtime_call_stats.h" |
| #include "third_party/blink/renderer/platform/bindings/v8_per_isolate_data.h" |
| #include "third_party/blink/renderer/platform/heap/garbage_collected.h" |
| #include "third_party/blink/renderer/platform/heap/persistent.h" |
| #include "third_party/blink/renderer/platform/instrumentation/tracing/trace_event.h" |
| #include "third_party/blink/renderer/platform/loader/fetch/resource_fetcher.h" |
| #include "third_party/blink/renderer/platform/runtime_enabled_features.h" |
| #include "third_party/blink/renderer/platform/scheduler/public/cooperative_scheduling_manager.h" |
| #include "third_party/blink/renderer/platform/scheduler/public/post_cross_thread_task.h" |
| #include "third_party/blink/renderer/platform/scheduler/public/thread.h" |
| #include "third_party/blink/renderer/platform/scheduler/public/thread_scheduler.h" |
| #include "third_party/blink/renderer/platform/wtf/cross_thread_copier_base.h" |
| #include "third_party/blink/renderer/platform/wtf/cross_thread_functional.h" |
| #include "third_party/blink/renderer/platform/wtf/shared_buffer.h" |
| |
| namespace blink { |
| |
// Default cap on the number of tokens the foreground HTML parser attempts to
// process in a single pump. Smaller values generally give an earlier first
// paint; larger values postpone first paint but bring it closer to the final
// page. Used when no Finch-provided value exists.
constexpr int kDefaultMaxTokenizationBudget{250};
// Yield-count threshold: budget selection compares the number of yields so
// far against this value (`times_yielded <= threshold` keeps the default
// budget).
constexpr int kNumYieldsWithDefaultBudget{2};

// Scope helpers that are defined further down; declared here so that they can
// be named in friend declarations before their definitions.
class AttemptToEndForbiddenScope;
class EndIfDelayedForbiddenScope;
class ShouldCompleteScope;
| |
| bool ThreadedPreloadScannerEnabled() { |
| // Cache the feature value since checking for each parser regresses some micro |
| // benchmarks. |
| static const bool kEnabled = |
| base::FeatureList::IsEnabled(features::kThreadedPreloadScanner); |
| return kEnabled; |
| } |
| |
| bool TimedParserBudgetEnabled() { |
| // Cache the feature value since checking for each parser regresses some micro |
| // benchmarks. |
| static const bool kEnabled = |
| base::FeatureList::IsEnabled(features::kTimedHTMLParserBudget); |
| return kEnabled; |
| } |
| |
| bool PrecompileInlineScriptsEnabled() { |
| // Cache the feature value since checking for each parser regresses some micro |
| // benchmarks. |
| static const bool kEnabled = |
| base::FeatureList::IsEnabled(features::kPrecompileInlineScripts); |
| return kEnabled; |
| } |
| |
| Thread* GetPreloadScannerThread() { |
| DCHECK(ThreadedPreloadScannerEnabled()); |
| |
| // The preload scanner relies on parsing CSS, which requires creating garbage |
| // collected objects. This means the thread the scanning runs on must be GC |
| // enabled. |
| DEFINE_STATIC_LOCAL( |
| std::unique_ptr<Thread>, preload_scanner_thread, |
| (Thread::CreateThread( |
| ThreadCreationParams(ThreadType::kPreloadScannerThread) |
| .SetSupportsGC(true)))); |
| return preload_scanner_thread.get(); |
| } |
| |
// Selects how preloads that become available in the background get processed.
// Handling preloads promptly matters because it lets the request start as
// early as possible. An experiment is meant to choose the best option, which
// will then be hard coded.
enum class PreloadProcessingMode {
  // Preloads are handled only when the posted task runs.
  kNone,
  // On every parser iteration, preloads are checked and dispatched right away.
  kImmediate,
  // The parser yields whenever preloads are pending, so that the task can run.
  kYield,
};
| |
| PreloadProcessingMode GetPreloadProcessingMode() { |
| if (!ThreadedPreloadScannerEnabled()) |
| return PreloadProcessingMode::kNone; |
| |
| static const base::FeatureParam<PreloadProcessingMode>::Option |
| kPreloadProcessingModeOptions[] = { |
| {PreloadProcessingMode::kNone, "none"}, |
| {PreloadProcessingMode::kImmediate, "immediate"}, |
| {PreloadProcessingMode::kYield, "yield"}, |
| }; |
| |
| static const base::FeatureParam<PreloadProcessingMode> |
| kPreloadProcessingModeParam{ |
| &features::kThreadedPreloadScanner, "preload-processing-mode", |
| PreloadProcessingMode::kImmediate, &kPreloadProcessingModeOptions}; |
| |
| return kPreloadProcessingModeParam.Get(); |
| } |
| |
| bool IsPreloadScanningEnabled(Document* document) { |
| return document->GetSettings() && |
| document->GetSettings()->GetDoHtmlPreloadScanning(); |
| } |
| |
| base::TimeDelta GetDefaultTimedBudget() { |
| static const base::FeatureParam<base::TimeDelta> kDefaultParserBudgetParam{ |
| &features::kTimedHTMLParserBudget, "default-parser-budget", |
| base::Milliseconds(10)}; |
| // Cache the value to avoid parsing the param string more than once. |
| static const base::TimeDelta kDefaultParserBudgetValue = |
| kDefaultParserBudgetParam.Get(); |
| return kDefaultParserBudgetValue; |
| } |
| |
| base::TimeDelta GetTimedBudget(int times_yielded) { |
| static const base::FeatureParam<int> kNumYieldsWithDefaultBudgetParam{ |
| &features::kTimedHTMLParserBudget, "num-yields-with-default-budget", |
| kNumYieldsWithDefaultBudget}; |
| // Cache the value to avoid parsing the param string more than once. |
| static const int kNumYieldsWithDefaultBudgetValue = |
| kNumYieldsWithDefaultBudgetParam.Get(); |
| |
| static const base::FeatureParam<base::TimeDelta> kLongParserBudgetParam{ |
| &features::kTimedHTMLParserBudget, "long-parser-budget", |
| base::Milliseconds(500)}; |
| // Cache the value to avoid parsing the param string more than once. |
| static const base::TimeDelta kLongParserBudgetValue = |
| kLongParserBudgetParam.Get(); |
| |
| if (times_yielded <= kNumYieldsWithDefaultBudgetValue) |
| return GetDefaultTimedBudget(); |
| return kLongParserBudgetValue; |
| } |
| |
| // This class encapsulates the internal state needed for synchronous foreground |
| // HTML parsing (e.g. if HTMLDocumentParser::PumpTokenizer yields, this class |
| // tracks what should be done after the pump completes.) |
| class HTMLDocumentParserState |
| : public GarbageCollected<HTMLDocumentParserState> { |
| friend EndIfDelayedForbiddenScope; |
| friend ShouldCompleteScope; |
| friend AttemptToEndForbiddenScope; |
| |
| public: |
| // Keeps track of whether the parser needs to complete tokenization work, |
| // optionally followed by EndIfDelayed. |
| enum class DeferredParserState { |
| // Indicates that a tokenizer pump has either completed or hasn't been |
| // scheduled. |
| kNotScheduled = 0, // Enforce ordering in this enum. |
| // Indicates that a tokenizer pump is scheduled and hasn't completed yet. |
| kScheduled = 1, |
| // Indicates that a tokenizer pump, followed by EndIfDelayed, is scheduled. |
| kScheduledWithEndIfDelayed = 2 |
| }; |
| |
| enum class MetaCSPTokenState { |
| // If we've seen a meta CSP token in an upcoming HTML chunk, then we need to |
| // defer any preloads until we've added the CSP token to the document and |
| // applied the Content Security Policy. |
| kSeen = 0, |
| // Indicates that there is no meta CSP token in the upcoming chunk. |
| kNotSeen = 1, |
| // Indicates that we've added the CSP token to the document and we can now |
| // fetch preloads. |
| kProcessed = 2, |
| // Indicates that it's too late to apply a Content-Security policy (because |
| // we've exited the header section.) |
| kUnenforceable = 3, |
| }; |
| |
| explicit HTMLDocumentParserState(ParserSynchronizationPolicy mode) |
| : state_(DeferredParserState::kNotScheduled), |
| meta_csp_state_(MetaCSPTokenState::kNotSeen), |
| mode_(mode), |
| preload_processing_mode_(GetPreloadProcessingMode()) {} |
| |
| void Trace(Visitor* v) const {} |
| |
| void SetState(DeferredParserState state) { |
| DCHECK(!(state == DeferredParserState::kScheduled && ShouldComplete())); |
| state_ = state; |
| } |
| DeferredParserState GetState() const { return state_; } |
| |
| bool IsScheduled() const { return state_ >= DeferredParserState::kScheduled; } |
| const char* GetStateAsString() const { |
| switch (state_) { |
| case DeferredParserState::kNotScheduled: |
| return "not_scheduled"; |
| case DeferredParserState::kScheduled: |
| return "scheduled"; |
| case DeferredParserState::kScheduledWithEndIfDelayed: |
| return "scheduled_with_end_if_delayed"; |
| } |
| } |
| |
| bool NeedsLinkHeaderPreloadsDispatch() const { |
| return needs_link_header_dispatch_; |
| } |
| void DispatchedLinkHeaderPreloads() { needs_link_header_dispatch_ = false; } |
| |
| bool SeenFirstByte() const { return have_seen_first_byte_; } |
| void MarkSeenFirstByte() { have_seen_first_byte_ = true; } |
| |
| bool EndWasDelayed() const { return end_was_delayed_; } |
| void SetEndWasDelayed(bool new_value) { end_was_delayed_ = new_value; } |
| |
| bool AddedPendingParserBlockingStylesheet() const { |
| return added_pending_parser_blocking_stylesheet_; |
| } |
| void SetAddedPendingParserBlockingStylesheet(bool new_value) { |
| added_pending_parser_blocking_stylesheet_ = new_value; |
| } |
| |
| bool WaitingForStylesheets() const { return is_waiting_for_stylesheets_; } |
| void SetWaitingForStylesheets(bool new_value) { |
| is_waiting_for_stylesheets_ = new_value; |
| } |
| |
| // Keeps track of whether Document::Finish has been called whilst parsing. |
| // ShouldAttemptToEndOnEOF() means that the parser should close when there's |
| // no more input. |
| bool ShouldAttemptToEndOnEOF() const { return should_attempt_to_end_on_eof_; } |
| void SetAttemptToEndOnEOF() { |
| // Should only ever call ::Finish once. |
| DCHECK(!should_attempt_to_end_on_eof_); |
| // This method should only be called from ::Finish. |
| should_attempt_to_end_on_eof_ = true; |
| } |
| |
| bool ShouldEndIfDelayed() const { return end_if_delayed_forbidden_ == 0; } |
| bool ShouldComplete() const { |
| return should_complete_ || GetMode() != kAllowDeferredParsing; |
| } |
| bool IsSynchronous() const { |
| return mode_ == ParserSynchronizationPolicy::kForceSynchronousParsing; |
| } |
| ParserSynchronizationPolicy GetMode() const { return mode_; } |
| |
| void MarkYield() { times_yielded_++; } |
| int TimesYielded() const { return times_yielded_; } |
| |
| NestingLevelIncrementer ScopedPumpSession() { |
| return NestingLevelIncrementer(pump_session_nesting_level_); |
| } |
| bool InPumpSession() const { return pump_session_nesting_level_; } |
| bool InNestedPumpSession() const { return pump_session_nesting_level_ > 1; } |
| |
| void SetSeenCSPMetaTag(const bool seen) { |
| if (meta_csp_state_ == MetaCSPTokenState::kUnenforceable) |
| return; |
| if (seen) |
| meta_csp_state_ = MetaCSPTokenState::kSeen; |
| else |
| meta_csp_state_ = MetaCSPTokenState::kNotSeen; |
| } |
| |
| void SetExitedHeader() { |
| meta_csp_state_ = MetaCSPTokenState::kUnenforceable; |
| } |
| bool HaveExitedHeader() const { |
| return meta_csp_state_ == MetaCSPTokenState::kUnenforceable; |
| } |
| |
| bool ShouldYieldForPreloads() const { |
| return preload_processing_mode_ == PreloadProcessingMode::kYield; |
| } |
| |
| bool ShouldProcessPreloads() const { |
| return preload_processing_mode_ == PreloadProcessingMode::kImmediate; |
| } |
| |
| private: |
| void EnterEndIfDelayedForbidden() { end_if_delayed_forbidden_++; } |
| void ExitEndIfDelayedForbidden() { |
| DCHECK(end_if_delayed_forbidden_); |
| end_if_delayed_forbidden_--; |
| } |
| |
| void EnterAttemptToEndForbidden() { |
| DCHECK(should_attempt_to_end_on_eof_); |
| should_attempt_to_end_on_eof_ = false; |
| } |
| |
| void EnterShouldComplete() { should_complete_++; } |
| void ExitShouldComplete() { |
| DCHECK(should_complete_); |
| should_complete_--; |
| } |
| |
| DeferredParserState state_; |
| MetaCSPTokenState meta_csp_state_; |
| ParserSynchronizationPolicy mode_; |
| PreloadProcessingMode preload_processing_mode_; |
| unsigned end_if_delayed_forbidden_ = 0; |
| unsigned should_complete_ = 0; |
| unsigned times_yielded_ = 0; |
| unsigned pump_session_nesting_level_ = 0; |
| |
| // Set to non-zero if Document::Finish has been called and we're operating |
| // asynchronously. |
| bool should_attempt_to_end_on_eof_ = false; |
| bool needs_link_header_dispatch_ = true; |
| bool have_seen_first_byte_ = false; |
| bool end_was_delayed_ = false; |
| bool added_pending_parser_blocking_stylesheet_ = false; |
| bool is_waiting_for_stylesheets_ = false; |
| }; |
| |
| class EndIfDelayedForbiddenScope { |
| STACK_ALLOCATED(); |
| |
| public: |
| explicit EndIfDelayedForbiddenScope(HTMLDocumentParserState* state) |
| : state_(state) { |
| state_->EnterEndIfDelayedForbidden(); |
| } |
| ~EndIfDelayedForbiddenScope() { state_->ExitEndIfDelayedForbidden(); } |
| |
| private: |
| HTMLDocumentParserState* state_; |
| }; |
| |
| class AttemptToEndForbiddenScope { |
| STACK_ALLOCATED(); |
| |
| public: |
| explicit AttemptToEndForbiddenScope(HTMLDocumentParserState* state) |
| : state_(state) { |
| state_->EnterAttemptToEndForbidden(); |
| } |
| |
| private: |
| HTMLDocumentParserState* state_; |
| }; |
| |
| class ShouldCompleteScope { |
| STACK_ALLOCATED(); |
| |
| public: |
| explicit ShouldCompleteScope(HTMLDocumentParserState* state) : state_(state) { |
| state_->EnterShouldComplete(); |
| } |
| ~ShouldCompleteScope() { state_->ExitShouldComplete(); } |
| |
| private: |
| HTMLDocumentParserState* state_; |
| }; |
| |
| class FetchBatchScope { |
| STACK_ALLOCATED(); |
| |
| public: |
| explicit FetchBatchScope(HTMLDocumentParser* parser) : parser_(parser) { |
| parser_->StartFetchBatch(); |
| } |
| ~FetchBatchScope() { parser_->EndFetchBatch(); } |
| |
| private: |
| HTMLDocumentParser* const parser_; |
| }; |
| |
| // This is a direct transcription of step 4 from: |
| // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case |
| static HTMLTokenizer::State TokenizerStateForContextElement( |
| Element* context_element, |
| bool report_errors, |
| const HTMLParserOptions& options) { |
| if (!context_element) |
| return HTMLTokenizer::kDataState; |
| |
| const QualifiedName& context_tag = context_element->TagQName(); |
| |
| if (context_tag.Matches(html_names::kTitleTag) || |
| context_tag.Matches(html_names::kTextareaTag)) |
| return HTMLTokenizer::kRCDATAState; |
| if (context_tag.Matches(html_names::kStyleTag) || |
| context_tag.Matches(html_names::kXmpTag) || |
| context_tag.Matches(html_names::kIFrameTag) || |
| context_tag.Matches(html_names::kNoembedTag) || |
| (context_tag.Matches(html_names::kNoscriptTag) && |
| options.scripting_flag) || |
| context_tag.Matches(html_names::kNoframesTag)) |
| return report_errors ? HTMLTokenizer::kRAWTEXTState |
| : HTMLTokenizer::kPLAINTEXTState; |
| if (context_tag.Matches(html_names::kScriptTag)) |
| return report_errors ? HTMLTokenizer::kScriptDataState |
| : HTMLTokenizer::kPLAINTEXTState; |
| if (context_tag.Matches(html_names::kPlaintextTag)) |
| return HTMLTokenizer::kPLAINTEXTState; |
| return HTMLTokenizer::kDataState; |
| } |
| |
| class ScopedYieldTimer { |
| public: |
| // This object is created at the start of a block of parsing, and will |
| // report the time since the last block yielded if known. |
| ScopedYieldTimer(std::unique_ptr<base::ElapsedTimer>* timer, |
| HTMLParserMetrics* metrics_reporter) |
| : timer_(timer), reporting_metrics_(metrics_reporter) { |
| if (!reporting_metrics_ || !(*timer_)) |
| return; |
| |
| metrics_reporter->AddYieldInterval((*timer_)->Elapsed()); |
| timer_->reset(); |
| } |
| |
| // The destructor creates a new timer, which will keep track of time until |
| // the next block starts. |
| ~ScopedYieldTimer() { |
| if (reporting_metrics_) |
| *timer_ = std::make_unique<base::ElapsedTimer>(); |
| } |
| |
| private: |
| std::unique_ptr<base::ElapsedTimer>* timer_; |
| bool reporting_metrics_; |
| }; |
| |
| HTMLDocumentParser::HTMLDocumentParser(HTMLDocument& document, |
| ParserSynchronizationPolicy sync_policy, |
| ParserPrefetchPolicy prefetch_policy) |
| : HTMLDocumentParser(document, |
| kAllowScriptingContent, |
| sync_policy, |
| prefetch_policy) { |
| script_runner_ = |
| HTMLParserScriptRunner::Create(ReentryPermit(), &document, this); |
| |
| // Allow declarative shadow DOM for the document parser, if not explicitly |
| // disabled. |
| bool include_shadow_roots = document.GetDeclarativeShadowRootAllowState() != |
| Document::DeclarativeShadowRootAllowState::kDeny; |
| tree_builder_ = MakeGarbageCollected<HTMLTreeBuilder>( |
| this, document, kAllowScriptingContent, options_, include_shadow_roots); |
| } |
| |
| HTMLDocumentParser::HTMLDocumentParser( |
| DocumentFragment* fragment, |
| Element* context_element, |
| ParserContentPolicy parser_content_policy, |
| ParserPrefetchPolicy parser_prefetch_policy) |
| : HTMLDocumentParser(fragment->GetDocument(), |
| parser_content_policy, |
| kForceSynchronousParsing, |
| parser_prefetch_policy) { |
| // Allow declarative shadow DOM for the fragment parser only if explicitly |
| // enabled. |
| bool include_shadow_roots = |
| fragment->GetDocument().GetDeclarativeShadowRootAllowState() == |
| Document::DeclarativeShadowRootAllowState::kAllow; |
| |
| // No script_runner_ in fragment parser. |
| tree_builder_ = MakeGarbageCollected<HTMLTreeBuilder>( |
| this, fragment, context_element, parser_content_policy, options_, |
| include_shadow_roots); |
| |
| // For now document fragment parsing never reports errors. |
| bool report_errors = false; |
| tokenizer_->SetState(TokenizerStateForContextElement( |
| context_element, report_errors, options_)); |
| } |
| |
| HTMLDocumentParser::HTMLDocumentParser(Document& document, |
| ParserContentPolicy content_policy, |
| ParserSynchronizationPolicy sync_policy, |
| ParserPrefetchPolicy prefetch_policy) |
| : ScriptableDocumentParser(document, content_policy), |
| options_(&document), |
| token_(std::make_unique<HTMLToken>()), |
| tokenizer_(std::make_unique<HTMLTokenizer>(options_)), |
| loading_task_runner_(sync_policy == kForceSynchronousParsing |
| ? nullptr |
| : document.GetTaskRunner(TaskType::kNetworking)), |
| task_runner_state_( |
| MakeGarbageCollected<HTMLDocumentParserState>(sync_policy)), |
| scheduler_(sync_policy == kAllowDeferredParsing |
| ? Thread::Current()->Scheduler() |
| : nullptr) { |
| // Make sure the preload scanner thread will be ready when needed. |
| if (ThreadedPreloadScannerEnabled() && !task_runner_state_->IsSynchronous()) |
| GetPreloadScannerThread(); |
| |
| // Report metrics for async document parsing or forced synchronous parsing. |
| // The document must be outermost main frame to meet UKM requirements, and |
| // must have a high resolution clock for high quality data. |
| if (sync_policy == kAllowDeferredParsing && |
| document.IsInOutermostMainFrame() && |
| base::TimeTicks::IsHighResolution()) { |
| metrics_reporter_ = std::make_unique<HTMLParserMetrics>( |
| document.UkmSourceID(), document.UkmRecorder()); |
| } |
| |
| if (GetDocument()->IsInOutermostMainFrame() && |
| !task_runner_state_->IsSynchronous()) { |
| tokenizer_metrics_reporter_ = |
| std::make_unique<HTMLTokenizerMetricsReporter>(tokenizer_.get()); |
| } |
| |
| // Don't create preloader for parsing clipboard content. |
| if (content_policy == kDisallowScriptingAndPluginContent) |
| return; |
| |
| // Create preloader only when the document is: |
| // - attached to a frame (likely the prefetched resources will be loaded |
| // soon), |
| // - is for no-state prefetch (made specifically for running preloader). |
| if (!document.GetFrame() && !document.IsPrefetchOnly()) |
| return; |
| |
| if (prefetch_policy == kAllowPrefetching) |
| preloader_ = MakeGarbageCollected<HTMLResourcePreloader>(document); |
| } |
| |
| HTMLDocumentParser::~HTMLDocumentParser() = default; |
| |
| void HTMLDocumentParser::Trace(Visitor* visitor) const { |
| visitor->Trace(reentry_permit_); |
| visitor->Trace(tree_builder_); |
| visitor->Trace(script_runner_); |
| visitor->Trace(preloader_); |
| visitor->Trace(task_runner_state_); |
| ScriptableDocumentParser::Trace(visitor); |
| HTMLParserScriptRunnerHost::Trace(visitor); |
| } |
| |
| bool HTMLDocumentParser::HasPendingWorkScheduledForTesting() const { |
| return task_runner_state_->IsScheduled(); |
| } |
| |
| void HTMLDocumentParser::Detach() { |
| // Unwind any nested batch operations before being detached |
| FlushFetchBatch(); |
| |
| // Deschedule any pending tokenizer pumps. |
| task_runner_state_->SetState( |
| HTMLDocumentParserState::DeferredParserState::kNotScheduled); |
| DocumentParser::Detach(); |
| if (script_runner_) |
| script_runner_->Detach(); |
| if (tree_builder_) |
| tree_builder_->Detach(); |
| // FIXME: It seems wrong that we would have a preload scanner here. Yet during |
| // fast/dom/HTMLScriptElement/script-load-events.html we do. |
| preload_scanner_.reset(); |
| insertion_preload_scanner_.reset(); |
| background_script_scanner_.Reset(); |
| background_scanner_.Reset(); |
| // `tokenizer_metrics_reporter_` has a reference to `tokenizer_`. |
| tokenizer_metrics_reporter_.reset(); |
| // Oilpan: It is important to clear token_ to deallocate backing memory of |
| // HTMLToken::data_ and let the allocator reuse the memory for |
| // HTMLToken::data_ of a next HTMLDocumentParser. We need to clear |
| // tokenizer_ first because tokenizer_ has a raw pointer to token_. |
| // TODO(masonf): We can probably move tokenizer_ and token_ into the |
| // HTMLDocumentParser itself, instead of having them as Members. |
| tokenizer_.reset(); |
| token_.reset(); |
| } |
| |
| void HTMLDocumentParser::StopParsing() { |
| DocumentParser::StopParsing(); |
| task_runner_state_->SetState( |
| HTMLDocumentParserState::DeferredParserState::kNotScheduled); |
| } |
| |
| // This kicks off "Once the user agent stops parsing" as described by: |
| // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#the-end |
| void HTMLDocumentParser::PrepareToStopParsing() { |
| TRACE_EVENT1("blink", "HTMLDocumentParser::PrepareToStopParsing", "parser", |
| (void*)this); |
| DCHECK(!HasInsertionPoint()); |
| |
| // If we've already been detached, e.g. in |
| // WebFrameTest.SwapMainFrameWhileLoading, bail out. |
| if (IsDetached()) |
| return; |
| |
| DCHECK(tokenizer_); |
| |
| // NOTE: This pump should only ever emit buffered character tokens. |
| if (!GetDocument()->IsPrefetchOnly()) { |
| ShouldCompleteScope should_complete(task_runner_state_); |
| EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_); |
| PumpTokenizerIfPossible(); |
| } |
| |
| if (IsStopped()) |
| return; |
| |
| DocumentParser::PrepareToStopParsing(); |
| |
| // We will not have a scriptRunner when parsing a DocumentFragment. |
| if (script_runner_) |
| GetDocument()->SetReadyState(Document::kInteractive); |
| |
| // Setting the ready state above can fire mutation event and detach us from |
| // underneath. In that case, just bail out. |
| if (IsDetached()) |
| return; |
| |
| if (script_runner_) |
| script_runner_->RecordMetricsAtParseEnd(); |
| |
| AttemptToRunDeferredScriptsAndEnd(); |
| } |
| |
| bool HTMLDocumentParser::IsPaused() const { |
| return IsWaitingForScripts() || task_runner_state_->WaitingForStylesheets(); |
| } |
| |
| bool HTMLDocumentParser::IsParsingFragment() const { |
| return tree_builder_->IsParsingFragment(); |
| } |
| |
| void HTMLDocumentParser::DeferredPumpTokenizerIfPossible() { |
| // This method is called asynchronously, continues building the HTML document. |
| |
| // If we're scheduled for a tokenizer pump, then document should be attached |
| // and the parser should not be stopped, but sometimes a script completes |
| // loading (so we schedule a pump) but the Document is stopped in the meantime |
| // (e.g. fast/parser/iframe-onload-document-close-with-external-script.html). |
| DCHECK(task_runner_state_->GetState() == |
| HTMLDocumentParserState::DeferredParserState::kNotScheduled || |
| !IsDetached()); |
| TRACE_EVENT2("blink", "HTMLDocumentParser::DeferredPumpTokenizerIfPossible", |
| "parser", (void*)this, "state", |
| task_runner_state_->GetStateAsString()); |
| |
| // This method is called when the post task is executed, marking the end of |
| // a yield. Report the yielded time. |
| DCHECK(yield_timer_); |
| if (metrics_reporter_) { |
| metrics_reporter_->AddYieldInterval(yield_timer_->Elapsed()); |
| } |
| yield_timer_.reset(); |
| |
| bool should_call_delay_end = |
| task_runner_state_->GetState() == |
| HTMLDocumentParserState::DeferredParserState::kScheduledWithEndIfDelayed; |
| if (task_runner_state_->IsScheduled()) { |
| task_runner_state_->SetState( |
| HTMLDocumentParserState::DeferredParserState::kNotScheduled); |
| if (should_call_delay_end) { |
| EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_); |
| PumpTokenizerIfPossible(); |
| EndIfDelayed(); |
| } else { |
| PumpTokenizerIfPossible(); |
| } |
| } |
| } |
| |
| void HTMLDocumentParser::PumpTokenizerIfPossible() { |
| // This method is called synchronously, builds the HTML document up to |
| // the current budget, and optionally completes. |
| TRACE_EVENT1("blink", "HTMLDocumentParser::PumpTokenizerIfPossible", "parser", |
| (void*)this); |
| |
| bool yielded = false; |
| CheckIfBlockingStylesheetAdded(); |
| if (!IsStopped() && |
| (!IsPaused() || task_runner_state_->ShouldEndIfDelayed())) { |
| yielded = PumpTokenizer(); |
| } |
| |
| if (yielded) { |
| DCHECK(!task_runner_state_->ShouldComplete()); |
| SchedulePumpTokenizer(); |
| } else if (task_runner_state_->ShouldAttemptToEndOnEOF()) { |
| // Fall into this branch if ::Finish has been previously called and we've |
| // just finished asynchronously parsing everything. |
| if (metrics_reporter_) |
| metrics_reporter_->ReportMetricsAtParseEnd(); |
| AttemptToEnd(); |
| } else if (task_runner_state_->ShouldEndIfDelayed()) { |
| // If we did not exceed the budget or parsed everything there was to |
| // parse, check if we should complete the document. |
| if (task_runner_state_->ShouldComplete() || IsStopped() || IsStopping()) { |
| if (metrics_reporter_) |
| metrics_reporter_->ReportMetricsAtParseEnd(); |
| EndIfDelayed(); |
| } else { |
| ScheduleEndIfDelayed(); |
| } |
| } |
| } |
| |
| void HTMLDocumentParser::RunScriptsForPausedTreeBuilder() { |
| TRACE_EVENT1("blink", "HTMLDocumentParser::RunScriptsForPausedTreeBuilder", |
| "parser", (void*)this); |
| DCHECK(ScriptingContentIsAllowed(GetParserContentPolicy())); |
| |
| TextPosition script_start_position = TextPosition::BelowRangePosition(); |
| Element* script_element = |
| tree_builder_->TakeScriptToProcess(script_start_position); |
| // We will not have a scriptRunner when parsing a DocumentFragment. |
| if (script_runner_) |
| script_runner_->ProcessScriptElement(script_element, script_start_position); |
| CheckIfBlockingStylesheetAdded(); |
| } |
| |
| HTMLDocumentParser::NextTokenStatus HTMLDocumentParser::CanTakeNextToken( |
| base::TimeDelta& time_executing_script) { |
| if (IsStopped()) |
| return kNoTokens; |
| |
| // If we're paused waiting for a script, we try to execute scripts before |
| // continuing. |
| auto ret = kHaveTokens; |
| if (tree_builder_->HasParserBlockingScript()) { |
| base::ElapsedTimer timer; |
| RunScriptsForPausedTreeBuilder(); |
| ret = kHaveTokensAfterScript; |
| time_executing_script += timer.Elapsed(); |
| } |
| if (IsStopped() || IsPaused()) |
| return kNoTokens; |
| return ret; |
| } |
| |
| void HTMLDocumentParser::ForcePlaintextForTextDocument() { |
| tokenizer_->SetState(HTMLTokenizer::kPLAINTEXTState); |
| } |
| |
| bool HTMLDocumentParser::PumpTokenizer() { |
| DCHECK(!GetDocument()->IsPrefetchOnly()); |
| DCHECK(!IsStopped()); |
| DCHECK(tokenizer_); |
| DCHECK(token_); |
| |
| NestingLevelIncrementer session = task_runner_state_->ScopedPumpSession(); |
| |
| // If we're in kForceSynchronousParsing, always run until all available input |
| // is consumed. |
| bool should_run_until_completion = task_runner_state_->ShouldComplete() || |
| task_runner_state_->IsSynchronous() || |
| task_runner_state_->InNestedPumpSession(); |
| |
| bool is_tracing; |
| TRACE_EVENT_CATEGORY_GROUP_ENABLED("blink", &is_tracing); |
| unsigned starting_bytes; |
| if (is_tracing) { |
| starting_bytes = input_.length(); |
| TRACE_EVENT_BEGIN2("blink", "HTMLDocumentParser::PumpTokenizer", |
| "should_complete", should_run_until_completion, |
| "bytes_queued", starting_bytes); |
| } |
| |
| // We tell the InspectorInstrumentation about every pump, even if we end up |
| // pumping nothing. It can filter out empty pumps itself. |
| // FIXME: input_.Current().length() is only accurate if we end up parsing the |
| // whole buffer in this pump. We should pass how much we parsed as part of |
| // DidWriteHTML instead of WillWriteHTML. |
| probe::ParseHTML probe(GetDocument(), this); |
| |
| FetchBatchScope fetch_batch(this); |
| |
| bool should_yield = false; |
| // If we've yielded more than 2 times, then set the budget to a very large |
| // number, to attempt to consume all available tokens in one go. This |
| // heuristic is intended to allow a quick first contentful paint, followed by |
| // a larger rendering lifecycle that processes the remainder of the page. |
| int budget = |
| (task_runner_state_->TimesYielded() <= kNumYieldsWithDefaultBudget) |
| ? kDefaultMaxTokenizationBudget |
| : 1e7; |
| |
| base::TimeDelta timed_budget; |
| if (TimedParserBudgetEnabled()) |
| timed_budget = GetTimedBudget(task_runner_state_->TimesYielded()); |
| |
| base::ElapsedTimer chunk_parsing_timer; |
| unsigned tokens_parsed = 0; |
| base::TimeDelta time_executing_script; |
| while (!should_yield) { |
| if (task_runner_state_->ShouldProcessPreloads()) |
| FlushPendingPreloads(); |
| |
| const auto next_token_status = CanTakeNextToken(time_executing_script); |
| if (next_token_status == kNoTokens) { |
| // No tokens left to process in this pump, so break |
| break; |
| } else if (next_token_status == kHaveTokensAfterScript && |
| task_runner_state_->HaveExitedHeader()) { |
| // Just executed a parser-blocking script in the body. We'd probably like |
| // to yield at some point soon, especially if we're in "extended budget" |
| // mode. So reduce the budget back to at most the default. |
| budget = std::min(budget, kDefaultMaxTokenizationBudget); |
| if (TimedParserBudgetEnabled()) { |
| timed_budget = std::min(timed_budget, chunk_parsing_timer.Elapsed() + |
| GetDefaultTimedBudget()); |
| } |
| } |
| { |
| RUNTIME_CALL_TIMER_SCOPE( |
| V8PerIsolateData::MainThreadIsolate(), |
| RuntimeCallStats::CounterId::kHTMLTokenizerNextToken); |
| if (tokenizer_metrics_reporter_) |
| tokenizer_metrics_reporter_->WillProcessNextToken(input_.Current()); |
| |
| if (!tokenizer_->NextToken(input_.Current(), Token())) |
| break; |
| budget--; |
| tokens_parsed++; |
| } |
| ConstructTreeFromHTMLToken(); |
| if (!should_run_until_completion && !IsPaused()) { |
| DCHECK_EQ(task_runner_state_->GetMode(), kAllowDeferredParsing); |
| if (TimedParserBudgetEnabled()) |
| should_yield = chunk_parsing_timer.Elapsed() >= timed_budget; |
| else |
| should_yield = budget <= 0; |
| should_yield |= scheduler_->ShouldYieldForHighPriorityWork(); |
| should_yield &= task_runner_state_->HaveExitedHeader(); |
| |
| // Yield for preloads even if we haven't exited the header, since they |
| // should be dispatched as soon as possible. |
| if (task_runner_state_->ShouldYieldForPreloads()) |
| should_yield |= HasPendingPreloads(); |
| } else { |
| should_yield = false; |
| } |
| DCHECK(IsStopped() || Token().IsUninitialized()); |
| } |
| |
| if (is_tracing) { |
| TRACE_EVENT_END2("blink", "HTMLDocumentParser::PumpTokenizer", |
| "parsed_tokens", tokens_parsed, "parsed_bytes", |
| starting_bytes - input_.length()); |
| } |
| |
| const bool is_stopped_or_parsing_fragment = |
| IsStopped() || IsParsingFragment(); |
| |
| if (!is_stopped_or_parsing_fragment) { |
| // There should only be PendingText left since the tree-builder always |
| // flushes the task queue before returning. In case that ever changes, |
| // crash. |
| tree_builder_->Flush(kFlushAlways); |
| CHECK(!IsStopped()); |
| } |
| |
| if (tokens_parsed && metrics_reporter_) { |
| metrics_reporter_->AddChunk( |
| chunk_parsing_timer.Elapsed() - time_executing_script, tokens_parsed); |
| } |
| |
| if (is_stopped_or_parsing_fragment) |
| return false; |
| |
| if (IsPaused()) { |
| DCHECK_EQ(tokenizer_->GetState(), HTMLTokenizer::kDataState); |
| |
| if (preloader_ && !background_scanner_) { |
| if (!preload_scanner_) { |
| preload_scanner_ = CreatePreloadScanner( |
| TokenPreloadScanner::ScannerType::kMainDocument); |
| preload_scanner_->AppendToEnd(input_.Current()); |
| } |
| ScanAndPreload(preload_scanner_.get()); |
| } |
| } |
| |
| // should_run_until_completion implies that we should not yield |
| CHECK(!should_run_until_completion || !should_yield); |
| if (should_yield) |
| task_runner_state_->MarkYield(); |
| return should_yield; |
| } |
| |
| void HTMLDocumentParser::SchedulePumpTokenizer() { |
| TRACE_EVENT0("blink", "HTMLDocumentParser::SchedulePumpTokenizer"); |
| DCHECK(!IsStopped()); |
| DCHECK(!task_runner_state_->InPumpSession()); |
| DCHECK(!task_runner_state_->ShouldComplete()); |
| if (task_runner_state_->IsScheduled()) { |
| // If the parser is already scheduled, there's no need to do anything. |
| return; |
| } |
| loading_task_runner_->PostTask( |
| FROM_HERE, WTF::Bind(&HTMLDocumentParser::DeferredPumpTokenizerIfPossible, |
| WrapPersistent(this))); |
| task_runner_state_->SetState( |
| HTMLDocumentParserState::DeferredParserState::kScheduled); |
| |
| yield_timer_ = std::make_unique<base::ElapsedTimer>(); |
| } |
| |
| void HTMLDocumentParser::ScheduleEndIfDelayed() { |
| TRACE_EVENT0("blink", "HTMLDocumentParser::ScheduleEndIfDelayed"); |
| DCHECK(!IsStopped()); |
| DCHECK(!task_runner_state_->InPumpSession()); |
| DCHECK(!task_runner_state_->ShouldComplete()); |
| |
| // Schedule a pump callback if needed. |
| if (!task_runner_state_->IsScheduled()) { |
| loading_task_runner_->PostTask( |
| FROM_HERE, |
| WTF::Bind(&HTMLDocumentParser::DeferredPumpTokenizerIfPossible, |
| WrapPersistent(this))); |
| yield_timer_ = std::make_unique<base::ElapsedTimer>(); |
| } |
| // If a pump is already scheduled, it's OK to just upgrade it to one |
| // which calls EndIfDelayed afterwards. |
| task_runner_state_->SetState( |
| HTMLDocumentParserState::DeferredParserState::kScheduledWithEndIfDelayed); |
| } |
| |
| void HTMLDocumentParser::ConstructTreeFromHTMLToken() { |
| DCHECK(!GetDocument()->IsPrefetchOnly()); |
| |
| AtomicHTMLToken atomic_token(Token()); |
| |
| // Check whether we've exited the header. |
| if (!task_runner_state_->HaveExitedHeader()) { |
| if (GetDocument()->body()) { |
| task_runner_state_->SetExitedHeader(); |
| } |
| } |
| |
| // We clear the token_ in case ConstructTree() synchronously re-enters the |
| // parser. |
| Token().Clear(); |
| |
| if (tokenizer_metrics_reporter_) { |
| tokenizer_metrics_reporter_->WillConstructTreeFromToken(atomic_token, |
| input_.Current()); |
| } |
| |
| tree_builder_->ConstructTree(&atomic_token); |
| CheckIfBlockingStylesheetAdded(); |
| } |
| |
| bool HTMLDocumentParser::HasInsertionPoint() { |
| // FIXME: The wasCreatedByScript() branch here might not be fully correct. Our |
| // model of the EOF character differs slightly from the one in the spec |
| // because our treatment is uniform between network-sourced and script-sourced |
| // input streams whereas the spec treats them differently. |
| return input_.HasInsertionPoint() || |
| (WasCreatedByScript() && !input_.HaveSeenEndOfFile()); |
| } |
| |
| void HTMLDocumentParser::insert(const String& source) { |
| // No need to do any processing if the supplied text is empty. |
| if (IsStopped() || source.IsEmpty()) |
| return; |
| |
| TRACE_EVENT2("blink", "HTMLDocumentParser::insert", "source_length", |
| source.length(), "parser", (void*)this); |
| |
| if (tokenizer_metrics_reporter_ && !source.IsEmpty()) |
| tokenizer_metrics_reporter_->OnDocumentWrite(input_.Current()); |
| |
| SegmentedString excluded_line_number_source(source); |
| excluded_line_number_source.SetExcludeLineNumbers(); |
| input_.InsertAtCurrentInsertionPoint(excluded_line_number_source); |
| |
| // Pump the the tokenizer to build the document from the given insert point. |
| // Should process everything available and not defer anything. |
| ShouldCompleteScope should_complete(task_runner_state_); |
| EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_); |
| // Call EndIfDelayed manually at the end to maintain preload behaviour. |
| PumpTokenizerIfPossible(); |
| |
| if (IsPaused()) { |
| // Check the document.write() output with a separate preload scanner as |
| // the main scanner can't deal with insertions. |
| if (!insertion_preload_scanner_) { |
| insertion_preload_scanner_ = |
| CreatePreloadScanner(TokenPreloadScanner::ScannerType::kInsertion); |
| } |
| insertion_preload_scanner_->AppendToEnd(source); |
| if (preloader_) { |
| ScanAndPreload(insertion_preload_scanner_.get()); |
| } |
| } |
| EndIfDelayed(); |
| } |
| |
| void HTMLDocumentParser::Append(const String& input_source) { |
| TRACE_EVENT2("blink", "HTMLDocumentParser::append", "size", |
| input_source.length(), "parser", (void*)this); |
| |
| if (IsStopped()) |
| return; |
| |
| const SegmentedString source(input_source); |
| |
| ScanInBackground(input_source); |
| |
| if (!background_scanner_ && !preload_scanner_ && preloader_ && |
| GetDocument()->Url().IsValid() && |
| (!task_runner_state_->IsSynchronous() || |
| GetDocument()->IsPrefetchOnly() || IsPaused())) { |
| // If we're operating with a budget, we need to create a preload scanner to |
| // make sure that parser-blocking Javascript requests are dispatched in |
| // plenty of time, which prevents unnecessary delays. |
| // When parsing without a budget (e.g. for HTML fragment parsing), it's |
| // additional overhead to scan the string unless the parser's already |
| // paused whilst executing a script. |
| preload_scanner_ = |
| CreatePreloadScanner(TokenPreloadScanner::ScannerType::kMainDocument); |
| } |
| |
| if (GetDocument()->IsPrefetchOnly()) { |
| if (preload_scanner_) { |
| preload_scanner_->AppendToEnd(source); |
| // TODO(Richard.Townsend@arm.com): add test coverage of this branch. |
| // The crash in crbug.com/1166786 indicates that text documents are being |
| // speculatively prefetched. |
| ScanAndPreload(preload_scanner_.get()); |
| } |
| |
| // Return after the preload scanner, do not actually parse the document. |
| return; |
| } |
| if (preload_scanner_) { |
| preload_scanner_->AppendToEnd(source); |
| if (task_runner_state_->GetMode() == kAllowDeferredParsing && |
| (IsPaused() || !task_runner_state_->SeenFirstByte())) { |
| // Should scan and preload if the parser's paused waiting for a resource, |
| // or if we're starting a document for the first time (we want to at least |
| // prefetch anything that's in the <head> section). |
| ScanAndPreload(preload_scanner_.get()); |
| } |
| } |
| |
| if (tokenizer_metrics_reporter_) |
| tokenizer_metrics_reporter_->WillAppend(input_source); |
| input_.AppendToEnd(source); |
| task_runner_state_->MarkSeenFirstByte(); |
| |
| // Add input_source.length() to "file size" metric. |
| if (metrics_reporter_) |
| metrics_reporter_->AddInput(input_source.length()); |
| |
| if (task_runner_state_->InPumpSession()) { |
| // We've gotten data off the network in a nested write. We don't want to |
| // consume any more of the input stream now. Do not worry. We'll consume |
| // this data in a less-nested write(). |
| return; |
| } |
| |
| // If we are preloading, FinishAppend() will be called later in |
| // CommitPreloadedData(). |
| if (IsPreloading()) |
| return; |
| |
| FinishAppend(); |
| } |
| |
| void HTMLDocumentParser::FinishAppend() { |
| // Schedule a tokenizer pump to process this new data. We schedule to give |
| // paint a chance to happen, and because devtools somehow depends on it |
| // for js loads. |
| if (task_runner_state_->GetMode() == |
| ParserSynchronizationPolicy::kAllowDeferredParsing && |
| !task_runner_state_->ShouldComplete()) { |
| SchedulePumpTokenizer(); |
| } else { |
| PumpTokenizerIfPossible(); |
| } |
| } |
| |
| void HTMLDocumentParser::CommitPreloadedData() { |
| if (!IsPreloading()) |
| return; |
| |
| SetIsPreloading(false); |
| if (task_runner_state_->SeenFirstByte() && !IsStopped()) |
| FinishAppend(); |
| } |
| |
| void HTMLDocumentParser::end() { |
| DCHECK(!IsDetached()); |
| |
| // Informs the the rest of WebCore that parsing is really finished (and |
| // deletes this). |
| tree_builder_->Finished(); |
| |
| // All preloads should be done. |
| preloader_ = nullptr; |
| |
| DocumentParser::StopParsing(); |
| } |
| |
| void HTMLDocumentParser::AttemptToRunDeferredScriptsAndEnd() { |
| DCHECK(IsStopping()); |
| DCHECK(!HasInsertionPoint()); |
| if (script_runner_ && !script_runner_->ExecuteScriptsWaitingForParsing()) |
| return; |
| end(); |
| } |
| |
| bool HTMLDocumentParser::ShouldDelayEnd() const { |
| return task_runner_state_->InPumpSession() || IsPaused() || |
| IsExecutingScript() || task_runner_state_->IsScheduled(); |
| } |
| |
| void HTMLDocumentParser::AttemptToEnd() { |
| // finish() indicates we will not receive any more data. If we are waiting on |
| // an external script to load, we can't finish parsing quite yet. |
| TRACE_EVENT1("blink", "HTMLDocumentParser::AttemptToEnd", "parser", |
| (void*)this); |
| DCHECK(task_runner_state_->ShouldAttemptToEndOnEOF()); |
| AttemptToEndForbiddenScope should_not_attempt_to_end(task_runner_state_); |
| // We should only be in this state once after calling Finish. |
| // If there are pending scripts, future control flow should pass to |
| // EndIfDelayed. |
| if (ShouldDelayEnd()) { |
| task_runner_state_->SetEndWasDelayed(true); |
| return; |
| } |
| PrepareToStopParsing(); |
| } |
| |
| void HTMLDocumentParser::EndIfDelayed() { |
| TRACE_EVENT1("blink", "HTMLDocumentParser::EndIfDelayed", "parser", |
| (void*)this); |
| ShouldCompleteScope should_complete(task_runner_state_); |
| EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_); |
| // If we've already been detached, don't bother ending. |
| if (IsDetached()) |
| return; |
| |
| if (!task_runner_state_->EndWasDelayed() || ShouldDelayEnd()) |
| return; |
| |
| task_runner_state_->SetEndWasDelayed(false); |
| PrepareToStopParsing(); |
| } |
| |
| void HTMLDocumentParser::Finish() { |
| ShouldCompleteScope should_complete(task_runner_state_); |
| EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_); |
| Flush(); |
| if (IsDetached()) |
| return; |
| |
| // We're not going to get any more data off the network, so we tell the input |
| // stream we've reached the end of file. finish() can be called more than |
| // once, if the first time does not call end(). |
| if (!input_.HaveSeenEndOfFile()) |
| input_.MarkEndOfFile(); |
| |
| // If there's any deferred work remaining, signal that we |
| // want to end the document once all work's complete. |
| task_runner_state_->SetAttemptToEndOnEOF(); |
| if (task_runner_state_->IsScheduled() && !GetDocument()->IsPrefetchOnly()) { |
| return; |
| } |
| |
| AttemptToEnd(); |
| } |
| |
| bool HTMLDocumentParser::IsExecutingScript() const { |
| if (!script_runner_) |
| return false; |
| return script_runner_->IsExecutingScript(); |
| } |
| |
| OrdinalNumber HTMLDocumentParser::LineNumber() const { |
| return input_.Current().CurrentLine(); |
| } |
| |
| TextPosition HTMLDocumentParser::GetTextPosition() const { |
| const SegmentedString& current_string = input_.Current(); |
| OrdinalNumber line = current_string.CurrentLine(); |
| OrdinalNumber column = current_string.CurrentColumn(); |
| |
| return TextPosition(line, column); |
| } |
| |
| bool HTMLDocumentParser::IsWaitingForScripts() const { |
| if (IsParsingFragment()) { |
| // HTMLTreeBuilder may have a parser blocking script element, but we |
| // ignore it during fragment parsing. |
| DCHECK(!(tree_builder_->HasParserBlockingScript() || (script_runner_ && |
| script_runner_->HasParserBlockingScript()) || reentry_permit_->ParserPauseFlag())); |
| return false; |
| } |
| |
| // When the TreeBuilder encounters a </script> tag, it returns to the |
| // HTMLDocumentParser where the script is transfered from the treebuilder to |
| // the script runner. The script runner will hold the script until its loaded |
| // and run. During any of this time, we want to count ourselves as "waiting |
| // for a script" and thus run the preload scanner, as well as delay completion |
| // of parsing. |
| bool tree_builder_has_blocking_script = |
| tree_builder_->HasParserBlockingScript(); |
| bool script_runner_has_blocking_script = |
| script_runner_ && script_runner_->HasParserBlockingScript(); |
| // Since the parser is paused while a script runner has a blocking script, it |
| // should never be possible to end up with both objects holding a blocking |
| // script. |
| DCHECK( |
| !(tree_builder_has_blocking_script && script_runner_has_blocking_script)); |
| // If either object has a blocking script, the parser should be paused. |
| return tree_builder_has_blocking_script || |
| script_runner_has_blocking_script || |
| reentry_permit_->ParserPauseFlag(); |
| } |
| |
| void HTMLDocumentParser::ResumeParsingAfterPause() { |
| // This function runs after a parser-blocking script has completed. |
| TRACE_EVENT1("blink", "HTMLDocumentParser::ResumeParsingAfterPause", "parser", |
| (void*)this); |
| DCHECK(!IsExecutingScript()); |
| DCHECK(!IsPaused()); |
| |
| CheckIfBlockingStylesheetAdded(); |
| if (IsStopped() || IsPaused() || IsDetached()) |
| return; |
| DCHECK(tokenizer_); |
| |
| insertion_preload_scanner_.reset(); |
| if (task_runner_state_->GetMode() == kAllowDeferredParsing && |
| !task_runner_state_->ShouldComplete() && |
| !task_runner_state_->InPumpSession()) { |
| SchedulePumpTokenizer(); |
| } else { |
| ShouldCompleteScope should_complete(task_runner_state_); |
| PumpTokenizerIfPossible(); |
| } |
| } |
| |
| void HTMLDocumentParser::AppendCurrentInputStreamToPreloadScannerAndScan() { |
| TRACE_EVENT1( |
| "blink", |
| "HTMLDocumentParser::AppendCurrentInputStreamToPreloadScannerAndScan", |
| "parser", (void*)this); |
| if (preload_scanner_) { |
| DCHECK(preloader_); |
| preload_scanner_->AppendToEnd(input_.Current()); |
| ScanAndPreload(preload_scanner_.get()); |
| } |
| } |
| |
| void HTMLDocumentParser::NotifyScriptLoaded() { |
| TRACE_EVENT1("blink", "HTMLDocumentParser::NotifyScriptLoaded", "parser", |
| (void*)this); |
| DCHECK(script_runner_); |
| DCHECK(!IsExecutingScript()); |
| |
| scheduler::CooperativeSchedulingManager::AllowedStackScope |
| allowed_stack_scope(scheduler::CooperativeSchedulingManager::Instance()); |
| |
| if (IsStopped()) { |
| return; |
| } |
| |
| if (IsStopping()) { |
| AttemptToRunDeferredScriptsAndEnd(); |
| return; |
| } |
| |
| script_runner_->ExecuteScriptsWaitingForLoad(); |
| if (!IsPaused()) |
| ResumeParsingAfterPause(); |
| } |
| |
| void HTMLDocumentParser::ExecuteScriptsWaitingForResources() { |
| TRACE_EVENT0("blink", |
| "HTMLDocumentParser::ExecuteScriptsWaitingForResources"); |
| if (IsStopped()) |
| return; |
| |
| DCHECK(GetDocument()->IsScriptExecutionReady()); |
| |
| if (task_runner_state_->WaitingForStylesheets()) |
| task_runner_state_->SetWaitingForStylesheets(false); |
| |
| if (IsStopping()) { |
| AttemptToRunDeferredScriptsAndEnd(); |
| return; |
| } |
| |
| // Document only calls this when the Document owns the DocumentParser so this |
| // will not be called in the DocumentFragment case. |
| DCHECK(script_runner_); |
| script_runner_->ExecuteScriptsWaitingForResources(); |
| if (!IsPaused()) |
| ResumeParsingAfterPause(); |
| } |
| |
| void HTMLDocumentParser::DidAddPendingParserBlockingStylesheet() { |
| // In-body CSS doesn't block painting. The parser needs to pause so that |
| // the DOM doesn't include any elements that may depend on the CSS for style. |
| // The stylesheet can be added and removed during the parsing of a single |
| // token so don't actually set the bit to block parsing here, just track |
| // the state of the added sheet in case it does persist beyond a single |
| // token. |
| task_runner_state_->SetAddedPendingParserBlockingStylesheet(true); |
| } |
| |
| void HTMLDocumentParser::DidLoadAllPendingParserBlockingStylesheets() { |
| // Just toggle the stylesheet flag here (mostly for synchronous sheets). |
| // The document will also call into executeScriptsWaitingForResources |
| // which is when the parser will re-start, otherwise it will attempt to |
| // resume twice which could cause state machine issues. |
| task_runner_state_->SetAddedPendingParserBlockingStylesheet(false); |
| } |
| |
| void HTMLDocumentParser::CheckIfBlockingStylesheetAdded() { |
| if (task_runner_state_->AddedPendingParserBlockingStylesheet()) { |
| task_runner_state_->SetAddedPendingParserBlockingStylesheet(false); |
| task_runner_state_->SetWaitingForStylesheets(true); |
| } |
| } |
| |
| void HTMLDocumentParser::ParseDocumentFragment( |
| const String& source, |
| DocumentFragment* fragment, |
| Element* context_element, |
| ParserContentPolicy parser_content_policy) { |
| auto* parser = MakeGarbageCollected<HTMLDocumentParser>( |
| fragment, context_element, parser_content_policy); |
| parser->Append(source); |
| parser->Finish(); |
| // Allows ~DocumentParser to assert it was detached before destruction. |
| parser->Detach(); |
| } |
| |
| void HTMLDocumentParser::AppendBytes(const char* data, size_t length) { |
| TRACE_EVENT2("blink", "HTMLDocumentParser::appendBytes", "size", |
| (unsigned)length, "parser", (void*)this); |
| |
| DCHECK(Thread::MainThread()->IsCurrentThread()); |
| |
| if (!length || IsStopped()) |
| return; |
| |
| DecodedDataDocumentParser::AppendBytes(data, length); |
| } |
| |
| void HTMLDocumentParser::Flush() { |
| TRACE_EVENT1("blink", "HTMLDocumentParser::Flush", "parser", (void*)this); |
| // If we've got no decoder, we never received any data. |
| if (IsDetached() || NeedsDecoder()) |
| return; |
| DecodedDataDocumentParser::Flush(); |
| } |
| |
| void HTMLDocumentParser::SetDecoder( |
| std::unique_ptr<TextResourceDecoder> decoder) { |
| DCHECK(decoder); |
| DecodedDataDocumentParser::SetDecoder(std::move(decoder)); |
| } |
| |
| void HTMLDocumentParser::DocumentElementAvailable() { |
| TRACE_EVENT0("blink,loading", "HTMLDocumentParser::DocumentElementAvailable"); |
| Document* document = GetDocument(); |
| DCHECK(document); |
| DCHECK(document->documentElement()); |
| Element* documentElement = GetDocument()->documentElement(); |
| if (documentElement->hasAttribute(u"\u26A1") || |
| documentElement->hasAttribute("amp") || |
| documentElement->hasAttribute("i-amphtml-layout")) { |
| // The DocumentLoader fetches a main resource and handles the result. |
| // But it may not be available if JavaScript appends HTML to the page later |
| // in the page's lifetime. This can happen both from in-page JavaScript and |
| // from extensions. See example callstacks linked from crbug.com/931330. |
| if (document->Loader()) { |
| document->Loader()->DidObserveLoadingBehavior( |
| kLoadingBehaviorAmpDocumentLoaded); |
| } |
| } |
| if (preloader_) |
| FetchQueuedPreloads(); |
| } |
| |
| std::unique_ptr<HTMLPreloadScanner> HTMLDocumentParser::CreatePreloadScanner( |
| TokenPreloadScanner::ScannerType scanner_type) { |
| #if DCHECK_IS_ON() |
| if (scanner_type == TokenPreloadScanner::ScannerType::kMainDocument) { |
| // A main document scanner should never be created if scanning is already |
| // happening in the background. |
| DCHECK(!background_scanner_); |
| // If background scanning is enabled, the main document scanner is used when |
| // the parser is paused, for prefetch documents, or if preload scanning is |
| // disabled in tests (HTMLPreloadScanner internally handles this setting). |
| DCHECK(!ThreadedPreloadScannerEnabled() || IsPaused() || |
| GetDocument()->IsPrefetchOnly() || |
| !IsPreloadScanningEnabled(GetDocument())); |
| } |
| #endif |
| return HTMLPreloadScanner::Create(*GetDocument(), options_, scanner_type); |
| } |
| |
| void HTMLDocumentParser::ScanAndPreload(HTMLPreloadScanner* scanner) { |
| TRACE_EVENT0("blink", "HTMLDocumentParser::ScanAndPreload"); |
| DCHECK(preloader_); |
| base::ElapsedTimer timer; |
| ProcessPreloadData(scanner->Scan(GetDocument()->ValidBaseElementURL())); |
| base::UmaHistogramTimes( |
| base::StrCat({"Blink.ScanAndPreloadTime", GetPreloadHistogramSuffix()}), |
| timer.Elapsed()); |
| } |
| |
| void HTMLDocumentParser::ProcessPreloadData( |
| std::unique_ptr<PendingPreloadData> preload_data) { |
| for (const auto& value : preload_data->meta_ch_values) { |
| HTMLMetaElement::ProcessMetaCH(*GetDocument(), value.value, value.type, |
| value.is_doc_preloader); |
| } |
| |
| FetchBatchScope fetch_batch(this); |
| |
| // Make sure that the viewport is up-to-date, so that the correct viewport |
| // dimensions will be fed to the preload scanner. |
| if (GetDocument()->Loader() && |
| task_runner_state_->GetMode() == kAllowDeferredParsing) { |
| if (preload_data->viewport.has_value()) { |
| GetDocument()->GetStyleEngine().UpdateViewport(); |
| } |
| if (task_runner_state_->NeedsLinkHeaderPreloadsDispatch()) { |
| { |
| TRACE_EVENT0("blink", "HTMLDocumentParser::DispatchLinkHeaderPreloads"); |
| GetDocument()->Loader()->DispatchLinkHeaderPreloads( |
| base::OptionalOrNullptr(preload_data->viewport), |
| PreloadHelper::kOnlyLoadMedia); |
| } |
| if (GetDocument()->Loader()->GetPrefetchedSignedExchangeManager()) { |
| TRACE_EVENT0("blink", |
| "HTMLDocumentParser::DispatchSignedExchangeManager"); |
| // Link header preloads for prefetched signed exchanges won't be started |
| // until StartPrefetchedLinkHeaderPreloads() is called. See the header |
| // comment of PrefetchedSignedExchangeManager. |
| GetDocument() |
| ->Loader() |
| ->GetPrefetchedSignedExchangeManager() |
| ->StartPrefetchedLinkHeaderPreloads(); |
| } |
| task_runner_state_->DispatchedLinkHeaderPreloads(); |
| } |
| } |
| |
| task_runner_state_->SetSeenCSPMetaTag(preload_data->has_csp_meta_tag); |
| for (auto& request : preload_data->requests) { |
| queued_preloads_.push_back(std::move(request)); |
| } |
| FetchQueuedPreloads(); |
| } |
| |
| void HTMLDocumentParser::FetchQueuedPreloads() { |
| DCHECK(preloader_); |
| TRACE_EVENT0("blink", "HTMLDocumentParser::FetchQueuedPreloads"); |
| |
| if (!queued_preloads_.IsEmpty()) { |
| base::ElapsedTimer timer; |
| preloader_->TakeAndPreload(queued_preloads_); |
| base::UmaHistogramTimes(base::StrCat({"Blink.FetchQueuedPreloadsTime", |
| GetPreloadHistogramSuffix()}), |
| timer.Elapsed()); |
| } |
| } |
| |
| std::string HTMLDocumentParser::GetPreloadHistogramSuffix() { |
| bool is_outermost_main_frame = |
| GetDocument() && GetDocument()->IsInOutermostMainFrame(); |
| bool have_seen_first_byte = task_runner_state_->SeenFirstByte(); |
| return base::StrCat({is_outermost_main_frame ? ".MainFrame" : ".Subframe", |
| have_seen_first_byte ? ".NonInitial" : ".Initial"}); |
| } |
| |
| void HTMLDocumentParser::ScanInBackground(const String& source) { |
| if (task_runner_state_->IsSynchronous() || !GetDocument()->Url().IsValid()) |
| return; |
| |
| if (ThreadedPreloadScannerEnabled() && preloader_ && |
| // TODO(crbug.com/1329535): Support scanning prefetch documents in the |
| // background. |
| !GetDocument()->IsPrefetchOnly() && |
| IsPreloadScanningEnabled(GetDocument())) { |
| // The background scanner should never be created if a main thread scanner |
| // is already available. |
| DCHECK(!preload_scanner_); |
| if (!background_scanner_) { |
| background_scanner_ = HTMLPreloadScanner::CreateBackground( |
| this, options_, GetPreloadScannerThread()->GetTaskRunner()); |
| } |
| |
| background_scanner_.AsyncCall(&HTMLPreloadScanner::ScanInBackground) |
| .WithArgs( |
| source, GetDocument()->ValidBaseElementURL(), |
| CrossThreadBindRepeating( |
| &HTMLDocumentParser::AddPreloadDataOnBackgroundThread, |
| WrapCrossThreadPersistent(this), |
| GetDocument()->GetTaskRunner(TaskType::kInternalLoading))); |
| return; |
| } |
| |
| if (!PrecompileInlineScriptsEnabled()) |
| return; |
| |
| DCHECK(!background_scanner_); |
| if (!background_script_scanner_) { |
| background_script_scanner_ = BackgroundHTMLScanner::Create(options_, this); |
| } |
| |
| background_script_scanner_.AsyncCall(&BackgroundHTMLScanner::Scan) |
| .WithArgs(source); |
| } |
| |
| void HTMLDocumentParser::AddPreloadDataOnBackgroundThread( |
| scoped_refptr<base::SequencedTaskRunner> task_runner, |
| std::unique_ptr<PendingPreloadData> preload_data) { |
| DCHECK(!IsMainThread()); |
| bool should_post_task = false; |
| { |
| base::AutoLock lock(pending_preload_lock_); |
| // Only post a task if the preload data is empty. Otherwise, a task has |
| // already been posted and will consume the new data. |
| should_post_task = pending_preload_data_.IsEmpty(); |
| pending_preload_data_.push_back(std::move(preload_data)); |
| } |
| |
| if (should_post_task) { |
| PostCrossThreadTask( |
| *task_runner, FROM_HERE, |
| CrossThreadBindOnce(&HTMLDocumentParser::FlushPendingPreloads, |
| WrapCrossThreadPersistent(this))); |
| } |
| } |
| |
| void HTMLDocumentParser::FlushPendingPreloads() { |
| DCHECK(IsMainThread()); |
| if (!ThreadedPreloadScannerEnabled()) |
| return; |
| |
| if (IsDetached() || !preloader_) |
| return; |
| |
| // Do this in a loop in case more preloads are added in the background. |
| while (HasPendingPreloads()) { |
| Vector<std::unique_ptr<PendingPreloadData>> preload_data; |
| { |
| base::AutoLock lock(pending_preload_lock_); |
| preload_data = std::move(pending_preload_data_); |
| } |
| |
| for (auto& preload : preload_data) |
| ProcessPreloadData(std::move(preload)); |
| } |
| } |
| |
| void HTMLDocumentParser::StartFetchBatch() { |
| GetDocument()->Fetcher()->StartBatch(); |
| pending_batch_operations_++; |
| } |
| |
| void HTMLDocumentParser::EndFetchBatch() { |
| if (!IsDetached() && pending_batch_operations_ > 0) { |
| pending_batch_operations_--; |
| GetDocument()->Fetcher()->EndBatch(); |
| } |
| } |
| |
| void HTMLDocumentParser::FlushFetchBatch() { |
| if (!IsDetached() && pending_batch_operations_ > 0) { |
| ResourceFetcher* fetcher = GetDocument()->Fetcher(); |
| while (pending_batch_operations_ > 0) { |
| pending_batch_operations_--; |
| fetcher->EndBatch(); |
| } |
| } |
| } |
| |
| } // namespace blink |