blob: 3589b67de86f5fe7344beabcc004ee2afb007aaa [file] [log] [blame]
/*
* Copyright (C) 2013 Google, Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "core/html/parser/BackgroundHTMLParser.h"
#include "core/HTMLNames.h"
#include "core/html/parser/HTMLDocumentParser.h"
#include "core/html/parser/TextResourceDecoder.h"
#include "core/html/parser/XSSAuditor.h"
#include "platform/Task.h"
#include "platform/ThreadSafeFunctional.h"
#include "public/platform/Platform.h"
#include "public/platform/WebScheduler.h"
#include "wtf/text/TextPosition.h"
namespace blink {
// On a network with high latency and high bandwidth, using a device
// with a fast CPU, we could end up speculatively tokenizing
// the whole document, well ahead of when the main thread actually needs it.
// This is a waste of memory (and potentially time if the speculation fails).
// So we limit our outstanding tokens arbitrarily to 10,000.
// Our maximal memory spent speculating will be approximately:
// (defaultOutstandingTokenLimit + defaultPendingTokenLimit) *
// sizeof(CompactHTMLToken)
// We use a separate low and high water mark to avoid constantly topping
// off the main thread's token buffer.
// At time of writing, this is (10000 + 1000) * 28 bytes = ~308 KB of memory.
// These numbers have not been tuned.
static const size_t defaultOutstandingTokenLimit = 10000;
// We limit our chunks to 1000 tokens, to make sure the main
// thread is never waiting on the parser thread for tokens.
// This was tuned in https://bugs.webkit.org/show_bug.cgi?id=110408.
static const size_t defaultPendingTokenLimit = 1000;
using namespace HTMLNames;
#if ENABLE(ASSERT)
static void checkThatTokensAreSafeToSendToAnotherThread(const CompactHTMLTokenStream* tokens)
{
for (size_t i = 0; i < tokens->size(); ++i)
ASSERT(tokens->at(i).isSafeToSendToAnotherThread());
}
static void checkThatPreloadsAreSafeToSendToAnotherThread(const PreloadRequestStream& preloads)
{
for (size_t i = 0; i < preloads.size(); ++i)
ASSERT(preloads[i]->isSafeToSendToAnotherThread());
}
static void checkThatXSSInfosAreSafeToSendToAnotherThread(const XSSInfoStream& infos)
{
for (size_t i = 0; i < infos.size(); ++i)
ASSERT(infos[i]->isSafeToSendToAnotherThread());
}
#endif
// Heap-allocates a BackgroundHTMLParser from |reference| (used to initialise
// the new object's weak factory), |config| and |scheduler|.
// The resulting pointer is deliberately not kept here: the object is destroyed
// by BackgroundHTMLParser::stop(), which performs `delete this`.
void BackgroundHTMLParser::start(PassRefPtr<WeakReference<BackgroundHTMLParser>> reference, PassOwnPtr<Configuration> config, WebScheduler* scheduler)
{
new BackgroundHTMLParser(reference, config, scheduler);
// Caller must free by calling stop().
}
// Seeds both token limits with the file-level defaults
// (defaultOutstandingTokenLimit and defaultPendingTokenLimit). Only these two
// fields are given explicit values here.
BackgroundHTMLParser::Configuration::Configuration()
: outstandingTokenLimit(defaultOutstandingTokenLimit)
, pendingTokenLimit(defaultPendingTokenLimit)
{
}
// Builds the parser from |config|.
// - The tokenizer, the tree builder simulator and m_options are all set up
//   from config->options.
// - The XSS auditor, preload scanner and decoder are release()d out of
//   |config| and stored in this object.
// - |scheduler| is stored in m_scheduler and used later to post tasks to its
//   loading task runner.
// The parser starts with an empty pending token stream and with
// m_startingScript cleared.
BackgroundHTMLParser::BackgroundHTMLParser(PassRefPtr<WeakReference<BackgroundHTMLParser>> reference, PassOwnPtr<Configuration> config, WebScheduler* scheduler)
: m_weakFactory(reference, this)
, m_token(adoptPtr(new HTMLToken))
, m_tokenizer(HTMLTokenizer::create(config->options))
, m_treeBuilderSimulator(config->options)
, m_options(config->options)
, m_outstandingTokenLimit(config->outstandingTokenLimit)
, m_parser(config->parser)
, m_pendingTokens(adoptPtr(new CompactHTMLTokenStream))
, m_pendingTokenLimit(config->pendingTokenLimit)
, m_xssAuditor(config->xssAuditor.release())
, m_preloadScanner(config->preloadScanner.release())
, m_decoder(config->decoder.release())
, m_scheduler(scheduler)
, m_startingScript(false)
{
// Sanity-check the configured limits: both must be non-zero, and the
// outstanding-token limit must be at least as large as the per-chunk
// pending-token limit.
ASSERT(m_outstandingTokenLimit > 0);
ASSERT(m_pendingTokenLimit > 0);
ASSERT(m_outstandingTokenLimit >= m_pendingTokenLimit);
}
// Intentionally empty: no explicit teardown is performed here.
// Destruction is triggered by stop(), which does `delete this`.
BackgroundHTMLParser::~BackgroundHTMLParser()
{
}
void BackgroundHTMLParser::appendRawBytesFromParserThread(const char* data, int dataLength)
{
ASSERT(m_decoder);
updateDocument(m_decoder->decode(data, dataLength));
}
// Decodes the raw bytes held in |buffer| with the current decoder and feeds
// the decoded text to updateDocument(). A decoder must be set.
void BackgroundHTMLParser::appendRawBytesFromMainThread(PassOwnPtr<Vector<char>> buffer)
{
    ASSERT(m_decoder);
    const String decodedText = m_decoder->decode(buffer->data(), buffer->size());
    updateDocument(decodedText);
}
// Appends already-decoded text to the input stream and then runs the
// tokenizer over it. Must not be called once the input has been closed
// (markEndOfFile() closes it).
void BackgroundHTMLParser::appendDecodedBytes(const String& input)
{
ASSERT(!m_input.current().isClosed());
m_input.append(input);
pumpTokenizer();
}
// Replaces the decoder used by appendRawBytesFrom*(), flush() and
// updateDocument(). |decoder| must be non-null.
void BackgroundHTMLParser::setDecoder(PassOwnPtr<TextResourceDecoder> decoder)
{
ASSERT(decoder);
m_decoder = decoder;
}
void BackgroundHTMLParser::flush()
{
ASSERT(m_decoder);
updateDocument(m_decoder->flush());
}
void BackgroundHTMLParser::updateDocument(const String& decodedData)
{
DocumentEncodingData encodingData(*m_decoder.get());
if (encodingData != m_lastSeenEncodingData) {
m_lastSeenEncodingData = encodingData;
m_xssAuditor->setEncoding(encodingData.encoding());
m_scheduler->loadingTaskRunner()->postTask(
FROM_HERE,
threadSafeBind(&HTMLDocumentParser::didReceiveEncodingDataFromBackgroundParser, AllowCrossThreadAccess(m_parser), encodingData));
}
if (decodedData.isEmpty())
return;
appendDecodedBytes(decodedData);
}
// Restores the parser to the state captured in |checkpoint| and resumes
// tokenizing from there.
// The token and tokenizer are release()d out of |checkpoint| into this
// object; the tree builder simulator, the input stream and the preload scanner
// are each rewound to their checkpointed state. m_parser is replaced with the
// one carried by the checkpoint.
void BackgroundHTMLParser::resumeFrom(PassOwnPtr<Checkpoint> checkpoint)
{
m_parser = checkpoint->parser;
m_token = checkpoint->token.release();
m_tokenizer = checkpoint->tokenizer.release();
m_treeBuilderSimulator.setState(checkpoint->treeBuilderState);
m_input.rewindTo(checkpoint->inputCheckpoint, checkpoint->unparsedInput);
m_preloadScanner->rewindTo(checkpoint->preloadScannerCheckpoint);
// Any "next chunk starts a script" flag set before the rewind no longer applies.
m_startingScript = false;
pumpTokenizer();
}
// Discards input checkpoints older than |inputCheckpoint| and then tries to
// tokenize more input.
// NOTE(review): pumpTokenizer() bails out while totalCheckpointTokenCount()
// exceeds the outstanding limit; invalidating old checkpoints presumably
// lowers that count so tokenizing can continue - confirm against the input
// stream implementation.
void BackgroundHTMLParser::startedChunkWithCheckpoint(HTMLInputCheckpoint inputCheckpoint)
{
// Note, we should not have to worry about the index being invalid
// as messages from the main thread will be processed in FIFO order.
m_input.invalidateCheckpointsBefore(inputCheckpoint);
pumpTokenizer();
}
// Ends the input: appends the end-of-file marker and closes the input stream
// (see markEndOfFile()), then tokenizes whatever input remains.
void BackgroundHTMLParser::finish()
{
markEndOfFile();
pumpTokenizer();
}
// Destroys this parser. This is the counterpart of start(), which allocates
// the object with `new` and does not keep the pointer. The object must not be
// used after this call returns.
void BackgroundHTMLParser::stop()
{
delete this;
}
// Switches the tokenizer directly into HTMLTokenizer::PLAINTEXTState.
void BackgroundHTMLParser::forcePlaintextForTextDocument()
{
// This is only used by the TextDocumentParser (a subclass of HTMLDocumentParser)
// to force us into the PLAINTEXT state w/o using a <plaintext> tag.
// The TextDocumentParser uses a <pre> tag for historical/compatibility reasons.
m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState);
}
void BackgroundHTMLParser::markEndOfFile()
{
ASSERT(!m_input.current().isClosed());
m_input.append(String(&kEndOfFileMarker, 1));
m_input.close();
}
// Runs the tokenizer over the currently available input, collecting tokens in
// m_pendingTokens and handing them off in chunks via sendTokensToMainThread().
//
// The loop stops when either
//   - the tokenizer cannot produce another token from the current input (any
//     pending tokens are sent first), or
//   - a chunk has just been sent and the number of checkpointed tokens
//     exceeds m_outstandingTokenLimit.
// A chunk is sent when a script end is simulated, when m_pendingTokenLimit
// tokens have accumulated, or immediately before a script-start token.
void BackgroundHTMLParser::pumpTokenizer()
{
// Tracks what the tree builder simulator made of the most recent token.
HTMLTreeBuilderSimulator::SimulatedToken simulatedToken = HTMLTreeBuilderSimulator::OtherToken;
// No need to start speculating until the main thread has almost caught up.
if (m_input.totalCheckpointTokenCount() > m_outstandingTokenLimit)
return;
while (true) {
// Source tracking is only needed when the XSS auditor is active.
if (m_xssAuditor->isEnabled())
m_sourceTracker.start(m_input.current(), m_tokenizer.get(), *m_token);
if (!m_tokenizer->nextToken(m_input.current(), *m_token)) {
// We've reached the end of our current input.
sendTokensToMainThread();
break;
}
if (m_xssAuditor->isEnabled())
m_sourceTracker.end(m_input.current(), m_tokenizer.get(), *m_token);
// The braces scope |position|, |xssInfo| and |token| so they are gone
// before m_token is cleared below.
{
// Line/column of the input cursor now that the token has been consumed.
TextPosition position = TextPosition(m_input.current().currentLine(), m_input.current().currentColumn());
// A non-null result from the auditor is queued for the next chunk,
// tagged with the position computed above.
if (OwnPtr<XSSInfo> xssInfo = m_xssAuditor->filterToken(FilterTokenRequest(*m_token, m_sourceTracker, m_tokenizer->shouldAllowCDATA()))) {
xssInfo->m_textPosition = position;
m_pendingXSSInfos.append(xssInfo.release());
}
CompactHTMLToken token(m_token.get(), position);
m_preloadScanner->scan(token, m_input.current(), m_pendingPreloads);
simulatedToken = m_treeBuilderSimulator.simulate(token, m_tokenizer.get());
// Break chunks before a script tag is inserted and flag the chunk as starting a script
// so the main parser can decide if it should yield before processing the chunk.
if (simulatedToken == HTMLTreeBuilderSimulator::ScriptStart) {
// Flush first: sendTokensToMainThread() resets m_startingScript when
// it sends, so the flag set below applies to the chunk that will
// begin with this script-start token.
sendTokensToMainThread();
m_startingScript = true;
}
m_pendingTokens->append(token);
}
// Reset the reusable HTMLToken before tokenizing the next one.
m_token->clear();
if (simulatedToken == HTMLTreeBuilderSimulator::ScriptEnd || m_pendingTokens->size() >= m_pendingTokenLimit) {
sendTokensToMainThread();
// If we're far ahead of the main thread, yield for a bit to avoid consuming too much memory.
if (m_input.totalCheckpointTokenCount() > m_outstandingTokenLimit)
break;
}
}
}
// Packages everything accumulated since the last send into a ParsedChunk and
// posts it to the loading task runner, where it is delivered to
// HTMLDocumentParser::didReceiveParsedChunkFromBackgroundParser on m_parser.
//
// Does nothing when no tokens are pending; in that case any pending preloads
// and XSS infos stay queued and m_startingScript is left untouched.
void BackgroundHTMLParser::sendTokensToMainThread()
{
if (m_pendingTokens->isEmpty())
return;
#if ENABLE(ASSERT)
// Debug builds verify that everything about to cross threads is safe to send.
checkThatTokensAreSafeToSendToAnotherThread(m_pendingTokens.get());
checkThatPreloadsAreSafeToSendToAnotherThread(m_pendingPreloads);
checkThatXSSInfosAreSafeToSendToAnotherThread(m_pendingXSSInfos);
#endif
OwnPtr<HTMLDocumentParser::ParsedChunk> chunk = adoptPtr(new HTMLDocumentParser::ParsedChunk);
// The swaps move the pending preloads/XSS infos into the freshly created
// chunk and leave the pending containers holding the chunk's (new) contents.
chunk->preloads.swap(m_pendingPreloads);
chunk->xssInfos.swap(m_pendingXSSInfos);
// Snapshot tokenizer, tree builder simulator, input and preload scanner
// state alongside the tokens.
chunk->tokenizerState = m_tokenizer->state();
chunk->treeBuilderState = m_treeBuilderSimulator.state();
// Must run before m_pendingTokens is released below: it reads the token count.
chunk->inputCheckpoint = m_input.createCheckpoint(m_pendingTokens->size());
chunk->preloadScannerCheckpoint = m_preloadScanner->createCheckpoint();
chunk->tokens = m_pendingTokens.release();
// Hand the script-start flag to this chunk and clear it for the next one.
chunk->startingScript = m_startingScript;
m_startingScript = false;
m_scheduler->loadingTaskRunner()->postTask(
FROM_HERE,
new Task(threadSafeBind(&HTMLDocumentParser::didReceiveParsedChunkFromBackgroundParser, AllowCrossThreadAccess(m_parser), chunk.release())));
// The old stream now belongs to the chunk; start a new, empty one.
m_pendingTokens = adoptPtr(new CompactHTMLTokenStream);
}
}