Teach the background parser to ignore certain elements inside '<select>'.
'HTMLTreeBuilderSimulator' doesn't currently understand that we shouldn't
hop into PLAINTEXTState or RAWTEXTState inside '<select>' elements. This
has the unfortunate side-effect of enabling dangling markup injection
attacks that exfiltrate data via '<select><option><plaintext>' and similar markup.
This patch ensures that `<select>` behaves as specified, matching Safari,
Firefox, and Edge's behavior.
Thanks to @zcorpan for pointing out Blink's error in the thread at
https://github.com/whatwg/html/issues/2252.
BUG=680072
Review-Url: https://codereview.chromium.org/2625103002
Cr-Commit-Position: refs/heads/master@{#443573}
diff --git a/third_party/WebKit/LayoutTests/fast/parser/inselect-tokenization.html b/third_party/WebKit/LayoutTests/fast/parser/inselect-tokenization.html
new file mode 100644
index 0000000..997cfbd
--- /dev/null
+++ b/third_party/WebKit/LayoutTests/fast/parser/inselect-tokenization.html
@@ -0,0 +1,106 @@
+<!DOCTYPE html>
+<script src="../../resources/testharness.js"></script>
+<script src="../../resources/testharnessreport.js"></script>
+<body>
+<script>
+ /************************************************************************
+ * Helper functions!
+ */
+ function createFrame(markup) {
+ var i = document.createElement('iframe');
+ i.srcdoc = markup;
+ return i;
+ }
+
+ function appendAndWaitForLoad(test, frame) {
+ return new Promise((resolve, reject) => {
+ frame.onload = test.step_func(_ => {
+ frame.onload = null;
+ resolve();
+ });
+ document.body.appendChild(frame);
+ });
+ }
+
+ function assert_select(test, frame, value) {
+ var select = frame.contentDocument.querySelector('select');
+ assert_equals(select.value, value, 'select');
+ }
+
+ function assert_element_innerText(test, frame, name, value) {
+ var el = frame.contentDocument.querySelector(name);
+ if (value === null || value === undefined)
+ assert_equals(el, null, name);
+ else
+ assert_equals(el.innerText, value, name);
+ }
+
+ /************************************************************************
+ * The actual tests!
+ */
+ var tests = [
+ // Populated below. <input>, <keygen>, and <textarea> close <select>, so a <plaintext> (or other raw-text element) that follows them is tokenized normally.
+ ];
+
+ var elementsToIgnore = [
+ "iframe",
+ "noembed",
+ "noframes",
+ "noscript",
+ "plaintext",
+ "style",
+ "xmp",
+ ];
+
+ elementsToIgnore.forEach(el => {
+ tests.push(
+ {
+ markup: `<form><select><option><${el}>1<element></element>`,
+ select: "1",
+ innerText: null,
+ name: el
+ }, {
+ markup: `<form><select><option>1<${el}>2<element></element>`,
+ select: "12",
+ innerText: null,
+ name: el
+ }, {
+ markup: `<form><select><option>1<${el}>2<element></element>3`,
+ select: "123",
+ innerText: null,
+ name: el
+ });
+ if (el != "iframe") {
+ tests.push(
+ {
+ markup: `<form><select><option>1<input><${el}>2<element></element>`,
+ select: "1",
+ innerText: "2<element></element>",
+ name: el
+ }, {
+ markup: `<form><select><option>1<keygen><${el}>2<element></element>`,
+ select: "1",
+ innerText: "2<element></element>",
+ name: el
+ }, {
+ markup: `<form><select><option>1<textarea></textarea><${el}>2<element></element>`,
+ select: "1",
+ innerText: "2<element></element>",
+ name: el
+ });
+ }
+ });
+
+
+ tests.forEach(test => {
+ async_test(t => {
+ var i = createFrame(test.markup);
+
+ appendAndWaitForLoad(t, i)
+ .then(t.step_func_done(_ => {
+ assert_select(t, i, test.select);
+ assert_element_innerText(t, i, test.name, test.innerText);
+ }));
+ }, test.markup);
+ });
+</script>
diff --git a/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.cpp b/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.cpp
index d23ba61..aa8d081 100644
--- a/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.cpp
+++ b/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.cpp
@@ -92,9 +92,17 @@
threadSafeMatch(tagName, MathMLNames::mtextTag);
}
+static bool tokenExitsInSelect(const CompactHTMLToken& token) {
+ // https://html.spec.whatwg.org/#parsing-main-inselect
+ const String& tagName = token.data();
+ return threadSafeMatch(tagName, inputTag) ||
+ threadSafeMatch(tagName, keygenTag) ||
+ threadSafeMatch(tagName, textareaTag);
+}
+
HTMLTreeBuilderSimulator::HTMLTreeBuilderSimulator(
const HTMLParserOptions& options)
- : m_options(options) {
+ : m_options(options), m_inSelectInsertionMode(false) {
m_namespaceStack.push_back(HTML);
}
@@ -140,20 +148,38 @@
if (threadSafeMatch(tagName, textareaTag) ||
threadSafeMatch(tagName, titleTag)) {
tokenizer->setState(HTMLTokenizer::RCDATAState);
- } else if (threadSafeMatch(tagName, plaintextTag)) {
- tokenizer->setState(HTMLTokenizer::PLAINTEXTState);
} else if (threadSafeMatch(tagName, scriptTag)) {
tokenizer->setState(HTMLTokenizer::ScriptDataState);
simulatedToken = ScriptStart;
- } else if (threadSafeMatch(tagName, styleTag) ||
- threadSafeMatch(tagName, iframeTag) ||
- threadSafeMatch(tagName, xmpTag) ||
- (threadSafeMatch(tagName, noembedTag) &&
- m_options.pluginsEnabled) ||
- threadSafeMatch(tagName, noframesTag) ||
- (threadSafeMatch(tagName, noscriptTag) &&
- m_options.scriptEnabled)) {
- tokenizer->setState(HTMLTokenizer::RAWTEXTState);
+ } else if (!m_inSelectInsertionMode) {
+ // If we're in the "in select" insertion mode, all of these tags are
+ // ignored, so we shouldn't change the tokenizer state:
+ // https://html.spec.whatwg.org/#parsing-main-inselect
+ if (threadSafeMatch(tagName, plaintextTag) &&
+ !m_inSelectInsertionMode) {
+ tokenizer->setState(HTMLTokenizer::PLAINTEXTState);
+ } else if (threadSafeMatch(tagName, styleTag) ||
+ threadSafeMatch(tagName, iframeTag) ||
+ threadSafeMatch(tagName, xmpTag) ||
+ (threadSafeMatch(tagName, noembedTag) &&
+ m_options.pluginsEnabled) ||
+ threadSafeMatch(tagName, noframesTag) ||
+ (threadSafeMatch(tagName, noscriptTag) &&
+ m_options.scriptEnabled)) {
+ tokenizer->setState(HTMLTokenizer::RAWTEXTState);
+ }
+ }
+
+ // We need to track whether we're in the "in select" insertion mode
+ // in order to determine whether '<plaintext>' will put the tokenizer
+ // into PLAINTEXTState, and whether '<xmp>' and others will consume
+ // textual content.
+ //
+ // https://html.spec.whatwg.org/#parsing-main-inselect
+ if (threadSafeMatch(tagName, selectTag)) {
+ m_inSelectInsertionMode = true;
+ } else if (m_inSelectInsertionMode && tokenExitsInSelect(token)) {
+ m_inSelectInsertionMode = false;
}
}
}
@@ -169,12 +195,15 @@
(m_namespaceStack.contains(SVG) && m_namespaceStack.back() == HTML &&
tokenExitsSVG(token)) ||
(m_namespaceStack.contains(MathML) && m_namespaceStack.back() == HTML &&
- tokenExitsMath(token)))
+ tokenExitsMath(token))) {
m_namespaceStack.pop_back();
+ }
if (threadSafeMatch(tagName, scriptTag)) {
if (!inForeignContent())
tokenizer->setState(HTMLTokenizer::DataState);
return ScriptEnd;
+ } else if (threadSafeMatch(tagName, selectTag)) {
+ m_inSelectInsertionMode = false;
}
}
diff --git a/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.h b/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.h
index d8027db..420eb21 100644
--- a/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.h
+++ b/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.h
@@ -61,6 +61,7 @@
HTMLParserOptions m_options;
State m_namespaceStack;
+ bool m_inSelectInsertionMode;
};
} // namespace blink