Teach the background parser to ignore certain elements inside '<select>'.

'HTMLTreeBuilderSimulator' doesn't currently understand that we shouldn't
hop into PLAINTEXTState or RAWTEXTState inside '<select>' elements. This
has the unfortunate side-effect of enabling dangling markup injection
attacks that exfiltrate data via '<select><option><plaintext>' and similar.

This patch ensures that `<select>` behaves as specified, matching Safari,
Firefox, and Edge's behavior.

Thanks to @zcorpan for pointing out Blink's error in the thread at
https://github.com/whatwg/html/issues/2252.

BUG=680072

Review-Url: https://codereview.chromium.org/2625103002
Cr-Commit-Position: refs/heads/master@{#443573}
diff --git a/third_party/WebKit/LayoutTests/fast/parser/inselect-tokenization.html b/third_party/WebKit/LayoutTests/fast/parser/inselect-tokenization.html
new file mode 100644
index 0000000..997cfbd
--- /dev/null
+++ b/third_party/WebKit/LayoutTests/fast/parser/inselect-tokenization.html
@@ -0,0 +1,106 @@
+<!DOCTYPE html>
+<script src="../../resources/testharness.js"></script>
+<script src="../../resources/testharnessreport.js"></script>
+<body>
+<script>
+  /************************************************************************
+   *   Helper functions!
+   */
+  function createFrame(markup) {
+    var i = document.createElement('iframe');
+    i.srcdoc = markup;
+    return i;
+  }
+
+  function appendAndWaitForLoad(test, frame) {
+    return new Promise((resolve, reject) => {
+      frame.onload = test.step_func(_ => {
+        frame.onload = null;
+        resolve();
+      });
+      document.body.appendChild(frame);
+    });
+  }
+
+  function assert_select(test, frame, value) {
+    var select = frame.contentDocument.querySelector('select');
+    assert_equals(select.value, value, 'select');
+  }
+
+  function assert_element_innerText(test, frame, name, value) {
+    var el = frame.contentDocument.querySelector(name);
+    if (value === null || value === undefined)
+      assert_equals(el, null, name);
+    else
+      assert_equals(el.innerText, value, name);
+  }
+
+  /************************************************************************
+   *   The actual tests!
+   */
+  var tests = [
+    // Populated below. <input>, <keygen>, and <textarea> close <select>, so <plaintext> works.
+  ];
+
+  var elementsToIgnore = [  // Start tags expected to be ignored inside <select>.
+    "iframe",
+    "noembed",
+    "noframes",
+    "noscript",
+    "plaintext",
+    "style",
+    "xmp",
+  ];
+
+  elementsToIgnore.forEach(el => {  // Generates the test cases for each tag.
+    tests.push(  // Inside <select>: no <el> element exists; text lands in <option>.
+      {
+        markup: `<form><select><option><${el}>1<element></element>`,
+        select: "1",
+        innerText: null,
+        name: el
+      }, {
+        markup: `<form><select><option>1<${el}>2<element></element>`,
+        select: "12",
+        innerText: null,
+        name: el
+      }, {
+        markup: `<form><select><option>1<${el}>2<element></element>3`,
+        select: "123",
+        innerText: null,
+        name: el
+      });
+    if (el != "iframe") {  // NOTE(review): why <iframe> is skipped is not stated; confirm.
+      tests.push(  // After <input>/<keygen>/<textarea>: <el> exists, content is literal text.
+        {
+          markup: `<form><select><option>1<input><${el}>2<element></element>`,
+          select: "1",
+          innerText: "2<element></element>",
+          name: el
+        }, {
+          markup: `<form><select><option>1<keygen><${el}>2<element></element>`,
+          select: "1",
+          innerText: "2<element></element>",
+          name: el
+        }, {
+          markup: `<form><select><option>1<textarea></textarea><${el}>2<element></element>`,
+          select: "1",
+          innerText: "2<element></element>",
+          name: el
+        });
+    }
+  });
+  
+
+  tests.forEach(test => {
+    async_test(t => {
+      var i = createFrame(test.markup);
+
+      appendAndWaitForLoad(t, i)
+        .then(t.step_func_done(_ => {
+          assert_select(t, i, test.select);
+          assert_element_innerText(t, i, test.name, test.innerText);
+        }));
+    }, test.markup);
+  });
+</script>
diff --git a/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.cpp b/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.cpp
index d23ba61..aa8d081 100644
--- a/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.cpp
+++ b/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.cpp
@@ -92,9 +92,17 @@
          threadSafeMatch(tagName, MathMLNames::mtextTag);
 }
 
+static bool tokenExitsInSelect(const CompactHTMLToken& token) {
+  // True for the tags that take us out of the "in select" insertion mode:
+  // https://html.spec.whatwg.org/#parsing-main-inselect
+  const String& name = token.data();
+  return threadSafeMatch(name, inputTag) || threadSafeMatch(name, keygenTag) ||
+         threadSafeMatch(name, textareaTag);
+}
+
 HTMLTreeBuilderSimulator::HTMLTreeBuilderSimulator(
     const HTMLParserOptions& options)
-    : m_options(options) {
+    : m_options(options), m_inSelectInsertionMode(false) {
   m_namespaceStack.push_back(HTML);
 }
 
@@ -140,20 +148,38 @@
       if (threadSafeMatch(tagName, textareaTag) ||
           threadSafeMatch(tagName, titleTag)) {
         tokenizer->setState(HTMLTokenizer::RCDATAState);
-      } else if (threadSafeMatch(tagName, plaintextTag)) {
-        tokenizer->setState(HTMLTokenizer::PLAINTEXTState);
       } else if (threadSafeMatch(tagName, scriptTag)) {
         tokenizer->setState(HTMLTokenizer::ScriptDataState);
         simulatedToken = ScriptStart;
-      } else if (threadSafeMatch(tagName, styleTag) ||
-                 threadSafeMatch(tagName, iframeTag) ||
-                 threadSafeMatch(tagName, xmpTag) ||
-                 (threadSafeMatch(tagName, noembedTag) &&
-                  m_options.pluginsEnabled) ||
-                 threadSafeMatch(tagName, noframesTag) ||
-                 (threadSafeMatch(tagName, noscriptTag) &&
-                  m_options.scriptEnabled)) {
-        tokenizer->setState(HTMLTokenizer::RAWTEXTState);
+      } else if (!m_inSelectInsertionMode) {
+        // If we're in the "in select" insertion mode, all of these tags are
+        // ignored, so we shouldn't change the tokenizer state:
+        // https://html.spec.whatwg.org/#parsing-main-inselect
+        // m_inSelectInsertionMode is false in this branch; no re-check needed.
+        if (threadSafeMatch(tagName, plaintextTag)) {
+          tokenizer->setState(HTMLTokenizer::PLAINTEXTState);
+        } else if (threadSafeMatch(tagName, styleTag) ||
+                   threadSafeMatch(tagName, iframeTag) ||
+                   threadSafeMatch(tagName, xmpTag) ||
+                   (threadSafeMatch(tagName, noembedTag) &&
+                    m_options.pluginsEnabled) ||
+                   threadSafeMatch(tagName, noframesTag) ||
+                   (threadSafeMatch(tagName, noscriptTag) &&
+                    m_options.scriptEnabled)) {
+          tokenizer->setState(HTMLTokenizer::RAWTEXTState);
+        }
+      }
+
+      // We need to track whether we're in the "in select" insertion mode
+      // in order to determine whether '<plaintext>' will put the tokenizer
+      // into PLAINTEXTState, and whether '<xmp>' and others will consume
+      // textual content.
+      //
+      // https://html.spec.whatwg.org/#parsing-main-inselect
+      if (threadSafeMatch(tagName, selectTag)) {
+        m_inSelectInsertionMode = true;
+      } else if (m_inSelectInsertionMode && tokenExitsInSelect(token)) {
+        m_inSelectInsertionMode = false;
       }
     }
   }
@@ -169,12 +195,15 @@
         (m_namespaceStack.contains(SVG) && m_namespaceStack.back() == HTML &&
          tokenExitsSVG(token)) ||
         (m_namespaceStack.contains(MathML) && m_namespaceStack.back() == HTML &&
-         tokenExitsMath(token)))
+         tokenExitsMath(token))) {
       m_namespaceStack.pop_back();
+    }
     if (threadSafeMatch(tagName, scriptTag)) {
       if (!inForeignContent())
         tokenizer->setState(HTMLTokenizer::DataState);
       return ScriptEnd;
+    } else if (threadSafeMatch(tagName, selectTag)) {
+      m_inSelectInsertionMode = false;
     }
   }
 
diff --git a/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.h b/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.h
index d8027db..420eb21 100644
--- a/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.h
+++ b/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.h
@@ -61,6 +61,7 @@
 
   HTMLParserOptions m_options;
   State m_namespaceStack;
+  bool m_inSelectInsertionMode;
 };
 
 }  // namespace blink