| 'use strict'; |
| |
| var Tokenizer = require('../tokenization/tokenizer'), |
| ForeignContent = require('../common/foreign_content'), |
| UNICODE = require('../common/unicode'), |
| HTML = require('../common/html'); |
| |
| //Aliases |
// Short handles for the tag-name and namespace tables exposed by the HTML module.
var $ = HTML.TAG_NAMES;
var NS = HTML.NAMESPACES;
| |
| |
| //Tokenizer proxy |
| //NOTE: this proxy simulates the adjustments of the Tokenizer that the standard parser performs during tree construction. |
// Wraps a Tokenizer and tracks the namespace state needed to adjust it
// between tokens.
//
// html    - forwarded unchanged to the Tokenizer constructor
// options - forwarded unchanged to the Tokenizer constructor
//
// A new proxy starts with an empty namespace stack, no current namespace,
// and is considered to be outside of foreign content.
var TokenizerProxy = module.exports = function (html, options) {
    // Namespace bookkeeping: the stack itself plus the index of its top entry.
    this.namespaceStack = [];
    this.namespaceStackTop = -1;

    // Cached view of the top of the stack.
    this.currentNamespace = null;
    this.inForeignContent = false;

    // The tokenizer whose output and settings this proxy adjusts.
    this.tokenizer = new Tokenizer(html, options);
};
| |
| //API |
// Fetches the next token from the wrapped tokenizer, applies the proxy's
// adjustments for it, and returns that same token object.
//
// - start tag / end tag tokens are routed to the matching handler;
// - a NULL character token seen while in foreign content is rewritten in
//   place into a character token holding the replacement character;
// - every other token is passed through untouched.
TokenizerProxy.prototype.getNextToken = function () {
    var token = this.tokenizer.getNextToken();

    switch (token.type) {
        case Tokenizer.START_TAG_TOKEN:
            this._handleStartTagToken(token);
            break;

        case Tokenizer.END_TAG_TOKEN:
            this._handleEndTagToken(token);
            break;

        case Tokenizer.NULL_CHARACTER_TOKEN:
            // Outside of foreign content the NULL token is left as is.
            if (this.inForeignContent) {
                token.type = Tokenizer.CHARACTER_TOKEN;
                token.chars = UNICODE.REPLACEMENT_CHARACTER;
            }
            break;
    }

    return token;
};
| |
| //Namespace stack mutations |
// Pushes `namespace` onto the namespace stack and makes it current.
// Any namespace other than NS.HTML counts as foreign content; the tokenizer's
// allowCDATA flag is kept equal to that foreign-content flag.
TokenizerProxy.prototype._enterNamespace = function (namespace) {
    var isForeign = namespace !== NS.HTML;

    this.namespaceStack.push(namespace);
    this.namespaceStackTop += 1;

    this.currentNamespace = namespace;
    this.inForeignContent = isForeign;
    this.tokenizer.allowCDATA = isForeign;
};
| |
// Pops the current namespace and restores the one below it.
//
// When the stack becomes (or already is) empty, the proxy goes back to the
// state the constructor sets up: no current namespace and NOT in foreign
// content. Without this, `undefined !== NS.HTML` would evaluate to true and
// the proxy would stay in foreign content after the outermost foreign
// element is closed.
//
// The tokenizer's allowCDATA flag is kept equal to the foreign-content flag.
TokenizerProxy.prototype._leaveCurrentNamespace = function () {
    //NOTE: popping an empty stack must not push the top index below -1.
    if (this.namespaceStackTop >= 0) {
        this.namespaceStackTop--;
        this.namespaceStack.pop();
    }

    if (this.namespaceStackTop >= 0) {
        this.currentNamespace = this.namespaceStack[this.namespaceStackTop];
        this.inForeignContent = this.currentNamespace !== NS.HTML;
    }

    else {
        //NOTE: nothing left on the stack - same values as a freshly constructed proxy.
        this.currentNamespace = null;
        this.inForeignContent = false;
    }

    this.tokenizer.allowCDATA = this.inForeignContent;
};
| |
| //Token handlers |
// Switches the tokenizer state according to the tag name `tn` of a start tag.
// Tag names not listed below leave the tokenizer state unchanged.
TokenizerProxy.prototype._ensureTokenizerMode = function (tn) {
    switch (tn) {
        case $.TEXTAREA:
        case $.TITLE:
            this.tokenizer.state = Tokenizer.MODE.RCDATA;
            break;

        case $.PLAINTEXT:
            this.tokenizer.state = Tokenizer.MODE.PLAINTEXT;
            break;

        case $.SCRIPT:
            this.tokenizer.state = Tokenizer.MODE.SCRIPT_DATA;
            break;

        case $.STYLE:
        case $.IFRAME:
        case $.XMP:
        case $.NOEMBED:
        case $.NOFRAMES:
        case $.NOSCRIPT:
            this.tokenizer.state = Tokenizer.MODE.RAWTEXT;
            break;
    }
};
| |
// Updates the namespace stack / tokenizer mode for a start tag token.
//
// - <svg> and <math> always enter their own namespace;
// - outside of foreign content the tokenizer mode is adjusted for the tag;
// - inside foreign content the tag either causes an exit from the current
//   namespace or, if it is an integration point, enters the HTML namespace.
TokenizerProxy.prototype._handleStartTagToken = function (token) {
    var tagName = token.tagName;

    if (tagName === $.SVG) {
        this._enterNamespace(NS.SVG);
        return;
    }

    if (tagName === $.MATH) {
        this._enterNamespace(NS.MATHML);
        return;
    }

    if (!this.inForeignContent) {
        this._ensureTokenizerMode(tagName);
        return;
    }

    if (ForeignContent.causesExit(token)) {
        this._leaveCurrentNamespace();
        return;
    }

    var ns = this.currentNamespace;

    //NOTE: content of an integration point is handled as HTML
    if (ForeignContent.isMathMLTextIntegrationPoint(tagName, ns) ||
        ForeignContent.isHtmlIntegrationPoint(tagName, ns, token.attrs)) {
        this._enterNamespace(NS.HTML);
    }
};
| |
// Updates the namespace stack / tokenizer mode for an end tag token.
//
// - inside foreign content, </svg> or </math> matching the current namespace
//   leaves that namespace; other end tags are ignored;
// - outside of foreign content, an end tag that is an integration point of
//   the namespace one level below the top of the stack leaves the current
//   namespace; otherwise </script> puts the tokenizer back into DATA mode.
TokenizerProxy.prototype._handleEndTagToken = function (token) {
    var tagName = token.tagName;

    if (this.inForeignContent) {
        if ((tagName === $.SVG && this.currentNamespace === NS.SVG) ||
            (tagName === $.MATH && this.currentNamespace === NS.MATHML)) {
            this._leaveCurrentNamespace();
        }

        return;
    }

    // Namespace that was current before the one on top of the stack was entered.
    var outerNs = this.namespaceStack[this.namespaceStackTop - 1];

    //NOTE: check for exit from integration point
    //NOTE(review): the check reads token.attrs of the END tag; if end tag tokens
    //carry no attributes, attribute-dependent integration points may not be
    //recognised here - confirm against ForeignContent.isHtmlIntegrationPoint.
    var closesIntegrationPoint =
        ForeignContent.isMathMLTextIntegrationPoint(tagName, outerNs) ||
        ForeignContent.isHtmlIntegrationPoint(tagName, outerNs, token.attrs);

    if (closesIntegrationPoint) {
        this._leaveCurrentNamespace();
    }

    else if (tagName === $.SCRIPT) {
        this.tokenizer.state = Tokenizer.MODE.DATA;
    }
};