blob: 291354009d537b75b3ed4582d046f09584e4d27a [file] [log] [blame]
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
(function (factory) {
if (typeof module === "object" && typeof module.exports === "object") {
var v = factory(require, exports);
if (v !== undefined) module.exports = v;
}
else if (typeof define === "function" && define.amd) {
define(["require", "exports", "vscode-nls", "../htmlLanguageTypes"], factory);
}
})(function (require, exports) {
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
var nls = require("vscode-nls");
var htmlLanguageTypes_1 = require("../htmlLanguageTypes");
var localize = nls.loadMessageBundle();
/**
 * Forward/backward cursor over a source string, addressed in UTF-16 code
 * units. All `advance*` helpers move `position`; the `peek*` helpers do not.
 * Character arguments and results are char codes (numbers), not strings.
 */
var MultiLineStream = /** @class */ (function () {
    /**
     * @param {string} source - Text to read from.
     * @param {number} position - Initial cursor offset into `source`.
     */
    function MultiLineStream(source, position) {
        this.source = source;
        this.len = source.length;
        this.position = position;
    }
    var proto = MultiLineStream.prototype;
    /** @returns {boolean} True once the cursor is at or beyond the end of the source. */
    proto.eos = function () {
        return this.position >= this.len;
    };
    /** @returns {string} The complete source text. */
    proto.getSource = function () {
        return this.source;
    };
    /** @returns {number} The current cursor offset. */
    proto.pos = function () {
        return this.position;
    };
    /** Moves the cursor to the absolute offset `pos`. */
    proto.goBackTo = function (pos) {
        this.position = pos;
    };
    /** Moves the cursor `n` code units backwards. */
    proto.goBack = function (n) {
        this.position -= n;
    };
    /** Moves the cursor `n` code units forwards. */
    proto.advance = function (n) {
        this.position += n;
    };
    /** Moves the cursor to the end of the source. */
    proto.goToEnd = function () {
        this.position = this.source.length;
    };
    /**
     * Reads the char code under the cursor and steps over it.
     * @returns {number} The char code, or 0 when there is none.
     */
    proto.nextChar = function () {
        var code = this.source.charCodeAt(this.position++);
        return code || 0;
    };
    /**
     * Reads a char code relative to the cursor without moving it.
     * @param {number} [n=0] - Offset from the cursor (may be negative).
     * @returns {number} The char code, or 0 when there is none.
     */
    proto.peekChar = function (n) {
        if (n === void 0) { n = 0; }
        var code = this.source.charCodeAt(this.position + n);
        return code || 0;
    };
    /**
     * Steps over the next character when it equals `ch`.
     * @param {number} ch - Expected char code.
     * @returns {boolean} Whether the cursor moved.
     */
    proto.advanceIfChar = function (ch) {
        if (this.source.charCodeAt(this.position) !== ch) {
            return false;
        }
        this.position++;
        return true;
    };
    /**
     * Steps over the char-code sequence `ch` when it starts at the cursor.
     * @param {number[]} ch - Expected char codes, in order.
     * @returns {boolean} Whether the whole sequence matched and was consumed.
     */
    proto.advanceIfChars = function (ch) {
        var count = ch.length;
        if (this.position + count > this.source.length) {
            return false;
        }
        for (var k = 0; k < count; k++) {
            if (this.source.charCodeAt(this.position + k) !== ch[k]) {
                return false;
            }
        }
        this.advance(count);
        return true;
    };
    /**
     * Matches `regex` against the text from the cursor onwards and, on
     * success, moves the cursor to just after the match.
     * @param {RegExp} regex - Pattern to apply to the remaining text.
     * @returns {string} The matched text, or '' when nothing matched.
     */
    proto.advanceIfRegExp = function (regex) {
        var remaining = this.source.substr(this.position);
        var found = remaining.match(regex);
        if (!found) {
            return '';
        }
        this.position = this.position + found.index + found[0].length;
        return found[0];
    };
    /**
     * Moves the cursor to the start of the first match of `regex` in the
     * remaining text, or to the end of the source when there is no match.
     * @param {RegExp} regex - Pattern to search for.
     * @returns {string} The matched text (not consumed), or '' without a match.
     */
    proto.advanceUntilRegExp = function (regex) {
        var remaining = this.source.substr(this.position);
        var found = remaining.match(regex);
        if (!found) {
            this.goToEnd();
            return '';
        }
        this.position = this.position + found.index;
        return found[0];
    };
    /**
     * Moves the cursor forward until it sits on `ch` or reaches the end.
     * @param {number} ch - Char code to stop at.
     * @returns {boolean} True when `ch` was found (cursor is on it).
     */
    proto.advanceUntilChar = function (ch) {
        var text = this.source;
        for (; this.position < text.length; this.advance(1)) {
            if (text.charCodeAt(this.position) === ch) {
                return true;
            }
        }
        return false;
    };
    /**
     * Moves the cursor forward until the sequence `ch` starts at the cursor;
     * when it never occurs the cursor ends up at the end of the source.
     * @param {number[]} ch - Char codes to search for, in order.
     * @returns {boolean} True when the sequence was found.
     */
    proto.advanceUntilChars = function (ch) {
        while (this.position + ch.length <= this.source.length) {
            var matched = true;
            for (var k = 0; k < ch.length; k++) {
                if (this.source.charCodeAt(this.position + k) !== ch[k]) {
                    matched = false;
                    break;
                }
            }
            if (matched) {
                return true;
            }
            this.advance(1);
        }
        this.goToEnd();
        return false;
    };
    /**
     * Steps over a run of space, tab, newline, form feed and carriage return.
     * @returns {boolean} True when at least one character was skipped.
     */
    proto.skipWhitespace = function () {
        // The char-code constants are read at call time, not at class
        // creation time, because they are assigned after this IIFE has run.
        var skipped = this.advanceWhileChar(function (code) {
            switch (code) {
                case _WSP:
                case _TAB:
                case _NWL:
                case _LFD:
                case _CAR:
                    return true;
                default:
                    return false;
            }
        });
        return skipped > 0;
    };
    /**
     * Steps over characters for as long as `condition` accepts them.
     * @param {function(number): boolean} condition - Predicate on a char code.
     * @returns {number} How many characters were consumed.
     */
    proto.advanceWhileChar = function (condition) {
        var start = this.position;
        while (this.position < this.len) {
            if (!condition(this.source.charCodeAt(this.position))) {
                break;
            }
            this.position++;
        }
        return this.position - start;
    };
    return MultiLineStream;
}());
// Char codes of the characters the scanner compares against.
var _BNG = '!'.charCodeAt(0);
var _MIN = '-'.charCodeAt(0);
var _LAN = '<'.charCodeAt(0);
var _RAN = '>'.charCodeAt(0);
var _FSL = '/'.charCodeAt(0);
var _EQS = '='.charCodeAt(0);
var _DQO = '"'.charCodeAt(0);
var _SQO = '\''.charCodeAt(0);
var _NWL = '\n'.charCodeAt(0);
var _CAR = '\r'.charCodeAt(0);
var _LFD = '\f'.charCodeAt(0); // form feed
var _WSP = ' '.charCodeAt(0);
var _TAB = '\t'.charCodeAt(0);
// `type` attribute values of <script> whose content is scanned as HTML
// instead of as script text.
// Built without a prototype: the table is indexed with attribute values taken
// from the scanned document, and on a plain object literal a key such as
// "constructor" or "toString" would resolve to an inherited member and be
// mistaken for a listed type.
var htmlScriptContents = Object.create(null);
htmlScriptContents['text/x-handlebars-template'] = true;
/**
 * Creates a scanner that splits `input` into HTML tokens, one per `scan()` call.
 *
 * The scanner is a state machine: every token is produced according to the
 * current `ScannerState`, and producing it may switch the state. The state can
 * be read back with `getScannerState()` and passed in again as `initialState`
 * to resume scanning from a later offset.
 *
 * @param {string} input - Text to tokenize.
 * @param {number} [initialOffset=0] - Offset in `input` at which scanning starts.
 * @param {*} [initialState=ScannerState.WithinContent] - State to start in.
 * @param {boolean} [emitPseudoCloseTags=false] - When true, a `<` found inside a
 *   tag that was never closed produces a zero-length StartTagClose/EndTagClose
 *   token carrying a "Closing bracket missing." error, and scanning continues
 *   in content state.
 * @returns {{scan: function(): *, getTokenType: function(): *,
 *   getTokenOffset: function(): number, getTokenLength: function(): number,
 *   getTokenEnd: function(): number, getTokenText: function(): string,
 *   getScannerState: function(): *, getTokenError: function(): (string|undefined)}}
 *   Accessors describe the token returned by the most recent `scan()`.
 */
function createScanner(input, initialOffset, initialState, emitPseudoCloseTags) {
    if (initialOffset === void 0) { initialOffset = 0; }
    if (initialState === void 0) { initialState = htmlLanguageTypes_1.ScannerState.WithinContent; }
    if (emitPseudoCloseTags === void 0) { emitPseudoCloseTags = false; }
    var stream = new MultiLineStream(input, initialOffset);
    var state = initialState;
    var tokenOffset = 0;
    var tokenType = htmlLanguageTypes_1.TokenType.Unknown;
    var tokenError;
    // True when whitespace was seen since the last tag name / attribute value;
    // an attribute name is only recognized after whitespace.
    var hasSpaceAfterTag;
    // Lower-cased name of the most recent start tag.
    var lastTag;
    // Lower-cased name of the most recent attribute.
    var lastAttributeName;
    // Value of the `type` attribute of the current start tag, if any.
    var lastTypeValue;
    // Consumes a tag name at the cursor; returns it lower-cased ('' if none).
    function nextElementName() {
        return stream.advanceIfRegExp(/^[_:\w][_:\w-.\d]*/).toLowerCase();
    }
    // Consumes an attribute name at the cursor; returns it lower-cased ('' if none).
    function nextAttributeName() {
        return stream.advanceIfRegExp(/^[^\s"'></=\x00-\x0F\x7F\x80-\x9F]*/).toLowerCase();
    }
    // Records the token that is being returned so the accessors can report it.
    function finishToken(offset, type, errorMessage) {
        tokenType = type;
        tokenOffset = offset;
        tokenError = errorMessage;
        return type;
    }
    // Public entry point: scans one token and guarantees forward progress.
    function scan() {
        var offset = stream.pos();
        var oldState = state;
        var token = internalScan();
        // Zero-length tokens are only legitimate for EOS and for the pseudo
        // close tags; anything else would make callers loop forever, so one
        // character is consumed and reported as Unknown.
        if (token !== htmlLanguageTypes_1.TokenType.EOS && offset === stream.pos() && !(emitPseudoCloseTags && (token === htmlLanguageTypes_1.TokenType.StartTagClose || token === htmlLanguageTypes_1.TokenType.EndTagClose))) {
            console.log('Scanner.scan has not advanced at offset ' + offset + ', state before: ' + oldState + ' after: ' + state);
            stream.advance(1);
            return finishToken(offset, htmlLanguageTypes_1.TokenType.Unknown);
        }
        return token;
    }
    // Scans one token according to the current state. Some branches switch the
    // state without consuming anything and re-enter to scan in the new state.
    function internalScan() {
        var offset = stream.pos();
        if (stream.eos()) {
            return finishToken(offset, htmlLanguageTypes_1.TokenType.EOS);
        }
        var errorMessage;
        switch (state) {
            case htmlLanguageTypes_1.ScannerState.WithinComment:
                if (stream.advanceIfChars([_MIN, _MIN, _RAN])) { // -->
                    state = htmlLanguageTypes_1.ScannerState.WithinContent;
                    return finishToken(offset, htmlLanguageTypes_1.TokenType.EndCommentTag);
                }
                stream.advanceUntilChars([_MIN, _MIN, _RAN]); // -->
                return finishToken(offset, htmlLanguageTypes_1.TokenType.Comment);
            case htmlLanguageTypes_1.ScannerState.WithinDoctype:
                if (stream.advanceIfChar(_RAN)) {
                    state = htmlLanguageTypes_1.ScannerState.WithinContent;
                    return finishToken(offset, htmlLanguageTypes_1.TokenType.EndDoctypeTag);
                }
                stream.advanceUntilChar(_RAN); // >
                return finishToken(offset, htmlLanguageTypes_1.TokenType.Doctype);
            case htmlLanguageTypes_1.ScannerState.WithinContent:
                if (stream.advanceIfChar(_LAN)) { // <
                    if (!stream.eos() && stream.peekChar() === _BNG) { // !
                        if (stream.advanceIfChars([_BNG, _MIN, _MIN])) { // <!--
                            state = htmlLanguageTypes_1.ScannerState.WithinComment;
                            return finishToken(offset, htmlLanguageTypes_1.TokenType.StartCommentTag);
                        }
                        if (stream.advanceIfRegExp(/^!doctype/i)) {
                            state = htmlLanguageTypes_1.ScannerState.WithinDoctype;
                            return finishToken(offset, htmlLanguageTypes_1.TokenType.StartDoctypeTag);
                        }
                    }
                    if (stream.advanceIfChar(_FSL)) { // /
                        state = htmlLanguageTypes_1.ScannerState.AfterOpeningEndTag;
                        return finishToken(offset, htmlLanguageTypes_1.TokenType.EndTagOpen);
                    }
                    state = htmlLanguageTypes_1.ScannerState.AfterOpeningStartTag;
                    return finishToken(offset, htmlLanguageTypes_1.TokenType.StartTagOpen);
                }
                stream.advanceUntilChar(_LAN);
                return finishToken(offset, htmlLanguageTypes_1.TokenType.Content);
            case htmlLanguageTypes_1.ScannerState.AfterOpeningEndTag:
                var tagName = nextElementName();
                if (tagName.length > 0) {
                    state = htmlLanguageTypes_1.ScannerState.WithinEndTag;
                    return finishToken(offset, htmlLanguageTypes_1.TokenType.EndTag);
                }
                if (stream.skipWhitespace()) { // white space is not valid here
                    return finishToken(offset, htmlLanguageTypes_1.TokenType.Whitespace, localize('error.unexpectedWhitespace', 'Tag name must directly follow the open bracket.'));
                }
                state = htmlLanguageTypes_1.ScannerState.WithinEndTag;
                stream.advanceUntilChar(_RAN);
                if (offset < stream.pos()) {
                    return finishToken(offset, htmlLanguageTypes_1.TokenType.Unknown, localize('error.endTagNameExpected', 'End tag name expected.'));
                }
                return internalScan();
            case htmlLanguageTypes_1.ScannerState.WithinEndTag:
                if (stream.skipWhitespace()) { // white space is valid here
                    return finishToken(offset, htmlLanguageTypes_1.TokenType.Whitespace);
                }
                if (stream.advanceIfChar(_RAN)) { // >
                    state = htmlLanguageTypes_1.ScannerState.WithinContent;
                    return finishToken(offset, htmlLanguageTypes_1.TokenType.EndTagClose);
                }
                if (emitPseudoCloseTags && stream.peekChar() === _LAN) { // <
                    state = htmlLanguageTypes_1.ScannerState.WithinContent;
                    return finishToken(offset, htmlLanguageTypes_1.TokenType.EndTagClose, localize('error.closingBracketMissing', 'Closing bracket missing.'));
                }
                errorMessage = localize('error.closingBracketExpected', 'Closing bracket expected.');
                break;
            case htmlLanguageTypes_1.ScannerState.AfterOpeningStartTag:
                lastTag = nextElementName();
                lastTypeValue = void 0;
                lastAttributeName = void 0;
                if (lastTag.length > 0) {
                    hasSpaceAfterTag = false;
                    state = htmlLanguageTypes_1.ScannerState.WithinTag;
                    return finishToken(offset, htmlLanguageTypes_1.TokenType.StartTag);
                }
                if (stream.skipWhitespace()) { // white space is not valid here
                    return finishToken(offset, htmlLanguageTypes_1.TokenType.Whitespace, localize('error.unexpectedWhitespace', 'Tag name must directly follow the open bracket.'));
                }
                state = htmlLanguageTypes_1.ScannerState.WithinTag;
                stream.advanceUntilChar(_RAN);
                if (offset < stream.pos()) {
                    return finishToken(offset, htmlLanguageTypes_1.TokenType.Unknown, localize('error.startTagNameExpected', 'Start tag name expected.'));
                }
                return internalScan();
            case htmlLanguageTypes_1.ScannerState.WithinTag:
                if (stream.skipWhitespace()) {
                    hasSpaceAfterTag = true; // remember that we have seen a whitespace
                    return finishToken(offset, htmlLanguageTypes_1.TokenType.Whitespace);
                }
                if (hasSpaceAfterTag) {
                    lastAttributeName = nextAttributeName();
                    if (lastAttributeName.length > 0) {
                        state = htmlLanguageTypes_1.ScannerState.AfterAttributeName;
                        hasSpaceAfterTag = false;
                        return finishToken(offset, htmlLanguageTypes_1.TokenType.AttributeName);
                    }
                }
                if (stream.advanceIfChars([_FSL, _RAN])) { // />
                    state = htmlLanguageTypes_1.ScannerState.WithinContent;
                    return finishToken(offset, htmlLanguageTypes_1.TokenType.StartTagSelfClose);
                }
                if (stream.advanceIfChar(_RAN)) { // >
                    if (lastTag === 'script') {
                        // Own-property check: `lastTypeValue` comes from the document, and a
                        // value like "constructor" must not match an inherited member.
                        if (lastTypeValue && Object.prototype.hasOwnProperty.call(htmlScriptContents, lastTypeValue) && htmlScriptContents[lastTypeValue]) {
                            // stay in html
                            state = htmlLanguageTypes_1.ScannerState.WithinContent;
                        }
                        else {
                            state = htmlLanguageTypes_1.ScannerState.WithinScriptContent;
                        }
                    }
                    else if (lastTag === 'style') {
                        state = htmlLanguageTypes_1.ScannerState.WithinStyleContent;
                    }
                    else {
                        state = htmlLanguageTypes_1.ScannerState.WithinContent;
                    }
                    return finishToken(offset, htmlLanguageTypes_1.TokenType.StartTagClose);
                }
                if (emitPseudoCloseTags && stream.peekChar() === _LAN) { // <
                    state = htmlLanguageTypes_1.ScannerState.WithinContent;
                    return finishToken(offset, htmlLanguageTypes_1.TokenType.StartTagClose, localize('error.closingBracketMissing', 'Closing bracket missing.'));
                }
                stream.advance(1);
                return finishToken(offset, htmlLanguageTypes_1.TokenType.Unknown, localize('error.unexpectedCharacterInTag', 'Unexpected character in tag.'));
            case htmlLanguageTypes_1.ScannerState.AfterAttributeName:
                if (stream.skipWhitespace()) {
                    hasSpaceAfterTag = true;
                    return finishToken(offset, htmlLanguageTypes_1.TokenType.Whitespace);
                }
                if (stream.advanceIfChar(_EQS)) {
                    state = htmlLanguageTypes_1.ScannerState.BeforeAttributeValue;
                    return finishToken(offset, htmlLanguageTypes_1.TokenType.DelimiterAssign);
                }
                state = htmlLanguageTypes_1.ScannerState.WithinTag;
                return internalScan(); // no advance yet - jump to WithinTag
            case htmlLanguageTypes_1.ScannerState.BeforeAttributeValue:
                if (stream.skipWhitespace()) {
                    return finishToken(offset, htmlLanguageTypes_1.TokenType.Whitespace);
                }
                var attributeValue = stream.advanceIfRegExp(/^[^\s"'`=<>]+/);
                if (attributeValue.length > 0) {
                    if (stream.peekChar() === _RAN && stream.peekChar(-1) === _FSL) { // <foo bar=http://foo/>
                        // The trailing slash belongs to the self-closing "/>", not to the value.
                        stream.goBack(1);
                        attributeValue = attributeValue.substr(0, attributeValue.length - 1);
                    }
                    if (lastAttributeName === 'type') {
                        lastTypeValue = attributeValue;
                    }
                    // After giving the slash back the value can be empty (<foo bar=/>).
                    // Emitting a zero-length AttributeValue would trip the no-progress
                    // guard in scan() and swallow the "/", so fall through to WithinTag.
                    if (attributeValue.length > 0) {
                        state = htmlLanguageTypes_1.ScannerState.WithinTag;
                        hasSpaceAfterTag = false;
                        return finishToken(offset, htmlLanguageTypes_1.TokenType.AttributeValue);
                    }
                }
                var ch = stream.peekChar();
                if (ch === _SQO || ch === _DQO) {
                    stream.advance(1); // consume quote
                    if (stream.advanceUntilChar(ch)) {
                        stream.advance(1); // consume quote
                    }
                    if (lastAttributeName === 'type') {
                        lastTypeValue = stream.getSource().substring(offset + 1, stream.pos() - 1);
                    }
                    state = htmlLanguageTypes_1.ScannerState.WithinTag;
                    hasSpaceAfterTag = false;
                    return finishToken(offset, htmlLanguageTypes_1.TokenType.AttributeValue);
                }
                state = htmlLanguageTypes_1.ScannerState.WithinTag;
                hasSpaceAfterTag = false;
                return internalScan(); // no advance yet - jump to WithinTag
            case htmlLanguageTypes_1.ScannerState.WithinScriptContent:
                // see http://stackoverflow.com/questions/14574471/how-do-browsers-parse-a-script-tag-exactly
                // 1: plain script data, 2: after "<!--", 3: after "<script" following "<!--".
                // A "</script" ends the script content unless it is seen in state 3.
                var scriptState = 1;
                while (!stream.eos()) {
                    var match = stream.advanceIfRegExp(/<!--|-->|<\/?script\s*\/?>?/i);
                    if (match.length === 0) {
                        stream.goToEnd();
                        return finishToken(offset, htmlLanguageTypes_1.TokenType.Script);
                    }
                    else if (match === '<!--') {
                        if (scriptState === 1) {
                            scriptState = 2;
                        }
                    }
                    else if (match === '-->') {
                        scriptState = 1;
                    }
                    else if (match[1] !== '/') { // <script
                        if (scriptState === 2) {
                            scriptState = 3;
                        }
                    }
                    else { // </script
                        if (scriptState === 3) {
                            scriptState = 2;
                        }
                        else {
                            stream.goBack(match.length); // to the beginning of the closing tag
                            break;
                        }
                    }
                }
                state = htmlLanguageTypes_1.ScannerState.WithinContent;
                if (offset < stream.pos()) {
                    return finishToken(offset, htmlLanguageTypes_1.TokenType.Script);
                }
                return internalScan(); // no advance yet - jump to content
            case htmlLanguageTypes_1.ScannerState.WithinStyleContent:
                stream.advanceUntilRegExp(/<\/style/i);
                state = htmlLanguageTypes_1.ScannerState.WithinContent;
                if (offset < stream.pos()) {
                    return finishToken(offset, htmlLanguageTypes_1.TokenType.Styles);
                }
                return internalScan(); // no advance yet - jump to content
        }
        // Reached from a state that could not make sense of the next character
        // (or from an unhandled state): consume it and resume in content state.
        stream.advance(1);
        state = htmlLanguageTypes_1.ScannerState.WithinContent;
        return finishToken(offset, htmlLanguageTypes_1.TokenType.Unknown, errorMessage);
    }
    return {
        scan: scan,
        getTokenType: function () { return tokenType; },
        getTokenOffset: function () { return tokenOffset; },
        getTokenLength: function () { return stream.pos() - tokenOffset; },
        getTokenEnd: function () { return stream.pos(); },
        getTokenText: function () { return stream.getSource().substring(tokenOffset, stream.pos()); },
        getScannerState: function () { return state; },
        getTokenError: function () { return tokenError; }
    };
}
exports.createScanner = createScanner;
});