| import { adoptBuffer } from './adopt-buffer.js'; |
| import { cmpStr } from './utils.js'; |
| import tokenNames from './names.js'; |
| import { |
| WhiteSpace, |
| Comment, |
| Delim, |
| EOF, |
| Function as FunctionToken, |
| LeftParenthesis, |
| RightParenthesis, |
| LeftSquareBracket, |
| RightSquareBracket, |
| LeftCurlyBracket, |
| RightCurlyBracket |
| } from './types.js'; |
| |
// Token buffer entries are packed as `(type << TYPE_SHIFT) | offset`:
// the bits below TYPE_SHIFT hold an offset, the bits above hold a token type.
const TYPE_SHIFT = 24;
const OFFSET_MASK = (1 << TYPE_SHIFT) - 1; // 0x00FFFFFF

// Maps every block-opening token type to the token type that closes it.
// A function token closes with a right parenthesis, same as a plain `(`.
const balancePair = new Map()
    .set(FunctionToken, RightParenthesis)
    .set(LeftParenthesis, RightParenthesis)
    .set(LeftSquareBracket, RightSquareBracket)
    .set(LeftCurlyBracket, RightCurlyBracket);
| |
/**
 * Cursor over the tokens that a `tokenize(source, onToken)` function reports
 * for a source string.
 *
 * Storage layout (both buffers are obtained from `adoptBuffer()`):
 * - `offsetAndType[i]` packs token `i` as `(type << TYPE_SHIFT) | end`, so the
 *   start of token `i` is the end of token `i - 1`; the first token starts at
 *   `firstCharOffset`. End offsets are read back with `& OFFSET_MASK`, so an
 *   offset that does not fit into the mask does not round-trip.
 * - `balance[i]` links bracket pairs: an opening token holds the index of its
 *   closing token and vice versa; a token nested directly inside a closed pair
 *   holds the index of that pair's closing token; everything else (including
 *   openers that never got closed) holds `source.length`.
 *
 * Cursor state is exposed as plain properties: `tokenIndex`, `tokenType`,
 * `tokenStart`, `tokenEnd` and `eof`.
 */
export class TokenStream {
    /**
     * @param {string} [source] Text to tokenize.
     * @param {function(string, function(number, number, number): void): void} [tokenize]
     *        Called as `tokenize(source, onToken)`; it must call
     *        `onToken(type, start, end)` once per token, in source order.
     */
    constructor(source, tokenize) {
        this.setSource(source, tokenize);
    }

    /**
     * Puts the cursor before the first token (`tokenIndex === -1`) with an
     * empty range located at `firstCharOffset`, and clears the `eof` flag.
     */
    reset() {
        this.eof = false;
        this.tokenIndex = -1;
        this.tokenType = 0;
        this.tokenStart = this.firstCharOffset;
        this.tokenEnd = this.firstCharOffset;
    }

    /**
     * Tokenizes `source`, rebuilds the token and balance buffers and moves the
     * cursor to the first token (or straight to EOF when there are no tokens).
     *
     * @param {string} [source=''] Text to tokenize; falsy values become `''`,
     *        anything else is converted with `String()`.
     * @param {function(string, function(number, number, number): void): void} [tokenize]
     *        Tokenizer callback, see the constructor. Defaults to a no-op.
     */
    setSource(source = '', tokenize = () => {}) {
        source = String(source || '');

        const sourceLength = source.length;
        const offsetAndType = adoptBuffer(this.offsetAndType, sourceLength + 1); // +1 because of eof-token
        const balance = adoptBuffer(this.balance, sourceLength + 1);
        let tokenCount = 0;
        // Token type that would close the innermost open bracket (0 when none is open)
        let balanceCloseType = 0;
        // Top of the open-brackets stack packed as `(closeType << TYPE_SHIFT) | openerIndex`;
        // 0 means the stack is empty. While a bracket is open, its `balance` slot
        // temporarily stores the previous stack top (a linked list inside the buffer).
        let balanceStart = 0;
        let firstCharOffset = -1;

        // capture buffers: the instance holds no reference to them while they are being filled
        this.offsetAndType = null;
        this.balance = null;

        tokenize(source, (type, start, end) => {
            switch (type) {
                default:
                    // `sourceLength` is the "not balanced (yet)" marker
                    balance[tokenCount] = sourceLength;
                    break;

                // Dynamic label: matches only the closer expected by the innermost opener
                case balanceCloseType: {
                    let balancePrev = balanceStart & OFFSET_MASK;

                    // pop the stack: restore the state saved in the opener's slot
                    balanceStart = balance[balancePrev];
                    balanceCloseType = balanceStart >> TYPE_SHIFT;

                    // link closer -> opener and opener -> closer
                    balance[tokenCount] = balancePrev;
                    balance[balancePrev++] = tokenCount;

                    // tokens inside the pair that are still unmarked now point to the closer
                    for (; balancePrev < tokenCount; balancePrev++) {
                        if (balance[balancePrev] === sourceLength) {
                            balance[balancePrev] = tokenCount;
                        }
                    }
                    break;
                }

                case LeftParenthesis:
                case FunctionToken:
                case LeftSquareBracket:
                case LeftCurlyBracket:
                    // push onto the stack: save the previous top in this token's slot
                    balance[tokenCount] = balanceStart;
                    balanceCloseType = balancePair.get(type);
                    balanceStart = (balanceCloseType << TYPE_SHIFT) | tokenCount;
                    break;
            }

            offsetAndType[tokenCount++] = (type << TYPE_SHIFT) | end;
            if (firstCharOffset === -1) {
                firstCharOffset = start;
            }
        });

        // finalize buffers
        offsetAndType[tokenCount] = (EOF << TYPE_SHIFT) | sourceLength; // <EOF-token>
        balance[tokenCount] = sourceLength;
        balance[sourceLength] = sourceLength; // prevents false positive balance match with any token
        // openers that were never closed still hold stack links: replace them with the marker
        while (balanceStart !== 0) {
            const balancePrev = balanceStart & OFFSET_MASK;
            balanceStart = balance[balancePrev];
            balance[balancePrev] = sourceLength;
        }

        this.source = source;
        this.firstCharOffset = firstCharOffset === -1 ? 0 : firstCharOffset;
        this.tokenCount = tokenCount;
        this.offsetAndType = offsetAndType;
        this.balance = balance;

        this.reset();
        this.next();
    }

    /**
     * @param {number} offset Token distance from the current token.
     * @returns {number} Type of that token, or `EOF` when it lies past the last token.
     */
    lookupType(offset) {
        offset += this.tokenIndex;

        if (offset < this.tokenCount) {
            return this.offsetAndType[offset] >> TYPE_SHIFT;
        }

        return EOF;
    }

    /**
     * @param {number} offset Token distance from the current token.
     * @returns {number} Source offset where that token starts, or
     *          `source.length` when it lies past the last token.
     */
    lookupOffset(offset) {
        const index = offset + this.tokenIndex;

        if (index >= this.tokenCount) {
            return this.source.length;
        }

        // The first token has no predecessor to take an end offset from;
        // it starts at `firstCharOffset`, which is not necessarily 0.
        return index > 0
            ? this.offsetAndType[index - 1] & OFFSET_MASK
            : this.firstCharOffset;
    }

    /**
     * Compares the text of a token with `referenceStr` using `cmpStr()`.
     *
     * @param {number} offset Token distance from the current token.
     * @param {string} referenceStr String to compare the token text with.
     * @returns {boolean} Result of `cmpStr()`; `false` when the token lies
     *          past the last token.
     */
    lookupValue(offset, referenceStr) {
        const index = offset + this.tokenIndex;

        if (index < this.tokenCount) {
            return cmpStr(
                this.source,
                // same first-token rule as in lookupOffset()
                index > 0
                    ? this.offsetAndType[index - 1] & OFFSET_MASK
                    : this.firstCharOffset,
                this.offsetAndType[index] & OFFSET_MASK,
                referenceStr
            );
        }

        return false;
    }

    /**
     * @param {number} tokenIndex Absolute token index.
     * @returns {number} Source offset where that token starts. Indexes at or
     *          past `tokenCount` give the offset stored for the EOF token;
     *          indexes <= 0 give `firstCharOffset`.
     */
    getTokenStart(tokenIndex) {
        if (tokenIndex === this.tokenIndex) {
            return this.tokenStart;
        }

        if (tokenIndex > 0) {
            return tokenIndex < this.tokenCount
                ? this.offsetAndType[tokenIndex - 1] & OFFSET_MASK
                : this.offsetAndType[this.tokenCount] & OFFSET_MASK;
        }

        return this.firstCharOffset;
    }

    /**
     * @param {number} start Source offset to start from.
     * @returns {string} Source text between `start` and the start of the current token.
     */
    substrToCursor(start) {
        return this.source.substring(start, this.tokenStart);
    }

    /**
     * @param {number} pos Token index to compare with.
     * @returns {boolean} `true` when the balance entry of the current token is
     *          lower than `pos` (e.g. a closing token whose opener precedes `pos`).
     */
    isBalanceEdge(pos) {
        return this.balance[this.tokenIndex] < pos;
    }

    /**
     * Tests whether a token is a delim token whose first char code is `code`.
     *
     * @param {number} code Expected char code.
     * @param {number} [offset] Token distance from the current token; a falsy
     *        value (omitted or 0) tests the current token.
     * @returns {boolean}
     */
    isDelim(code, offset) {
        if (offset) {
            return (
                this.lookupType(offset) === Delim &&
                this.source.charCodeAt(this.lookupOffset(offset)) === code
            );
        }

        return (
            this.tokenType === Delim &&
            this.source.charCodeAt(this.tokenStart) === code
        );
    }

    /**
     * Moves the cursor by `tokenCount` tokens relative to the current token.
     * Landing at or past the last token switches the stream to the EOF state.
     *
     * @param {number} tokenCount Number of tokens to move by.
     */
    skip(tokenCount) {
        let next = this.tokenIndex + tokenCount;

        if (next < this.tokenCount) {
            this.tokenIndex = next;
            // same first-token rule as in lookupOffset()
            this.tokenStart = next > 0
                ? this.offsetAndType[next - 1] & OFFSET_MASK
                : this.firstCharOffset;
            next = this.offsetAndType[next];
            this.tokenType = next >> TYPE_SHIFT;
            this.tokenEnd = next & OFFSET_MASK;
        } else {
            this.tokenIndex = this.tokenCount;
            this.next();
        }
    }

    /**
     * Moves the cursor to the next token. Past the last token it sets `eof`,
     * parks `tokenIndex` at `tokenCount`, sets `tokenType` to `EOF` and
     * collapses the token range to `source.length`.
     */
    next() {
        let next = this.tokenIndex + 1;

        if (next < this.tokenCount) {
            this.tokenIndex = next;
            // tokens are contiguous: the new token starts where the previous one ended
            this.tokenStart = this.tokenEnd;
            next = this.offsetAndType[next];
            this.tokenType = next >> TYPE_SHIFT;
            this.tokenEnd = next & OFFSET_MASK;
        } else {
            this.eof = true;
            this.tokenIndex = this.tokenCount;
            this.tokenType = EOF;
            this.tokenStart = this.tokenEnd = this.source.length;
        }
    }

    /**
     * Advances the cursor while the current token is whitespace or a comment.
     */
    skipSC() {
        while (this.tokenType === WhiteSpace || this.tokenType === Comment) {
            this.next();
        }
    }

    /**
     * Scans tokens from `startToken` and moves the cursor to where the scan
     * stops. Balanced bracket blocks are stepped over as a whole, so
     * `stopConsume` is not called for tokens inside them.
     *
     * The scan stops when:
     * - a token's balance entry points before `startToken`;
     * - `stopConsume` returns `1` (the cursor lands on that token);
     * - `stopConsume` returns `2` (the cursor lands right after that token);
     * - the tokens run out.
     *
     * @param {number} startToken Absolute index of the token to start from.
     * @param {function(number): number} stopConsume Receives the char code at
     *        the start of each scanned token.
     */
    skipUntilBalanced(startToken, stopConsume) {
        let cursor = startToken;
        let balanceEnd;
        let offset;

        loop:
        for (; cursor < this.tokenCount; cursor++) {
            balanceEnd = this.balance[cursor];

            // stop scanning on balance edge that points to offset before start token
            if (balanceEnd < startToken) {
                break loop;
            }

            offset = cursor > 0 ? this.offsetAndType[cursor - 1] & OFFSET_MASK : this.firstCharOffset;

            // check stop condition
            switch (stopConsume(this.source.charCodeAt(offset))) {
                case 1: // just stop
                    break loop;

                case 2: // stop & included
                    cursor++;
                    break loop;

                default:
                    // fast forward to the end of balanced block:
                    // only a closer points back at its opener
                    if (this.balance[balanceEnd] === cursor) {
                        cursor = balanceEnd;
                    }
            }
        }

        this.skip(cursor - this.tokenIndex);
    }

    /**
     * Calls `fn(type, start, end, index)` for every token in order; the EOF
     * token is not included.
     *
     * @param {function(number, number, number, number): void} fn
     */
    forEachToken(fn) {
        for (let i = 0, offset = this.firstCharOffset; i < this.tokenCount; i++) {
            const start = offset;
            const item = this.offsetAndType[i];
            const end = item & OFFSET_MASK;
            const type = item >> TYPE_SHIFT;

            offset = end;

            fn(type, start, end, i);
        }
    }

    /**
     * @returns {Array<{idx: number, type: string, chunk: string, balance: number}>}
     *          One record per token: its index, its name from `tokenNames`,
     *          its source text and its balance entry.
     */
    dump() {
        const tokens = new Array(this.tokenCount);

        this.forEachToken((type, start, end, index) => {
            tokens[index] = {
                idx: index,
                type: tokenNames[type],
                chunk: this.source.substring(start, end),
                balance: this.balance[index]
            };
        });

        return tokens;
    }
}