#!/usr/bin/env python
#
# Copyright 2007 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Token utility functions."""
__author__ = ('robbyw@google.com (Robert Walker)',
'ajp@google.com (Andy Perelson)')
import copy
import StringIO
from closure_linter.common import tokens
from closure_linter.javascripttokens import JavaScriptToken
from closure_linter.javascripttokens import JavaScriptTokenType
# Shorthand
Type = tokens.TokenType
def GetFirstTokenInSameLine(token):
"""Returns the first token in the same line as token.
Args:
token: Any token in the line.
Returns:
The first token in the same line as token.
"""
while not token.IsFirstInLine():
token = token.previous
return token
def GetFirstTokenInPreviousLine(token):
"""Returns the first token in the previous line as token.
Args:
token: Any token in the line.
Returns:
The first token in the previous line as token, or None if token is on the
first line.
"""
first_in_line = GetFirstTokenInSameLine(token)
if first_in_line.previous:
return GetFirstTokenInSameLine(first_in_line.previous)
return None
def GetLastTokenInSameLine(token):
"""Returns the last token in the same line as token.
Args:
token: Any token in the line.
Returns:
The last token in the same line as token.
"""
while not token.IsLastInLine():
token = token.next
return token
def GetAllTokensInSameLine(token):
"""Returns all tokens in the same line as the given token.
Args:
token: Any token in the line.
Returns:
All tokens on the same line as the given token.
"""
first_token = GetFirstTokenInSameLine(token)
last_token = GetLastTokenInSameLine(token)
tokens_in_line = []
while first_token != last_token:
tokens_in_line.append(first_token)
first_token = first_token.next
tokens_in_line.append(last_token)
return tokens_in_line
def CustomSearch(start_token, func, end_func=None, distance=None,
reverse=False):
"""Returns the first token where func is True within distance of this token.
Args:
start_token: The token to start searching from
func: The function to call to test a token for applicability
end_func: The function to call to test a token to determine whether to abort
the search.
distance: The number of tokens to look through before failing search. Must
be positive. If unspecified, will search until the end of the token
chain
reverse: When true, search the tokens before this one instead of the tokens
after it
Returns:
The first token matching func within distance of this token, or None if no
such token is found.
"""
token = start_token
if reverse:
while token and (distance is None or distance > 0):
previous = token.previous
if previous:
if func(previous):
return previous
if end_func and end_func(previous):
return None
token = previous
if distance is not None:
distance -= 1
else:
while token and (distance is None or distance > 0):
next_token = token.next
if next_token:
if func(next_token):
return next_token
if end_func and end_func(next_token):
return None
token = next_token
if distance is not None:
distance -= 1
return None
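# Usage sketch for CustomSearch (an editor's illustration, not part of the
# original module; `tok` stands for any token in a parsed stream). Find the
# nearest preceding '{' without walking past a ';':
#
#   open_brace = CustomSearch(
#       tok,
#       lambda t: t.string == '{',
#       end_func=lambda t: t.string == ';',
#       reverse=True)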
def Search(start_token, token_types, distance=None, reverse=False):
"""Returns the first token of type in token_types within distance.
Args:
start_token: The token to start searching from
token_types: The allowable types of the token being searched for
distance: The number of tokens to look through before failing search. Must
be positive. If unspecified, will search until the end of the token
chain
reverse: When true, search the tokens before this one instead of the tokens
after it
Returns:
The first token of any type in token_types within distance of this token, or
None if no such token is found.
"""
return CustomSearch(start_token, lambda token: token.IsAnyType(token_types),
None, distance, reverse)
def SearchExcept(start_token, token_types, distance=None, reverse=False):
"""Returns the first token not of any type in token_types within distance.
Args:
start_token: The token to start searching from
    token_types: The disallowed types of the token being searched for
distance: The number of tokens to look through before failing search. Must
be positive. If unspecified, will search until the end of the token
chain
reverse: When true, search the tokens before this one instead of the tokens
after it
  Returns:
    The first token not of any type in token_types within distance of this
    token, or None if no such token is found.
"""
return CustomSearch(start_token,
lambda token: not token.IsAnyType(token_types),
None, distance, reverse)
def SearchUntil(start_token, token_types, end_types, distance=None,
reverse=False):
"""Returns the first token of type in token_types before a token of end_type.
Args:
start_token: The token to start searching from.
token_types: The allowable types of the token being searched for.
end_types: Types of tokens to abort search if we find.
distance: The number of tokens to look through before failing search. Must
be positive. If unspecified, will search until the end of the token
chain
reverse: When true, search the tokens before this one instead of the tokens
after it
Returns:
    The first token of any type in token_types within distance of this token
    and before any token of a type in end_types, or None if no such token is
    found.
"""
return CustomSearch(start_token, lambda token: token.IsAnyType(token_types),
lambda token: token.IsAnyType(end_types),
distance, reverse)
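# Usage sketches for the typed wrappers (editor's illustrations; `tok` stands
# for any token in a parsed stream):
#
#   # Nearest identifier among the next 5 tokens:
#   Search(tok, [JavaScriptTokenType.IDENTIFIER], distance=5)
#   # Nearest code token, scanning backwards past comments and whitespace:
#   SearchExcept(tok, JavaScriptTokenType.NON_CODE_TYPES, reverse=True)
#   # String text before the enclosing single-quoted string ends:
#   SearchUntil(tok, [JavaScriptTokenType.STRING_TEXT],
#               [JavaScriptTokenType.SINGLE_QUOTE_STRING_END])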
def DeleteToken(token):
"""Deletes the given token from the linked list.
Args:
token: The token to delete
"""
  # When deleting a token, we do not update the deleted token itself, so its
  # previous and next pointers keep referencing tokens that are still in the
  # list. It is also very hard to track every previously deleted token in
  # order to update it when its pointers become invalid, so instead we set
  # this flag, which lets any token linked-list iteration logic safely skip
  # deleted nodes.
token.is_deleted = True
if token.previous:
token.previous.next = token.next
if token.next:
token.next.previous = token.previous
following_token = token.next
while following_token and following_token.metadata.last_code == token:
following_token.metadata.last_code = token.metadata.last_code
following_token = following_token.next
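# Iteration sketch (editor's illustration): callers that walk the list while
# deletions may happen should honor the is_deleted flag set above, e.g.:
#
#   t = start_token
#   while t:
#     if not t.is_deleted:
#       Process(t)  # Process is a stand-in for the caller's own logic.
#     t = t.next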
def DeleteTokens(token, token_count):
"""Deletes the given number of tokens starting with the given token.
Args:
token: The token to start deleting at.
token_count: The total number of tokens to delete.
"""
  for unused_i in xrange(1, token_count):
DeleteToken(token.next)
DeleteToken(token)
def InsertTokenBefore(new_token, token):
"""Insert new_token before token.
Args:
new_token: A token to be added to the stream
token: A token already in the stream
"""
new_token.next = token
new_token.previous = token.previous
new_token.metadata = copy.copy(token.metadata)
if new_token.IsCode():
old_last_code = token.metadata.last_code
following_token = token
while (following_token and
following_token.metadata.last_code == old_last_code):
following_token.metadata.last_code = new_token
following_token = following_token.next
token.previous = new_token
if new_token.previous:
new_token.previous.next = new_token
if new_token.start_index is None:
if new_token.line_number == token.line_number:
new_token.start_index = token.start_index
else:
previous_token = new_token.previous
if previous_token:
new_token.start_index = (previous_token.start_index +
len(previous_token.string))
else:
new_token.start_index = 0
iterator = new_token.next
while iterator and iterator.line_number == new_token.line_number:
iterator.start_index += len(new_token.string)
iterator = iterator.next
def InsertTokenAfter(new_token, token):
"""Insert new_token after token.
Args:
new_token: A token to be added to the stream
token: A token already in the stream
"""
new_token.previous = token
new_token.next = token.next
new_token.metadata = copy.copy(token.metadata)
if token.IsCode():
new_token.metadata.last_code = token
if new_token.IsCode():
following_token = token.next
while following_token and following_token.metadata.last_code == token:
following_token.metadata.last_code = new_token
following_token = following_token.next
token.next = new_token
if new_token.next:
new_token.next.previous = new_token
if new_token.start_index is None:
if new_token.line_number == token.line_number:
new_token.start_index = token.start_index + len(token.string)
else:
new_token.start_index = 0
iterator = new_token.next
while iterator and iterator.line_number == new_token.line_number:
iterator.start_index += len(new_token.string)
iterator = iterator.next
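# Bookkeeping sketch (editor's illustration): inserting on an existing line
# shifts start_index for every later token on that line. Inserting a single
# space after `a_tok` (a hypothetical token for 'a' in the line 'a=1') moves
# '=' from start_index 1 to 2 and '1' from 2 to 3:
#
#   space = JavaScriptToken(' ', Type.WHITESPACE, a_tok.line,
#                           a_tok.line_number)
#   InsertTokenAfter(space, a_tok)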
def InsertTokensAfter(new_tokens, token):
"""Insert multiple tokens after token.
Args:
new_tokens: An array of tokens to be added to the stream
token: A token already in the stream
"""
# TODO(user): It would be nicer to have InsertTokenAfter defer to here
# instead of vice-versa.
current_token = token
for new_token in new_tokens:
InsertTokenAfter(new_token, current_token)
current_token = new_token
def InsertSpaceTokenAfter(token):
"""Inserts a space token after the given token.
Args:
token: The token to insert a space token after
Returns:
A single space token
"""
space_token = JavaScriptToken(' ', Type.WHITESPACE, token.line,
token.line_number)
  InsertTokenAfter(space_token, token)
  return space_token
def InsertBlankLineAfter(token):
"""Inserts a blank line after the given token.
Args:
token: The token to insert a blank line after
  Returns:
    The new blank line token.
"""
blank_token = JavaScriptToken('', Type.BLANK_LINE, '',
token.line_number + 1)
  InsertLineAfter(token, [blank_token])
  return blank_token
def InsertLineAfter(token, new_tokens):
"""Inserts a new line consisting of new_tokens after the given token.
Args:
token: The token to insert after.
new_tokens: The tokens that will make up the new line.
"""
insert_location = token
for new_token in new_tokens:
InsertTokenAfter(new_token, insert_location)
insert_location = new_token
# Update all subsequent line numbers.
next_token = new_tokens[-1].next
while next_token:
next_token.line_number += 1
next_token = next_token.next
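# Usage sketch (editor's illustration): InsertBlankLineAfter above is built on
# this. Inserting a line bumps line_number on every subsequent token:
#
#   InsertBlankLineAfter(tok)
#   # tok.next.line_number == tok.line_number + 1, and every later token's
#   # line_number has also increased by one.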
def SplitToken(token, position):
"""Splits the token into two tokens at position.
Args:
token: The token to split
    position: The position to split at. Will be the beginning of the second
      token.
Returns:
The new second token.
"""
new_string = token.string[position:]
token.string = token.string[:position]
new_token = JavaScriptToken(new_string, token.type, token.line,
token.line_number)
InsertTokenAfter(new_token, token)
return new_token
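# Usage sketch (editor's illustration): splitting a token whose string is
# 'foobar' at position 3 leaves the original holding 'foo' and returns a new
# token holding 'bar', inserted immediately after it on the same line:
#
#   second = SplitToken(tok, 3)
#   # tok.string == 'foo', second.string == 'bar'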
def Compare(token1, token2):
"""Compares two tokens and determines their relative order.
Args:
token1: The first token to compare.
token2: The second token to compare.
Returns:
    A negative integer, zero, or a positive integer as the first token is
    before, equal to, or after the second in the token stream.
"""
if token2.line_number != token1.line_number:
return token1.line_number - token2.line_number
else:
return token1.start_index - token2.start_index
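# Usage sketch (editor's illustration): Compare follows the cmp() contract,
# so a list of tokens can be put back in stream order with Python 2's
# sorted():
#
#   ordered = sorted(token_list, cmp=Compare)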
def GoogScopeOrNoneFromStartBlock(token):
"""Determines if the given START_BLOCK is part of a goog.scope statement.
Args:
token: A token of type START_BLOCK.
Returns:
    The goog.scope function call token, or None if no such call exists.
"""
if token.type != JavaScriptTokenType.START_BLOCK:
return None
# Search for a goog.scope statement, which will be 5 tokens before the
# block. Illustration of the tokens found prior to the start block:
# goog.scope(function() {
# 5 4 3 21 ^
maybe_goog_scope = token
for unused_i in xrange(5):
maybe_goog_scope = (maybe_goog_scope.previous if maybe_goog_scope and
maybe_goog_scope.previous else None)
if maybe_goog_scope and maybe_goog_scope.string == 'goog.scope':
return maybe_goog_scope
def GetTokenRange(start_token, end_token):
"""Returns a list of tokens between the two given, inclusive.
Args:
start_token: Start token in the range.
end_token: End token in the range.
Returns:
    A list of tokens, in order, from start_token to end_token (including
    start and end). Returns None if the tokens do not describe a valid range.
"""
token_range = []
token = start_token
while token:
token_range.append(token)
if token == end_token:
return token_range
token = token.next
def TokensToString(token_iterable):
"""Convert a number of tokens into a string.
Newlines will be inserted whenever the line_number of two neighboring
strings differ.
Args:
token_iterable: The tokens to turn to a string.
Returns:
A string representation of the given tokens.
"""
buf = StringIO.StringIO()
token_list = list(token_iterable)
if not token_list:
return ''
line_number = token_list[0].line_number
for token in token_list:
while line_number < token.line_number:
line_number += 1
buf.write('\n')
    # Line numbers are not guaranteed to be monotonic (e.g. after tokens have
    # been inserted or moved), so if this token reports an earlier line,
    # resync the counter and start a new line.
    if line_number > token.line_number:
      line_number = token.line_number
      buf.write('\n')
buf.write(token.string)
return buf.getvalue()
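# Behavior sketch (editor's illustration): given tokens 'a' on line 1 and 'b'
# on line 3, TokensToString returns 'a\n\nb' -- one newline per advanced line
# number, so blank lines survive the round trip.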
def GetPreviousCodeToken(token):
"""Returns the code token before the specified token.
Args:
token: A token.
Returns:
The code token before the specified token or None if no such token
exists.
"""
return CustomSearch(
token,
lambda t: t and t.type not in JavaScriptTokenType.NON_CODE_TYPES,
reverse=True)
def GetNextCodeToken(token):
"""Returns the next code token after the specified token.
Args:
token: A token.
Returns:
The next code token after the specified token or None if no such token
exists.
"""
return CustomSearch(
token,
lambda t: t and t.type not in JavaScriptTokenType.NON_CODE_TYPES,
reverse=False)
def GetIdentifierStart(token):
"""Returns the first token in an identifier.
Given a token which is part of an identifier, returns the token at the start
of the identifier.
Args:
token: A token which is part of an identifier.
Returns:
The token at the start of the identifier or None if the identifier was not
of the form 'a.b.c' (e.g. "['a']['b'].c").
"""
start_token = token
previous_code_token = GetPreviousCodeToken(token)
while (previous_code_token and (
previous_code_token.IsType(JavaScriptTokenType.IDENTIFIER) or
IsDot(previous_code_token))):
start_token = previous_code_token
previous_code_token = GetPreviousCodeToken(previous_code_token)
if IsDot(start_token):
return None
return start_token
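# Behavior sketch (editor's illustration): in 'foo.bar.baz', calling
# GetIdentifierStart on the 'baz' token walks back across the dots and
# identifier parts and returns the 'foo' token. For "['a']['b'].c" the walk
# ends on a dot, so it returns None.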
def GetIdentifierForToken(token):
"""Get the symbol specified by a token.
  Given the first token of a symbol, this function concatenates any parts of
  the identifier that are split across whitespace or newlines.
The function will return None if the token is not the first token of an
identifier.
Args:
token: The first token of a symbol.
Returns:
The whole symbol, as a string.
"""
# Search backward to determine if this token is the first token of the
# identifier. If it is not the first token, return None to signal that this
# token should be ignored.
prev_token = token.previous
while prev_token:
if (prev_token.IsType(JavaScriptTokenType.IDENTIFIER) or
IsDot(prev_token)):
return None
if (prev_token.IsType(tokens.TokenType.WHITESPACE) or
prev_token.IsAnyType(JavaScriptTokenType.COMMENT_TYPES)):
prev_token = prev_token.previous
else:
break
# A "function foo()" declaration.
if token.type is JavaScriptTokenType.FUNCTION_NAME:
return token.string
# A "var foo" declaration (if the previous token is 'var')
previous_code_token = GetPreviousCodeToken(token)
if previous_code_token and previous_code_token.IsKeyword('var'):
return token.string
# Otherwise, this is potentially a namespaced (goog.foo.bar) identifier that
# could span multiple lines or be broken up by whitespace. We need
# to concatenate.
identifier_types = set([
JavaScriptTokenType.IDENTIFIER,
JavaScriptTokenType.SIMPLE_LVALUE
])
assert token.type in identifier_types
# Start with the first token
symbol_tokens = [token]
if token.next:
for t in token.next:
last_symbol_token = symbol_tokens[-1]
# A dot is part of the previous symbol.
if IsDot(t):
symbol_tokens.append(t)
continue
# An identifier is part of the previous symbol if the previous one was a
# dot.
if t.type in identifier_types:
if IsDot(last_symbol_token):
symbol_tokens.append(t)
continue
else:
break
# Skip any whitespace
if t.type in JavaScriptTokenType.NON_CODE_TYPES:
continue
# This is the end of the identifier. Stop iterating.
break
if symbol_tokens:
return ''.join([t.string for t in symbol_tokens])
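# Behavior sketch (editor's illustration): for the docstring's namespaced
# case, e.g. 'goog.foo.bar' broken after a dot onto a second line, the loop
# above keeps consuming dot and identifier tokens, skips the NON_CODE_TYPES
# tokens in between, and returns the concatenated string 'goog.foo.bar'.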
def GetStringAfterToken(token):
"""Get string after token.
Args:
token: Search will be done after this token.
Returns:
String if found after token else None (empty string will also
return None).
Search until end of string as in case of empty string Type.STRING_TEXT is not
present/found and don't want to return next string.
E.g.
a = '';
b = 'test';
When searching for string after 'a' if search is not limited by end of string
then it will return 'test' which is not desirable as there is a empty string
before that.
This will return None for cases where string is empty or no string found
as in both cases there is no Type.STRING_TEXT.
"""
string_token = SearchUntil(token, JavaScriptTokenType.STRING_TEXT,
[JavaScriptTokenType.SINGLE_QUOTE_STRING_END,
JavaScriptTokenType.DOUBLE_QUOTE_STRING_END,
JavaScriptTokenType.TEMPLATE_STRING_END])
if string_token:
return string_token.string
else:
return None
def IsDot(token):
"""Whether the token represents a "dot" operator (foo.bar)."""
return token.type is JavaScriptTokenType.OPERATOR and token.string == '.'
def IsIdentifierOrDot(token):
"""Whether the token is either an identifier or a '.'."""
return (token.type in [JavaScriptTokenType.IDENTIFIER,
JavaScriptTokenType.SIMPLE_LVALUE] or
IsDot(token))