blob: a8657d2f4defa91eaad2d071a94e16687f68f846 [file] [log] [blame]
#!/usr/bin/env python
#
# Copyright 2010 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Metadata pass for annotating tokens in EcmaScript files."""
__author__ = ('robbyw@google.com (Robert Walker)')
from closure_linter import javascripttokens
from closure_linter import tokenutil
TokenType = javascripttokens.JavaScriptTokenType
class ParseError(Exception):
"""Exception indicating a parse error at the given token.
Attributes:
token: The token where the parse error occurred.
"""
def __init__(self, token, message=None):
"""Initialize a parse error at the given token with an optional message.
Args:
token: The token where the parse error occurred.
message: A message describing the parse error.
"""
Exception.__init__(self, message)
self.token = token
class EcmaContext(object):
"""Context object for EcmaScript languages.
Attributes:
type: The context type.
start_token: The token where this context starts.
end_token: The token where this context ends.
parent: The parent context.
"""
# The root context.
ROOT = 'root'
# A block of code.
BLOCK = 'block'
# A pseudo-block of code for a given case or default section.
CASE_BLOCK = 'case_block'
# Block of statements in a for loop's parentheses.
FOR_GROUP_BLOCK = 'for_block'
# An implied block of code for 1 line if, while, and for statements
IMPLIED_BLOCK = 'implied_block'
# An index in to an array or object.
INDEX = 'index'
# An array literal in [].
ARRAY_LITERAL = 'array_literal'
# An object literal in {}.
OBJECT_LITERAL = 'object_literal'
# An individual element in an array or object literal.
LITERAL_ELEMENT = 'literal_element'
# The portion of a ternary statement between ? and :
TERNARY_TRUE = 'ternary_true'
# The portion of a ternary statment after :
TERNARY_FALSE = 'ternary_false'
# The entire switch statment. This will contain a GROUP with the variable
# and a BLOCK with the code.
# Since that BLOCK is not a normal block, it can not contain statements except
# for case and default.
SWITCH = 'switch'
# A normal comment.
COMMENT = 'comment'
# A JsDoc comment.
DOC = 'doc'
# An individual statement.
STATEMENT = 'statement'
# Code within parentheses.
GROUP = 'group'
# Parameter names in a function declaration.
PARAMETERS = 'parameters'
# A set of variable declarations appearing after the 'var' keyword.
VAR = 'var'
# Context types that are blocks.
BLOCK_TYPES = frozenset([
ROOT, BLOCK, CASE_BLOCK, FOR_GROUP_BLOCK, IMPLIED_BLOCK])
def __init__(self, context_type, start_token, parent=None):
"""Initializes the context object.
Args:
context_type: The context type.
start_token: The token where this context starts.
parent: The parent context.
Attributes:
type: The context type.
start_token: The token where this context starts.
end_token: The token where this context ends.
parent: The parent context.
children: The child contexts of this context, in order.
"""
self.type = context_type
self.start_token = start_token
self.end_token = None
self.parent = None
self.children = []
if parent:
parent.AddChild(self)
def __repr__(self):
"""Returns a string representation of the context object."""
stack = []
context = self
while context:
stack.append(context.type)
context = context.parent
return 'Context(%s)' % ' > '.join(stack)
def AddChild(self, child):
"""Adds a child to this context and sets child's parent to this context.
Args:
child: A child EcmaContext. The child's parent will be set to this
context.
"""
child.parent = self
self.children.append(child)
self.children.sort(EcmaContext._CompareContexts)
def GetRoot(self):
"""Get the root context that contains this context, if any."""
context = self
while context:
if context.type is EcmaContext.ROOT:
return context
context = context.parent
@staticmethod
def _CompareContexts(context1, context2):
"""Sorts contexts 1 and 2 by start token document position."""
return tokenutil.Compare(context1.start_token, context2.start_token)
class EcmaMetaData(object):
"""Token metadata for EcmaScript languages.
Attributes:
last_code: The last code token to appear before this one.
context: The context this token appears in.
operator_type: The operator type, will be one of the *_OPERATOR constants
defined below.
aliased_symbol: The full symbol being identified, as a string (e.g. an
'XhrIo' alias for 'goog.net.XhrIo'). Only applicable to identifier
tokens. This is set in aliaspass.py and is a best guess.
is_alias_definition: True if the symbol is part of an alias definition.
If so, these symbols won't be counted towards goog.requires/provides.
"""
UNARY_OPERATOR = 'unary'
UNARY_POST_OPERATOR = 'unary_post'
BINARY_OPERATOR = 'binary'
TERNARY_OPERATOR = 'ternary'
def __init__(self):
"""Initializes a token metadata object."""
self.last_code = None
self.context = None
self.operator_type = None
self.is_implied_semicolon = False
self.is_implied_block = False
self.is_implied_block_close = False
self.aliased_symbol = None
self.is_alias_definition = False
def __repr__(self):
"""Returns a string representation of the context object."""
parts = ['%r' % self.context]
if self.operator_type:
parts.append('optype: %r' % self.operator_type)
if self.is_implied_semicolon:
parts.append('implied;')
if self.aliased_symbol:
parts.append('alias for: %s' % self.aliased_symbol)
return 'MetaData(%s)' % ', '.join(parts)
def IsUnaryOperator(self):
return self.operator_type in (EcmaMetaData.UNARY_OPERATOR,
EcmaMetaData.UNARY_POST_OPERATOR)
def IsUnaryPostOperator(self):
return self.operator_type == EcmaMetaData.UNARY_POST_OPERATOR
class EcmaMetaDataPass(object):
"""A pass that iterates over all tokens and builds metadata about them."""
def __init__(self):
"""Initialize the meta data pass object."""
self.Reset()
def Reset(self):
"""Resets the metadata pass to prepare for the next file."""
self._token = None
self._context = None
self._AddContext(EcmaContext.ROOT)
self._last_code = None
def _CreateContext(self, context_type):
"""Overridable by subclasses to create the appropriate context type."""
return EcmaContext(context_type, self._token, self._context)
def _CreateMetaData(self):
"""Overridable by subclasses to create the appropriate metadata type."""
return EcmaMetaData()
def _AddContext(self, context_type):
"""Adds a context of the given type to the context stack.
Args:
context_type: The type of context to create
"""
self._context = self._CreateContext(context_type)
def _PopContext(self):
"""Moves up one level in the context stack.
Returns:
The former context.
Raises:
ParseError: If the root context is popped.
"""
top_context = self._context
top_context.end_token = self._token
self._context = top_context.parent
if self._context:
return top_context
else:
raise ParseError(self._token)
def _PopContextType(self, *stop_types):
"""Pops the context stack until a context of the given type is popped.
Args:
*stop_types: The types of context to pop to - stops at the first match.
Returns:
The context object of the given type that was popped.
"""
last = None
while not last or last.type not in stop_types:
last = self._PopContext()
return last
def _EndStatement(self):
"""Process the end of a statement."""
self._PopContextType(EcmaContext.STATEMENT)
if self._context.type == EcmaContext.IMPLIED_BLOCK:
self._token.metadata.is_implied_block_close = True
self._PopContext()
def _ProcessContext(self):
"""Process the context at the current token.
Returns:
The context that should be assigned to the current token, or None if
the current context after this method should be used.
Raises:
ParseError: When the token appears in an invalid context.
"""
token = self._token
token_type = token.type
if self._context.type in EcmaContext.BLOCK_TYPES:
# Whenever we're in a block, we add a statement context. We make an
# exception for switch statements since they can only contain case: and
# default: and therefore don't directly contain statements.
# The block we add here may be immediately removed in some cases, but
# that causes no harm.
parent = self._context.parent
if not parent or parent.type != EcmaContext.SWITCH:
self._AddContext(EcmaContext.STATEMENT)
elif self._context.type == EcmaContext.ARRAY_LITERAL:
self._AddContext(EcmaContext.LITERAL_ELEMENT)
if token_type == TokenType.START_PAREN:
if self._last_code and self._last_code.IsKeyword('for'):
# for loops contain multiple statements in the group unlike while,
# switch, if, etc.
self._AddContext(EcmaContext.FOR_GROUP_BLOCK)
else:
self._AddContext(EcmaContext.GROUP)
elif token_type == TokenType.END_PAREN:
result = self._PopContextType(EcmaContext.GROUP,
EcmaContext.FOR_GROUP_BLOCK)
keyword_token = result.start_token.metadata.last_code
# keyword_token will not exist if the open paren is the first line of the
# file, for example if all code is wrapped in an immediately executed
# annonymous function.
if keyword_token and keyword_token.string in ('if', 'for', 'while'):
next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
if next_code.type != TokenType.START_BLOCK:
# Check for do-while.
is_do_while = False
pre_keyword_token = keyword_token.metadata.last_code
if (pre_keyword_token and
pre_keyword_token.type == TokenType.END_BLOCK):
start_block_token = pre_keyword_token.metadata.context.start_token
is_do_while = start_block_token.metadata.last_code.string == 'do'
# If it's not do-while, it's an implied block.
if not is_do_while:
self._AddContext(EcmaContext.IMPLIED_BLOCK)
token.metadata.is_implied_block = True
return result
# else (not else if) with no open brace after it should be considered the
# start of an implied block, similar to the case with if, for, and while
# above.
elif (token_type == TokenType.KEYWORD and
token.string == 'else'):
next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
if (next_code.type != TokenType.START_BLOCK and
(next_code.type != TokenType.KEYWORD or next_code.string != 'if')):
self._AddContext(EcmaContext.IMPLIED_BLOCK)
token.metadata.is_implied_block = True
elif token_type == TokenType.START_PARAMETERS:
self._AddContext(EcmaContext.PARAMETERS)
elif token_type == TokenType.END_PARAMETERS:
return self._PopContextType(EcmaContext.PARAMETERS)
elif token_type == TokenType.START_BRACKET:
if (self._last_code and
self._last_code.type in TokenType.EXPRESSION_ENDER_TYPES):
self._AddContext(EcmaContext.INDEX)
else:
self._AddContext(EcmaContext.ARRAY_LITERAL)
elif token_type == TokenType.END_BRACKET:
return self._PopContextType(EcmaContext.INDEX, EcmaContext.ARRAY_LITERAL)
elif token_type == TokenType.START_BLOCK:
if (self._last_code.type in (TokenType.END_PAREN,
TokenType.END_PARAMETERS) or
self._last_code.IsKeyword('else') or
self._last_code.IsKeyword('do') or
self._last_code.IsKeyword('try') or
self._last_code.IsKeyword('finally') or
(self._last_code.IsOperator(':') and
self._last_code.metadata.context.type == EcmaContext.CASE_BLOCK)):
# else, do, try, and finally all might have no () before {.
# Also, handle the bizzare syntax case 10: {...}.
self._AddContext(EcmaContext.BLOCK)
else:
self._AddContext(EcmaContext.OBJECT_LITERAL)
elif token_type == TokenType.END_BLOCK:
context = self._PopContextType(EcmaContext.BLOCK,
EcmaContext.OBJECT_LITERAL)
if self._context.type == EcmaContext.SWITCH:
# The end of the block also means the end of the switch statement it
# applies to.
return self._PopContext()
return context
elif token.IsKeyword('switch'):
self._AddContext(EcmaContext.SWITCH)
elif (token_type == TokenType.KEYWORD and
token.string in ('case', 'default') and
self._context.type != EcmaContext.OBJECT_LITERAL):
# Pop up to but not including the switch block.
while self._context.parent.type != EcmaContext.SWITCH:
self._PopContext()
if self._context.parent is None:
raise ParseError(token, 'Encountered case/default statement '
'without switch statement')
elif token.IsOperator('?'):
self._AddContext(EcmaContext.TERNARY_TRUE)
elif token.IsOperator(':'):
if self._context.type == EcmaContext.OBJECT_LITERAL:
self._AddContext(EcmaContext.LITERAL_ELEMENT)
elif self._context.type == EcmaContext.TERNARY_TRUE:
self._PopContext()
self._AddContext(EcmaContext.TERNARY_FALSE)
# Handle nested ternary statements like:
# foo = bar ? baz ? 1 : 2 : 3
# When we encounter the second ":" the context is
# ternary_false > ternary_true > statement > root
elif (self._context.type == EcmaContext.TERNARY_FALSE and
self._context.parent.type == EcmaContext.TERNARY_TRUE):
self._PopContext() # Leave current ternary false context.
self._PopContext() # Leave current parent ternary true
self._AddContext(EcmaContext.TERNARY_FALSE)
elif self._context.parent.type == EcmaContext.SWITCH:
self._AddContext(EcmaContext.CASE_BLOCK)
elif token.IsKeyword('var'):
self._AddContext(EcmaContext.VAR)
elif token.IsOperator(','):
while self._context.type not in (EcmaContext.VAR,
EcmaContext.ARRAY_LITERAL,
EcmaContext.OBJECT_LITERAL,
EcmaContext.STATEMENT,
EcmaContext.PARAMETERS,
EcmaContext.GROUP):
self._PopContext()
elif token_type == TokenType.SEMICOLON:
self._EndStatement()
def Process(self, first_token):
"""Processes the token stream starting with the given token."""
self._token = first_token
while self._token:
self._ProcessToken()
if self._token.IsCode():
self._last_code = self._token
self._token = self._token.next
try:
self._PopContextType(self, EcmaContext.ROOT)
except ParseError:
# Ignore the "popped to root" error.
pass
def _ProcessToken(self):
"""Process the given token."""
token = self._token
token.metadata = self._CreateMetaData()
context = (self._ProcessContext() or self._context)
token.metadata.context = context
token.metadata.last_code = self._last_code
# Determine the operator type of the token, if applicable.
if token.type == TokenType.OPERATOR:
token.metadata.operator_type = self._GetOperatorType(token)
# Determine if there is an implied semicolon after the token.
if token.type != TokenType.SEMICOLON:
next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
# A statement like if (x) does not need a semicolon after it
is_implied_block = self._context == EcmaContext.IMPLIED_BLOCK
is_last_code_in_line = token.IsCode() and (
not next_code or next_code.line_number != token.line_number)
is_continued_identifier = (token.type == TokenType.IDENTIFIER and
token.string.endswith('.'))
is_continued_operator = (token.type == TokenType.OPERATOR and
not token.metadata.IsUnaryPostOperator())
is_continued_dot = token.string == '.'
next_code_is_operator = next_code and next_code.type == TokenType.OPERATOR
next_code_is_dot = next_code and next_code.string == '.'
is_end_of_block = (
token.type == TokenType.END_BLOCK and
token.metadata.context.type != EcmaContext.OBJECT_LITERAL)
is_multiline_string = token.type == TokenType.STRING_TEXT
is_continued_var_decl = (token.IsKeyword('var') and
next_code and
(next_code.type in [TokenType.IDENTIFIER,
TokenType.SIMPLE_LVALUE]) and
token.line_number < next_code.line_number)
next_code_is_block = next_code and next_code.type == TokenType.START_BLOCK
if (is_last_code_in_line and
self._StatementCouldEndInContext() and
not is_multiline_string and
not is_end_of_block and
not is_continued_var_decl and
not is_continued_identifier and
not is_continued_operator and
not is_continued_dot and
not next_code_is_dot and
not next_code_is_operator and
not is_implied_block and
not next_code_is_block):
token.metadata.is_implied_semicolon = True
self._EndStatement()
def _StatementCouldEndInContext(self):
"""Returns if the current statement (if any) may end in this context."""
# In the basic statement or variable declaration context, statement can
# always end in this context.
if self._context.type in (EcmaContext.STATEMENT, EcmaContext.VAR):
return True
# End of a ternary false branch inside a statement can also be the
# end of the statement, for example:
# var x = foo ? foo.bar() : null
# In this case the statement ends after the null, when the context stack
# looks like ternary_false > var > statement > root.
if (self._context.type == EcmaContext.TERNARY_FALSE and
self._context.parent.type in (EcmaContext.STATEMENT, EcmaContext.VAR)):
return True
# In all other contexts like object and array literals, ternary true, etc.
# the statement can't yet end.
return False
def _GetOperatorType(self, token):
"""Returns the operator type of the given operator token.
Args:
token: The token to get arity for.
Returns:
The type of the operator. One of the *_OPERATOR constants defined in
EcmaMetaData.
"""
if token.string == '?':
return EcmaMetaData.TERNARY_OPERATOR
if token.string in TokenType.UNARY_OPERATORS:
return EcmaMetaData.UNARY_OPERATOR
last_code = token.metadata.last_code
if not last_code or last_code.type == TokenType.END_BLOCK:
return EcmaMetaData.UNARY_OPERATOR
if (token.string in TokenType.UNARY_POST_OPERATORS and
last_code.type in TokenType.EXPRESSION_ENDER_TYPES):
return EcmaMetaData.UNARY_POST_OPERATOR
if (token.string in TokenType.UNARY_OK_OPERATORS and
last_code.type not in TokenType.EXPRESSION_ENDER_TYPES and
last_code.string not in TokenType.UNARY_POST_OPERATORS):
return EcmaMetaData.UNARY_OPERATOR
return EcmaMetaData.BINARY_OPERATOR