| |
| Grammar of Starlark |
| ================== |
| |
| File = {Statement | newline} eof . |
| |
| Statement = DefStmt | IfStmt | ForStmt | WhileStmt | SimpleStmt . |
| |
| DefStmt = 'def' identifier '(' [Parameters [',']] ')' ':' Suite . |
| |
| Parameters = Parameter {',' Parameter}. |
| |
| Parameter = identifier | identifier '=' Test | '*' | '*' identifier | '**' identifier . |
| |
| IfStmt = 'if' Test ':' Suite {'elif' Test ':' Suite} ['else' ':' Suite] . |
| |
| ForStmt = 'for' LoopVariables 'in' Expression ':' Suite . |
| |
| WhileStmt = 'while' Test ':' Suite . |
| |
| Suite = [newline indent {Statement} outdent] | SimpleStmt . |
| |
| SimpleStmt = SmallStmt {';' SmallStmt} [';'] '\n' . |
| # NOTE: '\n' optional at EOF |
| |
| SmallStmt = ReturnStmt |
| | BreakStmt | ContinueStmt | PassStmt |
| | AssignStmt |
| | ExprStmt |
| | LoadStmt |
| . |
| |
| ReturnStmt = 'return' [Expression] . |
| BreakStmt = 'break' . |
| ContinueStmt = 'continue' . |
| PassStmt = 'pass' . |
| AssignStmt = Expression ('=' | '+=' | '-=' | '*=' | '/=' | '//=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') Expression . |
| ExprStmt = Expression . |
| |
| LoadStmt = 'load' '(' string {',' [identifier '='] string} [','] ')' . |
| |
| Test = LambdaExpr |
| | IfExpr |
| | PrimaryExpr |
| | UnaryExpr |
| | BinaryExpr |
| . |
| |
| LambdaExpr = 'lambda' [Parameters] ':' Test . |
| |
| IfExpr = Test 'if' Test 'else' Test . |
| |
| PrimaryExpr = Operand |
| | PrimaryExpr DotSuffix |
| | PrimaryExpr CallSuffix |
| | PrimaryExpr SliceSuffix |
| . |
| |
| Operand = identifier |
| | int | float | string |
| | ListExpr | ListComp |
| | DictExpr | DictComp |
| | '(' [Expression [',']] ')' |
| | ('-' | '+') PrimaryExpr |
| . |
| |
| DotSuffix = '.' identifier . |
| CallSuffix = '(' [Arguments [',']] ')' . |
| SliceSuffix = '[' [Expression] [':' Test [':' Test]] ']' . |
| |
| # A CallSuffix does not allow a trailing comma |
| # if the last argument is '*' Test or '**' Test. |
| |
| Arguments = Argument {',' Argument} . |
| Argument = Test | identifier '=' Test | '*' Test | '**' Test . |
| |
| ListExpr = '[' [Expression [',']] ']' . |
| ListComp = '[' Test {CompClause} ']'. |
| |
| DictExpr = '{' [Entries [',']] '}' . |
| DictComp = '{' Entry {CompClause} '}' . |
| Entries = Entry {',' Entry} . |
| Entry = Test ':' Test . |
| |
| CompClause = 'for' LoopVariables 'in' Test | 'if' Test . |
| |
| UnaryExpr = 'not' Test . |
| |
| BinaryExpr = Test {Binop Test} . |
| |
| Binop = 'or' |
| | 'and' |
| | '==' | '!=' | '<' | '>' | '<=' | '>=' | 'in' | 'not' 'in' |
| | '|' |
| | '^' |
| | '&' |
| | '-' | '+' |
| | '*' | '%' | '/' | '//' |
| . |
| |
| Expression = Test {',' Test} . |
| # NOTE: trailing comma permitted only when within [...] or (...). |
| |
| LoopVariables = PrimaryExpr {',' PrimaryExpr} . |
| |
| |
| # Notation (similar to Go spec): |
| - lowercase and 'quoted' items are lexical tokens. |
| - Capitalized names denote grammar productions. |
| - (...) implies grouping |
| - x | y means either x or y. |
| - [x] means x is optional |
| - {x} means x is repeated zero or more times |
| - The end of each declaration is marked with a period. |
| |
| # Tokens |
| - spaces: newline, eof, indent, outdent. |
| - identifier. |
| - literals: string, int, float. |
| - plus all quoted tokens such as '+=', 'return'. |
| |
| # Notes: |
| - Ambiguity is resolved using operator precedence. |
| - The grammar does not enforce the legal order of params and args, |
| nor that the first compclause must be a 'for'. |
| |
| TODO: |
| - explain how the lexer generates indent, outdent, and newline tokens. |
| - why is unary NOT separated from unary - and +? |
| - the grammar is (mostly) in LL(1) style so, for example, |
| dot expressions are formed suffixes, not complete expressions, |
| which makes the spec harder to read. Reorganize into non-LL(1) form? |