Grammar of Starlark ================== File = {Statement | newline} eof . Statement = DefStmt | IfStmt | ForStmt | WhileStmt | SimpleStmt . DefStmt = 'def' identifier '(' [Parameters [',']] ')' ':' Suite . Parameters = Parameter {',' Parameter}. Parameter = identifier | identifier '=' Test | '*' | '*' identifier | '**' identifier . IfStmt = 'if' Test ':' Suite {'elif' Test ':' Suite} ['else' ':' Suite] . ForStmt = 'for' LoopVariables 'in' Expression ':' Suite . WhileStmt = 'while' Test ':' Suite . Suite = [newline indent {Statement} outdent] | SimpleStmt . SimpleStmt = SmallStmt {';' SmallStmt} [';'] '\n' . # NOTE: '\n' optional at EOF SmallStmt = ReturnStmt | BreakStmt | ContinueStmt | PassStmt | AssignStmt | ExprStmt | LoadStmt . ReturnStmt = 'return' [Expression] . BreakStmt = 'break' . ContinueStmt = 'continue' . PassStmt = 'pass' . AssignStmt = Expression ('=' | '+=' | '-=' | '*=' | '/=' | '//=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') Expression . ExprStmt = Expression . LoadStmt = 'load' '(' string {',' [identifier '='] string} [','] ')' . Test = LambdaExpr | IfExpr | PrimaryExpr | UnaryExpr | BinaryExpr . LambdaExpr = 'lambda' [Parameters] ':' Test . IfExpr = Test 'if' Test 'else' Test . PrimaryExpr = Operand | PrimaryExpr DotSuffix | PrimaryExpr CallSuffix | PrimaryExpr SliceSuffix . Operand = identifier | int | float | string | ListExpr | ListComp | DictExpr | DictComp | '(' [Expression [',']] ')' | ('-' | '+') PrimaryExpr . DotSuffix = '.' identifier . CallSuffix = '(' [Arguments [',']] ')' . SliceSuffix = '[' [Expression] [':' Test [':' Test]] ']' . Arguments = Argument {',' Argument} . Argument = Test | identifier '=' Test | '*' Test | '**' Test . ListExpr = '[' [Expression [',']] ']' . ListComp = '[' Test {CompClause} ']'. DictExpr = '{' [Entries [',']] '}' . DictComp = '{' Entry {CompClause} '}' . Entries = Entry {',' Entry} . Entry = Test ':' Test . CompClause = 'for' LoopVariables 'in' Test | 'if' Test . UnaryExpr = 'not' Test . BinaryExpr = Test {Binop Test} . Binop = 'or' | 'and' | '==' | '!=' | '<' | '>' | '<=' | '>=' | 'in' | 'not' 'in' | '|' | '^' | '&' | '-' | '+' | '*' | '%' | '/' | '//' . Expression = Test {',' Test} . # NOTE: trailing comma permitted only when within [...] or (...). LoopVariables = PrimaryExpr {',' PrimaryExpr} . # Notation (similar to Go spec): - lowercase and 'quoted' items are lexical tokens. - Capitalized names denote grammar productions. - (...) implies grouping - x | y means either x or y. - [x] means x is optional - {x} means x is repeated zero or more times - The end of each declaration is marked with a period. # Tokens - spaces: newline, eof, indent, outdent. - identifier. - literals: string, int, float. - plus all quoted tokens such as '+=', 'return'. # Notes: - Ambiguity is resolved using operator precedence. - The grammar does not enforce the legal order of params and args, nor that the first compclause must be a 'for'. TODO: - explain how the lexer generates indent, outdent, and newline tokens. - why is unary NOT separated from unary - and +? - the grammar is (mostly) in LL(1) style so, for example, dot expressions are formed suffixes, not complete expressions, which makes the spec harder to read. Reorganize into non-LL(1) form?