# oc.py # # A subset-C parser, (BNF taken from 1996 International Obfuscated C Code Contest) # # Copyright, 2010, Paul McGuire # """ https://www.ioccc.org/1996/august.hint The following is a description of the OC grammar: OC grammar ========== Terminals are in quotes, () is used for bracketing. program: decl* decl: vardecl fundecl vardecl: type NAME ; type NAME "[" INT "]" ; fundecl: type NAME "(" args ")" "{" body "}" args: /*empty*/ ( arg "," )* arg arg: type NAME body: vardecl* stmt* stmt: ifstmt whilestmt dowhilestmt "return" expr ";" expr ";" "{" stmt* "}" ";" ifstmt: "if" "(" expr ")" stmt "if" "(" expr ")" stmt "else" stmt whilestmt: "while" "(" expr ")" stmt dowhilestmt: "do" stmt "while" "(" expr ")" ";" expr: expr binop expr unop expr expr "[" expr "]" "(" expr ")" expr "(" exprs ")" NAME INT CHAR STRING exprs: /*empty*/ (expr ",")* expr binop: "+" | "-" | "*" | "/" | "%" | "=" | "<" | "==" | "!=" unop: "!" | "-" | "*" type: "int" stars "char" stars stars: "*"* """ from pyparsing import * ParserElement.enablePackrat() LPAR,RPAR,LBRACK,RBRACK,LBRACE,RBRACE,SEMI,COMMA = map(Suppress, "()[]{};,") INT, CHAR, WHILE, DO, IF, ELSE, RETURN = map(Keyword, "int char while do if else return".split()) NAME = Word(alphas+"_", alphanums+"_") integer = Regex(r"[+-]?\d+") char = Regex(r"'.'") string_ = dblQuotedString TYPE = Group((INT | CHAR) + ZeroOrMore("*")) expr = Forward() func_call = Group(NAME + LPAR + Group(Optional(delimitedList(expr))) + RPAR) operand = func_call | NAME | integer | char | string_ expr <<= (infixNotation(operand, [ (oneOf('! - *'), 1, opAssoc.RIGHT), (oneOf('++ --'), 1, opAssoc.RIGHT), (oneOf('++ --'), 1, opAssoc.LEFT), (oneOf('* / %'), 2, opAssoc.LEFT), (oneOf('+ -'), 2, opAssoc.LEFT), (oneOf('< == > <= >= !='), 2, opAssoc.LEFT), (Regex(r'(?