#! /usr/bin/env python # Amply: a GNU MathProg data-parser # Copyright (c) 2010, Q. Lim (qlim001@aucklanduni.ac.nz) # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the # "Software"), to deal in the Software without restriction, including # without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # The above copyright notice and this permission notice shall be included # in all copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ Amply: a GNU MathProg data-parser This module implements a parser for a subset of the GNU MathProg language, namely parameter and set data records. Amply uses the Pyparsing library to parse input: http://pyparsing.wikispaces.com Usage: Create an Amply object, optionally passing in a string to parse. >>> a = Amply("param T := 3;") Symbols that are defined can be accessed as attributes or items. >>> print(a.T) 3.0 >>> print(a['T']) 3.0 The load_string and load_file methods can be used to parse additional data >>> a.load_string("set N := 1 2 3;") >>> a.load_file(open('some_file.dat')) An Amply object can be constructed from a file using Amply.from_file >>> a = Amply.from_file(open('some_file.dat')) How it works: The Amply class parses the input using Pyparsing. This results in a list of Stmt objects, each representing a MathProg statement. The statements are then evaluated by calling their eval() method. """ try: import pyparsing except ImportError: pass else: from pyparsing import alphas, nums, alphanums, delimitedList, oneOf from pyparsing import Combine, Dict, Forward, Group, Literal, NotAny from pyparsing import OneOrMore, Optional, ParseResults, QuotedString from pyparsing import StringEnd, Suppress, Word, ZeroOrMore from itertools import chain __all__ = ['Amply', 'AmplyError'] class AmplyObject(object): """ Represents the value of some object (e.g. a Set object or Parameter object """ class AmplyStmt(object): """ Represents a statement that has been parsed Statements implement an eval method. When the eval method is called, the Stmt object is responsible for modifying the Amply object that gets passed in appropriately (i.e. by adding or modifying a symbol) """ def eval(self, amply): # pragma: no coverage raise NotImplementedError() class NoDefault(object): """ Sentinel """ class AmplyError(Exception): """ Amply Exception Class """ def chunk(it, n): """ Yields n-tuples from iterator """ c = [] for i, x in enumerate(it): c.append(x) if (i + 1) % n == 0: yield tuple(c) c = [] if c: yield tuple(c) def access_data(curr_dict, keys, default=NoDefault): """ Convenience method for walking down a series of nested dictionaries keys is a tuple of strings access_data(dict, ('key1', 'key2', 'key3') is equivalent to dict['key1']['key2']['key3'] All dictionaries must exist, but the last dictionary in the hierarchy does not have to contain the final key, if default is set. """ if keys in curr_dict: return curr_dict[keys] if isinstance(keys, tuple): for sym in keys[:-1]: curr_dict = curr_dict[sym] r = curr_dict.get(keys[-1], default) if r is not NoDefault: return r if default is not NoDefault: return default raise KeyError() def transpose(data): """ Transpose a matrix represented as a dict of dicts """ rows = list(data.keys()) cols = set() for d in list(data.values()): cols.update(list(d.keys())) d = {} for col in cols: d[col] = {} for row in rows: d[col][row] = data[row][col] return d class SetDefStmt(AmplyStmt): """ Represents a set definition statement """ def __init__(self, tokens): assert (tokens[0] == 'set') self.name = tokens[1] self.dimen = tokens.get('dimen', None) self.subscripts = len(tokens.get('subscripts', ())) def __repr__(self): # pragma: no cover return '<%s: %s[%s]>' % (self.__class__.__name__, self.name, self.dimen) def eval(self, amply): set_obj = SetObject(subscripts=self.subscripts, dimen=self.dimen) amply._addSymbol(self.name, set_obj) class SetStmt(AmplyStmt): """ Represents a set statement """ def __init__(self, tokens): assert(tokens[0] == 'set') self.name = tokens[1] self.records = tokens.get('records') self.member = tokens.get('member', None) def __repr__(self): return '<%s: %s[%s] = %s>' % (self.__class__.__name__, self.name, self.member, self.records) def eval(self, amply): if self.name in amply.symbols: obj = amply.symbols[self.name] assert isinstance(obj, SetObject) else: obj = SetObject() obj.addData(self.member, self.records) amply._addSymbol(self.name, obj) class SliceRecord(object): """ Represents a parameter or set slice record """ def __init__(self, tokens): self.components = tuple(tokens) def __repr__(self): return '<%s: %s>' % (self.__class__.__name__, self.components) class TabularRecord(object): """ Represents a parameter tabular record """ def __init__(self, tokens): self._columns = tokens.columns self._data = tokens.data self.transposed = False def setTransposed(self, t): self.transposed = t def _rows(self): c = Chunker(self._data) while c.notEmpty(): row_label = c.chunk() data = c.chunk(len(self._columns)) yield row_label, data def data(self): d = {} for row, data in self._rows(): d[row] = {} for col, value in zip(self._columns, data): d[row][col] = value if self.transposed: return transpose(d) else: return d def __repr__(self): return '<%s: %s>' % (self.__class__.__name__, self.data()) class MatrixData(TabularRecord): """ Represents a set matrix data record """ def _rows(self): for row in self._data: yield row[0], row[1:] def data(self): d = [] for row_label, data in self._rows(): for col, value in zip(self._columns, data): if value == '+': if self.transposed: d.append((col, row_label)) else: d.append((row_label, col)) return d class ParamStmt(AmplyStmt): """ Represents a parameter statement """ def __init__(self, tokens): assert(tokens[0] == 'param') self.name = tokens.name self.records = tokens.records self.default = tokens.get('default', 0) def __repr__(self): return '<%s: %s = %s>' % (self.__class__.__name__, self.name, self.records) def eval(self, amply): if self.name in amply.symbols: obj = amply.symbols[self.name] assert isinstance(obj, ParamObject) else: obj = ParamObject() if obj.subscripts == 0: assert len(self.records) == 1 assert len(self.records[0]) == 1 amply._addSymbol(self.name, self.records[0][0]) else: obj.addData(self.records.asList(), default=self.default) amply._addSymbol(self.name, obj) class Chunker(object): """ Chunker class - used to consume tuples from an iterator """ def __init__(self, it): """ it is a sequence or iterator """ self.it = iter(it) self.empty = False self.next = None self._getNext() def _getNext(self): """ basically acts as a 1 element buffer so that we can detect if we've reached the end of the iterator """ old = self.next try: self.next = next(self.it) except StopIteration: self.empty = True return old def notEmpty(self): """ Test if the iterator has reached the end """ return not self.empty def chunk(self, n=None): """ Return a list with the next n elements from the iterator, or the next element if n is None """ if n is None: return self._getNext() return [self._getNext() for i in range(n)] class ParamTabbingStmt(AmplyStmt): """ Represents a parameter tabbing data statement """ def __init__(self, tokens): assert(tokens[0] == 'param') self.default = tokens.get('default', 0) self.params = tokens.params self.data = tokens.data def eval(self, amply): for i, param_name in enumerate(self.params): if param_name in amply.symbols: obj = amply.symbols[param_name] else: raise AmplyError("Param %s not previously defined" % param_name) for subs, data in self._rows(obj.subscripts): obj.setValue(subs, data[i]) def _rows(self, n_subscripts): c = Chunker(self.data) while c.notEmpty(): subscripts = c.chunk(n_subscripts) data = c.chunk(len(self.params)) yield (subscripts, data) class ParamDefStmt(AmplyStmt): """ Represents a parameter definition """ def __init__(self, tokens): assert(tokens[0] == 'param') self.name = tokens[1] self.subscripts = tokens.get('subscripts') self.default = tokens.get('default', NoDefault) def eval(self, amply): def _getDimen(symbol): s = amply[symbol] if s is None or s.dimen is None: return 1 return s.dimen num_subscripts = sum(_getDimen(s) for s in self.subscripts) amply._addSymbol(self.name, ParamObject(num_subscripts, self.default)) class ParamObject(AmplyObject): def __init__(self, subscripts=0, default=NoDefault): self.subscripts = subscripts self.default = default self.data = {} # initial slice is all *'s self._setSlice(SliceRecord(['*'] * self.subscripts)) def addData(self, data, default=0): def _v(v): if v == '.': return default return v for record in data: if isinstance(record, SliceRecord): self._setSlice(record) elif isinstance(record, list): # a plain data record rec_len = len(self.free_indices) + 1 if len(record) % rec_len != 0: raise AmplyError("Incomplete data record, expecting %d" " subscripts per value" % len(self.free_indices)) for c in chunk(record, len(self.free_indices) + 1): self.setValue(c[:-1], _v(c[-1])) elif isinstance(record, TabularRecord): record_data = record.data() for row_symbol in record_data: for col_symbol, value in list(record_data[row_symbol].items()): self.setValue((row_symbol, col_symbol), _v(value)) def _setSlice(self, slice): self.current_slice = list(slice.components) #copy self.free_indices = [i for i, v in enumerate(self.current_slice) if v == '*'] def setValue(self, symbols, value): if value == '.': value = self.default assert len(symbols) == len(self.free_indices) symbol_path = self.current_slice for index, symbol in zip(self.free_indices, symbols): symbol_path[index] = symbol curr_dict = self.data for symbol in symbol_path[:-1]: if symbol not in curr_dict: curr_dict[symbol] = {} curr_dict = curr_dict[symbol] curr_dict[symbol_path[-1]] = value def __getitem__(self, key): return access_data(self.data, key, self.default) def __repr__(self): return '<%s: %s>' % (self.__class__.__name__, self.data) def __eq__(self, other): return self.data == other def __ne__(self, other): return self.data != other class SetObject(AmplyObject): def __init__(self, subscripts=0, dimen=None): self.dimen = dimen self.subscripts = subscripts if self.subscripts == 0: self.data = [] else: self.data = {} self.current_slice = None def addData(self, member, data): dest_list = self._memberList(member) if self.dimen is not None and self.current_slice is None: self._setSlice(['*'] * self.dimen) for record in data: if isinstance(record, SliceRecord): self._setSlice(record.components) elif isinstance(record, MatrixData): if self.dimen is None: self.dimen = 2 self._setSlice(['*'] * 2) d = record.data() for v in d: self._addValue(dest_list, v) else: # simple-data self._addSimpleData(dest_list, record) def _setSlice(self, slice): self.current_slice = slice self.free_indices = [i for i, v in enumerate(self.current_slice) if v == '*'] def _memberList(self, member): if member is None: return self.data assert len(member) == self.subscripts curr_dict = self.data for symbol in member[:-1]: if symbol not in curr_dict: curr_dict[symbol] = {} curr_dict = curr_dict[symbol] if member[-1] not in curr_dict: curr_dict[member[-1]] = [] return curr_dict[member[-1]] def _dataLen(self, d): if isinstance(d, (tuple, list)): return len(d) return 1 def _addSimpleData(self, data_list, data): if isinstance(data[0], ParseResults): inferred_dimen = len(data[0]) else: inferred_dimen = 1 if self.dimen == None: # infer dimension from records self.dimen = inferred_dimen if self.current_slice == None: self._setSlice(tuple(['*'] * self.dimen)) if len(self.free_indices) == inferred_dimen: for d in data.asList(): self._addValue(data_list, d) elif len(self.free_indices) > 1 and inferred_dimen: for c in chunk(data, len(self.free_indices)): self._addValue(data_list, c) else: raise AmplyError("Dimension of elements (%d) does not match " "declared dimension, (%d)" % (inferred_dimen, self.dimen)) def _addValue(self, data_list, item): if self.dimen == 1: data_list.append(item) else: assert len(self.free_indices) == self._dataLen(item) to_add = list(self.current_slice) if isinstance(item, (tuple, list)): for index, value in zip(self.free_indices, item): to_add[index] = value else: assert len(self.free_indices) == 1 to_add[self.free_indices[0]] = item data_list.append(tuple(to_add)) def __getitem__(self, key): if not self.subscripts: return self.data[key] return access_data(self.data, key) def __len__(self): return len(self.data) def __iter__(self): return iter(self.data) def __contains__(self, item): return item in self.data def __eq__(self, other): return self.data == other def __ne__(self, other): return self.data != other def __repr__(self): return '<%s: %s>' % (self.__class__.__name__, self.data) def mark_transposed(tokens): tokens[0].setTransposed(True) return tokens # What follows is a Pyparsing description of the grammar symbol = Word(alphas, alphanums + "_") sign = Optional(oneOf("+ -")) integer = Combine(sign + Word(nums)).setParseAction(lambda t: int(t[0])) number = Combine(Word( "+-"+nums, nums) + Optional("." + Optional(Word(nums))) + Optional(oneOf("e E") + Word("+-"+nums, nums)))\ .setParseAction(lambda t: float(t[0])) LPAREN = Suppress('(') RPAREN = Suppress(')') LBRACE = Suppress('{') RBRACE = Suppress('}') LBRACKET = Suppress('[') RBRACKET = Suppress(']') END = Suppress(';') PLUS = Literal('+') MINUS = Literal('-') single = number | symbol | QuotedString('"') | QuotedString("'") tuple_ = Group(LPAREN + delimitedList(single) + RPAREN) subscript_domain = LBRACE + Group(delimitedList(symbol)) \ .setResultsName('subscripts') + RBRACE data = single | tuple_ # should not match a single (tr) simple_data = Group(NotAny('(tr)') + data + ZeroOrMore(Optional(Suppress(',')) + data)) # the first element of a set data record cannot be 'dimen', or else # these would match set_def_stmts non_dimen_simple_data = ~Literal('dimen') + simple_data matrix_row = Group(single + OneOrMore(PLUS | MINUS)) matrix_data = ":" + OneOrMore(single).setResultsName('columns') \ + ":=" + OneOrMore(matrix_row).setResultsName('data') matrix_data.setParseAction(MatrixData) tr_matrix_data = Suppress("(tr)") + matrix_data tr_matrix_data.setParseAction(mark_transposed) set_slice_component = number | symbol | '*' set_slice_record = LPAREN + NotAny('tr') + delimitedList(set_slice_component) + RPAREN set_slice_record.setParseAction(SliceRecord) _set_record = set_slice_record | matrix_data | tr_matrix_data | Suppress(":=") set_record = simple_data | _set_record non_dimen_set_record = non_dimen_simple_data | _set_record set_def_stmt = "set" + symbol + Optional(subscript_domain) + \ Optional("dimen" + integer.setResultsName('dimen')) + END set_def_stmt.setParseAction(SetDefStmt) set_member = LBRACKET + delimitedList(data) + RBRACKET set_stmt = "set" + symbol + Optional(set_member).setResultsName("member") + \ Group(non_dimen_set_record + ZeroOrMore(Optional(Suppress(',')) + set_record)) \ .setResultsName("records") + END set_stmt.setParseAction(SetStmt) subscript = single param_data = data | '.' plain_data = param_data | subscript + ZeroOrMore(Optional(Suppress(',')) + subscript) + param_data # should not match a single (tr) plain_data_record = Group(NotAny('(tr)') + plain_data + NotAny(plain_data) | \ plain_data + OneOrMore(plain_data) + NotAny(plain_data)) tabular_record = ":" + OneOrMore(single).setResultsName("columns") \ + ":=" + OneOrMore(single | '.').setResultsName('data') tabular_record.setParseAction(TabularRecord) tr_tabular_record = Suppress("(tr)") + tabular_record tr_tabular_record.setParseAction(mark_transposed) param_slice_component = number | symbol | '*' param_slice_record = LBRACKET + delimitedList(param_slice_component) + RBRACKET param_slice_record.setParseAction(SliceRecord) param_record = param_slice_record | plain_data_record | tabular_record | \ tr_tabular_record | Suppress(":=") param_default = Optional("default" + data.setResultsName('default')) param_stmt = "param" + symbol.setResultsName('name') + param_default + \ Group(OneOrMore(param_record)).setResultsName('records') + END param_stmt.setParseAction(ParamStmt) param_tabbing_stmt = "param" + param_default + ':' + Optional(symbol + ':') + \ OneOrMore(data).setResultsName('params') \ + ':=' + OneOrMore(single).setResultsName('data') + END param_tabbing_stmt.setParseAction(ParamTabbingStmt) param_def_stmt = "param" + symbol + Optional(subscript_domain) + \ param_default + END param_def_stmt.setParseAction(ParamDefStmt) stmts = set_stmt | set_def_stmt | param_stmt | param_def_stmt | \ param_tabbing_stmt grammar = ZeroOrMore(stmts) + StringEnd() class Amply(object): """ Data parsing interface """ def __init__(self, s=""): """ Create an Amply parser instance @param s (default ""): initial string to parse """ self.symbols = {} self.load_string(s) def __getitem__(self, key): """ Override so that symbols can be accessed using [] subscripts """ if key in self.symbols: return self.symbols[key] def __getattr__(self, name): """ Override so that symbols can be accesed as attributes """ if name in self.symbols: return self.symbols[name] return super(Amply, self).__getattr__(name) def _addSymbol(self, name, value): """ Adds a symbol to this instance. Typically, this class is called by objects created by the parser, and should not need to be called by users directly """ self.symbols[name] = value def load_string(self, string): """ Load and parse string @param string string to parse """ for obj in grammar.parseString(string): obj.eval(self) def load_file(self, f): """ Load and parse file @param f file-like object """ self.load_string(f.read()) @staticmethod def from_file(f): """ Create a new Amply instance from file (factory method) @param f file-like object """ return Amply(f.read())