# -*- coding: cp1252 -*- ## # Module for parsing/evaluating Microsoft Excel formulas. # #

Copyright © 2005-2012 Stephen John Machin, Lingfo Pty Ltd

#

This module is part of the xlrd package, which is released under # a BSD-style licence.

## # No part of the content of this file was derived from the works of David Giffin. from __future__ import print_function import copy from struct import unpack from .timemachine import * from .biffh import unpack_unicode_update_pos, unpack_string_update_pos, \ XLRDError, hex_char_dump, error_text_from_code, BaseObject __all__ = [ 'oBOOL', 'oERR', 'oNUM', 'oREF', 'oREL', 'oSTRG', 'oUNK', 'decompile_formula', 'dump_formula', 'evaluate_name_formula', 'okind_dict', 'rangename3d', 'rangename3drel', 'cellname', 'cellnameabs', 'colname', 'FMLA_TYPE_CELL', 'FMLA_TYPE_SHARED', 'FMLA_TYPE_ARRAY', 'FMLA_TYPE_COND_FMT', 'FMLA_TYPE_DATA_VAL', 'FMLA_TYPE_NAME', ] FMLA_TYPE_CELL = 1 FMLA_TYPE_SHARED = 2 FMLA_TYPE_ARRAY = 4 FMLA_TYPE_COND_FMT = 8 FMLA_TYPE_DATA_VAL = 16 FMLA_TYPE_NAME = 32 ALL_FMLA_TYPES = 63 FMLA_TYPEDESCR_MAP = { 1 : 'CELL', 2 : 'SHARED', 4 : 'ARRAY', 8 : 'COND-FMT', 16: 'DATA-VAL', 32: 'NAME', } _TOKEN_NOT_ALLOWED = { 0x01: ALL_FMLA_TYPES - FMLA_TYPE_CELL, # tExp 0x02: ALL_FMLA_TYPES - FMLA_TYPE_CELL, # tTbl 0x0F: FMLA_TYPE_SHARED + FMLA_TYPE_COND_FMT + FMLA_TYPE_DATA_VAL, # tIsect 0x10: FMLA_TYPE_SHARED + FMLA_TYPE_COND_FMT + FMLA_TYPE_DATA_VAL, # tUnion/List 0x11: FMLA_TYPE_SHARED + FMLA_TYPE_COND_FMT + FMLA_TYPE_DATA_VAL, # tRange 0x20: FMLA_TYPE_SHARED + FMLA_TYPE_COND_FMT + FMLA_TYPE_DATA_VAL, # tArray 0x23: FMLA_TYPE_SHARED, # tName 0x39: FMLA_TYPE_SHARED + FMLA_TYPE_COND_FMT + FMLA_TYPE_DATA_VAL, # tNameX 0x3A: FMLA_TYPE_SHARED + FMLA_TYPE_COND_FMT + FMLA_TYPE_DATA_VAL, # tRef3d 0x3B: FMLA_TYPE_SHARED + FMLA_TYPE_COND_FMT + FMLA_TYPE_DATA_VAL, # tArea3d 0x2C: FMLA_TYPE_CELL + FMLA_TYPE_ARRAY, # tRefN 0x2D: FMLA_TYPE_CELL + FMLA_TYPE_ARRAY, # tAreaN # plus weird stuff like tMem* }.get oBOOL = 3 oERR = 4 oMSNG = 5 # tMissArg oNUM = 2 oREF = -1 oREL = -2 oSTRG = 1 oUNK = 0 okind_dict = { -2: "oREL", -1: "oREF", 0 : "oUNK", 1 : "oSTRG", 2 : "oNUM", 3 : "oBOOL", 4 : "oERR", 5 : "oMSNG", } listsep = ',' #### probably should depend on locale # sztabN[opcode] -> the number of bytes to consume. # -1 means variable # -2 means this opcode not implemented in this version. # Which N to use? Depends on biff_version; see szdict. sztab0 = [-2, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -2, -1, 8, 4, 2, 2, 3, 9, 8, 2, 3, 8, 4, 7, 5, 5, 5, 2, 4, 7, 4, 7, 2, 2, -2, -2, -2, -2, -2, -2, -2, -2, 3, -2, -2, -2, -2, -2, -2, -2] sztab1 = [-2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -2, -1, 11, 5, 2, 2, 3, 9, 9, 2, 3, 11, 4, 7, 7, 7, 7, 3, 4, 7, 4, 7, 3, 3, -2, -2, -2, -2, -2, -2, -2, -2, 3, -2, -2, -2, -2, -2, -2, -2] sztab2 = [-2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -2, -1, 11, 5, 2, 2, 3, 9, 9, 3, 4, 11, 4, 7, 7, 7, 7, 3, 4, 7, 4, 7, 3, 3, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2] sztab3 = [-2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -2, -1, -2, -2, 2, 2, 3, 9, 9, 3, 4, 15, 4, 7, 7, 7, 7, 3, 4, 7, 4, 7, 3, 3, -2, -2, -2, -2, -2, -2, -2, -2, -2, 25, 18, 21, 18, 21, -2, -2] sztab4 = [-2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -2, -2, 2, 2, 3, 9, 9, 3, 4, 5, 5, 9, 7, 7, 7, 3, 5, 9, 5, 9, 3, 3, -2, -2, -2, -2, -2, -2, -2, -2, -2, 7, 7, 11, 7, 11, -2, -2] szdict = { 20 : sztab0, 21 : sztab0, 30 : sztab1, 40 : sztab2, 45 : sztab2, 50 : sztab3, 70 : sztab3, 80 : sztab4, } # For debugging purposes ... the name for each opcode # (without the prefix "t" used on OOo docs) onames = ['Unk00', 'Exp', 'Tbl', 'Add', 'Sub', 'Mul', 'Div', 'Power', 'Concat', 'LT', 'LE', 'EQ', 'GE', 'GT', 'NE', 'Isect', 'List', 'Range', 'Uplus', 'Uminus', 'Percent', 'Paren', 'MissArg', 'Str', 'Extended', 'Attr', 'Sheet', 'EndSheet', 'Err', 'Bool', 'Int', 'Num', 'Array', 'Func', 'FuncVar', 'Name', 'Ref', 'Area', 'MemArea', 'MemErr', 'MemNoMem', 'MemFunc', 'RefErr', 'AreaErr', 'RefN', 'AreaN', 'MemAreaN', 'MemNoMemN', '', '', '', '', '', '', '', '', 'FuncCE', 'NameX', 'Ref3d', 'Area3d', 'RefErr3d', 'AreaErr3d', '', ''] func_defs = { # index: (name, min#args, max#args, flags, #known_args, return_type, kargs) 0 : ('COUNT', 0, 30, 0x04, 1, 'V', 'R'), 1 : ('IF', 2, 3, 0x04, 3, 'V', 'VRR'), 2 : ('ISNA', 1, 1, 0x02, 1, 'V', 'V'), 3 : ('ISERROR', 1, 1, 0x02, 1, 'V', 'V'), 4 : ('SUM', 0, 30, 0x04, 1, 'V', 'R'), 5 : ('AVERAGE', 1, 30, 0x04, 1, 'V', 'R'), 6 : ('MIN', 1, 30, 0x04, 1, 'V', 'R'), 7 : ('MAX', 1, 30, 0x04, 1, 'V', 'R'), 8 : ('ROW', 0, 1, 0x04, 1, 'V', 'R'), 9 : ('COLUMN', 0, 1, 0x04, 1, 'V', 'R'), 10 : ('NA', 0, 0, 0x02, 0, 'V', ''), 11 : ('NPV', 2, 30, 0x04, 2, 'V', 'VR'), 12 : ('STDEV', 1, 30, 0x04, 1, 'V', 'R'), 13 : ('DOLLAR', 1, 2, 0x04, 1, 'V', 'V'), 14 : ('FIXED', 2, 3, 0x04, 3, 'V', 'VVV'), 15 : ('SIN', 1, 1, 0x02, 1, 'V', 'V'), 16 : ('COS', 1, 1, 0x02, 1, 'V', 'V'), 17 : ('TAN', 1, 1, 0x02, 1, 'V', 'V'), 18 : ('ATAN', 1, 1, 0x02, 1, 'V', 'V'), 19 : ('PI', 0, 0, 0x02, 0, 'V', ''), 20 : ('SQRT', 1, 1, 0x02, 1, 'V', 'V'), 21 : ('EXP', 1, 1, 0x02, 1, 'V', 'V'), 22 : ('LN', 1, 1, 0x02, 1, 'V', 'V'), 23 : ('LOG10', 1, 1, 0x02, 1, 'V', 'V'), 24 : ('ABS', 1, 1, 0x02, 1, 'V', 'V'), 25 : ('INT', 1, 1, 0x02, 1, 'V', 'V'), 26 : ('SIGN', 1, 1, 0x02, 1, 'V', 'V'), 27 : ('ROUND', 2, 2, 0x02, 2, 'V', 'VV'), 28 : ('LOOKUP', 2, 3, 0x04, 2, 'V', 'VR'), 29 : ('INDEX', 2, 4, 0x0c, 4, 'R', 'RVVV'), 30 : ('REPT', 2, 2, 0x02, 2, 'V', 'VV'), 31 : ('MID', 3, 3, 0x02, 3, 'V', 'VVV'), 32 : ('LEN', 1, 1, 0x02, 1, 'V', 'V'), 33 : ('VALUE', 1, 1, 0x02, 1, 'V', 'V'), 34 : ('TRUE', 0, 0, 0x02, 0, 'V', ''), 35 : ('FALSE', 0, 0, 0x02, 0, 'V', ''), 36 : ('AND', 1, 30, 0x04, 1, 'V', 'R'), 37 : ('OR', 1, 30, 0x04, 1, 'V', 'R'), 38 : ('NOT', 1, 1, 0x02, 1, 'V', 'V'), 39 : ('MOD', 2, 2, 0x02, 2, 'V', 'VV'), 40 : ('DCOUNT', 3, 3, 0x02, 3, 'V', 'RRR'), 41 : ('DSUM', 3, 3, 0x02, 3, 'V', 'RRR'), 42 : ('DAVERAGE', 3, 3, 0x02, 3, 'V', 'RRR'), 43 : ('DMIN', 3, 3, 0x02, 3, 'V', 'RRR'), 44 : ('DMAX', 3, 3, 0x02, 3, 'V', 'RRR'), 45 : ('DSTDEV', 3, 3, 0x02, 3, 'V', 'RRR'), 46 : ('VAR', 1, 30, 0x04, 1, 'V', 'R'), 47 : ('DVAR', 3, 3, 0x02, 3, 'V', 'RRR'), 48 : ('TEXT', 2, 2, 0x02, 2, 'V', 'VV'), 49 : ('LINEST', 1, 4, 0x04, 4, 'A', 'RRVV'), 50 : ('TREND', 1, 4, 0x04, 4, 'A', 'RRRV'), 51 : ('LOGEST', 1, 4, 0x04, 4, 'A', 'RRVV'), 52 : ('GROWTH', 1, 4, 0x04, 4, 'A', 'RRRV'), 56 : ('PV', 3, 5, 0x04, 5, 'V', 'VVVVV'), 57 : ('FV', 3, 5, 0x04, 5, 'V', 'VVVVV'), 58 : ('NPER', 3, 5, 0x04, 5, 'V', 'VVVVV'), 59 : ('PMT', 3, 5, 0x04, 5, 'V', 'VVVVV'), 60 : ('RATE', 3, 6, 0x04, 6, 'V', 'VVVVVV'), 61 : ('MIRR', 3, 3, 0x02, 3, 'V', 'RVV'), 62 : ('IRR', 1, 2, 0x04, 2, 'V', 'RV'), 63 : ('RAND', 0, 0, 0x0a, 0, 'V', ''), 64 : ('MATCH', 2, 3, 0x04, 3, 'V', 'VRR'), 65 : ('DATE', 3, 3, 0x02, 3, 'V', 'VVV'), 66 : ('TIME', 3, 3, 0x02, 3, 'V', 'VVV'), 67 : ('DAY', 1, 1, 0x02, 1, 'V', 'V'), 68 : ('MONTH', 1, 1, 0x02, 1, 'V', 'V'), 69 : ('YEAR', 1, 1, 0x02, 1, 'V', 'V'), 70 : ('WEEKDAY', 1, 2, 0x04, 2, 'V', 'VV'), 71 : ('HOUR', 1, 1, 0x02, 1, 'V', 'V'), 72 : ('MINUTE', 1, 1, 0x02, 1, 'V', 'V'), 73 : ('SECOND', 1, 1, 0x02, 1, 'V', 'V'), 74 : ('NOW', 0, 0, 0x0a, 0, 'V', ''), 75 : ('AREAS', 1, 1, 0x02, 1, 'V', 'R'), 76 : ('ROWS', 1, 1, 0x02, 1, 'V', 'R'), 77 : ('COLUMNS', 1, 1, 0x02, 1, 'V', 'R'), 78 : ('OFFSET', 3, 5, 0x04, 5, 'R', 'RVVVV'), 82 : ('SEARCH', 2, 3, 0x04, 3, 'V', 'VVV'), 83 : ('TRANSPOSE', 1, 1, 0x02, 1, 'A', 'A'), 86 : ('TYPE', 1, 1, 0x02, 1, 'V', 'V'), 92 : ('SERIESSUM', 4, 4, 0x02, 4, 'V', 'VVVA'), 97 : ('ATAN2', 2, 2, 0x02, 2, 'V', 'VV'), 98 : ('ASIN', 1, 1, 0x02, 1, 'V', 'V'), 99 : ('ACOS', 1, 1, 0x02, 1, 'V', 'V'), 100: ('CHOOSE', 2, 30, 0x04, 2, 'V', 'VR'), 101: ('HLOOKUP', 3, 4, 0x04, 4, 'V', 'VRRV'), 102: ('VLOOKUP', 3, 4, 0x04, 4, 'V', 'VRRV'), 105: ('ISREF', 1, 1, 0x02, 1, 'V', 'R'), 109: ('LOG', 1, 2, 0x04, 2, 'V', 'VV'), 111: ('CHAR', 1, 1, 0x02, 1, 'V', 'V'), 112: ('LOWER', 1, 1, 0x02, 1, 'V', 'V'), 113: ('UPPER', 1, 1, 0x02, 1, 'V', 'V'), 114: ('PROPER', 1, 1, 0x02, 1, 'V', 'V'), 115: ('LEFT', 1, 2, 0x04, 2, 'V', 'VV'), 116: ('RIGHT', 1, 2, 0x04, 2, 'V', 'VV'), 117: ('EXACT', 2, 2, 0x02, 2, 'V', 'VV'), 118: ('TRIM', 1, 1, 0x02, 1, 'V', 'V'), 119: ('REPLACE', 4, 4, 0x02, 4, 'V', 'VVVV'), 120: ('SUBSTITUTE', 3, 4, 0x04, 4, 'V', 'VVVV'), 121: ('CODE', 1, 1, 0x02, 1, 'V', 'V'), 124: ('FIND', 2, 3, 0x04, 3, 'V', 'VVV'), 125: ('CELL', 1, 2, 0x0c, 2, 'V', 'VR'), 126: ('ISERR', 1, 1, 0x02, 1, 'V', 'V'), 127: ('ISTEXT', 1, 1, 0x02, 1, 'V', 'V'), 128: ('ISNUMBER', 1, 1, 0x02, 1, 'V', 'V'), 129: ('ISBLANK', 1, 1, 0x02, 1, 'V', 'V'), 130: ('T', 1, 1, 0x02, 1, 'V', 'R'), 131: ('N', 1, 1, 0x02, 1, 'V', 'R'), 140: ('DATEVALUE', 1, 1, 0x02, 1, 'V', 'V'), 141: ('TIMEVALUE', 1, 1, 0x02, 1, 'V', 'V'), 142: ('SLN', 3, 3, 0x02, 3, 'V', 'VVV'), 143: ('SYD', 4, 4, 0x02, 4, 'V', 'VVVV'), 144: ('DDB', 4, 5, 0x04, 5, 'V', 'VVVVV'), 148: ('INDIRECT', 1, 2, 0x0c, 2, 'R', 'VV'), 162: ('CLEAN', 1, 1, 0x02, 1, 'V', 'V'), 163: ('MDETERM', 1, 1, 0x02, 1, 'V', 'A'), 164: ('MINVERSE', 1, 1, 0x02, 1, 'A', 'A'), 165: ('MMULT', 2, 2, 0x02, 2, 'A', 'AA'), 167: ('IPMT', 4, 6, 0x04, 6, 'V', 'VVVVVV'), 168: ('PPMT', 4, 6, 0x04, 6, 'V', 'VVVVVV'), 169: ('COUNTA', 0, 30, 0x04, 1, 'V', 'R'), 183: ('PRODUCT', 0, 30, 0x04, 1, 'V', 'R'), 184: ('FACT', 1, 1, 0x02, 1, 'V', 'V'), 189: ('DPRODUCT', 3, 3, 0x02, 3, 'V', 'RRR'), 190: ('ISNONTEXT', 1, 1, 0x02, 1, 'V', 'V'), 193: ('STDEVP', 1, 30, 0x04, 1, 'V', 'R'), 194: ('VARP', 1, 30, 0x04, 1, 'V', 'R'), 195: ('DSTDEVP', 3, 3, 0x02, 3, 'V', 'RRR'), 196: ('DVARP', 3, 3, 0x02, 3, 'V', 'RRR'), 197: ('TRUNC', 1, 2, 0x04, 2, 'V', 'VV'), 198: ('ISLOGICAL', 1, 1, 0x02, 1, 'V', 'V'), 199: ('DCOUNTA', 3, 3, 0x02, 3, 'V', 'RRR'), 204: ('USDOLLAR', 1, 2, 0x04, 2, 'V', 'VV'), 205: ('FINDB', 2, 3, 0x04, 3, 'V', 'VVV'), 206: ('SEARCHB', 2, 3, 0x04, 3, 'V', 'VVV'), 207: ('REPLACEB', 4, 4, 0x02, 4, 'V', 'VVVV'), 208: ('LEFTB', 1, 2, 0x04, 2, 'V', 'VV'), 209: ('RIGHTB', 1, 2, 0x04, 2, 'V', 'VV'), 210: ('MIDB', 3, 3, 0x02, 3, 'V', 'VVV'), 211: ('LENB', 1, 1, 0x02, 1, 'V', 'V'), 212: ('ROUNDUP', 2, 2, 0x02, 2, 'V', 'VV'), 213: ('ROUNDDOWN', 2, 2, 0x02, 2, 'V', 'VV'), 214: ('ASC', 1, 1, 0x02, 1, 'V', 'V'), 215: ('DBCS', 1, 1, 0x02, 1, 'V', 'V'), 216: ('RANK', 2, 3, 0x04, 3, 'V', 'VRV'), 219: ('ADDRESS', 2, 5, 0x04, 5, 'V', 'VVVVV'), 220: ('DAYS360', 2, 3, 0x04, 3, 'V', 'VVV'), 221: ('TODAY', 0, 0, 0x0a, 0, 'V', ''), 222: ('VDB', 5, 7, 0x04, 7, 'V', 'VVVVVVV'), 227: ('MEDIAN', 1, 30, 0x04, 1, 'V', 'R'), 228: ('SUMPRODUCT', 1, 30, 0x04, 1, 'V', 'A'), 229: ('SINH', 1, 1, 0x02, 1, 'V', 'V'), 230: ('COSH', 1, 1, 0x02, 1, 'V', 'V'), 231: ('TANH', 1, 1, 0x02, 1, 'V', 'V'), 232: ('ASINH', 1, 1, 0x02, 1, 'V', 'V'), 233: ('ACOSH', 1, 1, 0x02, 1, 'V', 'V'), 234: ('ATANH', 1, 1, 0x02, 1, 'V', 'V'), 235: ('DGET', 3, 3, 0x02, 3, 'V', 'RRR'), 244: ('INFO', 1, 1, 0x02, 1, 'V', 'V'), 247: ('DB', 4, 5, 0x04, 5, 'V', 'VVVVV'), 252: ('FREQUENCY', 2, 2, 0x02, 2, 'A', 'RR'), 261: ('ERROR.TYPE', 1, 1, 0x02, 1, 'V', 'V'), 269: ('AVEDEV', 1, 30, 0x04, 1, 'V', 'R'), 270: ('BETADIST', 3, 5, 0x04, 1, 'V', 'V'), 271: ('GAMMALN', 1, 1, 0x02, 1, 'V', 'V'), 272: ('BETAINV', 3, 5, 0x04, 1, 'V', 'V'), 273: ('BINOMDIST', 4, 4, 0x02, 4, 'V', 'VVVV'), 274: ('CHIDIST', 2, 2, 0x02, 2, 'V', 'VV'), 275: ('CHIINV', 2, 2, 0x02, 2, 'V', 'VV'), 276: ('COMBIN', 2, 2, 0x02, 2, 'V', 'VV'), 277: ('CONFIDENCE', 3, 3, 0x02, 3, 'V', 'VVV'), 278: ('CRITBINOM', 3, 3, 0x02, 3, 'V', 'VVV'), 279: ('EVEN', 1, 1, 0x02, 1, 'V', 'V'), 280: ('EXPONDIST', 3, 3, 0x02, 3, 'V', 'VVV'), 281: ('FDIST', 3, 3, 0x02, 3, 'V', 'VVV'), 282: ('FINV', 3, 3, 0x02, 3, 'V', 'VVV'), 283: ('FISHER', 1, 1, 0x02, 1, 'V', 'V'), 284: ('FISHERINV', 1, 1, 0x02, 1, 'V', 'V'), 285: ('FLOOR', 2, 2, 0x02, 2, 'V', 'VV'), 286: ('GAMMADIST', 4, 4, 0x02, 4, 'V', 'VVVV'), 287: ('GAMMAINV', 3, 3, 0x02, 3, 'V', 'VVV'), 288: ('CEILING', 2, 2, 0x02, 2, 'V', 'VV'), 289: ('HYPGEOMDIST', 4, 4, 0x02, 4, 'V', 'VVVV'), 290: ('LOGNORMDIST', 3, 3, 0x02, 3, 'V', 'VVV'), 291: ('LOGINV', 3, 3, 0x02, 3, 'V', 'VVV'), 292: ('NEGBINOMDIST', 3, 3, 0x02, 3, 'V', 'VVV'), 293: ('NORMDIST', 4, 4, 0x02, 4, 'V', 'VVVV'), 294: ('NORMSDIST', 1, 1, 0x02, 1, 'V', 'V'), 295: ('NORMINV', 3, 3, 0x02, 3, 'V', 'VVV'), 296: ('NORMSINV', 1, 1, 0x02, 1, 'V', 'V'), 297: ('STANDARDIZE', 3, 3, 0x02, 3, 'V', 'VVV'), 298: ('ODD', 1, 1, 0x02, 1, 'V', 'V'), 299: ('PERMUT', 2, 2, 0x02, 2, 'V', 'VV'), 300: ('POISSON', 3, 3, 0x02, 3, 'V', 'VVV'), 301: ('TDIST', 3, 3, 0x02, 3, 'V', 'VVV'), 302: ('WEIBULL', 4, 4, 0x02, 4, 'V', 'VVVV'), 303: ('SUMXMY2', 2, 2, 0x02, 2, 'V', 'AA'), 304: ('SUMX2MY2', 2, 2, 0x02, 2, 'V', 'AA'), 305: ('SUMX2PY2', 2, 2, 0x02, 2, 'V', 'AA'), 306: ('CHITEST', 2, 2, 0x02, 2, 'V', 'AA'), 307: ('CORREL', 2, 2, 0x02, 2, 'V', 'AA'), 308: ('COVAR', 2, 2, 0x02, 2, 'V', 'AA'), 309: ('FORECAST', 3, 3, 0x02, 3, 'V', 'VAA'), 310: ('FTEST', 2, 2, 0x02, 2, 'V', 'AA'), 311: ('INTERCEPT', 2, 2, 0x02, 2, 'V', 'AA'), 312: ('PEARSON', 2, 2, 0x02, 2, 'V', 'AA'), 313: ('RSQ', 2, 2, 0x02, 2, 'V', 'AA'), 314: ('STEYX', 2, 2, 0x02, 2, 'V', 'AA'), 315: ('SLOPE', 2, 2, 0x02, 2, 'V', 'AA'), 316: ('TTEST', 4, 4, 0x02, 4, 'V', 'AAVV'), 317: ('PROB', 3, 4, 0x04, 3, 'V', 'AAV'), 318: ('DEVSQ', 1, 30, 0x04, 1, 'V', 'R'), 319: ('GEOMEAN', 1, 30, 0x04, 1, 'V', 'R'), 320: ('HARMEAN', 1, 30, 0x04, 1, 'V', 'R'), 321: ('SUMSQ', 0, 30, 0x04, 1, 'V', 'R'), 322: ('KURT', 1, 30, 0x04, 1, 'V', 'R'), 323: ('SKEW', 1, 30, 0x04, 1, 'V', 'R'), 324: ('ZTEST', 2, 3, 0x04, 2, 'V', 'RV'), 325: ('LARGE', 2, 2, 0x02, 2, 'V', 'RV'), 326: ('SMALL', 2, 2, 0x02, 2, 'V', 'RV'), 327: ('QUARTILE', 2, 2, 0x02, 2, 'V', 'RV'), 328: ('PERCENTILE', 2, 2, 0x02, 2, 'V', 'RV'), 329: ('PERCENTRANK', 2, 3, 0x04, 2, 'V', 'RV'), 330: ('MODE', 1, 30, 0x04, 1, 'V', 'A'), 331: ('TRIMMEAN', 2, 2, 0x02, 2, 'V', 'RV'), 332: ('TINV', 2, 2, 0x02, 2, 'V', 'VV'), 336: ('CONCATENATE', 0, 30, 0x04, 1, 'V', 'V'), 337: ('POWER', 2, 2, 0x02, 2, 'V', 'VV'), 342: ('RADIANS', 1, 1, 0x02, 1, 'V', 'V'), 343: ('DEGREES', 1, 1, 0x02, 1, 'V', 'V'), 344: ('SUBTOTAL', 2, 30, 0x04, 2, 'V', 'VR'), 345: ('SUMIF', 2, 3, 0x04, 3, 'V', 'RVR'), 346: ('COUNTIF', 2, 2, 0x02, 2, 'V', 'RV'), 347: ('COUNTBLANK', 1, 1, 0x02, 1, 'V', 'R'), 350: ('ISPMT', 4, 4, 0x02, 4, 'V', 'VVVV'), 351: ('DATEDIF', 3, 3, 0x02, 3, 'V', 'VVV'), 352: ('DATESTRING', 1, 1, 0x02, 1, 'V', 'V'), 353: ('NUMBERSTRING', 2, 2, 0x02, 2, 'V', 'VV'), 354: ('ROMAN', 1, 2, 0x04, 2, 'V', 'VV'), 358: ('GETPIVOTDATA', 2, 2, 0x02, 2, 'V', 'RV'), 359: ('HYPERLINK', 1, 2, 0x04, 2, 'V', 'VV'), 360: ('PHONETIC', 1, 1, 0x02, 1, 'V', 'V'), 361: ('AVERAGEA', 1, 30, 0x04, 1, 'V', 'R'), 362: ('MAXA', 1, 30, 0x04, 1, 'V', 'R'), 363: ('MINA', 1, 30, 0x04, 1, 'V', 'R'), 364: ('STDEVPA', 1, 30, 0x04, 1, 'V', 'R'), 365: ('VARPA', 1, 30, 0x04, 1, 'V', 'R'), 366: ('STDEVA', 1, 30, 0x04, 1, 'V', 'R'), 367: ('VARA', 1, 30, 0x04, 1, 'V', 'R'), 368: ('BAHTTEXT', 1, 1, 0x02, 1, 'V', 'V'), 369: ('THAIDAYOFWEEK', 1, 1, 0x02, 1, 'V', 'V'), 370: ('THAIDIGIT', 1, 1, 0x02, 1, 'V', 'V'), 371: ('THAIMONTHOFYEAR', 1, 1, 0x02, 1, 'V', 'V'), 372: ('THAINUMSOUND', 1, 1, 0x02, 1, 'V', 'V'), 373: ('THAINUMSTRING', 1, 1, 0x02, 1, 'V', 'V'), 374: ('THAISTRINGLENGTH', 1, 1, 0x02, 1, 'V', 'V'), 375: ('ISTHAIDIGIT', 1, 1, 0x02, 1, 'V', 'V'), 376: ('ROUNDBAHTDOWN', 1, 1, 0x02, 1, 'V', 'V'), 377: ('ROUNDBAHTUP', 1, 1, 0x02, 1, 'V', 'V'), 378: ('THAIYEAR', 1, 1, 0x02, 1, 'V', 'V'), 379: ('RTD', 2, 5, 0x04, 1, 'V', 'V'), } tAttrNames = { 0x00: "Skip??", # seen in SAMPLES.XLS which shipped with Excel 5.0 0x01: "Volatile", 0x02: "If", 0x04: "Choose", 0x08: "Skip", 0x10: "Sum", 0x20: "Assign", 0x40: "Space", 0x41: "SpaceVolatile", } error_opcodes = set([0x07, 0x08, 0x0A, 0x0B, 0x1C, 0x1D, 0x2F]) tRangeFuncs = (min, max, min, max, min, max) tIsectFuncs = (max, min, max, min, max, min) def do_box_funcs(box_funcs, boxa, boxb): return tuple([ func(numa, numb) for func, numa, numb in zip(box_funcs, boxa.coords, boxb.coords) ]) def adjust_cell_addr_biff8(rowval, colval, reldelta, browx=None, bcolx=None): row_rel = (colval >> 15) & 1 col_rel = (colval >> 14) & 1 rowx = rowval colx = colval & 0xff if reldelta: if row_rel and rowx >= 32768: rowx -= 65536 if col_rel and colx >= 128: colx -= 256 else: if row_rel: rowx -= browx if col_rel: colx -= bcolx return rowx, colx, row_rel, col_rel def adjust_cell_addr_biff_le7( rowval, colval, reldelta, browx=None, bcolx=None): row_rel = (rowval >> 15) & 1 col_rel = (rowval >> 14) & 1 rowx = rowval & 0x3fff colx = colval if reldelta: if row_rel and rowx >= 8192: rowx -= 16384 if col_rel and colx >= 128: colx -= 256 else: if row_rel: rowx -= browx if col_rel: colx -= bcolx return rowx, colx, row_rel, col_rel def get_cell_addr(data, pos, bv, reldelta, browx=None, bcolx=None): if bv >= 80: rowval, colval = unpack("= 80: row1val, row2val, col1val, col2val = unpack(" addins %r" % (refx, info), file=bk.logfile) assert ref_first_sheetx == 0xFFFE == ref_last_sheetx return (-5, -5) if ref_recordx != bk._supbook_locals_inx: if blah: print("/// get_externsheet_local_range(refx=%d) -> external %r" % (refx, info), file=bk.logfile) return (-4, -4) # external reference if ref_first_sheetx == 0xFFFE == ref_last_sheetx: if blah: print("/// get_externsheet_local_range(refx=%d) -> unspecified sheet %r" % (refx, info), file=bk.logfile) return (-1, -1) # internal reference, any sheet if ref_first_sheetx == 0xFFFF == ref_last_sheetx: if blah: print("/// get_externsheet_local_range(refx=%d) -> deleted sheet(s)" % (refx, ), file=bk.logfile) return (-2, -2) # internal reference, deleted sheet(s) nsheets = len(bk._all_sheets_map) if not(0 <= ref_first_sheetx <= ref_last_sheetx < nsheets): if blah: print("/// get_externsheet_local_range(refx=%d) -> %r" % (refx, info), file=bk.logfile) print("--- first/last sheet not in range(%d)" % nsheets, file=bk.logfile) return (-102, -102) # stuffed up somewhere :-( xlrd_sheetx1 = bk._all_sheets_map[ref_first_sheetx] xlrd_sheetx2 = bk._all_sheets_map[ref_last_sheetx] if not(0 <= xlrd_sheetx1 <= xlrd_sheetx2): return (-3, -3) # internal reference, but to a macro sheet return xlrd_sheetx1, xlrd_sheetx2 def get_externsheet_local_range_b57( bk, raw_extshtx, ref_first_sheetx, ref_last_sheetx, blah=0): if raw_extshtx > 0: if blah: print("/// get_externsheet_local_range_b57(raw_extshtx=%d) -> external" % raw_extshtx, file=bk.logfile) return (-4, -4) # external reference if ref_first_sheetx == -1 and ref_last_sheetx == -1: return (-2, -2) # internal reference, deleted sheet(s) nsheets = len(bk._all_sheets_map) if not(0 <= ref_first_sheetx <= ref_last_sheetx < nsheets): if blah: print("/// get_externsheet_local_range_b57(%d, %d, %d) -> ???" \ % (raw_extshtx, ref_first_sheetx, ref_last_sheetx), file=bk.logfile) print("--- first/last sheet not in range(%d)" % nsheets, file=bk.logfile) return (-103, -103) # stuffed up somewhere :-( xlrd_sheetx1 = bk._all_sheets_map[ref_first_sheetx] xlrd_sheetx2 = bk._all_sheets_map[ref_last_sheetx] if not(0 <= xlrd_sheetx1 <= xlrd_sheetx2): return (-3, -3) # internal reference, but to a macro sheet return xlrd_sheetx1, xlrd_sheetx2 class FormulaError(Exception): pass ## # Used in evaluating formulas. # The following table describes the kinds and how their values # are represented.

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
Kind symbolKind numberValue representation
oBOOL3integer: 0 => False; 1 => True
oERR4None, or an int error code (same as XL_CELL_ERROR in the Cell class). #
oMSNG5Used by Excel as a placeholder for a missing (not supplied) function # argument. Should *not* appear as a final formula result. Value is None.
oNUM2A float. Note that there is no way of distinguishing dates.
oREF-1The value is either None or a non-empty list of # absolute Ref3D instances.
#
oREL-2The value is None or a non-empty list of # fully or partially relative Ref3D instances. #
oSTRG1A Unicode string.
oUNK0The kind is unknown or ambiguous. The value is None
#

class Operand(object): ## # None means that the actual value of the operand is a variable # (depends on cell data), not a constant. value = None ## # oUNK means that the kind of operand is not known unambiguously. kind = oUNK ## # The reconstituted text of the original formula. Function names will be # in English irrespective of the original language, which doesn't seem # to be recorded anywhere. The separator is ",", not ";" or whatever else # might be more appropriate for the end-user's locale; patches welcome. text = '?' def __init__(self, akind=None, avalue=None, arank=0, atext='?'): if akind is not None: self.kind = akind if avalue is not None: self.value = avalue self.rank = arank # rank is an internal gizmo (operator precedence); # it's used in reconstructing formula text. self.text = atext def __repr__(self): kind_text = okind_dict.get(self.kind, "?Unknown kind?") return "Operand(kind=%s, value=%r, text=%r)" \ % (kind_text, self.value, self.text) ## #

Represents an absolute or relative 3-dimensional reference to a box # of one or more cells.
# -- New in version 0.6.0 #

# #

The coords attribute is a tuple of the form:
# (shtxlo, shtxhi, rowxlo, rowxhi, colxlo, colxhi)
# where 0 <= thingxlo <= thingx < thingxhi.
# Note that it is quite possible to have thingx > nthings; for example # Print_Titles could have colxhi == 256 and/or rowxhi == 65536 # irrespective of how many columns/rows are actually used in the worksheet. # The caller will need to decide how to handle this situation. # Keyword: IndexError :-) #

# #

The components of the coords attribute are also available as individual # attributes: shtxlo, shtxhi, rowxlo, rowxhi, colxlo, and colxhi.

# #

The relflags attribute is a 6-tuple of flags which indicate whether # the corresponding (sheet|row|col)(lo|hi) is relative (1) or absolute (0).
# Note that there is necessarily no information available as to what cell(s) # the reference could possibly be relative to. The caller must decide what if # any use to make of oREL operands. Note also that a partially relative # reference may well be a typo. # For example, define name A1Z10 as $a$1:$z10 (missing $ after z) # while the cursor is on cell Sheet3!A27.
# The resulting Ref3D instance will have coords = (2, 3, 0, -16, 0, 26) # and relflags = (0, 0, 0, 1, 0, 0).
# So far, only one possibility of a sheet-relative component in # a reference has been noticed: a 2D reference located in the "current sheet". #
This will appear as coords = (0, 1, ...) and relflags = (1, 1, ...). class Ref3D(tuple): def __init__(self, atuple): self.coords = atuple[0:6] self.relflags = atuple[6:12] if not self.relflags: self.relflags = (0, 0, 0, 0, 0, 0) (self.shtxlo, self.shtxhi, self.rowxlo, self.rowxhi, self.colxlo, self.colxhi) = self.coords def __repr__(self): if not self.relflags or self.relflags == (0, 0, 0, 0, 0, 0): return "Ref3D(coords=%r)" % (self.coords, ) else: return "Ref3D(coords=%r, relflags=%r)" \ % (self.coords, self.relflags) tAdd = 0x03 tSub = 0x04 tMul = 0x05 tDiv = 0x06 tPower = 0x07 tConcat = 0x08 tLT, tLE, tEQ, tGE, tGT, tNE = range(0x09, 0x0F) import operator as opr def nop(x): return x def _opr_pow(x, y): return x ** y def _opr_lt(x, y): return x < y def _opr_le(x, y): return x <= y def _opr_eq(x, y): return x == y def _opr_ge(x, y): return x >= y def _opr_gt(x, y): return x > y def _opr_ne(x, y): return x != y def num2strg(num): """Attempt to emulate Excel's default conversion from number to string. """ s = str(num) if s.endswith(".0"): s = s[:-2] return s _arith_argdict = {oNUM: nop, oSTRG: float} _cmp_argdict = {oNUM: nop, oSTRG: nop} # Seems no conversions done on relops; in Excel, "1" > 9 produces TRUE. _strg_argdict = {oNUM:num2strg, oSTRG:nop} binop_rules = { tAdd: (_arith_argdict, oNUM, opr.add, 30, '+'), tSub: (_arith_argdict, oNUM, opr.sub, 30, '-'), tMul: (_arith_argdict, oNUM, opr.mul, 40, '*'), tDiv: (_arith_argdict, oNUM, opr.truediv, 40, '/'), tPower: (_arith_argdict, oNUM, _opr_pow, 50, '^',), tConcat:(_strg_argdict, oSTRG, opr.add, 20, '&'), tLT: (_cmp_argdict, oBOOL, _opr_lt, 10, '<'), tLE: (_cmp_argdict, oBOOL, _opr_le, 10, '<='), tEQ: (_cmp_argdict, oBOOL, _opr_eq, 10, '='), tGE: (_cmp_argdict, oBOOL, _opr_ge, 10, '>='), tGT: (_cmp_argdict, oBOOL, _opr_gt, 10, '>'), tNE: (_cmp_argdict, oBOOL, _opr_ne, 10, '<>'), } unop_rules = { 0x13: (lambda x: -x, 70, '-', ''), # unary minus 0x12: (lambda x: x, 70, '+', ''), # unary plus 0x14: (lambda x: x / 100.0, 60, '', '%'),# percent } LEAF_RANK = 90 FUNC_RANK = 90 STACK_ALARM_LEVEL = 5 STACK_PANIC_LEVEL = 10 def evaluate_name_formula(bk, nobj, namex, blah=0, level=0): if level > STACK_ALARM_LEVEL: blah = 1 data = nobj.raw_formula fmlalen = nobj.basic_formula_len bv = bk.biff_version reldelta = 1 # All defined name formulas use "Method B" [OOo docs] if blah: print("::: evaluate_name_formula %r %r %d %d %r level=%d" \ % (namex, nobj.name, fmlalen, bv, data, level), file=bk.logfile) hex_char_dump(data, 0, fmlalen, fout=bk.logfile) if level > STACK_PANIC_LEVEL: raise XLRDError("Excessive indirect references in NAME formula") sztab = szdict[bv] pos = 0 stack = [] any_rel = 0 any_err = 0 any_external = 0 unk_opnd = Operand(oUNK, None) error_opnd = Operand(oERR, None) spush = stack.append def do_binop(opcd, stk): assert len(stk) >= 2 bop = stk.pop() aop = stk.pop() argdict, result_kind, func, rank, sym = binop_rules[opcd] otext = ''.join([ '('[:aop.rank < rank], aop.text, ')'[:aop.rank < rank], sym, '('[:bop.rank < rank], bop.text, ')'[:bop.rank < rank], ]) resop = Operand(result_kind, None, rank, otext) try: bconv = argdict[bop.kind] aconv = argdict[aop.kind] except KeyError: stk.append(resop) return if bop.value is None or aop.value is None: stk.append(resop) return bval = bconv(bop.value) aval = aconv(aop.value) result = func(aval, bval) if result_kind == oBOOL: result = 1 if result else 0 resop.value = result stk.append(resop) def do_unaryop(opcode, result_kind, stk): assert len(stk) >= 1 aop = stk.pop() val = aop.value func, rank, sym1, sym2 = unop_rules[opcode] otext = ''.join([ sym1, '('[:aop.rank < rank], aop.text, ')'[:aop.rank < rank], sym2, ]) if val is not None: val = func(val) stk.append(Operand(result_kind, val, rank, otext)) def not_in_name_formula(op_arg, oname_arg): msg = "ERROR *** Token 0x%02x (%s) found in NAME formula" \ % (op_arg, oname_arg) raise FormulaError(msg) if fmlalen == 0: stack = [unk_opnd] while 0 <= pos < fmlalen: op = BYTES_ORD(data[pos]) opcode = op & 0x1f optype = (op & 0x60) >> 5 if optype: opx = opcode + 32 else: opx = opcode oname = onames[opx] # + [" RVA"][optype] sz = sztab[opx] if blah: print("Pos:%d Op:0x%02x Name:t%s Sz:%d opcode:%02xh optype:%02xh" \ % (pos, op, oname, sz, opcode, optype), file=bk.logfile) print("Stack =", stack, file=bk.logfile) if sz == -2: msg = 'ERROR *** Unexpected token 0x%02x ("%s"); biff_version=%d' \ % (op, oname, bv) raise FormulaError(msg) if not optype: if 0x00 <= opcode <= 0x02: # unk_opnd, tExp, tTbl not_in_name_formula(op, oname) elif 0x03 <= opcode <= 0x0E: # Add, Sub, Mul, Div, Power # tConcat # tLT, ..., tNE do_binop(opcode, stack) elif opcode == 0x0F: # tIsect if blah: print("tIsect pre", stack, file=bk.logfile) assert len(stack) >= 2 bop = stack.pop() aop = stack.pop() sym = ' ' rank = 80 ########## check ####### otext = ''.join([ '('[:aop.rank < rank], aop.text, ')'[:aop.rank < rank], sym, '('[:bop.rank < rank], bop.text, ')'[:bop.rank < rank], ]) res = Operand(oREF) res.text = otext if bop.kind == oERR or aop.kind == oERR: res.kind = oERR elif bop.kind == oUNK or aop.kind == oUNK: # This can happen with undefined # (go search in the current sheet) labels. # For example =Bob Sales # Each label gets a NAME record with an empty formula (!) # Evaluation of the tName token classifies it as oUNK # res.kind = oREF pass elif bop.kind == oREF == aop.kind: if aop.value is not None and bop.value is not None: assert len(aop.value) == 1 assert len(bop.value) == 1 coords = do_box_funcs( tIsectFuncs, aop.value[0], bop.value[0]) res.value = [Ref3D(coords)] elif bop.kind == oREL == aop.kind: res.kind = oREL if aop.value is not None and bop.value is not None: assert len(aop.value) == 1 assert len(bop.value) == 1 coords = do_box_funcs( tIsectFuncs, aop.value[0], bop.value[0]) relfa = aop.value[0].relflags relfb = bop.value[0].relflags if relfa == relfb: res.value = [Ref3D(coords + relfa)] else: pass spush(res) if blah: print("tIsect post", stack, file=bk.logfile) elif opcode == 0x10: # tList if blah: print("tList pre", stack, file=bk.logfile) assert len(stack) >= 2 bop = stack.pop() aop = stack.pop() sym = ',' rank = 80 ########## check ####### otext = ''.join([ '('[:aop.rank < rank], aop.text, ')'[:aop.rank < rank], sym, '('[:bop.rank < rank], bop.text, ')'[:bop.rank < rank], ]) res = Operand(oREF, None, rank, otext) if bop.kind == oERR or aop.kind == oERR: res.kind = oERR elif bop.kind in (oREF, oREL) and aop.kind in (oREF, oREL): res.kind = oREF if aop.kind == oREL or bop.kind == oREL: res.kind = oREL if aop.value is not None and bop.value is not None: assert len(aop.value) >= 1 assert len(bop.value) == 1 res.value = aop.value + bop.value else: pass spush(res) if blah: print("tList post", stack, file=bk.logfile) elif opcode == 0x11: # tRange if blah: print("tRange pre", stack, file=bk.logfile) assert len(stack) >= 2 bop = stack.pop() aop = stack.pop() sym = ':' rank = 80 ########## check ####### otext = ''.join([ '('[:aop.rank < rank], aop.text, ')'[:aop.rank < rank], sym, '('[:bop.rank < rank], bop.text, ')'[:bop.rank < rank], ]) res = Operand(oREF, None, rank, otext) if bop.kind == oERR or aop.kind == oERR: res = oERR elif bop.kind == oREF == aop.kind: if aop.value is not None and bop.value is not None: assert len(aop.value) == 1 assert len(bop.value) == 1 coords = do_box_funcs( tRangeFuncs, aop.value[0], bop.value[0]) res.value = [Ref3D(coords)] elif bop.kind == oREL == aop.kind: res.kind = oREL if aop.value is not None and bop.value is not None: assert len(aop.value) == 1 assert len(bop.value) == 1 coords = do_box_funcs( tRangeFuncs, aop.value[0], bop.value[0]) relfa = aop.value[0].relflags relfb = bop.value[0].relflags if relfa == relfb: res.value = [Ref3D(coords + relfa)] else: pass spush(res) if blah: print("tRange post", stack, file=bk.logfile) elif 0x12 <= opcode <= 0x14: # tUplus, tUminus, tPercent do_unaryop(opcode, oNUM, stack) elif opcode == 0x15: # tParen # source cosmetics pass elif opcode == 0x16: # tMissArg spush(Operand(oMSNG, None, LEAF_RANK, '')) elif opcode == 0x17: # tStr if bv <= 70: strg, newpos = unpack_string_update_pos( data, pos+1, bk.encoding, lenlen=1) else: strg, newpos = unpack_unicode_update_pos( data, pos+1, lenlen=1) sz = newpos - pos if blah: print(" sz=%d strg=%r" % (sz, strg), file=bk.logfile) text = '"' + strg.replace('"', '""') + '"' spush(Operand(oSTRG, strg, LEAF_RANK, text)) elif opcode == 0x18: # tExtended # new with BIFF 8 assert bv >= 80 # not in OOo docs raise FormulaError("tExtended token not implemented") elif opcode == 0x19: # tAttr subop, nc = unpack("= 1 aop = stack[-1] otext = 'SUM(%s)' % aop.text stack[-1] = Operand(oNUM, None, FUNC_RANK, otext) else: sz = 4 if blah: print(" subop=%02xh subname=t%s sz=%d nc=%02xh" \ % (subop, subname, sz, nc), file=bk.logfile) elif 0x1A <= opcode <= 0x1B: # tSheet, tEndSheet assert bv < 50 raise FormulaError("tSheet & tEndsheet tokens not implemented") elif 0x1C <= opcode <= 0x1F: # tErr, tBool, tInt, tNum inx = opcode - 0x1C nb = [1, 1, 2, 8][inx] kind = [oERR, oBOOL, oNUM, oNUM][inx] value, = unpack("<" + "BBHd"[inx], data[pos+1:pos+1+nb]) if inx == 2: # tInt value = float(value) text = str(value) elif inx == 3: # tNum text = str(value) elif inx == 1: # tBool text = ('FALSE', 'TRUE')[value] else: text = '"' +error_text_from_code[value] + '"' spush(Operand(kind, value, LEAF_RANK, text)) else: raise FormulaError("Unhandled opcode: 0x%02x" % opcode) if sz <= 0: raise FormulaError("Size not set for opcode 0x%02x" % opcode) pos += sz continue if opcode == 0x00: # tArray spush(unk_opnd) elif opcode == 0x01: # tFunc nb = 1 + int(bv >= 40) funcx = unpack("<" + " BH"[nb], data[pos+1:pos+1+nb])[0] func_attrs = func_defs.get(funcx, None) if not func_attrs: print("*** formula/tFunc unknown FuncID:%d" \ % funcx, file=bk.logfile) spush(unk_opnd) else: func_name, nargs = func_attrs[:2] if blah: print(" FuncID=%d name=%s nargs=%d" \ % (funcx, func_name, nargs), file=bk.logfile) assert len(stack) >= nargs if nargs: argtext = listsep.join([arg.text for arg in stack[-nargs:]]) otext = "%s(%s)" % (func_name, argtext) del stack[-nargs:] else: otext = func_name + "()" res = Operand(oUNK, None, FUNC_RANK, otext) spush(res) elif opcode == 0x02: #tFuncVar nb = 1 + int(bv >= 40) nargs, funcx = unpack("= nargs assert len(stack) >= nargs argtext = listsep.join([arg.text for arg in stack[-nargs:]]) otext = "%s(%s)" % (func_name, argtext) res = Operand(oUNK, None, FUNC_RANK, otext) if funcx == 1: # IF testarg = stack[-nargs] if testarg.kind not in (oNUM, oBOOL): if blah and testarg.kind != oUNK: print("IF testarg kind?", file=bk.logfile) elif testarg.value not in (0, 1): if blah and testarg.value is not None: print("IF testarg value?", file=bk.logfile) else: if nargs == 2 and not testarg.value: # IF(FALSE, tv) => FALSE res.kind, res.value = oBOOL, 0 else: respos = -nargs + 2 - int(testarg.value) chosen = stack[respos] if chosen.kind == oMSNG: res.kind, res.value = oNUM, 0 else: res.kind, res.value = chosen.kind, chosen.value if blah: print("$$$$$$ IF => constant", file=bk.logfile) elif funcx == 100: # CHOOSE testarg = stack[-nargs] if testarg.kind == oNUM: if 1 <= testarg.value < nargs: chosen = stack[-nargs + int(testarg.value)] if chosen.kind == oMSNG: res.kind, res.value = oNUM, 0 else: res.kind, res.value = chosen.kind, chosen.value del stack[-nargs:] spush(res) elif opcode == 0x03: #tName tgtnamex = unpack("> bk.logfile, " ", res # spush(res) elif opcode == 0x0D: #tAreaN not_in_name_formula(op, oname) # res = get_cell_range_addr(data, pos+1, bv, reldelta=1) # # note *ALL* tAreaN usage has signed offset for relative addresses # any_rel = 1 # if blah: print >> bk.logfile, " ", res elif opcode == 0x1A: # tRef3d if bv >= 80: res = get_cell_addr(data, pos+3, bv, reldelta) refx = unpack("= 80: res1, res2 = get_cell_range_addr(data, pos+3, bv, reldelta) refx = unpack("= 80: refx, tgtnamex = unpack(" 0: refx -= 1 elif refx < 0: refx = -refx - 1 else: dodgy = 1 if blah: print(" origrefx=%d refx=%d tgtnamex=%d dodgy=%d" \ % (origrefx, refx, tgtnamex, dodgy), file=bk.logfile) if tgtnamex == namex: if blah: print("!!!! Self-referential !!!!", file=bk.logfile) dodgy = any_err = 1 if not dodgy: if bv >= 80: shx1, shx2 = get_externsheet_local_range(bk, refx, blah) elif origrefx > 0: shx1, shx2 = (-4, -4) # external ref else: exty = bk._externsheet_type_b57[refx] if exty == 4: # non-specific sheet in own doc't shx1, shx2 = (-1, -1) # internal, any sheet else: shx1, shx2 = (-666, -666) if dodgy or shx1 < -1: otext = "<>" \ % (tgtnamex, origrefx) res = Operand(oUNK, None, LEAF_RANK, otext) else: tgtobj = bk.name_obj_list[tgtnamex] if not tgtobj.evaluated: ### recursive ### evaluate_name_formula(bk, tgtobj, tgtnamex, blah, level+1) if tgtobj.macro or tgtobj.binary \ or tgtobj.any_err: if blah: tgtobj.dump( bk.logfile, header="!!! bad tgtobj !!!", footer="------------------", ) res = Operand(oUNK, None) any_err = any_err or tgtobj.macro or tgtobj.binary or tgtobj.any_err any_rel = any_rel or tgtobj.any_rel else: assert len(tgtobj.stack) == 1 res = copy.deepcopy(tgtobj.stack[0]) res.rank = LEAF_RANK if tgtobj.scope == -1: res.text = tgtobj.name else: res.text = "%s!%s" \ % (bk._sheet_names[tgtobj.scope], tgtobj.name) if blah: print(" tNameX: setting text to", repr(res.text), file=bk.logfile) spush(res) elif opcode in error_opcodes: any_err = 1 spush(error_opnd) else: if blah: print("FORMULA: /// Not handled yet: t" + oname, file=bk.logfile) any_err = 1 if sz <= 0: raise FormulaError("Fatal: token size is not positive") pos += sz any_rel = not not any_rel if blah: fprintf(bk.logfile, "End of formula. level=%d any_rel=%d any_err=%d stack=%r\n", level, not not any_rel, any_err, stack) if len(stack) >= 2: print("*** Stack has unprocessed args", file=bk.logfile) print(file=bk.logfile) nobj.stack = stack if len(stack) != 1: nobj.result = None else: nobj.result = stack[0] nobj.any_rel = any_rel nobj.any_err = any_err nobj.any_external = any_external nobj.evaluated = 1 #### under construction ############################################################################# def decompile_formula(bk, fmla, fmlalen, fmlatype=None, browx=None, bcolx=None, blah=0, level=0, r1c1=0): if level > STACK_ALARM_LEVEL: blah = 1 reldelta = fmlatype in (FMLA_TYPE_SHARED, FMLA_TYPE_NAME, FMLA_TYPE_COND_FMT, FMLA_TYPE_DATA_VAL) data = fmla bv = bk.biff_version if blah: print("::: decompile_formula len=%d fmlatype=%r browx=%r bcolx=%r reldelta=%d %r level=%d" \ % (fmlalen, fmlatype, browx, bcolx, reldelta, data, level), file=bk.logfile) hex_char_dump(data, 0, fmlalen, fout=bk.logfile) if level > STACK_PANIC_LEVEL: raise XLRDError("Excessive indirect references in formula") sztab = szdict[bv] pos = 0 stack = [] any_rel = 0 any_err = 0 any_external = 0 unk_opnd = Operand(oUNK, None) error_opnd = Operand(oERR, None) spush = stack.append def do_binop(opcd, stk): assert len(stk) >= 2 bop = stk.pop() aop = stk.pop() argdict, result_kind, func, rank, sym = binop_rules[opcd] otext = ''.join([ '('[:aop.rank < rank], aop.text, ')'[:aop.rank < rank], sym, '('[:bop.rank < rank], bop.text, ')'[:bop.rank < rank], ]) resop = Operand(result_kind, None, rank, otext) stk.append(resop) def do_unaryop(opcode, result_kind, stk): assert len(stk) >= 1 aop = stk.pop() func, rank, sym1, sym2 = unop_rules[opcode] otext = ''.join([ sym1, '('[:aop.rank < rank], aop.text, ')'[:aop.rank < rank], sym2, ]) stk.append(Operand(result_kind, None, rank, otext)) def unexpected_opcode(op_arg, oname_arg): msg = "ERROR *** Unexpected token 0x%02x (%s) found in formula type %s" \ % (op_arg, oname_arg, FMLA_TYPEDESCR_MAP[fmlatype]) print(msg, file=bk.logfile) # raise FormulaError(msg) if fmlalen == 0: stack = [unk_opnd] while 0 <= pos < fmlalen: op = BYTES_ORD(data[pos]) opcode = op & 0x1f optype = (op & 0x60) >> 5 if optype: opx = opcode + 32 else: opx = opcode oname = onames[opx] # + [" RVA"][optype] sz = sztab[opx] if blah: print("Pos:%d Op:0x%02x opname:t%s Sz:%d opcode:%02xh optype:%02xh" \ % (pos, op, oname, sz, opcode, optype), file=bk.logfile) print("Stack =", stack, file=bk.logfile) if sz == -2: msg = 'ERROR *** Unexpected token 0x%02x ("%s"); biff_version=%d' \ % (op, oname, bv) raise FormulaError(msg) if _TOKEN_NOT_ALLOWED(opx, 0) & fmlatype: unexpected_opcode(op, oname) if not optype: if opcode <= 0x01: # tExp if bv >= 30: fmt = '= 2 bop = stack.pop() aop = stack.pop() sym = ' ' rank = 80 ########## check ####### otext = ''.join([ '('[:aop.rank < rank], aop.text, ')'[:aop.rank < rank], sym, '('[:bop.rank < rank], bop.text, ')'[:bop.rank < rank], ]) res = Operand(oREF) res.text = otext if bop.kind == oERR or aop.kind == oERR: res.kind = oERR elif bop.kind == oUNK or aop.kind == oUNK: # This can happen with undefined # (go search in the current sheet) labels. # For example =Bob Sales # Each label gets a NAME record with an empty formula (!) # Evaluation of the tName token classifies it as oUNK # res.kind = oREF pass elif bop.kind == oREF == aop.kind: pass elif bop.kind == oREL == aop.kind: res.kind = oREL else: pass spush(res) if blah: print("tIsect post", stack, file=bk.logfile) elif opcode == 0x10: # tList if blah: print("tList pre", stack, file=bk.logfile) assert len(stack) >= 2 bop = stack.pop() aop = stack.pop() sym = ',' rank = 80 ########## check ####### otext = ''.join([ '('[:aop.rank < rank], aop.text, ')'[:aop.rank < rank], sym, '('[:bop.rank < rank], bop.text, ')'[:bop.rank < rank], ]) res = Operand(oREF, None, rank, otext) if bop.kind == oERR or aop.kind == oERR: res.kind = oERR elif bop.kind in (oREF, oREL) and aop.kind in (oREF, oREL): res.kind = oREF if aop.kind == oREL or bop.kind == oREL: res.kind = oREL else: pass spush(res) if blah: print("tList post", stack, file=bk.logfile) elif opcode == 0x11: # tRange if blah: print("tRange pre", stack, file=bk.logfile) assert len(stack) >= 2 bop = stack.pop() aop = stack.pop() sym = ':' rank = 80 ########## check ####### otext = ''.join([ '('[:aop.rank < rank], aop.text, ')'[:aop.rank < rank], sym, '('[:bop.rank < rank], bop.text, ')'[:bop.rank < rank], ]) res = Operand(oREF, None, rank, otext) if bop.kind == oERR or aop.kind == oERR: res = oERR elif bop.kind == oREF == aop.kind: pass else: pass spush(res) if blah: print("tRange post", stack, file=bk.logfile) elif 0x12 <= opcode <= 0x14: # tUplus, tUminus, tPercent do_unaryop(opcode, oNUM, stack) elif opcode == 0x15: # tParen # source cosmetics pass elif opcode == 0x16: # tMissArg spush(Operand(oMSNG, None, LEAF_RANK, '')) elif opcode == 0x17: # tStr if bv <= 70: strg, newpos = unpack_string_update_pos( data, pos+1, bk.encoding, lenlen=1) else: strg, newpos = unpack_unicode_update_pos( data, pos+1, lenlen=1) sz = newpos - pos if blah: print(" sz=%d strg=%r" % (sz, strg), file=bk.logfile) text = '"' + strg.replace('"', '""') + '"' spush(Operand(oSTRG, None, LEAF_RANK, text)) elif opcode == 0x18: # tExtended # new with BIFF 8 assert bv >= 80 # not in OOo docs, don't even know how to determine its length raise FormulaError("tExtended token not implemented") elif opcode == 0x19: # tAttr subop, nc = unpack("= 1 aop = stack[-1] otext = 'SUM(%s)' % aop.text stack[-1] = Operand(oNUM, None, FUNC_RANK, otext) else: sz = 4 if blah: print(" subop=%02xh subname=t%s sz=%d nc=%02xh" \ % (subop, subname, sz, nc), file=bk.logfile) elif 0x1A <= opcode <= 0x1B: # tSheet, tEndSheet assert bv < 50 raise FormulaError("tSheet & tEndsheet tokens not implemented") elif 0x1C <= opcode <= 0x1F: # tErr, tBool, tInt, tNum inx = opcode - 0x1C nb = [1, 1, 2, 8][inx] kind = [oERR, oBOOL, oNUM, oNUM][inx] value, = unpack("<" + "BBHd"[inx], data[pos+1:pos+1+nb]) if inx == 2: # tInt value = float(value) text = str(value) elif inx == 3: # tNum text = str(value) elif inx == 1: # tBool text = ('FALSE', 'TRUE')[value] else: text = '"' +error_text_from_code[value] + '"' spush(Operand(kind, None, LEAF_RANK, text)) else: raise FormulaError("Unhandled opcode: 0x%02x" % opcode) if sz <= 0: raise FormulaError("Size not set for opcode 0x%02x" % opcode) pos += sz continue if opcode == 0x00: # tArray spush(unk_opnd) elif opcode == 0x01: # tFunc nb = 1 + int(bv >= 40) funcx = unpack("<" + " BH"[nb], data[pos+1:pos+1+nb])[0] func_attrs = func_defs.get(funcx, None) if not func_attrs: print("*** formula/tFunc unknown FuncID:%d" % funcx, file=bk.logfile) spush(unk_opnd) else: func_name, nargs = func_attrs[:2] if blah: print(" FuncID=%d name=%s nargs=%d" \ % (funcx, func_name, nargs), file=bk.logfile) assert len(stack) >= nargs if nargs: argtext = listsep.join([arg.text for arg in stack[-nargs:]]) otext = "%s(%s)" % (func_name, argtext) del stack[-nargs:] else: otext = func_name + "()" res = Operand(oUNK, None, FUNC_RANK, otext) spush(res) elif opcode == 0x02: #tFuncVar nb = 1 + int(bv >= 40) nargs, funcx = unpack("= nargs assert len(stack) >= nargs argtext = listsep.join([arg.text for arg in stack[-nargs:]]) otext = "%s(%s)" % (func_name, argtext) res = Operand(oUNK, None, FUNC_RANK, otext) del stack[-nargs:] spush(res) elif opcode == 0x03: #tName tgtnamex = unpack("> bk.logfile, " ", res res1, res2 = get_cell_range_addr( data, pos+1, bv, reldelta, browx, bcolx) if blah: print(" ", res1, res2, file=bk.logfile) rowx1, colx1, row_rel1, col_rel1 = res1 rowx2, colx2, row_rel2, col_rel2 = res2 coords = (rowx1, rowx2+1, colx1, colx2+1) relflags = (row_rel1, row_rel2, col_rel1, col_rel2) if sum(relflags): # relative okind = oREL else: okind = oREF if blah: print(" ", coords, relflags, file=bk.logfile) otext = rangename2drel(coords, relflags, browx, bcolx, r1c1) res = Operand(okind, None, LEAF_RANK, otext) spush(res) elif opcode == 0x1A: # tRef3d if bv >= 80: res = get_cell_addr(data, pos+3, bv, reldelta, browx, bcolx) refx = unpack("= 80: res1, res2 = get_cell_range_addr(data, pos+3, bv, reldelta) refx = unpack("= 80: refx, tgtnamex = unpack(" 0: refx -= 1 elif refx < 0: refx = -refx - 1 else: dodgy = 1 if blah: print(" origrefx=%d refx=%d tgtnamex=%d dodgy=%d" \ % (origrefx, refx, tgtnamex, dodgy), file=bk.logfile) # if tgtnamex == namex: # if blah: print >> bk.logfile, "!!!! Self-referential !!!!" # dodgy = any_err = 1 if not dodgy: if bv >= 80: shx1, shx2 = get_externsheet_local_range(bk, refx, blah) elif origrefx > 0: shx1, shx2 = (-4, -4) # external ref else: exty = bk._externsheet_type_b57[refx] if exty == 4: # non-specific sheet in own doc't shx1, shx2 = (-1, -1) # internal, any sheet else: shx1, shx2 = (-666, -666) okind = oUNK ovalue = None if shx1 == -5: # addin func name okind = oSTRG ovalue = bk.addin_func_names[tgtnamex] otext = '"' + ovalue.replace('"', '""') + '"' elif dodgy or shx1 < -1: otext = "<>" \ % (tgtnamex, origrefx) else: tgtobj = bk.name_obj_list[tgtnamex] if tgtobj.scope == -1: otext = tgtobj.name else: otext = "%s!%s" \ % (bk._sheet_names[tgtobj.scope], tgtobj.name) if blah: print(" tNameX: setting text to", repr(res.text), file=bk.logfile) res = Operand(okind, ovalue, LEAF_RANK, otext) spush(res) elif opcode in error_opcodes: any_err = 1 spush(error_opnd) else: if blah: print("FORMULA: /// Not handled yet: t" + oname, file=bk.logfile) any_err = 1 if sz <= 0: raise FormulaError("Fatal: token size is not positive") pos += sz any_rel = not not any_rel if blah: print("End of formula. level=%d any_rel=%d any_err=%d stack=%r" % \ (level, not not any_rel, any_err, stack), file=bk.logfile) if len(stack) >= 2: print("*** Stack has unprocessed args", file=bk.logfile) print(file=bk.logfile) if len(stack) != 1: result = None else: result = stack[0].text return result #### under deconstruction ### def dump_formula(bk, data, fmlalen, bv, reldelta, blah=0, isname=0): if blah: print("dump_formula", fmlalen, bv, len(data), file=bk.logfile) hex_char_dump(data, 0, fmlalen, fout=bk.logfile) assert bv >= 80 #### this function needs updating #### sztab = szdict[bv] pos = 0 stack = [] any_rel = 0 any_err = 0 spush = stack.append while 0 <= pos < fmlalen: op = BYTES_ORD(data[pos]) opcode = op & 0x1f optype = (op & 0x60) >> 5 if optype: opx = opcode + 32 else: opx = opcode oname = onames[opx] # + [" RVA"][optype] sz = sztab[opx] if blah: print("Pos:%d Op:0x%02x Name:t%s Sz:%d opcode:%02xh optype:%02xh" \ % (pos, op, oname, sz, opcode, optype), file=bk.logfile) if not optype: if 0x01 <= opcode <= 0x02: # tExp, tTbl # reference to a shared formula or table record rowx, colx = unpack("= 2 bop = stack.pop() aop = stack.pop() spush(aop + bop) if blah: print("tlist post", stack, file=bk.logfile) elif opcode == 0x11: # tRange if blah: print("tRange pre", stack, file=bk.logfile) assert len(stack) >= 2 bop = stack.pop() aop = stack.pop() assert len(aop) == 1 assert len(bop) == 1 result = do_box_funcs(tRangeFuncs, aop[0], bop[0]) spush(result) if blah: print("tRange post", stack, file=bk.logfile) elif opcode == 0x0F: # tIsect if blah: print("tIsect pre", stack, file=bk.logfile) assert len(stack) >= 2 bop = stack.pop() aop = stack.pop() assert len(aop) == 1 assert len(bop) == 1 result = do_box_funcs(tIsectFuncs, aop[0], bop[0]) spush(result) if blah: print("tIsect post", stack, file=bk.logfile) elif opcode == 0x19: # tAttr subop, nc = unpack("= 40) funcx = unpack("<" + " BH"[nb], data[pos+1:pos+1+nb]) if blah: print(" FuncID=%d" % funcx, file=bk.logfile) elif opcode == 0x02: #tFuncVar nb = 1 + int(bv >= 40) nargs, funcx = unpack("= 2: print("*** Stack has unprocessed args", file=bk.logfile) # === Some helper functions for displaying cell references === # I'm aware of only one possibility of a sheet-relative component in # a reference: a 2D reference located in the "current sheet". # xlrd stores this internally with bounds of (0, 1, ...) and # relative flags of (1, 1, ...). These functions display the # sheet component as empty, just like Excel etc. def rownamerel(rowx, rowxrel, browx=None, r1c1=0): # if no base rowx is provided, we have to return r1c1 if browx is None: r1c1 = True if not rowxrel: if r1c1: return "R%d" % (rowx+1) return "$%d" % (rowx+1) if r1c1: if rowx: return "R[%d]" % rowx return "R" return "%d" % ((browx + rowx) % 65536 + 1) def colnamerel(colx, colxrel, bcolx=None, r1c1=0): # if no base colx is provided, we have to return r1c1 if bcolx is None: r1c1 = True if not colxrel: if r1c1: return "C%d" % (colx + 1) return "$" + colname(colx) if r1c1: if colx: return "C[%d]" % colx return "C" return colname((bcolx + colx) % 256) ## # Utility function: (5, 7) => 'H6' def cellname(rowx, colx): """ (5, 7) => 'H6' """ return "%s%d" % (colname(colx), rowx+1) ## # Utility function: (5, 7) => '$H$6' def cellnameabs(rowx, colx, r1c1=0): """ (5, 7) => '$H$6' or 'R8C6'""" if r1c1: return "R%dC%d" % (rowx+1, colx+1) return "$%s$%d" % (colname(colx), rowx+1) def cellnamerel(rowx, colx, rowxrel, colxrel, browx=None, bcolx=None, r1c1=0): if not rowxrel and not colxrel: return cellnameabs(rowx, colx, r1c1) if (rowxrel and browx is None) or (colxrel and bcolx is None): # must flip the whole cell into R1C1 mode r1c1 = True c = colnamerel(colx, colxrel, bcolx, r1c1) r = rownamerel(rowx, rowxrel, browx, r1c1) if r1c1: return r + c return c + r ## # Utility function: 7 => 'H', 27 => 'AB' def colname(colx): """ 7 => 'H', 27 => 'AB' """ alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" if colx <= 25: return alphabet[colx] else: xdiv26, xmod26 = divmod(colx, 26) return alphabet[xdiv26 - 1] + alphabet[xmod26] def rangename2d(rlo, rhi, clo, chi, r1c1=0): """ (5, 20, 7, 10) => '$H$6:$J$20' """ if r1c1: return if rhi == rlo+1 and chi == clo+1: return cellnameabs(rlo, clo, r1c1) return "%s:%s" % (cellnameabs(rlo, clo, r1c1), cellnameabs(rhi-1, chi-1, r1c1)) def rangename2drel(rlo_rhi_clo_chi, rlorel_rhirel_clorel_chirel, browx=None, bcolx=None, r1c1=0): rlo, rhi, clo, chi = rlo_rhi_clo_chi rlorel, rhirel, clorel, chirel = rlorel_rhirel_clorel_chirel if (rlorel or rhirel) and browx is None: r1c1 = True if (clorel or chirel) and bcolx is None: r1c1 = True return "%s:%s" % ( cellnamerel(rlo, clo, rlorel, clorel, browx, bcolx, r1c1), cellnamerel(rhi-1, chi-1, rhirel, chirel, browx, bcolx, r1c1) ) ## # Utility function: #
Ref3D((1, 4, 5, 20, 7, 10)) => 'Sheet2:Sheet3!$H$6:$J$20' def rangename3d(book, ref3d): """ Ref3D(1, 4, 5, 20, 7, 10) => 'Sheet2:Sheet3!$H$6:$J$20' (assuming Excel's default sheetnames) """ coords = ref3d.coords return "%s!%s" % ( sheetrange(book, *coords[:2]), rangename2d(*coords[2:6])) ## # Utility function: #
Ref3D(coords=(0, 1, -32, -22, -13, 13), relflags=(0, 0, 1, 1, 1, 1)) # R1C1 mode => 'Sheet1!R[-32]C[-13]:R[-23]C[12]' # A1 mode => depends on base cell (browx, bcolx) def rangename3drel(book, ref3d, browx=None, bcolx=None, r1c1=0): coords = ref3d.coords relflags = ref3d.relflags shdesc = sheetrangerel(book, coords[:2], relflags[:2]) rngdesc = rangename2drel(coords[2:6], relflags[2:6], browx, bcolx, r1c1) if not shdesc: return rngdesc return "%s!%s" % (shdesc, rngdesc) def quotedsheetname(shnames, shx): if shx >= 0: shname = shnames[shx] else: shname = { -1: "?internal; any sheet?", -2: "internal; deleted sheet", -3: "internal; macro sheet", -4: "<>", }.get(shx, "?error %d?" % shx) if "'" in shname: return "'" + shname.replace("'", "''") + "'" if " " in shname: return "'" + shname + "'" return shname def sheetrange(book, slo, shi): shnames = book.sheet_names() shdesc = quotedsheetname(shnames, slo) if slo != shi-1: shdesc += ":" + quotedsheetname(shnames, shi-1) return shdesc def sheetrangerel(book, srange, srangerel): slo, shi = srange slorel, shirel = srangerel if not slorel and not shirel: return sheetrange(book, slo, shi) assert (slo == 0 == shi-1) and slorel and shirel return "" # ==============================================================