# -*- coding: cp1252 -*- ## # Support module for the xlrd package. # #

Portions copyright © 2005-2010 Stephen John Machin, Lingfo Pty Ltd

#

This module is part of the xlrd package, which is released under a BSD-style licence.

## # 2010-03-01 SJM Reading SCL record # 2010-03-01 SJM Added more record IDs for biff_dump & biff_count # 2008-02-10 SJM BIFF2 BLANK record # 2008-02-08 SJM Preparation for Excel 2.0 support # 2008-02-02 SJM Added suffixes (_B2, _B2_ONLY, etc) on record names for biff_dump & biff_count # 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files. # 2007-09-08 SJM Avoid crash when zero-length Unicode string missing options byte. # 2007-04-22 SJM Remove experimental "trimming" facility. from __future__ import print_function DEBUG = 0 from struct import unpack import sys from .timemachine import * class XLRDError(Exception): pass ## # Parent of almost all other classes in the package. Defines a common "dump" method # for debugging. class BaseObject(object): _repr_these = [] ## # @param f open file object, to which the dump is written # @param header text to write before the dump # @param footer text to write after the dump # @param indent number of leading spaces (for recursive calls) def dump(self, f=None, header=None, footer=None, indent=0): if f is None: f = sys.stderr if hasattr(self, "__slots__"): alist = [] for attr in self.__slots__: alist.append((attr, getattr(self, attr))) else: alist = self.__dict__.items() alist = sorted(alist) pad = " " * indent if header is not None: print(header, file=f) list_type = type([]) dict_type = type({}) for attr, value in alist: if getattr(value, 'dump', None) and attr != 'book': value.dump(f, header="%s%s (%s object):" % (pad, attr, value.__class__.__name__), indent=indent+4) elif attr not in self._repr_these and ( isinstance(value, list_type) or isinstance(value, dict_type) ): print("%s%s: %s, len = %d" % (pad, attr, type(value), len(value)), file=f) else: fprintf(f, "%s%s: %r\n", pad, attr, value) if footer is not None: print(footer, file=f) FUN, FDT, FNU, FGE, FTX = range(5) # unknown, date, number, general, text DATEFORMAT = FDT NUMBERFORMAT = FNU ( XL_CELL_EMPTY, XL_CELL_TEXT, XL_CELL_NUMBER, XL_CELL_DATE, XL_CELL_BOOLEAN, XL_CELL_ERROR, XL_CELL_BLANK, # for use in debugging, gathering stats, etc ) = range(7) biff_text_from_num = { 0: "(not BIFF)", 20: "2.0", 21: "2.1", 30: "3", 40: "4S", 45: "4W", 50: "5", 70: "7", 80: "8", 85: "8X", } ## #

This dictionary can be used to produce a text version of the internal codes # that Excel uses for error cells. Here are its contents: #

# 0x00: '#NULL!',  # Intersection of two cell ranges is empty
# 0x07: '#DIV/0!', # Division by zero
# 0x0F: '#VALUE!', # Wrong type of operand
# 0x17: '#REF!',   # Illegal or deleted cell reference
# 0x1D: '#NAME?',  # Wrong function or range name
# 0x24: '#NUM!',   # Value range overflow
# 0x2A: '#N/A',    # Argument or function not available
# 

error_text_from_code = { 0x00: '#NULL!', # Intersection of two cell ranges is empty 0x07: '#DIV/0!', # Division by zero 0x0F: '#VALUE!', # Wrong type of operand 0x17: '#REF!', # Illegal or deleted cell reference 0x1D: '#NAME?', # Wrong function or range name 0x24: '#NUM!', # Value range overflow 0x2A: '#N/A', # Argument or function not available } BIFF_FIRST_UNICODE = 80 XL_WORKBOOK_GLOBALS = WBKBLOBAL = 0x5 XL_WORKBOOK_GLOBALS_4W = 0x100 XL_WORKSHEET = WRKSHEET = 0x10 XL_BOUNDSHEET_WORKSHEET = 0x00 XL_BOUNDSHEET_CHART = 0x02 XL_BOUNDSHEET_VB_MODULE = 0x06 # XL_RK2 = 0x7e XL_ARRAY = 0x0221 XL_ARRAY2 = 0x0021 XL_BLANK = 0x0201 XL_BLANK_B2 = 0x01 XL_BOF = 0x809 XL_BOOLERR = 0x205 XL_BOOLERR_B2 = 0x5 XL_BOUNDSHEET = 0x85 XL_BUILTINFMTCOUNT = 0x56 XL_CF = 0x01B1 XL_CODEPAGE = 0x42 XL_COLINFO = 0x7D XL_COLUMNDEFAULT = 0x20 # BIFF2 only XL_COLWIDTH = 0x24 # BIFF2 only XL_CONDFMT = 0x01B0 XL_CONTINUE = 0x3c XL_COUNTRY = 0x8C XL_DATEMODE = 0x22 XL_DEFAULTROWHEIGHT = 0x0225 XL_DEFCOLWIDTH = 0x55 XL_DIMENSION = 0x200 XL_DIMENSION2 = 0x0 XL_EFONT = 0x45 XL_EOF = 0x0a XL_EXTERNNAME = 0x23 XL_EXTERNSHEET = 0x17 XL_EXTSST = 0xff XL_FEAT11 = 0x872 XL_FILEPASS = 0x2f XL_FONT = 0x31 XL_FONT_B3B4 = 0x231 XL_FORMAT = 0x41e XL_FORMAT2 = 0x1E # BIFF2, BIFF3 XL_FORMULA = 0x6 XL_FORMULA3 = 0x206 XL_FORMULA4 = 0x406 XL_GCW = 0xab XL_HLINK = 0x01B8 XL_QUICKTIP = 0x0800 XL_HORIZONTALPAGEBREAKS = 0x1b XL_INDEX = 0x20b XL_INTEGER = 0x2 # BIFF2 only XL_IXFE = 0x44 # BIFF2 only XL_LABEL = 0x204 XL_LABEL_B2 = 0x04 XL_LABELRANGES = 0x15f XL_LABELSST = 0xfd XL_LEFTMARGIN = 0x26 XL_TOPMARGIN = 0x28 XL_RIGHTMARGIN = 0x27 XL_BOTTOMMARGIN = 0x29 XL_HEADER = 0x14 XL_FOOTER = 0x15 XL_HCENTER = 0x83 XL_VCENTER = 0x84 XL_MERGEDCELLS = 0xE5 XL_MSO_DRAWING = 0x00EC XL_MSO_DRAWING_GROUP = 0x00EB XL_MSO_DRAWING_SELECTION = 0x00ED XL_MULRK = 0xbd XL_MULBLANK = 0xbe XL_NAME = 0x18 XL_NOTE = 0x1c XL_NUMBER = 0x203 XL_NUMBER_B2 = 0x3 XL_OBJ = 0x5D XL_PAGESETUP = 0xA1 XL_PALETTE = 0x92 XL_PANE = 0x41 XL_PRINTGRIDLINES = 0x2B XL_PRINTHEADERS = 0x2A XL_RK = 0x27e XL_ROW = 0x208 XL_ROW_B2 = 0x08 XL_RSTRING = 0xd6 XL_SCL = 0x00A0 XL_SHEETHDR = 0x8F # BIFF4W only XL_SHEETPR = 0x81 XL_SHEETSOFFSET = 0x8E # BIFF4W only XL_SHRFMLA = 0x04bc XL_SST = 0xfc XL_STANDARDWIDTH = 0x99 XL_STRING = 0x207 XL_STRING_B2 = 0x7 XL_STYLE = 0x293 XL_SUPBOOK = 0x1AE # aka EXTERNALBOOK in OOo docs XL_TABLEOP = 0x236 XL_TABLEOP2 = 0x37 XL_TABLEOP_B2 = 0x36 XL_TXO = 0x1b6 XL_UNCALCED = 0x5e XL_UNKNOWN = 0xffff XL_VERTICALPAGEBREAKS = 0x1a XL_WINDOW2 = 0x023E XL_WINDOW2_B2 = 0x003E XL_WRITEACCESS = 0x5C XL_WSBOOL = XL_SHEETPR XL_XF = 0xe0 XL_XF2 = 0x0043 # BIFF2 version of XF record XL_XF3 = 0x0243 # BIFF3 version of XF record XL_XF4 = 0x0443 # BIFF4 version of XF record boflen = {0x0809: 8, 0x0409: 6, 0x0209: 6, 0x0009: 4} bofcodes = (0x0809, 0x0409, 0x0209, 0x0009) XL_FORMULA_OPCODES = (0x0006, 0x0406, 0x0206) _cell_opcode_list = [ XL_BOOLERR, XL_FORMULA, XL_FORMULA3, XL_FORMULA4, XL_LABEL, XL_LABELSST, XL_MULRK, XL_NUMBER, XL_RK, XL_RSTRING, ] _cell_opcode_dict = {} for _cell_opcode in _cell_opcode_list: _cell_opcode_dict[_cell_opcode] = 1 def is_cell_opcode(c): return c in _cell_opcode_dict def upkbits(tgt_obj, src, manifest, local_setattr=setattr): for n, mask, attr in manifest: local_setattr(tgt_obj, attr, (src & mask) >> n) def upkbitsL(tgt_obj, src, manifest, local_setattr=setattr, local_int=int): for n, mask, attr in manifest: local_setattr(tgt_obj, attr, local_int((src & mask) >> n)) def unpack_string(data, pos, encoding, lenlen=1): nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0] pos += lenlen return unicode(data[pos:pos+nchars], encoding) def unpack_string_update_pos(data, pos, encoding, lenlen=1, known_len=None): if known_len is not None: # On a NAME record, the length byte is detached from the front of the string. nchars = known_len else: nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0] pos += lenlen newpos = pos + nchars return (unicode(data[pos:newpos], encoding), newpos) def unpack_unicode(data, pos, lenlen=2): "Return unicode_strg" nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0] if not nchars: # Ambiguous whether 0-length string should have an "options" byte. # Avoid crash if missing. return UNICODE_LITERAL("") pos += lenlen options = BYTES_ORD(data[pos]) pos += 1 # phonetic = options & 0x04 # richtext = options & 0x08 if options & 0x08: # rt = unpack(' endpos=%d pos=%d endsub=%d substrg=%r\n', ofs, dlen, base, endpos, pos, endsub, substrg) break hexd = ''.join(["%02x " % BYTES_ORD(c) for c in substrg]) chard = '' for c in substrg: c = chr(BYTES_ORD(c)) if c == '\0': c = '~' elif not (' ' <= c <= '~'): c = '?' chard += c if numbered: num_prefix = "%5d: " % (base+pos-ofs) fprintf(fout, "%s %-48s %s\n", num_prefix, hexd, chard) pos = endsub def biff_dump(mem, stream_offset, stream_len, base=0, fout=sys.stdout, unnumbered=False): pos = stream_offset stream_end = stream_offset + stream_len adj = base - stream_offset dummies = 0 numbered = not unnumbered num_prefix = '' while stream_end - pos >= 4: rc, length = unpack('') if numbered: num_prefix = "%5d: " % (adj + pos) fprintf(fout, "%s%04x %s len = %04x (%d)\n", num_prefix, rc, recname, length, length) pos += 4 hex_char_dump(mem, pos, length, adj+pos, fout, unnumbered) pos += length if dummies: if numbered: num_prefix = "%5d: " % (adj + savpos) fprintf(fout, "%s---- %d zero bytes skipped ----\n", num_prefix, dummies) if pos < stream_end: if numbered: num_prefix = "%5d: " % (adj + pos) fprintf(fout, "%s---- Misc bytes at end ----\n", num_prefix) hex_char_dump(mem, pos, stream_end-pos, adj + pos, fout, unnumbered) elif pos > stream_end: fprintf(fout, "Last dumped record has length (%d) that is too large\n", length) def biff_count_records(mem, stream_offset, stream_len, fout=sys.stdout): pos = stream_offset stream_end = stream_offset + stream_len tally = {} while stream_end - pos >= 4: rc, length = unpack('