# -*- coding: cp1252 -*- ## #

Portions copyright © 2005-2013 Stephen John Machin, Lingfo Pty Ltd

#

This module is part of the xlrd package, which is released under a BSD-style licence.

## # 2010-04-25 SJM fix zoom factors cooking logic # 2010-04-15 CW r4253 fix zoom factors cooking logic # 2010-04-09 CW r4248 add a flag so xlutils knows whether or not to write a PANE record # 2010-03-29 SJM Fixed bug in adding new empty rows in put_cell_ragged # 2010-03-28 SJM Tailored put_cell method for each of ragged_rows=False (fixed speed regression) and =True (faster) # 2010-03-25 CW r4236 Slight refactoring to remove method calls # 2010-03-25 CW r4235 Collapse expand_cells into put_cell and enhance the raggedness. This should save even more memory! # 2010-03-25 CW r4234 remove duplicate chunks for extend_cells; refactor to remove put_number_cell and put_blank_cell which essentially duplicated the code of put_cell # 2010-03-10 SJM r4222 Added reading of the PANE record. # 2010-03-10 SJM r4221 Preliminary work on "cooked" mag factors; use at own peril # 2010-03-01 SJM Reading SCL record # 2010-03-01 SJM Added ragged_rows functionality # 2009-08-23 SJM Reduced CPU time taken by parsing MULBLANK records. # 2009-08-18 SJM Used __slots__ and sharing to reduce memory consumed by Rowinfo instances # 2009-05-31 SJM Fixed problem with no CODEPAGE record on extremely minimal BIFF2.x 3rd-party file # 2009-04-27 SJM Integrated on_demand patch by Armando Serrano Lombillo # 2008-02-09 SJM Excel 2.0: build XFs on the fly from cell attributes # 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files. # 2007-10-11 SJM Added missing entry for blank cell type to ctype_text # 2007-07-11 SJM Allow for BIFF2/3-style FORMAT record in BIFF4/8 file # 2007-04-22 SJM Remove experimental "trimming" facility. from __future__ import print_function from array import array from struct import unpack, calcsize from .biffh import * from .timemachine import * from .formula import dump_formula, decompile_formula, rangename2d, FMLA_TYPE_CELL, FMLA_TYPE_SHARED from .formatting import nearest_colour_index, Format DEBUG = 0 OBJ_MSO_DEBUG = 0 _WINDOW2_options = ( # Attribute names and initial values to use in case # a WINDOW2 record is not written. ("show_formulas", 0), ("show_grid_lines", 1), ("show_sheet_headers", 1), ("panes_are_frozen", 0), ("show_zero_values", 1), ("automatic_grid_line_colour", 1), ("columns_from_right_to_left", 0), ("show_outline_symbols", 1), ("remove_splits_if_pane_freeze_is_removed", 0), # Multiple sheets can be selected, but only one can be active # (hold down Ctrl and click multiple tabs in the file in OOo) ("sheet_selected", 0), # "sheet_visible" should really be called "sheet_active" # and is 1 when this sheet is the sheet displayed when the file # is open. More than likely only one sheet should ever be set as # visible. # This would correspond to the Book's sheet_active attribute, but # that doesn't exist as WINDOW1 records aren't currently processed. # The real thing is the visibility attribute from the BOUNDSHEET record. ("sheet_visible", 0), ("show_in_page_break_preview", 0), ) ## #

Contains the data for one worksheet.

# #

In the cell access functions, "rowx" is a row index, counting from zero, and "colx" is a # column index, counting from zero. # Negative values for row/column indexes and slice positions are supported in the expected fashion.

# #

For information about cell types and cell values, refer to the documentation of the {@link #Cell} class.

# #

WARNING: You don't call this class yourself. You access Sheet objects via the Book object that # was returned when you called xlrd.open_workbook("myfile.xls").

class Sheet(BaseObject): ## # Name of sheet. name = '' ## # A reference to the Book object to which this sheet belongs. # Example usage: some_sheet.book.datemode book = None ## # Number of rows in sheet. A row index is in range(thesheet.nrows). nrows = 0 ## # Nominal number of columns in sheet. It is 1 + the maximum column index # found, ignoring trailing empty cells. See also open_workbook(ragged_rows=?) # and Sheet.{@link #Sheet.row_len}(row_index). ncols = 0 ## # The map from a column index to a {@link #Colinfo} object. Often there is an entry # in COLINFO records for all column indexes in range(257). # Note that xlrd ignores the entry for the non-existent # 257th column. On the other hand, there may be no entry for unused columns. #
-- New in version 0.6.1. Populated only if open_workbook(formatting_info=True). colinfo_map = {} ## # The map from a row index to a {@link #Rowinfo} object. Note that it is possible # to have missing entries -- at least one source of XLS files doesn't # bother writing ROW records. #
-- New in version 0.6.1. Populated only if open_workbook(formatting_info=True). rowinfo_map = {} ## # List of address ranges of cells containing column labels. # These are set up in Excel by Insert > Name > Labels > Columns. #
-- New in version 0.6.0 #
How to deconstruct the list: #
    # for crange in thesheet.col_label_ranges:
    #     rlo, rhi, clo, chi = crange
    #     for rx in xrange(rlo, rhi):
    #         for cx in xrange(clo, chi):
    #             print "Column label at (rowx=%d, colx=%d) is %r" \
    #                 (rx, cx, thesheet.cell_value(rx, cx))
    # 
col_label_ranges = [] ## # List of address ranges of cells containing row labels. # For more details, see col_label_ranges above. #
-- New in version 0.6.0 row_label_ranges = [] ## # List of address ranges of cells which have been merged. # These are set up in Excel by Format > Cells > Alignment, then ticking # the "Merge cells" box. #
Note that the upper limits are exclusive: i.e. [2, 3, 7, 9] only # spans two cells. #
-- New in version 0.6.1. Extracted only if open_workbook(formatting_info=True). #
How to deconstruct the list: #
    # for crange in thesheet.merged_cells:
    #     rlo, rhi, clo, chi = crange
    #     for rowx in xrange(rlo, rhi):
    #         for colx in xrange(clo, chi):
    #             # cell (rlo, clo) (the top left one) will carry the data
    #             # and formatting info; the remainder will be recorded as
    #             # blank cells, but a renderer will apply the formatting info
    #             # for the top left cell (e.g. border, pattern) to all cells in
    #             # the range.
    # 
merged_cells = [] ## # Mapping of (rowx, colx) to list of (offset, font_index) tuples. The offset # defines where in the string the font begins to be used. # Offsets are expected to be in ascending order. # If the first offset is not zero, the meaning is that the cell's XF's font should # be used from offset 0. #
This is a sparse mapping. There is no entry for cells that are not formatted with # rich text. #
How to use: #
    # runlist = thesheet.rich_text_runlist_map.get((rowx, colx))
    # if runlist:
    #     for offset, font_index in runlist:
    #         # do work here.
    #         pass
    # 
# Populated only if open_workbook(formatting_info=True). #
-- New in version 0.7.2. #
  rich_text_runlist_map = {} ## # Default column width from DEFCOLWIDTH record, else None. # From the OOo docs:
# """Column width in characters, using the width of the zero character # from default font (first FONT record in the file). Excel adds some # extra space to the default width, depending on the default font and # default font size. The algorithm how to exactly calculate the resulting # column width is not known.
# Example: The default width of 8 set in this record results in a column # width of 8.43 using Arial font with a size of 10 points."""
# For the default hierarchy, refer to the {@link #Colinfo} class. #
-- New in version 0.6.1 defcolwidth = None ## # Default column width from STANDARDWIDTH record, else None. # From the OOo docs:
# """Default width of the columns in 1/256 of the width of the zero # character, using default font (first FONT record in the file)."""
# For the default hierarchy, refer to the {@link #Colinfo} class. #
-- New in version 0.6.1 standardwidth = None ## # Default value to be used for a row if there is # no ROW record for that row. # From the optional DEFAULTROWHEIGHT record. default_row_height = None ## # Default value to be used for a row if there is # no ROW record for that row. # From the optional DEFAULTROWHEIGHT record. default_row_height_mismatch = None ## # Default value to be used for a row if there is # no ROW record for that row. # From the optional DEFAULTROWHEIGHT record. default_row_hidden = None ## # Default value to be used for a row if there is # no ROW record for that row. # From the optional DEFAULTROWHEIGHT record. default_additional_space_above = None ## # Default value to be used for a row if there is # no ROW record for that row. # From the optional DEFAULTROWHEIGHT record. default_additional_space_below = None ## # Visibility of the sheet. 0 = visible, 1 = hidden (can be unhidden # by user -- Format/Sheet/Unhide), 2 = "very hidden" (can be unhidden # only by VBA macro). visibility = 0 ## # A 256-element tuple corresponding to the contents of the GCW record for this sheet. # If no such record, treat as all bits zero. # Applies to BIFF4-7 only. See docs of the {@link #Colinfo} class for discussion. gcw = (0, ) * 256 ## #

A list of {@link #Hyperlink} objects corresponding to HLINK records found # in the worksheet.
-- New in version 0.7.2

hyperlink_list = [] ## #

A sparse mapping from (rowx, colx) to an item in {@link #Sheet.hyperlink_list}. # Cells not covered by a hyperlink are not mapped. # It is possible using the Excel UI to set up a hyperlink that # covers a larger-than-1x1 rectangle of cells. # Hyperlink rectangles may overlap (Excel doesn't check). # When a multiply-covered cell is clicked on, the hyperlink that is activated # (and the one that is mapped here) is the last in hyperlink_list. #
-- New in version 0.7.2

hyperlink_map = {} ## #

A sparse mapping from (rowx, colx) to a {@link #Note} object. # Cells not containing a note ("comment") are not mapped. #
-- New in version 0.7.2

cell_note_map = {} ## # Number of columns in left pane (frozen panes; for split panes, see comments below in code) vert_split_pos = 0 ## # Number of rows in top pane (frozen panes; for split panes, see comments below in code) horz_split_pos = 0 ## # Index of first visible row in bottom frozen/split pane horz_split_first_visible = 0 ## # Index of first visible column in right frozen/split pane vert_split_first_visible = 0 ## # Frozen panes: ignore it. Split panes: explanation and diagrams in OOo docs. split_active_pane = 0 ## # Boolean specifying if a PANE record was present, ignore unless you're xlutils.copy has_pane_record = 0 ## # A list of the horizontal page breaks in this sheet. # Breaks are tuples in the form (index of row after break, start col index, end col index). # Populated only if open_workbook(formatting_info=True). #
-- New in version 0.7.2 horizontal_page_breaks = [] ## # A list of the vertical page breaks in this sheet. # Breaks are tuples in the form (index of col after break, start row index, end row index). # Populated only if open_workbook(formatting_info=True). #
-- New in version 0.7.2 vertical_page_breaks = [] def __init__(self, book, position, name, number): self.book = book self.biff_version = book.biff_version self._position = position self.logfile = book.logfile self.bt = array('B', [XL_CELL_EMPTY]) self.bf = array('h', [-1]) self.name = name self.number = number self.verbosity = book.verbosity self.formatting_info = book.formatting_info self.ragged_rows = book.ragged_rows if self.ragged_rows: self.put_cell = self.put_cell_ragged else: self.put_cell = self.put_cell_unragged self._xf_index_to_xl_type_map = book._xf_index_to_xl_type_map self.nrows = 0 # actual, including possibly empty cells self.ncols = 0 self._maxdatarowx = -1 # highest rowx containing a non-empty cell self._maxdatacolx = -1 # highest colx containing a non-empty cell self._dimnrows = 0 # as per DIMENSIONS record self._dimncols = 0 self._cell_values = [] self._cell_types = [] self._cell_xf_indexes = [] self.defcolwidth = None self.standardwidth = None self.default_row_height = None self.default_row_height_mismatch = 0 self.default_row_hidden = 0 self.default_additional_space_above = 0 self.default_additional_space_below = 0 self.colinfo_map = {} self.rowinfo_map = {} self.col_label_ranges = [] self.row_label_ranges = [] self.merged_cells = [] self.rich_text_runlist_map = {} self.horizontal_page_breaks = [] self.vertical_page_breaks = [] self._xf_index_stats = [0, 0, 0, 0] self.visibility = book._sheet_visibility[number] # from BOUNDSHEET record for attr, defval in _WINDOW2_options: setattr(self, attr, defval) self.first_visible_rowx = 0 self.first_visible_colx = 0 self.gridline_colour_index = 0x40 self.gridline_colour_rgb = None # pre-BIFF8 self.hyperlink_list = [] self.hyperlink_map = {} self.cell_note_map = {} # Values calculated by xlrd to predict the mag factors that # will actually be used by Excel to display your worksheet. # Pass these values to xlwt when writing XLS files. # Warning 1: Behaviour of OOo Calc and Gnumeric has been observed to differ from Excel's. # Warning 2: A value of zero means almost exactly what it says. Your sheet will be # displayed as a very tiny speck on the screen. xlwt will reject attempts to set # a mag_factor that is not (10 <= mag_factor <= 400). self.cooked_page_break_preview_mag_factor = 60 self.cooked_normal_view_mag_factor = 100 # Values (if any) actually stored on the XLS file self.cached_page_break_preview_mag_factor = 0 # default (60%), from WINDOW2 record self.cached_normal_view_mag_factor = 0 # default (100%), from WINDOW2 record self.scl_mag_factor = None # from SCL record self._ixfe = None # BIFF2 only self._cell_attr_to_xfx = {} # BIFF2.0 only #### Don't initialise this here, use class attribute initialisation. #### self.gcw = (0, ) * 256 #### if self.biff_version >= 80: self.utter_max_rows = 65536 else: self.utter_max_rows = 16384 self.utter_max_cols = 256 self._first_full_rowx = -1 # self._put_cell_exceptions = 0 # self._put_cell_row_widenings = 0 # self._put_cell_rows_appended = 0 # self._put_cell_cells_appended = 0 ## # {@link #Cell} object in the given row and column. def cell(self, rowx, colx): if self.formatting_info: xfx = self.cell_xf_index(rowx, colx) else: xfx = None return Cell( self._cell_types[rowx][colx], self._cell_values[rowx][colx], xfx, ) ## # Value of the cell in the given row and column. def cell_value(self, rowx, colx): return self._cell_values[rowx][colx] ## # Type of the cell in the given row and column. # Refer to the documentation of the {@link #Cell} class. def cell_type(self, rowx, colx): return self._cell_types[rowx][colx] ## # XF index of the cell in the given row and column. # This is an index into Book.{@link #Book.xf_list}. #
-- New in version 0.6.1 def cell_xf_index(self, rowx, colx): self.req_fmt_info() xfx = self._cell_xf_indexes[rowx][colx] if xfx > -1: self._xf_index_stats[0] += 1 return xfx # Check for a row xf_index try: xfx = self.rowinfo_map[rowx].xf_index if xfx > -1: self._xf_index_stats[1] += 1 return xfx except KeyError: pass # Check for a column xf_index try: xfx = self.colinfo_map[colx].xf_index if xfx == -1: xfx = 15 self._xf_index_stats[2] += 1 return xfx except KeyError: # If all else fails, 15 is used as hardwired global default xf_index. self._xf_index_stats[3] += 1 return 15 ## # Returns the effective number of cells in the given row. For use with # open_workbook(ragged_rows=True) which is likely to produce rows # with fewer than {@link #Sheet.ncols} cells. #
-- New in version 0.7.2 def row_len(self, rowx): return len(self._cell_values[rowx]) ## # Returns a sequence of the {@link #Cell} objects in the given row. def row(self, rowx): return [ self.cell(rowx, colx) for colx in xrange(len(self._cell_values[rowx])) ] ## # Returns a generator for iterating through each row. def get_rows(self): return (self.row(index) for index in range(self.nrows)) ## # Returns a slice of the types # of the cells in the given row. def row_types(self, rowx, start_colx=0, end_colx=None): if end_colx is None: return self._cell_types[rowx][start_colx:] return self._cell_types[rowx][start_colx:end_colx] ## # Returns a slice of the values # of the cells in the given row. def row_values(self, rowx, start_colx=0, end_colx=None): if end_colx is None: return self._cell_values[rowx][start_colx:] return self._cell_values[rowx][start_colx:end_colx] ## # Returns a slice of the {@link #Cell} objects in the given row. def row_slice(self, rowx, start_colx=0, end_colx=None): nc = len(self._cell_values[rowx]) if start_colx < 0: start_colx += nc if start_colx < 0: start_colx = 0 if end_colx is None or end_colx > nc: end_colx = nc elif end_colx < 0: end_colx += nc return [ self.cell(rowx, colx) for colx in xrange(start_colx, end_colx) ] ## # Returns a slice of the {@link #Cell} objects in the given column. def col_slice(self, colx, start_rowx=0, end_rowx=None): nr = self.nrows if start_rowx < 0: start_rowx += nr if start_rowx < 0: start_rowx = 0 if end_rowx is None or end_rowx > nr: end_rowx = nr elif end_rowx < 0: end_rowx += nr return [ self.cell(rowx, colx) for rowx in xrange(start_rowx, end_rowx) ] ## # Returns a slice of the values of the cells in the given column. def col_values(self, colx, start_rowx=0, end_rowx=None): nr = self.nrows if start_rowx < 0: start_rowx += nr if start_rowx < 0: start_rowx = 0 if end_rowx is None or end_rowx > nr: end_rowx = nr elif end_rowx < 0: end_rowx += nr return [ self._cell_values[rowx][colx] for rowx in xrange(start_rowx, end_rowx) ] ## # Returns a slice of the types of the cells in the given column. def col_types(self, colx, start_rowx=0, end_rowx=None): nr = self.nrows if start_rowx < 0: start_rowx += nr if start_rowx < 0: start_rowx = 0 if end_rowx is None or end_rowx > nr: end_rowx = nr elif end_rowx < 0: end_rowx += nr return [ self._cell_types[rowx][colx] for rowx in xrange(start_rowx, end_rowx) ] ## # Returns a sequence of the {@link #Cell} objects in the given column. def col(self, colx): return self.col_slice(colx) # Above two lines just for the docs. Here's the real McCoy: col = col_slice # === Following methods are used in building the worksheet. # === They are not part of the API. def tidy_dimensions(self): if self.verbosity >= 3: fprintf(self.logfile, "tidy_dimensions: nrows=%d ncols=%d \n", self.nrows, self.ncols, ) if 1 and self.merged_cells: nr = nc = 0 umaxrows = self.utter_max_rows umaxcols = self.utter_max_cols for crange in self.merged_cells: rlo, rhi, clo, chi = crange if not (0 <= rlo < rhi <= umaxrows) \ or not (0 <= clo < chi <= umaxcols): fprintf(self.logfile, "*** WARNING: sheet #%d (%r), MERGEDCELLS bad range %r\n", self.number, self.name, crange) if rhi > nr: nr = rhi if chi > nc: nc = chi if nc > self.ncols: self.ncols = nc self._first_full_rowx = -2 if nr > self.nrows: # we put one empty cell at (nr-1,0) to make sure # we have the right number of rows. The ragged rows # will sort out the rest if needed. self.put_cell(nr-1, 0, XL_CELL_EMPTY, UNICODE_LITERAL(''), -1) if self.verbosity >= 1 \ and (self.nrows != self._dimnrows or self.ncols != self._dimncols): fprintf(self.logfile, "NOTE *** sheet %d (%r): DIMENSIONS R,C = %d,%d should be %d,%d\n", self.number, self.name, self._dimnrows, self._dimncols, self.nrows, self.ncols, ) if not self.ragged_rows: # fix ragged rows ncols = self.ncols s_cell_types = self._cell_types s_cell_values = self._cell_values s_cell_xf_indexes = self._cell_xf_indexes s_fmt_info = self.formatting_info # for rowx in xrange(self.nrows): if self._first_full_rowx == -2: ubound = self.nrows else: ubound = self._first_full_rowx for rowx in xrange(ubound): trow = s_cell_types[rowx] rlen = len(trow) nextra = ncols - rlen if nextra > 0: s_cell_values[rowx][rlen:] = [UNICODE_LITERAL('')] * nextra trow[rlen:] = self.bt * nextra if s_fmt_info: s_cell_xf_indexes[rowx][rlen:] = self.bf * nextra def put_cell_ragged(self, rowx, colx, ctype, value, xf_index): if ctype is None: # we have a number, so look up the cell type ctype = self._xf_index_to_xl_type_map[xf_index] assert 0 <= colx < self.utter_max_cols assert 0 <= rowx < self.utter_max_rows fmt_info = self.formatting_info try: nr = rowx + 1 if self.nrows < nr: scta = self._cell_types.append scva = self._cell_values.append scxa = self._cell_xf_indexes.append bt = self.bt bf = self.bf for _unused in xrange(self.nrows, nr): scta(bt * 0) scva([]) if fmt_info: scxa(bf * 0) self.nrows = nr types_row = self._cell_types[rowx] values_row = self._cell_values[rowx] if fmt_info: fmt_row = self._cell_xf_indexes[rowx] ltr = len(types_row) if colx >= self.ncols: self.ncols = colx + 1 num_empty = colx - ltr if not num_empty: # most common case: colx == previous colx + 1 # self._put_cell_cells_appended += 1 types_row.append(ctype) values_row.append(value) if fmt_info: fmt_row.append(xf_index) return if num_empty > 0: num_empty += 1 # self._put_cell_row_widenings += 1 # types_row.extend(self.bt * num_empty) # values_row.extend([UNICODE_LITERAL('')] * num_empty) # if fmt_info: # fmt_row.extend(self.bf * num_empty) types_row[ltr:] = self.bt * num_empty values_row[ltr:] = [UNICODE_LITERAL('')] * num_empty if fmt_info: fmt_row[ltr:] = self.bf * num_empty types_row[colx] = ctype values_row[colx] = value if fmt_info: fmt_row[colx] = xf_index except: print("put_cell", rowx, colx, file=self.logfile) raise def put_cell_unragged(self, rowx, colx, ctype, value, xf_index): if ctype is None: # we have a number, so look up the cell type ctype = self._xf_index_to_xl_type_map[xf_index] # assert 0 <= colx < self.utter_max_cols # assert 0 <= rowx < self.utter_max_rows try: self._cell_types[rowx][colx] = ctype self._cell_values[rowx][colx] = value if self.formatting_info: self._cell_xf_indexes[rowx][colx] = xf_index except IndexError: # print >> self.logfile, "put_cell extending", rowx, colx # self.extend_cells(rowx+1, colx+1) # self._put_cell_exceptions += 1 nr = rowx + 1 nc = colx + 1 assert 1 <= nc <= self.utter_max_cols assert 1 <= nr <= self.utter_max_rows if nc > self.ncols: self.ncols = nc # The row self._first_full_rowx and all subsequent rows # are guaranteed to have length == self.ncols. Thus the # "fix ragged rows" section of the tidy_dimensions method # doesn't need to examine them. if nr < self.nrows: # cell data is not in non-descending row order *AND* # self.ncols has been bumped up. # This very rare case ruins this optmisation. self._first_full_rowx = -2 elif rowx > self._first_full_rowx > -2: self._first_full_rowx = rowx if nr <= self.nrows: # New cell is in an existing row, so extend that row (if necessary). # Note that nr < self.nrows means that the cell data # is not in ascending row order!! trow = self._cell_types[rowx] nextra = self.ncols - len(trow) if nextra > 0: # self._put_cell_row_widenings += 1 trow.extend(self.bt * nextra) if self.formatting_info: self._cell_xf_indexes[rowx].extend(self.bf * nextra) self._cell_values[rowx].extend([UNICODE_LITERAL('')] * nextra) else: scta = self._cell_types.append scva = self._cell_values.append scxa = self._cell_xf_indexes.append fmt_info = self.formatting_info nc = self.ncols bt = self.bt bf = self.bf for _unused in xrange(self.nrows, nr): # self._put_cell_rows_appended += 1 scta(bt * nc) scva([UNICODE_LITERAL('')] * nc) if fmt_info: scxa(bf * nc) self.nrows = nr # === end of code from extend_cells() try: self._cell_types[rowx][colx] = ctype self._cell_values[rowx][colx] = value if self.formatting_info: self._cell_xf_indexes[rowx][colx] = xf_index except: print("put_cell", rowx, colx, file=self.logfile) raise except: print("put_cell", rowx, colx, file=self.logfile) raise # === Methods after this line neither know nor care about how cells are stored. def read(self, bk): global rc_stats DEBUG = 0 blah = DEBUG or self.verbosity >= 2 blah_rows = DEBUG or self.verbosity >= 4 blah_formulas = 0 and blah r1c1 = 0 oldpos = bk._position bk._position = self._position XL_SHRFMLA_ETC_ETC = ( XL_SHRFMLA, XL_ARRAY, XL_TABLEOP, XL_TABLEOP2, XL_ARRAY2, XL_TABLEOP_B2, ) self_put_cell = self.put_cell local_unpack = unpack bk_get_record_parts = bk.get_record_parts bv = self.biff_version fmt_info = self.formatting_info do_sst_rich_text = fmt_info and bk._rich_text_runlist_map rowinfo_sharing_dict = {} txos = {} eof_found = 0 while 1: # if DEBUG: print "SHEET.READ: about to read from position %d" % bk._position rc, data_len, data = bk_get_record_parts() # if rc in rc_stats: # rc_stats[rc] += 1 # else: # rc_stats[rc] = 1 # if DEBUG: print "SHEET.READ: op 0x%04x, %d bytes %r" % (rc, data_len, data) if rc == XL_NUMBER: # [:14] in following stmt ignores extraneous rubbish at end of record. # Sample file testEON-8.xls supplied by Jan Kraus. rowx, colx, xf_index, d = local_unpack('> 15) & 1 r.outline_level = bits2 & 7 r.outline_group_starts_ends = (bits2 >> 4) & 1 r.hidden = (bits2 >> 5) & 1 r.height_mismatch = (bits2 >> 6) & 1 r.has_default_xf_index = (bits2 >> 7) & 1 r.xf_index = (bits2 >> 16) & 0xfff r.additional_space_above = (bits2 >> 28) & 1 r.additional_space_below = (bits2 >> 29) & 1 if not r.has_default_xf_index: r.xf_index = -1 self.rowinfo_map[rowx] = r if 0 and r.xf_index > -1: fprintf(self.logfile, "**ROW %d %d %d\n", self.number, rowx, r.xf_index) if blah_rows: print('ROW', rowx, bits1, bits2, file=self.logfile) r.dump(self.logfile, header="--- sh #%d, rowx=%d ---" % (self.number, rowx)) elif rc in XL_FORMULA_OPCODES: # 06, 0206, 0406 # DEBUG = 1 # if DEBUG: print "FORMULA: rc: 0x%04x data: %r" % (rc, data) if bv >= 50: rowx, colx, xf_index, result_str, flags = local_unpack('= 30: rowx, colx, xf_index, result_str, flags = local_unpack(' 255: break # Excel does 0 to 256 inclusive self.colinfo_map[colx] = c if 0: fprintf(self.logfile, "**COL %d %d %d\n", self.number, colx, c.xf_index) if blah: fprintf( self.logfile, "COLINFO sheet #%d cols %d-%d: wid=%d xf_index=%d flags=0x%04x\n", self.number, first_colx, last_colx, c.width, c.xf_index, flags, ) c.dump(self.logfile, header='===') elif rc == XL_DEFCOLWIDTH: self.defcolwidth, = local_unpack(">= 1 self.gcw = tuple(gcw) if 0: showgcw = "".join(map(lambda x: "F "[x], gcw)).rstrip().replace(' ', '.') print("GCW:", showgcw, file=self.logfile) elif rc == XL_BLANK: if not fmt_info: continue rowx, colx, xf_index = local_unpack('> self.logfile, "BLANK", rowx, colx, xf_index self_put_cell(rowx, colx, XL_CELL_BLANK, '', xf_index) elif rc == XL_MULBLANK: # 00BE if not fmt_info: continue nitems = data_len >> 1 result = local_unpack("<%dH" % nitems, data) rowx, mul_first = result[:2] mul_last = result[-1] # print >> self.logfile, "MULBLANK", rowx, mul_first, mul_last, data_len, nitems, mul_last + 4 - mul_first assert nitems == mul_last + 4 - mul_first pos = 2 for colx in xrange(mul_first, mul_last + 1): self_put_cell(rowx, colx, XL_CELL_BLANK, '', result[pos]) pos += 1 elif rc == XL_DIMENSION or rc == XL_DIMENSION2: if data_len == 0: # Four zero bytes after some other record. See github issue 64. continue # if data_len == 10: # Was crashing on BIFF 4.0 file w/o the two trailing unused bytes. # Reported by Ralph Heimburger. if bv < 80: dim_tuple = local_unpack(' found EOF", file=self.logfile) elif rc == XL_COUNTRY: bk.handle_country(data) elif rc == XL_LABELRANGES: pos = 0 pos = unpack_cell_range_address_list_update_pos( self.row_label_ranges, data, pos, bv, addr_size=8, ) pos = unpack_cell_range_address_list_update_pos( self.col_label_ranges, data, pos, bv, addr_size=8, ) assert pos == data_len elif rc == XL_ARRAY: row1x, rownx, col1x, colnx, array_flags, tokslen = \ local_unpack("= 80 num_CFs, needs_recalc, browx1, browx2, bcolx1, bcolx2 = \ unpack("<6H", data[0:12]) if self.verbosity >= 1: fprintf(self.logfile, "\n*** WARNING: Ignoring CONDFMT (conditional formatting) record\n" \ "*** in Sheet %d (%r).\n" \ "*** %d CF record(s); needs_recalc_or_redraw = %d\n" \ "*** Bounding box is %s\n", self.number, self.name, num_CFs, needs_recalc, rangename2d(browx1, browx2+1, bcolx1, bcolx2+1), ) olist = [] # updated by the function pos = unpack_cell_range_address_list_update_pos( olist, data, 12, bv, addr_size=8) # print >> self.logfile, repr(result), len(result) if self.verbosity >= 1: fprintf(self.logfile, "*** %d individual range(s):\n" \ "*** %s\n", len(olist), ", ".join([rangename2d(*coords) for coords in olist]), ) elif rc == XL_CF: if not fmt_info: continue cf_type, cmp_op, sz1, sz2, flags = unpack("> 26) & 1 bord_block = (flags >> 28) & 1 patt_block = (flags >> 29) & 1 if self.verbosity >= 1: fprintf(self.logfile, "\n*** WARNING: Ignoring CF (conditional formatting) sub-record.\n" \ "*** cf_type=%d, cmp_op=%d, sz1=%d, sz2=%d, flags=0x%08x\n" \ "*** optional data blocks: font=%d, border=%d, pattern=%d\n", cf_type, cmp_op, sz1, sz2, flags, font_block, bord_block, patt_block, ) # hex_char_dump(data, 0, data_len, fout=self.logfile) pos = 12 if font_block: (font_height, font_options, weight, escapement, underline, font_colour_index, two_bits, font_esc, font_underl) = \ unpack("<64x i i H H B 3x i 4x i i i 18x", data[pos:pos+118]) font_style = (two_bits > 1) & 1 posture = (font_options > 1) & 1 font_canc = (two_bits > 7) & 1 cancellation = (font_options > 7) & 1 if self.verbosity >= 1: fprintf(self.logfile, "*** Font info: height=%d, weight=%d, escapement=%d,\n" \ "*** underline=%d, colour_index=%d, esc=%d, underl=%d,\n" \ "*** style=%d, posture=%d, canc=%d, cancellation=%d\n", font_height, weight, escapement, underline, font_colour_index, font_esc, font_underl, font_style, posture, font_canc, cancellation, ) pos += 118 if bord_block: pos += 8 if patt_block: pos += 4 fmla1 = data[pos:pos+sz1] pos += sz1 if blah and sz1: fprintf(self.logfile, "*** formula 1:\n", ) dump_formula(bk, fmla1, sz1, bv, reldelta=0, blah=1) fmla2 = data[pos:pos+sz2] pos += sz2 assert pos == data_len if blah and sz2: fprintf(self.logfile, "*** formula 2:\n", ) dump_formula(bk, fmla2, sz2, bv, reldelta=0, blah=1) elif rc == XL_DEFAULTROWHEIGHT: if data_len == 4: bits, self.default_row_height = unpack("> 1) & 1 self.default_additional_space_above = (bits >> 2) & 1 self.default_additional_space_below = (bits >> 3) & 1 elif rc == XL_MERGEDCELLS: if not fmt_info: continue pos = unpack_cell_range_address_list_update_pos( self.merged_cells, data, 0, bv, addr_size=8) if blah: fprintf(self.logfile, "MERGEDCELLS: %d ranges\n", (pos - 2) // 8) assert pos == data_len, \ "MERGEDCELLS: pos=%d data_len=%d" % (pos, data_len) elif rc == XL_WINDOW2: if bv >= 80 and data_len >= 14: (options, self.first_visible_rowx, self.first_visible_colx, self.gridline_colour_index, self.cached_page_break_preview_mag_factor, self.cached_normal_view_mag_factor ) = unpack("= 30 # BIFF3-7 (options, self.first_visible_rowx, self.first_visible_colx, ) = unpack(">= 1 elif rc == XL_SCL: num, den = unpack("= 0: print(( "WARNING *** SCL rcd sheet %d: should have 0.1 <= num/den <= 4; got %d/%d" % (self.number, num, den) ), file=self.logfile) result = 100 self.scl_mag_factor = result elif rc == XL_PANE: ( self.vert_split_pos, self.horz_split_pos, self.horz_split_first_visible, self.vert_split_first_visible, self.split_active_pane, ) = unpack("= 80)) + 2 == data_len pos = 2 if bv < 80: while pos < data_len: self.horizontal_page_breaks.append((local_unpack("= 80)) + 2 == data_len pos = 2 if bv < 80: while pos < data_len: self.vertical_page_breaks.append((local_unpack("> 15) & 1 r.has_default_xf_index = bits2 & 1 r.xf_index = xf_index # r.outline_level = 0 # set in __init__ # r.outline_group_starts_ends = 0 # set in __init__ # r.hidden = 0 # set in __init__ # r.height_mismatch = 0 # set in __init__ # r.additional_space_above = 0 # set in __init__ # r.additional_space_below = 0 # set in __init__ self.rowinfo_map[rowx] = r if 0 and r.xf_index > -1: fprintf(self.logfile, "**ROW %d %d %d\n", self.number, rowx, r.xf_index) if blah_rows: print('ROW_B2', rowx, bits1, has_defaults, file=self.logfile) r.dump(self.logfile, header="--- sh #%d, rowx=%d ---" % (self.number, rowx)) elif rc == XL_COLWIDTH: # BIFF2 only if not fmt_info: continue first_colx, last_colx, width\ = local_unpack("= 30) + 1 nchars_expected = unpack("<" + "BH"[lenlen - 1], data[:lenlen])[0] offset = lenlen if bv < 80: enc = bk.encoding or bk.derive_encoding() nchars_found = 0 result = UNICODE_LITERAL("") while 1: if bv >= 80: flag = BYTES_ORD(data[offset]) & 1 enc = ("latin_1", "utf_16_le")[flag] offset += 1 chunk = unicode(data[offset:], enc) result += chunk nchars_found += len(chunk) if nchars_found == nchars_expected: return result if nchars_found > nchars_expected: msg = ("STRING/CONTINUE: expected %d chars, found %d" % (nchars_expected, nchars_found)) raise XLRDError(msg) rc, _unused_len, data = bk.get_record_parts() if rc != XL_CONTINUE: raise XLRDError( "Expected CONTINUE record; found record-type 0x%04X" % rc) offset = 0 def update_cooked_mag_factors(self): # Cached values are used ONLY for the non-active view mode. # When the user switches to the non-active view mode, # if the cached value for that mode is not valid, # Excel pops up a window which says: # "The number must be between 10 and 400. Try again by entering a number in this range." # When the user hits OK, it drops into the non-active view mode # but uses the magn from the active mode. # NOTE: definition of "valid" depends on mode ... see below blah = DEBUG or self.verbosity > 0 if self.show_in_page_break_preview: if self.scl_mag_factor is None: # no SCL record self.cooked_page_break_preview_mag_factor = 100 # Yes, 100, not 60, NOT a typo else: self.cooked_page_break_preview_mag_factor = self.scl_mag_factor zoom = self.cached_normal_view_mag_factor if not (10 <= zoom <=400): if blah: print(( "WARNING *** WINDOW2 rcd sheet %d: Bad cached_normal_view_mag_factor: %d" % (self.number, self.cached_normal_view_mag_factor) ), file=self.logfile) zoom = self.cooked_page_break_preview_mag_factor self.cooked_normal_view_mag_factor = zoom else: # normal view mode if self.scl_mag_factor is None: # no SCL record self.cooked_normal_view_mag_factor = 100 else: self.cooked_normal_view_mag_factor = self.scl_mag_factor zoom = self.cached_page_break_preview_mag_factor if not zoom: # VALID, defaults to 60 zoom = 60 elif not (10 <= zoom <= 400): if blah: print(( "WARNING *** WINDOW2 rcd sheet %r: Bad cached_page_break_preview_mag_factor: %r" % (self.number, self.cached_page_break_preview_mag_factor) ), file=self.logfile) zoom = self.cooked_normal_view_mag_factor self.cooked_page_break_preview_mag_factor = zoom def fixed_BIFF2_xfindex(self, cell_attr, rowx, colx, true_xfx=None): DEBUG = 0 blah = DEBUG or self.verbosity >= 2 if self.biff_version == 21: if self.book.xf_list: if true_xfx is not None: xfx = true_xfx else: xfx = BYTES_ORD(cell_attr[0]) & 0x3F if xfx == 0x3F: if self._ixfe is None: raise XLRDError("BIFF2 cell record has XF index 63 but no preceding IXFE record.") xfx = self._ixfe # OOo docs are capable of interpretation that each # cell record is preceded immediately by its own IXFE record. # Empirical evidence is that (sensibly) an IXFE record applies to all # following cell records until another IXFE comes along. return xfx # Have either Excel 2.0, or broken 2.1 w/o XF records -- same effect. self.biff_version = self.book.biff_version = 20 #### check that XF slot in cell_attr is zero xfx_slot = BYTES_ORD(cell_attr[0]) & 0x3F assert xfx_slot == 0 xfx = self._cell_attr_to_xfx.get(cell_attr) if xfx is not None: return xfx if blah: fprintf(self.logfile, "New cell_attr %r at (%r, %r)\n", cell_attr, rowx, colx) if not self.book.xf_list: for xfx in xrange(16): self.insert_new_BIFF20_xf(cell_attr=b"\x40\x00\x00", style=xfx < 15) xfx = self.insert_new_BIFF20_xf(cell_attr=cell_attr) return xfx def insert_new_BIFF20_xf(self, cell_attr, style=0): DEBUG = 0 blah = DEBUG or self.verbosity >= 2 book = self.book xfx = len(book.xf_list) xf = self.fake_XF_from_BIFF20_cell_attr(cell_attr, style) xf.xf_index = xfx book.xf_list.append(xf) if blah: xf.dump(self.logfile, header="=== Faked XF %d ===" % xfx, footer="======") if xf.format_key not in book.format_map: if xf.format_key: msg = "ERROR *** XF[%d] unknown format key (%d, 0x%04x)\n" fprintf(self.logfile, msg, xf.xf_index, xf.format_key, xf.format_key) fmt = Format(xf.format_key, FUN, UNICODE_LITERAL("General")) book.format_map[xf.format_key] = fmt book.format_list.append(fmt) cellty_from_fmtty = { FNU: XL_CELL_NUMBER, FUN: XL_CELL_NUMBER, FGE: XL_CELL_NUMBER, FDT: XL_CELL_DATE, FTX: XL_CELL_NUMBER, # Yes, a number can be formatted as text. } fmt = book.format_map[xf.format_key] cellty = cellty_from_fmtty[fmt.type] self._xf_index_to_xl_type_map[xf.xf_index] = cellty self._cell_attr_to_xfx[cell_attr] = xfx return xfx def fake_XF_from_BIFF20_cell_attr(self, cell_attr, style=0): from .formatting import XF, XFAlignment, XFBorder, XFBackground, XFProtection xf = XF() xf.alignment = XFAlignment() xf.alignment.indent_level = 0 xf.alignment.shrink_to_fit = 0 xf.alignment.text_direction = 0 xf.border = XFBorder() xf.border.diag_up = 0 xf.border.diag_down = 0 xf.border.diag_colour_index = 0 xf.border.diag_line_style = 0 # no line xf.background = XFBackground() xf.protection = XFProtection() (prot_bits, font_and_format, halign_etc) = unpack('> 6 upkbits(xf.protection, prot_bits, ( (6, 0x40, 'cell_locked'), (7, 0x80, 'formula_hidden'), )) xf.alignment.hor_align = halign_etc & 0x07 for mask, side in ((0x08, 'left'), (0x10, 'right'), (0x20, 'top'), (0x40, 'bottom')): if halign_etc & mask: colour_index, line_style = 8, 1 # black, thin else: colour_index, line_style = 0, 0 # none, none setattr(xf.border, side + '_colour_index', colour_index) setattr(xf.border, side + '_line_style', line_style) bg = xf.background if halign_etc & 0x80: bg.fill_pattern = 17 else: bg.fill_pattern = 0 bg.background_colour_index = 9 # white bg.pattern_colour_index = 8 # black xf.parent_style_index = (0x0FFF, 0)[style] xf.alignment.vert_align = 2 # bottom xf.alignment.rotation = 0 for attr_stem in \ "format font alignment border background protection".split(): attr = "_" + attr_stem + "_flag" setattr(xf, attr, 1) return xf def req_fmt_info(self): if not self.formatting_info: raise XLRDError("Feature requires open_workbook(..., formatting_info=True)") ## # Determine column display width. #
-- New in version 0.6.1 #
# @param colx Index of the queried column, range 0 to 255. # Note that it is possible to find out the width that will be used to display # columns with no cell information e.g. column IV (colx=255). # @return The column width that will be used for displaying # the given column by Excel, in units of 1/256th of the width of a # standard character (the digit zero in the first font). def computed_column_width(self, colx): self.req_fmt_info() if self.biff_version >= 80: colinfo = self.colinfo_map.get(colx, None) if colinfo is not None: return colinfo.width if self.standardwidth is not None: return self.standardwidth elif self.biff_version >= 40: if self.gcw[colx]: if self.standardwidth is not None: return self.standardwidth else: colinfo = self.colinfo_map.get(colx, None) if colinfo is not None: return colinfo.width elif self.biff_version == 30: colinfo = self.colinfo_map.get(colx, None) if colinfo is not None: return colinfo.width # All roads lead to Rome and the DEFCOLWIDTH ... if self.defcolwidth is not None: return self.defcolwidth * 256 return 8 * 256 # 8 is what Excel puts in a DEFCOLWIDTH record def handle_hlink(self, data): # DEBUG = 1 if DEBUG: print("\n=== hyperlink ===", file=self.logfile) record_size = len(data) h = Hyperlink() h.frowx, h.lrowx, h.fcolx, h.lcolx, guid0, dummy, options = unpack(' 0: fprintf( self.logfile, "*** WARNING: hyperlink at r=%d c=%d has %d extra data bytes: %s\n", h.frowx, h.fcolx, extra_nbytes, REPR(data[-extra_nbytes:]) ) # Seen: b"\x00\x00" also b"A\x00", b"V\x00" elif extra_nbytes < 0: raise XLRDError("Bug or corrupt file, send copy of input file for debugging") self.hyperlink_list.append(h) for rowx in xrange(h.frowx, h.lrowx+1): for colx in xrange(h.fcolx, h.lcolx+1): self.hyperlink_map[rowx, colx] = h def handle_quicktip(self, data): rcx, frowx, lrowx, fcolx, lcolx = unpack('<5H', data[:10]) assert rcx == XL_QUICKTIP assert self.hyperlink_list h = self.hyperlink_list[-1] assert (frowx, lrowx, fcolx, lcolx) == (h.frowx, h.lrowx, h.fcolx, h.lcolx) assert data[-2:] == b'\x00\x00' h.quicktip = unicode(data[10:-2], 'utf_16_le') def handle_msodrawingetc(self, recid, data_len, data): if not OBJ_MSO_DEBUG: return DEBUG = 1 if self.biff_version < 80: return o = MSODrawing() pos = 0 while pos < data_len: tmp, fbt, cb = unpack('> 4) & 0xFFF if ver == 0xF: ndb = 0 # container else: ndb = cb if DEBUG: hex_char_dump(data, pos, ndb + 8, base=0, fout=self.logfile) fprintf(self.logfile, "fbt:0x%04X inst:%d ver:0x%X cb:%d (0x%04X)\n", fbt, inst, ver, cb, cb) if fbt == 0xF010: # Client Anchor assert ndb == 18 (o.anchor_unk, o.anchor_colx_lo, o.anchor_rowx_lo, o.anchor_colx_hi, o.anchor_rowx_hi) = unpack(' 0: rc2, data2_len, data2 = self.book.get_record_parts() assert rc2 == XL_NOTE dummy_rowx, nb = unpack('> 1) & 1 o.row_hidden = (option_flags >> 7) & 1 o.col_hidden = (option_flags >> 8) & 1 # XL97 dev kit book says NULL [sic] bytes padding between string count and string data # to ensure that string is word-aligned. Appears to be nonsense. o.author, endpos = unpack_unicode_update_pos(data, 8, lenlen=2) # There is a random/undefined byte after the author string (not counted in the # string length). # Issue 4 on github: Google Spreadsheet doesn't write the undefined byte. assert (data_len - endpos) in (0, 1) if OBJ_MSO_DEBUG: o.dump(self.logfile, header="=== Note ===", footer= " ") txo = txos.get(o._object_id) if txo: o.text = txo.text o.rich_text_runlist = txo.rich_text_runlist self.cell_note_map[o.rowx, o.colx] = o def handle_txo(self, data): if self.biff_version < 80: return o = MSTxo() data_len = len(data) fmt = ' Represents a user "comment" or "note". # Note objects are accessible through Sheet.{@link #Sheet.cell_note_map}. #
-- New in version 0.7.2 #

class Note(BaseObject): ## # Author of note author = UNICODE_LITERAL('') ## # True if the containing column is hidden col_hidden = 0 ## # Column index colx = 0 ## # List of (offset_in_string, font_index) tuples. # Unlike Sheet.{@link #Sheet.rich_text_runlist_map}, the first offset should always be 0. rich_text_runlist = None ## # True if the containing row is hidden row_hidden = 0 ## # Row index rowx = 0 ## # True if note is always shown show = 0 ## # Text of the note text = UNICODE_LITERAL('') ## #

Contains the attributes of a hyperlink. # Hyperlink objects are accessible through Sheet.{@link #Sheet.hyperlink_list} # and Sheet.{@link #Sheet.hyperlink_map}. #
-- New in version 0.7.2 #

class Hyperlink(BaseObject): ## # Index of first row frowx = None ## # Index of last row lrowx = None ## # Index of first column fcolx = None ## # Index of last column lcolx = None ## # Type of hyperlink. Unicode string, one of 'url', 'unc', # 'local file', 'workbook', 'unknown' type = None ## # The URL or file-path, depending in the type. Unicode string, except # in the rare case of a local but non-existent file with non-ASCII # characters in the name, in which case only the "8.3" filename is available, # as a bytes (3.x) or str (2.x) string, with unknown encoding. url_or_path = None ## # Description ... this is displayed in the cell, # and should be identical to the cell value. Unicode string, or None. It seems # impossible NOT to have a description created by the Excel UI. desc = None ## # Target frame. Unicode string. Note: I have not seen a case of this. # It seems impossible to create one in the Excel UI. target = None ## # "Textmark": the piece after the "#" in # "http://docs.python.org/library#struct_module", or the Sheet1!A1:Z99 # part when type is "workbook". textmark = None ## # The text of the "quick tip" displayed when the cursor # hovers over the hyperlink. quicktip = None # === helpers === def unpack_RK(rk_str): flags = BYTES_ORD(rk_str[0]) if flags & 2: # There's a SIGNED 30-bit integer in there! i, = unpack('>= 2 # div by 4 to drop the 2 flag bits if flags & 1: return i / 100.0 return float(i) else: # It's the most significant 30 bits of an IEEE 754 64-bit FP number d, = unpack('Contains the data for one cell.

# #

WARNING: You don't call this class yourself. You access Cell objects # via methods of the {@link #Sheet} object(s) that you found in the {@link #Book} object that # was returned when you called xlrd.open_workbook("myfile.xls").

#

Cell objects have three attributes: ctype is an int, value # (which depends on ctype) and xf_index. # If "formatting_info" is not enabled when the workbook is opened, xf_index will be None. # The following table describes the types of cells and how their values # are represented in Python.

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
Type symbolType numberPython value
XL_CELL_EMPTY0empty string u''
XL_CELL_TEXT1a Unicode string
XL_CELL_NUMBER2float
XL_CELL_DATE3float
XL_CELL_BOOLEAN4int; 1 means TRUE, 0 means FALSE
XL_CELL_ERROR5int representing internal Excel codes; for a text representation, # refer to the supplied dictionary error_text_from_code
XL_CELL_BLANK6empty string u''. Note: this type will appear only when # open_workbook(..., formatting_info=True) is used.
#

class Cell(BaseObject): __slots__ = ['ctype', 'value', 'xf_index'] def __init__(self, ctype, value, xf_index=None): self.ctype = ctype self.value = value self.xf_index = xf_index def __repr__(self): if self.xf_index is None: return "%s:%r" % (ctype_text[self.ctype], self.value) else: return "%s:%r (XF:%r)" % (ctype_text[self.ctype], self.value, self.xf_index) empty_cell = Cell(XL_CELL_EMPTY, UNICODE_LITERAL('')) ##### =============== Colinfo and Rowinfo ============================== ##### ## # Width and default formatting information that applies to one or # more columns in a sheet. Derived from COLINFO records. # #

Here is the default hierarchy for width, according to the OOo docs: # #
"""In BIFF3, if a COLINFO record is missing for a column, # the width specified in the record DEFCOLWIDTH is used instead. # #
In BIFF4-BIFF7, the width set in this [COLINFO] record is only used, # if the corresponding bit for this column is cleared in the GCW # record, otherwise the column width set in the DEFCOLWIDTH record # is used (the STANDARDWIDTH record is always ignored in this case [see footnote!]). # #
In BIFF8, if a COLINFO record is missing for a column, # the width specified in the record STANDARDWIDTH is used. # If this [STANDARDWIDTH] record is also missing, # the column width of the record DEFCOLWIDTH is used instead.""" #
# # Footnote: The docs on the GCW record say this: # """
# If a bit is set, the corresponding column uses the width set in the STANDARDWIDTH # record. If a bit is cleared, the corresponding column uses the width set in the # COLINFO record for this column. #
If a bit is set, and the worksheet does not contain the STANDARDWIDTH record, or if # the bit is cleared, and the worksheet does not contain the COLINFO record, the DEFCOLWIDTH # record of the worksheet will be used instead. #
"""
# At the moment (2007-01-17) xlrd is going with the GCW version of the story. # Reference to the source may be useful: see the computed_column_width(colx) method # of the Sheet class. #
-- New in version 0.6.1 #

class Colinfo(BaseObject): ## # Width of the column in 1/256 of the width of the zero character, # using default font (first FONT record in the file). width = 0 ## # XF index to be used for formatting empty cells. xf_index = -1 ## # 1 = column is hidden hidden = 0 ## # Value of a 1-bit flag whose purpose is unknown # but is often seen set to 1 bit1_flag = 0 ## # Outline level of the column, in range(7). # (0 = no outline) outline_level = 0 ## # 1 = column is collapsed collapsed = 0 _USE_SLOTS = 1 ## #

Height and default formatting information that applies to a row in a sheet. # Derived from ROW records. #
-- New in version 0.6.1

# #

height: Height of the row, in twips. One twip == 1/20 of a point.

# #

has_default_height: 0 = Row has custom height; 1 = Row has default height.

# #

outline_level: Outline level of the row (0 to 7)

# #

outline_group_starts_ends: 1 = Outline group starts or ends here (depending on where the # outline buttons are located, see WSBOOL record [TODO ??]), # and is collapsed

# #

hidden: 1 = Row is hidden (manually, or by a filter or outline group)

# #

height_mismatch: 1 = Row height and default font height do not match

# #

has_default_xf_index: 1 = the xf_index attribute is usable; 0 = ignore it

# #

xf_index: Index to default XF record for empty cells in this row. # Don't use this if has_default_xf_index == 0.

# #

additional_space_above: This flag is set, if the upper border of at least one cell in this row # or if the lower border of at least one cell in the row above is # formatted with a thick line style. Thin and medium line styles are not # taken into account.

# #

additional_space_below: This flag is set, if the lower border of at least one cell in this row # or if the upper border of at least one cell in the row below is # formatted with a medium or thick line style. Thin line styles are not # taken into account.

class Rowinfo(BaseObject): if _USE_SLOTS: __slots__ = ( "height", "has_default_height", "outline_level", "outline_group_starts_ends", "hidden", "height_mismatch", "has_default_xf_index", "xf_index", "additional_space_above", "additional_space_below", ) def __init__(self): self.height = None self.has_default_height = None self.outline_level = None self.outline_group_starts_ends = None self.hidden = None self.height_mismatch = None self.has_default_xf_index = None self.xf_index = None self.additional_space_above = None self.additional_space_below = None def __getstate__(self): return ( self.height, self.has_default_height, self.outline_level, self.outline_group_starts_ends, self.hidden, self.height_mismatch, self.has_default_xf_index, self.xf_index, self.additional_space_above, self.additional_space_below, ) def __setstate__(self, state): ( self.height, self.has_default_height, self.outline_level, self.outline_group_starts_ends, self.hidden, self.height_mismatch, self.has_default_xf_index, self.xf_index, self.additional_space_above, self.additional_space_below, ) = state