############################################################################### # # Workbook - A class for writing the Excel XLSX Workbook file. # # Copyright 2013-2020, John McNamara, jmcnamara@cpan.org # # Standard packages. import hashlib import re import os import operator import time from warnings import warn from datetime import datetime from zipfile import ZipFile, ZipInfo, ZIP_DEFLATED, LargeZipFile from struct import unpack from .compatibility import int_types, num_types, str_types, force_unicode # Package imports. from . import xmlwriter from .worksheet import Worksheet from .chartsheet import Chartsheet from .sharedstrings import SharedStringTable from .format import Format from .packager import Packager from .utility import xl_cell_to_rowcol from .chart_area import ChartArea from .chart_bar import ChartBar from .chart_column import ChartColumn from .chart_doughnut import ChartDoughnut from .chart_line import ChartLine from .chart_pie import ChartPie from .chart_radar import ChartRadar from .chart_scatter import ChartScatter from .chart_stock import ChartStock from .exceptions import InvalidWorksheetName from .exceptions import DuplicateWorksheetName from .exceptions import UndefinedImageSize from .exceptions import UnsupportedImageFormat from .exceptions import FileCreateError from .exceptions import FileSizeError class Workbook(xmlwriter.XMLwriter): """ A class for writing the Excel XLSX Workbook file. """ ########################################################################### # # Public API. # ########################################################################### chartsheet_class = Chartsheet worksheet_class = Worksheet def __init__(self, filename=None, options=None): """ Constructor. """ if options is None: options = {} super(Workbook, self).__init__() self.filename = filename self.tmpdir = options.get('tmpdir', None) self.date_1904 = options.get('date_1904', False) self.strings_to_numbers = options.get('strings_to_numbers', False) self.strings_to_formulas = options.get('strings_to_formulas', True) self.strings_to_urls = options.get('strings_to_urls', True) self.nan_inf_to_errors = options.get('nan_inf_to_errors', False) self.default_date_format = options.get('default_date_format', None) self.constant_memory = options.get('constant_memory', False) self.in_memory = options.get('in_memory', False) self.excel2003_style = options.get('excel2003_style', False) self.remove_timezone = options.get('remove_timezone', False) self.default_format_properties = \ options.get('default_format_properties', {}) self.max_url_length = options.get('max_url_length', 2079) if self.max_url_length < 255: self.max_url_length = 2079 if options.get('use_zip64'): self.allow_zip64 = True else: self.allow_zip64 = False self.worksheet_meta = WorksheetMeta() self.selected = 0 self.fileclosed = 0 self.filehandle = None self.internal_fh = 0 self.sheet_name = 'Sheet' self.chart_name = 'Chart' self.sheetname_count = 0 self.chartname_count = 0 self.worksheets_objs = [] self.charts = [] self.drawings = [] self.sheetnames = {} self.formats = [] self.xf_formats = [] self.xf_format_indices = {} self.dxf_formats = [] self.dxf_format_indices = {} self.palette = [] self.font_count = 0 self.num_format_count = 0 self.defined_names = [] self.named_ranges = [] self.custom_colors = [] self.doc_properties = {} self.custom_properties = [] self.createtime = datetime.utcnow() self.num_vml_files = 0 self.num_comment_files = 0 self.x_window = 240 self.y_window = 15 self.window_width = 16095 self.window_height = 9660 self.tab_ratio = 600 self.str_table = SharedStringTable() self.vba_project = None self.vba_is_stream = False self.vba_codename = None self.image_types = {} self.images = [] self.border_count = 0 self.fill_count = 0 self.drawing_count = 0 self.calc_mode = "auto" self.calc_on_load = True self.calc_id = 124519 self.has_comments = False # We can't do 'constant_memory' mode while doing 'in_memory' mode. if self.in_memory: self.constant_memory = False # Add the default cell format. if self.excel2003_style: self.add_format({'xf_index': 0, 'font_family': 0}) else: self.add_format({'xf_index': 0}) # Add a default URL format. self.default_url_format = self.add_format({'hyperlink': True}) # Add the default date format. if self.default_date_format is not None: self.default_date_format = \ self.add_format({'num_format': self.default_date_format}) def __enter__(self): """Return self object to use with "with" statement.""" return self def __exit__(self, type, value, traceback): """Close workbook when exiting "with" statement.""" self.close() def add_worksheet(self, name=None, worksheet_class=None): """ Add a new worksheet to the Excel workbook. Args: name: The worksheet name. Defaults to 'Sheet1', etc. Returns: Reference to a worksheet object. """ if worksheet_class is None: worksheet_class = self.worksheet_class return self._add_sheet(name, worksheet_class=worksheet_class) def add_chartsheet(self, name=None, chartsheet_class=None): """ Add a new chartsheet to the Excel workbook. Args: name: The chartsheet name. Defaults to 'Sheet1', etc. Returns: Reference to a chartsheet object. """ if chartsheet_class is None: chartsheet_class = self.chartsheet_class return self._add_sheet(name, worksheet_class=chartsheet_class) def add_format(self, properties=None): """ Add a new Format to the Excel Workbook. Args: properties: The format properties. Returns: Reference to a Format object. """ format_properties = self.default_format_properties.copy() if self.excel2003_style: format_properties = {'font_name': 'Arial', 'font_size': 10, 'theme': 1 * -1} if properties: format_properties.update(properties) xf_format = Format(format_properties, self.xf_format_indices, self.dxf_format_indices) # Store the format reference. self.formats.append(xf_format) return xf_format def add_chart(self, options): """ Create a chart object. Args: options: The chart type and subtype options. Returns: Reference to a Chart object. """ # Type must be specified so we can create the required chart instance. chart_type = options.get('type') if chart_type is None: warn("Chart type must be defined in add_chart()") return if chart_type == 'area': chart = ChartArea(options) elif chart_type == 'bar': chart = ChartBar(options) elif chart_type == 'column': chart = ChartColumn(options) elif chart_type == 'doughnut': chart = ChartDoughnut(options) elif chart_type == 'line': chart = ChartLine(options) elif chart_type == 'pie': chart = ChartPie(options) elif chart_type == 'radar': chart = ChartRadar(options) elif chart_type == 'scatter': chart = ChartScatter(options) elif chart_type == 'stock': chart = ChartStock(options) else: warn("Unknown chart type '%s' in add_chart()" % chart_type) return # Set the embedded chart name if present. if 'name' in options: chart.chart_name = options['name'] chart.embedded = True chart.date_1904 = self.date_1904 chart.remove_timezone = self.remove_timezone self.charts.append(chart) return chart def add_vba_project(self, vba_project, is_stream=False): """ Add a vbaProject binary to the Excel workbook. Args: vba_project: The vbaProject binary file name. is_stream: vba_project is an in memory byte stream. Returns: Nothing. """ if not is_stream and not os.path.exists(vba_project): warn("VBA project binary file '%s' not found." % force_unicode(vba_project)) return -1 if self.vba_codename is None: self.vba_codename = 'ThisWorkbook' self.vba_project = vba_project self.vba_is_stream = is_stream def close(self): """ Call finalization code and close file. Args: None. Returns: Nothing. """ if not self.fileclosed: try: self._store_workbook() except IOError as e: raise FileCreateError(e) except LargeZipFile as e: raise FileSizeError("Filesize would require ZIP64 extensions. " "Use workbook.use_zip64().") self.fileclosed = True else: warn("Calling close() on already closed file.") def set_size(self, width, height): """ Set the size of a workbook window. Args: width: Width of the window in pixels. height: Height of the window in pixels. Returns: Nothing. """ # Convert the width/height to twips at 96 dpi. if width: self.window_width = int(width * 1440 / 96) else: self.window_width = 16095 if height: self.window_height = int(height * 1440 / 96) else: self.window_height = 9660 def set_tab_ratio(self, tab_ratio=None): """ Set the ratio between worksheet tabs and the horizontal slider. Args: tab_ratio: The tab ratio, 0 <= tab_ratio <= 100 Returns: Nothing. """ if tab_ratio is None: return if tab_ratio < 0 or tab_ratio > 100: warn("Tab ratio '%d' outside: 0 <= tab_ratio <= 100" % tab_ratio) else: self.tab_ratio = int(tab_ratio * 10) def set_properties(self, properties): """ Set the document properties such as Title, Author etc. Args: properties: Dictionary of document properties. Returns: Nothing. """ self.doc_properties = properties def set_custom_property(self, name, value, property_type=None): """ Set a custom document property. Args: name: The name of the custom property. value: The value of the custom property. property_type: The type of the custom property. Optional. Returns: Nothing. """ if name is None or value is None: warn("The name and value parameters must be non-None in " "set_custom_property()") return -1 if property_type is None: # Determine the property type from the Python type. if isinstance(value, bool): property_type = 'bool' elif isinstance(value, datetime): property_type = 'date' elif isinstance(value, int_types): property_type = 'number_int' elif isinstance(value, num_types): property_type = 'number' else: property_type = 'text' if property_type == 'date': value = value.strftime("%Y-%m-%dT%H:%M:%SZ") if property_type == 'text' and len(value) > 255: warn("Length of 'value' parameter exceeds Excel's limit of 255 " "characters in set_custom_property(): '%s'" % force_unicode(value)) if len(name) > 255: warn("Length of 'name' parameter exceeds Excel's limit of 255 " "characters in set_custom_property(): '%s'" % force_unicode(name)) self.custom_properties.append((name, value, property_type)) def set_calc_mode(self, mode, calc_id=None): """ Set the Excel calculation mode for the workbook. Args: mode: String containing one of: * manual * auto_except_tables * auto Returns: Nothing. """ self.calc_mode = mode if mode == 'manual': self.calc_on_load = False elif mode == 'auto_except_tables': self.calc_mode = 'autoNoTable' # Leave undocumented for now. Rarely required. if calc_id: self.calc_id = calc_id def define_name(self, name, formula): # Create a defined name in Excel. We handle global/workbook level # names and local/worksheet names. """ Create a defined name in the workbook. Args: name: The defined name. formula: The cell or range that the defined name refers to. Returns: Nothing. """ sheet_index = None sheetname = '' # Remove the = sign from the formula if it exists. if formula.startswith('='): formula = formula.lstrip('=') # Local defined names are formatted like "Sheet1!name". sheet_parts = re.compile(r'^(.*)!(.*)$') match = sheet_parts.match(name) if match: sheetname = match.group(1) name = match.group(2) sheet_index = self._get_sheet_index(sheetname) # Warn if the sheet index wasn't found. if sheet_index is None: warn("Unknown sheet name '%s' in defined_name()" % force_unicode(sheetname)) return -1 else: # Use -1 to indicate global names. sheet_index = -1 # Warn if the defined name contains invalid chars as defined by Excel. if (not re.match(r'^[\w\\][\w\\.]*$', name, re.UNICODE) or re.match(r'^\d', name)): warn("Invalid Excel characters in defined_name(): '%s'" % force_unicode(name)) return -1 # Warn if the defined name looks like a cell name. if re.match(r'^[a-zA-Z][a-zA-Z]?[a-dA-D]?[0-9]+$', name): warn("Name looks like a cell name in defined_name(): '%s'" % force_unicode(name)) return -1 # Warn if the name looks like a R1C1 cell reference. if (re.match(r'^[rcRC]$', name) or re.match(r'^[rcRC]\d+[rcRC]\d+$', name)): warn("Invalid name '%s' like a RC cell ref in defined_name()" % force_unicode(name)) return -1 self.defined_names.append([name, sheet_index, formula, False]) def worksheets(self): """ Return a list of the worksheet objects in the workbook. Args: None. Returns: A list of worksheet objects. """ return self.worksheets_objs def get_worksheet_by_name(self, name): """ Return a worksheet object in the workbook using the sheetname. Args: name: The name of the worksheet. Returns: A worksheet object or None. """ return self.sheetnames.get(name) def get_default_url_format(self): """ Get the default url format used when a user defined format isn't specified with write_url(). The format is the hyperlink style defined by Excel for the default theme. Args: None. Returns: A format object. """ return self.default_url_format def use_zip64(self): """ Allow ZIP64 extensions when writing xlsx file zip container. Args: None. Returns: Nothing. """ self.allow_zip64 = True def set_vba_name(self, name=None): """ Set the VBA name for the workbook. By default the workbook is referred to as ThisWorkbook in VBA. Args: name: The VBA name for the workbook. Returns: Nothing. """ if name is not None: self.vba_codename = name else: self.vba_codename = 'ThisWorkbook' ########################################################################### # # Private API. # ########################################################################### def _assemble_xml_file(self): # Assemble and write the XML file. # Prepare format object for passing to Style.pm. self._prepare_format_properties() # Write the XML declaration. self._xml_declaration() # Write the workbook element. self._write_workbook() # Write the fileVersion element. self._write_file_version() # Write the workbookPr element. self._write_workbook_pr() # Write the bookViews element. self._write_book_views() # Write the sheets element. self._write_sheets() # Write the workbook defined names. self._write_defined_names() # Write the calcPr element. self._write_calc_pr() # Close the workbook tag. self._xml_end_tag('workbook') # Close the file. self._xml_close() def _store_workbook(self): # Create the xlsx/zip file. try: xlsx_file = ZipFile(self.filename, "w", compression=ZIP_DEFLATED, allowZip64=self.allow_zip64) except IOError as e: raise e # Assemble worksheets into a workbook. packager = self._get_packager() # Add a default worksheet if non have been added. if not self.worksheets(): self.add_worksheet() # Ensure that at least one worksheet has been selected. if self.worksheet_meta.activesheet == 0: self.worksheets_objs[0].selected = 1 self.worksheets_objs[0].hidden = 0 # Set the active sheet. for sheet in self.worksheets(): if sheet.index == self.worksheet_meta.activesheet: sheet.active = 1 # Set the sheet vba_codename the workbook has a vbaProject binary. if self.vba_project: for sheet in self.worksheets(): if sheet.vba_codename is None: sheet.set_vba_name() # Convert the SST strings data structure. self._prepare_sst_string_data() # Prepare the worksheet VML elements such as comments and buttons. self._prepare_vml() # Set the defined names for the worksheets such as Print Titles. self._prepare_defined_names() # Prepare the drawings, charts and images. self._prepare_drawings() # Add cached data to charts. self._add_chart_data() # Prepare the worksheet tables. self._prepare_tables() # Package the workbook. packager._add_workbook(self) packager._set_tmpdir(self.tmpdir) packager._set_in_memory(self.in_memory) xml_files = packager._create_package() # Free up the Packager object. packager = None # Add XML sub-files to the Zip file with their Excel filename. for file_id, file_data in enumerate(xml_files): os_filename, xml_filename, is_binary = file_data if self.in_memory: # Set sub-file timestamp to Excel's timestamp of 1/1/1980. zipinfo = ZipInfo(xml_filename, (1980, 1, 1, 0, 0, 0)) # Copy compression type from parent ZipFile. zipinfo.compress_type = xlsx_file.compression if is_binary: xlsx_file.writestr(zipinfo, os_filename.getvalue()) else: xlsx_file.writestr(zipinfo, os_filename.getvalue().encode('utf-8')) else: # The sub-files are tempfiles on disk, i.e, not in memory. # Set sub-file timestamp to 31/1/1980 due to portability # issues setting it to Excel's timestamp of 1/1/1980. timestamp = time.mktime((1980, 1, 31, 0, 0, 0, 0, 0, -1)) os.utime(os_filename, (timestamp, timestamp)) try: xlsx_file.write(os_filename, xml_filename) os.remove(os_filename) except LargeZipFile as e: # Close open temp files on zipfile.LargeZipFile exception. for i in range(file_id, len(xml_files)-1): os.remove(xml_files[i][0]) raise e xlsx_file.close() def _add_sheet(self, name, worksheet_class=None): # Utility for shared code in add_worksheet() and add_chartsheet(). if worksheet_class: worksheet = worksheet_class() else: worksheet = self.worksheet_class() sheet_index = len(self.worksheets_objs) name = self._check_sheetname(name, isinstance(worksheet, Chartsheet)) # Initialization data to pass to the worksheet. init_data = { 'name': name, 'index': sheet_index, 'str_table': self.str_table, 'worksheet_meta': self.worksheet_meta, 'constant_memory': self.constant_memory, 'tmpdir': self.tmpdir, 'date_1904': self.date_1904, 'strings_to_numbers': self.strings_to_numbers, 'strings_to_formulas': self.strings_to_formulas, 'strings_to_urls': self.strings_to_urls, 'nan_inf_to_errors': self.nan_inf_to_errors, 'default_date_format': self.default_date_format, 'default_url_format': self.default_url_format, 'excel2003_style': self.excel2003_style, 'remove_timezone': self.remove_timezone, 'max_url_length': self.max_url_length, } worksheet._initialize(init_data) self.worksheets_objs.append(worksheet) self.sheetnames[name] = worksheet return worksheet def _check_sheetname(self, sheetname, is_chartsheet=False): # Check for valid worksheet names. We check the length, if it contains # any invalid chars and if the sheetname is unique in the workbook. invalid_char = re.compile(r'[\[\]:*?/\\]') # Increment the Sheet/Chart number used for default sheet names below. if is_chartsheet: self.chartname_count += 1 else: self.sheetname_count += 1 # Supply default Sheet/Chart sheetname if none has been defined. if sheetname is None or sheetname == '': if is_chartsheet: sheetname = self.chart_name + str(self.chartname_count) else: sheetname = self.sheet_name + str(self.sheetname_count) # Check that sheet sheetname is <= 31. Excel limit. if len(sheetname) > 31: raise InvalidWorksheetName( "Excel worksheet name '%s' must be <= 31 chars." % sheetname) # Check that sheetname doesn't contain any invalid characters. if invalid_char.search(sheetname): raise InvalidWorksheetName( "Invalid Excel character '[]:*?/\\' in sheetname '%s'." % sheetname) # Check that sheetname doesn't start or end with an apostrophe. if sheetname.startswith("'") or sheetname.endswith("'"): raise InvalidWorksheetName( "Sheet name cannot start or end with an apostrophe \"%s\"." % sheetname) # Check that the worksheet name doesn't already exist since this is a # fatal Excel error. The check must be case insensitive like Excel. for worksheet in self.worksheets(): if sheetname.lower() == worksheet.name.lower(): raise DuplicateWorksheetName( "Sheetname '%s', with case ignored, is already in use." % sheetname) return sheetname def _prepare_format_properties(self): # Prepare all Format properties prior to passing them to styles.py. # Separate format objects into XF and DXF formats. self._prepare_formats() # Set the font index for the format objects. self._prepare_fonts() # Set the number format index for the format objects. self._prepare_num_formats() # Set the border index for the format objects. self._prepare_borders() # Set the fill index for the format objects. self._prepare_fills() def _prepare_formats(self): # Iterate through the XF Format objects and separate them into # XF and DXF formats. The XF and DF formats then need to be sorted # back into index order rather than creation order. xf_formats = [] dxf_formats = [] # Sort into XF and DXF formats. for xf_format in self.formats: if xf_format.xf_index is not None: xf_formats.append(xf_format) if xf_format.dxf_index is not None: dxf_formats.append(xf_format) # Pre-extend the format lists. self.xf_formats = [None] * len(xf_formats) self.dxf_formats = [None] * len(dxf_formats) # Rearrange formats into index order. for xf_format in xf_formats: index = xf_format.xf_index self.xf_formats[index] = xf_format for dxf_format in dxf_formats: index = dxf_format.dxf_index self.dxf_formats[index] = dxf_format def _set_default_xf_indices(self): # Set the default index for each format. Only used for testing. formats = list(self.formats) # Delete the default url format. del formats[1] # Skip the default date format if set. if self.default_date_format is not None: del formats[1] # Set the remaining formats. for xf_format in formats: xf_format._get_xf_index() def _prepare_fonts(self): # Iterate through the XF Format objects and give them an index to # non-default font elements. fonts = {} index = 0 for xf_format in self.xf_formats: key = xf_format._get_font_key() if key in fonts: # Font has already been used. xf_format.font_index = fonts[key] xf_format.has_font = 0 else: # This is a new font. fonts[key] = index xf_format.font_index = index xf_format.has_font = 1 index += 1 self.font_count = index # For DXF formats we only need to check if the properties have changed. for xf_format in self.dxf_formats: # The only font properties that can change for a DXF format are: # color, bold, italic, underline and strikethrough. if (xf_format.font_color or xf_format.bold or xf_format.italic or xf_format.underline or xf_format.font_strikeout): xf_format.has_dxf_font = 1 def _prepare_num_formats(self): # User defined records in Excel start from index 0xA4. num_formats = {} index = 164 num_format_count = 0 for xf_format in (self.xf_formats + self.dxf_formats): num_format = xf_format.num_format # Check if num_format is an index to a built-in number format. if not isinstance(num_format, str_types): num_format = int(num_format) # Number format '0' is indexed as 1 in Excel. if num_format == 0: num_format = 1 xf_format.num_format_index = num_format continue elif num_format == '0': # Number format '0' is indexed as 1 in Excel. xf_format.num_format_index = 1 continue elif num_format == 'General': # The 'General' format has an number format index of 0. xf_format.num_format_index = 0 continue if num_format in num_formats: # Number xf_format has already been used. xf_format.num_format_index = num_formats[num_format] else: # Add a new number xf_format. num_formats[num_format] = index xf_format.num_format_index = index index += 1 # Only increase font count for XF formats (not DXF formats). if xf_format.xf_index: num_format_count += 1 self.num_format_count = num_format_count def _prepare_borders(self): # Iterate through the XF Format objects and give them an index to # non-default border elements. borders = {} index = 0 for xf_format in self.xf_formats: key = xf_format._get_border_key() if key in borders: # Border has already been used. xf_format.border_index = borders[key] xf_format.has_border = 0 else: # This is a new border. borders[key] = index xf_format.border_index = index xf_format.has_border = 1 index += 1 self.border_count = index # For DXF formats we only need to check if the properties have changed. has_border = re.compile(r'[^0:]') for xf_format in self.dxf_formats: key = xf_format._get_border_key() if has_border.search(key): xf_format.has_dxf_border = 1 def _prepare_fills(self): # Iterate through the XF Format objects and give them an index to # non-default fill elements. # The user defined fill properties start from 2 since there are 2 # default fills: patternType="none" and patternType="gray125". fills = {} index = 2 # Start from 2. See above. # Add the default fills. fills['0:0:0'] = 0 fills['17:0:0'] = 1 # Store the DXF colors separately since them may be reversed below. for xf_format in self.dxf_formats: if xf_format.pattern or xf_format.bg_color or xf_format.fg_color: xf_format.has_dxf_fill = 1 xf_format.dxf_bg_color = xf_format.bg_color xf_format.dxf_fg_color = xf_format.fg_color for xf_format in self.xf_formats: # The following logical statements jointly take care of special # cases in relation to cell colors and patterns: # 1. For a solid fill (_pattern == 1) Excel reverses the role of # foreground and background colors, and # 2. If the user specifies a foreground or background color # without a pattern they probably wanted a solid fill, so we fill # in the defaults. if (xf_format.pattern == 1 and xf_format.bg_color != 0 and xf_format.fg_color != 0): tmp = xf_format.fg_color xf_format.fg_color = xf_format.bg_color xf_format.bg_color = tmp if (xf_format.pattern <= 1 and xf_format.bg_color != 0 and xf_format.fg_color == 0): xf_format.fg_color = xf_format.bg_color xf_format.bg_color = 0 xf_format.pattern = 1 if (xf_format.pattern <= 1 and xf_format.bg_color == 0 and xf_format.fg_color != 0): xf_format.bg_color = 0 xf_format.pattern = 1 key = xf_format._get_fill_key() if key in fills: # Fill has already been used. xf_format.fill_index = fills[key] xf_format.has_fill = 0 else: # This is a new fill. fills[key] = index xf_format.fill_index = index xf_format.has_fill = 1 index += 1 self.fill_count = index def _prepare_defined_names(self): # Iterate through the worksheets and store any defined names in # addition to any user defined names. Stores the defined names # for the Workbook.xml and the named ranges for App.xml. defined_names = self.defined_names for sheet in self.worksheets(): # Check for Print Area settings. if sheet.autofilter_area: hidden = 1 sheet_range = sheet.autofilter_area # Store the defined names. defined_names.append(['_xlnm._FilterDatabase', sheet.index, sheet_range, hidden]) # Check for Print Area settings. if sheet.print_area_range: hidden = 0 sheet_range = sheet.print_area_range # Store the defined names. defined_names.append(['_xlnm.Print_Area', sheet.index, sheet_range, hidden]) # Check for repeat rows/cols referred to as Print Titles. if sheet.repeat_col_range or sheet.repeat_row_range: hidden = 0 sheet_range = '' if sheet.repeat_col_range and sheet.repeat_row_range: sheet_range = (sheet.repeat_col_range + ',' + sheet.repeat_row_range) else: sheet_range = (sheet.repeat_col_range + sheet.repeat_row_range) # Store the defined names. defined_names.append(['_xlnm.Print_Titles', sheet.index, sheet_range, hidden]) defined_names = self._sort_defined_names(defined_names) self.defined_names = defined_names self.named_ranges = self._extract_named_ranges(defined_names) def _sort_defined_names(self, names): # Sort the list of list of internal and user defined names in # the same order as used by Excel. # Add a normalize name string to each list for sorting. for name_list in names: (defined_name, _, sheet_name, _) = name_list # Normalize the defined name by removing any leading '_xmln.' # from internal names and lowercasing the string. defined_name = defined_name.replace('_xlnm.', '').lower() # Normalize the sheetname by removing the leading quote and # lowercasing the string. sheet_name = sheet_name.lstrip("'").lower() name_list.append(defined_name + "::" + sheet_name) # Sort based on the normalized key. names.sort(key=operator.itemgetter(4)) # Remove the extra key used for sorting. for name_list in names: name_list.pop() return names def _prepare_drawings(self): # Iterate through the worksheets and set up chart and image drawings. chart_ref_id = 0 image_ref_id = 0 ref_id = 0 drawing_id = 0 image_ids = {} header_image_ids = {} for sheet in self.worksheets(): chart_count = len(sheet.charts) image_count = len(sheet.images) shape_count = len(sheet.shapes) header_image_count = len(sheet.header_images) footer_image_count = len(sheet.footer_images) has_drawing = False if not (chart_count or image_count or shape_count or header_image_count or footer_image_count): continue # Don't increase the drawing_id header/footer images. if chart_count or image_count or shape_count: drawing_id += 1 has_drawing = True # Prepare the worksheet images. for index in range(image_count): filename = sheet.images[index][2] image_data = sheet.images[index][10] (image_type, width, height, name, x_dpi, y_dpi, digest) = \ self._get_image_properties(filename, image_data) if digest in image_ids: ref_id = image_ids[digest] else: image_ref_id += 1 ref_id = image_ref_id image_ids[digest] = image_ref_id self.images.append([filename, image_type, image_data]) sheet._prepare_image(index, ref_id, drawing_id, width, height, name, image_type, x_dpi, y_dpi, digest) # Prepare the worksheet charts. for index in range(chart_count): chart_ref_id += 1 sheet._prepare_chart(index, chart_ref_id, drawing_id) # Prepare the worksheet shapes. for index in range(shape_count): sheet._prepare_shape(index, drawing_id) # Prepare the header images. for index in range(header_image_count): filename = sheet.header_images[index][0] image_data = sheet.header_images[index][1] position = sheet.header_images[index][2] (image_type, width, height, name, x_dpi, y_dpi, digest) = \ self._get_image_properties(filename, image_data) if digest in header_image_ids: ref_id = header_image_ids[digest] else: image_ref_id += 1 ref_id = image_ref_id header_image_ids[digest] = image_ref_id self.images.append([filename, image_type, image_data]) sheet._prepare_header_image(ref_id, width, height, name, image_type, position, x_dpi, y_dpi, digest) # Prepare the footer images. for index in range(footer_image_count): filename = sheet.footer_images[index][0] image_data = sheet.footer_images[index][1] position = sheet.footer_images[index][2] (image_type, width, height, name, x_dpi, y_dpi, digest) = \ self._get_image_properties(filename, image_data) if digest in header_image_ids: ref_id = header_image_ids[digest] else: image_ref_id += 1 ref_id = image_ref_id header_image_ids[digest] = image_ref_id self.images.append([filename, image_type, image_data]) sheet._prepare_header_image(image_ref_id, width, height, name, image_type, position, x_dpi, y_dpi, digest) if has_drawing: drawing = sheet.drawing self.drawings.append(drawing) # Remove charts that were created but not inserted into worksheets. for chart in self.charts[:]: if chart.id == -1: self.charts.remove(chart) # Sort the workbook charts references into the order that the were # written to the worksheets above. self.charts = sorted(self.charts, key=lambda chart: chart.id) self.drawing_count = drawing_id def _get_image_properties(self, filename, image_data): # Extract dimension information from the image file. height = 0 width = 0 x_dpi = 96 y_dpi = 96 if not image_data: # Open the image file and read in the data. fh = open(filename, "rb") data = fh.read() else: # Read the image data from the user supplied byte stream. data = image_data.getvalue() digest = hashlib.sha256(data).hexdigest() # Get the image filename without the path. image_name = os.path.basename(filename) # Look for some common image file markers. marker1 = unpack('3s', data[1:4])[0] marker2 = unpack('>H', data[:2])[0] marker3 = unpack('2s', data[:2])[0] marker4 = unpack('I', data[offset + 0:offset + 4])[0] marker = unpack('>I', data[offset + 4:offset + 8])[0] # Read the image dimensions. if marker == marker_ihdr: width = unpack('>I', data[offset + 8:offset + 12])[0] height = unpack('>I', data[offset + 12:offset + 16])[0] # Read the image DPI. if marker == marker_phys: x_density = unpack('>I', data[offset + 8:offset + 12])[0] y_density = unpack('>I', data[offset + 12:offset + 16])[0] units = unpack('b', data[offset + 16:offset + 17])[0] if units == 1: x_dpi = x_density * 0.0254 y_dpi = y_density * 0.0254 if marker == marker_iend: end_marker = True continue offset = offset + length + 12 return 'png', width, height, x_dpi, y_dpi def _process_jpg(self, data): # Extract width and height information from a JPEG file. offset = 2 data_length = len(data) end_marker = False width = 0 height = 0 x_dpi = 96 y_dpi = 96 # Search through the image data to read the JPEG markers. while not end_marker and offset < data_length: marker = unpack('>H', data[offset + 0:offset + 2])[0] length = unpack('>H', data[offset + 2:offset + 4])[0] # Read the height and width in the 0xFFCn elements (except C4, C8 # and CC which aren't SOF markers). if ((marker & 0xFFF0) == 0xFFC0 and marker != 0xFFC4 and marker != 0xFFC8 and marker != 0xFFCC): height = unpack('>H', data[offset + 5:offset + 7])[0] width = unpack('>H', data[offset + 7:offset + 9])[0] # Read the DPI in the 0xFFE0 element. if marker == 0xFFE0: units = unpack('b', data[offset + 11:offset + 12])[0] x_density = unpack('>H', data[offset + 12:offset + 14])[0] y_density = unpack('>H', data[offset + 14:offset + 16])[0] if units == 1: x_dpi = x_density y_dpi = y_density if units == 2: x_dpi = x_density * 2.54 y_dpi = y_density * 2.54 # Workaround for incorrect dpi. if x_dpi == 1: x_dpi = 96 if y_dpi == 1: y_dpi = 96 if marker == 0xFFDA: end_marker = True continue offset = offset + length + 2 return 'jpeg', width, height, x_dpi, y_dpi def _process_bmp(self, data): # Extract width and height information from a BMP file. width = unpack(' 0: sheetname = c_range[:pos] cells = c_range[pos + 1:] else: return None, None # Split the cell range into 2 cells or else use single cell for both. if cells.find(':') > 0: (cell_1, cell_2) = cells.split(':', 1) else: (cell_1, cell_2) = (cells, cells) # Remove leading/trailing quotes and convert escaped quotes to single. sheetname = sheetname.strip("'") sheetname = sheetname.replace("''", "'") try: # Get the row, col values from the Excel ranges. We do this in a # try block for ranges that can't be parsed such as defined names. (row_start, col_start) = xl_cell_to_rowcol(cell_1) (row_end, col_end) = xl_cell_to_rowcol(cell_2) except AttributeError: return None, None # We only handle 1D ranges. if row_start != row_end and col_start != col_end: return None, None return sheetname, [row_start, col_start, row_end, col_end] def _prepare_sst_string_data(self): # Convert the SST string data from a dict to a list. self.str_table._sort_string_data() def _get_packager(self): # Get and instance of the Packager class to create the xlsx package. # This allows the default packager to be over-ridden. return Packager() ########################################################################### # # XML methods. # ########################################################################### def _write_workbook(self): # Write element. schema = 'http://schemas.openxmlformats.org' xmlns = schema + '/spreadsheetml/2006/main' xmlns_r = schema + '/officeDocument/2006/relationships' attributes = [ ('xmlns', xmlns), ('xmlns:r', xmlns_r), ] self._xml_start_tag('workbook', attributes) def _write_file_version(self): # Write the element. app_name = 'xl' last_edited = 4 lowest_edited = 4 rup_build = 4505 attributes = [ ('appName', app_name), ('lastEdited', last_edited), ('lowestEdited', lowest_edited), ('rupBuild', rup_build), ] if self.vba_project: attributes.append( ('codeName', '{37E998C4-C9E5-D4B9-71C8-EB1FF731991C}')) self._xml_empty_tag('fileVersion', attributes) def _write_workbook_pr(self): # Write element. default_theme_version = 124226 attributes = [] if self.vba_codename: attributes.append(('codeName', self.vba_codename)) if self.date_1904: attributes.append(('date1904', 1)) attributes.append(('defaultThemeVersion', default_theme_version)) self._xml_empty_tag('workbookPr', attributes) def _write_book_views(self): # Write element. self._xml_start_tag('bookViews') self._write_workbook_view() self._xml_end_tag('bookViews') def _write_workbook_view(self): # Write element. attributes = [ ('xWindow', self.x_window), ('yWindow', self.y_window), ('windowWidth', self.window_width), ('windowHeight', self.window_height), ] # Store the tabRatio attribute when it isn't the default. if self.tab_ratio != 600: attributes.append(('tabRatio', self.tab_ratio)) # Store the firstSheet attribute when it isn't the default. if self.worksheet_meta.firstsheet > 0: firstsheet = self.worksheet_meta.firstsheet + 1 attributes.append(('firstSheet', firstsheet)) # Store the activeTab attribute when it isn't the first sheet. if self.worksheet_meta.activesheet > 0: attributes.append(('activeTab', self.worksheet_meta.activesheet)) self._xml_empty_tag('workbookView', attributes) def _write_sheets(self): # Write element. self._xml_start_tag('sheets') id_num = 1 for worksheet in self.worksheets(): self._write_sheet(worksheet.name, id_num, worksheet.hidden) id_num += 1 self._xml_end_tag('sheets') def _write_sheet(self, name, sheet_id, hidden): # Write element. attributes = [ ('name', name), ('sheetId', sheet_id), ] if hidden: attributes.append(('state', 'hidden')) attributes.append(('r:id', 'rId' + str(sheet_id))) self._xml_empty_tag('sheet', attributes) def _write_calc_pr(self): # Write the element. attributes = [('calcId', self.calc_id)] if self.calc_mode == 'manual': attributes.append(('calcMode', self.calc_mode)) attributes.append(('calcOnSave', "0")) elif self.calc_mode == 'autoNoTable': attributes.append(('calcMode', self.calc_mode)) if self.calc_on_load: attributes.append(('fullCalcOnLoad', '1')) self._xml_empty_tag('calcPr', attributes) def _write_defined_names(self): # Write the element. if not self.defined_names: return self._xml_start_tag('definedNames') for defined_name in self.defined_names: self._write_defined_name(defined_name) self._xml_end_tag('definedNames') def _write_defined_name(self, defined_name): # Write the element. name = defined_name[0] sheet_id = defined_name[1] sheet_range = defined_name[2] hidden = defined_name[3] attributes = [('name', name)] if sheet_id != -1: attributes.append(('localSheetId', sheet_id)) if hidden: attributes.append(('hidden', 1)) self._xml_data_element('definedName', sheet_range, attributes) # A metadata class to share data between worksheets. class WorksheetMeta(object): """ A class to track worksheets data such as the active sheet and the first sheet. """ def __init__(self): self.activesheet = 0 self.firstsheet = 0