""" printing tools """ import sys from pandas.core.dtypes.inference import is_sequence from pandas import compat from pandas.compat import u from pandas.core.config import get_option def adjoin(space, *lists, **kwargs): """ Glues together two sets of strings using the amount of space requested. The idea is to prettify. ---------- space : int number of spaces for padding lists : str list of str which being joined strlen : callable function used to calculate the length of each str. Needed for unicode handling. justfunc : callable function used to justify str. Needed for unicode handling. """ strlen = kwargs.pop('strlen', len) justfunc = kwargs.pop('justfunc', justify) out_lines = [] newLists = [] lengths = [max(map(strlen, x)) + space for x in lists[:-1]] # not the last one lengths.append(max(map(len, lists[-1]))) maxLen = max(map(len, lists)) for i, lst in enumerate(lists): nl = justfunc(lst, lengths[i], mode='left') nl.extend([' ' * lengths[i]] * (maxLen - len(lst))) newLists.append(nl) toJoin = zip(*newLists) for lines in toJoin: out_lines.append(_join_unicode(lines)) return _join_unicode(out_lines, sep='\n') def justify(texts, max_len, mode='right'): """ Perform ljust, center, rjust against string or list-like """ if mode == 'left': return [x.ljust(max_len) for x in texts] elif mode == 'center': return [x.center(max_len) for x in texts] else: return [x.rjust(max_len) for x in texts] def _join_unicode(lines, sep=''): try: return sep.join(lines) except UnicodeDecodeError: sep = compat.text_type(sep) return sep.join([x.decode('utf-8') if isinstance(x, str) else x for x in lines]) # Unicode consolidation # --------------------- # # pprinting utility functions for generating Unicode text or # bytes(3.x)/str(2.x) representations of objects. # Try to use these as much as possible rather then rolling your own. # # When to use # ----------- # # 1) If you're writing code internal to pandas (no I/O directly involved), # use pprint_thing(). # # It will always return unicode text which can handled by other # parts of the package without breakage. # # 2) if you need to write something out to file, use # pprint_thing_encoded(encoding). # # If no encoding is specified, it defaults to utf-8. Since encoding pure # ascii with utf-8 is a no-op you can safely use the default utf-8 if you're # working with straight ascii. def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds): """ internal. pprinter for iterables. you should probably use pprint_thing() rather then calling this directly. bounds length of printed sequence, depending on options """ if isinstance(seq, set): fmt = u("{{{body}}}") else: fmt = u("[{body}]") if hasattr(seq, '__setitem__') else u("({body})") if max_seq_items is False: nitems = len(seq) else: nitems = max_seq_items or get_option("max_seq_items") or len(seq) s = iter(seq) r = [] for i in range(min(nitems, len(seq))): # handle sets, no slicing r.append(pprint_thing( next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)) body = ", ".join(r) if nitems < len(seq): body += ", ..." elif isinstance(seq, tuple) and len(seq) == 1: body += ',' return fmt.format(body=body) def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds): """ internal. pprinter for iterables. you should probably use pprint_thing() rather then calling this directly. """ fmt = u("{{{things}}}") pairs = [] pfmt = u("{key}: {val}") if max_seq_items is False: nitems = len(seq) else: nitems = max_seq_items or get_option("max_seq_items") or len(seq) for k, v in list(seq.items())[:nitems]: pairs.append( pfmt.format( key=pprint_thing(k, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds), val=pprint_thing(v, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds))) if nitems < len(seq): return fmt.format(things=", ".join(pairs) + ", ...") else: return fmt.format(things=", ".join(pairs)) def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False, quote_strings=False, max_seq_items=None): """ This function is the sanctioned way of converting objects to a unicode representation. properly handles nested sequences containing unicode strings (unicode(object) does not) Parameters ---------- thing : anything to be formatted _nest_lvl : internal use only. pprint_thing() is mutually-recursive with pprint_sequence, this argument is used to keep track of the current nesting level, and limit it. escape_chars : list or dict, optional Characters to escape. If a dict is passed the values are the replacements default_escapes : bool, default False Whether the input escape characters replaces or adds to the defaults max_seq_items : False, int, default None Pass thru to other pretty printers to limit sequence printing Returns ------- result - unicode object on py2, str on py3. Always Unicode. """ def as_escaped_unicode(thing, escape_chars=escape_chars): # Unicode is fine, else we try to decode using utf-8 and 'replace' # if that's not it either, we have no way of knowing and the user # should deal with it himself. try: result = compat.text_type(thing) # we should try this first except UnicodeDecodeError: # either utf-8 or we replace errors result = str(thing).decode('utf-8', "replace") translate = {'\t': r'\t', '\n': r'\n', '\r': r'\r', } if isinstance(escape_chars, dict): if default_escapes: translate.update(escape_chars) else: translate = escape_chars escape_chars = list(escape_chars.keys()) else: escape_chars = escape_chars or tuple() for c in escape_chars: result = result.replace(c, translate[c]) return compat.text_type(result) if (compat.PY3 and hasattr(thing, '__next__')) or hasattr(thing, 'next'): return compat.text_type(thing) elif (isinstance(thing, dict) and _nest_lvl < get_option("display.pprint_nest_depth")): result = _pprint_dict(thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items) elif (is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth")): result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars, quote_strings=quote_strings, max_seq_items=max_seq_items) elif isinstance(thing, compat.string_types) and quote_strings: if compat.PY3: fmt = u("'{thing}'") else: fmt = u("u'{thing}'") result = fmt.format(thing=as_escaped_unicode(thing)) else: result = as_escaped_unicode(thing) return compat.text_type(result) # always unicode def pprint_thing_encoded(object, encoding='utf-8', errors='replace', **kwds): value = pprint_thing(object) # get unicode representation of object return value.encode(encoding, errors, **kwds) def _enable_data_resource_formatter(enable): if 'IPython' not in sys.modules: # definitely not in IPython return from IPython import get_ipython ip = get_ipython() if ip is None: # still not in IPython return formatters = ip.display_formatter.formatters mimetype = "application/vnd.dataresource+json" if enable: if mimetype not in formatters: # define tableschema formatter from IPython.core.formatters import BaseFormatter class TableSchemaFormatter(BaseFormatter): print_method = '_repr_data_resource_' _return_type = (dict,) # register it: formatters[mimetype] = TableSchemaFormatter() # enable it if it's been disabled: formatters[mimetype].enabled = True else: # unregister tableschema mime-type if mimetype in formatters: formatters[mimetype].enabled = False