# This file is part of Dictdiffer.
#
# Copyright (C) 2015 CERN.
# Copyright (C) 2017, 2019 ETH Zurich, Swiss Data Science Center, Jiri Kuncar.
#
# Dictdiffer is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more
# details.

"""Utils gathers helper functions, classes for the dictdiffer module."""

import math
import sys
from itertools import zip_longest

num_types = int, float
EPSILON = sys.float_info.epsilon

# Private fill value for ``zip_longest``; unlike ``None`` it can never collide
# with a real path element.
_MISSING = object()


class WildcardDict(dict):
    """Provide possibility to use special wildcard keys to access values.

    Those wildcards are:
        *: wildcard for everything that follows
        +: wildcard for anything on the same path level
    The intended use case of this are dictionaries, that utilize tuples as
    keys. Wildcards are only interpreted for non-empty tuple keys; any other
    hashable key behaves exactly like in a plain ``dict``.

        >>> from dictdiffer.utils import WildcardDict
        >>> w = WildcardDict({('foo', '*'): '* card',
        ...                   ('banana', '+'): '+ card'})
        >>> w[ ('foo', 'bar', 'baz') ]
        '* card'
        >>> w[ ('banana', 'apple') ]
        '+ card'
    """

    def __init__(self, values=None):
        """Set lookup key indices.

        :param values: a dictionary
        """
        super().__init__()
        # Prefixes (key without its last element) of the stored keys that
        # end in '*' respectively '+'.
        self.star_keys = set()
        self.plus_keys = set()

        if values is not None:
            # Go through __setitem__ so the wildcard indices get populated.
            for key, value in values.items():
                self[key] = value

    def _wildcard_path(self, key):
        """Return the stored wildcard path matching ``key`` or ``None``.

        The '+' wildcard (same level) is tried first, then the '*' wildcard
        from the longest proper prefix of ``key`` down to the empty prefix.
        """
        if not isinstance(key, tuple) or not key:
            # Wildcards are only defined for non-empty tuple paths.
            return None

        parent = key[:-1]
        if parent in self.plus_keys:
            return parent + ('+',)

        for size in range(len(key) - 1, -1, -1):
            prefix = key[:size]
            if prefix in self.star_keys:
                return prefix + ('*',)
        return None

    def __getitem__(self, key):
        """Return the value corresponding to the key, regarding wildcards.

        If the key doesn't exist it tries the '+' wildcard and then the
        '*' wildcard.

        :raises KeyError: if neither the key nor a matching wildcard exists

            >>> w = WildcardDict({('foo', '*'): '* card',
            ...                   ('banana', '+'): '+ card'})
            >>> w[ ('foo', 'bar') ]
            '* card'
            >>> w[ ('foo', 'bar', 'baz') ]
            '* card'
            >>> w[ ('banana', 'apple') ]
            '+ card'
            >>> w[ ('banana', 'apple', 'mango') ]
            Traceback (most recent call last):
                ...
            KeyError: ('banana', 'apple', 'mango')
        """
        try:
            return super().__getitem__(key)
        except KeyError:
            path = self._wildcard_path(key)

        if path is None:
            raise KeyError(key)
        return super().__getitem__(path)

    def __setitem__(self, key, value):
        """Set the item for a given key (path)."""
        super().__setitem__(key, value)

        # Only non-empty tuples can carry a wildcard as their last element;
        # indexing other keys (ints, empty tuples, ...) would fail or
        # register bogus prefixes.
        if isinstance(key, tuple) and key:
            if key[-1] == '+':
                self.plus_keys.add(key[:-1])
            elif key[-1] == '*':
                self.star_keys.add(key[:-1])

    def query_path(self, key):
        """Return the key (path) that matches the queried key.

        :raises KeyError: if neither the key nor a matching wildcard exists

            >>> w = WildcardDict({('foo', '*'): 'banana'})
            >>> w.query_path(('foo', 'bar', 'baz'))
            ('foo', '*')
        """
        if key in self:
            return key

        path = self._wildcard_path(key)
        if path is None:
            raise KeyError(key)
        return path


class PathLimit:
    """Class to limit recursion depth during the dictdiffer.diff execution."""

    def __init__(self, path_limits=None, final_key=None):
        """Initialize a dictionary structure to determine a path limit.

        :param path_limits: list of keys (tuples) determining the path limits;
                            ``None`` means no limits
        :param final_key: the key used in the dictionary to determine if the
                          path is final

            >>> pl = PathLimit( [('foo', 'bar')] , final_key='!@#$%FINAL')
            >>> pl.dict
            {'foo': {'bar': {'!@#$%FINAL': True}}}
        """
        self.final_key = final_key if final_key else '!@#$FINAL'
        self.dict = {}

        for key_path in path_limits or ():
            # Walk/create one nested dict per path element, then mark the
            # innermost one as the end of a limit.
            containing = self.dict
            for key in key_path:
                containing = containing.setdefault(key, {})
            containing[self.final_key] = True

    def path_is_limit(self, key_path):
        """Query the PathLimit object if the given key_path is a limit.

        At every level an exact key match takes precedence over a stored
        '*' entry; there is no backtracking once a branch has been chosen.

            >>> pl = PathLimit( [('foo', 'bar')] , final_key='!@#$%FINAL')
            >>> pl.path_is_limit( ('foo', 'bar') )
            True
        """
        containing = self.dict
        for key in key_path:
            if key in containing:
                containing = containing[key]
            elif '*' in containing:
                containing = containing['*']
            else:
                return False

        return containing.get(self.final_key, False)


def create_dotted_node(node):
    """Create the *dotted node* notation for the dictdiffer.diff patches.

    Paths made of strings are joined with '.'. Paths containing a non-string
    key, or a string key that itself contains a '.', are returned as a list
    because the dotted form could not be split back unambiguously.

        >>> create_dotted_node( ['foo', 'bar', 'baz'] )
        'foo.bar.baz'
        >>> create_dotted_node( ['foo', 0] )
        ['foo', 0]
        >>> create_dotted_node( ['a.b', 'c'] )
        ['a.b', 'c']
    """
    if all(isinstance(key, str) and '.' not in key for key in node):
        return '.'.join(node)
    return list(node)


def get_path(patch):
    """Return the path for a given dictdiffer.diff patch.

    The path is the patch node (``patch[1]``, dotted string or sequence of
    keys) and, for every action other than 'change', additionally the key of
    the first changed entry (``patch[2][0][0]``).

        >>> get_path(('add', 'a.b', [('c', 1)]))
        ('a', 'b', 'c')
        >>> get_path(('change', 'a.b', (1, 2)))
        ('a', 'b')
    """
    action, node = patch[0], patch[1]

    if node == '':
        keys = []
    elif isinstance(node, str):
        keys = node.split('.')
    else:
        # Copy, so the patch's own node sequence is never touched.
        keys = list(node)

    if action != 'change':
        keys.append(patch[2][0][0])
    return tuple(keys)


def is_super_path(path1, path2):
    """Check if one path is the super path of the other.

    Super path means, that the n values in tuple a are equal to the first n
    of m values in tuple b.

        >>> is_super_path( ('foo', 'bar'), ('foo', 'bar') )
        True

        >>> is_super_path( ('foo', 'bar'), ('foo', 'bar', 'baz') )
        True

        >>> is_super_path( ('foo', 'bar'), ('foo', 'apple', 'banana') )
        False
    """
    # A dedicated sentinel marks "path1 is exhausted", so a genuine ``None``
    # key inside path1 is compared like any other value.
    return all(
        left is _MISSING or left == right
        for left, right in zip_longest(path1, path2, fillvalue=_MISSING)
    )


def nested_hash(obj):
    """Create a hash of nested, mutable data structures.

    It shall be noted, that the uniqueness of those hashes in general cases
    is not assured but it should be enough for the cases occurring during the
    merging process.

    Hashable objects return ``hash(obj)``. Lists and tuples are hashed
    element-wise in order; sets and dicts are hashed independent of their
    iteration order. Unhashable objects of any other type yield ``None``.
    """
    try:
        return hash(obj)
    except TypeError:
        pass

    if isinstance(obj, (list, tuple)):
        return hash(tuple(map(nested_hash, obj)))

    if isinstance(obj, set):
        items = obj
    elif isinstance(obj, dict):
        items = obj.items()
    else:
        # Unsupported unhashable type: keep the historical "no hash" result.
        return None

    try:
        ordered = sorted(items)
    except TypeError:
        # Elements/keys that cannot be ordered against each other (e.g.
        # ``1`` and ``'a'``): combine the element hashes order-independently.
        return hash(frozenset(map(nested_hash, items)))
    return hash(tuple(map(nested_hash, ordered)))


def dot_lookup(source, lookup, parent=False):
    """Allow you to reach dictionary items with string or list lookup.

    Recursively find value by lookup key split by '.'. A list or tuple of
    keys is accepted as well; while descending, keys applied to a list are
    converted with ``int``.

        >>> from dictdiffer.utils import dot_lookup
        >>> dot_lookup({'a': {'b': 'hello'}}, 'a.b')
        'hello'

    If parent argument is True, returns the parent node of matched
    object.

        >>> dot_lookup({'a': {'b': 'hello'}}, 'a.b', parent=True)
        {'b': 'hello'}

    If node is empty value, returns the whole dictionary object.

        >>> dot_lookup({'a': {'b': 'hello'}}, '')
        {'a': {'b': 'hello'}}

    :raises TypeError: if lookup is neither ``None``, a string, a list nor a
                       tuple
    """
    if lookup is None or lookup == '' or lookup == [] or lookup == ():
        return source

    if isinstance(lookup, str):
        keys = lookup.split('.')
    elif isinstance(lookup, (list, tuple)):
        keys = lookup
    else:
        raise TypeError('lookup must be string or list')

    if parent:
        keys = keys[:-1]

    value = source
    for key in keys:
        if isinstance(value, list):
            # Dotted strings carry list indices as text.
            key = int(key)
        value = value[key]
    return value


def are_different(first, second, tolerance, absolute_tolerance=None):
    """Check if 2 values are different.

    In case of numerical values, the tolerance is used to check if the values
    are different.
    In all other cases, the difference is straight forward.

    :param first: first value to compare
    :param second: second value to compare
    :param tolerance: relative tolerance passed to ``math.isclose``; a falsy
                      value is treated as 0
    :param absolute_tolerance: absolute tolerance passed to ``math.isclose``;
                               a falsy value is treated as 0
    :return: ``True`` if the values are considered different
    """
    if first == second:
        # values are same - simple case
        return False

    # A value that is not equal to itself is treated as 'NaN'.
    first_is_nan = bool(first != first)
    second_is_nan = bool(second != second)

    if first_is_nan or second_is_nan:
        # two 'NaN' values are not different (see issue #114)
        return not (first_is_nan and second_is_nan)

    if isinstance(first, num_types) and isinstance(second, num_types):
        # two numerical values are compared with tolerance
        return not math.isclose(
            first,
            second,
            rel_tol=tolerance or 0,
            abs_tol=absolute_tolerance or 0,
        )

    # we got different values
    return True