Last active
July 15, 2022 06:32
-
-
Save jgoizueta/e7a6f28faab4ae11781ef888248f534b to your computer and use it in GitHub Desktop.
Python approximate dict comparison
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import re | |
def floats_approx_equal(a, b, rel_tol=1e-5, abs_tol=0.0):
    """Return True when *a* and *b* are equal within the given tolerances.

    The comparison is ``abs(a - b) <= max(rel_tol * max(abs(a), abs(b)),
    abs_tol)``.  It is delegated to ``math.isclose`` so that non-finite
    values are handled consistently: two equal infinities are close, an
    infinity is never close to a finite number, and NaN is never close to
    anything.

    Args:
        a: First number.
        b: Second number.
        rel_tol: Maximum allowed difference relative to the larger magnitude.
        abs_tol: Minimum absolute tolerance (useful for values near zero).

    Returns:
        bool: whether the two numbers are approximately equal.

    Raises:
        ValueError: if a tolerance is negative (raised by ``math.isclose``).
    """
    # Function-scope import keeps this block self-contained.
    import math

    # rel_tol and abs_tol are keyword-only parameters of math.isclose.
    return math.isclose(a, b, rel_tol=rel_tol, abs_tol=abs_tol)
# Pattern for a string made up solely of one or more comma-separated decimal
# numbers, e.g. "1.1,2.2".  Each number has an optional sign, optional integer
# digits, an optional decimal point followed by at least one digit, and an
# optional exponent.  Whitespace is allowed around the commas only.
# re.VERBOSE permits the multi-line layout and inline comments; re.IGNORECASE
# lets the exponent marker be written "E" or "e".  The capturing groups hold
# the exponent digits; the code in this file only tests whether the pattern
# matches.
floats_list = re.compile(r"""
    ^
    (?:[-+]?\d*\.?\d+)(?:E([-+]?\d+))? # first float
    (?: # optional sequence of:
    \s*\,\s* # separator
    (?:[-+]?\d*\.?\d+)(?:E([-+]?\d+))? # additional float
    )*
    $
""", re.IGNORECASE | re.VERBOSE)
def split_floats(n):
    """Parse a comma-separated string of numbers into a list of floats.

    Args:
        n: Text such as ``"1.1,2.2"``; each comma-delimited piece is handed
            to ``float``.

    Returns:
        list[float]: the parsed numbers, in their original order.

    Raises:
        ValueError: if any piece is not accepted by ``float``.
    """
    pieces = n.split(',')
    return list(map(float, pieces))
def floats_list_approx_equal(alist, blist, rel_tol=1e-5, abs_tol=0.0):
    """Return True when two sequences of numbers match element by element.

    Sequences of different lengths are never equal.  Otherwise every pair of
    corresponding elements must satisfy ``floats_approx_equal`` with the
    given tolerances.

    Args:
        alist: First sequence of numbers.
        blist: Second sequence of numbers.
        rel_tol: Relative tolerance forwarded to ``floats_approx_equal``.
        abs_tol: Absolute tolerance forwarded to ``floats_approx_equal``.

    Returns:
        bool: whether the sequences are approximately equal.
    """
    if len(alist) != len(blist):
        return False
    # all() stops at the first pair that is out of tolerance.
    return all(
        floats_approx_equal(left, right, rel_tol, abs_tol)
        for left, right in zip(alist, blist)
    )
def values_approx_equal(a, b, rel_tol=1e-5, abs_tol=0.0, sort_keys={}):
    """Return True when ``values_approx_diff`` finds no difference.

    Args:
        a: First value (scalar, string, list, tuple or dict).
        b: Second value.
        rel_tol: Relative tolerance for float comparisons.
        abs_tol: Absolute tolerance for float comparisons.
        sort_keys: Mapping forwarded unchanged to ``values_approx_diff``.

    Returns:
        bool: True if no difference message was produced.
    """
    # only_first=True asks the diff routines to stop early once a difference
    # has been found; the messages themselves are not needed here.
    found = values_approx_diff(
        a, b, rel_tol, abs_tol, sort_keys=sort_keys, only_first=True
    )
    return not found
def values_approx_diff_message(indent, a, b, rel_tol=1e-5, abs_tol=0.0, sort_keys={}):
    """Build a multi-line report of the differences between *a* and *b*.

    Args:
        indent: String prepended to every line of the report.
        a: First value.
        b: Second value.
        rel_tol: Relative tolerance for float comparisons.
        abs_tol: Absolute tolerance for float comparisons.
        sort_keys: Mapping forwarded unchanged to ``values_approx_diff``.

    Returns:
        str: one difference message per line, each prefixed with *indent*;
        the empty string when there are no differences.
    """
    messages = values_approx_diff(a, b, rel_tol, abs_tol, sort_keys=sort_keys)
    return '\n'.join(indent + message for message in messages)
def lists_approx_diff(a, b, rel_tol=1e-5, abs_tol=0.0, sort_keys={}, key_prefix='', only_first=False):
    """List the differences between two sequences, compared position by position.

    Args:
        a: First sequence.
        b: Second sequence.
        rel_tol: Relative tolerance for float comparisons.
        abs_tol: Absolute tolerance for float comparisons.
        sort_keys: Mapping forwarded to ``values_approx_diff`` for nested lists.
        key_prefix: Path of this sequence inside the compared structure; used
            in messages and extended with ``[i]`` for each element.
        only_first: Stop after the first element that yields a difference.

    Returns:
        list[str]: difference messages; empty when the sequences match.
    """
    if a == b:
        return []

    if len(a) != len(b):
        # Elements are not compared when the lengths disagree.
        where = f' of "{key_prefix}" ' if key_prefix != '' else ' '
        return [f'Lengths{where}differ']

    found = []
    for index, pair in enumerate(zip(a, b)):
        left, right = pair
        element_path = f'{key_prefix}[{index}]'
        found.extend(
            values_approx_diff(left, right, rel_tol, abs_tol, sort_keys, element_path, only_first)
        )
        if only_first and found:
            break
    return found
def values_approx_diff(a_value, b_value, rel_tol=1e-5, abs_tol=0.0, sort_keys = {}, key_prefix='', only_first=False):
    """Describe how two values differ, treating floats as approximately equal.

    Lists, tuples and dicts are compared recursively.  Floats are compared
    with ``floats_approx_equal``.  Strings that both look like
    comma-separated number lists (see ``floats_list``) are parsed and
    compared numerically; any other values must be exactly equal.

    Args:
        a_value: First value.
        b_value: Second value.
        rel_tol: Relative tolerance for float comparisons.
        abs_tol: Absolute tolerance for float comparisons.
        sort_keys: Mapping from a path pattern to a dict key.  A list whose
            path matches a pattern is sorted by that key on both sides before
            being compared, so element order is ignored.  In a pattern every
            list index is written ``[*]`` (e.g. ``'[*].b'``); ``''`` is the
            top-level value.  The elements of such a list must provide
            ``.get`` (i.e. be dicts).  This mapping is only read.
        key_prefix: Path of the value inside the compared structure, used in
            the messages.
        only_first: Forwarded to the container comparisons so they can stop
            at the first difference.

    Returns:
        list[str]: difference messages; empty when the values are
        approximately equal.
    """
    if a_value == b_value:
        return []

    spec = ' ' if key_prefix == '' else f' of "{key_prefix}" '
    a_type = type(a_value)

    if a_type != type(b_value):
        # Every branch below assumes both operands share a type (len(),
        # .keys(), subtraction, regex matching).  Descending with mismatched
        # operands raises TypeError/AttributeError, so report and stop here.
        return [
            f'Types{spec}differ',
            f'Values{spec}differ: "{a_value}" vs "{b_value}"',
        ]

    if a_type == list:
        # Only list indexes become wildcards; digits that are part of a key
        # name (e.g. "v2") are left alone.
        sort_key = sort_keys.get(re.sub(r'\[\d+\]', '[*]', key_prefix))
        if sort_key is None:
            # Fallback: patterns written for the older rule, which replaced
            # every run of digits in the path.
            sort_key = sort_keys.get(re.sub(r'\d+', '*', key_prefix))
        if sort_key:
            a_value = sorted(a_value, key=lambda v: v.get(sort_key))
            b_value = sorted(b_value, key=lambda v: v.get(sort_key))
        return lists_approx_diff(a_value, b_value, rel_tol, abs_tol, sort_keys, key_prefix, only_first)

    if a_type == tuple:
        return lists_approx_diff(list(a_value), list(b_value), rel_tol, abs_tol, sort_keys, key_prefix, only_first)

    if a_type == dict:
        return dicts_approx_diff(a_value, b_value, rel_tol, abs_tol, sort_keys, key_prefix, only_first)

    if a_type == float:
        if floats_approx_equal(a_value, b_value, rel_tol, abs_tol):
            return []
        return [f'Values{spec}too different: {a_value} vs {b_value}']

    if a_type == str:
        if floats_list.match(a_value) and floats_list.match(b_value):
            # Both strings are number lists: compare them numerically.
            if floats_list_approx_equal(split_floats(a_value), split_floats(b_value), rel_tol, abs_tol):
                return []
            return [f'Values{spec}too different: {a_value} vs {b_value}']
        # Plain strings already known to be unequal.
        return [f'Values{spec}differ: "{a_value}" vs "{b_value}"']

    if a_value != b_value:
        return [f'Values{spec}differ: "{a_value}" vs "{b_value}"']
    return []
def dicts_approx_diff(a, b, rel_tol=1e-5, abs_tol=0.0, sort_keys = {}, key_prefix='', only_first=False):
    """List the differences between two dicts, compared key by key.

    Keys present in only one dict are reported first; the values of the
    shared keys are then compared with ``values_approx_diff``.

    Args:
        a: First dict.
        b: Second dict.
        rel_tol: Relative tolerance for float comparisons.
        abs_tol: Absolute tolerance for float comparisons.
        sort_keys: Mapping forwarded to ``values_approx_diff`` for nested lists.
        key_prefix: Path of this dict inside the compared structure; child
            paths are ``<key_prefix>.<key>``.
        only_first: Return as soon as some difference has been found.  All
            missing-key messages are still returned together.

    Returns:
        list[str]: difference messages; empty when the dicts match.
    """
    if a == b:
        return []

    if key_prefix != '':
        key_prefix = key_prefix + '.'

    diffs = []
    if a.keys() != b.keys():
        # Paths are built with f-strings so non-string keys (ints, tuples, ...)
        # are formatted instead of raising TypeError on str concatenation.
        diffs += [f'Key "{key_prefix}{key}" only in first' for key in a if key not in b]
        diffs += [f'Key "{key_prefix}{key}" only in second' for key in b if key not in a]
        if only_first:
            return diffs

    for key, a_value in a.items():
        if key not in b:
            continue
        diffs += values_approx_diff(a_value, b[key], rel_tol, abs_tol, sort_keys, f'{key_prefix}{key}', only_first)
        if only_first and diffs:
            return diffs
    return diffs
# Baseline fixture: the demo comparisons further down compare this dict with
# each of the variants that follow.
a = {
    'aaa':'xxx',
    'bbb':1.1,
    'ccc':'1.1,2.2',
    'ddd':{
        'eee':{
            'fff':3.3
        }
    },
    'xxx': True,
    'yyy': [1.1,2.2,{'a':1.1,'b':'abc'},'xyz'],
    'zzz': (1.1,2.2)
}
# Like `a`, but several floats (including one inside the 'ccc' string) are
# larger by 0.00001, which is within the default rel_tol of 1e-5.
b = {
    'aaa':'xxx',
    'bbb':1.10001,
    'ccc':'1.10001,2.2',
    'ddd':{
        'eee':{
            'fff':3.30001
        }
    },
    'xxx': True,
    'yyy': [1.10001,2.20001,{'a':1.1,'b':'abc'},'xyz'],
    'zzz': (1.10001,2.20001)
}
# Like `a`, but the nested float ddd.eee.fff is 3.4 instead of 3.3.
c = {
    'aaa':'xxx',
    'bbb':1.1,
    'ccc':'1.1,2.2',
    'ddd':{
        'eee':{
            'fff':3.4
        }
    },
    'xxx': True,
    'yyy': [1.1,2.2,{'a':1.1,'b':'abc'},'xyz'],
    'zzz': (1.1,2.2)
}
# Like `a`, but the plain string 'aaa' is 'xx' instead of 'xxx'.
d = {
    'aaa':'xx',
    'bbb':1.1,
    'ccc':'1.1,2.2',
    'ddd':{
        'eee':{
            'fff':3.3
        }
    },
    'xxx': True,
    'yyy': [1.1,2.2,{'a':1.1,'b':'abc'},'xyz'],
    'zzz': (1.1,2.2)
}
# Like `a`, but with an extra nested key ddd.ggg.
e = {
    'aaa':'xxx',
    'bbb':1.1,
    'ccc':'1.1,2.2',
    'ddd':{
        'eee':{
            'fff':3.3
        },
        'ggg':None
    },
    'xxx': True,
    'yyy': [1.1,2.2,{'a':1.1,'b':'abc'},'xyz'],
    'zzz': (1.1,2.2)
}
# Like `a`, but the number-list string 'ccc' ends in 2.3 instead of 2.2.
f = {
    'aaa':'xxx',
    'bbb':1.1,
    'ccc':'1.1,2.3',
    'ddd':{
        'eee':{
            'fff':3.3
        }
    },
    'xxx': True,
    'yyy': [1.1,2.2,{'a':1.1,'b':'abc'},'xyz'],
    'zzz': (1.1,2.2)
}
# Like `a`, but the boolean 'xxx' is False.
# NOTE(review): the name `g` is rebound by the very next assignment, so this
# fixture is never used by the comparisons below.
g = {
    'aaa':'xxx',
    'bbb':1.1,
    'ccc':'1.1,2.2',
    'ddd':{
        'eee':{
            'fff':3.3
        }
    },
    'xxx': False,
    'yyy': [1.1,2.2,{'a':1.1,'b':'abc'},'xyz'],
    'zzz': (1.1,2.2)
}
# Differs from `a` in many places at once: a changed type ('aaa'), changed
# floats, an extra top-level key ('www'), and changes inside the nested
# list, dict and tuple.  This is the `g` the comparisons below actually see.
g = {
    'aaa':123,
    'bbb':1.2,
    'ccc':'1.3,2.2',
    'ddd':{
        'eee':{
            'fff':3.5
        }
    },
    'xxx': False,
    'www': 111,
    'yyy': [1.1,2.3,{'a':1.1,'b':'abcx','c':111},'xyzz'],
    'zzz': (1.1,2.4)
}
# Fixtures for order-insensitive list comparison: lists of records that each
# carry an 'id' and a nested list 'b' of {'x', 'y'} dicts.
unordered_1 = [
    { 'id': 5, 'a': 5.5, 'b': [{ 'x':1.1, 'y': 2.2 }, { 'x': 3.3, 'y': 2.2 }] },
    { 'id': 1, 'a': 1.1, 'b': [{ 'x':1.1, 'y': 2.2 }, { 'x': 3.3, 'y': 2.2 }] }
]
# Same records as unordered_1 in the opposite order; the 'b' list of record
# id 1 is reversed as well, and several floats are larger by 0.00001.
unordered_2 = [
    { 'id': 1, 'a': 1.10001, 'b': [{ 'x': 3.30001, 'y': 2.20001 }, { 'x':1.10001, 'y': 2.20001 }] },
    { 'id': 5, 'a': 5.5, 'b': [{ 'x':1.10001, 'y': 2.20001 }, { 'x': 3.30001, 'y': 2.20001 }] }
]
# Like unordered_1, but record id 5 has 'a' 5.6 instead of 5.5.
unordered_3 = [
    { 'id': 5, 'a': 5.6, 'b': [{ 'x':1.1, 'y': 2.2 }, { 'x': 3.3, 'y': 2.2 }] },
    { 'id': 1, 'a': 1.1, 'b': [{ 'x':1.1, 'y': 2.2 }, { 'x': 3.3, 'y': 2.2 }] }
]
# Like unordered_1, but the second 'b' element of record id 1 has 'y' 2.3.
unordered_4 = [
    { 'id': 5, 'a': 5.5, 'b': [{ 'x':1.1, 'y': 2.2 }, { 'x': 3.3, 'y': 2.2 }] },
    { 'id': 1, 'a': 1.1, 'b': [{ 'x':1.1, 'y': 2.2 }, { 'x': 3.3, 'y': 2.3 }] }
]
# Two dicts whose 'b' lists hold the same elements in a different order.
unorder_sub1 = { 'a': 'xyz', 'b': [{ 'x':1.1, 'y': 2.2 }, { 'x': 3.3, 'y': 4.4 }] }
unorder_sub2 = { 'a': 'xyz', 'b': [{ 'x': 3.3, 'y': 4.4 }, { 'x':1.1, 'y': 2.2 }] }
# Path pattern -> key used to sort the list found at that path before it is
# compared.  '' is the top-level list (sorted by 'id'); '[*].b' is the 'b'
# list of every top-level element (sorted by 'x').
sort_keys = {
    '': 'id',
    '[*].b': 'x'
}
# Demo: compare the baseline dict with each variant.  A section is printed
# only for the pairs that are not approximately equal.
for name, variant in (('b', b), ('c', c), ('d', d), ('e', e), ('f', f), ('g', g)):
    if values_approx_equal(a, variant):
        continue
    print(f'a!={name}:\n' + values_approx_diff_message(' ', a, variant))
print('-------------------------------------------')
# Order-insensitive comparison of the record lists, driven by sort_keys.
for name, variant in (('u2', unordered_2), ('u3', unordered_3), ('u4', unordered_4)):
    if values_approx_equal(unordered_1, variant, sort_keys=sort_keys):
        continue
    print(f'u1!={name}:\n' + values_approx_diff_message(' ', unordered_1, variant, sort_keys=sort_keys))
print('-------------------------------------------')
# Order-insensitive comparison of a list nested under the dict key 'b'.
sub_keys = { 'b': 'x' }
if not values_approx_equal(unorder_sub1, unorder_sub2, sort_keys=sub_keys):
    report = values_approx_diff_message(' ', unorder_sub1, unorder_sub2, sort_keys=sub_keys)
    print('us1!=us2:\n' + report)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| a!=c: | |
| Values of "ddd.eee.fff" too different: 3.3 vs 3.4 | |
| a!=d: | |
| Values of "aaa" differ: "xxx" vs "xx" | |
| a!=e: | |
| Key "ddd.ggg" only in second | |
| a!=f: | |
| Values of "ccc" too different: 1.1,2.2 vs 1.1,2.3 | |
| a!=g: | |
| Key "www" only in second | |
| Types of "aaa" differ | |
| Values of "aaa" differ: "xxx" vs "123" | |
| Values of "bbb" too different: 1.1 vs 1.2 | |
| Values of "ccc" too different: 1.1,2.2 vs 1.3,2.2 | |
| Values of "ddd.eee.fff" too different: 3.3 vs 3.5 | |
| Values of "xxx" differ: "True" vs "False" | |
| Values of "yyy[1]" too different: 2.2 vs 2.3 | |
| Key "yyy[2].c" only in second | |
| Values of "yyy[2].b" differ: "abc" vs "abcx" | |
| Values of "yyy[3]" differ: "xyz" vs "xyzz" | |
| Values of "zzz[1]" too different: 2.2 vs 2.4 | |
| ------------------------------------------- | |
| u1!=u3: | |
| Values of "[1].a" too different: 5.5 vs 5.6 | |
| u1!=u4: | |
| Values of "[0].b[1].y" too different: 2.2 vs 2.3 | |
| ------------------------------------------- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment