Skip to content

Instantly share code, notes, and snippets.

@jgoizueta
Last active July 15, 2022 06:32
Show Gist options
  • Select an option

  • Save jgoizueta/e7a6f28faab4ae11781ef888248f534b to your computer and use it in GitHub Desktop.

Select an option

Save jgoizueta/e7a6f28faab4ae11781ef888248f534b to your computer and use it in GitHub Desktop.
Python approximate dict comparison
import math
import re
def floats_approx_equal(a, b, rel_tol=1e-5, abs_tol=0.0):
    """Return True when *a* and *b* are equal within the given tolerances.

    The values are considered close when
    ``abs(a - b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)``.

    Delegating to ``math.isclose`` also gets the special values right, which
    the bare formula does not: an infinity is only close to an identical
    infinity (the formula reported ``inf`` as close to every finite number
    and as different from itself), and NaN is never close to anything.

    Args:
        a: First number.
        b: Second number.
        rel_tol: Maximum difference allowed relative to the larger magnitude.
        abs_tol: Minimum absolute tolerance, useful for comparisons near zero.

    Returns:
        bool: whether the two numbers are approximately equal.

    Raises:
        ValueError: if either tolerance is negative.
    """
    # The tolerances are keyword-only in math.isclose.
    return math.isclose(a, b, rel_tol=rel_tol, abs_tol=abs_tol)
# Pattern for a string made up entirely of one or more comma-separated
# numbers, e.g. "1.1", "-3" or "1.1, 2.2e-3".
# Each number is: an optional sign, optional integer digits, an optional
# decimal point, at least one digit, and an optional exponent ("E" or "e",
# because of IGNORECASE).  A trailing point with no digit after it ("1.")
# does not match.  Whitespace is allowed around the commas only.
# The pattern is compiled with VERBOSE, so the whitespace and "#" comments
# inside the literal are not part of the expression.
# The exponent digits are captured in groups, but the visible code only uses
# the match as a yes/no test.
floats_list = re.compile(r"""
^
(?:[-+]?\d*\.?\d+)(?:E([-+]?\d+))? # first float
(?: # optional sequence of:
\s*\,\s* # separator
(?:[-+]?\d*\.?\d+)(?:E([-+]?\d+))? # additional float
)*
$
""", re.IGNORECASE | re.VERBOSE)
def split_floats(n):
    """Convert a comma-separated string of numbers into a list of floats.

    Args:
        n: Text such as ``"1.1,2.2"``; every comma-separated piece must be
            accepted by ``float()``.

    Returns:
        list[float]: the parsed numbers, in their original order.

    Raises:
        ValueError: if any piece is not a valid float literal.
    """
    return list(map(float, n.split(',')))
def floats_list_approx_equal(alist, blist, rel_tol=1e-5, abs_tol=0.0):
    """Tell whether two sequences of floats match element by element.

    Sequences of different lengths are never equal.  Otherwise every pair of
    items at the same position must satisfy ``floats_approx_equal`` with the
    given tolerances.

    Args:
        alist: First sequence of numbers.
        blist: Second sequence of numbers.
        rel_tol: Relative tolerance passed to ``floats_approx_equal``.
        abs_tol: Absolute tolerance passed to ``floats_approx_equal``.

    Returns:
        bool: True when the lengths match and all pairs are close.
    """
    if len(alist) != len(blist):
        return False
    return all(
        floats_approx_equal(left, right, rel_tol, abs_tol)
        for left, right in zip(alist, blist)
    )
def values_approx_equal(a, b, rel_tol=1e-5, abs_tol=0.0, sort_keys={}):
    """Return True when ``values_approx_diff`` finds no difference.

    Args:
        a: First value (scalar, string, list, tuple or dict).
        b: Second value.
        rel_tol: Relative tolerance for float comparisons.
        abs_tol: Absolute tolerance for float comparisons.
        sort_keys: Mapping passed through to ``values_approx_diff`` to
            select lists that are compared regardless of order.

    Returns:
        bool: True if the diff list is empty.
    """
    # only_first=True asks the diff routine to stop as early as it can,
    # since only the emptiness of the result matters here.
    found = values_approx_diff(
        a, b, rel_tol, abs_tol, sort_keys=sort_keys, only_first=True
    )
    return not found
def values_approx_diff_message(indent, a, b, rel_tol=1e-5, abs_tol=0.0, sort_keys={}):
    """Build a multi-line report of the differences between two values.

    Args:
        indent: String prepended to every line of the report.
        a: First value.
        b: Second value.
        rel_tol: Relative tolerance for float comparisons.
        abs_tol: Absolute tolerance for float comparisons.
        sort_keys: Mapping passed through to ``values_approx_diff``.

    Returns:
        str: one indented line per difference, joined with newlines; the
        empty string when there are no differences.
    """
    lines = values_approx_diff(a, b, rel_tol, abs_tol, sort_keys=sort_keys)
    return '\n'.join(indent + line for line in lines)
def lists_approx_diff(a, b, rel_tol=1e-5, abs_tol=0.0, sort_keys={}, key_prefix='', only_first=False):
    """Describe the differences between two lists, position by position.

    Args:
        a: First list.
        b: Second list.
        rel_tol: Relative tolerance for float comparisons.
        abs_tol: Absolute tolerance for float comparisons.
        sort_keys: Mapping passed through to ``values_approx_diff``.
        key_prefix: Path of the list inside the enclosing structure; used in
            messages and extended with ``[index]`` for each item.
        only_first: Stop after the first item that produces a difference.

    Returns:
        list[str]: human-readable difference messages (empty if none).
    """
    found = []
    if a == b:
        return found

    if len(a) != len(b):
        # Lists of different lengths are not compared item by item.
        where = f' of "{key_prefix}" ' if key_prefix != '' else ' '
        found.append(f'Lengths{where}differ')
        return found

    for index, pair in enumerate(zip(a, b)):
        left, right = pair
        found.extend(
            values_approx_diff(
                left, right, rel_tol, abs_tol, sort_keys,
                f'{key_prefix}[{index}]', only_first,
            )
        )
        if only_first and found:
            break
    return found
def values_approx_diff(a_value, b_value, rel_tol=1e-5, abs_tol=0.0, sort_keys={}, key_prefix='', only_first=False):
    """Describe the differences between two values, tolerating float noise.

    Comparison rules, chosen from the exact ``type()`` of the operands:

    * list / tuple: compared item by item via ``lists_approx_diff``.
    * dict: compared key by key via ``dicts_approx_diff``.
    * float (against a float or int): compared with ``floats_approx_equal``.
    * str: when both strings look like comma-separated numbers (see
      ``floats_list``) they are parsed and compared approximately; otherwise
      they must be identical.
    * anything else, or operands whose types cannot be compared by one of
      the rules above: compared with ``!=``.

    A type mismatch is always reported with a "Types ... differ" message.
    The type-specific rules are then only applied when *both* operands
    support them, so mismatched inputs (for example a list against an int)
    produce messages instead of raising.

    Args:
        a_value: First value.
        b_value: Second value.
        rel_tol: Relative tolerance for float comparisons.
        abs_tol: Absolute tolerance for float comparisons.
        sort_keys: Maps a list's path to the dict key its items are sorted
            by before comparing, making the comparison order-insensitive.
            Digits in the path are replaced by ``*`` for the lookup, so the
            top-level list is ``''`` and e.g. ``'[0].b'`` is ``'[*].b'``.
            The mapping is only read, never modified.
        key_prefix: Path of the value inside the enclosing structure.
        only_first: Ask nested comparisons to stop at the first difference.

    Returns:
        list[str]: human-readable difference messages (empty if none).
    """
    spec = ' ' if key_prefix == '' else f' of "{key_prefix}" '
    diffs = []
    if a_value == b_value:
        return diffs

    a_type = type(a_value)
    b_type = type(b_value)
    if a_type != b_type:
        diffs.append(f'Types{spec}differ')

    sequence_types = (list, tuple)
    if a_type in sequence_types and b_type in sequence_types:
        if a_type == list:
            # NOTE: every run of digits in the path becomes '*', including
            # digits that are part of a dict key name, not only indexes.
            sort_key = sort_keys.get(re.sub(r'\d+', '*', key_prefix))
            if sort_key:
                # Items must provide .get() (i.e. be dicts) and the selected
                # values must be mutually comparable.
                a_value = sorted(a_value, key=lambda v: v.get(sort_key))
                b_value = sorted(b_value, key=lambda v: v.get(sort_key))
        diffs += lists_approx_diff(
            list(a_value), list(b_value),
            rel_tol, abs_tol, sort_keys, key_prefix, only_first,
        )
    elif a_type == dict and b_type == dict:
        diffs += dicts_approx_diff(
            a_value, b_value, rel_tol, abs_tol, sort_keys, key_prefix, only_first,
        )
    elif a_type == float and isinstance(b_value, (int, float)):
        if not floats_approx_equal(a_value, b_value, rel_tol, abs_tol):
            diffs.append(f'Values{spec}too different: {a_value} vs {b_value}')
    elif (a_type == str and b_type == str
          and floats_list.match(a_value) and floats_list.match(b_value)):
        a_values = split_floats(a_value)
        b_values = split_floats(b_value)
        if not floats_list_approx_equal(a_values, b_values, rel_tol, abs_tol):
            diffs.append(f'Values{spec}too different: {a_value} vs {b_value}')
    elif a_value != b_value:
        diffs.append(f'Values{spec}differ: "{a_value}" vs "{b_value}"')
    return diffs
def dicts_approx_diff(a, b, rel_tol=1e-5, abs_tol=0.0, sort_keys={}, key_prefix='', only_first=False):
    """Describe the differences between two dicts, key by key.

    Keys present in only one dict are reported first; the values of the
    keys present in both are then compared with ``values_approx_diff``.
    Paths in the messages are built as ``prefix.key``; keys are formatted
    rather than concatenated, so non-string keys (e.g. ints) are supported.

    Args:
        a: First dict.
        b: Second dict.
        rel_tol: Relative tolerance for float comparisons.
        abs_tol: Absolute tolerance for float comparisons.
        sort_keys: Mapping passed through to ``values_approx_diff``.
        key_prefix: Path of the dict inside the enclosing structure.
        only_first: Return as soon as differences are found: right after the
            key-set check if the key sets differ (all missing keys are still
            listed), otherwise after the first differing value.

    Returns:
        list[str]: human-readable difference messages (empty if none).
    """
    if key_prefix != '':
        key_prefix = key_prefix + '.'
    diffs = []
    if a == b:
        return diffs

    if a.keys() != b.keys():
        diffs += [f'Key "{key_prefix}{key}" only in first' for key in a if key not in b]
        diffs += [f'Key "{key_prefix}{key}" only in second' for key in b if key not in a]
        if only_first:
            return diffs

    for key, a_value in a.items():
        if key not in b:
            continue
        diffs += values_approx_diff(
            a_value, b[key], rel_tol, abs_tol, sort_keys,
            f'{key_prefix}{key}', only_first,
        )
        if only_first and diffs:
            return diffs
    return diffs
# --- Sample data for the demonstration below --------------------------------

# Reference structure that every other single-letter fixture is compared to.
a = {
    'aaa': 'xxx',
    'bbb': 1.1,
    'ccc': '1.1,2.2',
    'ddd': {'eee': {'fff': 3.3}},
    'xxx': True,
    'yyy': [1.1, 2.2, {'a': 1.1, 'b': 'abc'}, 'xyz'],
    'zzz': (1.1, 2.2),
}

# Same as `a`, with several floats shifted by 0.00001.
b = {
    'aaa': 'xxx',
    'bbb': 1.10001,
    'ccc': '1.10001,2.2',
    'ddd': {'eee': {'fff': 3.30001}},
    'xxx': True,
    'yyy': [1.10001, 2.20001, {'a': 1.1, 'b': 'abc'}, 'xyz'],
    'zzz': (1.10001, 2.20001),
}

# Same as `a`, except the nested float ddd.eee.fff (3.4 instead of 3.3).
c = {
    'aaa': 'xxx',
    'bbb': 1.1,
    'ccc': '1.1,2.2',
    'ddd': {'eee': {'fff': 3.4}},
    'xxx': True,
    'yyy': [1.1, 2.2, {'a': 1.1, 'b': 'abc'}, 'xyz'],
    'zzz': (1.1, 2.2),
}

# Same as `a`, except the string aaa ('xx' instead of 'xxx').
d = {
    'aaa': 'xx',
    'bbb': 1.1,
    'ccc': '1.1,2.2',
    'ddd': {'eee': {'fff': 3.3}},
    'xxx': True,
    'yyy': [1.1, 2.2, {'a': 1.1, 'b': 'abc'}, 'xyz'],
    'zzz': (1.1, 2.2),
}

# Same as `a`, plus an extra nested key ddd.ggg.
e = {
    'aaa': 'xxx',
    'bbb': 1.1,
    'ccc': '1.1,2.2',
    'ddd': {'eee': {'fff': 3.3}, 'ggg': None},
    'xxx': True,
    'yyy': [1.1, 2.2, {'a': 1.1, 'b': 'abc'}, 'xyz'],
    'zzz': (1.1, 2.2),
}

# Same as `a`, except the float-list string ccc ('1.1,2.3').
f = {
    'aaa': 'xxx',
    'bbb': 1.1,
    'ccc': '1.1,2.3',
    'ddd': {'eee': {'fff': 3.3}},
    'xxx': True,
    'yyy': [1.1, 2.2, {'a': 1.1, 'b': 'abc'}, 'xyz'],
    'zzz': (1.1, 2.2),
}

# Same as `a`, except the boolean xxx.
# NOTE: this value is never used; `g` is rebound by the next assignment.
g = {
    'aaa': 'xxx',
    'bbb': 1.1,
    'ccc': '1.1,2.2',
    'ddd': {'eee': {'fff': 3.3}},
    'xxx': False,
    'yyy': [1.1, 2.2, {'a': 1.1, 'b': 'abc'}, 'xyz'],
    'zzz': (1.1, 2.2),
}

# Differs from `a` in many places at once (types, values, extra keys).
g = {
    'aaa': 123,
    'bbb': 1.2,
    'ccc': '1.3,2.2',
    'ddd': {'eee': {'fff': 3.5}},
    'xxx': False,
    'www': 111,
    'yyy': [1.1, 2.3, {'a': 1.1, 'b': 'abcx', 'c': 111}, 'xyzz'],
    'zzz': (1.1, 2.4),
}

# Lists of records identified by 'id'; their order is not significant when
# compared with `sort_keys` below.
unordered_1 = [
    {'id': 5, 'a': 5.5, 'b': [{'x': 1.1, 'y': 2.2}, {'x': 3.3, 'y': 2.2}]},
    {'id': 1, 'a': 1.1, 'b': [{'x': 1.1, 'y': 2.2}, {'x': 3.3, 'y': 2.2}]},
]

# Records of `unordered_1` in a different order, with floats shifted by 0.00001.
unordered_2 = [
    {'id': 1, 'a': 1.10001, 'b': [{'x': 3.30001, 'y': 2.20001}, {'x': 1.10001, 'y': 2.20001}]},
    {'id': 5, 'a': 5.5, 'b': [{'x': 1.10001, 'y': 2.20001}, {'x': 3.30001, 'y': 2.20001}]},
]

# Like `unordered_1`, except 'a' of the record with id 5 (5.6 instead of 5.5).
unordered_3 = [
    {'id': 5, 'a': 5.6, 'b': [{'x': 1.1, 'y': 2.2}, {'x': 3.3, 'y': 2.2}]},
    {'id': 1, 'a': 1.1, 'b': [{'x': 1.1, 'y': 2.2}, {'x': 3.3, 'y': 2.2}]},
]

# Like `unordered_1`, except the last 'y' of the record with id 1 (2.3).
unordered_4 = [
    {'id': 5, 'a': 5.5, 'b': [{'x': 1.1, 'y': 2.2}, {'x': 3.3, 'y': 2.2}]},
    {'id': 1, 'a': 1.1, 'b': [{'x': 1.1, 'y': 2.2}, {'x': 3.3, 'y': 2.3}]},
]

# Dicts whose 'b' lists hold the same items in a different order.
unorder_sub1 = {'a': 'xyz', 'b': [{'x': 1.1, 'y': 2.2}, {'x': 3.3, 'y': 4.4}]}
unorder_sub2 = {'a': 'xyz', 'b': [{'x': 3.3, 'y': 4.4}, {'x': 1.1, 'y': 2.2}]}

# Sort the top-level list by 'id' and every record's 'b' list by 'x'.
sort_keys = {
    '': 'id',
    '[*].b': 'x',
}
# --- Demonstration: print a report for every pair that is not approx. equal --

def _report(label, first, second, **options):
    """Print `label` and the diff report unless the values are approx. equal."""
    if not values_approx_equal(first, second, **options):
        print(label + ':\n' + values_approx_diff_message(' ', first, second, **options))


# Plain comparisons against the reference dict.
_report('a!=b', a, b)
_report('a!=c', a, c)
_report('a!=d', a, d)
_report('a!=e', a, e)
_report('a!=f', a, f)
_report('a!=g', a, g)
print('-------------------------------------------')

# Order-insensitive comparisons of lists of records.
_report('u1!=u2', unordered_1, unordered_2, sort_keys=sort_keys)
_report('u1!=u3', unordered_1, unordered_3, sort_keys=sort_keys)
_report('u1!=u4', unordered_1, unordered_4, sort_keys=sort_keys)
print('-------------------------------------------')

# Order-insensitive comparison of a list nested under the key 'b'.
sub_keys = {'b': 'x'}
_report('us1!=us2', unorder_sub1, unorder_sub2, sort_keys=sub_keys)
a!=c:
Values of "ddd.eee.fff" too different: 3.3 vs 3.4
a!=d:
Values of "aaa" differ: "xxx" vs "xx"
a!=e:
Key "ddd.ggg" only in second
a!=f:
Values of "ccc" too different: 1.1,2.2 vs 1.1,2.3
a!=g:
Key "www" only in second
Types of "aaa" differ
Values of "aaa" differ: "xxx" vs "123"
Values of "bbb" too different: 1.1 vs 1.2
Values of "ccc" too different: 1.1,2.2 vs 1.3,2.2
Values of "ddd.eee.fff" too different: 3.3 vs 3.5
Values of "xxx" differ: "True" vs "False"
Values of "yyy[1]" too different: 2.2 vs 2.3
Key "yyy[2].c" only in second
Values of "yyy[2].b" differ: "abc" vs "abcx"
Values of "yyy[3]" differ: "xyz" vs "xyzz"
Values of "zzz[1]" too different: 2.2 vs 2.4
-------------------------------------------
u1!=u3:
Values of "[1].a" too different: 5.5 vs 5.6
u1!=u4:
Values of "[0].b[1].y" too different: 2.2 vs 2.3
-------------------------------------------
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment