Skip to content

Instantly share code, notes, and snippets.

@LuluBeatson
Last active November 24, 2025 14:13
Show Gist options
  • Select an option

  • Save LuluBeatson/dbf8d9a0141047e3a60eb1b6a1008ca8 to your computer and use it in GitHub Desktop.

Select an option

Save LuluBeatson/dbf8d9a0141047e3a60eb1b6a1008ca8 to your computer and use it in GitHub Desktop.
Compare Tool Schema
#!/usr/bin/env python3
"""
Compare two JSON files, or two directories of JSON files, for equivalence,
ignoring field order.
Array items are matched by their "name" field.
Supports both .json and .snap files.
"""
import json
import sys
from pathlib import Path
from typing import Any, List, Tuple
def _index_by_name(items: List[Any]):
    """
    Map each item's "name" value to the item itself.
    Returns None unless every item is a dict carrying a hashable, unique
    "name"; an empty list indexes to an empty dict.
    """
    indexed = {}
    for item in items:
        if not isinstance(item, dict) or "name" not in item:
            return None
        name = item["name"]
        try:
            duplicate = name in indexed
        except TypeError:
            # Unhashable name (e.g. a list) cannot serve as a lookup key
            return None
        if duplicate:
            # Ambiguous names: the caller falls back to positional matching
            return None
        indexed[name] = item
    return indexed


def find_differences(
    obj1: Any, obj2: Any, path: str = "$"
) -> List[Tuple[str, str, Any, Any]]:
    """
    Find all differences between two objects and return their paths.
    Returns list of tuples: (path, diff_type, value1, value2)

    diff_type is one of:
      type_mismatch     - values are the type names of the two objects
      missing_in_first  - key / named item only present in obj2
      missing_in_second - key / named item only present in obj1
      length_mismatch   - values are the two list lengths
      value_mismatch    - differing primitive values

    Lists whose items all carry a unique "name" are matched item-by-item on
    that name, so an added or removed item is reported as missing instead of
    shifting every following comparison. Other lists are compared by
    position, and only when their lengths agree.
    """
    diffs = []
    if type(obj1) is not type(obj2):
        diffs.append(
            (
                path,
                "type_mismatch",
                type(obj1).__name__,
                type(obj2).__name__,
            )
        )
        return diffs
    if isinstance(obj1, dict):
        for key in sorted(obj1.keys() | obj2.keys()):
            new_path = f"{path}.{key}"
            if key not in obj1:
                diffs.append((new_path, "missing_in_first", None, obj2[key]))
            elif key not in obj2:
                diffs.append((new_path, "missing_in_second", obj1[key], None))
            else:
                diffs.extend(find_differences(obj1[key], obj2[key], new_path))
    elif isinstance(obj1, list):
        if len(obj1) != len(obj2):
            diffs.append((path, "length_mismatch", len(obj1), len(obj2)))
        named1 = _index_by_name(obj1)
        named2 = _index_by_name(obj2)
        if named1 is not None and named2 is not None:
            # Match by name: first everything in obj1, then obj2-only items
            for name, item1 in named1.items():
                new_path = f"{path}[name={name}]"
                if name in named2:
                    diffs.extend(
                        find_differences(item1, named2[name], new_path)
                    )
                else:
                    diffs.append((new_path, "missing_in_second", item1, None))
            for name, item2 in named2.items():
                if name not in named1:
                    diffs.append(
                        (f"{path}[name={name}]", "missing_in_first", None, item2)
                    )
        elif len(obj1) == len(obj2):
            # No usable names: positional comparison is the best available
            for i, (item1, item2) in enumerate(zip(obj1, obj2)):
                # Use 'name' field in path if available
                if isinstance(item1, dict) and "name" in item1:
                    new_path = f"{path}[name={item1.get('name')}]"
                else:
                    new_path = f"{path}[{i}]"
                diffs.extend(find_differences(item1, item2, new_path))
    elif obj1 != obj2:
        # For primitive values
        diffs.append((path, "value_mismatch", obj1, obj2))
    return diffs
def normalize_value(value: Any) -> Any:
    """
    Recursively normalize a value so equivalent JSON compares equal.

    Dicts: keys are visited in sorted order; entries whose normalized value
    is False or an empty dict are dropped (an omitted bool is treated as
    False, an omitted object as {}).
    Lists: items are normalized, then sorted by their "name" field when every
    item is a dict that has one, with the item's canonical JSON text as a
    tie-break so duplicate names still order deterministically. Lists without
    names, or whose names cannot be ordered against each other, are sorted by
    canonical JSON text alone. If even that fails the items keep their
    original order.
    Anything else is returned unchanged.
    """
    if isinstance(value, dict):
        normalized_dict = {}
        for k in sorted(value):
            normalized_v = normalize_value(value[k])
            # Skip False boolean values and empty dictionaries
            if normalized_v is False:
                continue
            if isinstance(normalized_v, dict) and not normalized_v:
                continue
            normalized_dict[k] = normalized_v
        return normalized_dict

    if isinstance(value, list):
        # Normalize once up front; every sorting strategy below reuses it
        items = [normalize_value(item) for item in value]

        def canonical(item: Any) -> str:
            return json.dumps(item, sort_keys=True)

        if all(isinstance(item, dict) and "name" in item for item in value):
            try:
                # Tie-break on canonical JSON so equal names do not leave
                # the result dependent on input order
                return sorted(
                    items, key=lambda x: (x.get("name", ""), canonical(x))
                )
            except TypeError:
                # Names of mixed, unorderable types: use the JSON-only sort
                pass
        try:
            return sorted(items, key=canonical)
        except TypeError:
            # Items that cannot be serialized: leave them in input order
            return items

    return value
def compare_json_files(
    file1: str, file2: str
) -> Tuple[bool, List[Tuple[str, str, Any, Any]]]:
    """
    Compare two JSON files for equivalence.
    Returns tuple of (is_equivalent, differences).

    Both files are parsed, passed through normalize_value, and compared;
    when they differ, find_differences supplies the list of differences.
    Any failure (missing file, invalid JSON, or another error) is reported
    on stderr and yields (False, []), so an empty difference list together
    with False means the comparison could not be carried out.
    """
    try:
        # Read as UTF-8 explicitly: the platform default encoding may differ
        # and would mis-decode or reject non-ASCII JSON text
        with open(file1, "r", encoding="utf-8") as f1:
            data1 = json.load(f1)
        with open(file2, "r", encoding="utf-8") as f2:
            data2 = json.load(f2)
        # Normalize both structures
        normalized1 = normalize_value(data1)
        normalized2 = normalize_value(data2)
        # Compare normalized structures
        if normalized1 == normalized2:
            return (True, [])
        return (False, find_differences(normalized1, normalized2))
    except FileNotFoundError as e:
        print(f"Error: {e}", file=sys.stderr)
        return (False, [])
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}", file=sys.stderr)
        return (False, [])
    except Exception as e:
        # Deliberate catch-all: one bad file must not abort a directory run
        print(f"Unexpected error: {e}", file=sys.stderr)
        return (False, [])
def format_diff(path: str, diff_type: str, val1: Any, val2: Any) -> str:
    """
    Render one difference as a small multi-line text block.
    The first line is the path; the following lines show side (1) and
    side (2) in a layout chosen by diff_type. An unrecognised diff_type
    produces the path line only.
    """
    header = f" {path}:"
    if diff_type == "type_mismatch":
        body = [f" (1) type: {val1}", f" (2) type: {val2}"]
    elif diff_type == "missing_in_first":
        body = [" (1) <missing>", f" (2) {val2!r}"]
    elif diff_type == "missing_in_second":
        body = [f" (1) {val1!r}", " (2) <missing>"]
    elif diff_type == "length_mismatch":
        body = [f" (1) length: {val1}", f" (2) length: {val2}"]
    elif diff_type == "value_mismatch":
        # Values go on their own lines beneath each side marker
        body = [" (1)", f" {val1!r}", " (2)", f" {val2!r}"]
    else:
        body = []
    return "\n".join([header, *body])
def main():
    """
    Command-line entry point.
    Expects exactly two arguments that are either both files or both
    directories. Files are compared directly; directories are compared
    file-by-file over the *.json and *.snap entries found at their top
    level, followed by a summary. Exits with status 0 only when everything
    compared is equivalent and nothing is missing, otherwise 1.
    """
    if len(sys.argv) != 3:
        print("Usage: compare_json.py <path1> <path2>")
        print(" path can be a JSON file or directory")
        sys.exit(1)

    path1, path2 = Path(sys.argv[1]), Path(sys.argv[2])

    def check_pair(label, left, right):
        # Compare one pair, print its verdict and any diffs; True if equal
        same, found = compare_json_files(str(left), str(right))
        if same:
            print(f"✓ {label}")
            return True
        print(f"✗ {label}")
        for entry in found:
            print(format_diff(*entry))
        return False

    def snapshot_files(root):
        # Index a directory's JSON and snap files by bare file name
        return {
            entry.name: entry
            for pattern in ("*.json", "*.snap")
            for entry in root.glob(pattern)
        }

    # Two files: a single comparison decides the exit status
    if path1.is_file() and path2.is_file():
        sys.exit(0 if check_pair(path1.name, path1, path2) else 1)

    if not (path1.is_dir() and path2.is_dir()):
        print("Error: Both paths must be files or both must be directories")
        sys.exit(1)

    # Two directories: pair the files up by name
    files1 = snapshot_files(path1)
    files2 = snapshot_files(path2)
    names = sorted(files1.keys() | files2.keys())
    if not names:
        print("No JSON or snap files found in directories")
        sys.exit(1)

    failed, missing = [], []
    for name in names:
        if name not in files1:
            missing.append(f"{name} (missing in {path1})")
        elif name not in files2:
            missing.append(f"{name} (missing in {path2})")
        elif not check_pair(name, files1[name], files2[name]):
            failed.append(name)

    # Print summary
    print("\n" + "=" * 60)
    print(f"Compared {len(names)} files")
    if missing:
        print(f"\nMissing files ({len(missing)}):")
        for item in missing:
            print(f" - {item}")
    if failed:
        print(f"\nFailed comparisons ({len(failed)}):")
        for item in failed:
            print(f" - {item}")
        sys.exit(1)
    if missing:
        sys.exit(1)
    print("\n✓ All files are equivalent")
    sys.exit(0)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment