Created
February 16, 2026 14:49
-
-
Save mshivam019/622a022cba75a1a7d1d9234abaa44bff to your computer and use it in GitHub Desktop.
This script removes mojibake characters from a JSON file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import json | |
| from ftfy import fix_text | |
def fix_obj(o):
    """Return a copy of a JSON-like value with every string leaf repaired.

    Dictionaries and lists are rebuilt recursively as new plain ``dict`` /
    ``list`` objects. String values are passed through ``fix_text`` (imported
    from ``ftfy``). Dictionary keys are carried over unchanged, and any other
    kind of value (numbers, booleans, ``None``, ...) is returned as-is.
    """
    if isinstance(o, dict):
        repaired = {}
        for key, value in o.items():
            # Only the values are repaired; keys are kept exactly as given.
            repaired[key] = fix_obj(value)
        return repaired
    if isinstance(o, list):
        return list(map(fix_obj, o))
    if isinstance(o, str):
        return fix_text(o)
    # Anything that is not a str, list or dict has no text to repair.
    return o
# Load the input document. Undecodable bytes are substituted rather than
# raising (errors='replace'), so a partially broken file can still be parsed.
with open('data.json', 'r', encoding='utf-8', errors='replace') as src:
    raw_text = src.read()
data = json.loads(raw_text)

# Repair every string value in the parsed structure.
fixed = fix_obj(data)

# Serialize with non-ASCII characters written literally (not \u-escaped)
# and a two-space indent, then write the result next to the input.
serialized = json.dumps(fixed, ensure_ascii=False, indent=2)
with open('data_fixed.json', 'w', encoding='utf-8') as dst:
    dst.write(serialized)

print("✅ Written data_fixed.json (ftfy).")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment