Created
September 5, 2025 19:22
-
-
Save fedorov/5e0555f231abd41ebfe2f71335286af9 to your computer and use it in GitHub Desktop.
Recursively compare two folders
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import hashlib | |
| import argparse | |
def md5sum(filename, blocksize=65536):
    """Return the hexadecimal MD5 digest of the file at *filename*.

    The file is opened in binary mode and consumed in pieces of at most
    *blocksize* bytes, so the whole file is never held in memory at once.
    """
    digest = hashlib.md5()
    with open(filename, "rb") as stream:
        while True:
            block = stream.read(blocksize)
            if not block:  # an empty read marks end of file
                break
            digest.update(block)
    return digest.hexdigest()
def get_all_files(root):
    """Return the set of file paths found under *root*, relative to *root*.

    Every name that ``os.walk`` reports as a file, at any depth, is included.
    Directories themselves are not part of the result.
    """
    return {
        os.path.relpath(os.path.join(folder, name), root)
        for folder, _subdirs, names in os.walk(root)
        for name in names
    }
def main(dir1, dir2):
    """Print a three-part comparison report for two directory trees.

    The report lists, in sorted order:
      1. relative paths found only under *dir1*,
      2. relative paths found only under *dir2*,
      3. relative paths found under both whose MD5 digests differ.
    """
    left = get_all_files(dir1)
    right = get_all_files(dir2)

    print("Files only in", dir1)
    for rel in sorted(left.difference(right)):
        print(" ", rel)

    print("\nFiles only in", dir2)
    for rel in sorted(right.difference(left)):
        print(" ", rel)

    print("\nFiles present in both but with different content:")
    for rel in sorted(left.intersection(right)):
        # Hash the dir1 copy first, then the dir2 copy; equal digests are
        # treated as identical content and not reported.
        if md5sum(os.path.join(dir1, rel)) == md5sum(os.path.join(dir2, rel)):
            continue
        print(" ", rel)
if __name__ == "__main__":
    # Command-line entry point: take two directory paths and print the
    # comparison report produced by main().
    parser = argparse.ArgumentParser(description="Compare two directories recursively.")
    parser.add_argument("dir1", help="First directory")
    parser.add_argument("dir2", help="Second directory")
    args = parser.parse_args()
    # os.walk ignores listing errors by default, so a mistyped or
    # non-directory path would be treated as an empty tree and every file
    # in the other directory would be reported as "only in" it. Reject such
    # arguments up front with a usage error instead.
    for directory in (args.dir1, args.dir2):
        if not os.path.isdir(directory):
            parser.error("not a directory: {}".format(directory))
    main(args.dir1, args.dir2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment