Created
October 18, 2024 22:19
-
-
Save xzin-CoRK/03f211518fe908fda91733e8ee7ad73c to your computer and use it in GitHub Desktop.
Identify files that aren't hardlinked
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ############################## | |
| ### missing-hardlinks v1.0 ### | |
| ### by xzin ### | |
| ############################## | |
#### Adjust the two variables below as needed ####

# Ignored file extensions
# Files whose suffix appears here are skipped. The scanner lower-cases each
# file's suffix before the lookup, so entries must be written in lower case.
ignored_extensions = [
    '.srt',
    '.nfo',
    '.srr',
    '.txt',
    '.png',
    '.jpg',
    '.clpi',
    '.mpls',
    '.m2ts',
    '.bdmv',
]

# Ignored subdirectories
# Subdirectories with these names are not traversed. The comparison is
# case-sensitive, so list every capitalisation that should be skipped.
ignored_directories = [
    'Extras',
    'extras',
    'Subs',
    'subs',
    'Sample',
    'Samples',
    'Featurettes',
    'Behind the Scenes',
    '@eaDir',
]
| #### Script begins here #### | |
| from pathlib import Path | |
| import os | |
| import csv | |
| import argparse | |
# Module-level accumulators, filled in by analyze_directory() during a scan.
# Running total of the sizes (st_size) of every recorded file.
single_link_size = 0
# One dict per recorded file, with keys file_name, file_size and full_path.
single_linked_files = []
def humanbytes(B):
    """Return the given byte count as a human-friendly string.

    Values below 1024 are reported in bytes; larger values are scaled by
    powers of 1024 and reported with two decimals as KB, MB, GB or TB
    (TB is the largest unit, so anything bigger is still shown in TB).

    Args:
        B: The number of bytes; anything accepted by ``float()``.

    Returns:
        A string such as ``'1 Byte'``, ``'512 Bytes'`` or ``'1.50 KB'``.
    """
    B = float(B)
    KB = float(1024)

    if B < KB:
        # Only exactly one byte is singular. The previous chained comparison
        # (`0 == B > 1`) could never be true, so every value read "Byte".
        unit = 'Byte' if B == 1 else 'Bytes'
        # Show whole counts without the ".0" that the float conversion adds.
        count = int(B) if B.is_integer() else B
        return '{0} {1}'.format(count, unit)

    # Pick the first unit whose upper bound (the next power of 1024) is
    # above the value.
    for exponent, unit in enumerate(('KB', 'MB', 'GB'), start=1):
        if B < KB ** (exponent + 1):
            return '{0:.2f} {1}'.format(B / KB ** exponent, unit)

    return '{0:.2f} TB'.format(B / KB ** 4)
def analyze_directory(directory: Path):
    """Recursively record files under ``directory`` that have fewer than two links.

    Every regular entry whose ``st_nlink`` is below 2 and whose lower-cased
    suffix is not in ``ignored_extensions`` is appended to the module-level
    ``single_linked_files`` list, and its size is added to
    ``single_link_size``. Subdirectories whose own name appears in
    ``ignored_directories`` are not traversed.

    Args:
        directory: The directory to scan.
    """
    global single_link_size

    for item in directory.iterdir():
        if item.is_dir():
            # Compare only the subdirectory's own name. Matching against
            # every component of the path would also match an ignored name
            # in the ancestors of the scan root (e.g. /media/Extras/movies)
            # and silently skip the whole tree.
            if item.name not in ignored_directories:
                analyze_directory(item)
            continue

        if item.suffix.lower() in ignored_extensions:
            continue

        # A single stat() call supplies both the link count and the size.
        info = os.stat(item)
        if info.st_nlink < 2:
            single_linked_files.append({
                "file_name": item.name,
                "file_size": info.st_size,
                "full_path": str(item)
            })
            single_link_size += info.st_size
def main(input_directory: Path):
    """Scan ``input_directory`` and report files that are missing hardlinks.

    The findings, if any, are written to ``missing-hardlinks-results.csv``
    in the current working directory, and a one-line summary is printed.

    Args:
        input_directory: The directory to scan recursively.
    """
    # Recursively scan the specified directory
    analyze_directory(input_directory)

    # Write the findings, if any, to a CSV file
    num_results = len(single_linked_files)
    if num_results:
        headers = ['file_name', 'file_size', 'full_path']
        # newline='' is required by the csv module; without it the writer's
        # own line endings get translated again and blank rows appear on
        # Windows. An explicit encoding keeps non-ASCII file names from
        # depending on the platform's locale encoding.
        with open("missing-hardlinks-results.csv", 'w', newline='', encoding='utf-8') as log:
            writer = csv.DictWriter(log, fieldnames=headers)
            writer.writeheader()
            writer.writerows(single_linked_files)

    print(f"Finished scanning directory `{input_directory}`. Found {num_results} files missing hardlinks, totalling {humanbytes(single_link_size)}.")
if __name__ == "__main__":
    # Command-line entry point: missing-hardlinks.py /my/directory/here
    parser = argparse.ArgumentParser("missing-hardlinks")
    parser.add_argument("input_directory", help="The directory to scan")

    # parse_args() never returns None: when the required positional argument
    # is missing, argparse prints the usage message and exits on its own, so
    # no separate "missing argument" check is needed here.
    args = parser.parse_args()

    target = Path(args.input_directory)
    if not target.is_dir():
        # SystemExit with a string prints it to stderr and exits with
        # status 1, so callers can detect the failure.
        raise SystemExit("You've specified an invalid directory. Please try again.")

    main(target)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment