xzin-CoRK/missing-hardlinks.py

## missing-hardlinks.py
##############################
### missing-hardlinks v1.0 ###
###       by xzin          ###
##############################

#### Adjust the two variables below as needed ####

# File extensions to skip
# File suffixes are lower-cased before being compared against this list
ignored_extensions = [
    '.srt',
    '.nfo',
    '.srr',
    '.txt',
    '.png',
    '.jpg',
    '.clpi',
    '.mpls',
    '.m2ts',
    '.bdmv',
]

# Subdirectories to skip
# Names are compared case-sensitively, so each capitalisation is listed separately
ignored_directories = [
    'Extras',
    'extras',
    'Subs',
    'subs',
    'Sample',
    'Samples',
    'Featurettes',
    'Behind the Scenes',
    '@eaDir',
]

#### Script begins here ####

from pathlib import Path
import os
import csv
import argparse

# Running total, in bytes, of the sizes of every file collected during the scan
single_link_size = 0
# One dict per collected file, with the keys file_name, file_size and full_path
single_linked_files = []

def humanbytes(B):
    """Return the given byte count as a human friendly Bytes, KB, MB, GB, or TB string.

    Units are binary (1 KB = 1024 bytes). Values below 1 KB are shown as a
    plain count; larger values are shown with two decimals.

    Args:
        B: Size in bytes; anything accepted by ``float()``.

    Returns:
        A string such as ``'0 Bytes'``, ``'1 Byte'``, ``'1.50 KB'`` or ``'2.00 TB'``.
    """
    B = float(B)
    KB = float(1024)
    MB = float(KB ** 2)  # 1,048,576
    GB = float(KB ** 3)  # 1,073,741,824
    TB = float(KB ** 4)  # 1,099,511,627,776

    if B < KB:
        # Whole byte counts are shown without the trailing ".0" of the float.
        count = int(B) if B.is_integer() else B
        # Only exactly one byte is singular. The previous test, `0 == B > 1`,
        # is a chained comparison that can never be true, so "Bytes" was
        # never selected.
        return '{0} {1}'.format(count, 'Byte' if B == 1 else 'Bytes')
    if B < MB:
        return '{0:.2f} KB'.format(B / KB)
    if B < GB:
        return '{0:.2f} MB'.format(B / MB)
    if B < TB:
        return '{0:.2f} GB'.format(B / GB)
    return '{0:.2f} TB'.format(B / TB)

def analyze_directory(directory: Path):
    """Recursively collect files under *directory* that have fewer than two hard links.

    Each non-directory entry whose link count is below 2 and whose extension
    is not in ``ignored_extensions`` is appended to the module-level
    ``single_linked_files`` list, and its size is added to the module-level
    ``single_link_size`` total. Subdirectories whose name appears in
    ``ignored_directories`` are not traversed.

    Entries that cannot be read are reported on stderr and skipped so that
    one bad entry does not abort the whole scan.

    Args:
        directory: Directory to scan.
    """
    global single_link_size

    import sys  # local import: only needed for the warnings below

    # Lower-case the configured extensions so the comparison is
    # case-insensitive even if an entry was added with upper-case letters.
    skipped_suffixes = {ext.lower() for ext in ignored_extensions}

    try:
        entries = list(directory.iterdir())
    except OSError as err:
        print(f"Skipping unreadable directory `{directory}`: {err}", file=sys.stderr)
        return

    for item in entries:
        if item.is_dir():
            # Compare only the directory's own name. Matching against every
            # component of the full path would also match ancestors of the
            # scan root and skip every subdirectory beneath it.
            if item.name not in ignored_directories:
                analyze_directory(item)
            continue

        if item.suffix.lower() in skipped_suffixes:
            continue

        try:
            # A single stat call provides both the link count and the size.
            info = os.stat(item)
        except OSError as err:
            # e.g. a dangling symlink, or a file removed while scanning
            print(f"Skipping unreadable file `{item}`: {err}", file=sys.stderr)
            continue

        if info.st_nlink < 2:
            single_linked_files.append({
                "file_name": item.name,
                "file_size": info.st_size,
                "full_path": str(item)
            })
            single_link_size += info.st_size


def main(input_directory: Path):
    """Scan *input_directory* and report the files that are missing hardlinks.

    The findings collected in the module-level ``single_linked_files`` list
    are written to ``missing-hardlinks-results.csv`` in the current working
    directory (only when there is at least one finding), and a one-line
    summary is printed.

    Args:
        input_directory: Directory to scan recursively.
    """
    # Recursively scan the specified directory
    analyze_directory(input_directory)

    num_results = len(single_linked_files)

    # Write the findings, if any, to a CSV file
    if single_linked_files:
        headers = ['file_name', 'file_size', 'full_path']
        # newline='' lets the csv module control line endings itself; without
        # it every row is followed by a blank line on Windows.
        # UTF-8 with surrogateescape keeps file names that are not valid in
        # the locale encoding from raising while the results are written.
        with open("missing-hardlinks-results.csv", 'w', newline='',
                  encoding='utf-8', errors='surrogateescape') as log:
            writer = csv.DictWriter(log, fieldnames=headers)
            writer.writeheader()
            writer.writerows(single_linked_files)

    print(f"Finished scanning directory `{input_directory}`. Found {num_results} files missing hardlinks, totalling {humanbytes(single_link_size)}.")


if __name__ == "__main__":
    parser = argparse.ArgumentParser("missing-hardlinks")
    parser.add_argument("input_directory", help="The directory to scan")
    # parse_args() never returns None: when the positional argument is
    # missing, argparse prints a usage message and exits on its own.
    args = parser.parse_args()

    target = Path(args.input_directory)
    if not target.is_dir():
        # Report through argparse so the message goes to stderr together with
        # the usage line and the process exits with a non-zero status.
        parser.error("You've specified an invalid directory. Please try again.")
    main(target)
	##############################
	### missing-hardlinks v1.0 ###
	### by xzin ###
	##############################

	#### Adjust the two variables below as needed ####

	# File extensions to skip
	# File suffixes are lower-cased before being compared against this list
	ignored_extensions = [
	    '.srt',
	    '.nfo',
	    '.srr',
	    '.txt',
	    '.png',
	    '.jpg',
	    '.clpi',
	    '.mpls',
	    '.m2ts',
	    '.bdmv',
	]

	# Subdirectories to skip
	# Names are compared case-sensitively, so each capitalisation is listed separately
	ignored_directories = [
	    'Extras',
	    'extras',
	    'Subs',
	    'subs',
	    'Sample',
	    'Samples',
	    'Featurettes',
	    'Behind the Scenes',
	    '@eaDir',
	]

	#### Script begins here ####

	from pathlib import Path
	import os
	import csv
	import argparse

	# Running total, in bytes, of the sizes of every file collected during the scan
	single_link_size = 0
	# One dict per collected file, with the keys file_name, file_size and full_path
	single_linked_files = []

	def humanbytes(B):
	    """Return the given byte count as a human friendly Bytes, KB, MB, GB, or TB string.

	    Units are binary (1 KB = 1024 bytes). Values below 1 KB are shown as a
	    plain count; larger values are shown with two decimals.

	    Args:
	        B: Size in bytes; anything accepted by ``float()``.

	    Returns:
	        A string such as ``'0 Bytes'``, ``'1 Byte'``, ``'1.50 KB'`` or ``'2.00 TB'``.
	    """
	    B = float(B)
	    KB = float(1024)
	    MB = float(KB ** 2)  # 1,048,576
	    GB = float(KB ** 3)  # 1,073,741,824
	    TB = float(KB ** 4)  # 1,099,511,627,776

	    if B < KB:
	        # Whole byte counts are shown without the trailing ".0" of the float.
	        count = int(B) if B.is_integer() else B
	        # Only exactly one byte is singular. The previous test, `0 == B > 1`,
	        # is a chained comparison that can never be true, so "Bytes" was
	        # never selected.
	        return '{0} {1}'.format(count, 'Byte' if B == 1 else 'Bytes')
	    if B < MB:
	        return '{0:.2f} KB'.format(B / KB)
	    if B < GB:
	        return '{0:.2f} MB'.format(B / MB)
	    if B < TB:
	        return '{0:.2f} GB'.format(B / GB)
	    return '{0:.2f} TB'.format(B / TB)

	def analyze_directory(directory: Path):
	    """Recursively collect files under *directory* that have fewer than two hard links.

	    Each non-directory entry whose link count is below 2 and whose extension
	    is not in ``ignored_extensions`` is appended to the module-level
	    ``single_linked_files`` list, and its size is added to the module-level
	    ``single_link_size`` total. Subdirectories whose name appears in
	    ``ignored_directories`` are not traversed.

	    Entries that cannot be read are reported on stderr and skipped so that
	    one bad entry does not abort the whole scan.

	    Args:
	        directory: Directory to scan.
	    """
	    global single_link_size

	    import sys  # local import: only needed for the warnings below

	    # Lower-case the configured extensions so the comparison is
	    # case-insensitive even if an entry was added with upper-case letters.
	    skipped_suffixes = {ext.lower() for ext in ignored_extensions}

	    try:
	        entries = list(directory.iterdir())
	    except OSError as err:
	        print(f"Skipping unreadable directory `{directory}`: {err}", file=sys.stderr)
	        return

	    for item in entries:
	        if item.is_dir():
	            # Compare only the directory's own name. Matching against every
	            # component of the full path would also match ancestors of the
	            # scan root and skip every subdirectory beneath it.
	            if item.name not in ignored_directories:
	                analyze_directory(item)
	            continue

	        if item.suffix.lower() in skipped_suffixes:
	            continue

	        try:
	            # A single stat call provides both the link count and the size.
	            info = os.stat(item)
	        except OSError as err:
	            # e.g. a dangling symlink, or a file removed while scanning
	            print(f"Skipping unreadable file `{item}`: {err}", file=sys.stderr)
	            continue

	        if info.st_nlink < 2:
	            single_linked_files.append({
	                "file_name": item.name,
	                "file_size": info.st_size,
	                "full_path": str(item)
	            })
	            single_link_size += info.st_size


	def main(input_directory: Path):
	    """Scan *input_directory* and report the files that are missing hardlinks.

	    The findings collected in the module-level ``single_linked_files`` list
	    are written to ``missing-hardlinks-results.csv`` in the current working
	    directory (only when there is at least one finding), and a one-line
	    summary is printed.

	    Args:
	        input_directory: Directory to scan recursively.
	    """
	    # Recursively scan the specified directory
	    analyze_directory(input_directory)

	    num_results = len(single_linked_files)

	    # Write the findings, if any, to a CSV file
	    if single_linked_files:
	        headers = ['file_name', 'file_size', 'full_path']
	        # newline='' lets the csv module control line endings itself; without
	        # it every row is followed by a blank line on Windows.
	        # UTF-8 with surrogateescape keeps file names that are not valid in
	        # the locale encoding from raising while the results are written.
	        with open("missing-hardlinks-results.csv", 'w', newline='',
	                  encoding='utf-8', errors='surrogateescape') as log:
	            writer = csv.DictWriter(log, fieldnames=headers)
	            writer.writeheader()
	            writer.writerows(single_linked_files)

	    print(f"Finished scanning directory `{input_directory}`. Found {num_results} files missing hardlinks, totalling {humanbytes(single_link_size)}.")


	if __name__ == "__main__":
	    parser = argparse.ArgumentParser("missing-hardlinks")
	    parser.add_argument("input_directory", help="The directory to scan")
	    # parse_args() never returns None: when the positional argument is
	    # missing, argparse prints a usage message and exits on its own.
	    args = parser.parse_args()

	    target = Path(args.input_directory)
	    if not target.is_dir():
	        # Report through argparse so the message goes to stderr together with
	        # the usage line and the process exits with a non-zero status.
	        parser.error("You've specified an invalid directory. Please try again.")
	    main(target)
No results found