-
-
Save IcedShake/3dd4a70e4e898bb97a24181c80a3493a to your computer and use it in GitHub Desktop.
Danbooru Pixel Hash
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # Calculates an image's pixel hash as per the algorithm used by Danbooru | |
| from pathlib import Path | |
| import hashlib | |
| from pyvips import Image, Error as VipsError | |
| class DanbooruMediaFileImage: | |
| """Mimics Danbooru's MediaFile::Image, but for hash computation only. | |
| See https://github.com/danbooru/danbooru/blob/bd0c6a37a81f851bd3e7862b97f7cf2fae7d5381/app/logical/media_file/image.rb | |
| """ | |
| def __init__(self, image_path: str): | |
| self.image_path = image_path | |
| self.file_ext = Path(image_path).suffix.lower() | |
| self.image = self.open_image(fail=True) | |
| def open_image(self, **kwargs) -> Image: | |
| if self.file_ext in ['jpeg', 'jpg']: | |
| return Image.new_from_file(self.image_path, autorotate=True, **kwargs) | |
| return Image.new_from_file(self.image_path, **kwargs) | |
| def md5(self) -> str: | |
| hash = hashlib.md5() | |
| with open(self.image_path, 'rb') as f: | |
| hash.update(f.read()) | |
| return hash.hexdigest() | |
| def n_pages(self) -> int | None: | |
| try: | |
| return self.image.get('n-pages') | |
| except VipsError: | |
| return None | |
| def frame_count(self) -> int | None: | |
| match self.file_ext: | |
| case 'gif': | |
| return self.n_pages() | |
| case 'webp': | |
| return self.n_pages() | |
| case 'png': | |
| pass # Not implementing exiftool atm | |
| case 'avif': | |
| pass # Not implementing ffmpeg atm | |
| case _: | |
| return None | |
| def is_video(self) -> bool: | |
| return self.file_ext in ['webm', 'mp4'] | |
| def is_animated(self) -> bool: | |
| frame_count = self.frame_count() or 1 | |
| return self.is_video() or frame_count > 1 | |
| def pixel_hash(self) -> str: | |
| if self.is_animated(): | |
| return self.md5() | |
| try: | |
| return self.pixel_hash_file() | |
| except VipsError: | |
| return self.md5() | |
| def pixel_hash_file(self) -> str: | |
| """Slightly different from Danbooru implementation; it does not write a PAM file.""" | |
| image = self.image | |
| if image.get_typeof("icc-profile-data") != 0: | |
| image = image.icc_transform("srgb") | |
| if image.interpretation != "srgb": | |
| image = image.colourspace("srgb") | |
| if not image.hasalpha(): | |
| image = image.addalpha() | |
| header = ( | |
| "P7\n" | |
| f"WIDTH {image.width}\n" | |
| f"HEIGHT {image.height}\n" | |
| f"DEPTH {image.bands}\n" | |
| "MAXVAL 255\n" | |
| "TUPLTYPE RGB_ALPHA\n" | |
| "ENDHDR\n" | |
| ) | |
| hash = hashlib.md5() | |
| hash.update(header.encode()) | |
| hash.update(image.rawsave_buffer()) | |
| return hash.hexdigest() | |
if __name__ == '__main__':
    # Command-line entry point: print "<file name> <pixel hash>" for one file.
    import sys

    # sys.exit(message) writes the message to stderr and exits with status 1,
    # keeping stdout clean for the hash line. The bare `exit()` helper comes
    # from the `site` module and is intended for interactive use only.
    if len(sys.argv) != 2:
        sys.exit("No file supplied")

    infile = Path(sys.argv[1]).resolve()
    if not infile.exists():
        sys.exit("File does not exist")

    # The constructor is annotated as taking a str path.
    image = DanbooruMediaFileImage(str(infile))
    print(f"{infile.name} {image.pixel_hash()}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment