Last active
October 2, 2025 18:32
-
-
Save TypeA2/58d2f6fc2fca8d40d064376c5f19d5a3 to your computer and use it in GitHub Desktop.
Danbooru Pixel Hash
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| from pyvips import Image | |
| from tempfile import TemporaryFile | |
| from pathlib import Path | |
| from typing import cast | |
| from zipfile import ZipFile | |
| import hashlib | |
| import sys | |
| import json | |
# Usage check: at least one path or glob pattern must be given on the
# command line.  The message goes to stderr so that stdout only ever
# carries hash output, and sys.exit() is used rather than the site-provided
# exit() helper, which is intended for interactive sessions.
if __name__ == "__main__" and len(sys.argv) < 2:
    print("No file supplied", file=sys.stderr)
    sys.exit(1)
def hash_frame(image: Image) -> str:
    """Return the MD5 hex digest of a single image's normalised pixel data.

    The image is converted to sRGB (via its embedded ICC profile when one is
    present), given an alpha band if it lacks one, and then serialised as a
    P7 header followed by the raw pixel bytes.  The digest is computed over
    that header-plus-pixels stream.

    Args:
        image: The pyvips image to hash.

    Returns:
        The hexadecimal MD5 digest of the serialised image.
    """
    # Follows the Danbooru implementation:
    # https://github.com/danbooru/danbooru/blob/bd0c6a37a81f851bd3e7862b97f7cf2fae7d5381/app/logical/media_file/image.rb#L214
    if image.get_typeof("icc-profile-data") != 0:
        image = cast(Image, image.icc_transform("srgb"))
    if image.interpretation != "srgb":
        image = cast(Image, image.colourspace("srgb"))
    if not image.hasalpha():
        image = cast(Image, image.addalpha())

    header = (
        "P7\n"
        f"WIDTH {image.width}\n"
        f"HEIGHT {image.height}\n"
        f"DEPTH {image.bands}\n"
        "MAXVAL 255\n"
        "TUPLTYPE RGB_ALPHA\n"
        "ENDHDR\n"
    ).encode()

    file_hash = hashlib.md5()
    # The context manager guarantees the temporary file is closed (and its
    # descriptor released) even if saving or hashing raises.
    with TemporaryFile("wb+") as output_file:
        output_file.write(header)
        # rawsave_fd() writes through the OS-level descriptor, so Python's
        # buffered header must reach the file before the pixel data does.
        output_file.flush()
        image.rawsave_fd(output_file.fileno())
        output_file.seek(0)
        while chunk := output_file.read(65536):
            file_hash.update(chunk)
    return file_hash.hexdigest()
def hash_ugoira(infile: Path) -> str:
    """Return the MD5 hex digest describing every frame of a ugoira archive.

    The archive's ``animation.json`` lists the frames.  Each listed frame
    file is decoded and hashed with ``hash_frame``; the per-frame hashes and
    delays are then serialised as compact JSON
    (``{"frames":[{"pixel_hash":...,"duration":...},...]}``) and that JSON
    text is hashed.

    Args:
        infile: Path to the ugoira ``.zip`` archive.

    Returns:
        The hexadecimal MD5 digest of the compact JSON frame summary.

    Raises:
        ValueError: If the archive does not contain ``animation.json``.
    """
    with ZipFile(infile) as ugoira:
        # Raise explicitly instead of using ``assert`` so the check still
        # runs when Python is started with -O.
        try:
            manifest_bytes = ugoira.read("animation.json")
        except KeyError as err:
            raise ValueError("animation.json missing") from err
        manifest = json.loads(manifest_bytes.decode())

        # Frames must be read while the archive is still open.
        pixel_hashes = [
            {
                "pixel_hash": hash_frame(
                    Image.new_from_buffer(ugoira.read(frame["file"]), "")
                ),
                "duration": frame["delay"],
            }
            for frame in manifest["frames"]
        ]

    summary = json.dumps({"frames": pixel_hashes}, separators=(",", ":"))
    return hashlib.md5(summary.encode()).hexdigest()
def pixel_hash(infile: Path) -> str:
    """Compute the pixel hash for a file, dispatching on its extension.

    A path whose suffix is exactly ``.zip`` is hashed as a ugoira archive;
    any other path is loaded as a single image and hashed as one frame.
    """
    is_archive = infile.suffix == ".zip"
    return (
        hash_ugoira(infile)
        if is_archive
        else hash_frame(Image.new_from_file(infile))
    )
# Script entry point: hash every file named (or matched) by the arguments
# and print one "<name>\t <hash>" line per file.
if __name__ == "__main__":
    for pattern in sys.argv[1:]:
        literal = Path(pattern)
        if literal.is_file():
            # An existing file is taken as-is, so names that contain glob
            # metacharacters (e.g. "[1].png") are not misread as patterns.
            files = [literal]
        elif literal.is_absolute():
            # Path.glob() rejects absolute patterns, so glob from the
            # pattern's anchor using the remainder as a relative pattern.
            anchor = Path(literal.anchor)
            files = list(anchor.glob(str(literal.relative_to(anchor))))
        else:
            files = list(Path().glob(pattern))

        # Explicit exit instead of ``assert`` so the check survives -O;
        # sys.exit() with a string prints it to stderr and exits with 1.
        if not files:
            sys.exit(f"File does not exist: {pattern}")

        for file in files:
            infile = file.resolve()
            print(f"{infile.name}\t {pixel_hash(infile)}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment