Skip to content

Instantly share code, notes, and snippets.

@TypeA2
Last active October 2, 2025 18:32
Show Gist options
  • Select an option

  • Save TypeA2/58d2f6fc2fca8d40d064376c5f19d5a3 to your computer and use it in GitHub Desktop.

Select an option

Save TypeA2/58d2f6fc2fca8d40d064376c5f19d5a3 to your computer and use it in GitHub Desktop.
Danbooru Pixel Hash
#!/usr/bin/env python3
from pyvips import Image
from tempfile import TemporaryFile
from pathlib import Path
from typing import cast
from zipfile import ZipFile
import hashlib
import sys
import json
# Refuse to run without at least one file/pattern argument.
if len(sys.argv) < 2:
    # Diagnostics go to stderr so stdout only ever carries hash results.
    print("No file supplied", file=sys.stderr)
    # sys.exit is always available; the bare exit() helper is injected by the
    # site module and is missing under `python -S` or in frozen builds.
    sys.exit(1)
def hash_frame(image: Image) -> str:
    """Return the pixel hash (MD5 hex digest) of a single image.

    The image is converted to sRGB and given an alpha channel if it lacks one,
    then serialised as a PAM-style ("P7") header followed by the raw pixel data.
    The MD5 of that byte stream is the pixel hash.

    Args:
        image: The pyvips image to hash.

    Returns:
        The hexadecimal MD5 digest of the serialised image.
    """
    # Follows Danbooru's reference implementation:
    # https://github.com/danbooru/danbooru/blob/bd0c6a37a81f851bd3e7862b97f7cf2fae7d5381/app/logical/media_file/image.rb#L214
    if image.get_typeof("icc-profile-data") != 0:
        image = cast(Image, image.icc_transform("srgb"))
    if image.interpretation != "srgb":
        image = cast(Image, image.colourspace("srgb"))
    if not image.hasalpha():
        image = cast(Image, image.addalpha())

    header = (
        "P7\n"
        f"WIDTH {image.width}\n"
        f"HEIGHT {image.height}\n"
        f"DEPTH {image.bands}\n"
        "MAXVAL 255\n"
        "TUPLTYPE RGB_ALPHA\n"
        "ENDHDR\n"
    ).encode()

    file_hash = hashlib.md5()
    # The context manager closes (and thereby removes) the temporary file even
    # if saving or hashing fails, so no descriptor leaks per hashed frame.
    with TemporaryFile("wb+") as output_file:
        output_file.write(header)
        # Push the header out of Python's buffer before the pixel data is
        # written straight to the underlying file descriptor.
        output_file.flush()
        image.rawsave_fd(output_file.fileno())
        output_file.seek(0)
        while chunk := output_file.read(65536):
            file_hash.update(chunk)
    return file_hash.hexdigest()
def hash_ugoira(infile: Path) -> str:
    """Return the pixel hash (MD5 hex digest) of an ugoira zip archive.

    The archive's ``animation.json`` lists the frames. Each frame image is
    hashed with ``hash_frame`` and paired with its delay. The resulting list is
    serialised as compact JSON and the MD5 of that JSON is returned.

    Args:
        infile: Path to the ugoira ``.zip`` archive.

    Returns:
        The hexadecimal MD5 digest of the compact JSON frame description.

    Raises:
        ValueError: If the archive has no ``animation.json`` member.
    """
    with ZipFile(infile) as ugoira:
        # Explicit check rather than `assert`, which is stripped under `python -O`.
        if "animation.json" not in ugoira.namelist():
            raise ValueError("animation.json missing")

        animation = json.loads(ugoira.read("animation.json").decode())
        pixel_hashes = [
            {
                "pixel_hash": hash_frame(
                    Image.new_from_buffer(ugoira.read(frame["file"]), "")
                ),
                "duration": frame["delay"],
            }
            for frame in animation["frames"]
        ]

    # Compact separators: the digest depends on the exact serialised bytes.
    payload = json.dumps({"frames": pixel_hashes}, separators=(",", ":"))
    return hashlib.md5(payload.encode()).hexdigest()
def pixel_hash(infile: Path) -> str:
    """Return the pixel hash of a file, dispatching on its extension.

    Files with a ``.zip`` suffix (compared case-insensitively) are hashed as
    ugoira archives; everything else is loaded as a single image.

    Args:
        infile: Path to the image or ugoira archive.

    Returns:
        The hexadecimal MD5 pixel hash.
    """
    # Lower-case the suffix so that e.g. "FRAMES.ZIP" is still routed to the
    # archive handler instead of being handed to the image loader.
    if infile.suffix.lower() == ".zip":
        return hash_ugoira(infile)
    # Pass a plain string so this does not rely on the installed pyvips
    # accepting pathlib.Path objects as filenames.
    return hash_frame(Image.new_from_file(str(infile)))
def _expand_argument(pattern: str) -> "list[Path]":
    """Expand one command-line argument into the list of paths it names."""
    literal = Path(pattern)
    # An existing path wins outright, so file names that contain glob
    # metacharacters (e.g. "[tag] image.png") are not misread as patterns.
    if literal.exists():
        return [literal]
    # Path.glob() rejects absolute patterns, so for those glob from the
    # anchor (e.g. "/") with the remainder of the pattern made relative.
    anchor = literal.anchor
    if anchor:
        return sorted(Path(anchor).glob(str(literal.relative_to(anchor))))
    return sorted(Path().glob(pattern))


# Hash every file named (or matched) by the command-line arguments and print
# one "<name>\t <hash>" line per file.
for path in sys.argv[1:]:
    files = _expand_argument(path)
    if not files:
        # Explicit exit instead of `assert`, which is stripped under `python -O`.
        sys.exit(f"File does not exist: {path}")
    for file in files:
        infile = file.resolve()
        print(f"{infile.name}\t {pixel_hash(infile)}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment