Skip to content

Instantly share code, notes, and snippets.

@JunsikChoi
Last active February 21, 2026 03:09
Show Gist options
  • Select an option

  • Save JunsikChoi/0917f778313890419b8ef2e5d5c4d4eb to your computer and use it in GitHub Desktop.

Select an option

Save JunsikChoi/0917f778313890419b8ef2e5d5c4d4eb to your computer and use it in GitHub Desktop.
img-optimize: Claude Code skill to reduce image token cost (resize + OCR)
#!/usr/bin/env python3
"""Optimize images for Claude Code reading.
Resizes images to reduce token cost and avoid API limits.
Token formula: (width * height) / 750
Usage:
img-optimize image.png # Single file → resize
img-optimize *.png # Multiple files → resize
img-optimize --dir ./screenshots # Directory → resize all
img-optimize --max-dim 800 image.png # Custom max dimension
img-optimize --ocr image.png # Extract text via OCR (tesseract)
img-optimize --ocr --lang kor image.png # Korean OCR
img-optimize --ocr --dir ./screenshots # OCR all images in dir
"""
import sys
import os
import glob
import argparse
import subprocess
import shutil
from pathlib import Path
try:
from PIL import Image
except ImportError:
print("Error: Pillow not installed. Run: pip install Pillow")
sys.exit(1)
# Tuning constants.
DEFAULT_MAX_DIM = 1092  # Optimal for Claude: ~1,590 tokens per image at this size
MULTI_IMAGE_MAX_DIM = 1000  # When processing 20+ images: stay under 2000px limit
OUTPUT_DIR = "/tmp/claude-images"  # default destination for resized copies
OCR_OUTPUT_DIR = "/tmp/claude-ocr"  # default destination for extracted text files
def calc_tokens(width: int, height: int) -> int:
    """Estimate the Claude vision token cost of an image of the given size."""
    pixels = width * height
    return pixels // 750
def optimize_image(path: str, max_dim: int, output_dir: str) -> dict:
    """Resize *path* so its longest side is <= max_dim, saving into output_dir.

    Images already within max_dim are re-saved as-is (re-encoding with
    optimize=True may still shrink the file). Returns a stats dict with
    dimensions, token estimates (width*height // 750), file sizes in KB,
    and the percentage of tokens saved.

    NOTE(review): output name is the input's basename, so two inputs from
    different directories with the same name overwrite each other.
    """
    out_path = os.path.join(output_dir, os.path.basename(path))
    # Use a context manager so the source file handle is closed promptly
    # (the original leaked it until garbage collection).
    with Image.open(path) as img:
        orig_w, orig_h = img.size
        orig_tokens = calc_tokens(orig_w, orig_h)
        if max(orig_w, orig_h) > max_dim:
            # Scale both sides by one ratio to preserve aspect ratio.
            ratio = max_dim / max(orig_w, orig_h)
            img = img.resize((int(orig_w * ratio), int(orig_h * ratio)), Image.LANCZOS)
        new_w, new_h = img.size
        # quality= applies to JPEG/WebP; optimize=True shrinks PNG/JPEG.
        # Unsupported keywords are ignored by other format writers.
        img.save(out_path, quality=90, optimize=True)
    new_tokens = calc_tokens(new_w, new_h)
    orig_size = os.path.getsize(path)
    new_size = os.path.getsize(out_path)
    return {
        "original": path,
        "optimized": out_path,
        "orig_dim": f"{orig_w}x{orig_h}",
        "new_dim": f"{new_w}x{new_h}",
        "orig_tokens": orig_tokens,
        "new_tokens": new_tokens,
        "orig_size_kb": orig_size // 1024,
        "new_size_kb": new_size // 1024,
        "saved_pct": round((1 - new_tokens / orig_tokens) * 100) if orig_tokens > 0 else 0,
    }
def ocr_image(path: str, lang: str, output_dir: str) -> dict:
    """Extract text from *path* using Tesseract OCR into output_dir.

    Returns a stats dict (text file location, char count, token estimates,
    % saved), or {"original": path, "error": ...} if tesseract fails.
    Exits the process if tesseract is not installed.
    """
    if not shutil.which("tesseract"):
        print("Error: tesseract not installed. Run: sudo apt install tesseract-ocr")
        sys.exit(1)
    basename = Path(path).stem
    out_path = os.path.join(output_dir, f"{basename}.txt")
    # tesseract appends ".txt" to the output base path itself.
    result = subprocess.run(
        ["tesseract", path, os.path.join(output_dir, basename), "-l", lang],
        capture_output=True, text=True
    )
    if result.returncode != 0:
        return {"original": path, "error": result.stderr.strip()}
    # Read extracted text (tesseract can succeed yet write nothing).
    text = ""
    if os.path.exists(out_path):
        with open(out_path) as f:
            text = f.read().strip()
    # Read dimensions with a context manager so the file handle is closed
    # promptly (the original leaked it until garbage collection).
    with Image.open(path) as img:
        width, height = img.size
    orig_tokens = calc_tokens(width, height)
    text_tokens = len(text) // 4  # rough estimate: 4 chars per token
    return {
        "original": path,
        "text_file": out_path,
        "orig_dim": f"{width}x{height}",
        "orig_tokens": orig_tokens,
        "text_tokens": text_tokens,
        "text_chars": len(text),
        "saved_pct": round((1 - text_tokens / orig_tokens) * 100) if orig_tokens > 0 else 0,
    }
def collect_files(args_files, args_dir):
    """Combine explicitly listed files with images globbed from args_dir.

    Globs both lower- and upper-case extensions. On case-insensitive
    filesystems (macOS, Windows) both patterns match the same file, so the
    result is de-duplicated (preserving first-seen order) to avoid
    processing the same image twice.
    """
    files = list(args_files) if args_files else []
    if args_dir:
        for ext in ("*.png", "*.jpg", "*.jpeg", "*.webp", "*.gif", "*.bmp"):
            files.extend(glob.glob(os.path.join(args_dir, ext)))
            files.extend(glob.glob(os.path.join(args_dir, ext.upper())))
    # dict.fromkeys de-duplicates while keeping insertion order.
    return list(dict.fromkeys(files))
def main():
    """CLI entry point: parse arguments, then run OCR mode or resize mode."""
    parser = argparse.ArgumentParser(description="Optimize images for Claude Code")
    parser.add_argument("files", nargs="*", help="Image files to process")
    parser.add_argument("--dir", help="Directory of images to process")
    parser.add_argument("--max-dim", type=int, default=DEFAULT_MAX_DIM,
                        help=f"Max dimension in pixels (default: {DEFAULT_MAX_DIM})")
    parser.add_argument("--output", default=None, help="Output directory")
    parser.add_argument("--ocr", action="store_true", help="Extract text via OCR instead of resizing")
    parser.add_argument("--lang", default="eng", help="OCR language (default: eng, use kor for Korean, eng+kor for both)")
    args = parser.parse_args()
    files = collect_files(args.files, args.dir)
    if not files:
        # Nothing to do: show usage and exit with an error status.
        parser.print_help()
        sys.exit(1)
    # --- OCR mode ---
    if args.ocr:
        output_dir = args.output or OCR_OUTPUT_DIR
        os.makedirs(output_dir, exist_ok=True)
        total_orig = 0
        total_text = 0
        results = []
        for f in sorted(files):
            if not os.path.isfile(f):
                print(f"[SKIP] {f} (not found)")
                continue
            r = ocr_image(f, args.lang, output_dir)
            if "error" in r:
                # tesseract failed for this file; report and keep going.
                print(f"[ERR] {os.path.basename(f)}: {r['error']}")
                continue
            results.append(r)
            total_orig += r["orig_tokens"]
            total_text += r["text_tokens"]
            print(f"[OCR] {os.path.basename(f)}: {r['text_chars']} chars extracted, "
                  f"{r['orig_tokens']} → ~{r['text_tokens']} tokens (-{r['saved_pct']}%)")
        if results:
            saved = round((1 - total_text / total_orig) * 100) if total_orig > 0 else 0
            print(f"\n--- OCR Summary ---")
            print(f"Files: {len(results)}")
            print(f"Tokens: {total_orig:,} → ~{total_text:,} ({saved}% saved)")
            print(f"Output: {output_dir}/")
            # Also create a combined file so all extracted text can be
            # read in a single pass.
            combined = os.path.join(output_dir, "_combined.txt")
            with open(combined, "w") as out:
                for r in results:
                    out.write(f"=== {os.path.basename(r['original'])} ===\n")
                    with open(r["text_file"]) as tf:
                        out.write(tf.read())
                    out.write("\n\n")
            print(f"Combined: {combined}")
        return
    # --- Resize mode ---
    output_dir = args.output or OUTPUT_DIR
    os.makedirs(output_dir, exist_ok=True)
    max_dim = args.max_dim
    if len(files) >= 20 and max_dim > MULTI_IMAGE_MAX_DIM:
        # Large batches get a tighter cap (see MULTI_IMAGE_MAX_DIM).
        max_dim = MULTI_IMAGE_MAX_DIM
        print(f"[!] {len(files)} images detected. Auto-reducing max_dim to {max_dim}px")
    results = []
    total_orig_tokens = 0
    total_new_tokens = 0
    for f in sorted(files):
        if not os.path.isfile(f):
            print(f"[SKIP] {f} (not found)")
            continue
        try:
            r = optimize_image(f, max_dim, output_dir)
            results.append(r)
            total_orig_tokens += r["orig_tokens"]
            total_new_tokens += r["new_tokens"]
            saved = f" (-{r['saved_pct']}%)" if r["saved_pct"] > 0 else ""
            print(f"[OK] {os.path.basename(f)}: {r['orig_dim']} → {r['new_dim']}, "
                  f"{r['orig_tokens']} → {r['new_tokens']} tokens{saved}")
        except Exception as e:
            # One unreadable/corrupt image should not abort the batch.
            print(f"[ERR] {f}: {e}")
    if results:
        saved_pct = round((1 - total_new_tokens / total_orig_tokens) * 100) if total_orig_tokens > 0 else 0
        print(f"\n--- Summary ---")
        print(f"Files: {len(results)}")
        print(f"Tokens: {total_orig_tokens:,} → {total_new_tokens:,} ({saved_pct}% saved)")
        print(f"Output: {output_dir}/")
        if len(results) > 100:
            print(f"\n[WARNING] {len(results)} images > 100. Split into batches of ~80.")


if __name__ == "__main__":
    main()

img-optimize Installation Guide

Quick Install (one command)

curl -fsSL https://raw.githubusercontent.com/JunsikChoi/claude-img-optimize/main/install.sh | bash

Manual Install

git clone https://github.com/JunsikChoi/claude-img-optimize.git
cd claude-img-optimize
bash install.sh

What Gets Installed

  1. img-optimize CLI → ~/.local/bin/img-optimize
  2. Claude Code skill → ~/.claude/skills/img-optimize/SKILL.md
  3. Pillow (Python dependency)

Full Documentation

See the GitHub repository for complete docs.

---
name: img-optimize
description: Optimize images before reading to reduce token cost and avoid API limits. Auto-resizes to optimal dimensions or extracts text via OCR. Usage: /img <files|--dir> [--ocr] [--lang kor]
---

Image Optimizer for Claude Code

Optimize images before reading to avoid API limits and reduce token cost.

Usage

/img screenshot.png                    # Resize then read
/img --dir ./screenshots               # Process entire directory
/img --ocr screenshot.png              # Extract text via OCR
/img --ocr --lang kor image.png        # Korean OCR
/img --ocr --lang eng+kor *.png        # Multi-language OCR
/img --max-dim 600 large.png           # Custom max dimension

Behavior

1) Validate Input

  • Check that files/directory exist
  • Supported formats: png, jpg, jpeg, webp, gif, bmp
  • Show usage help if no arguments provided

2) Determine Mode

| Condition | Mode | Reason |
|---|---|---|
| `--ocr` flag | OCR extraction | User explicitly requested |
| Text/code screenshot (contextual) | Suggest OCR | 90% token savings |
| General images, UI designs | Resize | Visual information needed |
| Fewer than 5 images, small size | Read directly | Optimization unnecessary |

3) Execute

Run the img-optimize CLI tool:

# Resize mode
img-optimize $FILES_OR_DIR_ARGS

# OCR mode
img-optimize --ocr --lang $LANG $FILES_OR_DIR_ARGS

Resize output: /tmp/claude-images/ OCR output: /tmp/claude-ocr/ (individual .txt files + _combined.txt)

4) Read Results

  • Resize: Read optimized images from /tmp/claude-images/ using the Read tool
  • OCR: Read text files from /tmp/claude-ocr/ using the Read tool
  • Answer the user's question about the images

5) Report

Display processing summary:

  • Number of files processed
  • Original vs optimized token comparison
  • Savings percentage

API Limits Reference

| Limit | Value |
|---|---|
| Max images per request | 100 |
| Max size per image | 5 MB |
| Total request size | 32 MB |
| Resolution limit (20+ images) | 2000x2000 px |
| Token formula | (width x height) / 750 |

Auto-Optimization Rules

The img-optimize script automatically:

  • Resizes to 1092px max dimension (optimal token cost)
  • Reduces to 1000px for batches of 20+ images
  • Warns about batch splitting for 100+ images

Dependencies

  • ~/.local/bin/img-optimize (Python script)
  • Pillow (pip)
  • tesseract-ocr (optional, for OCR mode)

User input

$ARGUMENTS

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment