Skip to content

Instantly share code, notes, and snippets.

@JunsikChoi
Last active February 21, 2026 03:09
Show Gist options
  • Select an option

  • Save JunsikChoi/0917f778313890419b8ef2e5d5c4d4eb to your computer and use it in GitHub Desktop.

Select an option

Save JunsikChoi/0917f778313890419b8ef2e5d5c4d4eb to your computer and use it in GitHub Desktop.
img-optimize: Claude Code skill to reduce image token cost (resize + OCR)
#!/usr/bin/env python3
"""Optimize images for Claude Code reading.
Resizes images to reduce token cost and avoid API limits.
Token formula: (width * height) / 750
Usage:
img-optimize image.png # Single file → resize
img-optimize *.png # Multiple files → resize
img-optimize --dir ./screenshots # Directory → resize all
img-optimize --max-dim 800 image.png # Custom max dimension
img-optimize --ocr image.png # Extract text via OCR (tesseract)
img-optimize --ocr --lang kor image.png # Korean OCR
img-optimize --ocr --dir ./screenshots # OCR all images in dir
"""
import sys
import os
import glob
import argparse
import subprocess
import shutil
from pathlib import Path
try:
from PIL import Image
except ImportError:
print("Error: Pillow not installed. Run: pip install Pillow")
sys.exit(1)
# Tuning constants.
DEFAULT_MAX_DIM = 1092  # Optimal for Claude: ~1,590 tokens per image at this size
MULTI_IMAGE_MAX_DIM = 1000  # When processing 20+ images: stay under 2000px limit
OUTPUT_DIR = "/tmp/claude-images"  # default destination for resized copies
OCR_OUTPUT_DIR = "/tmp/claude-ocr"  # default destination for extracted text files
def calc_tokens(width: int, height: int) -> int:
    """Estimate the Claude vision token cost of an image of the given size."""
    pixels = width * height
    return pixels // 750
def optimize_image(path: str, max_dim: int, output_dir: str) -> dict:
    """Resize *path* so its longest side is <= max_dim, saving into output_dir.

    Images already within max_dim are re-saved as-is (re-encoding with
    optimize=True may still shrink the file). Returns a stats dict with
    dimensions, token estimates (width*height // 750), file sizes in KB,
    and the percentage of tokens saved.

    NOTE(review): output name is the input's basename, so two inputs from
    different directories with the same name overwrite each other.
    """
    out_path = os.path.join(output_dir, os.path.basename(path))
    # Use a context manager so the source file handle is closed promptly
    # (the original leaked it until garbage collection).
    with Image.open(path) as img:
        orig_w, orig_h = img.size
        orig_tokens = calc_tokens(orig_w, orig_h)
        if max(orig_w, orig_h) > max_dim:
            # Scale both sides by one ratio to preserve aspect ratio.
            ratio = max_dim / max(orig_w, orig_h)
            img = img.resize((int(orig_w * ratio), int(orig_h * ratio)), Image.LANCZOS)
        new_w, new_h = img.size
        # quality= applies to JPEG/WebP; optimize=True shrinks PNG/JPEG.
        # Unsupported keywords are ignored by other format writers.
        img.save(out_path, quality=90, optimize=True)
    new_tokens = calc_tokens(new_w, new_h)
    orig_size = os.path.getsize(path)
    new_size = os.path.getsize(out_path)
    return {
        "original": path,
        "optimized": out_path,
        "orig_dim": f"{orig_w}x{orig_h}",
        "new_dim": f"{new_w}x{new_h}",
        "orig_tokens": orig_tokens,
        "new_tokens": new_tokens,
        "orig_size_kb": orig_size // 1024,
        "new_size_kb": new_size // 1024,
        "saved_pct": round((1 - new_tokens / orig_tokens) * 100) if orig_tokens > 0 else 0,
    }
def ocr_image(path: str, lang: str, output_dir: str) -> dict:
    """Extract text from *path* using Tesseract OCR into output_dir.

    Returns a stats dict (text file location, char count, token estimates,
    % saved), or {"original": path, "error": ...} if tesseract fails.
    Exits the process if tesseract is not installed.
    """
    if not shutil.which("tesseract"):
        print("Error: tesseract not installed. Run: sudo apt install tesseract-ocr")
        sys.exit(1)
    basename = Path(path).stem
    out_path = os.path.join(output_dir, f"{basename}.txt")
    # tesseract appends ".txt" to the output base path itself.
    result = subprocess.run(
        ["tesseract", path, os.path.join(output_dir, basename), "-l", lang],
        capture_output=True, text=True
    )
    if result.returncode != 0:
        return {"original": path, "error": result.stderr.strip()}
    # Read extracted text (tesseract can succeed yet write nothing).
    text = ""
    if os.path.exists(out_path):
        with open(out_path) as f:
            text = f.read().strip()
    # Read dimensions with a context manager so the file handle is closed
    # promptly (the original leaked it until garbage collection).
    with Image.open(path) as img:
        width, height = img.size
    orig_tokens = calc_tokens(width, height)
    text_tokens = len(text) // 4  # rough estimate: 4 chars per token
    return {
        "original": path,
        "text_file": out_path,
        "orig_dim": f"{width}x{height}",
        "orig_tokens": orig_tokens,
        "text_tokens": text_tokens,
        "text_chars": len(text),
        "saved_pct": round((1 - text_tokens / orig_tokens) * 100) if orig_tokens > 0 else 0,
    }
def collect_files(args_files, args_dir):
    """Combine explicitly listed files with images globbed from args_dir.

    Globs both lower- and upper-case extensions. On case-insensitive
    filesystems (macOS, Windows) both patterns match the same file, so the
    result is de-duplicated (preserving first-seen order) to avoid
    processing the same image twice.
    """
    files = list(args_files) if args_files else []
    if args_dir:
        for ext in ("*.png", "*.jpg", "*.jpeg", "*.webp", "*.gif", "*.bmp"):
            files.extend(glob.glob(os.path.join(args_dir, ext)))
            files.extend(glob.glob(os.path.join(args_dir, ext.upper())))
    # dict.fromkeys de-duplicates while keeping insertion order.
    return list(dict.fromkeys(files))
def main():
    """CLI entry point: parse arguments, then run OCR mode or resize mode."""
    parser = argparse.ArgumentParser(description="Optimize images for Claude Code")
    parser.add_argument("files", nargs="*", help="Image files to process")
    parser.add_argument("--dir", help="Directory of images to process")
    parser.add_argument("--max-dim", type=int, default=DEFAULT_MAX_DIM,
                        help=f"Max dimension in pixels (default: {DEFAULT_MAX_DIM})")
    parser.add_argument("--output", default=None, help="Output directory")
    parser.add_argument("--ocr", action="store_true", help="Extract text via OCR instead of resizing")
    parser.add_argument("--lang", default="eng", help="OCR language (default: eng, use kor for Korean, eng+kor for both)")
    args = parser.parse_args()
    files = collect_files(args.files, args.dir)
    if not files:
        # Nothing to do: show usage and exit with an error status.
        parser.print_help()
        sys.exit(1)
    # --- OCR mode ---
    if args.ocr:
        output_dir = args.output or OCR_OUTPUT_DIR
        os.makedirs(output_dir, exist_ok=True)
        total_orig = 0
        total_text = 0
        results = []
        for f in sorted(files):
            if not os.path.isfile(f):
                print(f"[SKIP] {f} (not found)")
                continue
            r = ocr_image(f, args.lang, output_dir)
            if "error" in r:
                # tesseract failed for this file; report and keep going.
                print(f"[ERR] {os.path.basename(f)}: {r['error']}")
                continue
            results.append(r)
            total_orig += r["orig_tokens"]
            total_text += r["text_tokens"]
            print(f"[OCR] {os.path.basename(f)}: {r['text_chars']} chars extracted, "
                  f"{r['orig_tokens']} → ~{r['text_tokens']} tokens (-{r['saved_pct']}%)")
        if results:
            saved = round((1 - total_text / total_orig) * 100) if total_orig > 0 else 0
            print(f"\n--- OCR Summary ---")
            print(f"Files: {len(results)}")
            print(f"Tokens: {total_orig:,} → ~{total_text:,} ({saved}% saved)")
            print(f"Output: {output_dir}/")
            # Also create a combined file so all extracted text can be
            # read in a single pass.
            combined = os.path.join(output_dir, "_combined.txt")
            with open(combined, "w") as out:
                for r in results:
                    out.write(f"=== {os.path.basename(r['original'])} ===\n")
                    with open(r["text_file"]) as tf:
                        out.write(tf.read())
                    out.write("\n\n")
            print(f"Combined: {combined}")
        return
    # --- Resize mode ---
    output_dir = args.output or OUTPUT_DIR
    os.makedirs(output_dir, exist_ok=True)
    max_dim = args.max_dim
    if len(files) >= 20 and max_dim > MULTI_IMAGE_MAX_DIM:
        # Large batches get a tighter cap (see MULTI_IMAGE_MAX_DIM).
        max_dim = MULTI_IMAGE_MAX_DIM
        print(f"[!] {len(files)} images detected. Auto-reducing max_dim to {max_dim}px")
    results = []
    total_orig_tokens = 0
    total_new_tokens = 0
    for f in sorted(files):
        if not os.path.isfile(f):
            print(f"[SKIP] {f} (not found)")
            continue
        try:
            r = optimize_image(f, max_dim, output_dir)
            results.append(r)
            total_orig_tokens += r["orig_tokens"]
            total_new_tokens += r["new_tokens"]
            saved = f" (-{r['saved_pct']}%)" if r["saved_pct"] > 0 else ""
            print(f"[OK] {os.path.basename(f)}: {r['orig_dim']} → {r['new_dim']}, "
                  f"{r['orig_tokens']} → {r['new_tokens']} tokens{saved}")
        except Exception as e:
            # One unreadable/corrupt image should not abort the batch.
            print(f"[ERR] {f}: {e}")
    if results:
        saved_pct = round((1 - total_new_tokens / total_orig_tokens) * 100) if total_orig_tokens > 0 else 0
        print(f"\n--- Summary ---")
        print(f"Files: {len(results)}")
        print(f"Tokens: {total_orig_tokens:,} → {total_new_tokens:,} ({saved_pct}% saved)")
        print(f"Output: {output_dir}/")
        if len(results) > 100:
            print(f"\n[WARNING] {len(results)} images > 100. Split into batches of ~80.")


if __name__ == "__main__":
    main()

img-optimize Installation Guide

Quick Install (one command)

curl -fsSL https://raw.githubusercontent.com/JunsikChoi/claude-img-optimize/main/install.sh | bash

Manual Install

git clone https://github.com/JunsikChoi/claude-img-optimize.git
cd claude-img-optimize
bash install.sh

What Gets Installed

  1. img-optimize CLI → ~/.local/bin/img-optimize
  2. Claude Code skill → ~/.claude/skills/img-optimize/SKILL.md
  3. Pillow (Python dependency)

Full Documentation

See the GitHub repository for complete docs.

---
name: img-optimize
description: Optimize images before reading to reduce token cost and avoid API limits. Auto-resizes to optimal dimensions or extracts text via OCR. Usage: /img <files|--dir> [--ocr] [--lang kor]
---

Image Optimizer for Claude Code

Optimize images before reading to avoid API limits and reduce token cost.

Usage

/img screenshot.png                    # Resize then read
/img --dir ./screenshots               # Process entire directory
/img --ocr screenshot.png              # Extract text via OCR
/img --ocr --lang kor image.png        # Korean OCR
/img --ocr --lang eng+kor *.png        # Multi-language OCR
/img --max-dim 600 large.png           # Custom max dimension

Behavior

1) Validate Input

  • Check that files/directory exist
  • Supported formats: png, jpg, jpeg, webp, gif, bmp
  • Show usage help if no arguments provided

2) Determine Mode

| Condition | Mode | Reason |
|---|---|---|
| `--ocr` flag | OCR extraction | User explicitly requested |
| Text/code screenshot (contextual) | Suggest OCR | 90% token savings |
| General images, UI designs | Resize | Visual information needed |
| Fewer than 5 images, small size | Read directly | Optimization unnecessary |

3) Execute

Run the img-optimize CLI tool:

# Resize mode
img-optimize $FILES_OR_DIR_ARGS

# OCR mode
img-optimize --ocr --lang $LANG $FILES_OR_DIR_ARGS

Resize output: /tmp/claude-images/ OCR output: /tmp/claude-ocr/ (individual .txt files + _combined.txt)

4) Read Results

  • Resize: Read optimized images from /tmp/claude-images/ using the Read tool
  • OCR: Read text files from /tmp/claude-ocr/ using the Read tool
  • Answer the user's question about the images

5) Report

Display processing summary:

  • Number of files processed
  • Original vs optimized token comparison
  • Savings percentage

API Limits Reference

| Limit | Value |
|---|---|
| Max images per request | 100 |
| Max size per image | 5 MB |
| Total request size | 32 MB |
| Resolution limit (20+ images) | 2000x2000 px |
| Token formula | (width x height) / 750 |

Auto-Optimization Rules

The img-optimize script automatically:

  • Resizes to 1092px max dimension (optimal token cost)
  • Reduces to 1000px for batches of 20+ images
  • Warns about batch splitting for 100+ images

Dependencies

  • ~/.local/bin/img-optimize (Python script)
  • Pillow (pip)
  • tesseract-ocr (optional, for OCR mode)

User input

$ARGUMENTS

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment