Skip to content

Instantly share code, notes, and snippets.

@ClimenteA
Created November 1, 2025 19:19
Show Gist options
  • Select an option

  • Save ClimenteA/468597ecfbae63ea368f625611b2b465 to your computer and use it in GitHub Desktop.

Select an option

Save ClimenteA/468597ecfbae63ea368f625611b2b465 to your computer and use it in GitHub Desktop.
Optimize documents in pdfs and images from a given folder
#!/bin/bash
##############################################################################
# PDF to Optimized PNG Converter
# Converts PDFs to high-contrast, sharpened PNG images
# Optimized for text documents (black text on white background)
# Make executable:
# sudo chmod +x pdf_to_png_optimizer.sh
# ## Usage:
# ```
# # Convert PDFs and optimize images
# ./pdf_to_png_optimizer.sh ./input_folder/ ./output_folder/
# # High resolution (300 DPI)
# ./pdf_to_png_optimizer.sh -d 300 ./input_folder/ ./output_folder/
# # Smaller output (900px max)
# ./pdf_to_png_optimizer.sh -s 900 ./input_folder/ ./output_folder/
# ```
# ## Output Structure:
# **Input:**
# ```
# input_folder/
# ├── invoice.pdf (3 pages)
# ├── receipt.jpg
# └── document.png
# ```
# **Output:**
# ```
# output_folder/
# ├── invoice_page_001.png
# ├── invoice_page_002.png
# ├── invoice_page_003.png
# ├── receipt_optimized.png
# └── document_optimized.png
# ```
##############################################################################
# Keep running after a failing command; each step reports its own errors.
set +e

# Conversion defaults (overridable with -d/--dpi and -s/--size).
DPI=200 MAX_SIZE=2000

# ANSI color sequences, stored as literal backslash text that `echo -e`
# turns into escape codes when the message is printed.
NC="\033[0m" # No Color (reset)
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
BLUE="\033[0;34m"
##############################################################################
# Function: Check dependencies
# Verifies that the external tools used by this script are available.
#   - convert (ImageMagick): required, the script exits without it.
#   - bc: only formats the size statistics, so a missing bc is reported as a
#     warning and does not stop the run.
# Globals:   RED, YELLOW, NC (read)
# Arguments: none
# Outputs:   problem reports and install hints on stdout
# Returns:   0 when convert is available; exits the script with status 1
#            when it is not
##############################################################################
check_dependencies() {
  local missing=0

  if ! command -v convert &> /dev/null; then
    echo -e "${RED}✗ ImageMagick (convert) is not installed${NC}"
    missing=1
  fi

  # Non-fatal: conversions still work, only the KB/MB figures are affected.
  if ! command -v bc &> /dev/null; then
    echo -e "${YELLOW}⚠ bc is not installed - size statistics will be incomplete${NC}"
  fi

  if [ "$missing" -eq 1 ]; then
    echo ""
    echo "Install missing dependencies:"
    echo " Ubuntu/Debian: sudo apt-get install imagemagick"
    echo " macOS: brew install imagemagick"
    echo " Fedora: sudo dnf install ImageMagick"
    exit 1
  fi
}
##############################################################################
# Function: Convert PDF to optimized PNGs
# Renders each page of a PDF to a grayscale, contrast-enhanced PNG named
# <name>_page_NNN.png (numbered from 001) inside the output directory.
# Globals:   DPI, MAX_SIZE (read); RED, GREEN, YELLOW, NC (read)
# Arguments: $1 - path of the PDF to convert
#            $2 - existing directory that receives the PNG files
# Outputs:   progress and size statistics on stdout
# Returns:   0 if at least one page image exists afterwards, 1 otherwise
##############################################################################
convert_pdf_to_png() {
  local input_pdf="$1"
  local output_dir="$2"

  local file_name
  file_name=$(basename "$input_pdf")

  # Strip the extension regardless of its case (.pdf, .PDF, .Pdf, ...);
  # names without a PDF extension are kept unchanged.
  local base_name
  case "$file_name" in
    *.[Pp][Dd][Ff]) base_name="${file_name%.*}" ;;
    *) base_name="$file_name" ;;
  esac

  echo -e "${YELLOW}Converting PDF: ${file_name}${NC}"

  # Get original file size (BSD/macOS stat first, then GNU stat)
  local original_size
  original_size=$(stat -f%z "$input_pdf" 2>/dev/null || stat -c%s "$input_pdf" 2>/dev/null)

  # Convert PDF to optimized PNG images.
  # -scene 1 starts the %03d page counter at 001 instead of 000.
  # The resize geometry is quoted so that ">" (only shrink larger images)
  # reaches ImageMagick literally.
  convert -density "${DPI}" "$input_pdf" \
    -colorspace Gray \
    -normalize \
    -contrast-stretch 2%x1% \
    -level 5%,95% \
    -sharpen 0x1 \
    -resize "${MAX_SIZE}x${MAX_SIZE}>" \
    -background white \
    -alpha remove \
    -alpha off \
    -strip \
    -scene 1 \
    "${output_dir}/${base_name}_page_%03d.png" 2>&1 | grep -v "^convert"

  # Collect generated files with a glob loop instead of parsing `ls`.
  # The -e test makes this correct with nullglob on (no iteration) and off
  # (the unmatched literal pattern is rejected), so an empty match can never
  # turn into a listing of the current directory.
  local -a pages=()
  local page
  for page in "${output_dir}/${base_name}_page_"*.png; do
    [ -e "$page" ] && pages+=("$page")
  done
  local num_files=${#pages[@]}

  if [ "$num_files" -gt 0 ]; then
    echo -e "${GREEN} ✓ Created: ${num_files} page(s)${NC}"
    echo " Original PDF: $(echo "scale=1; $original_size / 1024" | bc) KB"
    # Calculate total output size (du reports KB per file; awk sums them)
    local output_size
    output_size=$(du -sk "${pages[@]}" 2>/dev/null | awk '{sum+=$1} END {print sum}')
    echo " Output PNGs: ${output_size} KB total"
    echo ""
    return 0
  else
    echo -e "${RED} ✗ Failed to convert${NC}"
    echo ""
    return 1
  fi
}
##############################################################################
# Function: Optimize image to PNG
# Converts one image to a grayscale, contrast-enhanced PNG and reports the
# size before/after.
# Globals:   MAX_SIZE (read); RED, GREEN, YELLOW, NC (read)
# Arguments: $1 - path of the source image
#            $2 - path of the PNG to write
# Outputs:   progress and size statistics on stdout
# Returns:   0 if the output file exists afterwards, 1 otherwise
##############################################################################
optimize_image_to_png() {
  local input="$1"
  local output="$2"

  echo -e "${YELLOW}Optimizing image: $(basename "$input")${NC}"

  # Get original file size (BSD/macOS stat first, then GNU stat)
  local original_size
  original_size=$(stat -f%z "$input" 2>/dev/null || stat -c%s "$input" 2>/dev/null)

  # Optimize image. The resize geometry is quoted so that ">" (only shrink
  # larger images) reaches ImageMagick literally.
  convert "$input" \
    -auto-orient \
    -resize "${MAX_SIZE}x${MAX_SIZE}>" \
    -colorspace Gray \
    -normalize \
    -contrast-stretch 2%x1% \
    -level 5%,95% \
    -sharpen 0x1 \
    -background white \
    -alpha remove \
    -alpha off \
    -strip \
    "$output" 2>&1 | grep -v "^convert"

  if [ ! -f "$output" ]; then
    echo -e "${RED} ✗ Failed to optimize${NC}"
    echo ""
    return 1
  fi

  local output_size
  output_size=$(stat -f%z "$output" 2>/dev/null || stat -c%s "$output" 2>/dev/null)

  # Percentage saved relative to the original, with one decimal.
  # Integer arithmetic in tenths of a percent avoids the precision loss of
  # dividing first at scale=1 (which rounds the ratio to 10% steps) and the
  # division by zero for an empty original file.
  local reduction="N/A"
  if [[ "$original_size" =~ ^[0-9]+$ && "$output_size" =~ ^[0-9]+$ ]] \
    && (( original_size > 0 )); then
    local tenths=$(( (original_size - output_size) * 1000 / original_size ))
    local sign=""
    if (( tenths < 0 )); then
      # Output grew: keep the sign separate so the decimal part stays positive.
      sign="-"
      tenths=$(( -tenths ))
    fi
    reduction="${sign}$(( tenths / 10 )).$(( tenths % 10 ))"
  fi

  echo -e "${GREEN} ✓ Created: $(basename "$output")${NC}"
  echo " Original: $(echo "scale=1; $original_size / 1024" | bc) KB"
  echo " Optimized: $(echo "scale=1; $output_size / 1024" | bc) KB"
  if [ "$reduction" != "N/A" ]; then
    echo " Size change: ${reduction}%"
  fi
  echo ""
  return 0
}
##############################################################################
# Function: Process directory
# Walks the regular files directly inside the input directory, sends PDFs to
# convert_pdf_to_png and jpg/jpeg/png images to optimize_image_to_png, then
# prints a summary.
# Globals:   DPI, MAX_SIZE (read); RED, GREEN, YELLOW, BLUE, NC (read)
# Arguments: $1 - input directory
#            $2 - output directory (created if missing)
# Outputs:   banner, per-file progress (from the helpers) and summary on stdout
# Returns:   1 if the output directory cannot be created, 0 otherwise
##############################################################################
process_directory() {
  local input_dir="$1"
  local output_dir="$2"

  echo -e "${BLUE}================================================${NC}"
  echo -e "${BLUE}PDF to Optimized PNG Converter${NC}"
  echo -e "${BLUE}================================================${NC}"
  echo ""
  echo "Input directory: $input_dir"
  echo "Output directory: $output_dir"
  echo "Settings: DPI=$DPI, MaxSize=${MAX_SIZE}px"
  echo "Format: Grayscale PNG with enhanced contrast"
  echo ""

  # Create output directory; without it every conversion would fail.
  if ! mkdir -p "$output_dir"; then
    echo -e "${RED}Error: Cannot create output directory: $output_dir${NC}"
    return 1
  fi

  # Counters
  local total_files=0
  local processed_files=0
  local skipped_files=0

  # Expand the file list with nullglob on (an empty directory yields an empty
  # list), then put the option back to its previous state right away so the
  # helper functions do not run with nullglob enabled.
  local had_nullglob=0
  shopt -q nullglob && had_nullglob=1
  shopt -s nullglob
  local -a candidates=("$input_dir"/*)
  (( had_nullglob )) || shopt -u nullglob

  # Process all supported files
  local file filename extension stem output_png
  for file in "${candidates[@]}"; do
    [ -f "$file" ] || continue
    filename=$(basename "$file")

    # Only text after a dot that is not the first character counts as an
    # extension; a file literally named "pdf" or ".png" has none.
    case "$filename" in
      ?*.*) extension="${filename##*.}" ;;
      *) extension="" ;;
    esac
    stem="${filename%.*}"

    # Convert extension to lowercase (tr keeps this working on bash 3.x)
    extension=$(echo "$extension" | tr '[:upper:]' '[:lower:]')
    total_files=$((total_files + 1))

    case "$extension" in
      pdf)
        if convert_pdf_to_png "$file" "$output_dir"; then
          processed_files=$((processed_files + 1))
        else
          skipped_files=$((skipped_files + 1))
        fi
        ;;
      jpg|jpeg|png)
        output_png="$output_dir/${stem}_optimized.png"
        if optimize_image_to_png "$file" "$output_png"; then
          processed_files=$((processed_files + 1))
        else
          skipped_files=$((skipped_files + 1))
        fi
        ;;
      *)
        echo -e "${YELLOW}Skipping: $filename (unsupported format)${NC}"
        skipped_files=$((skipped_files + 1))
        ;;
    esac
  done

  # Summary
  echo -e "${BLUE}================================================${NC}"
  echo -e "${GREEN}Processing Complete!${NC}"
  echo -e "${BLUE}================================================${NC}"
  echo "Total files found: $total_files"
  echo "Processed: $processed_files"
  echo "Skipped: $skipped_files"
  echo ""
  echo "Output location: $output_dir"

  # Calculate total sizes (du reports KB; divide to get MB)
  if [ "$processed_files" -gt 0 ]; then
    local output_total
    output_total=$(du -sk "$output_dir" 2>/dev/null | awk '{print $1}')
    echo "Total output size: $(echo "scale=1; $output_total / 1024" | bc) MB"
  fi
  return 0
}
##############################################################################
# Function: Show usage
# Prints the help text (synopsis, options, examples, processing rules and the
# list of applied optimizations) to stdout, one line per printf argument.
# The script name shown in the text is taken from $0.
##############################################################################
show_usage() {
  local prog="$0"
  printf '%s\n' \
    "Usage: ${prog} [OPTIONS] INPUT_DIR OUTPUT_DIR" \
    'Convert PDFs to optimized PNG images and optimize existing images.' \
    'Output format: Grayscale PNG with enhanced contrast and sharpening.' \
    'Supported input formats: .pdf, .jpg, .jpeg, .png' \
    'Options:' \
    '-d, --dpi NUM DPI/resolution (default: 200)' \
    '-s, --size NUM Max dimension in pixels (default: 2000)' \
    '-h, --help Show this help' \
    'Examples:' \
    '# Basic usage' \
    "${prog} ./pdfs/ ./output_images/" \
    '# High resolution' \
    "${prog} -d 300 ./documents/ ./high_res/" \
    '# Smaller file size' \
    "${prog} -s 1600 ./scans/ ./compressed/" \
    'Processing:' \
    '- PDFs → Converted to PNG (one image per page)' \
    '- Images (.jpg, .jpeg, .png) → Optimized to PNG' \
    '- Multi-page PDFs → Multiple PNG files (filename_page_001.png, etc.)' \
    'Optimizations applied:' \
    '✓ Grayscale conversion (black text on white background)' \
    '✓ Contrast enhancement (darker text, whiter background)' \
    '✓ Background whitening' \
    '✓ Text sharpening' \
    '✓ Automatic resizing' \
    '✓ Metadata removal'
}
##############################################################################
# Main script
# Parses the command line, validates the directories, asks for confirmation
# when the output directory already contains files, checks dependencies and
# processes the input directory.
# Exit status: 0 on success or --help; 1 on usage errors, a missing input
# directory, a declined confirmation, missing dependencies or a failed run.
##############################################################################

# Abort unless the option was given a positive integer value.
# Arguments: $1 - option as typed
#            $2 - number of arguments left, the option itself included
#            $3 - candidate value (may be empty)
require_positive_int() {
  local opt="$1" remaining="$2" value="$3"
  # Without this check a trailing "-d" made "shift 2" fail and the option
  # loop never terminated.
  if [[ "$remaining" -lt 2 ]]; then
    echo -e "${RED}Error: Option $opt requires a value${NC}"
    exit 1
  fi
  if [[ ! "$value" =~ ^[1-9][0-9]*$ ]]; then
    echo -e "${RED}Error: Option $opt expects a positive integer, got: $value${NC}"
    exit 1
  fi
}

# Parse options
while [[ $# -gt 0 ]]; do
  case "$1" in
    -d|--dpi)
      require_positive_int "$1" "$#" "${2:-}"
      DPI="$2"
      shift 2
      ;;
    -s|--size)
      require_positive_int "$1" "$#" "${2:-}"
      MAX_SIZE="$2"
      shift 2
      ;;
    -h|--help)
      show_usage
      exit 0
      ;;
    --)
      # Explicit end of options: allows directory names starting with "-".
      shift
      break
      ;;
    -*)
      echo -e "${RED}Unknown option: $1${NC}"
      show_usage
      exit 1
      ;;
    *)
      break
      ;;
  esac
done

# Check arguments
if [ $# -ne 2 ]; then
  echo -e "${RED}Error: INPUT_DIR and OUTPUT_DIR required${NC}"
  echo ""
  show_usage
  exit 1
fi
INPUT_DIR="$1"
OUTPUT_DIR="$2"

# Validate input directory
if [ ! -d "$INPUT_DIR" ]; then
  echo -e "${RED}Error: Input directory does not exist: $INPUT_DIR${NC}"
  exit 1
fi

# Check if output directory exists and is not empty
if [ -d "$OUTPUT_DIR" ] && [ "$(ls -A "$OUTPUT_DIR" 2>/dev/null)" ]; then
  echo -e "${YELLOW}Warning: Output directory is not empty: $OUTPUT_DIR${NC}"
  # Single-keystroke answer; anything other than y/Y (including EOF) aborts.
  read -p "Continue anyway? (y/N): " -n 1 -r
  echo
  if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    echo "Aborted."
    exit 1
  fi
fi

# Check dependencies
check_dependencies

# Process directory; only announce success when the run did not fail.
process_directory "$INPUT_DIR" "$OUTPUT_DIR" || exit 1
echo -e "${GREEN}Done!${NC}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment