Created
November 1, 2025 19:19
-
-
Save ClimenteA/468597ecfbae63ea368f625611b2b465 to your computer and use it in GitHub Desktop.
Optimize documents (PDFs and images) from a given folder
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/bash | |
| ############################################################################## | |
| # PDF to Optimized PNG Converter | |
| # Converts PDFs to high-contrast, sharpened PNG images | |
| # Optimized for text documents (black text on white background) | |
| # Make executable | |
| # sudo chmod +x pdf_to_png_optimizer.sh | |
| # # Convert PDFs and optimize images | |
| # ./pdf_to_png_optimizer.sh ./input_folder/ ./output_folder/ | |
| # # High resolution (300 DPI) | |
| # ./pdf_to_png_optimizer.sh -d 300 ./input_folder/ ./output_folder/ | |
| # # Smaller output (900px max) | |
| # ./pdf_to_png_optimizer.sh -s 900 ./input_folder/ ./output_folder/ | |
| # ``` | |
| # ## Output Structure: | |
| # **Input:** | |
| # ``` | |
| # input_folder/ | |
| # ├── invoice.pdf (3 pages) | |
| # ├── receipt.jpg | |
| # └── document.png | |
| # ``` | |
| # **Output:** | |
| # ``` | |
| # output_folder/ | |
| # ├── invoice_page_001.png | |
| # ├── invoice_page_002.png | |
| # ├── invoice_page_003.png | |
| # ├── receipt_optimized.png | |
| # └── document_optimized.png | |
| ############################################################################## | |
# Keep going after a failing command; each step reports its own errors.
set +e

# Rendering defaults (the option parser may overwrite both).
DPI=200 MAX_SIZE=2000

# ANSI color escapes, meant to be printed with `echo -e`.
NC='\033[0m' # No Color
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'
##############################################################################
# Function: Check dependencies
# Verifies that the external tools this script invokes are on PATH:
#   convert (ImageMagick) - rendering and optimization
#   bc                    - size arithmetic in the reports
# Globals:   RED, NC (read)
# Arguments: none
# Outputs:   one line per missing tool plus install hints (stdout)
# Returns:   0 if everything is available; otherwise exits the script with 1
##############################################################################
check_dependencies() {
  local missing=0

  if ! command -v convert > /dev/null 2>&1; then
    echo -e "${RED}✗ ImageMagick (convert) is not installed${NC}"
    missing=1
  fi

  # The size reports pipe their arithmetic through bc; without it they
  # print "command not found" noise and blank numbers.
  if ! command -v bc > /dev/null 2>&1; then
    echo -e "${RED}✗ bc is not installed${NC}"
    missing=1
  fi

  if (( missing )); then
    echo ""
    echo "Install missing dependencies:"
    echo " Ubuntu/Debian: sudo apt-get install imagemagick bc"
    echo " macOS: brew install imagemagick bc"
    echo " Fedora: sudo dnf install ImageMagick bc"
    exit 1
  fi
}
##############################################################################
# Function: Convert PDF to optimized PNGs
# Renders every page of a PDF to a grayscale, contrast-enhanced PNG named
# <pdf-name>_page_NNN.png (NNN starts at 001) inside the output directory.
# Globals:   DPI, MAX_SIZE, YELLOW, GREEN, RED, NC (read)
# Arguments: $1 - path to the input PDF
#            $2 - existing directory that receives the PNG files
# Outputs:   progress and size report on stdout
# Returns:   0 if at least one page file exists afterwards, 1 otherwise
##############################################################################
convert_pdf_to_png() {
  local input_pdf="$1"
  local output_dir="$2"

  local file_name
  file_name=$(basename "$input_pdf")

  # Strip the extension regardless of case (.pdf, .PDF, .Pdf, ...).
  local base_name="$file_name"
  case "$base_name" in
    *.[Pp][Dd][Ff]) base_name="${base_name%.*}" ;;
  esac

  echo -e "${YELLOW}Converting PDF: ${file_name}${NC}"

  # Get original file size (BSD stat first, then GNU stat)
  local original_size
  original_size=$(stat -f%z "$input_pdf" 2>/dev/null || stat -c%s "$input_pdf" 2>/dev/null)
  original_size=${original_size:-0}

  # Convert PDF to optimized PNG images.
  # -scene 1 makes the %03d counter start at 001 instead of 000.
  # The trailing grep drops ImageMagick's own "convert..." message lines.
  convert -density "${DPI}" "$input_pdf" \
    -colorspace Gray \
    -normalize \
    -contrast-stretch 2%x1% \
    -level 5%,95% \
    -sharpen 0x1 \
    -resize "${MAX_SIZE}x${MAX_SIZE}>" \
    -background white \
    -alpha remove \
    -alpha off \
    -strip \
    -scene 1 \
    "${output_dir}/${base_name}_page_%03d.png" 2>&1 | grep -v "^convert"

  # Collect generated files with a glob instead of parsing `ls`. When the
  # caller has nullglob enabled, an unmatched pattern handed to ls/du
  # disappears and those tools silently operate on the current directory.
  local -a pages
  pages=("${output_dir}/${base_name}_page_"*.png)
  local num_files=0
  if [[ -e "${pages[0]:-}" ]]; then
    num_files=${#pages[@]}
  fi

  if (( num_files > 0 )); then
    echo -e "${GREEN} ✓ Created: ${num_files} page(s)${NC}"
    echo " Original PDF: $(echo "scale=1; $original_size / 1024" | bc) KB"

    # Calculate total output size (du -k reports kilobytes per file)
    local output_size
    output_size=$(du -sk "${pages[@]}" 2>/dev/null | awk '{sum+=$1} END {print sum+0}')
    echo " Output PNGs: ${output_size} KB total"
    echo ""
    return 0
  fi

  echo -e "${RED} ✗ Failed to convert${NC}"
  echo ""
  return 1
}
##############################################################################
# Function: Optimize image to PNG
# Converts one raster image to a grayscale, contrast-enhanced PNG and prints
# a before/after size report.
# Globals:   MAX_SIZE, YELLOW, GREEN, RED, NC (read)
# Arguments: $1 - path to the input image
#            $2 - path of the PNG to write
# Outputs:   progress and size report on stdout
# Returns:   0 if the output file exists afterwards, 1 otherwise
##############################################################################
optimize_image_to_png() {
  local input="$1"
  local output="$2"

  echo -e "${YELLOW}Optimizing image: $(basename "$input")${NC}"

  # Get original file size (BSD stat first, then GNU stat)
  local original_size
  original_size=$(stat -f%z "$input" 2>/dev/null || stat -c%s "$input" 2>/dev/null)
  original_size=${original_size:-0}

  # Optimize image; the trailing grep drops ImageMagick's own "convert..."
  # message lines.
  convert "$input" \
    -auto-orient \
    -resize "${MAX_SIZE}x${MAX_SIZE}>" \
    -colorspace Gray \
    -normalize \
    -contrast-stretch 2%x1% \
    -level 5%,95% \
    -sharpen 0x1 \
    -background white \
    -alpha remove \
    -alpha off \
    -strip \
    "$output" 2>&1 | grep -v "^convert"

  if [[ ! -f "$output" ]]; then
    echo -e "${RED} ✗ Failed to optimize${NC}"
    echo ""
    return 1
  fi

  local output_size
  output_size=$(stat -f%z "$output" 2>/dev/null || stat -c%s "$output" 2>/dev/null)
  output_size=${output_size:-0}

  # Use awk for the percentage: bc with scale=1 truncates the ratio to one
  # decimal *before* multiplying, so every result was a multiple of 10.
  # A zero-byte original yields "N/A" instead of a division by zero.
  local reduction
  reduction=$(awk -v before="$original_size" -v after="$output_size" \
    'BEGIN {
       if (before > 0) printf "%.1f", (1 - after / before) * 100
       else printf "N/A"
     }')

  echo -e "${GREEN} ✓ Created: $(basename "$output")${NC}"
  echo " Original: $(echo "scale=1; $original_size / 1024" | bc) KB"
  echo " Optimized: $(echo "scale=1; $output_size / 1024" | bc) KB"
  if [[ "$reduction" != "N/A" ]]; then
    echo " Size change: ${reduction}%"
  fi
  echo ""
  return 0
}
##############################################################################
# Function: Process directory
# Walks the regular files directly inside the input directory, sends PDFs to
# convert_pdf_to_png and .jpg/.jpeg/.png images to optimize_image_to_png,
# then prints a summary.
# Globals:   DPI, MAX_SIZE, BLUE, GREEN, YELLOW, RED, NC (read)
# Arguments: $1 - input directory
#            $2 - output directory (created if missing)
# Outputs:   banner, per-file progress and summary on stdout
# Returns:   1 if the output directory cannot be created, 0 otherwise
##############################################################################
process_directory() {
  local input_dir="$1"
  local output_dir="$2"

  echo -e "${BLUE}================================================${NC}"
  echo -e "${BLUE}PDF to Optimized PNG Converter${NC}"
  echo -e "${BLUE}================================================${NC}"
  echo ""
  echo "Input directory: $input_dir"
  echo "Output directory: $output_dir"
  echo "Settings: DPI=$DPI, MaxSize=${MAX_SIZE}px"
  echo "Format: Grayscale PNG with enhanced contrast"
  echo ""

  # Create output directory; without it every conversion would fail.
  if ! mkdir -p "$output_dir"; then
    echo -e "${RED}Error: Cannot create output directory: $output_dir${NC}"
    return 1
  fi

  # Counters
  local total_files=0
  local processed_files=0
  local skipped_files=0

  local file filename raw_extension extension stem output_png taken
  local -a entries
  local -a claimed_outputs=()

  # Expand the directory listing with nullglob so an empty directory gives
  # zero iterations, then put the option back the way the caller had it so
  # the converters run with the shell's normal globbing behaviour.
  local had_nullglob=0
  shopt -q nullglob && had_nullglob=1
  shopt -s nullglob
  entries=("$input_dir"/*)
  (( had_nullglob )) || shopt -u nullglob

  # Process all supported files
  for file in "${entries[@]}"; do
    [[ -f "$file" ]] || continue

    filename=$(basename "$file")
    raw_extension="${filename##*.}"
    stem="${filename%.*}"
    # Convert extension to lowercase for matching
    extension=$(echo "$raw_extension" | tr '[:upper:]' '[:lower:]')
    total_files=$((total_files + 1))

    case "$extension" in
      pdf)
        if convert_pdf_to_png "$file" "$output_dir"; then
          processed_files=$((processed_files + 1))
        else
          skipped_files=$((skipped_files + 1))
        fi
        ;;
      jpg|jpeg|png)
        output_png="$output_dir/${stem}_optimized.png"
        # Two inputs with the same stem (a.jpg + a.png) would overwrite each
        # other; the later one gets its extension added to the name instead.
        for taken in "${claimed_outputs[@]}"; do
          if [[ "$taken" == "$output_png" ]]; then
            output_png="$output_dir/${stem}_${raw_extension}_optimized.png"
            break
          fi
        done
        claimed_outputs+=("$output_png")

        if optimize_image_to_png "$file" "$output_png"; then
          processed_files=$((processed_files + 1))
        else
          skipped_files=$((skipped_files + 1))
        fi
        ;;
      *)
        echo -e "${YELLOW}Skipping: $filename (unsupported format)${NC}"
        skipped_files=$((skipped_files + 1))
        ;;
    esac
  done

  # Summary ("Skipped" also counts files whose conversion failed)
  echo -e "${BLUE}================================================${NC}"
  echo -e "${GREEN}Processing Complete!${NC}"
  echo -e "${BLUE}================================================${NC}"
  echo "Total files found: $total_files"
  echo "Processed: $processed_files"
  echo "Skipped: $skipped_files"
  echo ""
  echo "Output location: $output_dir"

  # Calculate total sizes
  if (( processed_files > 0 )); then
    local output_total
    output_total=$(du -sk "$output_dir" 2>/dev/null | awk '{print $1}')
    echo "Total output size: $(echo "scale=1; $output_total / 1024" | bc) MB"
  fi
  return 0
}
##############################################################################
# Function: Show usage
# Prints the help text (synopsis, options, examples, processing rules and
# applied optimizations) to stdout. The script's own invocation name ($0) is
# substituted into the synopsis and the examples.
##############################################################################
show_usage() {
  local script_name="$0"

  printf '%s\n' \
    "Usage: ${script_name} [OPTIONS] INPUT_DIR OUTPUT_DIR" \
    'Convert PDFs to optimized PNG images and optimize existing images.' \
    'Output format: Grayscale PNG with enhanced contrast and sharpening.' \
    'Supported input formats: .pdf, .jpg, .jpeg, .png' \
    'Options:' \
    '-d, --dpi NUM DPI/resolution (default: 200)' \
    '-s, --size NUM Max dimension in pixels (default: 2000)' \
    '-h, --help Show this help' \
    'Examples:' \
    '# Basic usage' \
    "${script_name} ./pdfs/ ./output_images/" \
    '# High resolution' \
    "${script_name} -d 300 ./documents/ ./high_res/" \
    '# Smaller file size' \
    "${script_name} -s 1600 ./scans/ ./compressed/" \
    'Processing:' \
    '- PDFs → Converted to PNG (one image per page)' \
    '- Images (.jpg, .jpeg, .png) → Optimized to PNG' \
    '- Multi-page PDFs → Multiple PNG files (filename_page_001.png, etc.)' \
    'Optimizations applied:' \
    '✓ Grayscale conversion (black text on white background)' \
    '✓ Contrast enhancement (darker text, whiter background)' \
    '✓ Background whitening' \
    '✓ Text sharpening' \
    '✓ Automatic resizing' \
    '✓ Metadata removal'
}
##############################################################################
# Main script
# Flow: parse options -> validate the two positional directories -> confirm
# when the output directory already has content -> check dependencies ->
# process the input directory.
##############################################################################

# Parse options
while [[ $# -gt 0 ]]; do
  case "$1" in
    -d|--dpi)
      # Validate before shifting: with the value missing, `shift 2` fails
      # without shifting anything and this loop would never terminate.
      if [[ $# -lt 2 || ! "$2" =~ ^[1-9][0-9]*$ ]]; then
        echo -e "${RED}Error: $1 requires a positive integer value${NC}"
        exit 1
      fi
      DPI="$2"
      shift 2
      ;;
    -s|--size)
      if [[ $# -lt 2 || ! "$2" =~ ^[1-9][0-9]*$ ]]; then
        echo -e "${RED}Error: $1 requires a positive integer value${NC}"
        exit 1
      fi
      MAX_SIZE="$2"
      shift 2
      ;;
    -h|--help)
      show_usage
      exit 0
      ;;
    --)
      # Conventional end-of-options marker; allows directories whose names
      # start with a dash.
      shift
      break
      ;;
    -*)
      echo -e "${RED}Unknown option: $1${NC}"
      show_usage
      exit 1
      ;;
    *)
      break
      ;;
  esac
done

# Check arguments
if [[ $# -ne 2 ]]; then
  echo -e "${RED}Error: INPUT_DIR and OUTPUT_DIR required${NC}"
  echo ""
  show_usage
  exit 1
fi

INPUT_DIR="$1"
OUTPUT_DIR="$2"

# Validate input directory
if [[ ! -d "$INPUT_DIR" ]]; then
  echo -e "${RED}Error: Input directory does not exist: $INPUT_DIR${NC}"
  exit 1
fi

# Check if output directory exists and is not empty
if [[ -d "$OUTPUT_DIR" ]] && [[ -n "$(ls -A "$OUTPUT_DIR" 2>/dev/null)" ]]; then
  echo -e "${YELLOW}Warning: Output directory is not empty: $OUTPUT_DIR${NC}"
  # Single keypress; anything other than y/Y aborts.
  read -p "Continue anyway? (y/N): " -n 1 -r
  echo
  if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    echo "Aborted."
    exit 1
  fi
fi

# Check dependencies
check_dependencies

# Process directory
process_directory "$INPUT_DIR" "$OUTPUT_DIR"

echo -e "${GREEN}Done!${NC}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment