Last active
March 11, 2026 12:15
-
-
Save 10h30/f6720ebbad3d5acd40e20a9883690bcb to your computer and use it in GitHub Desktop.
Replaces resized WordPress image URLs with originals in an XML export file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# ==============================================================================
# wp-image-fixer.sh
# Replaces resized WordPress image URLs with originals in an XML export file.
#
# Usage:
#   ./wp-image-fixer.sh            # dry-run: scan and log results
#   ./wp-image-fixer.sh --apply    # apply replacements from cached log
#   ./wp-image-fixer.sh --help     # print a one-line usage summary
# ==============================================================================
set -euo pipefail

# Working files, all relative to the current directory.
LOG=".wp-image-check.log"    # scan results: FILE|..., OK|..., MISS|... records
URLS=".wp-image-urls.tmp"    # de-duplicated resized URLs found in the export
MAP=".wp-image-map.txt"      # tab-separated resized/original pairs for --apply

APPLY=false     # switched to true by --apply
PARALLEL=50     # number of URL checks run concurrently
TIMEOUT=5       # per-request curl limit, in seconds

# Accept only the documented arguments. Silently ignoring a mistyped flag
# (e.g. --aply) would start a fresh scan, and a fresh scan removes the cached
# log that --apply depends on.
case "${1:-}" in
    "")
        ;;
    --apply)
        APPLY=true
        ;;
    -h|--help)
        echo "Usage: $0 [--apply]"
        exit 0
        ;;
    *)
        echo "[error] Unknown argument: $1 (expected --apply or no argument)" >&2
        exit 1
        ;;
esac
# Console helpers. Each one prints all of its arguments, space-joined, behind
# a bracketed severity tag. info/success/warn write to stdout; die writes to
# stderr and terminates the script with status 1.
info() {
    printf '%s\n' "[info] $*"
}

success() {
    printf '%s\n' "[ok] $*"
}

warn() {
    printf '%s\n' "[warn] $*"
}

die() {
    printf '%s\n' "[error] $*" >&2
    exit 1
}
# ------------------------------------------------------------------------------
# Cleanup on exit or Ctrl+C
# ------------------------------------------------------------------------------
# cleanup [EXIT_CODE]
#   Signal handler: removes the temporary URL list, signals every process in
#   this script's process group (which stops the background URL checks), and
#   then exits with EXIT_CODE (default 1).
cleanup() {
    local code="${1:-1}"

    # `kill 0` below signals our whole process group, this shell included.
    # Ignore INT/TERM first so the handler is not re-entered by its own signal.
    trap '' INT TERM

    rm -f "$URLS"
    kill 0 2>/dev/null || true

    # A trap handler that merely returns lets the script resume where it was
    # interrupted; exit explicitly so Ctrl+C really stops the run.
    exit "$code"
}
trap 'cleanup 130' INT     # 128 + SIGINT (2)
trap 'cleanup 143' TERM    # 128 + SIGTERM (15)
trap 'rm -f "$URLS"' EXIT
# ------------------------------------------------------------------------------
# Detect existing log / prompt for input
# ------------------------------------------------------------------------------
# --apply : read the export path back from the FILE| record of the cached log
#           and ask for confirmation.
# dry-run : ask for the export path and start a new log with a FILE| record.
if $APPLY; then
    [[ -f "$LOG" ]] || die "No scan found. Run the script without --apply first."

    # sed rather than grep|cut: grep exits 1 when nothing matches, which under
    # `set -e` + pipefail would end the script with no message, and `cut -f2`
    # would truncate a path that itself contains '|'.
    INPUT=$(sed -n '/^FILE|/{s/^FILE|//p;q;}' "$LOG")
    [[ -n "$INPUT" ]] || die "Scan log $LOG has no FILE entry. Run the script without --apply first."
    [[ -f "$INPUT" ]] || die "File not found: $INPUT"

    info "Existing scan found for: $INPUT"
    echo
    # `read` returns non-zero at end of input; treat that as "no" instead of
    # letting `set -e` end the script silently.
    confirm=""
    read -rp "Apply fixes using cached results? (y/N): " confirm || true
    [[ "$confirm" == [yY] ]] || exit 0
else
    INPUT=""
    read -rp "Enter WordPress XML export file: " INPUT || true
    [[ -f "$INPUT" ]] || die "File not found: $INPUT"

    # Discard the previous scan only once the new input is known to be valid,
    # so a mistyped path does not throw away cached results.
    if [[ -f "$LOG" ]]; then
        info "Removing previous scan results..."
        rm -f "$LOG"
    fi
    echo "FILE|$INPUT" > "$LOG"
fi
| # ------------------------------------------------------------------------------ | |
| # DRY RUN — scan images and check originals | |
| # ------------------------------------------------------------------------------ | |
| if ! $APPLY; then | |
| echo | |
| info "Extracting resized image URLs..." | |
| grep -oE 'https?://[^"[:space:]]+-[0-9]+x[0-9]+\.(jpg|jpeg|png|webp)' "$INPUT" \ | |
| | sort -u > "$URLS" | |
| TOTAL=$(wc -l < "$URLS" | tr -d ' ') | |
| info "Found $TOTAL resized image URLs" | |
| echo | |
| if [[ "$TOTAL" -eq 0 ]]; then | |
| warn "No resized images found. Nothing to do." | |
| rm -f "$LOG" | |
| exit 0 | |
| fi | |
| EST=$(( (TOTAL / PARALLEL) * TIMEOUT )) | |
| info "Checking originals (~${EST}s estimated)..." | |
| echo | |
# check_url RESIZED LOG TIMEOUT
#   Derives the original image URL by dropping the trailing -<width>x<height>
#   size suffix from RESIZED, probes it with curl, and appends one record to
#   the file LOG:
#     OK|<resized>|<original>               probe returned 200, 206, 301 or 302
#     MISS|<resized>|<original> (<code>)    anything else
#   TIMEOUT is passed to curl as --max-time (seconds per request).
check_url() {
    local resized="$1"
    local log="$2"
    local timeout="$3"

    # Strip only the size suffix that sits directly before the extension at the
    # very end of the URL; a similar-looking fragment earlier in the path is
    # left alone. If the URL does not match, it is probed unchanged.
    local original="$resized"
    local size_suffix='^(.*)-[0-9]+x[0-9]+(\.(jpg|jpeg|png|webp))$'
    if [[ "$resized" =~ $size_suffix ]]; then
        original="${BASH_REMATCH[1]}${BASH_REMATCH[2]}"
    fi

    # curl's -w output is already "000" when no HTTP response was received, so
    # a failed request must not append another "000" (that produced "000000"
    # and skipped the fallback below). Only default when nothing was printed.
    local code
    code=$(curl -s -o /dev/null -w "%{http_code}" \
        --max-time "$timeout" \
        --head \
        --retry 1 --retry-delay 1 \
        "$original" 2>/dev/null) || true
    code="${code:-000}"

    # Fall back to a one-byte ranged GET if the server blocks HEAD or the HEAD
    # request got no response at all.
    if [[ "$code" == "405" || "$code" == "403" || "$code" == "000" ]]; then
        code=$(curl -s -o /dev/null -w "%{http_code}" \
            --max-time "$timeout" \
            -H "Range: bytes=0-0" \
            "$original" 2>/dev/null) || true
        code="${code:-000}"
    fi

    case "$code" in
        200|206|301|302)
            echo "OK|$resized|$original" >> "$log"
            ;;
        *)
            echo "MISS|$resized|$original ($code)" >> "$log"
            ;;
    esac
}
| export -f check_url | |
| # Feed via cat to avoid xargs -a flag (not supported on all systems) | |
| cat "$URLS" | xargs -P "$PARALLEL" -I{} bash -c 'check_url "$@"' _ {} "$LOG" "$TIMEOUT" & | |
| WORKER_PID=$! | |
| while kill -0 "$WORKER_PID" 2>/dev/null; do | |
| DONE=$(grep -c '^\(OK\|MISS\)' "$LOG" 2>/dev/null || true) | |
| printf "\r Progress: %d / %d" "$DONE" "$TOTAL" >&2 | |
| sleep 0.3 | |
| done | |
| wait "$WORKER_PID" 2>/dev/null || true | |
| DONE=$(grep -c '^\(OK\|MISS\)' "$LOG" 2>/dev/null || true) | |
| printf "\r Progress: %d / %d\n" "$DONE" "$TOTAL" >&2 | |
| OK_COUNT=$(grep -c '^OK' "$LOG" || true) | |
| MISS_COUNT=$(grep -c '^MISS' "$LOG" || true) | |
| echo | |
| echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" | |
| success "$OK_COUNT images ready to fix" | |
| [[ "$MISS_COUNT" -gt 0 ]] && warn "$MISS_COUNT originals not found — will be skipped" | |
| echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" | |
| if [[ "$MISS_COUNT" -gt 0 ]]; then | |
| echo | |
| info "Skipped URLs:" | |
| grep '^MISS' "$LOG" | cut -d'|' -f2 | |
| fi | |
| echo | |
| info "To apply fixes run: ./wp-image-fixer.sh --apply" | |
| echo | |
| exit 0 | |
| fi | |
# ------------------------------------------------------------------------------
# APPLY — fast single-pass Perl replacement
# ------------------------------------------------------------------------------
# Reads the export path and the OK| records from the scan log, writes a
# resized<TAB>original map, and lets Perl rewrite the export into
# fixed-<basename> in the current directory. The log and map are removed
# afterwards.

# sed rather than grep|cut: a missing FILE| record must yield a clear error
# (grep's exit 1 would end the script silently under set -e + pipefail), and a
# path containing '|' must not be truncated.
INPUT=$(sed -n '/^FILE|/{s/^FILE|//p;q;}' "$LOG")
[[ -n "$INPUT" ]] || die "Scan log $LOG has no FILE entry. Run the script without --apply first."
[[ -f "$INPUT" ]] || die "File not found: $INPUT"
OUTPUT="fixed-$(basename "$INPUT")"

echo
info "Applying fixes to $INPUT..."

# awk alone (no grep in front): with zero OK records a failing grep would abort
# the script before the "No fixes to apply" branch could run.
awk -F'|' '$1 == "OK" && NF >= 3 { print $2 "\t" $3 }' "$LOG" > "$MAP"
COUNT=$(wc -l < "$MAP" | tr -d ' ')

if [[ "$COUNT" -eq 0 ]]; then
    warn "No fixes to apply."
    rm -f "$MAP" "$LOG"
    exit 0
fi

# The Perl program prints the number of distinct URLs it replaced. On failure
# the map is removed but the log is kept, so --apply can simply be re-run.
REPLACED=$(perl - "$INPUT" "$MAP" "$OUTPUT" <<'PERL'
use strict;
use warnings;

my ($infile, $mapfile, $outfile) = @ARGV;

# Load the replacement table: one "resized<TAB>original" pair per line.
open(my $map_fh, '<', $mapfile) or die "Cannot open map: $!";
my %original_for;
while (my $pair = <$map_fh>) {
    chomp $pair;
    my ($from, $to) = split /\t/, $pair, 2;
    # An empty key would make the alternation below match at every position.
    next unless defined $from && length $from && defined $to;
    $original_for{$from} = $to;
}
close($map_fh);
die "Map file contains no replacements\n" unless %original_for;

# One alternation of all resized URLs, longest first so that a URL which is a
# prefix of another cannot shadow the longer one.
my $alternation = join '|',
    map  { quotemeta($_) }
    sort { length($b) <=> length($a) }
    keys %original_for;
my $resized_url = qr/$alternation/;

open(my $in,  '<', $infile)  or die "Cannot open input: $!";
open(my $out, '>', $outfile) or die "Cannot open output: $!";

my %seen;    # distinct resized URLs that were actually replaced
while (my $line = <$in>) {
    $line =~ s{($resized_url)}{ $seen{$1} = 1; $original_for{$1} }ge;
    print {$out} $line or die "Cannot write output: $!";
}
close($in);
# Buffered write errors only surface at close, so check it.
close($out) or die "Cannot close output: $!";

print scalar(keys %seen), "\n";
PERL
) || { rm -f "$MAP"; die "Replacement failed; $LOG was kept so --apply can be retried."; }

rm -f "$MAP" "$LOG"
echo
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
success "$REPLACED image URLs replaced — saved as $OUTPUT"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment