Skip to content

Instantly share code, notes, and snippets.

@10h30
Last active March 11, 2026 12:15
Show Gist options
  • Select an option

  • Save 10h30/f6720ebbad3d5acd40e20a9883690bcb to your computer and use it in GitHub Desktop.

Select an option

Save 10h30/f6720ebbad3d5acd40e20a9883690bcb to your computer and use it in GitHub Desktop.
Replaces resized WordPress image URLs with originals in an XML export file
#!/usr/bin/env bash
# ==============================================================================
# wp-image-fixer.sh
# Replaces resized WordPress image URLs with originals in an XML export file.
# Usage:
# ./wp-image-fixer.sh # dry-run: scan and log results
# ./wp-image-fixer.sh --apply # apply replacements from cached log
# ==============================================================================
# Strict mode: abort on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Working files, all created in the current directory.
LOG=".wp-image-check.log"   # scan results; first line is FILE|<export path>
URLS=".wp-image-urls.tmp"   # de-duplicated list of resized image URLs
MAP=".wp-image-map.txt"     # resized<TAB>original pairs fed to the Perl pass

# Tunables for the dry-run scan.
PARALLEL=50                 # concurrent checks (xargs -P)
TIMEOUT=5                   # per-request limit in seconds (curl --max-time)

# Dry-run unless the first argument is exactly --apply.
APPLY=false
if [[ "${1:-}" == "--apply" ]]; then
    APPLY=true
fi
# Console helpers. Each joins its arguments with spaces and prefixes a tag.

# Informational message on stdout.
info() {
    printf '[info] %s\n' "$*"
}

# Success message on stdout.
success() {
    printf '[ok] %s\n' "$*"
}

# Warning message on stdout.
warn() {
    printf '[warn] %s\n' "$*"
}

# Error message on stderr, then terminate the script with status 1.
die() {
    printf '[error] %s\n' "$*" >&2
    exit 1
}
# ------------------------------------------------------------------------------
# Cleanup on exit or Ctrl+C
# ------------------------------------------------------------------------------
# Signal handler: remove the temporary URL list, stop any background workers
# in this process group, and terminate the script.
#   $1 - exit status to terminate with (defaults to 130)
cleanup() {
    local status="${1:-130}"
    # `kill 0` below signals every process in the group, this shell included.
    # Ignore INT/TERM first so the handler cannot re-enter itself.
    trap '' INT TERM
    rm -f "$URLS"
    kill 0 2>/dev/null || true
    # Exit explicitly: returning from a trap handler would let the script
    # carry on from where it was interrupted.
    exit "$status"
}
# Conventional 128+signal exit statuses: SIGINT=2, SIGTERM=15.
trap 'cleanup 130' INT
trap 'cleanup 143' TERM
# Normal termination only needs the temp file removed.
trap 'rm -f "$URLS"' EXIT
# ------------------------------------------------------------------------------
# Detect existing log / prompt for input
# ------------------------------------------------------------------------------
if $APPLY; then
    # --apply reuses the results of an earlier dry run.
    [[ -f "$LOG" ]] || die "No scan found. Run the script without --apply first."
    # The first log line records the scanned file as FILE|<path>. `|| true`
    # stops a log without that header from aborting silently under
    # `set -e -o pipefail`; the empty result is reported just below instead.
    INPUT=$(grep '^FILE|' "$LOG" | cut -d'|' -f2 || true)
    [[ -n "$INPUT" ]] || die "Scan log $LOG is incomplete. Run the script without --apply first."
    # Fail before prompting if the export was moved or deleted since the scan.
    [[ -f "$INPUT" ]] || die "File not found: $INPUT"
    info "Existing scan found for: $INPUT"
    echo
    read -rp "Apply fixes using cached results? (y/N): " confirm
    # Only a literal lowercase "y" confirms; anything else leaves quietly.
    if [[ "$confirm" != "y" ]]; then
        exit 0
    fi
else
    # Dry run: start from a clean log.
    if [[ -f "$LOG" ]]; then
        info "Removing previous scan results..."
        rm -f "$LOG"
    fi
    read -rp "Enter WordPress XML export file: " INPUT
    [[ -f "$INPUT" ]] || die "File not found: $INPUT"
    # Record the input path so a later --apply run knows which file to fix.
    echo "FILE|$INPUT" > "$LOG"
fi
# ------------------------------------------------------------------------------
# DRY RUN — scan images and check originals
# ------------------------------------------------------------------------------
if ! $APPLY; then
echo
info "Extracting resized image URLs..."
# Collect every distinct URL ending in -WIDTHxHEIGHT.<ext>.
# grep exits 1 when nothing matches; under `set -e -o pipefail` that would
# abort the script right here and the "nothing to do" branch below would never
# run. Only status 1 is tolerated, so real grep errors (status 2) still abort.
{ grep -oE 'https?://[^"[:space:]]+-[0-9]+x[0-9]+\.(jpg|jpeg|png|webp)' "$INPUT" \
    || [[ $? -eq 1 ]]; } \
    | sort -u > "$URLS"
TOTAL=$(wc -l < "$URLS" | tr -d ' ')
info "Found $TOTAL resized image URLs"
echo
if [[ "$TOTAL" -eq 0 ]]; then
    warn "No resized images found. Nothing to do."
    # No results to cache, so do not leave a log behind for --apply.
    rm -f "$LOG"
    exit 0
fi
# Rough estimate: number of batches (rounded up, so a partial batch still
# counts) times the per-request timeout.
EST=$(( (TOTAL + PARALLEL - 1) / PARALLEL * TIMEOUT ))
info "Checking originals (~${EST}s estimated)..."
echo
# Check whether the full-size original of one resized image URL exists and
# append the verdict to the scan log.
#   $1 - resized image URL, ending in -WIDTHxHEIGHT.<ext>
#   $2 - log file to append to
#   $3 - per-request curl timeout in seconds
# Appends "OK|<resized>|<original>" when the original answers 200, 206, 301 or
# 302, otherwise "MISS|<resized>|<original> (<http code>)".
check_url() {
    local resized="$1"
    local log="$2"
    local timeout="$3"
    local original="$resized"
    local code
    # Anchored at the end so only the trailing size suffix is removed, even if
    # a similar "-NxN.ext" sequence appears earlier in the URL.
    local size_re='^(.*)-[0-9]+x[0-9]+(\.(jpg|jpeg|png|webp))$'

    if [[ "$resized" =~ $size_re ]]; then
        original="${BASH_REMATCH[1]}${BASH_REMATCH[2]}"
    fi

    # curl's -w output already prints 000 when the request fails, so only the
    # non-zero exit status needs tolerating. Echoing a second "000" on failure
    # produced "000000", which skipped the fallback below.
    code=$(curl -s -o /dev/null -w "%{http_code}" \
        --max-time "$timeout" \
        --head \
        --retry 1 --retry-delay 1 \
        "$original" 2>/dev/null) || true
    code="${code:-000}"

    # Fall back to a one-byte ranged GET if the server blocks HEAD or the
    # HEAD request failed outright.
    if [[ "$code" == "405" || "$code" == "403" || "$code" == "000" ]]; then
        code=$(curl -s -o /dev/null -w "%{http_code}" \
            --max-time "$timeout" \
            -H "Range: bytes=0-0" \
            "$original" 2>/dev/null) || true
        code="${code:-000}"
    fi

    case "$code" in
        200|206|301|302)
            echo "OK|$resized|$original" >> "$log"
            ;;
        *)
            echo "MISS|$resized|$original ($code)" >> "$log"
            ;;
    esac
}
# Exported so the `bash -c` children spawned by xargs can call it.
export -f check_url
# Feed the list on stdin (xargs -a is not supported on all systems) and
# NUL-delimited: default xargs parsing treats quotes and backslashes specially,
# so a URL containing an apostrophe would otherwise abort the whole run.
tr '\n' '\0' < "$URLS" \
    | xargs -0 -P "$PARALLEL" -I{} bash -c 'check_url "$@"' _ {} "$LOG" "$TIMEOUT" &
WORKER_PID=$!
# Poll the log while the workers run; every finished check adds one OK|/MISS|
# line. -E is used because alternation with \| is not part of POSIX basic
# regular expressions.
while kill -0 "$WORKER_PID" 2>/dev/null; do
    DONE=$(grep -cE '^(OK|MISS)\|' "$LOG" 2>/dev/null || true)
    printf "\r Progress: %d / %d" "${DONE:-0}" "$TOTAL" >&2
    sleep 0.3
done
wait "$WORKER_PID" 2>/dev/null || true
# Final count, terminated with a newline so later output starts on a clean line.
DONE=$(grep -cE '^(OK|MISS)\|' "$LOG" 2>/dev/null || true)
printf "\r Progress: %d / %d\n" "${DONE:-0}" "$TOTAL" >&2
# grep -c still prints 0 when nothing matches; `|| true` only absorbs its
# non-zero exit status.
OK_COUNT=$(grep -c '^OK' "$LOG" || true)
MISS_COUNT=$(grep -c '^MISS' "$LOG" || true)
echo
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
success "$OK_COUNT images ready to fix"
if [[ "$MISS_COUNT" -gt 0 ]]; then
    warn "$MISS_COUNT originals not found — will be skipped"
fi
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
if [[ "$MISS_COUNT" -gt 0 ]]; then
    echo
    info "Skipped URLs:"
    # Field 2 of a MISS line is the resized URL that will be left untouched.
    grep '^MISS' "$LOG" | cut -d'|' -f2
fi
echo
info "To apply fixes run: ./wp-image-fixer.sh --apply"
echo
exit 0
fi
# ------------------------------------------------------------------------------
# APPLY — fast single-pass Perl replacement
# ------------------------------------------------------------------------------
INPUT=$(grep '^FILE|' "$LOG" | cut -d'|' -f2)
# The fixed copy is written to the current directory; the input is not modified.
OUTPUT="fixed-$(basename "$INPUT")"
echo
info "Applying fixes to $INPUT..."
# Build the resized<TAB>original table from the OK lines of the scan log.
# awk does the filtering itself: a `grep '^OK' | awk` pipeline exits non-zero
# when there are no OK lines, which under `set -e -o pipefail` aborted the
# script before the "No fixes to apply." message could be shown.
awk -F'|' '$1 == "OK" {print $2 "\t" $3}' "$LOG" > "$MAP"
COUNT=$(wc -l < "$MAP" | tr -d ' ')
if [[ "$COUNT" -eq 0 ]]; then
    warn "No fixes to apply."
    rm -f "$MAP" "$LOG"
    exit 0
fi
# Single-pass replacement. Perl prints the number of distinct URLs it replaced.
# If perl fails, `set -e` aborts here, before the scan log is removed.
REPLACED=$(perl - "$INPUT" "$MAP" "$OUTPUT" <<'PERL'
use strict;
use warnings;

my ($infile, $mapfile, $outfile) = @ARGV;

# Load the resized-URL => original-URL table (one tab-separated pair per line).
open(my $map_fh, '<', $mapfile) or die "Cannot open map: $!";
my %original_for;
while (my $entry = <$map_fh>) {
    chomp $entry;
    my ($from, $to) = split /\t/, $entry, 2;
    # An empty key would add an empty alternative that matches everywhere.
    next unless defined $from && length $from && defined $to;
    $original_for{$from} = $to;
}
close($map_fh);

die "Map file contains no replacements\n" unless %original_for;

# Longest keys first, so a URL that is a prefix of another cannot shadow it.
my $pattern = join '|',
    map  { quotemeta }
    sort { length($b) <=> length($a) }
    keys %original_for;
my $regex = qr/$pattern/;

open(my $in,  '<', $infile)  or die "Cannot open input: $!";
open(my $out, '>', $outfile) or die "Cannot open output: $!";

my %seen;
while (my $line = <$in>) {
    $line =~ s/($regex)/do { $seen{$1} = 1; $original_for{$1} }/ge;
    print {$out} $line or die "Cannot write output: $!";
}
close($in);
# Buffered write errors (e.g. a full disk) only surface at close.
close($out) or die "Cannot write output: $!";

print scalar(keys %seen), "\n";
PERL
)
rm -f "$MAP" "$LOG"
echo
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
success "$REPLACED image URLs replaced — saved as $OUTPUT"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment