Created
February 27, 2026 15:30
-
-
Save gerard-kanters/fa7e1b7ed19a4c462b142f8124ac88e9 to your computer and use it in GitHub Desktop.
Cache warming script for AI translate plugin
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# ============================================================================
# universal-cache-warmer-v9.sh
# Warm the page cache for every language variant of a site that publishes
# hreflang alternate links (e.g. the AI translate plugin).
# Usage: ./universal-cache-warmer-v9.sh domain.com   (a full URL also works)
# ============================================================================
set -euo pipefail

# Timestamped logging to stderr, keeping stdout clean.
log() { echo "[$(date '+%F %T')] $*" >&2; }

site="${1:-}"
if [ -z "$site" ]; then
  # Usage is a diagnostic: send it to stderr and exit non-zero.
  echo "Usage: $0 domain.com" >&2
  exit 1
fi

# Reduce the argument to a bare hostname: drop the scheme, then anything from
# the first remaining slash on.  (The previous sed deleted *every* slash, so
# "https://example.com/foo" became the bogus host "example.comfoo".)
clean_site=${site#*://}
clean_site=${clean_site%%/*}

WORKDIR="/tmp/cache-warmup/$clean_site"
# Fail fast if the work dir cannot be created — everything below writes there.
mkdir -p "$WORKDIR"
# 1. Detect all available languages once, from the homepage.
log "Step 1: Detecting all available languages via hreflang..."
# Collect the href of every <link rel="alternate" hreflang="..."> tag inside
# <head>.  This also catches x-default and non-standard codes (zh-hans, ka).
# NOTE(review): the grep relies on the attribute order rel -> hreflang -> href;
# markup with a different attribute order is silently missed.
# The trailing '|| true' matters: under `set -e -o pipefail`, a failed curl or
# a no-match grep (exit 1) would otherwise kill the script *before* the
# friendly error below ever runs.
all_language_roots=$(
  curl -sS -k -L --max-time 10 "https://$clean_site/" \
    | sed -n '/<head>/,/<\/head>/p' \
    | grep -oP 'rel="alternate"[^>]+hreflang="[^"]+"[^>]+href="\K[^"]+' \
    | sed 's|/$||' \
    | sort -u
) || true

if [ -z "$all_language_roots" ]; then
  log "Error: No hreflang tags found. Is the plugin active?"
  exit 1
fi
# Build an alternation regex of language path segments (e.g. "en|fr|de") used
# later to filter translated URLs out of the spider output.
#
# build_lang_filter_regex ROOTS DOMAIN:
#   $1 - newline-separated language root URLs, $2 - bare domain.
# Strips scheme, optional "www.", the domain AND the leading slash from each
# root, drops empty results (x-default / self roots), joins the rest with '|'.
# NOTE(review): dots in DOMAIN are unescaped regex metachars in the sed
# expression; harmless for real domain names, but technically imprecise.
build_lang_filter_regex() {
  printf '%s\n' "$1" \
    | sed -E "s|https?://(www\.)?$2/?||" \
    | grep -v '^$' \
    | tr '\n' '|' \
    | sed 's/|$//'
}

# Bug fix: the previous version kept the leading '/' on every segment, giving
# "(/en|/fr)"; wrapped as "/(...)/" by the spider filter that required "//en/"
# and never matched, so translations were never filtered.  '|| true' covers
# the pipefail case where every root reduces to empty and grep exits 1.
lang_filter_regex=$(build_lang_filter_regex "$all_language_roots" "$clean_site") || true
# 2. Spider the main site, keeping only untranslated ("bare") paths.
log "Step 2: Crawling main site structure (ignoring translations)..."
# The spider is expected to sit next to this script.
SPIDER_SCRIPT="$(dirname "$0")/async_spider.py"
# Filter the spider output so only the default-language paths remain.
# Guard: with an empty filter regex the pattern "/()/" matches the "//" of
# every "https://" URL and would discard the *entire* crawl — skip the filter
# in that case.  '|| true' tolerates grep filtering out every line.
if [ -n "$lang_filter_regex" ]; then
  python3 "$SPIDER_SCRIPT" "$clean_site" "https://$clean_site/" 5 10 \
    | grep -vE "/($lang_filter_regex)/" > "$WORKDIR/base_paths_raw.txt" || true
else
  python3 "$SPIDER_SCRIPT" "$clean_site" "https://$clean_site/" 5 10 \
    > "$WORKDIR/base_paths_raw.txt" || true
fi
# 3. Generate the final URL list: every bare path x every detected language.
log "Step 3: Generating final URL list based on detected hreflangs..."
final_list="$WORKDIR/final_warmup_list.txt"
: > "$final_list"

# Reduce the crawled URLs to bare relative paths (e.g. "contact"), deduped.
# The homepage reduces to an empty line; the loop below turns that into
# "https://site//", which step 4 normalizes.
sed -E "s|https?://(www\.)?$clean_site||" "$WORKDIR/base_paths_raw.txt" \
  | sed -E 's|^/||; s|/$||' \
  | sort -u > "$WORKDIR/unique_paths.txt"

# expand_paths BASE ROOTS: for every path read from stdin, print the
# default-language URL followed by one URL per language root.
#   $1 - base origin (https://domain), $2 - whitespace/newline-separated
#        language root URLs (word-splitting on $2 is intentional; URLs
#        produced in step 1 contain no spaces).
expand_paths() {
  local base=$1 roots=$2 rel_path root
  while IFS= read -r rel_path; do
    printf '%s/%s/\n' "$base" "$rel_path"
    for root in $roots; do
      printf '%s/%s/\n' "$root" "$rel_path"
    done
  done
}

expand_paths "https://$clean_site" "$all_language_roots" \
  < "$WORKDIR/unique_paths.txt" >> "$final_list"
# 4. Clean up and de-duplicate the final list.
# normalize_urls FILE: collapse every run of 2+ slashes that is not preceded
# by ':' (so the scheme's "//" survives) and sort the list unique, in place.
# The previous two-pass approach — squash every "//", then repair "https:/" —
# left stray "//" behind for odd-length slash runs ("///" became "//").
normalize_urls() {
  sed -i -E 's|([^:])//+|\1/|g' "$1"
  sort -u "$1" -o "$1"
}

normalize_urls "$final_list"
total=$(wc -l < "$final_list")
log "Step 4: Ready to process $total unique URLs."
# 5. Warm every URL with parallel curl requests.
log "Step 5: Warming $total URLs (4 parallel)..."
UA="CacheWarmer/2.2 (+https://$clean_site)"

# One curl per URL, 4 in parallel — the log line above promises 4, but the
# previous no-pv branch silently used 8.  `-d '\n'` makes xargs treat each
# input line as one literal argument, so quotes in URLs are not mangled.
warm_urls() {
  xargs -d '\n' -I {} -P 4 \
    curl -sS -k -o /dev/null -L -A "$UA" --connect-timeout 5 --max-time 20 "{}"
}

# Individual request failures (e.g. timeouts) make xargs exit non-zero; under
# set -e that would abort the whole warm run, so downgrade to a warning.
if command -v pv >/dev/null 2>&1; then
  pv -l -s "$total" < "$final_list" | warm_urls \
    || log "Warning: some requests failed"
else
  warm_urls < "$final_list" || log "Warning: some requests failed"
fi
log "Done!"
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This script is made to cache all pages and all languages for the AI translate plugin
There is a cache warming feature in the plugin, but if you have multiple sites and a huge number of pages, a script is simply easier to use.
Make sure the script is executable ("chmod +x cache-warmer.sh"), then run it with a website home page URL, e.g. "./cache-warmer.sh https://netcare.nl".
The script requires async_spider.py to be present in the same directory as the script itself.