-
-
Save verybadsoldier/58b8c6526e0ff57452855662f1005d91 to your computer and use it in GitHub Desktop.
#!/bin/sh
#
# WiFi Watchdog - installer, updater and uninstaller in a single script.
#
# Synopsis:
#   ./install_wifi_watchdog.sh <TARGET_IP> [INTERVAL_MINUTES]   install or update
#   ./install_wifi_watchdog.sh uninstall                        remove again
#
# Examples:
#   ./install_wifi_watchdog.sh 192.168.2.1
#   ./install_wifi_watchdog.sh 192.168.2.1 15
#   ./install_wifi_watchdog.sh uninstall
#

# --- Installer configuration ---
# Location of the generated watchdog script (also used to find its cron entry).
INSTALL_DIR="/root"
SCRIPT_NAME="wifi_watchdog.sh"
FULL_PATH="${INSTALL_DIR}/${SCRIPT_NAME}"
# Files written by the watchdog at runtime; uninstall mode deletes them.
LOG_FILE="/var/log/wifi_watchdog"
STATE_DIR="/var/lib/wifi_watchdog"
# --- UNINSTALL MODE ---
# Triggered by:  <installer> uninstall
# Removes the cron entry, the generated script, its logs, its state directory
# and its flag file. Exits 0 on success, 1 if the crontab could not be updated.
if [ "$1" = "uninstall" ]; then
    echo "Uninstalling WiFi Watchdog..."
    UNINSTALL_RC=0

    # 0. Stop a watchdog run that may be in progress; otherwise it could
    #    re-create the log/state files right after they are deleted below.
    killall "$SCRIPT_NAME" 2>/dev/null

    # 1. Remove from Cron
    if crontab -l 2>/dev/null | grep -F "$FULL_PATH" >/dev/null; then
        if TEMP_CRON=$(mktemp); then
            # Keep every crontab line except the ones that mention our script.
            crontab -l 2>/dev/null | grep -Fv "$FULL_PATH" > "$TEMP_CRON"
            if crontab "$TEMP_CRON"; then
                echo " - Cron job removed."
            else
                echo " - Error: could not update crontab; cron job NOT removed." >&2
                UNINSTALL_RC=1
            fi
            rm -f "$TEMP_CRON"
        else
            echo " - Error: could not create a temporary file; cron job NOT removed." >&2
            UNINSTALL_RC=1
        fi
    else
        echo " - No cron job found."
    fi

    # 2. Delete Script
    if [ -f "$FULL_PATH" ]; then
        rm -f "$FULL_PATH"
        echo " - Script deleted: $FULL_PATH"
    else
        echo " - Script not found."
    fi

    # 3. Cleanup Logs and State
    # The glob also catches the rotated log ("$LOG_FILE.0").
    rm -f "${LOG_FILE}"* 2>/dev/null
    rm -rf "$STATE_DIR" 2>/dev/null
    # Flag file touched by every watchdog run.
    rm -f /tmp/wifi_watchdog_check.flag 2>/dev/null
    echo " - Logs and state files cleaned up."

    if [ "$UNINSTALL_RC" -eq 0 ]; then
        echo "Uninstallation complete."
    else
        echo "Uninstallation finished with errors." >&2
    fi
    exit "$UNINSTALL_RC"
fi
# --- INSTALL / UPDATE MODE ---
# $1 - host/IP the watchdog pings (required)
# $2 - cron interval in minutes, 1-59 (optional, default 10)
TARGET_IP="$1"
INTERVAL="${2:-10}" # Default to 10 minutes

if [ -z "$TARGET_IP" ]; then
    echo "Error: You must provide a target IP address or 'uninstall'."
    echo "Usage: $0 <IP_TO_PING> [MINUTES]"
    echo "       $0 uninstall"
    exit 1
fi

# TARGET_IP is pasted verbatim into a double-quoted assignment inside the
# generated script, which cron then runs. Restrict it to characters that can
# appear in an IPv4/IPv6 address or host name so it can neither break out of
# the quotes nor be mistaken for a ping option.
case "$TARGET_IP" in
    -* | *[!A-Za-z0-9.:_-]*)
        echo "Error: Invalid target '$TARGET_IP' (allowed: letters, digits, '.', ':', '_', '-')." >&2
        exit 1
        ;;
esac

# INTERVAL becomes the step of the cron minute field (*/N), so it must be a
# plain integer between 1 and 59.
case "$INTERVAL" in
    *[!0-9]*)
        echo "Error: Interval must be a whole number of minutes (1-59), got '$INTERVAL'." >&2
        exit 1
        ;;
esac
if [ "$INTERVAL" -lt 1 ] || [ "$INTERVAL" -gt 59 ]; then
    echo "Error: Interval must be between 1 and 59 minutes, got '$INTERVAL'." >&2
    exit 1
fi

# Cron Schedule: */X * * * *
CRON_JOB="*/$INTERVAL * * * * $FULL_PATH"

echo "Starting installation... (v1.5.0)"
echo "Target IP: $TARGET_IP"
echo "Run Frequency: Every $INTERVAL minutes"

# 1. Stop any running instances
killall "$SCRIPT_NAME" 2>/dev/null

# 2. Generate the Script
echo "Generating script at $FULL_PATH..."
# --- WRITE PART A (Dynamic Config) ---
# Unquoted heredoc delimiter: $TARGET_IP is expanded by the installer NOW, so
# the generated script contains the literal address. This write creates
# (truncates) the file; abort if it fails instead of continuing with a
# missing or stale watchdog script.
if ! cat << EOF > "$FULL_PATH"
#!/bin/sh
# Flag file indicating the watchdog ran
touch /tmp/wifi_watchdog_check.flag
# --- Configuration ---
# Target IP to ping (Injected by installer)
TARGET="$TARGET_IP"
EOF
then
    echo "Error: Could not write $FULL_PATH" >&2
    exit 1
fi
| # --- WRITE PART B (Static Logic) --- | |
| cat << 'EOF' >> "$FULL_PATH" | |
# Retry policy: after a failed first ping, try up to MAX_RETRIES more times,
# pausing SLEEP_TIME seconds before each attempt.
MAX_RETRIES=5
SLEEP_TIME=5

# Seconds RestartNetwork may run before it is considered hung (driver hang)
RESTART_TIMEOUT=120

# Tag used for syslog entries and log file lines
TAG="WiFi_Watchdog"

# Paths
LOG_FILE="/var/log/wifi_watchdog"
STATE_DIR="/var/lib/wifi_watchdog"
STATE_FILE="${STATE_DIR}/state"

# Log rotation threshold in bytes (51200 = 50KB)
MAX_LOG_SIZE=51200
# ---------------------

# Make sure the directory holding the state file is present
[ -d "$STATE_DIR" ] || mkdir -p "$STATE_DIR"
# --- Log Rotation Logic ---
# Once the log reaches MAX_LOG_SIZE bytes it is moved to "$LOG_FILE.0"
# (replacing the previous rotated copy) and a fresh log is started with a
# single line noting the rotation.
if [ -f "$LOG_FILE" ] && [ "$(wc -c < "$LOG_FILE")" -ge "$MAX_LOG_SIZE" ]; then
    mv "$LOG_FILE" "$LOG_FILE.0"
    echo "$(date "+%Y-%m-%d %H:%M:%S") - $TAG: Log rotated (limit ${MAX_LOG_SIZE} bytes reached)." > "$LOG_FILE"
fi
# Determine the host name that RestartNetwork hands to udhcpc (-H):
# contents of /etc/hostname when that file exists, otherwise whatever
# HOSTNAME already holds, and "SqueezeboxRadio" if the result is empty.
HOSTNAME_FILE="/etc/hostname"
[ -f "$HOSTNAME_FILE" ] && HOSTNAME=$(cat "$HOSTNAME_FILE")
[ -n "$HOSTNAME" ] || HOSTNAME="SqueezeboxRadio"
# Function: Log to syslog and file
#   $1 - message text
# The message goes to syslog (via logger, tagged $TAG) and is appended to
# $LOG_FILE as "<YYYY-MM-DD HH:MM:SS> - <TAG>: <message>".
log_event() {
    _stamp=$(date "+%Y-%m-%d %H:%M:%S")
    logger -t "$TAG" "$1"
    echo "${_stamp} - ${TAG}: $1" >> "$LOG_FILE"
}
# Function: Action to take on new failure
# Restart sequence: stop the wlan init script (when present and executable),
# kill every udhcpc, wait 5s, start wlan again, wait 5s, then launch a new
# udhcpc on eth1 that announces $HOSTNAME. Each stage is reported via log_event.
RestartNetwork() {
    _wlan_init="/etc/init.d/wlan"

    log_event "Action Triggered: Restarting network services..."
    if [ -x "$_wlan_init" ]; then
        "$_wlan_init" stop
    fi
    killall udhcpc 2>/dev/null
    sleep 5

    if [ -x "$_wlan_init" ]; then
        "$_wlan_init" start
    fi
    sleep 5

    log_event "Restarting DHCP client..."
    udhcpc -R -a \
        -p/var/run/udhcpc.eth1.pid \
        -b --syslog \
        -ieth1 \
        -H "$HOSTNAME" \
        -s/etc/network/udhcpc_action
    log_event "Network restart sequence finished."
}
# Function: Check connection
# Sends a single ping to $TARGET with all output discarded.
# Returns ping's exit status (0 = target answered).
check_connection() {
    ping -c 1 "$TARGET" >/dev/null 2>&1
}
# --- Main Logic ---
# One cron-triggered run:
#   1. Probe TARGET: one ping, then up to MAX_RETRIES more, SLEEP_TIME apart.
#   2. Compare the result with the status stored by the previous run.
#   3. Only on a change to FAIL: restart the network (guarded by a hang
#      timeout) and reboot if that did not help. A failure that merely
#      persists from the previous run triggers no action, so the device
#      cannot end up in a restart/reboot loop.

if check_connection; then
    CURRENT_STATUS="OK"
else
    # Initial check failed, entering retry loop
    count=0
    CURRENT_STATUS="FAIL"
    while [ "$count" -lt "$MAX_RETRIES" ]; do
        sleep "$SLEEP_TIME"
        if check_connection; then
            CURRENT_STATUS="OK"
            break
        fi
        count=$((count + 1))
    done
fi

# Status recorded by the previous run; a missing state file counts as "OK".
if [ -f "$STATE_FILE" ]; then
    PREV_STATUS=$(cat "$STATE_FILE")
else
    PREV_STATUS="OK"
fi

if [ "$CURRENT_STATUS" != "$PREV_STATUS" ]; then
    # State HAS Changed
    # The stored state is the only thing that keeps the NEXT run from treating
    # the same outage as new. If it cannot be written, a reboot would be
    # repeated on every cron run, so reboots are disabled for this run (the
    # network restart itself is still attempted).
    STATE_SAVED=1
    if ! echo "$CURRENT_STATUS" > "$STATE_FILE"; then
        STATE_SAVED=0
        log_event "WARNING: Could not write state file $STATE_FILE. Reboot disabled for this run to avoid a reboot loop."
    fi

    if [ "$CURRENT_STATUS" = "FAIL" ]; then
        log_event "State Change: OK -> FAIL. Connection lost after retries."
        sync

        # --- HANG DETECTION WRAPPER START ---
        # Run RestartNetwork in the background so a hang inside it cannot
        # block this script.
        RestartNetwork &
        PID=$!

        # Poll once per second (signal 0 = existence check) until the
        # background job is gone or RESTART_TIMEOUT seconds have passed.
        WAITED=0
        while [ "$WAITED" -lt "$RESTART_TIMEOUT" ] && kill -0 "$PID" 2>/dev/null; do
            sleep 1
            WAITED=$((WAITED + 1))
        done

        # Decide on the job's state NOW, after the last sleep: a restart that
        # finished during the final second must not be mistaken for a hang.
        if kill -0 "$PID" 2>/dev/null; then
            if [ "$STATE_SAVED" -eq 1 ]; then
                log_event "CRITICAL: RestartNetwork hung for ${RESTART_TIMEOUT}s (Driver freeze). Force REBOOTING."
                kill -9 "$PID" 2>/dev/null
                sync
                /sbin/reboot
            else
                log_event "CRITICAL: RestartNetwork hung for ${RESTART_TIMEOUT}s (Driver freeze). NOT rebooting: state could not be saved."
                kill -9 "$PID" 2>/dev/null
            fi
            exit 1
        fi
        # --- HANG DETECTION WRAPPER END ---

        log_event "Waiting 15s for network to settle..."
        sleep 15

        if check_connection; then
            log_event "Network restart SUCCESSFUL."
            # Network is fixed. Write OK to disk so a FUTURE failure is detected.
            echo "OK" > "$STATE_FILE"
        elif [ "$STATE_SAVED" -eq 1 ]; then
            log_event "Network restart FAILED. REBOOTING system."
            sync
            /sbin/reboot
        else
            log_event "Network restart FAILED. NOT rebooting: state could not be saved."
        fi
    elif [ "$CURRENT_STATUS" = "OK" ]; then
        log_event "State Change: FAIL -> OK. Connection restored."
    fi
else
    log_event "Status did not change: $CURRENT_STATUS"
    if [ "$CURRENT_STATUS" = "FAIL" ]; then
        log_event "Persistent Failure: Connection still down."
        # No Reboot here. We already tried once (or hung once).
        # If that didn't work, we accept defeat to prevent loops.
    fi
fi
| EOF | |
# 3. Set Permissions
# Cron runs the script directly, so it must be executable; stop here rather
# than install a cron job that can never run.
if ! chmod +x "$FULL_PATH"; then
    echo "Error: Could not make $FULL_PATH executable." >&2
    exit 1
fi

# 4. Update Cron
echo "Updating cron job..."
if ! TEMP_CRON=$(mktemp); then
    echo "Error: Could not create a temporary file." >&2
    exit 1
fi

# Clean up OLD jobs first to avoid duplicates or multiple schedules:
# copy the current crontab minus every line that mentions our script.
crontab -l 2>/dev/null | grep -Fv "$FULL_PATH" > "$TEMP_CRON"

# Add NEW job
echo "$CRON_JOB" >> "$TEMP_CRON"

# Install new cron file; only report success if crontab accepted it.
if ! crontab "$TEMP_CRON"; then
    rm -f "$TEMP_CRON"
    echo "Error: Could not install the cron job." >&2
    exit 1
fi
rm -f "$TEMP_CRON"

echo "Success! Script installed."
echo "Config: Pinging $TARGET_IP every $INTERVAL minutes."
It looks as if revision 9 is handling the case where wlan stop just hangs. I was going to point this out, but now I don’t have to !
I have observed this occasionally. For some reason it seems that unloading the module just doesn’t work. I don’t know why.
I don’t think that getting the hostname from `SlimDiscovery.lua` is a good idea. There is a good chance that this name is incompatible with DNS, and the firmware doesn’t use this name anyway. Simple example: what happens if this name contains a space?
I would suggest just using the name in `/etc/hostname`, with a fallback to `SqueezeboxRadio` if, for some unknown reason, the hostname file has disappeared.
Yes, good point. That logic comes from wlanpoke:
https://github.com/PomDev2/wlanpoke/blob/main/wlanpoke.sh#L79
But I agree the name may not comply with the rules for valid DNS names. Using /etc/hostname is an option. But I kinda like the feature that the devices appear with individual names in the router. Using the hostname might lead to every device being just SqueezeboxRadio. Removing all non-alphanumeric characters from the SlimDiscovery.lua name would also be an option.
But, I guess no point in reinventing the wheel here. The script should imo replicate what the vanilla Radio code is doing which is probably to just use /etc/hostname (at least my Fritzbox reports the original device name to be SqueezeboxRadio).
So, I will change it to that.
It looks as if revision 9 is handling the case where `wlan stop` just hangs. I was going to point this out, but now I don’t have to!
I have observed this occasionally. For some reason it seems that unloading the module just doesn’t work. I don’t know why.
Yes, I was greeted this morning by my radio in the bathroom with a red symbol. After connecting wired LAN, the log suggested that the watchdog script stopped running and never finished (while the process was gone also). So, I added that timeout. Let's see if this is the last quirk ;)
I don’t think that getting the hostname from `SlimDiscovery.lua` is a good idea. There is a good chance that this name is incompatible with DNS, and the firmware doesn’t use this name anyway. Simple example: what happens if this name contains a space?
I would suggest just using the name in `/etc/hostname`, with a fallback to `SqueezeboxRadio` if, for some unknown reason, the hostname file has disappeared.