Created
November 8, 2025 04:55
-
-
Save lukluk/94f3be6a0f4f7e31596fd08972ef7ed4 to your computer and use it in GitHub Desktop.
Rescue from docker limit
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Rescue deployments whose pods are stuck in a failing state.
#
# Steps:
#   1. Find pods with a container waiting for one of BAD_REASONS and resolve
#      each pod -> owning ReplicaSet -> owning Deployment.
#   2. Exit successfully if no such deployment exists.
#   3. Delete every affected deployment except the first one (sorted order).
#   4. Restart the affected deployments one at a time, waiting until the pods
#      labelled app=<deployment> are all Running before moving on.
#
# Usage:    rescue.sh [namespace]      (namespace defaults to "apps")
# Requires: kubectl, jq
#
# Exit status: 0 on success or when nothing is failing; 1 when a required
# tool is missing or the initial pod listing fails.
#
# The emoji in the status messages were garbled by a bad character-set
# conversion and have been restored to valid UTF-8.

set -uo pipefail

NAMESPACE="${1:-apps}"
readonly NAMESPACE

# Waiting reasons (regular expression) that mark a container as failing.
readonly BAD_REASONS='CrashLoopBackOff|Error|ImagePullBackOff|CreateContainerError'

# Seconds to sleep between two health checks of a restarted deployment.
readonly POLL_SECONDS=60

# Print an error message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Print a warning message to stderr and keep going.
warn() {
  printf 'warning: %s\n' "$*" >&2
}

#######################################
# Print the names of ReplicaSets that own at least one failing pod.
# Globals:   NAMESPACE, BAD_REASONS (read)
# Outputs:   one ReplicaSet name per line, sorted and de-duplicated
# Returns:   non-zero if kubectl or jq fails (relies on pipefail)
#######################################
list_failing_replicasets() {
  # Only owners of kind ReplicaSet are emitted, so bare pods and pods owned
  # by StatefulSets, DaemonSets or Jobs are ignored instead of being looked
  # up as ReplicaSets.
  kubectl get pods -n "$NAMESPACE" -o json |
    jq -r --arg bad "$BAD_REASONS" '
      .items[]
      | select(.status.containerStatuses != null)
      | select([.status.containerStatuses[].state.waiting.reason]
               | tostring | test($bad))
      | .metadata.ownerReferences[]?
      | select(.kind == "ReplicaSet")
      | .name' |
    sort -u
}

#######################################
# Count the pods of a deployment that are not Running or that have a
# container waiting for one of BAD_REASONS.
# Globals:   NAMESPACE, BAD_REASONS (read)
# Arguments: $1 - deployment name
# Outputs:   the number of unhealthy pods
# Returns:   non-zero if kubectl or jq fails (relies on pipefail)
#######################################
count_unhealthy_pods() {
  # NOTE(review): pods are selected with the label app=<deployment name>.
  # This assumes the deployment labels its pods that way; if it does not,
  # no pod matches and the count is 0. Confirm for your workloads.
  kubectl get pods -n "$NAMESPACE" -l "app=$1" -o json |
    jq --arg bad "$BAD_REASONS" '
      [.items[] | select(
        (.status.phase != "Running")
        or ([.status.containerStatuses[].state.waiting.reason]
            | tostring | test($bad))
      )] | length'
}

for tool in kubectl jq; do
  command -v "$tool" >/dev/null 2>&1 || die "required tool not found: $tool"
done

echo "🔍 Scanning pods in namespace: $NAMESPACE ..."

# Step 1: detect pods that are failing (CrashLoopBackOff, Error, etc)
# A failed listing must abort here; otherwise an unreachable cluster would
# be reported as "all healthy".
rs_names=$(list_failing_replicasets) ||
  die "could not list pods in namespace $NAMESPACE"

deploy_names=""
while IFS= read -r rs; do
  [[ -n "$rs" ]] || continue
  # Errors are silenced on purpose: the ReplicaSet may have disappeared
  # since the pods were listed. Only owners of kind Deployment are accepted.
  owner=$(kubectl get rs "$rs" -n "$NAMESPACE" \
    -o jsonpath='{.metadata.ownerReferences[?(@.kind=="Deployment")].name}' \
    2>/dev/null) || continue
  if [[ -n "$owner" ]]; then
    deploy_names+="$owner"$'\n'
  fi
done <<<"$rs_names"

# Sorted, de-duplicated list of affected deployments, held in memory rather
# than in predictable files under /tmp.
DEPLOY_LIST=()
while IFS= read -r name; do
  if [[ -n "$name" ]]; then
    DEPLOY_LIST+=("$name")
  fi
done < <(printf '%s' "$deploy_names" | sort -u)

# Step 2: Handle empty list
if [[ ${#DEPLOY_LIST[@]} -eq 0 ]]; then
  echo "✅ All deployments appear healthy."
  exit 0
fi

echo "🚨 Problematic deployments detected:"
printf '%s\n' "${DEPLOY_LIST[@]}"

# Step 3: Delete all except the first
# NOTE(review): the deployments deleted here are still passed to
# "kubectl rollout restart" in step 4, which can only succeed if something
# outside this script recreates them in the meantime. Confirm that this is
# the intended workflow.
FIRST_DEPLOY=${DEPLOY_LIST[0]}
if [[ ${#DEPLOY_LIST[@]} -gt 1 ]]; then
  echo "🧹 Deleting deployments (except $FIRST_DEPLOY):"
  for ((i = 1; i < ${#DEPLOY_LIST[@]}; i++)); do
    kubectl delete deploy "${DEPLOY_LIST[i]}" -n "$NAMESPACE" --ignore-not-found ||
      warn "failed to delete deployment ${DEPLOY_LIST[i]}"
  done
fi

# Step 4: Sequential restart loop
for deploy in "${DEPLOY_LIST[@]}"; do
  echo "🔄 Restarting deployment: $deploy"
  # A failed restart is reported but does not stop the run, so the remaining
  # deployments are still processed.
  if ! kubectl rollout restart deployment "$deploy" -n "$NAMESPACE"; then
    warn "rollout restart failed for $deploy"
  fi

  echo "⏳ Waiting for pods in $deploy to become Running..."
  while true; do
    # An empty or non-numeric count means the query itself failed. Retry in
    # that case: an empty value would otherwise compare equal to 0 and be
    # reported as healthy.
    if ! not_ok=$(count_unhealthy_pods "$deploy") ||
      [[ ! "$not_ok" =~ ^[0-9]+$ ]]; then
      warn "could not determine pod health for $deploy; retrying in ${POLL_SECONDS}s"
      sleep "$POLL_SECONDS"
      continue
    fi

    if [[ "$not_ok" -eq 0 ]]; then
      echo "✅ $deploy pods are all healthy."
      break
    fi

    echo "❌ Still unhealthy pods ($not_ok). Rechecking in ${POLL_SECONDS}s..."
    sleep "$POLL_SECONDS"
  done
done

echo "🎉 All failing deployments have been recovered."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment