Created
October 15, 2025 03:27
-
-
Save xxrjun/1fe9769a415ae89287407696c5173701 to your computer and use it in GitHub Desktop.
Multi SSH Helpers
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Multi-host SSH helpers.
# original source: https://lambda.ai/blog/how-to-serve-deepseek-r1-v3-on-gh200
#
# The helpers are shell functions, so this file is normally sourced into the
# shell that will call them.
#
# runips_bootstrap: setup known_hosts and authorized_keys (for passwordless ssh,
#                   you need to have your public key in ~/.ssh/id_rsa.pub or
#                   ~/.ssh/id_ed25519.pub)
# runip:     run command on a specific ip (set ip=host:port or user@host:port)
# runk:      run command on the k-th ip in the ips file (k is 0-based)
# runhead:   run command on the first ip in the ips file
# runips:    run command on multiple ips in parallel (set ips="..." or read from ips_file)
# runall:    run command on all ips in the ips file
# runrest:   run command on all but the first ip in the ips file
# sshk:      ssh to the k-th ip in the ips file (k is 0-based)
# ssh_head:  ssh to the first ip in the ips file
# rsyncall:  rsync a directory to all ips in the ips file
# rsynchead: rsync a directory to the first ip in the ips file
# killall:   kill processes whose command line matches "192.222"
#            (e.g. background jobs started by these helpers)
#
# Example usage:
#   runall nvidia-smi
#   runall df -h
#   rsyncall /local/dir /remote/dir
# Example usage for sudo:
#   INTERACTIVE_SSH=1 runhead sudo -v
#   k=<line> INTERACTIVE_SSH=1 runk sudo -v
#   k=1 INTERACTIVE_SSH=1 runk sudo usermod -aG docker $USER

ips_file=${ips_file:-$HOME/ips.txt} # <-- PLACE YOUR IPS FILE HERE
if [[ ! -f "$ips_file" ]]; then
  echo "IPs file not found: $ips_file" >&2
  # `return` stops a sourced file without killing the caller's shell; it fails
  # when the file is executed directly, in which case we fall back to `exit`.
  return 1 2>/dev/null || exit 1
fi
# Example content of ips.txt (supports with or without user/port):
#   host1:port
#   host2
#   host3

DEFAULT_USER=${DEFAULT_USER:-$USER} # <-- CHANGE YOUR DEFAULT USER HERE IF NEEDED
DEFAULT_PORT=${DEFAULT_PORT:-22}
export runprefix="${runprefix:-}"   # text prepended to every remote command
STRIP_COMMENTS=${STRIP_COMMENTS:-1} # non-zero: drop "# ..." from ips file lines

ssh_dir=${ssh_dir:-$HOME/.ssh}
mkdir -p "$ssh_dir" && chmod 700 "$ssh_dir"

# Pick the private key used by every ssh/rsync call; ed25519 is preferred.
if [[ -f "$ssh_dir/id_ed25519" ]]; then
  ssh_key_file="$ssh_dir/id_ed25519"
elif [[ -f "$ssh_dir/id_rsa" ]]; then
  ssh_key_file="$ssh_dir/id_rsa"
else
  echo "No private key in $ssh_dir (id_ed25519 or id_rsa)" >&2
  return 1 2>/dev/null || exit 1
fi
#######################################
# Prepare passwordless SSH: make sure the local public keys are listed in
# authorized_keys and record the host keys of every target in known_hosts.
# Globals:   ssh_dir (read), ips_file (read), SSH_HOST/SSH_PORT (set by
#            parse_target)
# Arguments: none
# Outputs:   one "doing host:port" progress line per target on stdout
#######################################
runips_bootstrap() {
  local auth_file="$ssh_dir/authorized_keys"
  touch "$auth_file"; chmod 600 "$auth_file"

  local pub key_type key_blob _comment
  for pub in "$ssh_dir/id_ed25519.pub" "$ssh_dir/id_rsa.pub"; do
    [[ -f "$pub" ]] || continue
    key_type="" key_blob=""
    # A .pub file is "<type> <base64> [comment]". Match on type+blob only, so
    # a differing comment cannot cause duplicates and no private key (or its
    # passphrase) is needed. An empty pattern would match every line, so a
    # malformed file is skipped instead of searched for.
    read -r key_type key_blob _comment < "$pub" || true
    if [[ -z "$key_type" || -z "$key_blob" ]]; then
      echo "skipping malformed public key: $pub" >&2
      continue
    fi
    if ! grep -Fq -- "$key_type $key_blob" "$auth_file"; then
      # Do not glue the new key onto an existing last line without newline.
      if [[ -s "$auth_file" && -n "$(tail -c 1 "$auth_file")" ]]; then
        printf '\n' >> "$auth_file"
      fi
      cat "$pub" >> "$auth_file"
    fi
  done

  local line
  while IFS= read -r line || [[ -n "$line" ]]; do
    parse_target "$line" || continue
    [[ -z "${SSH_HOST:-}" ]] && continue
    # Record keys as [host]:port and include common types so ED25519 is present.
    echo "doing $SSH_HOST:$SSH_PORT"
    ssh-keyscan -T 5 -p "$SSH_PORT" -H -t rsa,ecdsa,ed25519 "$SSH_HOST" \
      >> "$ssh_dir/known_hosts" \
      || echo "ssh-keyscan failed for $SSH_HOST:$SSH_PORT" >&2
  done < "$ips_file"
}
#######################################
# Parse one ips-file line into its user, host and port.
# Accepted forms: host, host:port, user@host, user@host:port, [v6], [v6]:port
# and a bare IPv6 address (no port). Surrounding whitespace is ignored and,
# when STRIP_COMMENTS is non-zero, everything from the first '#' is dropped.
# Globals:   DEFAULT_USER, DEFAULT_PORT, STRIP_COMMENTS (read);
#            SSH_USER, SSH_HOST, SSH_PORT (written, only on success)
# Arguments: $1 - the raw line
# Returns:   0 on success; 1 for blank/comment lines, an empty user or host,
#            or a port that is not an integer in 1..65535
#######################################
parse_target() {
  local line="$1"
  # Trim leading, then trailing, whitespace.
  line="${line#"${line%%[![:space:]]*}"}"
  line="${line%"${line##*[![:space:]]}"}"
  if (( ${STRIP_COMMENTS:-1} )); then
    line="${line%%#*}"
  fi
  line="${line%"${line##*[![:space:]]}"}"
  [[ -z "$line" ]] && return 1

  local user hostport host port
  if [[ "$line" == *@* ]]; then
    user="${line%%@*}"
    hostport="${line#*@}"
  else
    user="$DEFAULT_USER"
    hostport="$line"
  fi

  if [[ "$hostport" =~ ^\[(.+)\]:(.+)$ ]]; then
    host="${BASH_REMATCH[1]}"; port="${BASH_REMATCH[2]}"
  elif [[ "$hostport" =~ ^\[(.+)\]$ ]]; then
    host="${BASH_REMATCH[1]}"; port="$DEFAULT_PORT"
  elif [[ "$hostport" == *:*:* ]]; then
    # More than one colon without brackets: a bare IPv6 address, not host:port.
    host="$hostport"; port="$DEFAULT_PORT"
  elif [[ "$hostport" == *:* ]]; then
    host="${hostport%%:*}"; port="${hostport##*:}"
  else
    host="$hostport"; port="$DEFAULT_PORT"
  fi

  [[ -n "$user" && -n "$host" ]] || return 1
  [[ "$port" =~ ^[0-9]{1,5}$ ]] || return 1
  # 10# forces base 10 so a leading zero is not read as octal.
  (( 10#$port >= 1 && 10#$port <= 65535 )) || return 1

  SSH_USER="$user"; SSH_HOST="$host"; SSH_PORT="$port"
  return 0
}
#######################################
# Run a command on the single target named by $ip.
# The arguments are joined into one string, prefixed with $runprefix and run
# remotely through a login bash with line-buffered stdout/stderr.
# Globals:   ip, runprefix, ssh_key_file, INTERACTIVE_SSH (read);
#            SSH_USER/SSH_HOST/SSH_PORT (set by parse_target)
# Arguments: the remote command and its arguments
# Returns:   1 for an unparsable target, otherwise the status of ssh
#######################################
runip() {
  local target="${ip:?ip not set}"
  if ! parse_target "$target"; then
    echo "invalid target: $target" >&2
    return 1
  fi

  # Quote the whole command as a single word so it survives the remote shell
  # and reaches `bash -c` intact.
  local remote_cmd
  printf -v remote_cmd "%q" "$runprefix$*"

  local -a connect=(-i "$ssh_key_file" -p "$SSH_PORT")
  local -a remote=("$SSH_USER@$SSH_HOST" -- stdbuf -oL -eL bash -l -c "$remote_cmd")

  if [[ "${INTERACTIVE_SSH:-0}" -eq 1 ]]; then
    # Force a tty so prompts (e.g. sudo) work.
    ssh "${connect[@]}" -o ConnectTimeout=5 -tt "${remote[@]}"
  else
    # Never prompt, and do not let ssh consume the caller's stdin.
    ssh "${connect[@]}" -o BatchMode=yes -o ConnectTimeout=5 "${remote[@]}" < /dev/null
  fi
}
#######################################
# Print line k of the ips file, where k is 0-based (k=0 is the first line).
# Globals:   ips_file (read)
# Arguments: $1 - non-negative integer index
# Outputs:   the raw line on stdout (nothing when k is past the end)
# Returns:   1 when $1 is not a non-negative integer, else the status of sed
#######################################
_line_k() {
  local idx="${1-}"
  # Without this check a value such as "abc" is evaluated as arithmetic
  # (an unset variable, i.e. 0) and silently selects the first line.
  if [[ ! "$idx" =~ ^[0-9]+$ ]]; then
    echo "k must be a non-negative integer: '$idx'" >&2
    return 1
  fi
  # 10# forces base 10 so a leading zero is not read as octal.
  sed -n "$(( 10#$idx + 1 ))p" "$ips_file"
}
# runk: run the given command on line $k of the ips file (as returned by
# _line_k) by handing that line to runip through a one-shot ip= assignment.
runk() {
  local kth_target
  kth_target="$(_line_k "${k:?k not set}")"
  ip="$kth_target" runip "$@"
}
# runhead: run the given command on the first line of the ips file by handing
# that line to runip through a one-shot ip= assignment.
runhead() {
  local first_target
  first_target="$(head -n1 "$ips_file")"
  ip="$first_target" runip "$@"
}
#######################################
# Run a command on many targets in parallel and print a per-host report.
# Targets come from $ips (one per line); when ips is unset they are read
# from the ips file. Each host's output is collected, prefixed with its
# label and printed as one block, followed by a Total/Success/Fail summary.
# Globals:   ips, ips_file, DEFAULT_USER (read);
#            SSH_USER/SSH_HOST/SSH_PORT (set by parse_target)
# Arguments: the remote command and its arguments (passed to runip)
# Outputs:   per-host blocks and summary on stdout, "[FAIL] label" on stderr
# Returns:   0 when every host succeeded, 1 otherwise
#######################################
runips() {
  # Silence job-control chatter ("[1] Done ...") while the jobs run.
  local had_monitor=0
  [[ $- == *m* ]] && { had_monitor=1; set +m; }
  set +b 2>/dev/null || true

  local targets
  if [[ -n "${ips+set}" ]]; then
    targets="$ips"
  else
    targets="$(cat "$ips_file")"
  fi

  # flock is optional: without it the blocks are printed unserialized.
  local have_flock=0
  command -v flock >/dev/null 2>&1 && have_flock=1

  local lock
  if ! lock="$(mktemp "${TMPDIR:-/tmp}/runips.lock.XXXXXX")"; then
    echo "runips: cannot create lock file" >&2
    (( had_monitor )) && set -m
    return 1
  fi

  local pids=() labels=() i=0 line label
  while IFS= read -r line || [[ -n "$line" ]]; do
    parse_target "$line" || continue
    [[ -z "${SSH_HOST:-}" ]] && continue
    label="$SSH_HOST:$SSH_PORT"
    [[ "$SSH_USER" != "$DEFAULT_USER" ]] && label="$SSH_USER@$label"
    labels[i]="$label"
    (
      # pipefail makes the status reflect runip, not only awk.
      out="$(
        set -o pipefail
        ip="$line" runip "$@" \
          | awk -v p="$label\t " '{print p $0}'
      )"
      rc=$?
      # Each job opens the lock file itself: flock locks belong to the open
      # file description, so an fd inherited from the parent would be shared
      # by all jobs and never block any of them.
      {
        if (( have_flock )); then flock 9; fi
        printf "%s\n" "$label"
        printf "%s\n" "-----------------------------------------------------------------------------------------"
        printf "%s\n" "$out"
        printf "\n"
      } 9>> "$lock"
      exit "$rc"
    ) &
    pids[i]=$!
    i=$((i + 1))
  done < <(printf "%s\n" "$targets")

  local total=$i ok=0 fail=0 j
  for j in "${!pids[@]}"; do
    if wait "${pids[j]}"; then
      ok=$((ok + 1))
    else
      fail=$((fail + 1))
      echo "[FAIL] ${labels[j]}" >&2
    fi
  done

  echo "=================================================="
  echo "Total: $total | Success: $ok | Fail: $fail"
  echo "=================================================="

  rm -f -- "$lock"
  (( had_monitor )) && set -m
  (( fail == 0 ))
}
# runall: run the given command on every target in the ips file by passing the
# whole file content to runips through a one-shot ips= assignment.
runall() {
  local every_target
  every_target="$(cat "$ips_file")"
  ips="$every_target" runips "$@"
}
# runrest: run the given command on every line of the ips file except the
# first one (lines 2..end are handed to runips via ips=).
runrest() {
  local all_but_first
  all_but_first="$(tail -n+2 "$ips_file")"
  ips="$all_but_first" runips "$@"
}
#######################################
# Open an interactive ssh session to line $k of the ips file (k as
# understood by _line_k).
# Globals:   k, ssh_key_file (read);
#            SSH_USER/SSH_HOST/SSH_PORT (set by parse_target)
# Arguments: none
# Returns:   1 when k is unset, the line cannot be read or parsed;
#            otherwise the status of ssh
#######################################
sshk() {
  if [[ -z "${k-}" ]]; then
    echo "k not set" >&2
    return 1
  fi
  # Declared separately so a failing lookup is not hidden by `local`.
  local target
  target="$(_line_k "$k")" || return 1
  parse_target "$target" || { echo "invalid target: $target" >&2; return 1; }
  ssh -i "$ssh_key_file" -p "$SSH_PORT" -o ConnectTimeout=5 "$SSH_USER@$SSH_HOST"
}
# ssh_head: ssh to the first line of the ips file (k=0).
# Defined as a function rather than an alias because aliases are not expanded
# in non-interactive shells, so the alias was unusable from scripts.
# A leftover alias from an earlier sourcing is removed first; otherwise it
# would be expanded while the function definition below is being parsed.
unalias ssh_head 2>/dev/null || true
ssh_head() { k=0 sshk "$@"; }
# Names/globs that rsyncall and rsynchead never copy. Each entry becomes one
# rsync --exclude=<pattern> argument in EXCLUDE_ARGS.
IGNORE_PATTERNS=(
  "archive"
  "*.pyc"
  "__pycache__"
  ".DS_Store"
  "node_modules"
  ".env"
  "venv"
  ".venv"
  "checkpoints"
  "outputs"
  "*.whl*"
  ".claude"
  ".gemini"
  "out"
  "csrc"
  "wandb"
  # ".git"
  "bin"
  ".pytest_cache"
  "cuda-samples"
  "*.sqlite"
)

# Prefix every pattern with --exclude= in a single expansion. This avoids a
# top-level loop variable that would leak into the shell sourcing this file.
EXCLUDE_ARGS=("${IGNORE_PATTERNS[@]/#/--exclude=}")
#######################################
# rsync a local path to every target in the ips file, in parallel.
# Targets that name this machine (its hostname, any address reported by
# `hostname -I`, localhost or 127.0.0.1) are skipped.
# Globals:   ips_file, ssh_key_file, DEFAULT_USER, EXCLUDE_ARGS (read);
#            SSH_USER/SSH_HOST/SSH_PORT (set by parse_target)
# Arguments: $1 - local source path
#            $2 - destination path on the remote hosts
#            $3 - optional rsync options as one whitespace-separated string
#                 (default: "-avz --info=progress2")
# Outputs:   per-host progress and a summary on stdout; failures on stderr
# Returns:   0 when every transfer succeeded, 1 otherwise
#######################################
rsyncall() {
  local src="${1:?Source directory required}"
  local dst="${2:?Destination directory required}"
  local opts="${3:--avz --info=progress2}"
  if [[ ! -e "$src" ]]; then
    echo "Source does not exist: $src" >&2
    return 1
  fi

  # Split the option string into words once, without pathname expansion.
  local -a opt_args=()
  read -r -a opt_args <<< "$opts"

  # Identify the current host. `hostname -I` may be unavailable; then only
  # the name and the loopback checks apply.
  local current_host current_ips
  current_host="$(hostname 2>/dev/null)" || current_host=""
  current_ips="$(hostname -I 2>/dev/null)" || current_ips=""
  current_ips=" $current_ips "

  local pids=() labels=() i=0 line label
  while IFS= read -r line || [[ -n "$line" ]]; do
    parse_target "$line" || continue
    [[ -z "${SSH_HOST:-}" ]] && continue

    # Skip if this is the current host.
    if [[ "$SSH_HOST" == "$current_host" ]] \
      || [[ "$current_ips" == *" $SSH_HOST "* ]] \
      || [[ "$SSH_HOST" == "localhost" ]] \
      || [[ "$SSH_HOST" == "127.0.0.1" ]]; then
      echo "Skipping current host: $SSH_HOST"
      continue
    fi

    label="$SSH_HOST:$SSH_PORT"
    [[ "$SSH_USER" != "$DEFAULT_USER" ]] && label="$SSH_USER@$label"
    labels[i]="$label"
    {
      echo "[$label] Starting rsync..."
      if rsync "${opt_args[@]}" \
        "${EXCLUDE_ARGS[@]}" \
        -e "ssh -i $ssh_key_file -p $SSH_PORT -o ConnectTimeout=5 -o BatchMode=yes" \
        "$src" "$SSH_USER@$SSH_HOST:$dst"; then
        echo "[$label] Success"
      else
        echo "[$label] Failed" >&2
        # Propagate the failure; otherwise the job's status would be that of
        # the echo above and `wait` would count every host as a success.
        exit 1
      fi
    } &
    pids[i]=$!
    i=$((i + 1))
  done < "$ips_file"

  local total=$i ok=0 fail=0 j
  for j in "${!pids[@]}"; do
    if wait "${pids[j]}"; then
      ok=$((ok + 1))
    else
      fail=$((fail + 1))
      echo "[FAIL] ${labels[j]}" >&2
    fi
  done

  echo "=================================================="
  echo "Rsync Total: $total | Success: $ok | Fail: $fail"
  echo "=================================================="
  (( fail == 0 ))
}
#######################################
# rsync a local path to the first target in the ips file.
# Globals:   ips_file, ssh_key_file, EXCLUDE_ARGS (read);
#            SSH_USER/SSH_HOST/SSH_PORT (set by parse_target)
# Arguments: $1 - local source path
#            $2 - destination path on the remote host
#            $3 - optional rsync options as one whitespace-separated string
#                 (default: "-avz"); --progress is always added
# Returns:   1 when the source is missing or the first line cannot be read
#            or parsed; otherwise the status of rsync
#######################################
rsynchead() {
  local src="${1:?Source required}"
  local dst="${2:?Destination required}"
  local opts="${3:--avz}"
  if [[ ! -e "$src" ]]; then
    echo "Source does not exist: $src" >&2
    return 1
  fi

  # Kept local: assigning the global `ip` here would clobber the variable
  # callers set for runip.
  local head_target
  head_target="$(head -n1 "$ips_file")" || return 1
  parse_target "$head_target" || return 1

  # Split the option string into words once, without pathname expansion.
  local -a opt_args=()
  read -r -a opt_args <<< "$opts"

  rsync "${opt_args[@]}" --progress \
    "${EXCLUDE_ARGS[@]}" \
    -e "ssh -i $ssh_key_file -p $SSH_PORT -o ConnectTimeout=5" \
    "$src" "$SSH_USER@$SSH_HOST:$dst"
}
#######################################
# Kill processes whose full command line matches a pattern, printing what
# was killed (pkill -i -f -e). Never fails, even when nothing matched.
# NOTE(review): this function shadows the system `killall` command in any
# shell that sources this file; the name is kept for compatibility.
# Arguments: $1 - optional extended regex (default: 192\.222, the address
#                 prefix the original helper was written for)
#######################################
killall() {
  # The dot is escaped so it matches a literal '.', not any character.
  local pattern="${1:-192\.222}"
  pkill -ife -- "$pattern" || true
}
Author
xxrjun
commented
Oct 15, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment