Skip to content

Instantly share code, notes, and snippets.

@xxrjun
Created October 15, 2025 03:27
Show Gist options
  • Select an option

  • Save xxrjun/1fe9769a415ae89287407696c5173701 to your computer and use it in GitHub Desktop.

Select an option

Save xxrjun/1fe9769a415ae89287407696c5173701 to your computer and use it in GitHub Desktop.
Multi SSH Helpers
#!/usr/bin/env bash
# original source: https://lambda.ai/blog/how-to-serve-deepseek-r1-v3-on-gh200
# runips_bootstrap: set up known_hosts and authorized_keys (for passwordless ssh, you need to have your public key in ~/.ssh/id_rsa.pub or ~/.ssh/id_ed25519.pub)
# runip: run command on a specific ip (set ip=host:port or user@host:port)
# runk: run command on the k-th ip in the ips file (k is 0-based: k=0 is the first line)
# runhead: run command on the first ip in the ips file
# runips: run command on multiple ips in parallel (set ips="..." with one target per line; runall and runrest fill it from the ips file)
# runall: run command on all ips in the ips file
# runrest: run command on all but the first ip in the ips file
# sshk: ssh to the k-th ip in the ips file (k is 0-based: k=0 is the first line)
# ssh_head: ssh to the first ip in the ips file
# rsyncall: rsync a directory to all ips in the ips file
# rsynchead: rsync a directory to the first ip in the ips file
# killall: kill every process whose command line matches "192.222" (e.g. ssh/rsync jobs started by this script); note that this function shadows the system killall command
# Example usage:
# runall nvidia-smi
# runall df -h
# rsyncall /local/dir /remote/dir
# Example usage for sudo:
# INTERACTIVE_SSH=1 runhead sudo -v
# k=<line> INTERACTIVE_SSH=1 runk sudo -v
# k=1 INTERACTIVE_SSH=1 runk sudo usermod -aG docker $USER
# ---------------------------------------------------------------------------
# Configuration. Every setting can be overridden by defining the variable
# before this file is sourced.
#
# This file is meant to be sourced (e.g. from ~/.bashrc), so fatal checks bail
# out with `return`, which stops the sourcing without closing the calling
# shell. When the file is executed directly `return` is not allowed, so the
# checks fall back to `exit`.
# ---------------------------------------------------------------------------

# File listing the targets, one per line.
ips_file=${ips_file:-$HOME/ips.txt} # <-- PLACE YOUR IPS FILE HERE
[[ -f "$ips_file" ]] || {
  echo "IPs file not found: $ips_file" >&2
  return 1 2> /dev/null || exit 1
}
# Example content of ips.txt (user and port are both optional):
# host1:port
# host2
# host3
# user@host4:port

# User and port applied to targets that do not specify their own.
DEFAULT_USER=${DEFAULT_USER:-$USER} # <-- CHANGE YOUR DEFAULT USER HERE IF NEEDED
DEFAULT_PORT=${DEFAULT_PORT:-22}
# String prepended verbatim to every remote command executed through runip.
export runprefix="${runprefix:-}"
# Non-zero: parse_target drops everything from the first '#' on a line.
STRIP_COMMENTS=${STRIP_COMMENTS:-1}

# Local SSH directory; the private key found here is used for all connections
# (ed25519 is preferred over rsa).
ssh_dir=${ssh_dir:-$HOME/.ssh}
mkdir -p "$ssh_dir" && chmod 700 "$ssh_dir"
if [[ -f "$ssh_dir/id_ed25519" ]]; then
  ssh_key_file="$ssh_dir/id_ed25519"
elif [[ -f "$ssh_dir/id_rsa" ]]; then
  ssh_key_file="$ssh_dir/id_rsa"
else
  echo "No private key in $ssh_dir (id_ed25519 or id_rsa)" >&2
  return 1 2> /dev/null || exit 1
fi
#######################################
# Prepare the local ~/.ssh state for passwordless SSH between the nodes.
#   1. Appends each local public key (id_ed25519.pub, id_rsa.pub) to
#      $ssh_dir/authorized_keys unless the same key material is already there.
#   2. Scans the host keys of every target in $ips_file and appends them
#      (hashed, -H) to $ssh_dir/known_hosts.
# Globals:   ssh_dir, ips_file (read)
#            SSH_USER, SSH_HOST, SSH_PORT (written by parse_target)
# Arguments: none
# Outputs:   one "doing host:port" progress line per target on stdout
# Returns:   status of the last command of the scan loop
#######################################
runips_bootstrap() {
  local auth_file="$ssh_dir/authorized_keys"
  local pub_file key_blob line

  touch "$auth_file"
  chmod 600 "$auth_file"

  for pub_file in "$ssh_dir/id_ed25519.pub" "$ssh_dir/id_rsa.pub"; do
    [[ -f "$pub_file" ]] || continue
    # Compare on the base64 key material (2nd field) only. Matching on the
    # whole line would re-append the key whenever the trailing comment differs,
    # and deriving the pattern from the private key yields an empty pattern
    # (which matches every line) when the key is missing or passphrase-locked.
    key_blob="$(awk 'NF >= 2 { print $2; exit }' "$pub_file")"
    if [[ -z "$key_blob" ]]; then
      echo "Skipping malformed public key: $pub_file" >&2
      continue
    fi
    if ! grep -Fq -- "$key_blob" "$auth_file"; then
      # Do not glue the new key onto an unterminated last line.
      if [[ -s "$auth_file" && -n "$(tail -c1 "$auth_file")" ]]; then
        echo >> "$auth_file"
      fi
      cat "$pub_file" >> "$auth_file"
    fi
  done

  while IFS= read -r line || [[ -n "$line" ]]; do
    parse_target "$line" || continue
    [[ -z "${SSH_HOST:-}" ]] && continue
    # Record keys for host:port and include common types so ED25519 is present.
    echo "doing $SSH_HOST:$SSH_PORT"
    # stdin is detached so the scan can never consume lines of $ips_file.
    ssh-keyscan -T 5 -p "$SSH_PORT" -H -t rsa,ecdsa,ed25519 "$SSH_HOST" \
      < /dev/null >> "$ssh_dir/known_hosts"
  done < "$ips_file"
}
#######################################
# Parse one target line into SSH_USER / SSH_HOST / SSH_PORT.
# Accepted forms (surrounding whitespace is ignored):
#   host            host:port            user@host        user@host:port
#   [ipv6]          [ipv6]:port          user@[ipv6]:port
# When STRIP_COMMENTS is non-zero, everything from the first '#' is dropped.
# A missing or empty user/port falls back to DEFAULT_USER / DEFAULT_PORT.
# Globals:   STRIP_COMMENTS, DEFAULT_USER, DEFAULT_PORT (read)
#            SSH_USER, SSH_HOST, SSH_PORT (written, only on success)
# Arguments: $1 - raw line from the ips file
# Returns:   0 on success; 1 when the line holds no target (blank,
#            comment-only, or empty host)
#######################################
parse_target() {
  local line="$1"
  # Trim leading, then trailing whitespace using parameter expansion only.
  line="${line#"${line%%[![:space:]]*}"}"
  line="${line%"${line##*[![:space:]]}"}"
  if (( STRIP_COMMENTS )); then
    line="${line%%#*}"
    # Removing the comment may expose trailing whitespace again.
    line="${line%"${line##*[![:space:]]}"}"
  fi
  [[ -n "$line" ]] || return 1

  local user="" hostport host port=""
  if [[ "$line" == *@* ]]; then
    user="${line%%@*}"
    hostport="${line#*@}"
  else
    hostport="$line"
  fi

  # Regexes live in variables so the brackets need no quoting tricks in [[ =~ ]].
  local re_bracket_port='^\[(.+)\]:(.*)$'
  local re_bracket='^\[(.+)\]$'
  if [[ "$hostport" =~ $re_bracket_port ]]; then
    host="${BASH_REMATCH[1]}"
    port="${BASH_REMATCH[2]}"
  elif [[ "$hostport" =~ $re_bracket ]]; then
    host="${BASH_REMATCH[1]}"
  elif [[ "$hostport" == *:* ]]; then
    host="${hostport%%:*}"
    port="${hostport##*:}"
  else
    host="$hostport"
  fi

  # A target without a host is unusable; report it instead of handing ssh an
  # empty host name.
  [[ -n "$host" ]] || return 1

  SSH_USER="${user:-$DEFAULT_USER}"
  SSH_HOST="$host"
  SSH_PORT="${port:-$DEFAULT_PORT}"
  return 0
}
#######################################
# Run a command on the single target named by $ip.
# The command words are joined with spaces, prefixed with $runprefix and
# shell-quoted once, so the remote side receives the whole string as the
# single argument of `bash -l -c`.
# Globals:   ip (required), runprefix, ssh_key_file, INTERACTIVE_SSH (read)
#            SSH_USER, SSH_HOST, SSH_PORT (written by parse_target)
# Arguments: the command and its arguments
# Returns:   1 when $ip cannot be parsed, otherwise the status of ssh
#######################################
runip() {
  local target="${ip:?ip not set}"
  if ! parse_target "$target"; then
    echo "invalid target: $target" >&2
    return 1
  fi

  local quoted
  quoted="$(printf "%q" "$runprefix$*")"

  local destination="$SSH_USER@$SSH_HOST"
  local -a identity=(-i "$ssh_key_file" -p "$SSH_PORT")
  # stdbuf makes the remote stdout/stderr line-buffered.
  local -a remote=(stdbuf -oL -eL bash -l -c "$quoted")

  if [[ "${INTERACTIVE_SSH:-0}" -eq 1 ]]; then
    # Interactive: force a tty (-tt) and keep stdin attached, e.g. for sudo.
    ssh "${identity[@]}" -o ConnectTimeout=5 -tt \
      "$destination" -- "${remote[@]}"
  else
    # Batch: never prompt, and detach stdin.
    ssh "${identity[@]}" -o BatchMode=yes -o ConnectTimeout=5 \
      "$destination" -- "${remote[@]}" < /dev/null
  fi
}
# Print line number ($1 + 1) of $ips_file, i.e. $1 is a 0-based line index.
# Lines are counted as stored in the file. Prints nothing when the index is
# past the end of the file.
_line_k() {
  local lineno=$(( $1 + 1 ))
  sed -n "${lineno}p" "$ips_file"
}
# Run a command on the target stored at 0-based line index $k of the ips file.
# Requires k to be set; all arguments are handed to runip unchanged.
runk() {
  local chosen
  chosen="$(_line_k "${k:?k not set}")"
  ip="$chosen" runip "$@"
}
# Run a command on the target written on the first line of the ips file.
# All arguments are handed to runip unchanged.
runhead() {
  local first
  first="$(head -n1 "$ips_file")"
  ip="$first" runip "$@"
}
#######################################
# Run a command on every target listed in $ips (one per line) in parallel and
# print each target's output as one contiguous block, followed by a summary.
# Globals:   ips (read, newline-separated targets), DEFAULT_USER (read)
#            SSH_USER, SSH_HOST, SSH_PORT (written by parse_target)
# Arguments: the command and its arguments (forwarded to runip)
# Outputs:   per-target block on stdout (label, rule, prefixed output),
#            "[FAIL] label" on stderr for every failed target, summary
# Returns:   0 when every target succeeded, 1 otherwise
#######################################
runips() {
  local lock
  lock="$(mktemp "${TMPDIR:-/tmp}/runips.lock.XXXXXX")" || {
    echo "runips: cannot create lock file" >&2
    return 1
  }
  # flock is optional: without it the blocks are merely not serialized.
  local have_flock=0
  command -v flock > /dev/null 2>&1 && have_flock=1

  # Silence job-control chatter ("[1] 1234", "Done ...") while jobs run.
  local had_monitor=0
  [[ $- == *m* ]] && { had_monitor=1; set +m; }
  set +b 2> /dev/null || true

  local pids=() labels=() i=0 j line label
  while IFS= read -r line || [[ -n "$line" ]]; do
    parse_target "$line" || continue
    [[ -z "${SSH_HOST:-}" ]] && continue
    label="$SSH_HOST:$SSH_PORT"
    [[ "$SSH_USER" != "$DEFAULT_USER" ]] && label="$SSH_USER@$label"
    labels[i]="$label"
    {
      # pipefail (scoped to this command substitution) makes the pipeline
      # report runip's status instead of awk's, so failures can be counted.
      out="$(
        set -o pipefail
        ip="$line" runip "$@" \
          | awk -v p="$label\t " '{print p $0}'
      )"
      rc=$?
      # Each job opens the lock file itself: flock locks belong to the open
      # file description, so a descriptor inherited from the parent would be
      # shared by all jobs and would not exclude anything.
      {
        (( have_flock )) && flock 200
        printf "%s\n" "$label"
        printf "%s\n" "-----------------------------------------------------------------------------------------"
        printf "%s\n" "$out"
        printf "\n"
      } 200>> "$lock"
      exit "$rc"
    } &
    pids[i]=$!
    i=$((i + 1))
  done < <(printf "%s\n" "${ips:-}")

  local total=$i ok=0 fail=0
  for j in "${!pids[@]}"; do
    if wait "${pids[j]}"; then
      ok=$((ok + 1))
    else
      fail=$((fail + 1))
      echo "[FAIL] ${labels[j]}" >&2
    fi
  done
  echo "=================================================="
  echo "Total: $total | Success: $ok | Fail: $fail"
  echo "=================================================="

  rm -f -- "$lock"
  if (( had_monitor )); then set -m; fi
  (( fail == 0 ))
}
# Run a command on every target of the ips file, in parallel, via runips.
runall() {
  local targets
  targets="$(cat "$ips_file")"
  ips="$targets" runips "$@"
}
# Run a command, in parallel via runips, on every target of the ips file
# except the one on the first line.
runrest() {
  local targets
  targets="$(tail -n +2 "$ips_file")" # skip the first one
  ips="$targets" runips "$@"
}
#######################################
# Open an SSH session to the target at 0-based line index $k of the ips file.
# Globals:   k (required), ssh_key_file (read)
#            SSH_USER, SSH_HOST, SSH_PORT (written by parse_target)
# Arguments: none
# Returns:   1 when the selected line is not a valid target, otherwise the
#            status of ssh
#######################################
sshk() {
  local target
  target="$(_line_k "${k:?k not set}")"
  if ! parse_target "$target"; then
    echo "invalid target: $target" >&2
    return 1
  fi
  local -a ssh_args=(-i "$ssh_key_file" -p "$SSH_PORT" -o ConnectTimeout=5)
  ssh "${ssh_args[@]}" "$SSH_USER@$SSH_HOST"
}
# ssh_head: open an SSH session to the first target of the ips file (k=0).
# Defined as a function instead of an alias because bash does not expand
# aliases in non-interactive shells, which made the helper unusable from
# scripts. A stale alias of the same name (from an earlier version of this
# file) is removed first, since it would otherwise be expanded while the
# function definition below is being parsed.
unalias ssh_head 2> /dev/null || true
ssh_head() { k=0 sshk "$@"; }
# Paths and globs that the rsync helpers never copy to the remote side.
IGNORE_PATTERNS=(
  "archive"
  "*.pyc"
  "__pycache__"
  ".DS_Store"
  "node_modules"
  ".env"
  "venv"
  ".venv"
  "checkpoints"
  "outputs"
  "*.whl*"
  ".claude"
  ".gemini"
  "out"
  "csrc"
  "wandb"
  # ".git"
  "bin"
  ".pytest_cache"
  "cuda-samples"
  "*.sqlite"
)
# One "--exclude=<pattern>" argument per entry above. Built with a prefix
# substitution rather than a loop so that no helper variable is left behind
# in (or clobbered in) the shell that sources this file.
EXCLUDE_ARGS=("${IGNORE_PATTERNS[@]/#/--exclude=}")
#######################################
# Rsync a local path to every target of the ips file in parallel, skipping
# targets that name the current machine.
# Globals:   ips_file, ssh_key_file, EXCLUDE_ARGS, DEFAULT_USER (read)
#            SSH_USER, SSH_HOST, SSH_PORT (written by parse_target)
# Arguments: $1 - local source path (must exist)
#            $2 - destination path on the remote side
#            $3 - rsync options as one string, word-split on purpose
#                 (default: "-avz --info=progress2")
# Outputs:   progress lines and a summary on stdout; "[label] Failed" and
#            "[FAIL] label" on stderr for every failed target
# Returns:   0 when every transfer succeeded, 1 otherwise
#######################################
rsyncall() {
  local src="${1:?Source directory required}"
  local dst="${2:?Destination directory required}"
  local opts="${3:--avz --info=progress2}"
  [[ ! -e "$src" ]] && { echo "Source does not exist: $src" >&2; return 1; }

  # Identify the current machine so it is not rsynced onto itself. When
  # `hostname -I` is unavailable current_ip stays empty and never matches.
  local current_host current_ip
  current_host="$(hostname)"
  current_ip="$(hostname -I 2> /dev/null | awk '{print $1}')"

  local pids=() labels=() i=0 j line label
  while IFS= read -r line || [[ -n "$line" ]]; do
    parse_target "$line" || continue
    [[ -z "${SSH_HOST:-}" ]] && continue
    # Skip if this is the current host
    if [[ "$SSH_HOST" == "$current_host" ]] \
      || [[ "$SSH_HOST" == "$current_ip" ]] \
      || [[ "$SSH_HOST" == "localhost" ]] \
      || [[ "$SSH_HOST" == "127.0.0.1" ]]; then
      echo "Skipping current host: $SSH_HOST"
      continue
    fi
    label="$SSH_HOST:$SSH_PORT"
    [[ "$SSH_USER" != "$DEFAULT_USER" ]] && label="$SSH_USER@$label"
    labels[i]="$label"
    {
      echo "[$label] Starting rsync..."
      # shellcheck disable=SC2086 # $opts is intentionally word-split
      if rsync $opts \
        "${EXCLUDE_ARGS[@]}" \
        -e "ssh -i $ssh_key_file -p $SSH_PORT -o ConnectTimeout=5 -o BatchMode=yes" \
        "$src" "$SSH_USER@$SSH_HOST:$dst"; then
        echo "[$label] Success"
      else
        rc=$?
        echo "[$label] Failed" >&2
        # Propagate the failure: without this the job would end with the
        # status of echo and the summary below could never count a failure.
        exit "$rc"
      fi
    } < /dev/null &
    pids[i]=$!
    i=$((i + 1))
  done < "$ips_file"

  local total=$i ok=0 fail=0
  for j in "${!pids[@]}"; do
    if wait "${pids[j]}"; then
      ok=$((ok + 1))
    else
      fail=$((fail + 1))
      echo "[FAIL] ${labels[j]}" >&2
    fi
  done
  echo "=================================================="
  echo "Rsync Total: $total | Success: $ok | Fail: $fail"
  echo "=================================================="
  (( fail == 0 ))
}
#######################################
# Rsync a local path to the target on the first line of the ips file.
# Globals:   ips_file, ssh_key_file, EXCLUDE_ARGS (read)
#            SSH_USER, SSH_HOST, SSH_PORT (written by parse_target)
# Arguments: $1 - local source path (must exist)
#            $2 - destination path on the remote side
#            $3 - rsync options as one string, word-split on purpose
#                 (default: "-avz")
# Returns:   1 when the source is missing or the first line is not a valid
#            target, otherwise the status of rsync
#######################################
rsynchead() {
  local src="${1:?Source required}"
  local dst="${2:?Destination required}"
  local opts="${3:--avz}"
  [[ -e "$src" ]] || { echo "Source does not exist: $src" >&2; return 1; }

  # Kept local on purpose: runip reads the variable `ip`, so leaking the head
  # target into it would make a later bare `runip` silently hit the head node.
  local head_target
  head_target="$(head -n1 "$ips_file")"
  parse_target "$head_target" || {
    echo "invalid target: $head_target" >&2
    return 1
  }

  # shellcheck disable=SC2086 # $opts is intentionally word-split
  rsync $opts --progress \
    "${EXCLUDE_ARGS[@]}" \
    -e "ssh -i $ssh_key_file -p $SSH_PORT -o ConnectTimeout=5" \
    "$src" "$SSH_USER@$SSH_HOST:$dst"
}
# Signal every process matched by `pkill -ife 192.222` and always report
# success, even when nothing matched.
# NOTE(review): with procps pkill, -i/-f/-e mean case-insensitive, match the
# full command line, and echo what was signalled — confirm on other platforms.
# NOTE: once this file is sourced, this function shadows any external
# `killall` command, and its arguments are ignored.
killall() {
  pkill -ife 192.222
  return 0
}
@xxrjun
Copy link
Author

xxrjun commented Oct 15, 2025

echo "source /path/to/.multi_ssh_helpers" >> ~/.bashrc
source ~/.bashrc

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment