Skip to content

Instantly share code, notes, and snippets.

@sdboyer
Last active February 27, 2026 13:35
Show Gist options
  • Select an option

  • Save sdboyer/87dac49d1e9387f4372a909d6c5eb91c to your computer and use it in GitHub Desktop.

Select an option

Save sdboyer/87dac49d1e9387f4372a909d6c5eb91c to your computer and use it in GitHub Desktop.
Poolside preflight check script for on-premises installation
$ ./preflight-check.sh
Preflight checks starting... (log: preflight-20260224-180212.log)
[PASS] os: Ubuntu 22.04.5 LTS
[PASS] kernel: 6.10.14-linuxkit
[WARN] cpu: 14 cores (min: 128)
[FAIL] ram: 7GB (min: 512GB)
[WARN] disk /: 823GB free (min: 1000GB)
[WARN] disk /var: 823GB free (min: 1000GB)
[WARN] disk /var/lib: 823GB free (min: 1000GB)
[WARN] disk /opt: 823GB free (min: 1000GB)
[PASS] fstab: no noexec on critical paths
[WARN] gpu: no NVIDIA devices in lspci
[PASS] gpu: no nvidia driver loaded (good for GPU operator)
[WARN] nouveau: not blacklisted (may need to before GPU setup)
[WARN] sysctl: BPF settings may need adjustment (bpf_disabled=0, jit_harden=N/A)
[PASS] selinux: not installed
[PASS] network: outbound OK
[PASS] tools: jq
[FAIL] tools missing: kubectl helm terraform yq
Result: 2 failed, 8 warnings
Details: preflight-20260224-180212.log
#!/usr/bin/env bash
set -euo pipefail

# Preflight check for a Poolside installation.
#
# Prints a one-line pass/warn/fail summary per check to stdout and appends
# the detailed diagnostics to $LOG_FILE.
#
# Thresholds come from one of two places:
#   * POOLIO_COMPONENT is set: it holds a JSON document and each value is
#     read from its .fileConfig.schema object with jq.
#   * otherwise: the same-named environment variables, falling back to the
#     built-in defaults below.
CONFIG="${POOLIO_COMPONENT:-}"
LOG_FILE="${LOG_FILE:-preflight-$(date +%Y%m%d-%H%M%S).log}"

if [[ -z "$CONFIG" ]]; then
  # Standalone mode: environment variables with defaults.
  TARGET_DISTRO="${TARGET_DISTRO:-}"
  MIN_CPU_CORES="${MIN_CPU_CORES:-128}"
  MIN_RAM_GB="${MIN_RAM_GB:-512}"
  MIN_DISK_GB="${MIN_DISK_GB:-1000}"
  DISK_PATHS="${DISK_PATHS:-[\"/\", \"/var\", \"/var/lib\", \"/opt\"]}"
  AIR_GAPPED="${AIR_GAPPED:-false}"
  REQUIRED_SOFTWARE="${REQUIRED_SOFTWARE:-[\"kubectl\",\"helm\",\"terraform\",\"jq\",\"yq\"]}"
else
  # Component mode.
  # fc FILTER: evaluate the jq filter ".fileConfig.schema<FILTER>" against
  # $CONFIG and print the raw result.
  fc() { jq -r ".fileConfig.schema$1" <<< "$CONFIG"; }

  TARGET_DISTRO=$(fc '.targetDistro // ""')
  MIN_CPU_CORES=$(fc '.minCpuCores // 128')
  MIN_RAM_GB=$(fc '.minRamGB // 512')
  MIN_DISK_GB=$(fc '.minDiskGB // 1000')
  # List-valued settings are kept as JSON text; the checks decode them later.
  DISK_PATHS=$(fc '.diskPaths // ["/", "/var", "/var/lib", "/opt"] | @json')
  AIR_GAPPED=$(fc '.airGapped // false')
  REQUIRED_SOFTWARE=$(fc '.requiredSoftware // ["kubectl","helm","terraform","jq","yq"] | @json')
fi
# --- Run state ---
# JSON array of check results; starts empty.
CHECKS='[]'
FAIL_COUNT=0
WARN_COUNT=0

# Collected values for JSON output
OS_VERSION=""
CPU_CORES=0
RAM_GB=0
GPU_COUNT=0

# log MESSAGE...
# Appends the arguments, joined by spaces, as one line to $LOG_FILE.
# printf is used instead of echo so that a message such as "-n" or "-e" is
# written literally rather than being interpreted as an echo option.
log() { printf '%s\n' "$*" >> "$LOG_FILE"; }

# log_section TITLE
# Writes a blank line followed by a "=== TITLE ===" header to the log.
log_section() { log ""; log "=== $1 ==="; }
# add_check NAME STATUS MESSAGE [DETAIL]
# Records the outcome of one check.
#   NAME    - machine-readable identifier stored in the JSON results
#   STATUS  - "pass", "warn" or "fail" (anything else is shown as [PASS])
#   MESSAGE - one-line summary printed to stdout and written to the log
#   DETAIL  - optional extra text, written to the log and the JSON entry
# Globals: appends to CHECKS, increments FAIL_COUNT / WARN_COUNT.
# Always returns 0.
add_check() {
  local name="$1" status="$2" message="$3" detail="${4:-}"

  # stdout: simple one-liner
  local icon
  case "$status" in
    warn) icon="[WARN]" ;;
    fail) icon="[FAIL]" ;;
    *)    icon="[PASS]" ;;
  esac
  printf "%-8s %s\n" "$icon" "$message"

  # log: include detail
  log "$icon $message"
  if [[ -n "$detail" ]]; then
    log " $detail"
  fi

  # Accumulate for JSON. jq is itself one of the tools this script checks
  # for, so it may be absent: in that case keep CHECKS untouched instead of
  # blanking it (or aborting under `set -e`).
  if command -v jq &>/dev/null; then
    local -a jq_args=(--arg n "$name" --arg s "$status" --arg m "$message")
    local filter='. + [{"name": $n, "status": $s, "message": $m}]'
    if [[ -n "$detail" ]]; then
      jq_args+=(--arg d "$detail")
      filter='. + [{"name": $n, "status": $s, "message": $m, "detail": $d}]'
    fi
    local updated
    if updated=$(jq "${jq_args[@]}" "$filter" <<< "$CHECKS"); then
      CHECKS="$updated"
    else
      log " (could not record check '$name' in JSON results)"
    fi
  fi

  # Plain assignments: ((x++)) returns non-zero when x was 0, which would
  # trip `set -e` without a guard.
  case "$status" in
    fail) FAIL_COUNT=$((FAIL_COUNT + 1)) ;;
    warn) WARN_COUNT=$((WARN_COUNT + 1)) ;;
  esac
  return 0
}
# --- OS ---
# check_os
# Reads /etc/os-release, stores the human-readable OS name in OS_VERSION and
# compares the host against TARGET_DISTRO when one is configured.
# TARGET_DISTRO is "<id>" or "<id>-<version prefix>" (e.g. "ubuntu-22.04");
# the version part is matched as a prefix of VERSION_ID.
# Globals: reads TARGET_DISTRO, writes OS_VERSION. Sourcing os-release also
# defines its variables (ID, VERSION_ID, ...) in the current shell.
check_os() {
  log_section "OS"
  if [[ ! -f /etc/os-release ]]; then
    add_check "os" "fail" "os: unknown (no /etc/os-release)"
    return
  fi

  # shellcheck source=/dev/null
  source /etc/os-release

  # Every os-release field is optional, so never expand one bare under
  # `set -u`.
  local os_id="${ID:-unknown}" os_ver="${VERSION_ID:-}"
  local pretty="${PRETTY_NAME:-${os_id}${os_ver:+ $os_ver}}"
  OS_VERSION="$pretty"

  log "$(cat /etc/os-release)"
  log "detected: ${os_id}-${os_ver:-0}"

  local matched=false
  if [[ -z "$TARGET_DISTRO" || "$TARGET_DISTRO" == "$os_id" ]]; then
    # No target configured, or the target names the distro without a version.
    matched=true
  elif [[ "$TARGET_DISTRO" == "$os_id-"* ]]; then
    # Strip "<id>-" rather than splitting on the first dash, so IDs that
    # contain a dash themselves (e.g. opensuse-leap) still match.
    local want_ver="${TARGET_DISTRO#"$os_id-"}"
    if [[ "$os_ver" == "$want_ver"* ]]; then
      matched=true
    fi
  fi

  if [[ "$matched" == true ]]; then
    add_check "os" "pass" "os: $pretty"
  else
    add_check "os" "fail" "os: $pretty (expected $TARGET_DISTRO)"
  fi
}
# --- Kernel ---
# check_kernel
# Reports the running kernel release (always a pass) and logs supporting
# detail: `uname -a`, selected options from the matching /boot config file
# when present, and whether an initramfs image exists for this release.
check_kernel() {
  log_section "Kernel"

  local release
  release=$(uname -r)
  add_check "kernel" "pass" "kernel: $release"
  log "uname -a: $(uname -a)"

  # Boot kernel config: first 20 matching option lines only.
  local boot_config="/boot/config-$release"
  if [[ -f "$boot_config" ]]; then
    log ""
    log "Kernel config ($boot_config) - key settings:"
    grep -E "^CONFIG_(PREEMPT|HZ|MODULES|BPF|CGROUP)" "$boot_config" 2>/dev/null | head -20 >> "$LOG_FILE" || true
  fi

  # initramfs: two file-name layouts are probed.
  local initramfs_state="not found at expected path"
  if [[ -f "/boot/initramfs-$release.img" || -f "/boot/initrd.img-$release" ]]; then
    initramfs_state="present"
  fi
  log "initramfs: $initramfs_state"
}
# --- Hardware ---
# check_cpu
# Counts logical CPUs and compares the count against MIN_CPU_CORES.
# A shortfall is reported as a warning, not a failure.
# Globals: reads MIN_CPU_CORES, writes CPU_CORES.
check_cpu() {
  log_section "CPU"

  local count=""
  if [[ -f /proc/cpuinfo ]]; then
    # grep -c exits 1 when nothing matches; `|| true` keeps that from
    # aborting the script under `set -e`.
    count=$(grep -c "^processor" /proc/cpuinfo || true)
    log "$(head -30 /proc/cpuinfo)"
  fi

  # Fall back to nproc when /proc/cpuinfo is missing or has no "processor"
  # lines.
  if [[ ! "$count" =~ ^[1-9][0-9]*$ ]]; then
    count=$(nproc 2>/dev/null || echo 0)
  fi
  if [[ ! "$count" =~ ^[0-9]+$ ]]; then
    count=0
  fi
  CPU_CORES=$count

  if [[ $CPU_CORES -ge $MIN_CPU_CORES ]]; then
    add_check "cpu" "pass" "cpu: $CPU_CORES cores"
  else
    add_check "cpu" "warn" "cpu: $CPU_CORES cores (min: $MIN_CPU_CORES)"
  fi
}
# check_ram
# Derives total memory in whole GB (integer division, rounded down) from the
# MemTotal line of /proc/meminfo and fails the check when it is below
# MIN_RAM_GB. Without /proc/meminfo the total is treated as 0.
# Globals: reads MIN_RAM_GB, writes RAM_GB.
check_ram() {
  log_section "Memory"

  RAM_GB=0
  if [[ -f /proc/meminfo ]]; then
    local total_kb
    total_kb=$(grep MemTotal /proc/meminfo | awk '{print $2}')
    RAM_GB=$(( total_kb / 1024 / 1024 ))
    log "$(cat /proc/meminfo)"
  fi

  local verdict="fail" summary="ram: ${RAM_GB}GB (min: ${MIN_RAM_GB}GB)"
  if [[ $RAM_GB -ge $MIN_RAM_GB ]]; then
    verdict="pass"
    summary="ram: ${RAM_GB}GB"
  fi
  add_check "ram" "$verdict" "$summary"
}
# check_disk
# Reports free space for every directory listed in DISK_PATHS (a JSON array,
# or a loose comma-separated list when jq cannot parse it). Entries that are
# not existing directories are skipped silently. Free space below
# MIN_DISK_GB is a warning, not a failure.
# Globals: reads DISK_PATHS, MIN_DISK_GB.
check_disk() {
  log_section "Disk"
  log "df -h:"
  df -h >> "$LOG_FILE" 2>/dev/null || true

  local paths path avail_gb blocks
  paths=$(echo "$DISK_PATHS" | jq -r '.[]' 2>/dev/null || echo "$DISK_PATHS" | tr -d '[]"' | tr ',' '\n')

  while IFS= read -r path; do
    # Trim surrounding whitespace only, so a path containing a space
    # survives intact.
    path="${path#"${path%%[![:space:]]*}"}"
    path="${path%"${path##*[![:space:]]}"}"
    if [[ -z "$path" || ! -d "$path" ]]; then
      continue # skip empty entries and non-existent paths silently
    fi

    avail_gb=""
    if df -P -BG "$path" &>/dev/null; then
      # GNU df: ask for GiB units directly; -P keeps each entry on one line.
      avail_gb=$(df -P -BG "$path" 2>/dev/null | awk 'NR==2 {print $4}' | tr -d 'G' || true)
    else
      # Portable fallback: -P -k pins the unit to 1024-byte blocks. Plain
      # `df` uses 512-byte or 1K blocks depending on the platform, so its
      # numbers cannot be converted reliably.
      blocks=$(df -P -k "$path" 2>/dev/null | awk 'NR==2 {print $4}' || true)
      if [[ "$blocks" =~ ^[0-9]+$ ]]; then
        avail_gb=$(( blocks / 1024 / 1024 ))
      fi
    fi

    if [[ "$avail_gb" =~ ^[0-9]+$ ]]; then
      if [[ $avail_gb -ge $MIN_DISK_GB ]]; then
        add_check "disk_$path" "pass" "disk $path: ${avail_gb}GB free"
      else
        add_check "disk_$path" "warn" "disk $path: ${avail_gb}GB free (min: ${MIN_DISK_GB}GB)"
      fi
    fi
  done <<< "$paths"
}
# check_fstab
# Logs /etc/fstab and the current mount table, then warns when any of the
# critical paths (/ /var /var/lib /opt /tmp) is itself a mount point whose
# `mount` line mentions noexec.
check_fstab() {
  log_section "Fstab & Mounts"
  if [[ -f /etc/fstab ]]; then
    log "/etc/fstab:"
    log "$(cat /etc/fstab)"
  fi
  log ""
  log "mount:"

  # Capture the mount table once and reuse it for logging and matching,
  # instead of running mount once per path.
  local mounts
  mounts=$(mount 2>/dev/null || true)
  if [[ -n "$mounts" ]]; then
    printf '%s\n' "$mounts" >> "$LOG_FILE"
  fi

  # Check for noexec on critical paths. awk reads the whole table, so there
  # is no early-exit reader that could turn a match into a pipefail failure.
  local bad_mounts="" path
  for path in / /var /var/lib /opt /tmp; do
    if awk -v p="$path" \
      '$2 == "on" && $3 == p && /noexec/ { hit = 1 } END { exit hit ? 0 : 1 }' \
      <<< "$mounts"; then
      bad_mounts="$bad_mounts $path"
    fi
  done

  if [[ -n "$bad_mounts" ]]; then
    add_check "fstab" "warn" "fstab: noexec on$bad_mounts"
  else
    add_check "fstab" "pass" "fstab: no noexec on critical paths"
  fi
}
# --- GPU ---
# check_gpu
# Looks for NVIDIA GPUs on the PCI bus and reports whether an nvidia kernel
# module is already loaded (a warning: the GPU operator prefers to install
# the driver itself). nvidia-smi output is logged when the tool exists.
# Globals: writes GPU_COUNT.
check_gpu() {
  log_section "GPU"

  # Check via lspci (doesn't require drivers)
  if command -v lspci &>/dev/null; then
    local gpus
    gpus=$(lspci | grep -iE "vga|3d|display|nvidia" || true)

    # Count only display-class functions from NVIDIA, so NVIDIA audio and
    # bridge functions are not reported as GPUs. `grep -c` prints 0 AND
    # exits 1 when nothing matches, so use `|| true` rather than
    # `|| echo 0`, which would yield "0\n0".
    GPU_COUNT=$(
      grep -iE '(vga compatible|3d|display) controller' <<< "$gpus" \
        | grep -ci 'nvidia' || true
    )
    if [[ ! "$GPU_COUNT" =~ ^[0-9]+$ ]]; then
      GPU_COUNT=0
    fi

    log "lspci GPU entries:"
    log "$gpus"
    if [[ $GPU_COUNT -gt 0 ]]; then
      add_check "gpu_detected" "pass" "gpu: $GPU_COUNT NVIDIA device(s) via lspci"
    else
      add_check "gpu_detected" "warn" "gpu: no NVIDIA devices in lspci"
    fi
  else
    add_check "gpu_detected" "warn" "gpu: lspci not available"
  fi

  # Check if nvidia driver is loaded (this can be a problem - GPU operator
  # should install). Capture lsmod first: `lsmod | grep -q` can fail under
  # pipefail when grep exits before lsmod has finished writing.
  local modules
  modules=$(lsmod 2>/dev/null || true)
  if grep -q "^nvidia" <<< "$modules"; then
    log ""
    log "nvidia kernel modules loaded:"
    grep nvidia <<< "$modules" >> "$LOG_FILE" || true
    add_check "gpu_driver" "warn" "gpu: nvidia driver already loaded (GPU operator prefers no host drivers)"
  else
    add_check "gpu_driver" "pass" "gpu: no nvidia driver loaded (good for GPU operator)"
  fi

  # If nvidia-smi exists, gather info but don't require it
  if command -v nvidia-smi &>/dev/null; then
    log ""
    log "nvidia-smi output:"
    nvidia-smi >> "$LOG_FILE" 2>&1 || log "nvidia-smi failed"
  fi
}
# check_nouveau
# Reports the state of the nouveau driver:
#   fail - a module whose name starts with "nouveau" is currently loaded
#   pass - not loaded and an active "blacklist nouveau" modprobe rule exists
#   warn - not loaded, but no blacklist rule was found
check_nouveau() {
  # Capture lsmod before matching: with pipefail, `lsmod | grep -q` can
  # report failure when grep exits on its first match while lsmod is still
  # writing, which would hide a loaded driver.
  local modules
  modules=$(lsmod 2>/dev/null || true)

  if grep -q "^nouveau" <<< "$modules"; then
    add_check "nouveau" "fail" "nouveau: driver loaded (must blacklist)"
  elif grep -rqE '^[[:space:]]*blacklist[[:space:]]+nouveau' \
    /etc/modprobe.d /lib/modprobe.d 2>/dev/null; then
    # Anchored so that a commented-out "# blacklist nouveau" does not count.
    add_check "nouveau" "pass" "nouveau: blacklisted"
  else
    add_check "nouveau" "warn" "nouveau: not blacklisted (may need to before GPU setup)"
  fi
}
# --- Sysctl ---
# check_sysctl
# Logs the runtime value of a fixed list of sysctls plus the non-comment,
# non-blank lines of the persisted sysctl config files, then grades the two
# BPF-related settings:
#   pass - unprivileged_bpf_disabled and bpf_jit_harden are both "0"
#   pass - unprivileged_bpf_disabled cannot be read ("N/A")
#   warn - anything else
check_sysctl() {
  log_section "Sysctl"

  # Known sysctls we care about
  local sysctls=(
    "kernel.unprivileged_bpf_disabled"
    "net.core.bpf_jit_harden"
    "kernel.dmesg_restrict"
    "kernel.perf_event_paranoid"
    "vm.swappiness"
    "net.ipv4.ip_forward"
    "net.bridge.bridge-nf-call-iptables"
  )

  log "Runtime sysctl values:"
  local val
  for key in "${sysctls[@]}"; do
    val=$(sysctl -n "$key" 2>/dev/null || echo "N/A")
    log " $key = $val"
  done

  # Persisted sysctls
  log ""
  log "Persisted sysctl configs:"
  for f in /etc/sysctl.conf /etc/sysctl.d/*; do
    [[ -f "$f" ]] || continue
    log ""
    log "$f:"
    # Drop comment lines and empty lines in a single pass.
    grep -Ev '^(#|$)' "$f" >> "$LOG_FILE" || true
  done

  # Check BPF settings specifically (GPU compatibility)
  local bpf_disabled bpf_harden
  bpf_disabled=$(sysctl -n kernel.unprivileged_bpf_disabled 2>/dev/null || echo "N/A")
  bpf_harden=$(sysctl -n net.core.bpf_jit_harden 2>/dev/null || echo "N/A")

  local warn_msg="sysctl: BPF settings may need adjustment (bpf_disabled=$bpf_disabled, jit_harden=$bpf_harden)"
  case "$bpf_disabled" in
    "N/A")
      add_check "sysctl_bpf" "pass" "sysctl: BPF settings not applicable"
      ;;
    "0")
      if [[ "$bpf_harden" == "0" ]]; then
        add_check "sysctl_bpf" "pass" "sysctl: BPF settings OK for GPU"
      else
        add_check "sysctl_bpf" "warn" "$warn_msg"
      fi
      ;;
    *)
      add_check "sysctl_bpf" "warn" "$warn_msg"
      ;;
  esac
}
# --- SELinux ---
# check_selinux
# Informational only: records the SELinux mode reported by getenforce (or
# "not installed" when the command is absent) and always passes. The
# contents of /etc/selinux/config are copied to the log when the file exists.
check_selinux() {
  log_section "SELinux"

  if ! command -v getenforce &>/dev/null; then
    add_check "selinux" "pass" "selinux: not installed"
    return
  fi

  local mode
  mode=$(getenforce 2>/dev/null || echo "unknown")
  log "getenforce: $mode"

  local selinux_cfg=/etc/selinux/config
  if [[ -f "$selinux_cfg" ]]; then
    log ""
    log "$selinux_cfg:"
    log "$(cat "$selinux_cfg")"
  fi

  add_check "selinux" "pass" "selinux: $mode"
}
# --- Network ---
# check_network
# Verifies outbound HTTPS connectivity with curl unless AIR_GAPPED is
# "true", in which case the check is skipped and recorded as a pass. A
# configured proxy (any of http_proxy, https_proxy, HTTP_PROXY, HTTPS_PROXY)
# is recorded as a separate warning.
# Globals: reads AIR_GAPPED and the proxy environment variables.
check_network() {
  log_section "Network"
  if [[ "$AIR_GAPPED" == "true" ]]; then
    add_check "network" "pass" "network: skipped (air-gapped)"
    return
  fi

  # Check for proxy. Detection covers the same four variables that are
  # logged; HTTPS_PROXY on its own used to go unnoticed.
  local http_p="${http_proxy:-${HTTP_PROXY:-}}"
  local https_p="${https_proxy:-${HTTPS_PROXY:-}}"
  if [[ -n "$http_p" || -n "$https_p" ]]; then
    log "Proxy detected:"
    log " http_proxy=$http_p"
    log " https_proxy=$https_p"
    add_check "network_proxy" "warn" "network: proxy configured"
  fi

  if ! command -v curl &>/dev/null; then
    add_check "network" "warn" "network: curl not available to test"
  elif curl -s --connect-timeout 5 --max-time 15 https://google.com &>/dev/null; then
    # --max-time bounds the whole transfer so a stalled connection cannot
    # hang the preflight run; --connect-timeout covers only the connect
    # phase.
    add_check "network" "pass" "network: outbound OK"
  else
    add_check "network" "fail" "network: no outbound connectivity"
  fi
}
# --- Software ---
# check_software
# Checks that every tool named in REQUIRED_SOFTWARE (a JSON array, or a
# loose comma-separated list when jq cannot parse it) resolves via
# `command -v`. Found tools are reported in one passing check, missing ones
# in one failing check; a best-effort version string is written to the log.
# Globals: reads REQUIRED_SOFTWARE.
check_software() {
  log_section "Software"

  local tools tool ver
  tools=$(echo "$REQUIRED_SOFTWARE" | jq -r '.[]' 2>/dev/null || echo "$REQUIRED_SOFTWARE" | tr -d '[]"' | tr ',' '\n')

  local found="" missing=""
  while IFS= read -r tool; do
    tool="${tool// /}"
    if [[ -z "$tool" ]]; then
      continue
    fi

    if command -v "$tool" &>/dev/null; then
      # The probe runs with stdin from /dev/null: otherwise the tool
      # inherits this loop's stdin and a tool that reads stdin would
      # swallow the remaining tool names.
      ver=$(
        {
          "$tool" version --short 2>/dev/null \
            || "$tool" --version 2>/dev/null | head -1 \
            || echo ""
        } < /dev/null
      )
      # Keep the first line, from its first digit up to the next space.
      ver=$(echo "$ver" | head -1 | sed 's/^[^0-9]*//' | cut -d' ' -f1 | head -c 20)
      found+="${found:+ }$tool"
      log " $tool: ${ver:-installed}"
    else
      missing+="${missing:+ }$tool"
      log " $tool: not found"
    fi
  done <<< "$tools"

  if [[ -n "$found" ]]; then
    add_check "software_found" "pass" "tools: $found"
  fi
  if [[ -n "$missing" ]]; then
    add_check "software_missing" "fail" "tools missing: $missing"
  fi
}
# --- dmesg ---
# capture_dmesg
# Appends the last 100 lines of the kernel ring buffer to the log. When the
# dmesg | tail pipeline reports failure, a "dmesg not accessible" note is
# logged instead.
capture_dmesg() {
  log_section "dmesg (last 100 lines)"
  if ! dmesg 2>/dev/null | tail -100 >> "$LOG_FILE"; then
    log "dmesg not accessible"
  fi
}
# --- Main ---
# main
# Runs every check in a fixed order, prints the overall result and, when
# CONFIG is non-empty (component mode), emits a JSON summary on stdout.
# Always exits 0; failures are conveyed through the printed result and the
# JSON "passed" field, not the exit status.
main() {
  echo "Preflight checks starting... (log: $LOG_FILE)"
  echo ""
  log "Preflight check: $(date)"
  log "Host: $(hostname)"

  local step
  for step in \
    check_os check_kernel check_cpu check_ram check_disk check_fstab \
    check_gpu check_nouveau check_sysctl check_selinux check_network \
    check_software capture_dmesg; do
    "$step"
  done

  echo ""
  local summary
  if [[ $FAIL_COUNT -eq 0 ]]; then
    summary="Result: OK ($WARN_COUNT warnings)"
  else
    summary="Result: $FAIL_COUNT failed, $WARN_COUNT warnings"
  fi
  echo "$summary"
  echo "Details: $LOG_FILE"

  # JSON output for component mode
  if [[ -n "$CONFIG" ]]; then
    local passed=true
    if [[ $FAIL_COUNT -gt 0 ]]; then
      passed=false
    fi
    local report='{passed: $passed, os_version: $os_version, cpu_cores: $cpu_cores, ram_gb: $ram_gb, gpu_count: $gpu_count, checks: $checks}'
    jq -n \
      --argjson passed "$passed" \
      --arg os_version "$OS_VERSION" \
      --argjson cpu_cores "$CPU_CORES" \
      --argjson ram_gb "$RAM_GB" \
      --argjson gpu_count "$GPU_COUNT" \
      --argjson checks "$CHECKS" \
      "$report"
  fi

  exit 0
}
# Script entry point: run the checks, passing along any command-line
# arguments (main does not currently read them).
main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment