-
-
Save hammondr/d897c6f901bfe3997ee8a33330610ef7 to your computer and use it in GitHub Desktop.
Poolside preflight check script for on-premises installation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| $ ./preflight-check.sh | |
| Preflight checks starting... (log: preflight-20260224-180212.log) | |
| [PASS] os: Ubuntu 22.04.5 LTS | |
| [PASS] kernel: 6.10.14-linuxkit | |
| [WARN] cpu: 14 cores (min: 128) | |
| [FAIL] ram: 7GB (min: 512GB) | |
| [WARN] disk /: 823GB free (min: 1000GB) | |
| [WARN] disk /var: 823GB free (min: 1000GB) | |
| [WARN] disk /var/lib: 823GB free (min: 1000GB) | |
| [WARN] disk /opt: 823GB free (min: 1000GB) | |
| [PASS] fstab: no noexec on critical paths | |
| [WARN] gpu: no NVIDIA devices in lspci | |
| [PASS] gpu: no nvidia driver loaded (good for GPU operator) | |
| [WARN] nouveau: not blacklisted (may need to before GPU setup) | |
| [WARN] sysctl: BPF settings may need adjustment (bpf_disabled=0, jit_harden=N/A) | |
| [PASS] selinux: not installed | |
| [PASS] network: outbound OK | |
| [PASS] tools: jq | |
| [FAIL] tools missing: kubectl helm terraform yq | |
| Result: 2 failed, 8 warnings | |
| Details: preflight-20260224-180212.log |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env bash | |
| set -euo pipefail | |
| # Preflight Check Script for Poolside Installation | |
| # Outputs pass/fail summary to stdout, detailed info to log file | |
#######################################
# Decide whether colored output is appropriate for stdout.
# Globals:   TERM (read)
# Arguments: none
# Returns:   0 if stdout is a terminal, TERM is set to something other than
#            "dumb", and tput reports at least 8 colors; 1 otherwise.
#######################################
supports_color() {
  # Colors only make sense when stdout is a terminal.
  [[ -t 1 ]] || return 1

  # TERM can be unset (cron, minimal containers). Default it so the lookup
  # does not abort the whole script under `set -u`.
  local term="${TERM:-}"
  [[ -n "$term" && "$term" != "dumb" ]] || return 1

  command -v tput >/dev/null 2>&1 || return 1

  # tput may fail or print a non-numeric / negative value; treat all of those
  # as "no color support".
  local colors
  colors=$(tput colors 2>/dev/null) || return 1
  [[ "$colors" =~ ^[0-9]+$ ]] || return 1
  (( colors >= 8 ))
}
# Parse command line arguments.
# Only the color switch is interpreted:
#   --color, --color=always   force colors on
#   --color=auto              decide from the terminal (default)
#   --color=never             force colors off
# Any other --color=<mode> is rejected with exit status 1. Every other
# argument is consumed and ignored, so "$@" is empty after this loop.
COLOR_MODE="auto"
while [[ $# -gt 0 ]]; do
  case "$1" in
    --color | --color=always)
      COLOR_MODE="always"
      ;;
    --color=auto)
      COLOR_MODE="auto"
      ;;
    --color=never)
      COLOR_MODE="never"
      ;;
    --color=*)
      # Handle invalid color mode
      echo "Invalid color mode: ${1#*=}" >&2
      exit 1
      ;;
    *)
      # Not a color option: nothing to record.
      ;;
  esac
  shift
done
#######################################
# Set the ANSI color variables used for console output.
# Colors are disabled when COLOR_MODE is "never", or when it is "auto" and
# supports_color reports no color support; any other mode enables them.
# Globals:   COLOR_MODE (read); RED, YELLOW, GREEN, BLUE, NC (written)
# Arguments: none
#######################################
setup_colors() {
  local use_color=true

  # An explicit case avoids the `A || B && ! C` trap: && and || have equal
  # precedence in shell lists, so that form evaluates as `(A || B) && ! C`
  # and "never" would still get colors on a color-capable terminal.
  case "$COLOR_MODE" in
    never)
      use_color=false
      ;;
    auto)
      supports_color || use_color=false
      ;;
  esac

  if [[ "$use_color" == "true" ]]; then
    RED='\033[0;31m'
    YELLOW='\033[1;33m'
    GREEN='\033[0;32m'
    BLUE='\033[0;34m'
    NC='\033[0m' # No Color
  else
    RED=''
    YELLOW=''
    GREEN=''
    BLUE=''
    NC=''
  fi
}
setup_colors
# Component configuration (a JSON document taken from POOLIO_COMPONENT, empty
# when absent) and the path of the detail log.
CONFIG="${POOLIO_COMPONENT:-}"
LOG_FILE="${LOG_FILE:-preflight-$(date +%Y%m%d-%H%M%S).log}"

# Resolve thresholds and lists.
if [[ -z "$CONFIG" ]]; then
  # No component config: take each setting from the environment, falling
  # back to the built-in default when it is unset or empty.
  : "${TARGET_DISTRO:=}"
  : "${MIN_CPU_CORES:=128}"
  : "${MIN_RAM_GB:=512}"
  : "${MIN_DISK_GB:=1000}"
  : "${DISK_PATHS:=[\"/\", \"/var\", \"/var/lib\", \"/opt\"]}"
  : "${AIR_GAPPED:=false}"
  : "${REQUIRED_SOFTWARE:=[\"kubectl\",\"helm\",\"terraform\",\"jq\",\"yq\"]}"
else
  # Component config present: read each setting from .fileConfig.schema with
  # jq; the jq expression passed in supplies the default via `//`.
  fc() {
    jq -r ".fileConfig.schema$1" <<<"$CONFIG"
  }
  TARGET_DISTRO=$(fc '.targetDistro // ""')
  MIN_CPU_CORES=$(fc '.minCpuCores // 128')
  MIN_RAM_GB=$(fc '.minRamGB // 512')
  MIN_DISK_GB=$(fc '.minDiskGB // 1000')
  DISK_PATHS=$(fc '.diskPaths // ["/", "/var", "/var/lib", "/opt"] | @json')
  AIR_GAPPED=$(fc '.airGapped // false')
  REQUIRED_SOFTWARE=$(fc '.requiredSoftware // ["kubectl","helm","terraform","jq","yq"] | @json')
fi
# Accumulated check results (a JSON array as text) and outcome counters,
# updated by add_check.
CHECKS='[]'
FAIL_COUNT=0
WARN_COUNT=0

# Collected values for JSON output
OS_VERSION=""
CPU_CORES=0
RAM_GB=0
GPU_COUNT=0

# Append one line to the detail log ($LOG_FILE). Multiple arguments are
# joined with a space (first character of IFS). printf is used rather than
# echo so a message that starts with -n, -e or -E is written verbatim instead
# of being taken as an echo option.
log() { printf '%s\n' "$*" >> "$LOG_FILE"; }

# Start a new section in the detail log: a blank line, then "=== <title> ===".
log_section() { log ""; log "=== $1 ==="; }
#######################################
# Record the outcome of one check.
# Prints a colored one-line summary to stdout, writes the same line (plus the
# optional detail) to the log, appends an entry to the CHECKS JSON array and
# bumps the fail/warn counters.
# Globals:   GREEN, YELLOW, RED, NC (read); CHECKS, FAIL_COUNT, WARN_COUNT
#            (written)
# Arguments: $1 - check name (JSON "name")
#            $2 - status: "pass", "warn" or "fail" (anything else is shown
#                 as [PASS] and counted as neither warning nor failure)
#            $3 - human-readable message
#            $4 - optional detail, logged and stored but not printed
# Returns:   0, unless jq is present and fails on the current CHECKS value
#######################################
add_check() {
  local name="$1" status="$2" message="$3" detail="${4:-}"

  # stdout: simple one-liner with colors
  local icon color
  case "$status" in
    warn) icon="[WARN]" color="$YELLOW" ;;
    fail) icon="[FAIL]" color="$RED" ;;
    *)    icon="[PASS]" color="$GREEN" ;;
  esac
  printf "%b%-8s%b %s\n" "$color" "$icon" "$NC" "$message"

  # log: include detail
  log "$icon $message"
  if [[ -n "$detail" ]]; then
    log " $detail"
  fi

  # accumulate for JSON. jq is itself one of the tools this script checks
  # for, so its absence must not abort the run under `set -e`; without jq the
  # JSON list is simply left as it is.
  if command -v jq >/dev/null 2>&1; then
    CHECKS=$(jq --arg n "$name" --arg s "$status" --arg m "$message" --arg d "$detail" \
      '. + [{"name": $n, "status": $s, "message": $m}
            + (if $d == "" then {} else {"detail": $d} end)]' <<<"$CHECKS")
  fi

  case "$status" in
    fail) FAIL_COUNT=$((FAIL_COUNT + 1)) ;;
    warn) WARN_COUNT=$((WARN_COUNT + 1)) ;;
  esac
}
# --- OS ---
#######################################
# Identify the operating system and compare it with TARGET_DISTRO.
# TARGET_DISTRO has the form "<id>-<version>"; the ID must match exactly and
# VERSION_ID must start with <version>. An empty TARGET_DISTRO accepts any OS.
# Globals:   TARGET_DISTRO (read); OS_VERSION (written); sourcing
#            /etc/os-release also defines its variables in the shell
# Arguments: none
#######################################
check_os() {
  log_section "OS"

  if [[ ! -f /etc/os-release ]]; then
    add_check "os" "fail" "os: unknown (no /etc/os-release)"
    return
  fi

  # shellcheck source=/dev/null
  source /etc/os-release

  # Every os-release field is optional. Default the ones used here so a
  # sparse file cannot abort the script under `set -u`; the fallbacks for ID
  # and PRETTY_NAME are the ones os-release(5) specifies.
  local os_id="${ID:-linux}"
  local os_version_id="${VERSION_ID:-}"
  local os_pretty="${PRETTY_NAME:-Linux}"

  OS_VERSION="$os_pretty"
  log "$(cat /etc/os-release)"

  if [[ -z "$TARGET_DISTRO" ]]; then
    add_check "os" "pass" "os: $os_pretty"
  elif [[ "$os_id" == "${TARGET_DISTRO%%-*}" && "$os_version_id" == "${TARGET_DISTRO#*-}"* ]]; then
    add_check "os" "pass" "os: $os_pretty"
  else
    add_check "os" "fail" "os: $os_pretty (expected $TARGET_DISTRO)"
  fi
}
# --- Kernel ---
#######################################
# Report the running kernel release and log supporting details: `uname -a`,
# selected CONFIG_* lines from the matching /boot/config file (at most 20),
# and whether an initramfs image exists for this release.
# Globals:   LOG_FILE (appended to)
# Arguments: none
#######################################
check_kernel() {
  log_section "Kernel"

  local release
  release=$(uname -r)
  add_check "kernel" "pass" "kernel: $release"
  log "uname -a: $(uname -a)"

  # Boot kernel config
  local boot_config="/boot/config-$release"
  if [[ -f "$boot_config" ]]; then
    log ""
    log "Kernel config ($boot_config) - key settings:"
    grep -E "^CONFIG_(PREEMPT|HZ|MODULES|BPF|CGROUP)" "$boot_config" 2>/dev/null | head -20 >> "$LOG_FILE" || true
  fi

  # initramfs: either of the two file names below counts as present.
  local initramfs_state="not found at expected path"
  local candidate
  for candidate in "/boot/initramfs-$release.img" "/boot/initrd.img-$release"; do
    if [[ -f "$candidate" ]]; then
      initramfs_state="present"
      break
    fi
  done
  log "initramfs: $initramfs_state"
}
# --- Hardware ---
#######################################
# Count logical CPUs and compare with MIN_CPU_CORES. Falling short produces a
# warning, not a failure.
# Globals:   MIN_CPU_CORES (read); CPU_CORES (written)
# Arguments: none
#######################################
check_cpu() {
  log_section "CPU"

  CPU_CORES=0
  if [[ -f /proc/cpuinfo ]]; then
    # grep -c prints 0 but exits 1 when nothing matches; without the guard
    # that exit status would abort the script under `set -e`.
    CPU_CORES=$(grep -c "^processor" /proc/cpuinfo || true)
    log "$(head -30 /proc/cpuinfo)"
  fi

  # No usable count from /proc/cpuinfo (missing file or no "processor"
  # lines): ask nproc instead.
  if [[ ! "$CPU_CORES" =~ ^[1-9][0-9]*$ ]]; then
    CPU_CORES=$(nproc 2>/dev/null || echo 0)
  fi
  [[ "$CPU_CORES" =~ ^[0-9]+$ ]] || CPU_CORES=0

  if [[ $CPU_CORES -ge $MIN_CPU_CORES ]]; then
    add_check "cpu" "pass" "cpu: $CPU_CORES cores"
  else
    add_check "cpu" "warn" "cpu: $CPU_CORES cores (min: $MIN_CPU_CORES)"
  fi
}
#######################################
# Read total memory from /proc/meminfo (MemTotal, in kB), convert it to whole
# GB and compare with MIN_RAM_GB. Falling short is a failure. When
# /proc/meminfo is missing the amount is reported as 0GB.
# Globals:   MIN_RAM_GB (read); RAM_GB (written)
# Arguments: none
#######################################
check_ram() {
  log_section "Memory"

  RAM_GB=0
  if [[ -f /proc/meminfo ]]; then
    local total_kb
    total_kb=$(grep MemTotal /proc/meminfo | awk '{print $2}')
    RAM_GB=$((total_kb / 1024 / 1024))
    log "$(cat /proc/meminfo)"
  fi

  # Assume success, then downgrade when below the minimum.
  local verdict="pass"
  local summary="ram: ${RAM_GB}GB"
  if [[ $RAM_GB -lt $MIN_RAM_GB ]]; then
    verdict="fail"
    summary="ram: ${RAM_GB}GB (min: ${MIN_RAM_GB}GB)"
  fi
  add_check "ram" "$verdict" "$summary"
}
#######################################
# Check free space on each directory listed in DISK_PATHS against
# MIN_DISK_GB. Falling short produces a warning. Paths that are not existing
# directories are skipped without a check entry.
# Globals:   DISK_PATHS (read; JSON array, or a comma-separated list as a
#            fallback when jq cannot parse it), MIN_DISK_GB (read),
#            LOG_FILE (appended to)
# Arguments: none
#######################################
check_disk() {
  log_section "Disk"
  log "df -h:"
  df -h >> "$LOG_FILE" 2>/dev/null || true

  # Prefer jq; if it is missing or the value is not valid JSON, fall back to
  # stripping the brackets/quotes and splitting on commas.
  local paths
  paths=$(echo "$DISK_PATHS" | jq -r '.[]' 2>/dev/null \
    || echo "$DISK_PATHS" | tr -d '[]"' | tr ',' '\n')

  # `path` is local so the loop does not leak it into the global scope.
  local path avail_gb avail_kb
  while IFS= read -r path; do
    path="${path// /}"
    [[ -n "$path" ]] || continue
    [[ -d "$path" ]] || continue # skip non-existent paths silently

    # -P keeps each filesystem on a single line so that the "available"
    # value is always field 4 of line 2, even with long device names.
    avail_gb=""
    if df -P -BG "$path" &>/dev/null; then
      avail_gb=$(df -P -BG "$path" 2>/dev/null | awk 'NR==2 {print $4}' | tr -d 'G') || avail_gb=""
    else
      # df without -BG support: request 1024-byte units explicitly with -k
      # instead of guessing the platform's default block size.
      avail_kb=$(df -Pk "$path" 2>/dev/null | awk 'NR==2 {print $4}') || avail_kb=""
      if [[ "$avail_kb" =~ ^[0-9]+$ ]]; then
        avail_gb=$((avail_kb / 1024 / 1024))
      fi
    fi

    if [[ "$avail_gb" =~ ^[0-9]+$ ]]; then
      if [[ $avail_gb -ge $MIN_DISK_GB ]]; then
        add_check "disk_$path" "pass" "disk $path: ${avail_gb}GB free"
      else
        add_check "disk_$path" "warn" "disk $path: ${avail_gb}GB free (min: ${MIN_DISK_GB}GB)"
      fi
    else
      # No check entry is produced, but leave a trace in the log.
      log " $path: could not determine free space"
    fi
  done <<< "$paths"
}
#######################################
# Log /etc/fstab and the current mount table, then warn if any of /, /var,
# /var/lib, /opt or /tmp is itself a mount point whose `mount` line contains
# "noexec".
# Globals:   LOG_FILE (appended to)
# Arguments: none
#######################################
check_fstab() {
  log_section "Fstab & Mounts"
  if [[ -f /etc/fstab ]]; then
    log "/etc/fstab:"
    log "$(cat /etc/fstab)"
  fi

  # Capture the mount table once and reuse it for logging and for every
  # path, instead of re-running `mount` per path.
  local mount_table
  mount_table=$(mount 2>/dev/null) || mount_table=""

  log ""
  log "mount:"
  if [[ -n "$mount_table" ]]; then
    printf '%s\n' "$mount_table" >> "$LOG_FILE"
  fi

  # Check for noexec on critical paths. `path` is local so it does not leak
  # into the global scope.
  local path entries bad_mounts=""
  for path in / /var /var/lib /opt /tmp; do
    entries=$(grep -E "^[^ ]+ on $path " <<<"$mount_table" || true)
    if [[ "$entries" == *noexec* ]]; then
      bad_mounts="$bad_mounts $path"
    fi
  done

  if [[ -n "$bad_mounts" ]]; then
    add_check "fstab" "warn" "fstab: noexec on$bad_mounts"
  else
    add_check "fstab" "pass" "fstab: no noexec on critical paths"
  fi
}
# --- GPU ---
#######################################
# Look for NVIDIA hardware and host drivers.
#   gpu_detected: pass when lspci lists at least one line mentioning NVIDIA,
#                 warn when it lists none or lspci is unavailable.
#   gpu_driver:   warn when a kernel module whose name starts with "nvidia"
#                 is loaded, pass otherwise.
# nvidia-smi output is logged when the tool exists; it is never required.
# Globals:   GPU_COUNT (written), LOG_FILE (appended to)
# Arguments: none
#######################################
check_gpu() {
  log_section "GPU"

  # Check via lspci (doesn't require drivers)
  if command -v lspci &>/dev/null; then
    local gpus
    gpus=$(lspci | grep -iE "vga|3d|display|nvidia" || true)

    # grep -c already prints "0" when nothing matches (and exits 1), so only
    # the exit status needs neutralising. Appending another `echo 0` would
    # produce "0<newline>0", i.e. "00" once whitespace is stripped.
    GPU_COUNT=$(grep -ci nvidia <<<"$gpus" || true)
    [[ "$GPU_COUNT" =~ ^[0-9]+$ ]] || GPU_COUNT=0

    log "lspci GPU entries:"
    log "$gpus"
    if [[ $GPU_COUNT -gt 0 ]]; then
      add_check "gpu_detected" "pass" "gpu: $GPU_COUNT NVIDIA device(s) via lspci"
    else
      add_check "gpu_detected" "warn" "gpu: no NVIDIA devices in lspci"
    fi
  else
    add_check "gpu_detected" "warn" "gpu: lspci not available"
  fi

  # Check if nvidia driver is loaded (this can be a problem - GPU operator
  # should install). The module list is captured first: with `pipefail`,
  # `lsmod | grep -q` can report failure when grep exits on its first match
  # and lsmod is then killed by SIGPIPE, which would hide a loaded driver.
  local modules
  modules=$(lsmod 2>/dev/null) || modules=""
  if grep -q "^nvidia" <<<"$modules"; then
    log ""
    log "nvidia kernel modules loaded:"
    grep nvidia <<<"$modules" >> "$LOG_FILE" || true
    add_check "gpu_driver" "warn" "gpu: nvidia driver already loaded (GPU operator prefers no host drivers)"
  else
    add_check "gpu_driver" "pass" "gpu: no nvidia driver loaded (good for GPU operator)"
  fi

  # If nvidia-smi exists, gather info but don't require it
  if command -v nvidia-smi &>/dev/null; then
    log ""
    log "nvidia-smi output:"
    nvidia-smi >> "$LOG_FILE" 2>&1 || log "nvidia-smi failed"
  fi
}
#######################################
# Report the state of the nouveau driver:
#   fail - a kernel module whose name starts with "nouveau" is loaded
#   pass - not loaded, and an active "blacklist nouveau" directive exists
#          under /etc/modprobe.d or /lib/modprobe.d
#   warn - not loaded, but no such directive was found
# Arguments: none
#######################################
check_nouveau() {
  # Capture the module list before matching: with `pipefail`,
  # `lsmod | grep -q` can report failure when grep exits on its first match
  # and lsmod is then killed by SIGPIPE, which would hide a loaded module.
  local modules
  modules=$(lsmod 2>/dev/null) || modules=""

  if grep -q "^nouveau" <<<"$modules"; then
    add_check "nouveau" "fail" "nouveau: driver loaded (must blacklist)"
  # Anchored at the start of the line so that a commented-out
  # "# blacklist nouveau" does not count as blacklisted.
  elif grep -rqE '^[[:space:]]*blacklist[[:space:]]+nouveau' /etc/modprobe.d /lib/modprobe.d 2>/dev/null; then
    add_check "nouveau" "pass" "nouveau: blacklisted"
  else
    add_check "nouveau" "warn" "nouveau: not blacklisted (may need to before GPU setup)"
  fi
}
# --- Sysctl ---
#######################################
# Log a fixed set of runtime sysctl values and the non-comment, non-empty
# lines of /etc/sysctl.conf and /etc/sysctl.d/*, then grade the two BPF
# settings:
#   pass - unprivileged_bpf_disabled and bpf_jit_harden are both 0
#   pass - unprivileged_bpf_disabled cannot be read ("N/A")
#   warn - anything else
# Globals:   LOG_FILE (appended to)
# Arguments: none
#######################################
check_sysctl() {
  log_section "Sysctl"

  # Known sysctls we care about
  local -a watched=(
    "kernel.unprivileged_bpf_disabled"
    "net.core.bpf_jit_harden"
    "kernel.dmesg_restrict"
    "kernel.perf_event_paranoid"
    "vm.swappiness"
    "net.ipv4.ip_forward"
    "net.bridge.bridge-nf-call-iptables"
  )

  log "Runtime sysctl values:"
  local name current
  for name in "${watched[@]}"; do
    # A key that cannot be read is recorded as N/A.
    current=$(sysctl -n "$name" 2>/dev/null || echo "N/A")
    log " $name = $current"
  done

  # Persisted sysctls
  log ""
  log "Persisted sysctl configs:"
  local conf
  for conf in /etc/sysctl.conf /etc/sysctl.d/*; do
    [[ -f "$conf" ]] || continue
    log ""
    log "$conf:"
    grep -v "^#" "$conf" | grep -v "^$" >> "$LOG_FILE" 2>/dev/null || true
  done

  # Check BPF settings specifically (GPU compatibility)
  local unpriv_bpf jit_harden
  unpriv_bpf=$(sysctl -n kernel.unprivileged_bpf_disabled 2>/dev/null || echo "N/A")
  jit_harden=$(sysctl -n net.core.bpf_jit_harden 2>/dev/null || echo "N/A")

  # Start from the warning and upgrade to pass for the two accepted states.
  local verdict="warn"
  local summary="sysctl: BPF settings may need adjustment (bpf_disabled=$unpriv_bpf, jit_harden=$jit_harden)"
  if [[ "$unpriv_bpf" == "0" && "$jit_harden" == "0" ]]; then
    verdict="pass"
    summary="sysctl: BPF settings OK for GPU"
  elif [[ "$unpriv_bpf" == "N/A" ]]; then
    verdict="pass"
    summary="sysctl: BPF settings not applicable"
  fi
  add_check "sysctl_bpf" "$verdict" "$summary"
}
# --- SELinux ---
#######################################
# Report the SELinux mode. This check is informational: it always records a
# pass, showing either the getenforce output ("unknown" if getenforce fails)
# or "not installed" when getenforce is absent. /etc/selinux/config is copied
# to the log when present.
# Arguments: none
#######################################
check_selinux() {
  log_section "SELinux"

  # No getenforce binary: nothing further to inspect.
  if ! command -v getenforce &>/dev/null; then
    add_check "selinux" "pass" "selinux: not installed"
    return
  fi

  local enforcement
  enforcement=$(getenforce 2>/dev/null || echo "unknown")
  log "getenforce: $enforcement"

  local selinux_conf="/etc/selinux/config"
  if [[ -f "$selinux_conf" ]]; then
    log ""
    log "$selinux_conf:"
    log "$(cat "$selinux_conf")"
  fi

  add_check "selinux" "pass" "selinux: $enforcement"
}
# --- Network ---
#######################################
# Test outbound connectivity with curl against https://google.com.
#   network_proxy: warn when any HTTP(S) proxy variable is set
#   network:       pass when air-gapped (test skipped) or the request
#                  succeeds, fail when it does not, warn when curl is missing
# Globals:   AIR_GAPPED, http_proxy, https_proxy, HTTP_PROXY, HTTPS_PROXY
#            (all read)
# Arguments: none
#######################################
check_network() {
  log_section "Network"

  if [[ "$AIR_GAPPED" == "true" ]]; then
    add_check "network" "pass" "network: skipped (air-gapped)"
    return
  fi

  # Check for proxy. Both spellings of both variables are considered, so an
  # environment that only sets HTTPS_PROXY is detected as well.
  local http_px="${http_proxy:-${HTTP_PROXY:-}}"
  local https_px="${https_proxy:-${HTTPS_PROXY:-}}"
  if [[ -n "$http_px" || -n "$https_px" ]]; then
    log "Proxy detected:"
    log " http_proxy=$http_px"
    log " https_proxy=$https_px"
    add_check "network_proxy" "warn" "network: proxy configured"
  fi

  if ! command -v curl &>/dev/null; then
    add_check "network" "warn" "network: curl not available to test"
  elif curl -s --connect-timeout 5 https://google.com &>/dev/null; then
    add_check "network" "pass" "network: outbound OK"
  else
    add_check "network" "fail" "network: no outbound connectivity"
  fi
}
# --- Software ---
#######################################
# Check that every tool in REQUIRED_SOFTWARE is on PATH.
#   software_found:   pass, listing the tools that were found (if any)
#   software_missing: fail, listing the tools that were not (if any)
# A best-effort version string for each tool found is written to the log.
# Globals:   REQUIRED_SOFTWARE (read; JSON array, or a comma-separated list
#            as a fallback when jq cannot parse it)
# Arguments: none
#######################################
check_software() {
  log_section "Software"

  # Prefer jq; if it is missing or the value is not valid JSON, fall back to
  # stripping the brackets/quotes and splitting on commas.
  local tools
  tools=$(echo "$REQUIRED_SOFTWARE" | jq -r '.[]' 2>/dev/null \
    || echo "$REQUIRED_SOFTWARE" | tr -d '[]"' | tr ',' '\n')

  local found="" missing=""
  local tool ver
  while IFS= read -r tool; do
    tool="${tool// /}"
    [[ -n "$tool" ]] || continue

    if command -v "$tool" &>/dev/null; then
      # The probes run with stdin from /dev/null. They would otherwise
      # inherit the loop's input, and a tool that reads stdin would swallow
      # the remaining tool names so they would never be checked.
      ver=$({ "$tool" version --short 2>/dev/null \
        || "$tool" --version 2>/dev/null | head -1 \
        || echo ""; } </dev/null)
      # Keep the first line, drop everything before the first digit, take the
      # first word, and cap the length at 20 bytes.
      ver=$(echo "$ver" | head -1 | sed 's/^[^0-9]*//' | cut -d' ' -f1 | head -c 20) || ver=""
      found="$found $tool"
      log " $tool: ${ver:-installed}"
    else
      missing="$missing $tool"
      log " $tool: not found"
    fi
  done <<< "$tools"

  # Both lists were built with a leading separator; drop it.
  found="${found# }"
  missing="${missing# }"

  if [[ -n "$found" ]]; then
    add_check "software_found" "pass" "tools: $found"
  fi
  if [[ -n "$missing" ]]; then
    add_check "software_missing" "fail" "tools missing: $missing"
  fi
}
# --- dmesg ---
#######################################
# Append the last 100 lines of the kernel ring buffer to the log. If the
# dmesg/tail pipeline reports failure, a note is logged instead of aborting.
# Globals:   LOG_FILE (appended to)
# Arguments: none
#######################################
capture_dmesg() {
  log_section "dmesg (last 100 lines)"

  if ! dmesg 2>/dev/null | tail -100 >> "$LOG_FILE"; then
    log "dmesg not accessible"
  fi
}
# --- Main ---
#######################################
# Run every check in order, print the overall result, and in component mode
# (CONFIG non-empty) also emit a JSON summary on stdout.
# Globals:   LOG_FILE, CONFIG, GREEN, RED, NC, FAIL_COUNT, WARN_COUNT,
#            OS_VERSION, CPU_CORES, RAM_GB, GPU_COUNT, CHECKS (all read)
# Arguments: accepted but not used
# Exits:     always with status 0, whatever the check results; failures are
#            conveyed by the printed result and the JSON "passed" field.
#######################################
main() {
  echo "Preflight checks starting... (log: $LOG_FILE)"
  echo ""
  log "Preflight check: $(date)"
  log "Host: $(hostname)"

  # The order below is the order in which results are printed.
  local step
  for step in \
    check_os check_kernel check_cpu check_ram check_disk check_fstab \
    check_gpu check_nouveau check_sysctl check_selinux check_network \
    check_software capture_dmesg; do
    "$step"
  done

  echo ""
  if (( FAIL_COUNT == 0 )); then
    printf '%bResult: OK (%s warnings)%b\n' "$GREEN" "$WARN_COUNT" "$NC"
  else
    printf '%bResult: %s failed, %s warnings%b\n' "$RED" "$FAIL_COUNT" "$WARN_COUNT" "$NC"
  fi
  echo "Details: $LOG_FILE"

  # JSON output for component mode
  if [[ -n "$CONFIG" ]]; then
    local passed=true
    if (( FAIL_COUNT > 0 )); then
      passed=false
    fi
    jq -n \
      --argjson passed "$passed" \
      --arg os_version "$OS_VERSION" \
      --argjson cpu_cores "$CPU_CORES" \
      --argjson ram_gb "$RAM_GB" \
      --argjson gpu_count "$GPU_COUNT" \
      --argjson checks "$CHECKS" \
      '{passed: $passed, os_version: $os_version, cpu_cores: $cpu_cores, ram_gb: $ram_gb, gpu_count: $gpu_count, checks: $checks}'
  fi

  exit 0
}
| # Script entry point: run main with the script's remaining positional parameters. | |
| # The argument-parsing loop earlier in the file shifts every argument away, so "$@" is empty here. | |
| main "$@" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment