Created
March 19, 2025 14:52
-
-
Save iklobato/2382e9bbef95410c8d08b51ade17c6f1 to your computer and use it in GitHub Desktop.
Configure and run a local Ollama model with the OpenHands AI project.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Provisions a host with Ollama (serving a custom "deepseek-optimized" model)
# and an OpenHands container. Expects root privileges: it calls apt-get and
# writes under /etc and /usr/local/bin.

# Abort the script as soon as any unguarded command exits non-zero.
set -o errexit
| function log() { | |
| echo "[$(date +%T)] $1" | |
| } | |
#######################################
# Install the APT packages the rest of the script relies on and, when the
# docker command is not already available, install Docker through the
# get.docker.com convenience script.
# Outputs: progress messages via log
#######################################
setup_dependencies() {
  local -a apt_packages=(
    curl wget git ufw cpufrequtils bc htop jq lsof build-essential
  )

  log "Installing required dependencies..."
  apt-get update
  apt-get install -y "${apt_packages[@]}"
  log "Dependencies installed successfully"

  # Nothing more to do when Docker is already on the PATH.
  if command -v docker &> /dev/null; then
    log "Docker already installed (version: $(docker --version))"
    return
  fi

  log "Installing Docker..."
  curl -fsSL https://get.docker.com -o get-docker.sh
  sh get-docker.sh
  rm get-docker.sh
  log "Docker installed successfully"
}
#######################################
# Apply host-level tuning: install the sysctl drop-in, switch the CPU
# governor to "performance", and provide a 2G tmpfs at /mnt/ramdisk.
# Reads:   /tmp/sysctl-optimizations.conf
# Writes:  /etc/sysctl.d/99-ollama-optimizations.conf,
#          /etc/default/cpufrequtils, /etc/fstab (appended once)
# Outputs: progress messages via log
#######################################
optimize_system() {
  local -r ramdisk=/mnt/ramdisk
  # Same options as the fstab entry below, so the live mount and the mount
  # created at boot behave identically.
  local -r ramdisk_opts=nodev,nosuid,size=2G

  log "Optimizing system settings..."
  cp /tmp/sysctl-optimizations.conf /etc/sysctl.d/99-ollama-optimizations.conf
  sysctl --system
  log "Sysctl parameters applied"

  echo 'GOVERNOR="performance"' > /etc/default/cpufrequtils
  systemctl restart cpufrequtils
  log "CPU governor set to performance mode"

  mkdir -p "$ramdisk"
  # Only mount when nothing is mounted there yet; otherwise every re-run of
  # the script would stack another tmpfs on top of the previous one.
  if ! mountpoint -q "$ramdisk"; then
    mount -t tmpfs -o "$ramdisk_opts" tmpfs "$ramdisk"
  fi
  grep -q "/mnt/ramdisk" /etc/fstab \
    || echo "tmpfs /mnt/ramdisk tmpfs ${ramdisk_opts} 0 0" >> /etc/fstab
  log "RAM disk created at /mnt/ramdisk"

  log "Memory info: $(free -h | grep Mem)"
  log "CPU info: $(lscpu | grep 'Model name\|CPU(s)' | xargs)"
  log "System optimization completed"
}
#######################################
# Open the ports this setup exposes (SSH, 11434 for Ollama, 3000 for
# OpenHands) and then enable ufw without an interactive prompt.
# Outputs: progress messages via log
#######################################
configure_firewall() {
  local rule

  log "Configuring firewall..."
  for rule in ssh 11434/tcp 3000/tcp; do
    ufw allow "$rule"
  done
  ufw --force enable
  log "Firewall enabled with ports: SSH, 11434 (Ollama), 3000 (OpenHands)"
}
#######################################
# Download and run the official Ollama install script.
# The installer is saved to a temporary file first: when it is piped
# straight into sh, a failed download only yields empty input, sh exits 0,
# and the failure goes unnoticed.
# Outputs: progress messages via log
# Returns: 0 on success, the failing command's status otherwise
#######################################
install_ollama() {
  local installer
  local status=0

  log "Installing Ollama..."
  installer=$(mktemp)
  curl -fsSL https://ollama.com/install.sh -o "$installer" \
    && sh "$installer" \
    || status=$?
  rm -f -- "$installer"

  if (( status != 0 )); then
    log "ERROR: Ollama installation failed (exit status ${status})"
    return "$status"
  fi
  log "Ollama installed successfully ($(ollama --version))"
}
#######################################
# Copy the generated Modelfile into /opt/ollama/models and build the
# "deepseek-optimized" model from it with `ollama create`.
# Reads:   /tmp/Modelfile
# Note:    leaves the working directory at /opt/ollama/models.
# Outputs: progress messages via log; build timing from `time` on stderr
#######################################
setup_model() {
  local -r model_dir=/opt/ollama/models
  local -r model_name=deepseek-optimized

  log "Creating model directory at /opt/ollama/models..."
  mkdir -p "$model_dir"
  cp /tmp/Modelfile "${model_dir}/Modelfile"

  log "Building optimized model (this may take several minutes)..."
  cd "$model_dir"
  time ollama create "$model_name" -f Modelfile

  log "Model built successfully: deepseek-optimized"
  log "Model details: $(ollama list | grep "$model_name")"
}
#######################################
# Install the custom systemd unit for Ollama, (re)start the service with it
# and wait up to 60 seconds for the HTTP API to answer.
# The unit changes the service user and OLLAMA_MODELS, so models should be
# created only after this function has run.
# Reads:   /tmp/ollama.service
# Writes:  /etc/systemd/system/ollama.service
# Outputs: progress messages via log
# Returns: 0 even when the API never answers (a WARNING plus diagnostics is
#          logged instead, as a best-effort check)
#######################################
configure_ollama_service() {
  local -r unit_file=/etc/systemd/system/ollama.service
  local -r max_attempts=30
  local ollama_bin attempt

  log "Setting up Ollama service..."
  cp /tmp/ollama.service "$unit_file"

  # The unit template hard-codes /usr/bin/ollama; point ExecStart at the
  # binary that is actually installed when it lives somewhere else.
  ollama_bin=$(command -v ollama || true)
  if [[ -n "$ollama_bin" && "$ollama_bin" != /usr/bin/ollama ]]; then
    sed -i "s|^ExecStart=.*|ExecStart=${ollama_bin} serve|" "$unit_file"
  fi

  log "Enabling and starting Ollama service..."
  systemctl daemon-reload
  systemctl enable ollama.service
  # restart, not start: a service that is already running would otherwise
  # keep its old unit configuration and the new file would never apply.
  systemctl restart ollama.service

  for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
    # -f makes HTTP error responses count as "not ready".
    if curl -fs http://localhost:11434/api/tags &>/dev/null; then
      log "Ollama service is running and accepting connections"
      return 0
    fi
    log "Waiting for Ollama API... ($attempt/$max_attempts)"
    sleep 2
  done

  log "WARNING: Ollama service not responding after 60 seconds"
  log "Service status: $(systemctl status ollama.service | grep Active)"
  log "Last 10 log entries: $(journalctl -u ollama -n 10 --no-pager)"
}
#######################################
# Install the monitor script and schedule it every 5 minutes in the current
# user's crontab, replacing any earlier entry for it.
# Reads:   /tmp/ollama-monitor.sh
# Writes:  /usr/local/bin/ollama-monitor.sh, the user's crontab
# Outputs: progress messages via log
#######################################
setup_monitoring() {
  local -r monitor=/usr/local/bin/ollama-monitor.sh

  log "Setting up performance monitoring..."
  cp /tmp/ollama-monitor.sh /usr/local/bin/
  chmod +x "$monitor"

  {
    # grep exits 1 when it prints nothing (empty or missing crontab). Without
    # the "|| true", errexit would stop this group before the echo and an
    # empty crontab would be installed.
    crontab -l 2>/dev/null | grep -vF "ollama-monitor.sh" || true
    echo "*/5 * * * * ${monitor}"
  } | crontab -

  log "Performance monitoring configured to run every 5 minutes via cron"
}
#######################################
# Pull the OpenHands image, replace any existing "openhands-app" container
# with a fresh one, and install the configure-openhands.sh helper.
# Reads:   /tmp/configure-openhands.sh
# Writes:  /usr/local/bin/configure-openhands.sh
# Outputs: progress messages via log
#######################################
start_openhands() {
  local -r image=docker.all-hands.dev/all-hands-ai/openhands:0.28
  local -r runtime_image=docker.all-hands.dev/all-hands-ai/runtime:0.28-nikolaik
  local -a run_args=(
    -d --restart always
    -e "SANDBOX_RUNTIME_CONTAINER_IMAGE=${runtime_image}"
    -e LOG_ALL_EVENTS=true
    -v /var/run/docker.sock:/var/run/docker.sock
    -v "${HOME}/.openhands-state:/.openhands-state"
    -p 3000:3000
    # Inside the container "localhost" is the container itself. This mapping
    # lets it reach services on the host (such as Ollama on port 11434)
    # under the name host.docker.internal.
    --add-host host.docker.internal:host-gateway
    --name openhands-app
  )

  log "Starting OpenHands container..."
  docker pull "$image"
  log "OpenHands image pulled successfully"

  # Best-effort removal: failing because no such container exists is fine.
  docker rm -f openhands-app 2>/dev/null || true
  log "Removed any existing OpenHands containers"

  docker run "${run_args[@]}" "$image"

  cp /tmp/configure-openhands.sh /usr/local/bin/
  chmod +x /usr/local/bin/configure-openhands.sh
  log "OpenHands container started with ID: $(docker ps -q -f name=openhands-app)"
}
#######################################
# Log the final summary: endpoints, model name, basic host facts and the
# day-to-day management commands.
# Globals: IP_ADDRESS (written) - address shown in the endpoint URLs
# Outputs: summary lines via log
#######################################
print_summary() {
  # The public-IP lookup is cosmetic, so it must never abort the script under
  # errexit: bound it with a timeout and fall back to the first local
  # address, then to "localhost".
  IP_ADDRESS=$(curl -fs --max-time 10 ifconfig.me || true)
  if [[ -z "$IP_ADDRESS" ]]; then
    IP_ADDRESS=$(hostname -I 2>/dev/null | awk '{print $1}' || true)
  fi
  IP_ADDRESS=${IP_ADDRESS:-localhost}

  log "============ SETUP COMPLETE ============"
  log "Ollama API: http://$IP_ADDRESS:11434"
  log "OpenHands UI: http://$IP_ADDRESS:3000"
  log ""
  log "Model: deepseek-optimized (available at: ollama/deepseek-optimized:latest)"
  log "CPU Cores: $(nproc)"
  log "Memory: $(free -h | grep Mem | awk '{print $2}')"
  log "Disk Space: $(df -h / | awk 'NR==2 {print $2}')"
  log ""
  log "OpenHands Configuration:"
  log "- Custom Model: ollama/deepseek-optimized:latest"
  log "- Base URL: http://localhost:11434"
  log ""
  log "Management Commands:"
  log "- View Ollama logs: journalctl -u ollama -f"
  log "- View OpenHands logs: docker logs -f openhands-app"
  log "- Restart Ollama: systemctl restart ollama"
  log "- Restart OpenHands: docker restart openhands-app"
  log ""
  log "Installation completed on $(date)"
  log "=============================================="
}
# Generate the sysctl drop-in that optimize_system copies into /etc/sysctl.d.
# One "key=value" setting per line, written in the order listed here.
sysctl_settings=(
  # Virtual-memory tunables
  'vm.swappiness=10'
  'vm.dirty_ratio=80'
  'vm.dirty_background_ratio=5'
  'vm.max_map_count=1048576'
  # Network tunables
  'net.core.somaxconn=65535'
  'net.core.netdev_max_backlog=4096'
  'net.ipv4.tcp_max_syn_backlog=8192'
  'net.ipv4.tcp_slow_start_after_idle=0'
  'net.ipv4.tcp_tw_reuse=1'
  'net.ipv4.ip_local_port_range=1024 65535'
  # File-handle limits
  'fs.file-max=2097152'
  'fs.nr_open=2097152'
)
printf '%s\n' "${sysctl_settings[@]}" > /tmp/sysctl-optimizations.conf
# Generate the Modelfile that setup_model feeds to `ollama create`.
# It derives the model from deepseek-r1:14b and sets context size, batch
# size and thread count. There is deliberately no QUANTIZE line: that is not
# a Modelfile instruction and makes `ollama create` reject the file.
# Quantization, if wanted, is requested with `ollama create --quantize`.
cat > /tmp/Modelfile << 'MODELFILE'
FROM deepseek-r1:14b
PARAMETER num_ctx 4096
PARAMETER num_batch 512
PARAMETER num_thread 4
MODELFILE
# Generate the systemd unit that configure_ollama_service installs as
# /etc/systemd/system/ollama.service. The service runs `ollama serve` as
# root, listens on all interfaces (OLLAMA_HOST=0.0.0.0) and keeps its models
# under /root/.ollama/models/.
# NOTE(review): ExecStart assumes the binary is at /usr/bin/ollama - confirm
# against where the installer actually places it.
# NOTE(review): several of the OLLAMA_* variables below may not be read by
# the Ollama server at all - verify each against the Ollama documentation.
cat << 'OLLAMA_SERVICE' > /tmp/ollama.service
[Unit]
Description=Ollama Service
After=network.target
[Service]
Environment="OLLAMA_HOST=0.0.0.0"
Environment="OLLAMA_MODELS=/root/.ollama/models/"
Environment="OLLAMA_NUM_THREADS=4"
Environment="OLLAMA_CPU_LAYERS=100"
Environment="OLLAMA_GPU_LAYERS=0"
Environment="OLLAMA_CONTEXT_LENGTH=4096"
Environment="OLLAMA_NUM_GPU=0"
Environment="OLLAMA_MAX_LOADED_MODELS=2"
Environment="OLLAMA_MAX_QUEUE=16"
Environment="OLLAMA_KEEP_ALIVE=30m"
Environment="OLLAMA_TIMEOUT=1800s"
Environment="OLLAMA_LOAD_TIMEOUT=600s"
Environment="OLLAMA_KV_CACHE_TYPE=q8_0"
Environment="OLLAMA_NUM_PARALLEL=4"
ExecStart=/usr/bin/ollama serve
Restart=always
RestartSec=10
LimitNOFILE=65536
Nice=-10
User=root
Group=root
[Install]
WantedBy=multi-user.target
OLLAMA_SERVICE
# Generate the monitor script that setup_monitoring installs and runs from
# cron. The generated script:
#   - logs CPU and memory usage to /var/log/ollama-monitor.log;
#   - switches Ollama between a "throttled" and an "expanded" profile by
#     writing a systemd drop-in, restarting the service ONLY when the profile
#     actually changes (a drop-in is used because Environment= lines in the
#     unit take precedence over `systemctl set-environment`);
#   - powers the host off after 30 idle minutes when /etc/vast-auto-shutdown
#     exists.
cat > /tmp/ollama-monitor.sh << 'MONITOR'
#!/bin/bash
# Cron-driven load monitor for the Ollama service.

# Remembers the profile applied last, so the service is not restarted on
# every run while the load stays in the same band.
STATE_FILE=/var/lib/ollama-monitor.profile
DROPIN_DIR=/etc/systemd/system/ollama.service.d
DROPIN_FILE="${DROPIN_DIR}/90-monitor.conf"

log() {
  echo "[$(date +%T)] $1" >> /var/log/ollama-monitor.log
}

# apply_profile NAME MAX_QUEUE CONTEXT_LENGTH
# Writes the drop-in and restarts Ollama unless NAME is already active.
apply_profile() {
  local profile=$1 max_queue=$2 context_length=$3
  local current=""

  if [ -r "$STATE_FILE" ]; then
    current=$(cat "$STATE_FILE")
  fi
  if [ "$current" = "$profile" ]; then
    log "Profile '$profile' already active - service left untouched"
    return 0
  fi

  mkdir -p "$DROPIN_DIR"
  printf '[Service]\nEnvironment="OLLAMA_MAX_QUEUE=%s"\nEnvironment="OLLAMA_CONTEXT_LENGTH=%s"\n' \
    "$max_queue" "$context_length" > "$DROPIN_FILE"
  systemctl daemon-reload
  systemctl restart ollama
  printf '%s\n' "$profile" > "$STATE_FILE"
}

log "Running performance monitor"
CPU_USAGE=$(top -bn1 | grep "Cpu(s)" | sed "s/.*, *\([0-9.]*\)%* id.*/\1/" | awk '{print 100 - $1}')
log "Current CPU usage: ${CPU_USAGE}%"
MEM_AVAIL=$(free -m | grep Mem | awk '{print $7}')
MEM_TOTAL=$(free -m | grep Mem | awk '{print $2}')
MEM_PCT=$(echo "scale=2; (${MEM_TOTAL}-${MEM_AVAIL})/${MEM_TOTAL}*100" | bc)
log "Current memory usage: ${MEM_PCT}%"

# String comparison against "1": an empty bc result (unparsable input) then
# simply counts as "false" instead of raising a test syntax error.
if [ "$(echo "$CPU_USAGE > 85" | bc)" = "1" ] || [ "$(echo "$MEM_PCT > 85" | bc)" = "1" ]; then
  log "System under heavy load - throttling service"
  apply_profile throttled 8 2048
elif [ "$(echo "$CPU_USAGE < 30" | bc)" = "1" ] && [ "$(echo "$MEM_PCT < 50" | bc)" = "1" ]; then
  log "System underutilized - increasing capacity"
  apply_profile expanded 32 4096
fi

LAST_REQUEST=$(journalctl -u ollama --since "30 minutes ago" | grep -cE "POST.*generate")
log "Requests in last 30 minutes: $LAST_REQUEST"
if [ "$LAST_REQUEST" -eq 0 ] && [ -f /etc/vast-auto-shutdown ]; then
  log "No activity detected for 30 minutes and auto-shutdown enabled"
  wall "Shutting down due to inactivity"
  shutdown -h now
fi
MONITOR
# Generate the helper that start_openhands installs as
# /usr/local/bin/configure-openhands.sh. The helper polls
# http://localhost:3000 up to 30 times, 2 seconds apart, and then prints the
# model name and base URL to enter in the OpenHands UI. It prints those
# instructions whether or not the UI ever answered.
cat << 'CONFIGURE_OPENHANDS' > /tmp/configure-openhands.sh
#!/bin/bash
echo "[$(date +%T)] Waiting for OpenHands to be ready..."
for i in {1..30}; do
if curl -s http://localhost:3000 &>/dev/null; then
echo "[$(date +%T)] OpenHands is ready!"
break
fi
echo "[$(date +%T)] Waiting for OpenHands... ($i/30)"
sleep 2
done
echo "[$(date +%T)] Please configure OpenHands manually with:"
echo "- Custom Model: ollama/deepseek-optimized:latest"
echo "- Base URL: http://localhost:11434"
CONFIGURE_OPENHANDS
# ---- Main flow -------------------------------------------------------------
log "Starting optimized Ollama + OpenHands setup"
log "System: $(uname -a)"
log "Date: $(date)"
log "Directory: $(pwd)"

setup_dependencies
optimize_system
configure_firewall
install_ollama
# Install the service unit before building the model. The unit sets the
# service user and OLLAMA_MODELS, so the model has to be created through the
# service configuration that will later serve it. configure_ollama_service
# also waits for the API to answer, which `ollama create` needs.
configure_ollama_service
setup_model
setup_monitoring
start_openhands
print_summary

log "Setup completed successfully"
exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment