You can skip this section if you install directly on a physical machine instead of a VM.
- 16 CPU (or the max you have in the system, up to you)
- 32GB RAM (or more, for model caching to avoid heavy disk usage!)
- 32GB vda (or more for /)
- 256GB vdb (or more for /srv - ollama, docker, you can easily resize it later)
- UEFI
- Q35 for Passthrough
- Install on the vda
- Minimal server install with SSH
- Enable 3rd party drivers
- Use vda as /
- Use vdb as /srv
For details on how to set up a Proxmox hypervisor for NVIDIA GPU passthrough, follow this tutorial: https://3os.org/infrastructure/proxmox/gpu-passthrough/gpu-passthrough-to-vm/
Add a new PCI device in the web ui
- Works with RTX 3090, select the GPU you see on the PCI list
- all functions - yes
- primary gpu - no (not necessarily, nvidia can act as an accelerator not only as the primary gpu)
- rom bar - yes
- pci express - yes
# Refresh package indexes and upgrade existing packages first.
sudo apt-get update
sudo apt-get upgrade
# Install common server utilities (monitoring, networking, transfer tools).
# Note: 'rsync' was listed twice in the original list; kept once.
sudo apt-get install apt-transport-https bc bwm-ng ca-certificates cron curl fio htop iperf \
net-tools parted rsync screen software-properties-common sysstat tmux unzip \
wget gnupg lsb-release sudo lm-sensors
# Remove any previously installed NVIDIA packages.
# Quote the glob so apt receives the pattern itself — an unquoted 'nvidia*'
# would be expanded by the shell against files in the current directory.
sudo apt autoremove 'nvidia*' --purge
# List detected GPUs and the recommended driver package.
ubuntu-drivers devices
# Install the recommended driver automatically...
sudo ubuntu-drivers autoinstall
# ...or pin a specific driver version instead (run ONE of these two).
sudo apt install nvidia-driver-550
# Reboot so the new kernel modules are loaded (requires root).
sudo reboot
# After the reboot, verify the driver is active.
nvidia-smi
# Install the CUDA toolkit (nvcc, libraries).
sudo apt install nvidia-cuda-toolkit
Based on https://docs.docker.com/engine/install/ubuntu/
# Docker Engine installation via the official apt repository
# (mirrors https://docs.docker.com/engine/install/ubuntu/).
# 1. Refresh indexes and ensure HTTPS-transport prerequisites are present.
sudo apt-get update
sudo apt-get install ca-certificates curl
# 2. Create the keyring directory and fetch Docker's GPG signing key.
sudo install -m 0755 -d /etc/apt/keyrings
sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
sudo chmod a+r /etc/apt/keyrings/docker.asc
# 3. Register the Docker apt repository for this machine's architecture
#    and Ubuntu release codename.
echo \
"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
$(. /etc/os-release && echo "${UBUNTU_CODENAME:-$VERSION_CODENAME}") stable" | \
sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
# 4. Install Docker Engine, CLI, containerd and the buildx/compose plugins.
sudo apt-get update
sudo apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
# Allow the current user to run docker without sudo
# (group membership takes effect after re-login/reboot).
sudo usermod -aG docker "$USER"
# Relocate Docker's data directory to /srv (the large vdb disk).
# All of these need root. Stop the socket as well as the service —
# otherwise socket activation can restart the daemon mid-move.
sudo systemctl stop docker docker.socket
sudo mv /var/lib/docker /srv/docker
sudo ln -s /srv/docker /var/lib/docker
sudo systemctl start docker
# Reboot so the docker group membership is picked up.
sudo reboot
# Add NVIDIA's container toolkit repository: fetch the signing key,
# then rewrite the repo list so every entry is pinned to that key.
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
&& curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
sudo apt-get update
sudo apt-get install -y nvidia-container-toolkit
# Register the NVIDIA runtime with Docker and restart the daemon.
# Without this step the compose 'driver: nvidia' GPU reservation fails.
sudo nvidia-ctk runtime configure --runtime=docker
sudo systemctl restart docker
Save the following Compose file to ~/docker-compose.yml (or any location of your choice).
# docker-compose.yml — Ollama (GPU-accelerated) + Open WebUI on a shared network.
# Indentation reconstructed: the original snippet had lost its YAML nesting
# and would not parse.
services:
  ollama:
    container_name: ollama
    image: ollama/ollama:latest
    restart: unless-stopped
    networks:
      - genai-network
    ports:
      - 11434:11434
    environment:
      # Keep loaded models resident for 24h (default is only a few minutes),
      # avoiding repeated model reloads from disk.
      - OLLAMA_KEEP_ALIVE=24h
    volumes:
      # Model storage on the large /srv disk.
      - /srv/ollama:/root/.ollama
    deploy:
      resources:
        reservations:
          devices:
            # Expose all NVIDIA GPUs to the container
            # (requires nvidia-container-toolkit on the host).
            - driver: nvidia
              count: all
              capabilities: [gpu]
  webui:
    container_name: webui
    image: ghcr.io/open-webui/open-webui:main
    restart: unless-stopped
    networks:
      - genai-network
    ports:
      - 8080:8080
    environment:
      # Reach Ollama by service name over the shared compose network.
      - OLLAMA_BASE_URL=http://ollama:11434
    volumes:
      - /srv/webui:/app/backend/data
networks:
  genai-network:
    name: genai-network
    external: false
# Start the stack in the background (run from the docker-compose.yml directory).
docker compose up -d
# Follow the logs (Ctrl+C stops following; containers keep running).
docker compose logs -f
# Stop and remove the containers when you are done.
docker compose down
# Pull a model inside the running ollama container.
docker exec -it ollama ollama pull qwen2.5-coder:32b
- Ollama API is exposed on port 11434
- Open WebUI is exposed on port 8080
- Ollama models are kept in /srv/ollama
- Open WebUI keeps data in /srv/webui