Created
November 21, 2025 00:14
-
-
Save bbrowning/e2efe77b617b741a23ed31333a7ecba9 to your computer and use it in GitHub Desktop.
Dockerfile to create vLLM v0.11.2 containers for DGX Spark
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # A crude copy of vLLM's normal Dockerfile that installs | |
| # a released version on DGX Spark | |
| # | |
| # Build arguments declared before FROM are global: they are only usable in | |
| # FROM lines, so a stage must redeclare each one (bare `ARG NAME`) before use. | |
| ARG CUDA_VERSION=13.0.2 | |
| ARG PYTHON_VERSION=3.12 | |
| ARG VLLM_VERSION=0.11.2 | |
| # Base image defaults to the CUDA devel image matching CUDA_VERSION. | |
| ARG BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 | |
| # Root of the PyTorch wheel index; a cuXYZ suffix is appended at install time. | |
| ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl | |
| FROM ${BASE_IMAGE} AS base | |
| # Redeclare the global build arguments that this stage reads. | |
| ARG CUDA_VERSION | |
| ARG PYTHON_VERSION | |
| # Automatic platform argument; no instruction visible in this file references it. | |
| ARG TARGETPLATFORM | |
| # Keeps apt/debconf from prompting during the build. | |
| ENV DEBIAN_FRONTEND=noninteractive | |
| # Install system dependencies and uv, then create Python virtual environment | |
| # Steps, in order: | |
| #   1. preseed the tzdata timezone selections (America/Los_Angeles) via debconf | |
| #   2. refresh the apt index and install the build/runtime tools | |
| #   3. install uv with its installer script (lands in $HOME/.local/bin) | |
| #   4. create the virtual environment at /opt/venv for PYTHON_VERSION | |
| #   5. replace /usr/bin/python3, python3-config and pip with symlinks into the venv | |
| #   6. print the python and pip versions; the build fails here if either command errors | |
| # NOTE(review): the `curl | sh` pipe runs without pipefail, so a failed download is | |
| # only noticed when the next step cannot find the uv binary. | |
| RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \ | |
| && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \ | |
| && apt-get update -y \ | |
| && apt-get install -y ccache software-properties-common git curl sudo python3-pip \ | |
| && curl -LsSf https://astral.sh/uv/install.sh | sh \ | |
| && $HOME/.local/bin/uv venv /opt/venv --python ${PYTHON_VERSION} \ | |
| && rm -f /usr/bin/python3 /usr/bin/python3-config /usr/bin/pip \ | |
| && ln -s /opt/venv/bin/python3 /usr/bin/python3 \ | |
| && ln -s /opt/venv/bin/python3-config /usr/bin/python3-config \ | |
| && ln -s /opt/venv/bin/pip /usr/bin/pip \ | |
| && python3 --version && python3 -m pip --version | |
| # Redeclared so the PyTorch install step further down can read the index base URL. | |
| ARG PYTORCH_CUDA_INDEX_BASE_URL | |
| # Activate virtual environment and add uv to PATH | |
| # (venv binaries come first, then uv's install directory, then the inherited PATH) | |
| ENV PATH="/opt/venv/bin:/root/.local/bin:$PATH" | |
| ENV VIRTUAL_ENV="/opt/venv" | |
| # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out | |
| # Reference: https://github.com/astral-sh/uv/pull/1694 | |
| ENV UV_HTTP_TIMEOUT=500 | |
| # NOTE(review): presumably lets uv pick the best version across all configured | |
| # indexes instead of stopping at the first index that has the package -- confirm | |
| # against uv's index-strategy documentation. | |
| ENV UV_INDEX_STRATEGY="unsafe-best-match" | |
| # Use copy mode to avoid hardlink failures with Docker cache mounts | |
| ENV UV_LINK_MODE=copy | |
| # Upgrade to GCC 10 to avoid https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92519 | |
| # as it was causing spam when compiling the CUTLASS kernels | |
| # The package index is refreshed in the same layer as the install so this step | |
| # does not depend on apt lists left behind by an earlier (possibly cached) layer. | |
| RUN apt-get update -y \ | |
| && apt-get install -y gcc-10 g++-10 | |
| # Register GCC 10 as the gcc alternative (priority 110), with g++ tied to it as a | |
| # slave link so both compilers always switch together. | |
| RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 110 --slave /usr/bin/g++ g++ /usr/bin/g++-10 | |
| # Print the active compiler version into the build log as a sanity check. | |
| RUN <<EOF | |
| gcc --version | |
| EOF | |
| # Workaround for https://github.com/openai/triton/issues/2507 and | |
| # https://github.com/pytorch/pytorch/issues/107960 -- hopefully | |
| # this won't be needed for future versions of this docker image | |
| # or future versions of triton. | |
| # Runs ldconfig on the CUDA compat directory; the path uses only the major.minor | |
| # part of CUDA_VERSION (13.0.2 -> /usr/local/cuda-13.0/compat/). | |
| RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ | |
| # Working directory for the remaining build steps and for the running container. | |
| WORKDIR /workspace | |
| # cuda arch list used by torch | |
| # can be useful for both `dev` and `test` | |
| # explicitly set the list to avoid issues with torch 2.2 | |
| # see https://github.com/pytorch/pytorch/pull/123243 | |
| # Overridable at build time with --build-arg torch_cuda_arch_list=...; the value is | |
| # exported as TORCH_CUDA_ARCH_LIST so it is visible to later build steps and at runtime. | |
| ARG torch_cuda_arch_list='12.1a' | |
| ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list} | |
| # NOTE(review): presumably makes Triton use the CUDA toolkit's ptxas -- confirm. | |
| ENV TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas | |
| # Install PyTorch into the venv from the CUDA-specific wheel index. The index suffix | |
| # is CUDA_VERSION's major.minor with the dot removed (13.0.2 -> cu130). | |
| # NOTE(review): the three packages are unpinned, so the versions installed depend on | |
| # what the index serves at build time. | |
| RUN uv pip install --python /opt/venv/bin/python3 \ | |
| torch torchvision torchaudio \ | |
| --index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') | |
| # VLLM_VERSION is declared before FROM, which makes it usable in FROM lines only. | |
| # It has to be redeclared inside the stage to reach the RUN shell; without this the | |
| # requirement below expands to the invalid specifier "vllm==". | |
| ARG VLLM_VERSION | |
| # Install the pinned vLLM release into the venv. --no-binary vllm makes uv build | |
| # vLLM from its source distribution rather than use a pre-built wheel, and | |
| # --torch-backend=auto lets uv choose the PyTorch backend for this machine. | |
| RUN uv pip install --python /opt/venv/bin/python3 \ | |
| "vllm==${VLLM_VERSION}" --no-binary vllm --torch-backend=auto | |
| # Exec-form entrypoint: arguments given to `docker run <image> ...` are appended | |
| # to `vllm serve`. | |
| ENTRYPOINT ["vllm", "serve"] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment