# bbrowning/Dockerfile.dgx_spark

## Dockerfile.dgx_spark
# A crude copy of vLLM's normal Dockerfile that installs
# a released version on DGX Spark

# Build-time knobs, each overridable with --build-arg. They are declared ahead
# of the first FROM, so on their own they are only usable in FROM lines; a
# stage has to re-declare one (bare `ARG NAME`) before it can read it.
ARG PYTORCH_CUDA_INDEX_BASE_URL="https://download.pytorch.org/whl"
ARG VLLM_VERSION="0.11.2"
ARG PYTHON_VERSION="3.12"
ARG CUDA_VERSION="13.0.2"
# Must follow CUDA_VERSION, which it interpolates into the image tag.
ARG BASE_IMAGE="nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04"


# Single build stage, based on the image selected by BASE_IMAGE.
FROM ${BASE_IMAGE} AS base

# Keep apt/debconf from prompting during package installation.
ENV DEBIAN_FRONTEND=noninteractive

# Bring these build args into scope for this stage.
ARG TARGETPLATFORM
ARG PYTHON_VERSION
ARG CUDA_VERSION

# Bootstrap the stage in one layer:
#   1. preseed tzdata's debconf answers (America / Los_Angeles),
#   2. install the OS packages,
#   3. install uv with its install script and create the /opt/venv virtualenv
#      for the requested PYTHON_VERSION,
#   4. repoint /usr/bin/{python3,python3-config,pip} at the venv, then print
#      the interpreter and pip versions.
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt-get update -y \
    && apt-get install -y \
        ccache \
        curl \
        git \
        python3-pip \
        software-properties-common \
        sudo \
    && curl -LsSf https://astral.sh/uv/install.sh | sh \
    && "$HOME/.local/bin/uv" venv /opt/venv --python "${PYTHON_VERSION}" \
    && rm -f /usr/bin/python3 /usr/bin/python3-config /usr/bin/pip \
    && for tool in python3 python3-config pip; do \
           ln -s "/opt/venv/bin/${tool}" "/usr/bin/${tool}" || exit 1; \
       done \
    && python3 --version \
    && python3 -m pip --version

# Index base URL for PyTorch wheels, re-declared so this stage can read it.
ARG PYTORCH_CUDA_INDEX_BASE_URL

# Mark the virtualenv as active and put its bin directory, followed by the
# directory uv was installed into, at the front of PATH.
ENV VIRTUAL_ENV="/opt/venv" \
    PATH="/opt/venv/bin:/root/.local/bin:$PATH"

# uv settings:
#   UV_HTTP_TIMEOUT   - in seconds; needed because installing some
#                       dependencies via uv is likely to time out otherwise
#                       (reference: https://github.com/astral-sh/uv/pull/1694)
#   UV_INDEX_STRATEGY - "unsafe-best-match"
#   UV_LINK_MODE      - copy, to avoid hardlink failures with Docker cache mounts
ENV UV_HTTP_TIMEOUT=500 \
    UV_INDEX_STRATEGY="unsafe-best-match" \
    UV_LINK_MODE=copy

# Install GCC 10 and make it the default gcc/g++ to avoid
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92519, which was causing
# warning spam when compiling the CUTLASS kernels.
# NOTE(review): the base image is ubuntu22.04, whose stock gcc may already be
# newer than 10 -- confirm this pin is still wanted.
#
# The package index is refreshed in the same layer as the install so that a
# cached `apt-get update` layer can never leave this step with stale package
# lists. The lists are removed afterwards because no later step in this stage
# uses apt.
RUN apt-get update -y \
    && apt-get install -y gcc-10 g++-10 \
    && rm -rf /var/lib/apt/lists/* \
    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 110 \
        --slave /usr/bin/g++ g++ /usr/bin/g++-10 \
    && gcc --version

# Register the CUDA "compat" library directory with the dynamic linker.
# This works around https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960; hopefully it becomes
# unnecessary in future versions of this image or of triton.
# The directory name carries only the major.minor part of CUDA_VERSION
# (13.0.2 -> 13.0).
RUN cuda_major_minor="$(echo $CUDA_VERSION | cut -d. -f1,2)" \
    && ldconfig "/usr/local/cuda-${cuda_major_minor}/compat/"

# Working directory for the remaining instructions and for the container.
WORKDIR /workspace


# CUDA architecture list used by torch (can be useful for both `dev` and
# `test`). It is set explicitly to avoid issues with torch 2.2, see
# https://github.com/pytorch/pytorch/pull/123243.
# Override with --build-arg torch_cuda_arch_list=...
ARG torch_cuda_arch_list="12.1a"

# Export the arch list and set the ptxas path Triton should use.
ENV TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas \
    TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}

# Install the PyTorch packages into the venv from the PyTorch wheel index for
# this CUDA release. The index suffix is "cu" followed by major and minor with
# the dot removed (CUDA_VERSION 13.0.2 -> cu130).
RUN cuda_tag="cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')" \
    && uv pip install \
        --python /opt/venv/bin/python3 \
        --index-url "${PYTORCH_CUDA_INDEX_BASE_URL}/${cuda_tag}" \
        torch torchvision torchaudio

# VLLM_VERSION is declared before FROM, which makes it visible to FROM lines
# only. Re-declare it here so the RUN below sees its value; without this the
# requirement expands to the invalid "vllm==".
ARG VLLM_VERSION

# Install the requested vLLM release into the venv. `--no-binary vllm` makes
# uv build vllm from its source distribution instead of using a prebuilt
# wheel, and `--torch-backend=auto` lets uv choose the torch backend.
RUN uv pip install --python /opt/venv/bin/python3 \
    "vllm==${VLLM_VERSION}" --no-binary vllm --torch-backend=auto

# Containers start `vllm serve`; arguments given to `docker run` are appended.
ENTRYPOINT ["vllm", "serve"]
	# A crude copy of vLLM's normal Dockerfile that installs
	# a released version on DGX Spark

	# Default versions and URLs for the build; each can be overridden with
	# --build-arg.
	ARG PYTORCH_CUDA_INDEX_BASE_URL="https://download.pytorch.org/whl"
	ARG VLLM_VERSION="0.11.2"
	ARG PYTHON_VERSION="3.12"
	ARG CUDA_VERSION="13.0.2"
	# Must follow CUDA_VERSION, which it interpolates into the image tag.
	ARG BASE_IMAGE="nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04"


	# Build stage based on the image selected by BASE_IMAGE.
	FROM ${BASE_IMAGE} AS base

	# Keep apt/debconf from prompting during package installation.
	ENV DEBIAN_FRONTEND=noninteractive

	# Bring these build args into scope for this stage.
	ARG TARGETPLATFORM
	ARG PYTHON_VERSION
	ARG CUDA_VERSION

	# Bootstrap the stage in one layer: preseed tzdata's debconf answers
	# (America / Los_Angeles), install the OS packages, install uv with its
	# install script, create the /opt/venv virtualenv for PYTHON_VERSION, and
	# repoint /usr/bin/{python3,python3-config,pip} at the venv.
	#
	# The pipes must be real shell pipes. An escaped `\|` is handed to echo/curl
	# as a literal argument, so nothing would reach debconf-set-selections or sh.
	RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
	    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
	    && apt-get update -y \
	    && apt-get install -y ccache software-properties-common git curl sudo python3-pip \
	    && curl -LsSf https://astral.sh/uv/install.sh | sh \
	    && $HOME/.local/bin/uv venv /opt/venv --python ${PYTHON_VERSION} \
	    && rm -f /usr/bin/python3 /usr/bin/python3-config /usr/bin/pip \
	    && ln -s /opt/venv/bin/python3 /usr/bin/python3 \
	    && ln -s /opt/venv/bin/python3-config /usr/bin/python3-config \
	    && ln -s /opt/venv/bin/pip /usr/bin/pip \
	    && python3 --version && python3 -m pip --version

	# Index base URL for PyTorch wheels, brought into scope for this stage.
	ARG PYTORCH_CUDA_INDEX_BASE_URL

	# Mark the virtualenv as active and put its bin directory, followed by the
	# directory uv was installed into, at the front of PATH.
	ENV VIRTUAL_ENV="/opt/venv" \
	    PATH="/opt/venv/bin:/root/.local/bin:$PATH"

	# uv settings:
	#   UV_HTTP_TIMEOUT   - in seconds; needed because installing some
	#                       dependencies via uv is likely to time out otherwise
	#                       (reference: https://github.com/astral-sh/uv/pull/1694)
	#   UV_INDEX_STRATEGY - "unsafe-best-match"
	#   UV_LINK_MODE      - copy, to avoid hardlink failures with Docker cache mounts
	ENV UV_HTTP_TIMEOUT=500 \
	    UV_INDEX_STRATEGY="unsafe-best-match" \
	    UV_LINK_MODE=copy

	# Install GCC 10 and make it the default gcc/g++ to avoid
	# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92519, which was causing
	# warning spam when compiling the CUTLASS kernels.
	#
	# Written as one `&&` chain rather than a heredoc: a tab-indented `EOF`
	# line does not close a `<<EOF` heredoc. The package index is refreshed in
	# the same layer as the install so a cached `apt-get update` layer cannot
	# leave this step with stale package lists.
	RUN apt-get update -y \
	    && apt-get install -y gcc-10 g++-10 \
	    && rm -rf /var/lib/apt/lists/* \
	    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 110 \
	        --slave /usr/bin/g++ g++ /usr/bin/g++-10 \
	    && gcc --version

	# Register the CUDA "compat" library directory with the dynamic linker.
	# This works around https://github.com/openai/triton/issues/2507 and
	# https://github.com/pytorch/pytorch/issues/107960; hopefully it becomes
	# unnecessary in future versions of this image or of triton.
	#
	# The pipe inside $(...) must be unescaped: with `\|`, echo prints the
	# text "| cut -d. -f1,2" instead of feeding cut, and the path is wrong.
	# The directory name carries only major.minor (13.0.2 -> 13.0).
	RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/

	# Working directory for the remaining instructions and for the container.
	WORKDIR /workspace


	# CUDA architecture list used by torch (can be useful for both `dev` and
	# `test`). It is set explicitly to avoid issues with torch 2.2, see
	# https://github.com/pytorch/pytorch/pull/123243.
	# Override with --build-arg torch_cuda_arch_list=...
	ARG torch_cuda_arch_list="12.1a"

	# Export the arch list and set the ptxas path Triton should use.
	ENV TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas \
	    TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}

	# Install the PyTorch packages into the venv from the PyTorch wheel index
	# for this CUDA release. The index suffix is "cu" followed by major and
	# minor with the dot removed (CUDA_VERSION 13.0.2 -> cu130).
	# The pipes inside $(...) must be unescaped; with `\|` the suffix would be
	# the echoed text rather than the output of cut/tr.
	RUN uv pip install --python /opt/venv/bin/python3 \
	    torch torchvision torchaudio \
	    --index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')

	# VLLM_VERSION has to be declared inside this stage for the RUN below to
	# see its value; without this the requirement expands to the invalid
	# "vllm==".
	ARG VLLM_VERSION

	# Install the requested vLLM release into the venv. `--no-binary vllm`
	# makes uv build vllm from its source distribution instead of using a
	# prebuilt wheel, and `--torch-backend=auto` lets uv choose the torch
	# backend.
	RUN uv pip install --python /opt/venv/bin/python3 \
	    "vllm==${VLLM_VERSION}" --no-binary vllm --torch-backend=auto

	# Containers start `vllm serve`; arguments given to `docker run` are appended.
	ENTRYPOINT ["vllm", "serve"]
# No results found