Skip to content

Instantly share code, notes, and snippets.

@theobjectivedad
Created April 15, 2025 10:35
Show Gist options
  • Select an option

  • Save theobjectivedad/6c90d63d7fac44a6b35542c3a0dc3530 to your computer and use it in GitHub Desktop.

Select an option

Save theobjectivedad/6c90d63d7fac44a6b35542c3a0dc3530 to your computer and use it in GitHub Desktop.
Building a VLLM Container Locally

Building a VLLM Container Locally

This is a quick guide that demonstrates how to build a vLLM container image locally.

First, clone the vLLM branch or fork you would like to build:

git clone https://github.com/theobjectivedad/vllm.git src
cd src
git checkout outlines_subschemas

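Next, create the following Makefile in your project directory (the directory that contains src) and adjust the configuration variables to meet your specific needs. The container target also loads environment variables from a .env file in that same directory: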

# Recipes are executed by bash. Each recipe runs as one shell invocation
# (.ONESHELL), and the shell aborts on the first failing command or failing
# pipeline stage (-e -o pipefail).
SHELL := /bin/bash
.SHELLFLAGS := -e -o pipefail -c
.ONESHELL:

# Quiet output: do not echo recipe lines and do not print
# "Entering/Leaving directory" messages.
MAKEFLAGS += --no-print-directory
.SILENT:

# Running plain `make` builds the container image.
.PHONY: default
default: container

# Paths
# WORKSPACE_DIR is the directory make runs in; the vLLM checkout is expected
# in its src/ subdirectory.
WORKSPACE_DIR:=$(CURDIR)
SRC_DIR:=$(WORKSPACE_DIR)/src

# Buildx
BUILDX_BUILDER:=wm-default
PLATFORMS:=linux/amd64

# Docker repository names used in various build steps
PUBLIC_REGISTRY:=quay.io/theobjectivedad
PRIVATE_REGISTRY:=harbor.k8s.wm.k8slab/vllm
REPO_NAME:=vllm-openai
TAG:=latest

# Branch and short commit hash of the vLLM checkout, captured once at parse
# time. The path is quoted so a workspace path containing spaces still works.
GIT_BRANCH:=$(shell git -C "$(SRC_DIR)" rev-parse --abbrev-ref HEAD)
GIT_COMMIT:=$(shell git -C "$(SRC_DIR)" rev-parse --short HEAD)

# vLLM build arguments
# These are defined BEFORE the annotations below: `:=` expands its right-hand
# side immediately, so anything referenced by ANNOTATION_DESCRIPTION must
# already have a value at that point.
PYTHON_VERSION:=3.12
VLLM_FA_ARCH:=80-real
VLLM_CUDA_VERSION:=12.8.0
MAX_JOBS:=10
# CUDA compute capabilities passed to the build as torch_cuda_arch_list.
# `?=` keeps a value supplied through the environment or the command line.
# NOTE(review): the 8.0 default is chosen to match VLLM_FA_ARCH (80-real);
# confirm it is right for your GPUs and keep the two in sync.
TORCH_CUDA_ARCH_LIST?=8.0
# Architecture string shown in the image description.
VLLM_CUDA_ARCH:=$(TORCH_CUDA_ARCH_LIST)

# Image annotations for custom builds
# See: https://github.com/opencontainers/image-spec/blob/main/annotations.md#pre-defined-annotation-keys
ANNOTATION_TITLE:=vLLM OpenAI Development Build
ANNOTATION_DESCRIPTION:=vLLM-OpenAI dev build,CUDA $(VLLM_CUDA_VERSION), Arch $(VLLM_CUDA_ARCH), $(GIT_BRANCH)/$(GIT_COMMIT)
ANNOTATION_SOURCE:=https://github.com/theobjectivedad/vllm
ANNOTATION_AUTHORS:=The Objective Dad <theobjectivedad@gmail.com>
# UTC build timestamp, evaluated once when the Makefile is parsed.
ANNOTATION_CREATED:=$(shell date -u +'%Y-%m-%dT%H:%M:%SZ')
ANNOTATION_REF_NAME:=$(TAG)

# container: build the vllm-openai stage of docker/Dockerfile from $(SRC_DIR)
# with docker buildx, tagged $(PRIVATE_REGISTRY)/$(REPO_NAME):$(TAG), using a
# registry-backed build cache.
#
# The recipe relies on .ONESHELL (one shell for the whole recipe) so that the
# sourced environment and the `cd` stay in effect for the docker command.
#
# NOTE(review): two exporters are requested (--output=type=registry and
# --output=type=docker). Multiple exporters need a recent Buildx/BuildKit;
# confirm both are intended for your setup.
.PHONY: container
container:
	# Load optional build-time environment from .env. `set -a` exports every
	# variable the file assigns. A bare `export` is deliberately not run here:
	# it would print all exported variables (including secrets) to the log.
	if [[ -f "$(WORKSPACE_DIR)/.env" ]]; then
		set -a
		. "$(WORKSPACE_DIR)/.env"
		set +a
	fi
	cd "$(SRC_DIR)"
	echo "INFO building $(GIT_BRANCH)/$(GIT_COMMIT) => $(PRIVATE_REGISTRY)/$(REPO_NAME):$(TAG)"
	docker buildx build \
		--builder=$(BUILDX_BUILDER) \
		--cache-from=type=registry,ref=$(PRIVATE_REGISTRY)/$(REPO_NAME):build-cache \
		--cache-to=type=registry,ref=$(PRIVATE_REGISTRY)/$(REPO_NAME):build-cache,mode=max,image-manifest=true \
		--tag=$(PRIVATE_REGISTRY)/$(REPO_NAME):$(TAG) \
		--progress=plain \
		--platform=$(PLATFORMS) \
		--output=type=registry \
		--annotation="org.opencontainers.image.title=$(ANNOTATION_TITLE)" \
		--annotation="org.opencontainers.image.description=$(ANNOTATION_DESCRIPTION)" \
		--annotation="org.opencontainers.image.source=$(ANNOTATION_SOURCE)" \
		--annotation="org.opencontainers.image.authors=$(ANNOTATION_AUTHORS)" \
		--annotation="org.opencontainers.image.created=$(ANNOTATION_CREATED)" \
		--annotation="org.opencontainers.image.ref.name=$(ANNOTATION_REF_NAME)" \
		--target=vllm-openai \
		--build-arg=CUDA_VERSION="$(VLLM_CUDA_VERSION)" \
		--build-arg=vllm_fa_cmake_gpu_arches="$(VLLM_FA_ARCH)" \
		--build-arg=torch_cuda_arch_list="$(TORCH_CUDA_ARCH_LIST)" \
		--build-arg=GIT_REPO_CHECK=0 \
		--build-arg=RUN_WHEEL_CHECK=false \
		--build-arg=nvcc_threads=1 \
		--build-arg=max_jobs=$(MAX_JOBS) \
		--output=type=docker \
		--file=docker/Dockerfile . && \
	echo "INFO Build successful: $(PRIVATE_REGISTRY)/$(REPO_NAME):$(TAG)"

Finally, execute the default (container) build target:

make
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment