This is a quick guide that demonstrates how to build a vLLM container locally.
First, clone the vLLM branch/fork you would like to build:
git clone https://github.com/theobjectivedad/vllm.git src
cd src
git checkout outlines_subschemas

Next, create the following Makefile in your project directory, adjusting the configuration variables to meet your specific needs:
# Run each recipe in a single shell so `set -a` / `. .env` / `cd` persist
# across lines, with bash strict-ish flags.
.ONESHELL:
SHELL:=/bin/bash
.SHELLFLAGS:=-e -o pipefail -c
.SILENT:
MAKEFLAGS+=--no-print-directory
# Remove a half-written target if its recipe fails, so a broken build never
# looks up to date.
.DELETE_ON_ERROR:

.PHONY: default
default: container

# Paths
WORKSPACE_DIR:=$(CURDIR)
SRC_DIR:=$(WORKSPACE_DIR)/src

# Buildx
BUILDX_BUILDER:=wm-default
PLATFORMS:=linux/amd64

# Docker repository names used in various build steps
PUBLIC_REGISTRY:=quay.io/theobjectivedad
PRIVATE_REGISTRY:=harbor.k8s.wm.k8slab/vllm
REPO_NAME:=vllm-openai
TAG:=latest

GIT_BRANCH:=$(shell git -C $(SRC_DIR) rev-parse --abbrev-ref HEAD)
GIT_COMMIT:=$(shell git -C $(SRC_DIR) rev-parse --short HEAD)

# vLLM build arguments.
# NOTE: these are defined BEFORE the simply-expanded (:=) annotation variables
# below that reference them; previously ANNOTATION_DESCRIPTION was assigned
# first, so $(VLLM_CUDA_VERSION) expanded to an empty string.
# NOTE(review): PYTHON_VERSION is defined but never passed as a --build-arg in
# the container recipe -- confirm whether that is intentional.
PYTHON_VERSION:=3.12
VLLM_FA_ARCH:=80-real
VLLM_CUDA_VERSION:=12.8.0
MAX_JOBS:=10

# Image annotations for custom builds
# See: https://github.com/opencontainers/image-spec/blob/main/annotations.md#pre-defined-annotation-keys
ANNOTATION_TITLE:=vLLM OpenAI Development Build
# The original referenced VLLM_CUDA_ARCH, which is defined nowhere in this
# file; VLLM_FA_ARCH appears to be what was intended -- TODO confirm.
ANNOTATION_DESCRIPTION:=vLLM-OpenAI dev build, CUDA $(VLLM_CUDA_VERSION), Arch $(VLLM_FA_ARCH), $(GIT_BRANCH)/$(GIT_COMMIT)
ANNOTATION_SOURCE:=https://github.com/theobjectivedad/vllm
ANNOTATION_AUTHORS:=The Objective Dad <theobjectivedad@gmail.com>
ANNOTATION_CREATED:=$(shell date -u +'%Y-%m-%dT%H:%M:%SZ')
ANNOTATION_REF_NAME:=$(TAG)
.PHONY: container
container:
	# Source local overrides (e.g. TORCH_CUDA_ARCH_LIST) from .env and export
	# them to child processes. The bare `export` that used to follow was
	# removed: with no arguments it prints the entire environment -- anything
	# secret in .env included -- into the build log.
	set -a
	. $(WORKSPACE_DIR)/.env
	set +a
	cd $(SRC_DIR)
	echo "INFO building $(GIT_BRANCH)/$(GIT_COMMIT) => $(PRIVATE_REGISTRY)/$(REPO_NAME):$(TAG)"
	# Notes on flags below:
	# - Exactly one --output: the original passed both type=registry and
	#   type=docker; type=registry is kept so the tag and the build cache
	#   both land in $(PRIVATE_REGISTRY).
	# - The title annotation previously used the key
	#   org.opencontainers.image.authors (duplicating the real authors
	#   annotation); it now uses org.opencontainers.image.title.
	# - torch_cuda_arch_list uses $$TORCH_CUDA_ARCH_LIST (shell expansion)
	#   because the value is sourced from .env inside this recipe; a Make
	#   $(...) reference would have been expanded to empty before the shell
	#   ever ran -- confirm .env actually defines TORCH_CUDA_ARCH_LIST.
	docker buildx build \
		--builder=$(BUILDX_BUILDER) \
		--cache-from=type=registry,ref=$(PRIVATE_REGISTRY)/$(REPO_NAME):build-cache \
		--cache-to=type=registry,ref=$(PRIVATE_REGISTRY)/$(REPO_NAME):build-cache,mode=max,image-manifest=true \
		--tag=$(PRIVATE_REGISTRY)/$(REPO_NAME):$(TAG) \
		--progress=plain \
		--platform=$(PLATFORMS) \
		--output=type=registry \
		--annotation="org.opencontainers.image.title=$(ANNOTATION_TITLE)" \
		--annotation="org.opencontainers.image.description=$(ANNOTATION_DESCRIPTION)" \
		--annotation="org.opencontainers.image.source=$(ANNOTATION_SOURCE)" \
		--annotation="org.opencontainers.image.authors=$(ANNOTATION_AUTHORS)" \
		--annotation="org.opencontainers.image.created=$(ANNOTATION_CREATED)" \
		--annotation="org.opencontainers.image.ref.name=$(ANNOTATION_REF_NAME)" \
		--target=vllm-openai \
		--build-arg=CUDA_VERSION="$(VLLM_CUDA_VERSION)" \
		--build-arg=vllm_fa_cmake_gpu_arches="$(VLLM_FA_ARCH)" \
		--build-arg=torch_cuda_arch_list="$$TORCH_CUDA_ARCH_LIST" \
		--build-arg=GIT_REPO_CHECK=0 \
		--build-arg=RUN_WHEEL_CHECK=false \
		--build-arg=nvcc_threads=1 \
		--build-arg=max_jobs=$(MAX_JOBS) \
		--file=docker/Dockerfile . && \
echo "INFO Build successful: $(PRIVATE_REGISTRY)/$(REPO_NAME):$(TAG)"

Finally, execute the default (container) build target:
make