This is a quick guide that demonstrates how to build a vLLM container locally.
First, clone the vLLM branch/fork you would like to build:
git clone https://github.com/theobjectivedad/vllm.git src
cd src
git checkout outlines_subschemas

Next, create the following Makefile in your project directory, adjusting the configuration variables to meet your specific needs:
# Run each recipe in a single shell so `set -a` / `. .env` / `cd` persist
# across lines, with bash strict-ish flags.
.ONESHELL:
SHELL:=/bin/bash
.SHELLFLAGS:=-e -o pipefail -c
.SILENT:
MAKEFLAGS+=--no-print-directory
# Remove a half-written target if its recipe fails, so a broken build never
# looks up to date.
.DELETE_ON_ERROR:

.PHONY: default
default: container

# Paths
WORKSPACE_DIR:=$(CURDIR)
SRC_DIR:=$(WORKSPACE_DIR)/src

# Buildx
BUILDX_BUILDER:=wm-default
PLATFORMS:=linux/amd64

# Docker repository names used in various build steps
PUBLIC_REGISTRY:=quay.io/theobjectivedad
PRIVATE_REGISTRY:=harbor.k8s.wm.k8slab/vllm
REPO_NAME:=vllm-openai
TAG:=latest

GIT_BRANCH:=$(shell git -C $(SRC_DIR) rev-parse --abbrev-ref HEAD)
GIT_COMMIT:=$(shell git -C $(SRC_DIR) rev-parse --short HEAD)

# vLLM build arguments.
# NOTE: these are defined BEFORE the simply-expanded (:=) annotation variables
# below that reference them; previously ANNOTATION_DESCRIPTION was assigned
# first, so $(VLLM_CUDA_VERSION) expanded to an empty string.
# NOTE(review): PYTHON_VERSION is defined but never passed as a --build-arg in
# the container recipe -- confirm whether that is intentional.
PYTHON_VERSION:=3.12
VLLM_FA_ARCH:=80-real
VLLM_CUDA_VERSION:=12.8.0
MAX_JOBS:=10

# Image annotations for custom builds
# See: https://github.com/opencontainers/image-spec/blob/main/annotations.md#pre-defined-annotation-keys
ANNOTATION_TITLE:=vLLM OpenAI Development Build
# The original referenced VLLM_CUDA_ARCH, which is defined nowhere in this
# file; VLLM_FA_ARCH appears to be what was intended -- TODO confirm.
ANNOTATION_DESCRIPTION:=vLLM-OpenAI dev build, CUDA $(VLLM_CUDA_VERSION), Arch $(VLLM_FA_ARCH), $(GIT_BRANCH)/$(GIT_COMMIT)
ANNOTATION_SOURCE:=https://github.com/theobjectivedad/vllm
ANNOTATION_AUTHORS:=The Objective Dad <theobjectivedad@gmail.com>
ANNOTATION_CREATED:=$(shell date -u +'%Y-%m-%dT%H:%M:%SZ')
ANNOTATION_REF_NAME:=$(TAG)
.PHONY: container
container:
	# Source local overrides (e.g. TORCH_CUDA_ARCH_LIST) from .env and export
	# them to child processes. The bare `export` that used to follow was
	# removed: with no arguments it prints the entire environment -- anything
	# secret in .env included -- into the build log.
	set -a
	. $(WORKSPACE_DIR)/.env
	set +a
	cd $(SRC_DIR)
	echo "INFO building $(GIT_BRANCH)/$(GIT_COMMIT) => $(PRIVATE_REGISTRY)/$(REPO_NAME):$(TAG)"
	# Notes on flags below:
	# - Exactly one --output: the original passed both type=registry and
	#   type=docker; type=registry is kept so the tag and the build cache
	#   both land in $(PRIVATE_REGISTRY).
	# - The title annotation previously used the key
	#   org.opencontainers.image.authors (duplicating the real authors
	#   annotation); it now uses org.opencontainers.image.title.
	# - torch_cuda_arch_list uses $$TORCH_CUDA_ARCH_LIST (shell expansion)
	#   because the value is sourced from .env inside this recipe; a Make
	#   $(...) reference would have been expanded to empty before the shell
	#   ever ran -- confirm .env actually defines TORCH_CUDA_ARCH_LIST.
	docker buildx build \
		--builder=$(BUILDX_BUILDER) \
		--cache-from=type=registry,ref=$(PRIVATE_REGISTRY)/$(REPO_NAME):build-cache \
		--cache-to=type=registry,ref=$(PRIVATE_REGISTRY)/$(REPO_NAME):build-cache,mode=max,image-manifest=true \
		--tag=$(PRIVATE_REGISTRY)/$(REPO_NAME):$(TAG) \
		--progress=plain \
		--platform=$(PLATFORMS) \
		--output=type=registry \
		--annotation="org.opencontainers.image.title=$(ANNOTATION_TITLE)" \
		--annotation="org.opencontainers.image.description=$(ANNOTATION_DESCRIPTION)" \
		--annotation="org.opencontainers.image.source=$(ANNOTATION_SOURCE)" \
		--annotation="org.opencontainers.image.authors=$(ANNOTATION_AUTHORS)" \
		--annotation="org.opencontainers.image.created=$(ANNOTATION_CREATED)" \
		--annotation="org.opencontainers.image.ref.name=$(ANNOTATION_REF_NAME)" \
		--target=vllm-openai \
		--build-arg=CUDA_VERSION="$(VLLM_CUDA_VERSION)" \
		--build-arg=vllm_fa_cmake_gpu_arches="$(VLLM_FA_ARCH)" \
		--build-arg=torch_cuda_arch_list="$$TORCH_CUDA_ARCH_LIST" \
		--build-arg=GIT_REPO_CHECK=0 \
		--build-arg=RUN_WHEEL_CHECK=false \
		--build-arg=nvcc_threads=1 \
		--build-arg=max_jobs=$(MAX_JOBS) \
		--file=docker/Dockerfile . && \
echo "INFO Build successful: $(PRIVATE_REGISTRY)/$(REPO_NAME):$(TAG)"

Finally, execute the default (container) build target:
make