This is the docker image I am using for Axolotl R&D. You may need to tweak TORCH_CUDA_ARCH_LIST to include your GPU architecture. There are several improvments over the official axolotl image including:
- Non-root user
- Cleaner dependency management
- Better caching at build time
- Highly parameterized build, useful for testing new Python dependencies / versions
- Final stage uses NVIDIA's runtime container, which (hypothetically) should be smaller
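If you are not sure which compute capabilities to list, one quick way to check is to ask the driver directly. This is just a sketch and assumes a reasonably recent nvidia-smi (older drivers may not support the compute_cap query field):
# Print the name and compute capability of each visible GPU, e.g. "8.6" for an RTX 3090
nvidia-smi --query-gpu=name,compute_cap --format=csv
Use the reported values, separated by semicolons, for TORCH_CUDA_ARCH_LIST (and APEX_CUDA_ARCH_LIST) in the Makefile further down.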
The Dockerfile below was shamelessly taken from here:
###############################################################################
# base-builder
###############################################################################
ARG CONTAINER_CUDA_VERSION
ARG CONTAINER_CUDNN_VERSION
ARG CONTAINER_UBUNTU_VERSION
# nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
FROM nvidia/cuda:${CONTAINER_CUDA_VERSION}-cudnn${CONTAINER_CUDNN_VERSION}-devel-ubuntu${CONTAINER_UBUNTU_VERSION} as base
ARG PYTORCH_VERSION
ARG PYTORCH_REPO
ARG TORCH_CUDA_ARCH_LIST
ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
# Install OS dependencies
RUN --mount=type=cache,target=/var/cache/apt \
apt-get update && \
apt-get install -y \
software-properties-common \
git \
build-essential \
ninja-build \
libaio-dev \
pip && \
ln -s /usr/bin/python3 /usr/bin/python
# This doesn't seem to help with anything...
# Note the build is HIGHLY PARALLELIZED, takes forever, and currently fails
# RUN apt-get install cmake && \
# git clone --branch v2.0.0 --recursive https://github.com/pytorch/pytorch && \
# cd pytorch && \
# pip install -r requirements.txt && \
# MAX_JOBS=32 python setup.py bdist_wheel
# The dependencies built in the later stages require PyTorch to already be installed
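# (packaging is installed here as well because several of the setup.py builds below use it to check the installed torch / CUDA versions)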
RUN --mount=type=cache,target=/root/.cache \
pip install --extra-index-url ${PYTORCH_REPO} -U \
"torch==${PYTORCH_VERSION}" \
packaging
###############################################################################
# builder-deepspeed
###############################################################################
FROM base as builder-deepspeed
ARG DEEPSPEED_VERSION
ARG TORCH_CUDA_ARCH_LIST
ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
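# DS_BUILD_OPS=1 pre-compiles the DeepSpeed C++/CUDA ops into the wheel instead of JIT-compiling
# them at runtime; DS_BUILD_SPARSE_ATTN=0 skips the sparse attention op.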
RUN git clone --branch v${DEEPSPEED_VERSION} --depth 1 https://github.com/microsoft/DeepSpeed.git && \
cd DeepSpeed && \
DS_BUILD_SPARSE_ATTN=0 DS_BUILD_OPS=1 python setup.py bdist_wheel
###############################################################################
# builder-bitsandbytes
###############################################################################
FROM base as builder-bitsandbytes
ARG BITSANDBYTES_VERSION
ARG BITSANDBYTES_CUDA_VERSION
ARG BITSANDBYTES_MAKE_TARGET
ARG MAX_CONCURRENCY
ARG TORCH_CUDA_ARCH_LIST
ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
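# The make target (e.g. cuda11x) selects which libbitsandbytes_*.so variant gets built;
# CUDA_VERSION should match the toolkit in the base image.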
RUN git clone --branch ${BITSANDBYTES_VERSION} --depth 1 https://github.com/TimDettmers/bitsandbytes.git && \
cd bitsandbytes && \
CUDA_VERSION=${BITSANDBYTES_CUDA_VERSION} make -j ${MAX_CONCURRENCY} ${BITSANDBYTES_MAKE_TARGET} && \
python setup.py bdist_wheel
###############################################################################
# builder-apex
###############################################################################
# Note that this takes forever...
FROM base as builder-apex
ARG APEX_VERSION
ARG MAX_CONCURRENCY
ARG APEX_CUDA_ARCH_LIST
ENV TORCH_CUDA_ARCH_LIST=${APEX_CUDA_ARCH_LIST}
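# --cpp_ext / --cuda_ext build Apex's fused C++/CUDA kernels (fused optimizers, fused layer norm, etc.)
# rather than falling back to the pure-Python implementations.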
RUN python -m pip uninstall -y apex && \
git clone --branch ${APEX_VERSION} --depth 1 https://github.com/NVIDIA/apex && \
cd apex && \
MAX_JOBS=${MAX_CONCURRENCY} python setup.py bdist_wheel --cpp_ext --cuda_ext
###############################################################################
# builder-flash-attn
###############################################################################
# Note that this takes forever...
FROM base as builder-flash-attn
ARG FLASH_ATTN_VERSION
ARG MAX_CONCURRENCY
ARG APEX_CUDA_ARCH_LIST
ENV TORCH_CUDA_ARCH_LIST=${APEX_CUDA_ARCH_LIST}
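# MAX_JOBS caps the number of parallel nvcc jobs; the flash-attention build is memory hungry,
# so keep this low if the build machine has limited RAM.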
RUN git clone --branch v${FLASH_ATTN_VERSION} --depth 1 https://github.com/HazyResearch/flash-attention.git && \
cd flash-attention && \
MAX_JOBS=${MAX_CONCURRENCY} python setup.py bdist_wheel
###############################################################################
# main
###############################################################################
FROM nvidia/cuda:${CONTAINER_CUDA_VERSION}-cudnn${CONTAINER_CUDNN_VERSION}-runtime-ubuntu${CONTAINER_UBUNTU_VERSION}
# Standard labels
LABEL maintainer="The Objective Dad <theobjectivedad@gmail.com>"
LABEL version="1.0.0"
LABEL description="LLM training environment optimized for Axolotl"
LABEL url="https://www.theobjectivedad.com"
ARG HF_ACCELERATE_VERSION
ARG HF_TRANSFORMERS_VERSION
ARG HF_OPTIMUM_VERSION
ARG HF_PEFT_VERSION
ARG PYTORCH_VERSION
ARG PYTORCH_REPO
# Install runtime OS tools
RUN --mount=type=cache,target=/var/cache/apt \
apt-get update && \
apt-get install -y \
git \
git-lfs \
libaio1 \
pip && \
ln -s /usr/bin/python3 /usr/bin/python
# Install pre-built wheels
ARG WHL_HOME=/tmp/wheels
RUN mkdir ${WHL_HOME}
COPY --from=builder-apex /apex/dist/*.whl ${WHL_HOME}/
COPY --from=builder-flash-attn /flash-attention/dist/*.whl ${WHL_HOME}/
COPY --from=builder-bitsandbytes /bitsandbytes/dist/*.whl ${WHL_HOME}/
COPY --from=builder-deepspeed /DeepSpeed/dist/*.whl ${WHL_HOME}/
# This is stupid but the apex setup.py version is hardcoded to 0.1 so we need to force-reinstall,
# see: https://github.com/NVIDIA/apex/blob/0da3ffb92ee6fbe5336602f0e3989db1cd16f880/setup.py#L797
RUN pip install --force-reinstall $(find ${WHL_HOME} -name "apex-*.whl" -printf "%p ")
# Install extra Python packages. Note that we intentionally lock in the versions of all the
# custom-built libraries; apex is again missing here because its version is hardcoded in setup.py.
# See the note above.
RUN pip install --extra-index-url ${PYTORCH_REPO} \
"torch==${PYTORCH_VERSION}" \
"peft@git+https://github.com/huggingface/peft.git@main" \
"accelerate@git+https://github.com/huggingface/accelerate.git@main" \
"transformers@git+https://github.com/huggingface/transformers.git@main" \
"optimum==${HF_OPTIMUM_VERSION}" \
addict \
fire \
PyYAML==6.0 \
datasets \
sentencepiece \
wandb \
einops \
xformers \
bert-score==0.3.13 \
evaluate==0.4.0 \
rouge-score==0.1.2 \
scipy \
scikit-learn==1.2.2 \
$(find ${WHL_HOME} -name "*.whl" -printf "file://%p ")
ARG USERNAME
ARG USER_ID
ARG GROUP_ID
# Add container user & environment
RUN groupadd -g ${GROUP_ID} ${USERNAME} && \
useradd -u ${USER_ID} -g ${GROUP_ID} -ms /bin/bash ${USERNAME} && \
echo 'export PS1="\h:\W $ "' >> /home/${USERNAME}/.bashrc
# Install Axolotl scripts
RUN git clone --depth=1 https://github.com/OpenAccess-AI-Collective/axolotl.git /opt/axolotl
ENV PYTHONPATH=/opt/axolotl/src:${PYTHONPATH}
ENV WORKSPACE=/workspace
RUN mkdir ${WORKSPACE} && chown ${USER_ID}:${GROUP_ID} ${WORKSPACE}
USER ${USERNAME}
# Configure git
RUN git lfs install --skip-repo && \
git config --global credential.helper store
WORKDIR ${WORKSPACE}
ENTRYPOINT ["/bin/bash"]
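After the image builds, a quick sanity check is to confirm that the custom-built wheels actually import inside the container. This is a minimal sketch that assumes the default image tag from the Makefile below and a working NVIDIA container runtime on the host:
# The entrypoint is /bin/bash, so -c '...' is handed straight to the shell
docker run --rm --gpus=all quay.io/theobjectivedad/axolotl-main:latest -c \
  'python -c "import torch, apex, flash_attn, bitsandbytes, deepspeed; print(torch.cuda.is_available())"'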
This is the Makefile I am using to build:
###############################################################################
# Configuration: Training environment build arguments
###############################################################################
USER_ID:=$(shell id -u)
GROUP_ID:=$(shell id -g)
USERNAME:=developer
CONTAINER_CUDA_VERSION:="11.8.0"
CONTAINER_CUDNN_VERSION:="8"
CONTAINER_UBUNTU_VERSION:="22.04"
PYTORCH_CUDA:="cu118"
PYTORCH_VERSION:="2.0.0"
# PYTORCH_REPO:="https://download.pytorch.org/whl/nightly/$(PYTORCH_CUDA)"
PYTORCH_REPO:="https://download.pytorch.org/whl/$(PYTORCH_CUDA)"
TORCHVISION_VERSION:=""
TORCHAUDIO_VERSION:=""
TORCH_CUDA_ARCH_LIST:="8.0;8.6;8.7"
DEEPSPEED_VERSION:="0.9.5"
BITSANDBYTES_VERSION:="0.40.0"
BITSANDBYTES_CUDA_VERSION:="118"
BITSANDBYTES_MAKE_TARGET:="cuda11x"
# Note that Apex 23.05 doesn't recognize compute capability 8.7; this will likely change in a
# future version, but for now the Apex arch list needs to be its own variable.
APEX_VERSION:="23.05"
APEX_CUDA_ARCH_LIST:="8.0;8.6"
FLASH_ATTN_VERSION:=1.0.8
HF_ACCELERATE_VERSION:="0.20.3"
HF_TRANSFORMERS_VERSION:="4.30.2"
HF_PEFT_VERSION:="0.3.0"
HF_OPTIMUM_VERSION:="1.9.1"
MAX_CONCURRENCY:=4
###############################################################################
# Configuration: Image / repository
###############################################################################
BUILD_REPO_NAME:=quay.io
BUILD_REPO_USER:=theobjectivedad
BUILD_IMAGE_NAME:=axolotl-main
BUILD_TAG_NAME:=latest
BUILD_FULL_NAME:=$(BUILD_REPO_NAME)/$(BUILD_REPO_USER)/$(BUILD_IMAGE_NAME):$(BUILD_TAG_NAME)
###############################################################################
# Build
###############################################################################
clean:
@docker rmi $(BUILD_FULL_NAME)
login:
@docker login --username=$(BUILD_REPO_USER) --password-stdin $(BUILD_REPO_NAME)
build:
@docker build \
--progress=plain \
--build-arg=USER_ID=$(USER_ID) \
--build-arg=GROUP_ID=$(GROUP_ID) \
--build-arg=USERNAME=$(USERNAME) \
--build-arg=CONTAINER_CUDA_VERSION=$(CONTAINER_CUDA_VERSION) \
--build-arg=CONTAINER_CUDNN_VERSION=$(CONTAINER_CUDNN_VERSION) \
--build-arg=CONTAINER_UBUNTU_VERSION=$(CONTAINER_UBUNTU_VERSION) \
--build-arg=PYTORCH_VERSION=$(PYTORCH_VERSION) \
--build-arg=PYTORCH_REPO=$(PYTORCH_REPO) \
--build-arg=TORCH_CUDA_ARCH_LIST=$(TORCH_CUDA_ARCH_LIST) \
--build-arg=DEEPSPEED_VERSION=$(DEEPSPEED_VERSION) \
--build-arg=BITSANDBYTES_VERSION=$(BITSANDBYTES_VERSION) \
--build-arg=BITSANDBYTES_CUDA_VERSION=$(BITSANDBYTES_CUDA_VERSION) \
--build-arg=BITSANDBYTES_MAKE_TARGET=$(BITSANDBYTES_MAKE_TARGET) \
--build-arg=APEX_VERSION=$(APEX_VERSION) \
--build-arg=APEX_CUDA_ARCH_LIST=$(APEX_CUDA_ARCH_LIST) \
--build-arg=FLASH_ATTN_VERSION=$(FLASH_ATTN_VERSION) \
--build-arg=HF_ACCELERATE_VERSION=$(HF_ACCELERATE_VERSION) \
--build-arg=HF_TRANSFORMERS_VERSION=$(HF_TRANSFORMERS_VERSION) \
--build-arg=HF_OPTIMUM_VERSION=$(HF_OPTIMUM_VERSION) \
--build-arg=HF_PEFT_VERSION=$(HF_PEFT_VERSION) \
--build-arg=MAX_CONCURRENCY=$(MAX_CONCURRENCY) \
-t $(BUILD_FULL_NAME) .
push: build
@docker push $(BUILD_FULL_NAME)
# TODO: you will likely need to change the paths...
run:
docker run --gpus='all' -it --rm \
--volume=$(WORKSPACE_HOST_PATH):/workspace \
--volume=$(MODELS_HOST_PATH):/models \
--volume=$(DATA_HOST_PATH):/data \
--volume=$(WORK_HOST_PATH):/work \
--volume=$(WORKSPACE_HOST_PATH)/extern/axolotl:/opt/axolotl \
--env-file=$(CURDIR)/.env \
--entrypoint=accelerate \
$(BUILD_FULL_NAME) \
launch \
--config_file /work/accelerate/basic.yaml \
/opt/axolotl/scripts/finetune.py \
/work/atheos/config.yaml \
--inference
.PHONY: clean build login push run
Once the files are in place, just run make build to create the image or make run to start a container.
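Note that the run target references a few host-path variables (WORKSPACE_HOST_PATH, MODELS_HOST_PATH, DATA_HOST_PATH, WORK_HOST_PATH) that are not defined in the Makefile above, plus a .env file, which is a convenient place for secrets such as HUGGING_FACE_HUB_TOKEN or WANDB_API_KEY if you use those services. One way to supply the paths is to override them on the make command line; the paths below are just placeholders for your own layout:
make run \
  WORKSPACE_HOST_PATH=$HOME/workspace \
  MODELS_HOST_PATH=$HOME/models \
  DATA_HOST_PATH=$HOME/data \
  WORK_HOST_PATH=$HOME/work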