Containerfile for creating containers with Ollama to run on Intel Arc GPUs
#Base image: https://gist.github.com/WizardlyBump17/f8a36f0197f7d2bdad957a2a0046d023
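#The base image is assumed to be built locally from that gist and tagged intel-gpu-driver-minimal-oneapi, for example (file name assumed):
#  podman build -t intel-gpu-driver-minimal-oneapi -f Containerfile .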
FROM localhost/intel-gpu-driver-minimal-oneapi
#The libze-intel-gpu1 package is required for `export ONEAPI_DEVICE_SELECTOR=level_zero:0` to work; without it no Level Zero device is found and nothing runs
RUN apt update \
&& apt upgrade -y \
&& apt install -y libze-intel-gpu1 \
&& add-apt-repository -y ppa:deadsnakes/ppa \
&& apt install -y \
python3.11
RUN ln -sT /usr/bin/python3.11 /usr/bin/python \
&& wget https://bootstrap.pypa.io/get-pip.py \
&& python3.11 get-pip.py
WORKDIR /ollama
RUN pip install --pre 'ipex-llm[cpp]' \
&& init-ollama \
&& ln -sT /ollama/ollama /usr/bin/ollama \
&& ln -sT /ollama/ollama-lib /usr/bin/ollama-lib
ARG OLLAMA_KEEP_ALIVE=10m
ARG OLLAMA_HOST=0.0.0.0:11434
ARG ONEAPI_DEVICE_SELECTOR=level_zero:0
#If you want to use more than one Intel Arc GPU, add them as described in https://github.com/intel/ipex-llm/blob/25e17090506603797f120da5e74283a99dfa33b4/docs/mddocs/Quickstart/ollama_portable_zip_quickstart.md#select-specific-gpus-to-run-ollama-when-multiple-ones-are-available
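#For example, to use the first two GPUs (illustrative build command):
#  podman build --build-arg ONEAPI_DEVICE_SELECTOR="level_zero:0;level_zero:1" -t localhost/ollama .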
#Most of this script is copied from the official start-ollama.sh shipped in https://github.com/ipex-llm/ipex-llm/releases/download/v2.3.0-nightly/ollama-ipex-llm-2.3.0b20250725-ubuntu.tgz
RUN echo "#!/bin/bash\nsource /opt/intel/oneapi/setvars.sh\nexport SYCL_CACHE_PERSISTENT=1\nexport ZES_ENABLE_SYSMAN=1\nexport SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1\nexport ONEAPI_DEVICE_SELECTOR=$ONEAPI_DEVICE_SELECTOR\nexport OLLAMA_KEEP_ALIVE=$OLLAMA_KEEP_ALIVE\nexport OLLAMA_MODELS=/ollama/models/\nexport OLLAMA_HOST=$OLLAMA_HOST\nexport OLLAMA_LOAD_TIMEOUT=30m\n./ollama serve" > start-ollama.sh \
&& chmod +x start-ollama.sh \
&& ln -sT /ollama/start-ollama.sh /usr/bin/start-ollama.sh
CMD ["start-ollama.sh"]
services:
  ollama:
    image: "localhost/ollama" #the name of the image you built from the Containerfile
    ports:
      - "11434:11434"
    volumes:
      - "ollama:/ollama/models/" #persists only the models, so they don't have to be downloaded again every time you do `ollama run` or `ollama pull`
    devices:
      - "/dev/dri/" #gives the container access to the Intel Arc GPU
volumes:
  ollama:
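With the compose file saved as compose.yaml, a typical usage sketch (the model name is only an example; any model from the Ollama library works):

podman compose up -d   # or: docker compose up -d
curl http://localhost:11434/api/tags
podman compose exec ollama ollama pull llama3.2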