@tomsing1
Last active November 18, 2025 03:17
Experimenting with the clustermq R package on a local SLURM cluster set up via Docker
##-------- Setting up slurm-docker-cluster
# clone slurm-docker-cluster repository
git clone https://github.com/giovtorres/slurm-docker-cluster.git
cd slurm-docker-cluster
# build docker images and start containers
make up
make status
make test
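# optional: a quick manual check in addition to "make test", using the default
# service names from the slurm-docker-cluster repository and assuming /data is
# the shared job directory mounted by its compose file
docker exec slurmctld sinfo
docker exec slurmctld bash -c 'cd /data && sbatch --wrap="hostname" && squeue'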
make down
##-------- Adding R and clustermq
# create a second Dockerfile that installs R and clustermq
cat > Dockerfile.r << 'EOF'
ARG SLURM_VERSION
FROM slurm-docker-cluster:${SLURM_VERSION}
USER root
RUN dnf -y install epel-release \
    && dnf -y install R zeromq-devel \
    && dnf clean all
RUN cat > /usr/lib64/R/etc/Rprofile.site <<'REOF'
options(repos = c(CRAN = sprintf("https://packagemanager.posit.co/cran/latest/bin/linux/rhel9-%s/%s",
                                 R.version["arch"], substr(getRversion(), 1, 3))))
REOF
RUN R -q -e 'install.packages(c("clustermq", "callr"))'
EOF
# create a docker-compose.override.r.yml file
cat > docker-compose.override.r.yml << 'EOF'
x-node-build: &node-build
  context: .
  dockerfile: Dockerfile.r
  args:
    SLURM_VERSION: ${SLURM_VERSION:-25.05.3}
    BASE_IMAGE: slurm-docker-cluster:${SLURM_VERSION:-25.05.3}

services:
  slurmctld:
    image: slurmctld-r:${SLURM_VERSION:-25.05.3}
    build: *node-build
  c1:
    image: c1-r:${SLURM_VERSION:-25.05.3}
    build: *node-build
  c2:
    image: c2-r:${SLURM_VERSION:-25.05.3}
    build: *node-build
EOF
# build the new docker images defined by the override
docker compose \
  -f docker-compose.yml \
  -f docker-compose.override.r.yml \
  build slurmctld c1 c2
docker images
# start the cluster, using the new images
docker compose \
  -f docker-compose.yml \
  -f docker-compose.override.r.yml \
  up -d
# check that the installation was successful on all 3 nodes
for NODE in slurmctld c1 c2
do
  echo ">>> Node ${NODE}"
  docker exec -it ${NODE} R --vanilla -s -e \
    "paste('clustermq', installed.packages()['clustermq', 'Version'])"
done
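# optionally also confirm that the package loads (i.e. that its ZeroMQ
# dependency is usable), not just that it is installed
docker exec slurmctld R --vanilla -s -e \
  'suppressMessages(library(clustermq)); "clustermq loaded"'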
##--------- Interactively submitting jobs with clustermq
make shell
# create the SLURM template file for clustermq
# see: https://mschubert.github.io/clustermq/articles/userguide.html#slurm
cat > /data/slurm.tmpl << 'EOF'
#!/bin/sh
#SBATCH --job-name={{ job_name }}
#SBATCH --partition=normal
#SBATCH --output={{ log_file | /dev/null }}
#SBATCH --error={{ log_file | /dev/null }}
#SBATCH --mem-per-cpu={{ memory | 4096 }}
#SBATCH --array=1-{{ n_jobs }}
#SBATCH --cpus-per-task={{ cores | 1 }}
ulimit -v $(( 1024 * {{ memory | 4096 }} ))
CMQ_AUTH={{ auth }} R --no-save --no-restore -e 'clustermq:::worker("{{ master }}")'
EOF
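# optional sanity check before involving R: the template submits to the
# "normal" partition, so confirm that it exists and that a small array job
# runs; these are plain SLURM commands, and the /data output paths below are
# just for illustration (wait a moment for the tasks to finish before cat-ing)
sinfo
sbatch --partition=normal --array=1-2 --wrap="hostname" --output=/data/check_%a.out
squeue
cat /data/check_*.out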
# start an interactive R session
R --vanilla
# attach the clustermq package
library(clustermq)
# first, use the multiprocess backend (i.e. local processes, not SLURM)
options(
  clustermq.scheduler = "multiprocess"
)
res <- Q(
  fun = function(i) paste(Sys.info()[["nodename"]], i),
  i = 1:10,
  n_jobs = 2,
  timeout = 60
)
print(res)
# second, using the SLURM backend
options(
  clustermq.scheduler = "slurm",
  clustermq.template = "/data/slurm.tmpl"
)
# we add a Sys.sleep() call to slow down execution enough to run `make jobs`
# in a separate shell and watch the jobs appear in the queue
test_fun <- function(i) {Sys.sleep(1); paste(Sys.info()[["nodename"]], i)}
res <- Q(
  fun = test_fun,
  i = 1:10,
  n_jobs = 2,    # maps to the SLURM array size
  memory = 1000, # up to 1000 MB per CPU
  timeout = 60
)
print(res)
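# optional: re-run with per-worker log files for debugging; log_worker = TRUE
# makes clustermq fill in the {{ log_file }} placeholder of the template so
# each worker writes a log instead of sending output to /dev/null (file names
# and locations are chosen by clustermq)
res_logged <- Q(
  fun = test_fun,
  i = 1:4,
  n_jobs = 2,
  memory = 1000,
  timeout = 60,
  log_worker = TRUE
)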
# quit the R session
q()
# leave the container shell
exit
##-------- Cleanup
make down
make clean
docker images
docker rmi slurm-docker-cluster:25.05.3 c1-r:25.05.3 c2-r:25.05.3 slurmctld-r:25.05.3
docker buildx prune --all --force