My Bash setup and aliases
- My local bash_aliases
- Aliases for HPC
- Colored terminal
- My bashrc
- New PC setup
- Slurm runner
- Slurm launching example

My local bash_aliases

alias nvgui='__NV_PRIME_RENDER_OFFLOAD=1 __GLX_VENDOR_LIBRARY_NAME=nvidia'
# Clear all installed pip packages
alias clear-pip='pip freeze | sed "s/^.*#egg=//; s/^-e //; s/@.*//" | xargs pip uninstall -y'
# Open JZ proxy and browser
alias yump_prox='ssh -D localhost:9080 -N wassim@apcssh01.in2p3.fr -i ~/.ssh/id_rsa'
alias jz='firefox -P JZ & yump_prox'
# CMake helpers
alias configure='cmake -S . -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=1'
alias mkmk='cmake --build build -j'
# Battery status
alias check_bat='upower -i /org/freedesktop/UPower/devices/battery_BAT0'
# List manually installed apt packages not part of the initial install
alias get-apt='comm -23 <(apt-mark showmanual | sort -u) <(gzip -dc /var/log/installer/initial-status.gz | sed -n "s/^Package: //p" | sort -u)'
# Python and NVIDIA reset aliases
kill_process(){
    sudo kill -9 $(pidof "$1")
}
alias kill-process=kill_process
alias nv_reset='sudo rmmod nvidia_uvm && sudo modprobe nvidia_uvm'
alias kill-python='kill_process python'
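# Typical recovery sequence when a CUDA job hangs and the GPU stays busy
# (a sketch; assumes the stuck process is a Python interpreter):
#   kill-python   # SIGKILL every process named "python"
#   nv_reset      # reload nvidia_uvm so the GPU is usable again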
# Rsync copy over ssh with progress, compression, and excluding venv, .git, and build directories
# Usage: cp-jz SRC... DEST [-e EXCLUDE1 EXCLUDE2 ...]
cp_jz() {
    if [ "$#" -lt 2 ]; then
        echo "Usage: cp-jz SRC... DEST [-e EXCLUDE1 EXCLUDE2 ...]" >&2
        return 2
    fi
    local args=()
    local extra_excludes=()
    local in_excludes=0
    local dest=""
    # Parse arguments
    for arg in "$@"; do
        if [[ "$arg" == "-e" ]]; then
            in_excludes=1
        elif [[ $in_excludes -eq 1 ]]; then
            extra_excludes+=("--exclude" "$arg")
        else
            args+=("$arg")
        fi
    done
    if [ "${#args[@]}" -lt 2 ]; then
        echo "Usage: cp-jz SRC... DEST [-e EXCLUDE1 EXCLUDE2 ...]" >&2
        return 2
    fi
    dest="${args[-1]}"
    # Execute rsync with default and extra excludes
    rsync -avz --progress \
        --exclude venv \
        --exclude .git \
        --exclude build \
        "${extra_excludes[@]}" \
        -e ssh \
        "${args[@]:0:${#args[@]}-1}" \
        "$dest"
}
alias cp-jz=cp_jz
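# Example invocation (hypothetical host and paths; '-e' adds extra rsync excludes
# on top of the default venv/.git/build):
#   cp-jz ./my_project wassim@jean-zay.idris.fr:~/work/ -e __pycache__ "*.nsys-rep"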
# Kill process listening on a given port
kill_port(){
    sudo kill $(sudo lsof -t -iTCP:"$1" -sTCP:LISTEN);
}
alias kill-port=kill_port
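# Example: free TCP port 8888 if a stale Jupyter server is still listening on it
#   kill-port 8888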

Aliases for HPC

# Module loading
alias purge='module purge'
## Python and conda
alias py='conda deactivate'
alias pip-get='pip install --user --no-cache-dir'
## NVHPC
alias nv='module load nvidia-compilers/23.9 cuda/12.2.0 cudnn/8.9.7.29-cuda nccl/2.19.3-1-cuda openmpi/4.1.5-cuda cmake'
## Compile commands
alias configure='cmake -S . -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=1'
alias mkmk='cmake --build build -j'
# Helpers
alias size='du -h --max-depth=1 '
alias clear-cache='rm -rf ~/.cache/*'
# Python packages
alias clear-pip='pip freeze | sed "s/^.*#egg=//; s/^-e //; s/@.*//" | xargs pip uninstall -y'
# Queue
alias queue='squeue -u $USER -o "%.18i %.9P %.25j %.2t %.10S %.10M %.6D %R"'
# Slurm GPU checks
alias gpus_check='srun python -c "import jax;jax.distributed.initialize();print(jax.devices())"'
alias gpu_check='srun python -c "import jax;print(jax.devices())"'
## Multi-GPU jobs
alias alloc_v100_multi='salloc --account=tkc@v100 --partition=gpu_p2 --gres=gpu:8 --ntasks-per-node=4 --time=00:10:00 --cpus-per-task=10 --hint=nomultithread --qos=qos_gpu-dev --nodes=1'
alias alloc_a100_multi='salloc --account=tkc@a100 -C a100 --gres=gpu:8 --ntasks-per-node=8 --time=00:10:00 --cpus-per-task=8 --hint=nomultithread --qos=qos_gpu_a100-dev --nodes=1'
alias alloc_h100_multi='salloc --account=tkc@h100 -C h100 --gres=gpu:8 --ntasks-per-node=8 --time=00:10:00 --cpus-per-task=8 --hint=nomultithread --qos=qos_gpu_h100-dev --nodes=1'
## Single-GPU jobs
alias alloc_fast='salloc --account=tkc@v100 --gres=gpu:1 --ntasks-per-node=1 --time=00:10:00 --cpus-per-task=10 --hint=nomultithread --qos=qos_gpu-dev --nodes=1'
alias alloc_v100='salloc --account=tkc@v100 --partition=gpu_p2 --gres=gpu:1 --ntasks-per-node=1 --time=00:10:00 --cpus-per-task=10 --hint=nomultithread --qos=qos_gpu-dev --nodes=1'
alias alloc_a100='salloc --account=tkc@a100 -C a100 --gres=gpu:1 --ntasks-per-node=1 --time=00:10:00 --cpus-per-task=8 --hint=nomultithread --qos=qos_gpu_a100-dev --nodes=1'
alias alloc_h100='salloc --account=tkc@h100 -C h100 --gres=gpu:1 --ntasks-per-node=1 --time=00:10:00 --cpus-per-task=8 --hint=nomultithread --qos=qos_gpu_h100-dev --nodes=1'
alias a100='purge && module load arch/a100 && source $ALL_CCFRWORK/venv/a100/bin/activate '
alias v100='purge && source $ALL_CCFRWORK/venv/v100/bin/activate '
alias h100='purge && module load arch/h100 && source $ALL_CCFRWORK/venv/h100/bin/activate '
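# Typical interactive A100 session (a sketch; assumes the modules and venvs above exist):
#   a100            # purge modules, load arch/a100 and activate the A100 venv
#   alloc_a100      # request 1 GPU for 10 minutes on the dev QoS
#   gpu_check       # srun a one-liner that lists the JAX devices seen by the job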
alias tp_zen='export CXXFLAGS="-tp=zen2 -noswitcherror"'
# Unset proxy
alias unset_prox='unset no_proxy && unset NO_PROXY'

Colored terminal

parse_git_branch() {
    git branch 2> /dev/null | sed -e '/^[^*]/d' -e 's/* \(.*\)/ (\1)/'
}
# General for all UNIX systems
PS1='\[\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;33m\]\w\[\033[36m\]$(parse_git_branch)\[\033[01;00m\]\$ '
# Debian and Ubuntu
if [ "$color_prompt" = yes ]; then
    PS1='${debian_chroot:+($debian_chroot)}\[\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;33m\]\w\[\033[36m\]$(parse_git_branch)\[\033[01;00m\]\$ '
else
    PS1='${debian_chroot:+($debian_chroot)}\u@\h:\w\$ '
fi
unset color_prompt force_color_prompt
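# Resulting prompt (colors omitted; hypothetical user/host/branch):
#   wassim@laptop:~/projects/furax (main)$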

Slurm launching example

#!/bin/bash
# Usage: bash validate_fg.sh 1.0 c1d1s1
noise=$1
td_sp=20.0
SKY=$2
# Collect all kmeans job IDs here
job_ids=()
# SLURM_SCRIPT is expected to point to the Slurm runner script (see "Slurm runner" below)
SBATCH_ARGS="--account=apc --partition gpu_v100 --gpus 1"
# The second assignment overrides the first (Jean Zay A100 instead of the APC V100 partition)
SBATCH_ARGS="--account=nih@a100 --nodes=1 --gres=gpu:1 --tasks-per-node=1 -C a100"
# ---------- GAL020 ----------
jid=$(sbatch $SBATCH_ARGS --job-name=FX_20_${noise}_${SKY} \
      $SLURM_SCRIPT kmeans-model -n 64 -ns 1 -nr $noise \
      -pc 10000 500 500 -tag $SKY -m GAL020 -i LiteBIRD \
      -sp 1.54 $td_sp -3.0 -mi 1000 -o RESULTS -cond)
job_ids+=("$jid")
# ---------- GAL040 ----------
jid=$(sbatch $SBATCH_ARGS --job-name=FX_40_${noise}_${SKY} \
      $SLURM_SCRIPT kmeans-model -n 64 -ns 1 -nr $noise \
      -pc 10000 500 500 -tag $SKY -m GAL040 -i LiteBIRD \
      -sp 1.54 $td_sp -3.0 -mi 1000 -o RESULTS -cond)
job_ids+=("$jid")
# ---------- GAL060 ----------
jid=$(sbatch $SBATCH_ARGS --job-name=FX_60_${noise}_${SKY} \
      $SLURM_SCRIPT kmeans-model -n 64 -ns 1 -nr $noise \
      -pc 10000 500 500 -tag $SKY -m GAL060 -i LiteBIRD \
      -sp 1.54 $td_sp -3.0 -mi 1000 -o RESULTS -cond)
job_ids+=("$jid")
# ---------- Final analysis job depending on ALL previous jobs ----------
# Build colon-separated list of job IDs: id1:id2:id3:...
deps=$(IFS=:; echo "${job_ids[*]}")
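# e.g. with job_ids=(123 124 125) this yields deps="123:124:125",
# which plugs into --dependency=afterany:123:124:125 below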
noise_text=Noiseless
noise_percent=0
if (( $(echo "$noise > 0.0" | bc -l) )); then
    noise_text=Noisy
    noise_percent=100
fi
sbatch --dependency=afterany:$deps \
    $SBATCH_ARGS \
    --job-name=FX_validate_${noise}_${SKY} \
    $SLURM_SCRIPT r_analysis validate -r kmeans_noise${noise_percent}_${SKY} -t "FURAX $noise_text $SKY" \
    -ird RESULTS --noise-ratio $noise --no-tex --scales 1e-2 1e-3
sbatch --dependency=afterany:$deps \
    $SBATCH_ARGS \
    --job-name=FX_cache_${noise}_${SKY} \
    $SLURM_SCRIPT r_analysis cache -r kmeans -ird RESULTS -mi 2000 --no-tex

My bashrc

# enable programmable completion features (you don't need to enable
# this, if it's already enabled in /etc/bash.bashrc and /etc/profile
# sources /etc/bash.bashrc).
if ! shopt -oq posix; then
    if [ -f /usr/share/bash-completion/bash_completion ]; then
        . /usr/share/bash-completion/bash_completion
    elif [ -f /etc/bash_completion ]; then
        . /etc/bash_completion
    fi
fi
# >>> mamba initialize >>>
# !! Contents within this block are managed by 'micromamba shell init' !!
export MAMBA_EXE='/home/wassim/.local/bin/micromamba';
export MAMBA_ROOT_PREFIX='/home/wassim/micromamba';
__mamba_setup="$("$MAMBA_EXE" shell hook --shell bash --root-prefix "$MAMBA_ROOT_PREFIX" 2> /dev/null)"
if [ $? -eq 0 ]; then
    eval "$__mamba_setup"
else
    alias micromamba="$MAMBA_EXE"  # Fallback on help from micromamba activate
fi
unset __mamba_setup
# <<< mamba initialize <<<
# <<< Init nvim >>>
export PATH=$HOME/.local/tools/nvim/bin:$PATH
# <<< Init node >>>
export PATH=$HOME/.local/tools/node/bin:$PATH
# <<< Init ripgrep >>>
export PATH=$HOME/.local/tools/ripgrep:$PATH
# <<< Init kitty >>>
export PATH=$HOME/.local/kitty.app/bin:$PATH
# <<< Init blender >>>
export PATH=$HOME/Software/blender/:$PATH
# <<< Init Quarto >>>
export PATH=$HOME/Software/quarto/bin:$PATH
export QUARTO_PYTHON=$HOME/micromamba/envs/ocr/bin/python
# <<< GEMINI >>>
export GEMINI_API_KEY="AIzaSyAxxafTAFTgcJ7fF3vQsXwrhs1mgOV1mO4"
export ANTHROPIC_MODEL="claude-sonnet-4-5"
# --- NVHPC toggle helpers -------------------------------------------------
# safe path helpers (no duplicates; handle unset vars)
_path_prepend() {
    local var="$1" dir="${2%/}"
    [[ -z "$var" || -z "$dir" || ! -e "$dir" ]] && return 0
    eval "local cur=\"\${$var}\""
    case ":$cur:" in *":$dir:"*) ;; *) eval "$var=\"${dir}${cur:+:$cur}\"";; esac
}
_path_remove() {
    local var="$1" dir="${2%/}"
    eval "local cur=\"\${$var}\""
    [[ -z "$cur" ]] && return 0
    local IFS=':' part out=()
    for part in $cur; do
        [[ "${part%/}" == "$dir" ]] && continue
        out+=("$part")
    done
    IFS=: eval "$var=\"${out[*]}\""
}
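# Example (hypothetical directory): idempotent prepend, then clean removal
#   _path_prepend PATH "$HOME/.local/bin"   # no-op if already present or missing on disk
#   _path_remove  PATH "$HOME/.local/bin"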
nvhpc_activate() {
    # --- versions and roots ---
    export CUDA_VERSION=12.6
    NVARCH="$(uname -s)_$(uname -m)"
    NVCOMPILERS=/opt/nvidia/hpc_sdk
    NVHPC_VERSION=25.1_FALSE  # NOTE: keep or change if this was a typo
    export NVHPC_ROOT="$NVCOMPILERS/$NVARCH/$NVHPC_VERSION"
    export CUDA_HOME="$NVHPC_ROOT/cuda/$CUDA_VERSION"
    export CUDA_PATH="$CUDA_HOME"
    export MATHLIBS="$NVHPC_ROOT/math_libs"
    export COMM_LIBS="$NVHPC_ROOT/comm_libs"
    # --- library paths ---
    _path_prepend LD_LIBRARY_PATH "$MATHLIBS/$CUDA_VERSION/lib64"
    _path_prepend LD_LIBRARY_PATH "$NVHPC_ROOT/comm_libs/$CUDA_VERSION/nccl/lib"
    _path_prepend LD_LIBRARY_PATH "$NVHPC_ROOT/comm_libs/$CUDA_VERSION/nvshmem/lib"
    _path_prepend LD_LIBRARY_PATH "$NVHPC_ROOT/comm_libs/$CUDA_VERSION/openmpi4/latest/lib"
    _path_prepend LD_LIBRARY_PATH "$NVHPC_ROOT/comm_libs/$CUDA_VERSION/hpcx/latest/ompi/lib"
    _path_prepend LD_LIBRARY_PATH "$CUDA_HOME/extras/CUPTI/lib64"
    _path_prepend LD_LIBRARY_PATH "$CUDA_HOME/lib64"
    _path_prepend LD_LIBRARY_PATH "/usr/lib/x86_64-linux-gnu/openmpi/lib"
    # --- PATH / MANPATH ---
    _path_prepend PATH "$COMM_LIBS/mpi/bin"
    _path_prepend PATH "$COMM_LIBS/$CUDA_VERSION/openmpi4/latest/include"
    _path_prepend PATH "$MATHLIBS/$CUDA_VERSION/include"
    _path_prepend PATH "$NVHPC_ROOT/compilers/bin"
    _path_prepend PATH "$CUDA_HOME/include"
    _path_prepend PATH "$CUDA_HOME/bin"
    _path_prepend MANPATH "$NVHPC_ROOT/compilers/man"
    export NVHPC_ACTIVE=1
    echo "NVHPC: activated (CUDA $CUDA_VERSION @ $NVHPC_ROOT)"
}
nvhpc_deactivate() {
    # remove paths first (while vars are still set)
    local LDPATHS=(
        "$MATHLIBS/$CUDA_VERSION/lib64"
        "$NVHPC_ROOT/comm_libs/$CUDA_VERSION/nccl/lib"
        "$NVHPC_ROOT/comm_libs/$CUDA_VERSION/nvshmem/lib"
        "$NVHPC_ROOT/comm_libs/$CUDA_VERSION/openmpi4/latest/lib"
        "$NVHPC_ROOT/comm_libs/$CUDA_VERSION/hpcx/latest/ompi/lib"
        "$CUDA_HOME/extras/CUPTI/lib64"
        "$CUDA_HOME/lib64"
        "/usr/lib/x86_64-linux-gnu/openmpi/lib"
    )
    local BINPATHS=(
        "$COMM_LIBS/mpi/bin"
        "$COMM_LIBS/$CUDA_VERSION/openmpi4/latest/include"
        "$MATHLIBS/$CUDA_VERSION/include"
        "$NVHPC_ROOT/compilers/bin"
        "$CUDA_HOME/include"
        "$CUDA_HOME/bin"
    )
    local p
    for p in "${LDPATHS[@]}"; do _path_remove LD_LIBRARY_PATH "$p"; done
    for p in "${BINPATHS[@]}"; do _path_remove PATH "$p"; done
    _path_remove MANPATH "$NVHPC_ROOT/compilers/man"
    # unset vars
    unset CUDA_VERSION NVCOMPILERS NVHPC_VERSION NVHPC_ROOT CUDA_HOME CUDA_PATH MATHLIBS COMM_LIBS NVARCH
    unset NVHPC_ACTIVE
    echo "NVHPC: deactivated"
}
toggle_nvhpc() {
    if [[ "${NVHPC_ACTIVE:-}" == "1" ]]; then
        nvhpc_deactivate
    else
        nvhpc_activate
    fi
}
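# Usage sketch: run once to activate, run again to deactivate
#   toggle_nvhpc          # NVHPC: activated (CUDA 12.6 @ /opt/nvidia/hpc_sdk/...)
#   which nvc mpirun      # compilers and MPI now resolve from the SDK tree
#   toggle_nvhpc          # NVHPC: deactivated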
# --- end NVHPC toggle ------------------------------------------------------
eval "$(codex completion bash)"

New PC setup

# Snaps
sudo snap install discord firefox slack zoom-client
sudo snap install code --classic
# Neovim
bash <(curl -s https://raw.githubusercontent.com/ASKabalan/nvim-config-2.5/main/setup.sh)
# Micromamba
"${SHELL}" <(curl -L micro.mamba.pm/install.sh)
# apt installs
sudo apt install -y tree xclip htop nvtop powertop solaar thunderbird imagemagick meld
# Compilers
sudo apt install -y build-essential clangd-15 clang-format-15 clang-tidy-15
# Imaging
sudo apt install -y libcairo2-dev imagemagick graphviz gimp fontforge
# NVIDIA driver
sudo ubuntu-drivers autoinstall
# Install CUDA
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-ubuntu2404.pin
sudo mv cuda-ubuntu2404.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget https://developer.download.nvidia.com/compute/cuda/12.6.1/local_installers/cuda-repo-ubuntu2404-12-6-local_12.6.1-560.35.03-1_amd64.deb
sudo dpkg -i cuda-repo-ubuntu2404-12-6-local_12.6.1-560.35.03-1_amd64.deb
sudo cp /var/cuda-repo-ubuntu2404-12-6-local/cuda-*-keyring.gpg /usr/share/keyrings/
sudo apt-get update
sudo apt-get -y install cuda-toolkit-12-6
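# Quick sanity check after installing the driver and toolkit (a reboot may be
# needed before the driver is loaded):
#   nvidia-smi        # driver sees the GPU
#   nvcc --version    # toolkit 12.6 (may require adding /usr/local/cuda-12.6/bin to PATH)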
# WirePlumber / PipeWire
sudo apt install -y wireplumber
systemctl enable --user pipewire
systemctl enable --user wireplumber
systemctl start --user pipewire
systemctl start --user wireplumber

Slurm runner

#!/bin/bash
##############################################################################################################################
# USAGE: sbatch --account=nih@a100 --nodes=1 --gres=gpu:1 --tasks-per-node=1 -C a100 <this script> <command> [args...]
##############################################################################################################################
#SBATCH --job-name=Likelihoods      # job name
#SBATCH --cpus-per-task=8           # CPUs per task on gpu_p5 (1/8 of an 8-GPU node)
#SBATCH --nodes=1
#SBATCH --tasks-per-node=1
#SBATCH --hint=nomultithread        # disable hyperthreading
#SBATCH --time=02:00:00             # maximum requested wall time (HH:MM:SS)
#SBATCH --qos=qos_gpu_a100-dev
#SBATCH --output=%x_%N.out          # stdout file name
#SBATCH --error=%x_%N.err           # stderr file name
##SBATCH --exclusive                # dedicated resources
#SBATCH --parsable                  # print only the job ID, so callers can capture it for dependencies
# Clean up modules loaded interactively and inherited by default
num_nodes=$SLURM_JOB_NUM_NODES
num_gpu_per_node=$SLURM_NTASKS_PER_NODE
OUTPUT_FOLDER_ARGS=1
# Calculate the total number of GPUs
nb_gpus=$(( num_nodes * num_gpu_per_node ))
module purge
echo "Job partition: $SLURM_JOB_PARTITION"
# Load the environment matching the partition: gpu_p5 (A100) and gpu_p6 (H100)
# need their own arch modules and virtual environments
if [[ "$SLURM_JOB_PARTITION" == "gpu_p5" ]]; then
    module load arch/a100
    source /gpfsdswork/projects/rech/tkc/commun/venv/a100/bin/activate
    gpu_name=a100
elif [[ "$SLURM_JOB_PARTITION" == "gpu_p6" ]]; then
    module load arch/h100
    source /gpfsdswork/projects/rech/tkc/commun/venv/h100/bin/activate
    gpu_name=h100
else
    source /gpfsdswork/projects/rech/tkc/commun/venv/v100/bin/activate
    gpu_name=v100
fi
echo "The number of nodes allocated for this job is: $num_nodes"
echo "The number of GPUs allocated for this job is: $nb_gpus"
export EQX_ON_ERROR=nan
export OUTPUT_FOLDER_ARGS=1
export SEND_EMAIL=1
function plaunch() {
    if [ $# -lt 1 ]; then
        echo "Usage: plaunch <python_script> [arguments for the script]"
        return 1
    fi
    local script_name=$(basename "$1" .py)
    local output_dir="prof_traces/$gpu_name/$nb_gpus/$script_name"
    local report_dir="out_prof/$gpu_name/$nb_gpus/$script_name"
    if [ $OUTPUT_FOLDER_ARGS -eq 1 ]; then
        local args=$(echo "${@:2}" | tr ' ' '_')
        # Remove characters '/' and '-' from folder name
        args=$(echo "$args" | tr -d '/-')
        output_dir="prof_traces/$gpu_name/$nb_gpus/$script_name/$args"
        report_dir="out_prof/$gpu_name/$nb_gpus/$script_name/$args"
    fi
    mkdir -p "$output_dir"
    mkdir -p "$report_dir"
    srun nsys profile -t cuda,nvtx,osrt,mpi -o "$report_dir/report_rank%q{SLURM_PROCID}" python "$@" > "$output_dir/$script_name.out" 2> "$output_dir/$script_name.err" || true
}
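# Example (hypothetical script and arguments): profile a training run under Nsight Systems
#   plaunch train.py --steps 100
# One report per rank is written under out_prof/<gpu>/<ngpus>/<script>/..., which can be
# opened in the Nsight Systems GUI (nsys-ui) or summarized with `nsys stats`.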
function slaunch() {
    if [ $# -lt 1 ]; then
        echo "Usage: slaunch python <python_script> [arguments for the script]"
        return 1
    fi
    local script_name
    script_name=$(basename "$1" .py)
    local output_dir="traces/$gpu_name/$nb_gpus/$script_name"
    if [ "$OUTPUT_FOLDER_ARGS" -eq 1 ]; then
        local args
        args=$(echo "${@:2}" | tr ' ' '_')
        # Remove characters '/' and '-' from folder name
        args=$(echo "$args" | tr -d '/-')
        output_dir="traces/$gpu_name/$nb_gpus/$script_name/$args"
    fi
    mkdir -p "$output_dir"
    # Run the job and capture exit code
    srun "$@" > "$output_dir/$script_name.out" 2> "$output_dir/$script_name.err"
    local rc=$?
    # ---- conditional email ----
    if [ "$SEND_EMAIL" -eq 1 ] && [ -n "$EMAIL" ]; then
        {
            echo "SLURM job $SLURM_JOB_ID finished."
            echo "Job name: $SLURM_JOB_NAME"
            echo "Exit code: $rc"
            echo "Node: $(hostname)"
            echo "Output directory: $output_dir"
            echo
            echo "Attaching:"
            echo "  $output_dir/$script_name.out"
            echo "  $output_dir/$script_name.err"
            echo
            # Attach .out and .err using uuencode
            uuencode "$output_dir/$script_name.out" "${script_name}.out"
            uuencode "$output_dir/$script_name.err" "${script_name}.err"
        } | mail -s "[$SLURM_JOB_NAME] job $SLURM_JOB_ID finished (rc=$rc)" "$EMAIL"
    fi
}
# Echo the commands being executed
set -x
slaunch "$@"
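# End-to-end example (hypothetical file names; mirrors the USAGE line above): submit this
# runner on an A100 node and let slaunch wrap the command with srun:
#   sbatch --account=nih@a100 --nodes=1 --gres=gpu:1 --tasks-per-node=1 -C a100 \
#          runner.slurm python train.py --steps 100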