Skip to content

Instantly share code, notes, and snippets.

@rainx0r
Last active August 20, 2024 10:22
Show Gist options
  • Select an option

  • Save rainx0r/a3a1808c8c63dea2d3d9754f731fb1e4 to your computer and use it in GitHub Desktop.

Select an option

Save rainx0r/a3a1808c8c63dea2d3d9754f731fb1e4 to your computer and use it in GitHub Desktop.
HTCondor submit file for the MTMHSAC JAX Metaworld job
# ---------------------------------------------
# Execution environment: the job runs in the docker universe using the image
# below (the tag is a full 40-character hex string, so the image is pinned).
universe = docker
docker_image = ghcr.io/rainx0r/metaworld-jax:1b4317179ac8b78a5f10c755bc1d3b2ab0d7213f
# ---------------------------------------------------
# Batch label shared by every job queued from this file.
JobBatchName = "MTMHSAC MT10 Torquescale 8.2"
# -------------------------------------------------
# Event, out and error logs
# One set of files per (exp, seed) pair. `steps` is not defined anywhere in
# this file, so an unguarded $(steps) expands to an empty string and produces
# names such as "<exp>..<seed>.log". The step count is therefore kept in the
# file name only when `steps` is supplied from outside this file (for example
# on the condor_submit command line); otherwise it is left out entirely.
if defined steps
  run_tag = $(exp).$(steps).$(seed)
else
  run_tag = $(exp).$(seed)
endif
# log receives the HTCondor job event log; output and error receive the job's
# stdout and stderr respectively.
log = logs/$(run_tag).log
output = outs/$(run_tag).log
error = logs/error/$(run_tag).log
# -----------------------------------
# File Transfer, Input, Output
# File transfer is left at its default; the explicit setting stays disabled:
# should_transfer_files = YES
# Variables exported to the job. Each $ENV(...) is filled in from the
# environment of the shell that runs condor_submit, so PWD is the submit
# directory and WANDB_API_KEY must already be set there.
environment = "mount=$ENV(PWD) WANDB_API_KEY=$ENV(WANDB_API_KEY) PYTHONPATH=$ENV(PWD)"
# Match only machines whose CUDAGlobalMemoryMb is above 4000 and whose
# CUDACapability is strictly above 7.0 (a capability of exactly 7.0 is excluded).
requirements = (CUDAGlobalMemoryMb > 4000) && (CUDACapability > 7.0)
# --------------------------------------
# Resources
# Slot request: one GPU, ten CPU cores and 5G of memory.
request_gpus = 1
request_cpus = 10
request_memory = 5G
# Custom job ClassAd attributes (the leading "+" inserts them into the job ad).
# NOTE(review): their meaning and units are set by the pool's policy, not by
# this file — presumably GPU memory in MB and expected run time in hours;
# confirm against the cluster documentation.
+GPUMem = 5000
+JobRunTime = 10
+CanCheckpoint = true
# NOTE(review): written without a leading "+"; presumably accepted by
# condor_submit as the max_job_retirement_time setting — confirm.
MaxJobRetirementTime = 0
# -----------------------------------
# Queue commands. Submit macros and flags let one file launch the same command
# with several option sets, much as you would from the command line.
# Macros consumed by `arguments`; `seed` is filled in per job by the queue
# statement at the bottom.
exp = mtmhsac_mt10_torquescale8.2_new
script = $ENV(PWD)/cleanrl/mtmhsac_jax_checkpoint.py
# Job arguments: the training script path followed by its flags. Checkpoints go
# under the submit directory's runs/ folder and --resume is always passed.
arguments = $(script) --checkpoint-dir $ENV(PWD)/runs --seed $(seed) --exp-name $(exp) --resume --track --wandb-project-name mw-runs --wandb-entity evangelos-ch --save-model
# Queue one job for each listed seed (three jobs in total).
queue 1 seed in 9409 8347 3797
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment