Skip to content

Instantly share code, notes, and snippets.

@scottt
Created May 23, 2025 17:55
Show Gist options
  • Select an option

  • Save scottt/6c29a7cf3abc353193ce014db2f4db78 to your computer and use it in GitHub Desktop.

Select an option

Save scottt/6c29a7cf3abc353193ce014db2f4db78 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import os
import sys
import subprocess
import time
from pathlib import Path
# --- Configuration ---
# AOTRITON_TARGET_ARCH: semicolon-separated CMake list e.g. "gfx90a;gfx942;gfx1100;gfx1151;gfx1201"
# 0.10.x https://github.com/ROCm/aotriton/blob/fbb36df18b7b55d28d3ec72352ab07bb0e2a9602/CMakeLists.txt#L61
# 0.9.x https://github.com/ROCm/aotriton/blob/release/0.9/CMakeLists.txt
# https://github.com/scottt/aotriton/blob/gfx1151-therock/v2python/gpu_targets.py#L26
# Architectures currently targeted (with the hardware / people who have it):
# gfx1100: 7900 xtx: ?
# gfx1101: 7800 xt: @Max, @Tai
# gfx1102: 7600 xt: @7Shi
# gfx1103: 780M in framework 13: AMDers should have it
AOTRITON_TARGET_ARCH = 'gfx1100;gfx1101;gfx1102;gfx1103'
# We need to install `torch` in the virtual env so we need to create the venv and specify
# the python version used (passed to `uv venv --python`).
# NOTE: Python version of Triton wheel must match Python version used to build AOTriton
PYTHON_VER = '3.12'
# Forwarded to CMake as -DAOTRITON_NOIMAGE_MODE=ON/OFF. When False the script
# also installs `pip` and `torch` into the venv and, on win32, uses a prebuilt
# Triton wheel.
AOTRITON_NOIMAGE_MODE = True
# True: configure an aotriton 0.9 tree with -DTARGET_GPUS (GPU names).
# False: configure with -DAOTRITON_TARGET_ARCH (gfx architecture names).
build_aotriton_09 = True
# True: pass -DAOTRITON_NO_PYTHON=OFF plus explicit Python3_* hints to CMake.
build_python_bindings = False
def arch_to_gpu(arch):
    """Map a gfx architecture name to the GPU name used by aotriton 0.9.

    aotriton 0.10+ and 0.9 have different GPU naming conventions, see
    https://github.com/scottt/aotriton/blob/gfx1151-therock/v2python/gpu_targets.py#L26

    Args:
        arch: Architecture string such as ``'gfx1100'``.

    Returns:
        The matching aotriton 0.9 GPU name, e.g. ``'Navi31'``.

    Raises:
        ValueError: If ``arch`` has no known mapping.
    """
    m = {
        'gfx90a': 'MI200',
        'gfx942': 'MI300X',
        'gfx1100': 'Navi31',
        'gfx1101': 'Navi32',
        'gfx1102': 'Navi33',
        'gfx1103': 'Navi3i',
        'gfx1151': 'Navi3.5',
        'gfx1201': 'RX9070XT',
    }
    try:
        return m[arch]
    except KeyError:
        # `from None` hides the internal KeyError so the traceback shows only
        # the actionable ValueError instead of a chained "During handling of
        # the above exception..." report.
        raise ValueError(f'Do not know how to map gpu {arch} to aotriton 0.9 gpu name') from None
def target_archs_to_gpus(target_archs):
    """Convert a ';'-separated gfx arch list into a ';'-separated GPU name list.

    Each entry is translated with ``arch_to_gpu``; an entry it does not know
    makes that function raise ``ValueError``.
    """
    gpu_names = [arch_to_gpu(arch_name) for arch_name in target_archs.split(';')]
    return ';'.join(gpu_names)
def target_archs_to_path(archs):
    """Turn a ';'-separated arch list into a '-'-separated path component.

    Example: 'gfx1100;gfx1101' -> 'gfx1100-gfx1101'
    """
    return '-'.join(archs.split(';'))
fork_name = 'aotriton'
# The source checkout and build locations are published through environment
# variables and read back below.
os.environ['AOTRITON_SOURCE_DIR'] = f'/w/{fork_name}'
output_dir = Path(f'/o/{fork_name}-{target_archs_to_path(AOTRITON_TARGET_ARCH)}')
os.environ['AOTRITON_BUILD_DIR'] = str((output_dir / 'build').resolve())

# A prebuilt Triton wheel is only used for Windows builds that generate images.
triton_wheel_path = None
if (sys.platform == 'win32') and (not AOTRITON_NOIMAGE_MODE):
    # NOTE(review): this wheel is tagged cp313 while PYTHON_VER is '3.12', and
    # the configuration notes say the two Python versions must match - confirm.
    triton_wheel_path = Path('/work/triton-lshqqytiger/python/dist/triton-3.3.0+gitf8727c94-cp313-cp313-win_amd64.whl')
    # Existence check only: strict=True raises if the wheel is missing.
    triton_wheel_path.resolve(strict=True)

# strict=True: the source checkout must already exist.
source_dir = Path(os.environ['AOTRITON_SOURCE_DIR']).resolve(strict=True)
build_dir = Path(os.environ['AOTRITON_BUILD_DIR']).resolve()
# mkdir(exist_ok=True) already tolerates an existing directory, so no separate
# (race-prone) exists() check is needed.
build_dir.mkdir(parents=True, exist_ok=True)
venv_dir = build_dir / 'venv'
# Executable name exported as the CMake C/C++ compiler launcher further below.
CCACHE_EXECUTABLE = "ccache"
# --- Setup ---
# Cache locations; they are only created and exported when
# should_change_caches_dir() returns True.
caches_dir = Path("/caches")
ccache_dir = caches_dir / "ccache"
pip_cache_dir = caches_dir / "pip"
# On Windows, point PKG_CONFIG at the Strawberry Perl pkg-config wrapper.
if sys.platform == 'win32':
os.environ['PKG_CONFIG'] = 'C:/Strawberry/perl/bin/pkg-config.bat'
# NOTE(review): indentation was lost in this copy of the script, so it is not
# visible whether the next assignment belongs to the win32 branch - confirm.
os.environ['PKG_CONFIG_PATH'] = '/xz-output/lib/pkgconfig'
# Echo the effective configuration so that build logs are self-describing.
print("--- Configuration ---")
print(f"Source Directory: {source_dir}")
print(f"Cache Directory: {caches_dir}")
print(f"LLVM_SYSPATH: {os.environ.get('LLVM_SYSPATH')}")
print(f"LLVM_INCLUDE_DIRS: {os.environ.get('LLVM_INCLUDE_DIRS')}")
print(f"LLVM_LIBRARY_DIR: {os.environ.get('LLVM_LIBRARY_DIR')}")
# Report the pkg-config variables this script actually sets (PKG_CONFIG and
# PKG_CONFIG_PATH); the previously printed PKG_CONFIG_DIR is never set here.
print(f"PKG_CONFIG: {os.environ.get('PKG_CONFIG')}")
print(f"PKG_CONFIG_PATH: {os.environ.get('PKG_CONFIG_PATH')}")
print(f"Script Arguments: {sys.argv[1:]}")
print("---------------------")
def should_change_caches_dir():
    """Tell whether the cache directories should be redirected.

    Returns:
        True exactly when ``sys.platform`` is ``'win32'``.
    """
    running_on_windows = (sys.platform == 'win32')
    return running_on_windows
# Create the cache directories first (only where caches are relocated) ...
if should_change_caches_dir():
    print("Ensuring directories exist...")
    for _cache_dir in (ccache_dir, pip_cache_dir):
        _cache_dir.mkdir(parents=True, exist_ok=True)

# ... then export them so ccache and pip pick them up.
print("Setting environment variables...")
if should_change_caches_dir():
    for _env_name, _cache_dir in (('CCACHE_DIR', ccache_dir), ('PIP_CACHE_DIR', pip_cache_dir)):
        os.environ[_env_name] = str(_cache_dir.resolve())
use_ccache = True
_launcher_vars = ('CMAKE_C_COMPILER_LAUNCHER', 'CMAKE_CXX_COMPILER_LAUNCHER')
if not use_ccache:
    # Make sure no launcher inherited from the calling environment is used.
    print("Skipping ccache configuration.")
    for _launcher_var in _launcher_vars:
        os.environ.pop(_launcher_var, None)
else:
    # CMake reads these variables to initialise the compiler launcher.
    print(f"Configuring CMake to use ccache ('{CCACHE_EXECUTABLE}')...")
    for _launcher_var in _launcher_vars:
        os.environ[_launcher_var] = CCACHE_EXECUTABLE

# Report the resulting cache-related environment (None means "not set").
for _reported_var in ('CCACHE_DIR', 'PIP_CACHE_DIR', *_launcher_vars):
    print(f"{_reported_var} = {os.environ.get(_reported_var)}")
# --- Helper function to run commands ---
def run_command(cmd_list, cwd=None):
    """Run a command, echo it, time it, and exit the script on failure.

    Args:
        cmd_list: Program followed by its arguments, passed to
            ``subprocess.run`` without a shell.
        cwd: Optional working directory for the child process.

    Returns:
        The ``subprocess.CompletedProcess`` of the successful run.

    Exits:
        Calls ``sys.exit(1)`` when ``cwd`` is not an existing directory, the
        executable cannot be found, or an unexpected error occurs; calls
        ``sys.exit(returncode)`` when the command returns a non-zero status.
    """
    print(f"\n--- Executing: {' '.join(map(str, cmd_list))} ---", flush=True)
    # A missing working directory can also surface as FileNotFoundError from
    # subprocess.run, which would be misreported below as a missing command.
    # Check it up front so the message names the real problem.
    if cwd is not None and not os.path.isdir(cwd):
        print(f"ERROR: Working directory does not exist: {cwd}", file=sys.stderr)
        sys.exit(1)
    start_time = time.monotonic()
    try:
        # Use shell=False (default) for better security and argument handling
        # check=True raises CalledProcessError on non-zero exit code (like set -e)
        process = subprocess.run(cmd_list, cwd=cwd, check=True, text=True)
    except FileNotFoundError:
        print(f"ERROR: Command not found: {cmd_list[0]}. Is it installed and in PATH?", file=sys.stderr)
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        print(f"ERROR: Command failed with exit code {e.returncode}", file=sys.stderr)
        sys.exit(e.returncode)
    except Exception as e:
        # Top-level boundary of the script: report and stop the build.
        print(f"ERROR: An unexpected error occurred: {e}", file=sys.stderr)
        sys.exit(1)
    end_time = time.monotonic()
    print(f"--- Command finished successfully in {end_time - start_time:.2f} seconds ---", flush=True)
    return process
# --- Build Steps ---
# Step 1: Python environment. Uses `uv` to create the venv with PYTHON_VER and
# to install the source tree's requirements.txt into it.
# NOTE(review): indentation was lost in this copy of the script, so it is not
# visible which of the statements below are nested under
# `if not venv_dir.exists():` or under `if not AOTRITON_NOIMAGE_MODE:` -
# confirm against the original before relying on either reading.
if not venv_dir.exists():
cmd = [
"uv",
"venv",
"--python", PYTHON_VER,
str(venv_dir),
]
run_command(cmd)
# Install the build requirements listed by the aotriton source tree.
cmd = [
"uv",
"pip",
"install",
"-r", str(source_dir / "requirements.txt"),
"--python", str(venv_dir),
]
run_command(cmd)
# Builds that are not in no-image mode additionally get `pip` and `torch`.
if not AOTRITON_NOIMAGE_MODE:
# https://github.com/astral-sh/uv/issues/8721
cmd = [
"uv",
"pip",
"install", "pip", "torch",
"--python", str(venv_dir),
]
run_command(cmd)
# Step 2: CMake configure.
# aotriton 0.9 is configured with -DTARGET_GPUS (GPU names); otherwise the
# gfx architecture list is passed through as -DAOTRITON_TARGET_ARCH.
use_aotriton_target_arch = not build_aotriton_09
if build_aotriton_09:
    TARGET_GPUS = target_archs_to_gpus(AOTRITON_TARGET_ARCH)

# Add "--trace" or "--debug-find" to this list when debugging the configure step.
cmd = [
    "cmake",
    "-GNinja",
    f"-DVENV_DIR={venv_dir}",
    f"-DCMAKE_INSTALL_PREFIX={(build_dir / 'install_dir').resolve()}",
    "-DCMAKE_BUILD_TYPE=Release",
    "-DAOTRITON_GPU_BUILD_TIMEOUT=0",
    # AOTRITON_NO_PYTHON must be OFF and AOTRITON_NAME_SUFFIX must be set to run the unit tests
    f'-DAOTRITON_NO_PYTHON={"OFF" if build_python_bindings else "ON"}',
    "-DHIP_PLATFORM=amd",
    f'-DAOTRITON_NOIMAGE_MODE={"ON" if AOTRITON_NOIMAGE_MODE else "OFF"}',
    "-S", str(source_dir.resolve()),
    "-B", str(build_dir.resolve()),
]

# Exactly one of the two target-selection options is passed.
if use_aotriton_target_arch:
    cmd.append(f"-DAOTRITON_TARGET_ARCH={AOTRITON_TARGET_ARCH}")
else:  # aotriton-0.9
    cmd.append(f"-DTARGET_GPUS={TARGET_GPUS}")

# Explicit interpreter hints are only needed when the bindings are built.
if build_python_bindings:
    cmd += [
        "-DPython3_EXECUTABLE=/usr/bin/python3",
        "-DPython3_INCLUDE_DIR=/usr/include/python3.13",
        "-DPython3_LIBRARY=/usr/lib64/libpython3.13.so",
    ]

if triton_wheel_path is not None:
    cmd.append(f"-DINSTALL_TRITON_FROM_WHEEL={triton_wheel_path.resolve()}")

if sys.platform == 'win32':
    cmd.append('-Ddlfcn-win32_DIR=/dlfcn-output/share/dlfcn-win32')

# Add extra arguments from script call
cmd += sys.argv[1:]
run_command(cmd)
# Step 3: build and install with ninja from the build directory.
if sys.platform == 'win32':
# Set HIP_PATH to dir containing `bin/ld.lld.exe` for triton\backends\amd\compiler.py
# lld = Path(os.path.join( os.environ['HIP_PATH'] , 'bin', 'ld.lld.exe' ))
os.environ['HIP_PATH'] = '/therock-output-gfx1151/build/dist/rocm/lib/llvm'
# NOTE(review): indentation was lost in this copy of the script, so it is not
# visible whether MAX_JOBS is set only on win32 or on every platform - confirm.
os.environ['MAX_JOBS'] = '31'
cmd = [
"ninja", "install"
]
# Windows builds that are not in no-image mode run ninja with a single job (-j 1).
if sys.platform == 'win32' and (not AOTRITON_NOIMAGE_MODE):
cmd.extend(['-j', '1'])
run_command(cmd, cwd=build_dir)
print("\nBuild script completed successfully.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment