Skip to content

Instantly share code, notes, and snippets.

@LunNova
Last active March 1, 2026 18:59
Show Gist options
  • Select an option

  • Save LunNova/f968233ae3a73916b488304a74c20722 to your computer and use it in GitHub Desktop.

Select an option

Save LunNova/f968233ae3a73916b488304a74c20722 to your computer and use it in GitHub Desktop.
diff --git a/csrc/rocm/skinny_gemms.cu b/csrc/rocm/skinny_gemms.cu
index a6cf63f..3ae2583 100644
--- a/csrc/rocm/skinny_gemms.cu
+++ b/csrc/rocm/skinny_gemms.cu
@@ -21,7 +21,8 @@
// However, it may be possible to fix these kernels to handle both issues.
#if defined(__HIPCC__) && \
- (defined(__gfx90a__) || defined(__gfx942__) || defined(__gfx950__))
+ (defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || \
+ defined(__gfx942__) || defined(__gfx950__))
#define __HIP__GFX9__
#endif
@@ -285,9 +286,19 @@ torch::Tensor LLMM1(at::Tensor& in_a, at::Tensor& in_b,
return out_c;
}
+// gfx906 has v_dot2_f32_f16 (VOP3P, 3-src: dst = dot2(s0,s1) + s2)
+// gfx908+ has v_dot2c_f32_f16 (VOP2, compact: dst += dot2(s0,s1))
+#if defined(__gfx906__)
+#define DOT2C_F16_ASM(V0, V2, V3) \
+ asm("v_dot2_f32_f16 %0, %2, %3, %1" : "=v"(V0) : "0"(V0), "v"(V2), "v"(V3));
+#else
+#define DOT2C_F16_ASM(V0, V2, V3) \
+ asm("v_dot2c_f32_f16 %0, %2, %3" : "=v"(V0) : "0"(V0), "v"(V2), "v"(V3));
+#endif
+
#define DOT2C(V0, V2, V3) \
if constexpr (std::is_same_v<scalar_t, half>) { \
- asm("v_dot2c_f32_f16 %0, %2, %3" : "=v"(V0) : "0"(V0), "v"(V2), "v"(V3)); \
+ DOT2C_F16_ASM(V0, V2, V3) \
} else if constexpr (std::is_same_v<scalar_t, __hip_bfloat16>) { \
float2 s = __bfloat1622float2(*((__hip_bfloat162*)(&(V2)))) * \
__bfloat1622float2(*((__hip_bfloat162*)(&(V3)))); \
diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index 6e67456..321386b 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -102,7 +102,8 @@ def on_mi3xx() -> bool:
@cache
def on_gfx9() -> bool:
GPU_ARCH = torch.cuda.get_device_properties("cuda").gcnArchName
- return any(arch in GPU_ARCH for arch in ["gfx90a", "gfx942", "gfx950"])
+ return any(arch in GPU_ARCH for arch in ["gfx906", "gfx908", "gfx90a",
+ "gfx942", "gfx950"])
@cache
@@ -165,7 +166,7 @@ def use_rocm_custom_paged_attention(
@cache
def flash_attn_triton_available() -> bool:
- if not on_gfx1x():
+ if not (on_gfx1x() or on_gfx9()):
return False
try:
from importlib.util import find_spec
@@ -274,6 +275,10 @@ class RocmPlatform(Platform):
f"is not MLA type while requested for MLA backend."
)
+ if selected_backend == AttentionBackendEnum.FLASH_ATTN:
+ logger.info("Using Flash Attention backend.")
+ return AttentionBackendEnum.FLASH_ATTN.get_path()
+
if selected_backend == AttentionBackendEnum.FLEX_ATTENTION:
logger.info("Using FlexAttention backend.")
return AttentionBackendEnum.FLEX_ATTENTION.get_path()
diff --git a/vllm/v1/attention/backends/fa_utils.py b/vllm/v1/attention/backends/fa_utils.py
index 988cf7c..6ee7c14 100644
--- a/vllm/v1/attention/backends/fa_utils.py
+++ b/vllm/v1/attention/backends/fa_utils.py
@@ -23,6 +23,8 @@ elif current_platform.is_xpu():
get_scheduler_metadata = ipex_ops.get_scheduler_metadata # type: ignore[assignment]
elif current_platform.is_rocm():
+ from vllm._custom_ops import reshape_and_cache_flash # type: ignore[no-redef]
+
try:
from flash_attn import flash_attn_varlen_func # type: ignore[no-redef]
except ImportError:
@@ -33,6 +35,9 @@ elif current_platform.is_rocm():
"to be installed. Please install flash-attn first."
)
+ def get_scheduler_metadata(*args: Any, **kwargs: Any) -> Any: # type: ignore[no-redef,misc]
+ raise NotImplementedError("get_scheduler_metadata is not supported on ROCm")
+
def get_flash_attn_version(requires_alibi: bool = False) -> int | None:
# import here to avoid circular dependencies
@@ -127,4 +132,5 @@ def flash_attn_supports_mla():
def is_flash_attn_varlen_func_available() -> bool:
- return current_platform.is_cuda() or current_platform.is_xpu()
+ return (current_platform.is_cuda() or current_platform.is_xpu()
+ or current_platform.is_rocm())
# TRITON_ATTN is the only attention backend I have working happily on gfx906 so far.
nix develop -c env HSA_OVERRIDE_GFX_VERSION=9.0.6 vllm serve Qwen/Qwen3-VL-4B-Instruct --max-model-len 4096 --gpu-memory-utilization 0.95 --limit-mm-per-prompt '{"image": 0, "video": 0}' --attention-backend TRITON_ATTN
@Wulfsta
Copy link

Wulfsta commented Mar 1, 2026

Odd, I still see a failure after applying this patch, though I have not looked at this code path or the trace myself. Here is the log:

Error
vllm serve Qwen/Qwen3-VL-4B-Instruct --max-model-len 4096 --gpu-memory-utilization 0.95 --limit-mm-per-prompt '{"image": 0, "video": 0}' --attention-backend FLASH_ATTN
(APIServer pid=3823265) INFO 03-01 12:02:34 [utils.py:325] 
(APIServer pid=3823265) INFO 03-01 12:02:34 [utils.py:325]        █     █     █▄   ▄█
(APIServer pid=3823265) INFO 03-01 12:02:34 [utils.py:325]  ▄▄ ▄█ █     █     █ ▀▄▀ █  version 0.15.1
(APIServer pid=3823265) INFO 03-01 12:02:34 [utils.py:325]   █▄█▀ █     █     █     █  model   Qwen/Qwen3-VL-4B-Instruct
(APIServer pid=3823265) INFO 03-01 12:02:34 [utils.py:325]    ▀▀  ▀▀▀▀▀ ▀▀▀▀▀ ▀     ▀
(APIServer pid=3823265) INFO 03-01 12:02:34 [utils.py:325] 
(APIServer pid=3823265) INFO 03-01 12:02:34 [utils.py:261] non-default args: {'model_tag': 'Qwen/Qwen3-VL-4B-Instruct', 'api_server_count': 1, 'model': 'Qwen/Qwen3-VL-4B-Instruct', 'max_model_len': 4096, 'attention_backend': 'FLASH_ATTN', 'gpu_memory_utilization': 0.95, 'limit_mm_per_prompt': {'image': 0, 'video': 0}}
(APIServer pid=3823265) INFO 03-01 12:02:35 [model.py:541] Resolved architecture: Qwen3VLForConditionalGeneration
(APIServer pid=3823265) INFO 03-01 12:02:35 [model.py:1561] Using max model len 4096
(APIServer pid=3823265) INFO 03-01 12:02:35 [scheduler.py:226] Chunked prefill is enabled with max_num_batched_tokens=2048.
(APIServer pid=3823265) INFO 03-01 12:02:35 [vllm.py:624] Asynchronous scheduling is enabled.
(APIServer pid=3823265) INFO 03-01 12:02:36 [registry.py:143] All limits of multimodal modalities supported by the model are set to 0, running in text-only mode.
(EngineCore_DP0 pid=3823324) INFO 03-01 12:02:47 [core.py:96] Initializing a V1 LLM engine (v0.15.1) with config: model='Qwen/Qwen3-VL-4B-Instruct', speculative_config=None, tokenizer='Qwen/Qwen3-VL-4B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=4096, download_dir=None, load_format=auto, tensor_parallel_size=1, pipelin
e_parallel_size=1, data_parallel_size=1, disable_custom_all_reduce=True, quantization=None, enforce_eager=False, enable_return_routed_experts=False, kv_cache_dtype=auto, device_config=cuda, structured_outputs_config=StructuredOutputsConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_parser='', reasoning_parser_plugin='', enable_in_reasoning=False), observabil
ity_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None, kv_cache_metrics=False, kv_cache_metrics_sample=0.01, cudagraph_metrics=False, enable_layerwise_nvtx_tracing=False, enable_mfu_metrics=False, enable_mm_processor_stats=False, enable_logging_iteration_details=False), seed=0, served_model_name=Qwen/Qwen3-VL-4B-Instruct, enable_prefix_caching=True, enable_chun
ked_prefill=True, pooler_config=None, compilation_config={'level': None, 'mode': <CompilationMode.VLLM_COMPILE: 3>, 'debug_dump_path': None, 'cache_dir': '', 'compile_cache_save_format': 'binary', 'backend': 'inductor', 'custom_ops': ['none', '+sparse_attn_indexer'], 'splitting_ops': ['vllm::unified_attention', 'vllm::unified_attention_with_output', 'vllm::unified_mla_attention', 'vllm::unified_mla_attention_with_output', 'vllm::m
amba_mixer2', 'vllm::mamba_mixer', 'vllm::short_conv', 'vllm::linear_attention', 'vllm::plamo2_mamba_mixer', 'vllm::gdn_attention_core', 'vllm::kda_attention', 'vllm::sparse_attn_indexer', 'vllm::rocm_aiter_sparse_attn_indexer', 'vllm::unified_kv_cache_update'], 'compile_mm_encoder': False, 'compile_sizes': [], 'compile_ranges_split_points': [2048], 'inductor_compile_config': {'enable_auto_functionalized_v2': False, 'combo_kernels
': True, 'benchmark_combo_kernel': True}, 'inductor_passes': {}, 'cudagraph_mode': <CUDAGraphMode.FULL_AND_PIECEWISE: (2, 1)>, 'cudagraph_num_of_warmups': 1, 'cudagraph_capture_sizes': [1, 2, 4, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400, 416, 432, 448, 464, 480, 496, 512], 'cudagraph_
copy_inputs': False, 'cudagraph_specialize_lora': True, 'use_inductor_graph_partition': False, 'pass_config': {'fuse_norm_quant': False, 'fuse_act_quant': False, 'fuse_attn_quant': False, 'eliminate_noops': True, 'enable_sp': False, 'fuse_gemm_comms': False, 'fuse_allreduce_rms': False}, 'max_cudagraph_capture_size': 512, 'dynamic_shapes_config': {'type': <DynamicShapesType.BACKED: 'backed'>, 'evaluate_guards': False, 'assume_32_b
it_indexing': True}, 'local_cache_dir': None, 'static_all_moe_layers': []}
(EngineCore_DP0 pid=3823324) INFO 03-01 12:02:48 [registry.py:143] All limits of multimodal modalities supported by the model are set to 0, running in text-only mode.
(EngineCore_DP0 pid=3823324) INFO 03-01 12:02:48 [parallel_state.py:1212] world_size=1 rank=0 local_rank=0 distributed_init_method=tcp://10.0.0.2:37789 backend=nccl
(EngineCore_DP0 pid=3823324) INFO 03-01 12:02:48 [parallel_state.py:1423] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, PCP rank 0, TP rank 0, EP rank N/A
(EngineCore_DP0 pid=3823324) INFO 03-01 12:02:49 [gpu_model_runner.py:4033] Starting to load model Qwen/Qwen3-VL-4B-Instruct...
(EngineCore_DP0 pid=3823324) INFO 03-01 12:02:49 [rocm.py:398] Using Torch SDPA backend for ViT model.
(EngineCore_DP0 pid=3823324) WARNING 03-01 12:02:49 [activation.py:667] [ROCm] PyTorch's native GELU with tanh approximation is unstable. Falling back to GELU(approximate='none').
(EngineCore_DP0 pid=3823324) INFO 03-01 12:02:49 [mm_encoder_attention.py:77] Using AttentionBackendEnum.TORCH_SDPA for MMEncoderAttention.
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946] EngineCore failed to start.
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946] Traceback (most recent call last):
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/engine/core.py", line 937, in run_engine_core
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     engine_core = EngineCoreProc(*args, engine_index=dp_rank, **kwargs)
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/engine/core.py", line 691, in __init__
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     super().__init__(
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ~~~~~~~~~~~~~~~~^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         vllm_config,
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         ^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ...<3 lines>...
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         internal_dp_balancing,
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         ^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     )
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/engine/core.py", line 105, in __init__
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     self.model_executor = executor_class(vllm_config)
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]                           ~~~~~~~~~~~~~~^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/executor/abstract.py", line 101, in __init__
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     self._init_executor()
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ~~~~~~~~~~~~~~~~~~~^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/executor/uniproc_executor.py", line 48, in _init_executor
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     self.driver_worker.load_model()
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/worker/gpu_worker.py", line 275, in load_model
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     self.model_runner.load_model(eep_scale_up=eep_scale_up)
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/worker/gpu_model_runner.py", line 4052, in load_model
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     self.model = model_loader.load_model(
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]                  ~~~~~~~~~~~~~~~~~~~~~~~^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         vllm_config=self.vllm_config, model_config=self.model_config
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     )
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/model_executor/model_loader/base_loader.py", line 50, in load_model
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     model = initialize_model(
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         vllm_config=vllm_config, model_config=model_config, prefix=prefix
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     )
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/model_executor/model_loader/utils.py", line 48, in initialize_model
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     return model_class(vllm_config=vllm_config, prefix=prefix)
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/model_executor/models/qwen3_vl.py", line 1294, in __init__
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     self.language_model = Qwen3LLMForCausalLM(
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]                           ~~~~~~~~~~~~~~~~~~~^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         vllm_config=vllm_config, prefix=maybe_prefix(prefix, "language_model")
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     )
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/model_executor/models/qwen3_vl.py", line 1181, in __init__
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     self.model = Qwen3LLMModel(
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]                  ~~~~~~~~~~~~~^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     )
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/compilation/decorators.py", line 306, in __init__
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     old_init(self, **kwargs)
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ~~~~~~~~^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/model_executor/models/qwen3_vl.py", line 1111, in __init__
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     super().__init__(vllm_config=vllm_config, prefix=prefix)
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/compilation/decorators.py", line 306, in __init__
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     old_init(self, **kwargs)
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ~~~~~~~~^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/model_executor/models/qwen3.py", line 248, in __init__
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     super().__init__(
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ~~~~~~~~~~~~~~~~^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         vllm_config=vllm_config, prefix=prefix, decoder_layer_type=Qwen3DecoderLayer
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     )
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/compilation/decorators.py", line 306, in __init__
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     old_init(self, **kwargs)
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ~~~~~~~~^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/model_executor/models/qwen2.py", line 394, in __init__
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     self.start_layer, self.end_layer, self.layers = make_layers(
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]                                                     ~~~~~~~~~~~^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         config.num_hidden_layers,
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         ^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ...<6 lines>...
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         prefix=f"{prefix}.layers",
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         ^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     )
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/model_executor/models/utils.py", line 707, in make_layers
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     maybe_offload_to_cpu(layer_fn(prefix=f"{prefix}.{idx}"))
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]                          ~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/model_executor/models/qwen2.py", line 396, in <lambda>
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     lambda prefix: decoder_layer_type(
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]                    ~~~~~~~~~~~~~~~~~~^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         config=config,
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         ^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ...<2 lines>...
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         prefix=prefix,
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         ^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ),
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/model_executor/models/qwen3.py", line 181, in __init__
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     self.self_attn = Qwen3Attention(
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]                      ~~~~~~~~~~~~~~^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         hidden_size=self.hidden_size,
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ...<11 lines>...
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         dual_chunk_attention_config=dual_chunk_attention_config,
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     )
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/model_executor/models/qwen3.py", line 118, in __init__
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     self.attn = Attention(
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]                 ~~~~~~~~~^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         self.num_heads,
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         ^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ...<12 lines>...
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         else {},
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         ^^^^^^^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     )
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/attention/layer.py", line 234, in __init__
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     self.attn_backend = get_attn_backend(
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]                         ~~~~~~~~~~~~~~~~^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         head_size,
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         ^^^^^^^^^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ...<6 lines>...
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         attn_type=attn_type,
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         ^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     )
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ^
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/attention/selector.py", line 83, in get_attn_backend
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     return _cached_get_attn_backend(
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         backend=backend_enum,
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         attn_selector_config=attn_selector_config,
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     )
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/attention/selector.py", line 96, in _cached_get_attn_backend
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     attention_cls = current_platform.get_attn_backend_cls(
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         backend,
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]         attn_selector_config=attn_selector_config,
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     )
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/platforms/rocm.py", line 343, in get_attn_backend_cls
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     raise RuntimeError(
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     ...<2 lines>...
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946]     )
(EngineCore_DP0 pid=3823324) ERROR 03-01 12:02:50 [core.py:946] RuntimeError: Attention backend FLASH_ATTN is not supported on ROCm. Note that V0 attention backends have been removed.
(EngineCore_DP0 pid=3823324) Process EngineCore_DP0:
(EngineCore_DP0 pid=3823324) Traceback (most recent call last):
(EngineCore_DP0 pid=3823324)   File "/nix/store/slhpx9glq7vl99bwi93bgrhn3syv98s1-python3-3.13.11/lib/python3.13/multiprocessing/process.py", line 313, in _bootstrap
(EngineCore_DP0 pid=3823324)     self.run()
(EngineCore_DP0 pid=3823324)     ~~~~~~~~^^
(EngineCore_DP0 pid=3823324)   File "/nix/store/slhpx9glq7vl99bwi93bgrhn3syv98s1-python3-3.13.11/lib/python3.13/multiprocessing/process.py", line 108, in run
(EngineCore_DP0 pid=3823324)     self._target(*self._args, **self._kwargs)
(EngineCore_DP0 pid=3823324)     ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/engine/core.py", line 950, in run_engine_core
(EngineCore_DP0 pid=3823324)     raise e
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/engine/core.py", line 937, in run_engine_core
(EngineCore_DP0 pid=3823324)     engine_core = EngineCoreProc(*args, engine_index=dp_rank, **kwargs)
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/engine/core.py", line 691, in __init__
(EngineCore_DP0 pid=3823324)     super().__init__(
(EngineCore_DP0 pid=3823324)     ~~~~~~~~~~~~~~~~^
(EngineCore_DP0 pid=3823324)         vllm_config,
(EngineCore_DP0 pid=3823324)         ^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324)     ...<3 lines>...
(EngineCore_DP0 pid=3823324)         internal_dp_balancing,
(EngineCore_DP0 pid=3823324)         ^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324)     )
(EngineCore_DP0 pid=3823324)     ^
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/engine/core.py", line 105, in __init__
(EngineCore_DP0 pid=3823324)     self.model_executor = executor_class(vllm_config)
(EngineCore_DP0 pid=3823324)                           ~~~~~~~~~~~~~~^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/executor/abstract.py", line 101, in __init__
(EngineCore_DP0 pid=3823324)     self._init_executor()
(EngineCore_DP0 pid=3823324)     ~~~~~~~~~~~~~~~~~~~^^
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/executor/uniproc_executor.py", line 48, in _init_executor
(EngineCore_DP0 pid=3823324)     self.driver_worker.load_model()
(EngineCore_DP0 pid=3823324)     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/worker/gpu_worker.py", line 275, in load_model
(EngineCore_DP0 pid=3823324)     self.model_runner.load_model(eep_scale_up=eep_scale_up)
(EngineCore_DP0 pid=3823324)     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/worker/gpu_model_runner.py", line 4052, in load_model
(EngineCore_DP0 pid=3823324)     self.model = model_loader.load_model(
(EngineCore_DP0 pid=3823324)                  ~~~~~~~~~~~~~~~~~~~~~~~^
(EngineCore_DP0 pid=3823324)         vllm_config=self.vllm_config, model_config=self.model_config
(EngineCore_DP0 pid=3823324)         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324)     )
(EngineCore_DP0 pid=3823324)     ^
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/model_executor/model_loader/base_loader.py", line 50, in load_model
(EngineCore_DP0 pid=3823324)     model = initialize_model(
(EngineCore_DP0 pid=3823324)         vllm_config=vllm_config, model_config=model_config, prefix=prefix
(EngineCore_DP0 pid=3823324)     )
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/model_executor/model_loader/utils.py", line 48, in initialize_model
(EngineCore_DP0 pid=3823324)     return model_class(vllm_config=vllm_config, prefix=prefix)
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/model_executor/models/qwen3_vl.py", line 1294, in __init__
(EngineCore_DP0 pid=3823324)     self.language_model = Qwen3LLMForCausalLM(
(EngineCore_DP0 pid=3823324)                           ~~~~~~~~~~~~~~~~~~~^
(EngineCore_DP0 pid=3823324)         vllm_config=vllm_config, prefix=maybe_prefix(prefix, "language_model")
(EngineCore_DP0 pid=3823324)         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324)     )
(EngineCore_DP0 pid=3823324)     ^
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/model_executor/models/qwen3_vl.py", line 1181, in __init__
(EngineCore_DP0 pid=3823324)     self.model = Qwen3LLMModel(
(EngineCore_DP0 pid=3823324)                  ~~~~~~~~~~~~~^
(EngineCore_DP0 pid=3823324)         vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
(EngineCore_DP0 pid=3823324)         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324)     )
(EngineCore_DP0 pid=3823324)     ^
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/compilation/decorators.py", line 306, in __init__
(EngineCore_DP0 pid=3823324)     old_init(self, **kwargs)
(EngineCore_DP0 pid=3823324)     ~~~~~~~~^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/model_executor/models/qwen3_vl.py", line 1111, in __init__
(EngineCore_DP0 pid=3823324)     super().__init__(vllm_config=vllm_config, prefix=prefix)
(EngineCore_DP0 pid=3823324)     ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/compilation/decorators.py", line 306, in __init__
(EngineCore_DP0 pid=3823324)     old_init(self, **kwargs)
(EngineCore_DP0 pid=3823324)     ~~~~~~~~^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/model_executor/models/qwen3.py", line 248, in __init__
(EngineCore_DP0 pid=3823324)     super().__init__(
(EngineCore_DP0 pid=3823324)     ~~~~~~~~~~~~~~~~^
(EngineCore_DP0 pid=3823324)         vllm_config=vllm_config, prefix=prefix, decoder_layer_type=Qwen3DecoderLayer
(EngineCore_DP0 pid=3823324)         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324)     )
(EngineCore_DP0 pid=3823324)     ^
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/compilation/decorators.py", line 306, in __init__
(EngineCore_DP0 pid=3823324)     old_init(self, **kwargs)
(EngineCore_DP0 pid=3823324)     ~~~~~~~~^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/model_executor/models/qwen2.py", line 394, in __init__
(EngineCore_DP0 pid=3823324)     self.start_layer, self.end_layer, self.layers = make_layers(
(EngineCore_DP0 pid=3823324)                                                     ~~~~~~~~~~~^
(EngineCore_DP0 pid=3823324)         config.num_hidden_layers,
(EngineCore_DP0 pid=3823324)         ^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324)     ...<6 lines>...
(EngineCore_DP0 pid=3823324)         prefix=f"{prefix}.layers",
(EngineCore_DP0 pid=3823324)         ^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324)     )
(EngineCore_DP0 pid=3823324)     ^
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/model_executor/models/utils.py", line 707, in make_layers
(EngineCore_DP0 pid=3823324)     maybe_offload_to_cpu(layer_fn(prefix=f"{prefix}.{idx}"))
(EngineCore_DP0 pid=3823324)                          ~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/model_executor/models/qwen2.py", line 396, in <lambda>
(EngineCore_DP0 pid=3823324)     lambda prefix: decoder_layer_type(
(EngineCore_DP0 pid=3823324)                    ~~~~~~~~~~~~~~~~~~^
(EngineCore_DP0 pid=3823324)         config=config,
(EngineCore_DP0 pid=3823324)         ^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324)     ...<2 lines>...
(EngineCore_DP0 pid=3823324)         prefix=prefix,
(EngineCore_DP0 pid=3823324)         ^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324)     ),
(EngineCore_DP0 pid=3823324)     ^
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/model_executor/models/qwen3.py", line 181, in __init__
(EngineCore_DP0 pid=3823324)     self.self_attn = Qwen3Attention(
(EngineCore_DP0 pid=3823324)                      ~~~~~~~~~~~~~~^
(EngineCore_DP0 pid=3823324)         hidden_size=self.hidden_size,
(EngineCore_DP0 pid=3823324)         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324)     ...<11 lines>...
(EngineCore_DP0 pid=3823324)         dual_chunk_attention_config=dual_chunk_attention_config,
(EngineCore_DP0 pid=3823324)         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324)     )
(EngineCore_DP0 pid=3823324)     ^
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/model_executor/models/qwen3.py", line 118, in __init__
(EngineCore_DP0 pid=3823324)     self.attn = Attention(
(EngineCore_DP0 pid=3823324)                 ~~~~~~~~~^
(EngineCore_DP0 pid=3823324)         self.num_heads,
(EngineCore_DP0 pid=3823324)         ^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324)     ...<12 lines>...
(EngineCore_DP0 pid=3823324)         else {},
(EngineCore_DP0 pid=3823324)         ^^^^^^^^
(EngineCore_DP0 pid=3823324)     )
(EngineCore_DP0 pid=3823324)     ^
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/attention/layer.py", line 234, in __init__
(EngineCore_DP0 pid=3823324)     self.attn_backend = get_attn_backend(
(EngineCore_DP0 pid=3823324)                         ~~~~~~~~~~~~~~~~^
(EngineCore_DP0 pid=3823324)         head_size,
(EngineCore_DP0 pid=3823324)         ^^^^^^^^^^
(EngineCore_DP0 pid=3823324)     ...<6 lines>...
(EngineCore_DP0 pid=3823324)         attn_type=attn_type,
(EngineCore_DP0 pid=3823324)         ^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=3823324)     )
(EngineCore_DP0 pid=3823324)     ^
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/attention/selector.py", line 83, in get_attn_backend
(EngineCore_DP0 pid=3823324)     return _cached_get_attn_backend(
(EngineCore_DP0 pid=3823324)         backend=backend_enum,
(EngineCore_DP0 pid=3823324)         attn_selector_config=attn_selector_config,
(EngineCore_DP0 pid=3823324)     )
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/attention/selector.py", line 96, in _cached_get_attn_backend
(EngineCore_DP0 pid=3823324)     attention_cls = current_platform.get_attn_backend_cls(
(EngineCore_DP0 pid=3823324)         backend,
(EngineCore_DP0 pid=3823324)         attn_selector_config=attn_selector_config,
(EngineCore_DP0 pid=3823324)     )
(EngineCore_DP0 pid=3823324)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/platforms/rocm.py", line 343, in get_attn_backend_cls
(EngineCore_DP0 pid=3823324)     raise RuntimeError(
(EngineCore_DP0 pid=3823324)     ...<2 lines>...
(EngineCore_DP0 pid=3823324)     )
(EngineCore_DP0 pid=3823324) RuntimeError: Attention backend FLASH_ATTN is not supported on ROCm. Note that V0 attention backends have been removed.
[rank0]:[W301 12:02:51.015256723 ProcessGroupNCCL.cpp:1524] Warning: WARNING: destroy_process_group() was not called before program exit, which can leak resources. For more info, please see https://pytorch.org/docs/stable/distributed.html#shutdown (function operator())
(APIServer pid=3823265) Traceback (most recent call last):
(APIServer pid=3823265)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/bin/.vllm-wrapped", line 9, in <module>
(APIServer pid=3823265)     sys.exit(main())
(APIServer pid=3823265)              ~~~~^^
(APIServer pid=3823265)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/entrypoints/cli/main.py", line 73, in main
(APIServer pid=3823265)     args.dispatch_function(args)
(APIServer pid=3823265)     ~~~~~~~~~~~~~~~~~~~~~~^^^^^^
(APIServer pid=3823265)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/entrypoints/cli/serve.py", line 111, in cmd
(APIServer pid=3823265)     uvloop.run(run_server(args))
(APIServer pid=3823265)     ~~~~~~~~~~^^^^^^^^^^^^^^^^^^
(APIServer pid=3823265)   File "/nix/store/lndv95jznxxbk7b920pwh91d9kj96zz1-python3.13-uvloop-0.22.0/lib/python3.13/site-packages/uvloop/__init__.py", line 96, in run
(APIServer pid=3823265)     return __asyncio.run(
(APIServer pid=3823265)            ~~~~~~~~~~~~~^
(APIServer pid=3823265)         wrapper(),
(APIServer pid=3823265)         ^^^^^^^^^^
(APIServer pid=3823265)     ...<2 lines>...
(APIServer pid=3823265)         **run_kwargs
(APIServer pid=3823265)         ^^^^^^^^^^^^
(APIServer pid=3823265)     )
(APIServer pid=3823265)     ^
(APIServer pid=3823265)   File "/nix/store/slhpx9glq7vl99bwi93bgrhn3syv98s1-python3-3.13.11/lib/python3.13/asyncio/runners.py", line 195, in run
(APIServer pid=3823265)     return runner.run(main)
(APIServer pid=3823265)            ~~~~~~~~~~^^^^^^
(APIServer pid=3823265)   File "/nix/store/slhpx9glq7vl99bwi93bgrhn3syv98s1-python3-3.13.11/lib/python3.13/asyncio/runners.py", line 118, in run
(APIServer pid=3823265)     return self._loop.run_until_complete(task)
(APIServer pid=3823265)            ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^
(APIServer pid=3823265)   File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
(APIServer pid=3823265)   File "/nix/store/lndv95jznxxbk7b920pwh91d9kj96zz1-python3.13-uvloop-0.22.0/lib/python3.13/site-packages/uvloop/__init__.py", line 48, in wrapper
(APIServer pid=3823265)     return await main
(APIServer pid=3823265)            ^^^^^^^^^^
(APIServer pid=3823265)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/entrypoints/openai/api_server.py", line 919, in run_server
(APIServer pid=3823265)     await run_server_worker(listen_address, sock, args, **uvicorn_kwargs)
(APIServer pid=3823265)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/entrypoints/openai/api_server.py", line 938, in run_server_worker
(APIServer pid=3823265)     async with build_async_engine_client(
(APIServer pid=3823265)                ~~~~~~~~~~~~~~~~~~~~~~~~~^
(APIServer pid=3823265)         args,
(APIServer pid=3823265)         ^^^^^
(APIServer pid=3823265)         client_config=client_config,
(APIServer pid=3823265)         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=3823265)     ) as engine_client:
(APIServer pid=3823265)     ^
(APIServer pid=3823265)   File "/nix/store/slhpx9glq7vl99bwi93bgrhn3syv98s1-python3-3.13.11/lib/python3.13/contextlib.py", line 214, in __aenter__
(APIServer pid=3823265)     return await anext(self.gen)
(APIServer pid=3823265)            ^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=3823265)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/entrypoints/openai/api_server.py", line 147, in build_async_engine_client
(APIServer pid=3823265)     async with build_async_engine_client_from_engine_args(
(APIServer pid=3823265)                ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^
(APIServer pid=3823265)         engine_args,
(APIServer pid=3823265)         ^^^^^^^^^^^^
(APIServer pid=3823265)     ...<2 lines>...
(APIServer pid=3823265)         client_config=client_config,
(APIServer pid=3823265)         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=3823265)     ) as engine:
(APIServer pid=3823265)     ^
(APIServer pid=3823265)   File "/nix/store/slhpx9glq7vl99bwi93bgrhn3syv98s1-python3-3.13.11/lib/python3.13/contextlib.py", line 214, in __aenter__
(APIServer pid=3823265)     return await anext(self.gen)
(APIServer pid=3823265)            ^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=3823265)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/entrypoints/openai/api_server.py", line 188, in build_async_engine_client_from_engine_args
(APIServer pid=3823265)     async_llm = AsyncLLM.from_vllm_config(
(APIServer pid=3823265)         vllm_config=vllm_config,
(APIServer pid=3823265)     ...<6 lines>...
(APIServer pid=3823265)         client_index=client_index,
(APIServer pid=3823265)     )
(APIServer pid=3823265)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/engine/async_llm.py", line 228, in from_vllm_config
(APIServer pid=3823265)     return cls(
(APIServer pid=3823265)         vllm_config=vllm_config,
(APIServer pid=3823265)     ...<9 lines>...
(APIServer pid=3823265)         client_index=client_index,
(APIServer pid=3823265)     )
(APIServer pid=3823265)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/engine/async_llm.py", line 155, in __init__
(APIServer pid=3823265)     self.engine_core = EngineCoreClient.make_async_mp_client(
(APIServer pid=3823265)                        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^
(APIServer pid=3823265)         vllm_config=vllm_config,
(APIServer pid=3823265)         ^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=3823265)     ...<4 lines>...
(APIServer pid=3823265)         client_index=client_index,
(APIServer pid=3823265)         ^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=3823265)     )
(APIServer pid=3823265)     ^
(APIServer pid=3823265)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/engine/core_client.py", line 122, in make_async_mp_client
(APIServer pid=3823265)     return AsyncMPClient(*client_args)
(APIServer pid=3823265)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/engine/core_client.py", line 819, in __init__
(APIServer pid=3823265)     super().__init__(
(APIServer pid=3823265)     ~~~~~~~~~~~~~~~~^
(APIServer pid=3823265)         asyncio_mode=True,
(APIServer pid=3823265)         ^^^^^^^^^^^^^^^^^^
(APIServer pid=3823265)     ...<3 lines>...
(APIServer pid=3823265)         client_addresses=client_addresses,
(APIServer pid=3823265)         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=3823265)     )
(APIServer pid=3823265)     ^
(APIServer pid=3823265)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/engine/core_client.py", line 479, in __init__
(APIServer pid=3823265)     with launch_core_engines(vllm_config, executor_class, log_stats) as (
(APIServer pid=3823265)          ~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=3823265)   File "/nix/store/slhpx9glq7vl99bwi93bgrhn3syv98s1-python3-3.13.11/lib/python3.13/contextlib.py", line 148, in __exit__
(APIServer pid=3823265)     next(self.gen)
(APIServer pid=3823265)     ~~~~^^^^^^^^^^
(APIServer pid=3823265)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/engine/utils.py", line 933, in launch_core_engines
(APIServer pid=3823265)     wait_for_engine_startup(
(APIServer pid=3823265)     ~~~~~~~~~~~~~~~~~~~~~~~^
(APIServer pid=3823265)         handshake_socket,
(APIServer pid=3823265)         ^^^^^^^^^^^^^^^^^
(APIServer pid=3823265)     ...<6 lines>...
(APIServer pid=3823265)         coordinator.proc if coordinator else None,
(APIServer pid=3823265)         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=3823265)     )
(APIServer pid=3823265)     ^
(APIServer pid=3823265)   File "/nix/store/3zwyhsfgky7znllj7rdp9cksgf349dcx-python3.13-vllm-0.15.1/lib/python3.13/site-packages/vllm/v1/engine/utils.py", line 992, in wait_for_engine_startup
(APIServer pid=3823265)     raise RuntimeError(
(APIServer pid=3823265)     ...<3 lines>...
(APIServer pid=3823265)     )
(APIServer pid=3823265) RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): {}

Edit: The C++ arch guard alone isn't enough — the error above is raised from `get_attn_backend_cls` in `vllm/platforms/rocm.py`, so the architecture apparently also needs to be added to the check here: https://github.com/vllm-project/vllm/blob/1892993bc18e243e2c05841314c5e9c06a80c70d/vllm/platforms/rocm.py#L229

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment