error qwen3-next PP
I'm testing the newly released Qwen3-Next model with vLLM:
https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Instruct
CUDA0 and CUDA4 = NVIDIA RTX 5090 (32 GB VRAM)
CUDA2 = NVIDIA RTX PRO 6000 Blackwell (96 GB VRAM)
CUDA1 = NVIDIA RTX 3090 (24 GB VRAM)
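A quick sanity check of the VLLM_PP_LAYER_PARTITION="8,29,8,3" split used in the command below (a minimal Python sketch, not vLLM code, assuming Qwen3-Next-80B-A3B-Instruct has 48 decoder layers):

# Minimal sketch: verify the manual pipeline-parallel layer split.
# Assumption: Qwen3-Next-80B-A3B-Instruct has 48 decoder layers (8 + 29 + 8 + 3 = 48).
partition = [int(n) for n in "8,29,8,3".split(",")]  # VLLM_PP_LAYER_PARTITION
num_layers = 48
assert len(partition) == 4, "one entry per pipeline rank (-pp 4)"
assert sum(partition) == num_layers, f"partition sums to {sum(partition)}, expected {num_layers}"
for rank, layers in enumerate(partition):
    print(f"PP rank {rank}: {layers} layers")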
But I got this error:
TORCHDYNAMO_VERBOSE=1 CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=0,2,4,1 VLLM_PP_LAYER_PARTITION="8,29,8,3" VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 vllm serve /mnt/llms/models/Qwen/Qwen3-Next-80B-A3B-Instruct --port 8000 --max-model-len 8192 -pp 4
INFO 09-11 21:59:55 [__init__.py:216] Automatically detected platform cuda.
(APIServer pid=262524) INFO 09-11 21:59:57 [api_server.py:1896] vLLM API server version 0.10.2rc2.dev307+g361ae27f8
(APIServer pid=262524) INFO 09-11 21:59:57 [utils.py:328] non-default args: {'model_tag': '/mnt/llms/models/Qwen/Qwen3-Next-80B-A3B-Instruct', 'model': '/mnt/llms/models/Qwen/Qwen3-Next-80B-A3B-Instruct', 'max_model_len': 8192, 'pipeline_parallel_size': 4}
(APIServer pid=262524) INFO 09-11 22:00:05 [__init__.py:742] Resolved architecture: Qwen3NextForCausalLM
(APIServer pid=262524) `torch_dtype` is deprecated! Use `dtype` instead!
(APIServer pid=262524) INFO 09-11 22:00:05 [__init__.py:1815] Using max model len 8192
(APIServer pid=262524) INFO 09-11 22:00:05 [scheduler.py:222] Chunked prefill is enabled with max_num_batched_tokens=2048.
(APIServer pid=262524) INFO 09-11 22:00:05 [config.py:310] Hybrid or mamba-based model detected: disabling prefix caching since it is not yet supported.
(APIServer pid=262524) INFO 09-11 22:00:05 [config.py:321] Hybrid or mamba-based model detected: setting cudagraph mode to FULL_AND_PIECEWISE in order to optimize performance.
(APIServer pid=262524) INFO 09-11 22:00:06 [config.py:390] Setting attention block size to 544 tokens to ensure that attention page size is >= mamba page size.
(APIServer pid=262524) INFO 09-11 22:00:06 [config.py:411] Padding mamba page size by 1.49% to ensure that mamba page size and attention page size are exactly equal.
INFO 09-11 22:00:10 [__init__.py:216] Automatically detected platform cuda.
(EngineCore_DP0 pid=262791) INFO 09-11 22:00:13 [core.py:654] Waiting for init message from front-end.
(EngineCore_DP0 pid=262791) INFO 09-11 22:00:13 [core.py:76] Initializing a V1 LLM engine (v0.10.2rc2.dev307+g361ae27f8) with config: model='/mnt/llms/models/Qwen/Qwen3-Next-80B-A3B-Instruct', speculative_config=None, tokenizer='/mnt/llms/models/Qwen/Qwen3-Next-80B-A3B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=8192, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=4, data_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None), seed=0, served_model_name=/mnt/llms/models/Qwen/Qwen3-Next-80B-A3B-Instruct, enable_prefix_caching=False, chunked_prefill_enabled=True, use_async_output_proc=False, pooler_config=None, compilation_config={"level":3,"debug_dump_path":"","cache_dir":"","backend":"","custom_ops":[],"splitting_ops":["vllm.unified_attention","vllm.unified_attention_with_output","vllm.mamba_mixer2","vllm.mamba_mixer","vllm.short_conv","vllm.linear_attention","vllm.plamo2_mamba_mixer","vllm.gdn_attention"],"use_inductor":true,"compile_sizes":[],"inductor_compile_config":{"enable_auto_functionalized_v2":false},"inductor_passes":{},"cudagraph_mode":[2,1],"use_cudagraph":true,"cudagraph_num_of_warmups":1,"cudagraph_capture_sizes":[512,504,496,488,480,472,464,456,448,440,432,424,416,408,400,392,384,376,368,360,352,344,336,328,320,312,304,296,288,280,272,264,256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],"cudagraph_copy_inputs":false,"full_cuda_graph":false,"pass_config":{},"max_capture_size":512,"local_cache_dir":null}
(EngineCore_DP0 pid=262791) WARNING 09-11 22:00:13 [multiproc_worker_utils.py:273] Reducing Torch parallelism from 24 threads to 1 to avoid unnecessary CPU contention. Set OMP_NUM_THREADS in the external environment to tune this value as needed.
(EngineCore_DP0 pid=262791) INFO 09-11 22:00:13 [shm_broadcast.py:289] vLLM message queue communication handle: Handle(local_reader_ranks=[0, 1, 2, 3], buffer_handle=(4, 16777216, 10, 'psm_43f66670'), local_subscribe_addr='ipc:///tmp/6e40fdfa-6a77-44a0-8d74-2edc9381a5e9', remote_subscribe_addr=None, remote_addr_ipv6=False)
INFO 09-11 22:00:16 [__init__.py:216] Automatically detected platform cuda.
INFO 09-11 22:00:16 [__init__.py:216] Automatically detected platform cuda.
INFO 09-11 22:00:16 [__init__.py:216] Automatically detected platform cuda.
INFO 09-11 22:00:16 [__init__.py:216] Automatically detected platform cuda.
INFO 09-11 22:00:19 [shm_broadcast.py:289] vLLM message queue communication handle: Handle(local_reader_ranks=[0], buffer_handle=(1, 10485760, 10, 'psm_317dfe34'), local_subscribe_addr='ipc:///tmp/5a36acde-016f-4ca1-820d-3f7490b06a1e', remote_subscribe_addr=None, remote_addr_ipv6=False)
INFO 09-11 22:00:20 [shm_broadcast.py:289] vLLM message queue communication handle: Handle(local_reader_ranks=[0], buffer_handle=(1, 10485760, 10, 'psm_e0b770fc'), local_subscribe_addr='ipc:///tmp/a3967d2c-4555-4a71-91a6-7e1469c764fe', remote_subscribe_addr=None, remote_addr_ipv6=False)
INFO 09-11 22:00:20 [shm_broadcast.py:289] vLLM message queue communication handle: Handle(local_reader_ranks=[0], buffer_handle=(1, 10485760, 10, 'psm_884b1f73'), local_subscribe_addr='ipc:///tmp/2b396f4d-c874-4b8f-8cb8-36bf159179a1', remote_subscribe_addr=None, remote_addr_ipv6=False)
INFO 09-11 22:00:20 [shm_broadcast.py:289] vLLM message queue communication handle: Handle(local_reader_ranks=[0], buffer_handle=(1, 10485760, 10, 'psm_b8d7f8dc'), local_subscribe_addr='ipc:///tmp/2ee006c2-3d4f-42ad-a76f-ed3b7eafe37c', remote_subscribe_addr=None, remote_addr_ipv6=False)
[W911 22:00:20.781323200 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:54881 (errno: 97 - Address family not supported by protocol).
[W911 22:00:20.989241501 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:54881 (errno: 97 - Address family not supported by protocol).
[W911 22:00:20.000551196 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:54881 (errno: 97 - Address family not supported by protocol).
[W911 22:00:20.028469823 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:54881 (errno: 97 - Address family not supported by protocol).
[W911 22:00:20.028991113 ProcessGroupNCCL.cpp:981] Warning: TORCH_NCCL_AVOID_RECORD_STREAMS is the default now, this environment variable is thus deprecated. (function operator())
[W911 22:00:20.254256685 ProcessGroupNCCL.cpp:981] Warning: TORCH_NCCL_AVOID_RECORD_STREAMS is the default now, this environment variable is thus deprecated. (function operator())
[W911 22:00:21.674162060 ProcessGroupNCCL.cpp:981] Warning: TORCH_NCCL_AVOID_RECORD_STREAMS is the default now, this environment variable is thus deprecated. (function operator())
[W911 22:00:21.674427770 ProcessGroupNCCL.cpp:981] Warning: TORCH_NCCL_AVOID_RECORD_STREAMS is the default now, this environment variable is thus deprecated. (function operator())
[Gloo] Rank 3 is connected to 3 peer ranks. Expected number of connected peer ranks is : 3
[Gloo] Rank 0 is connected to 3 peer ranks. Expected number of connected peer ranks is : 3
[Gloo] Rank 2 is connected to 3 peer ranks. Expected number of connected peer ranks is : 3
[Gloo] Rank 1 is connected to 3 peer ranks. Expected number of connected peer ranks is : 3
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 3 peer ranks. Expected number of connected peer ranks is : 3
[Gloo] Rank 2 is connected to 3 peer ranks. Expected number of connected peer ranks is : 3
[Gloo] Rank 1 is connected to 3 peer ranks. Expected number of connected peer ranks is : 3
[Gloo] Rank 3 is connected to 3 peer ranks. Expected number of connected peer ranks is : 3
INFO 09-11 22:00:21 [__init__.py:1433] Found nccl from library libnccl.so.2
INFO 09-11 22:00:21 [__init__.py:1433] Found nccl from library libnccl.so.2
INFO 09-11 22:00:21 [pynccl.py:70] vLLM is using nccl==2.27.3
INFO 09-11 22:00:21 [pynccl.py:70] vLLM is using nccl==2.27.3
INFO 09-11 22:00:21 [__init__.py:1433] Found nccl from library libnccl.so.2
INFO 09-11 22:00:21 [__init__.py:1433] Found nccl from library libnccl.so.2
INFO 09-11 22:00:21 [pynccl.py:70] vLLM is using nccl==2.27.3
INFO 09-11 22:00:21 [pynccl.py:70] vLLM is using nccl==2.27.3
WARNING 09-11 22:00:21 [symm_mem.py:58] SymmMemCommunicator: Device capability 12.0 not supported, communicator is not available.
WARNING 09-11 22:00:21 [symm_mem.py:58] SymmMemCommunicator: Device capability 12.0 not supported, communicator is not available.
WARNING 09-11 22:00:21 [symm_mem.py:58] SymmMemCommunicator: Device capability 12.0 not supported, communicator is not available.
WARNING 09-11 22:00:21 [symm_mem.py:58] SymmMemCommunicator: Device capability 12.0 not supported, communicator is not available.
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
INFO 09-11 22:00:21 [parallel_state.py:1165] rank 3 in world size 4 is assigned as DP rank 0, PP rank 3, TP rank 0, EP rank 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
INFO 09-11 22:00:21 [parallel_state.py:1165] rank 1 in world size 4 is assigned as DP rank 0, PP rank 1, TP rank 0, EP rank 0
INFO 09-11 22:00:21 [parallel_state.py:1165] rank 2 in world size 4 is assigned as DP rank 0, PP rank 2, TP rank 0, EP rank 0
INFO 09-11 22:00:21 [parallel_state.py:1165] rank 0 in world size 4 is assigned as DP rank 0, PP rank 0, TP rank 0, EP rank 0
WARNING 09-11 22:00:21 [topk_topp_sampler.py:69] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer.
WARNING 09-11 22:00:21 [topk_topp_sampler.py:69] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer.
WARNING 09-11 22:00:21 [topk_topp_sampler.py:69] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer.
WARNING 09-11 22:00:21 [topk_topp_sampler.py:69] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer.
(Worker_PP1 pid=262899) INFO 09-11 22:00:21 [gpu_model_runner.py:2338] Starting to load model /mnt/llms/models/Qwen/Qwen3-Next-80B-A3B-Instruct...
(Worker_PP3 pid=262901) INFO 09-11 22:00:21 [gpu_model_runner.py:2338] Starting to load model /mnt/llms/models/Qwen/Qwen3-Next-80B-A3B-Instruct...
(Worker_PP2 pid=262900) INFO 09-11 22:00:21 [gpu_model_runner.py:2338] Starting to load model /mnt/llms/models/Qwen/Qwen3-Next-80B-A3B-Instruct...
(Worker_PP0 pid=262898) INFO 09-11 22:00:22 [gpu_model_runner.py:2338] Starting to load model /mnt/llms/models/Qwen/Qwen3-Next-80B-A3B-Instruct...
(Worker_PP3 pid=262901) INFO 09-11 22:00:22 [gpu_model_runner.py:2370] Loading model from scratch...
(Worker_PP1 pid=262899) INFO 09-11 22:00:22 [gpu_model_runner.py:2370] Loading model from scratch...
(Worker_PP2 pid=262900) INFO 09-11 22:00:22 [gpu_model_runner.py:2370] Loading model from scratch...
(Worker_PP3 pid=262901) `torch_dtype` is deprecated! Use `dtype` instead!
(Worker_PP0 pid=262898) INFO 09-11 22:00:22 [gpu_model_runner.py:2370] Loading model from scratch...
(Worker_PP0 pid=262898) `torch_dtype` is deprecated! Use `dtype` instead!
(Worker_PP1 pid=262899) `torch_dtype` is deprecated! Use `dtype` instead!
(Worker_PP2 pid=262900) `torch_dtype` is deprecated! Use `dtype` instead!
(Worker_PP3 pid=262901) INFO 09-11 22:00:22 [cuda.py:353] Using Flash Attention backend on V1 engine.
(Worker_PP2 pid=262900) INFO 09-11 22:00:22 [cuda.py:353] Using Flash Attention backend on V1 engine.
(Worker_PP0 pid=262898) INFO 09-11 22:00:22 [cuda.py:353] Using Flash Attention backend on V1 engine.
(Worker_PP1 pid=262899) INFO 09-11 22:00:22 [cuda.py:353] Using Flash Attention backend on V1 engine.
Loading safetensors checkpoint shards: 0% Completed | 0/41 [00:00<?, ?it/s]
Loading safetensors checkpoint shards: 2% Completed | 1/41 [00:00<00:38, 1.03it/s]
Loading safetensors checkpoint shards: 5% Completed | 2/41 [00:01<00:25, 1.55it/s]
Loading safetensors checkpoint shards: 7% Completed | 3/41 [00:01<00:17, 2.14it/s]
Loading safetensors checkpoint shards: 10% Completed | 4/41 [00:01<00:14, 2.59it/s]
Loading safetensors checkpoint shards: 12% Completed | 5/41 [00:02<00:12, 2.96it/s]
Loading safetensors checkpoint shards: 17% Completed | 7/41 [00:02<00:08, 4.12it/s]
Loading safetensors checkpoint shards: 20% Completed | 8/41 [00:02<00:08, 4.09it/s]
Loading safetensors checkpoint shards: 22% Completed | 9/41 [00:02<00:07, 4.08it/s]
Loading safetensors checkpoint shards: 24% Completed | 10/41 [00:04<00:14, 2.10it/s]
Loading safetensors checkpoint shards: 27% Completed | 11/41 [00:04<00:14, 2.12it/s]
Loading safetensors checkpoint shards: 29% Completed | 12/41 [00:04<00:11, 2.46it/s]
Loading safetensors checkpoint shards: 32% Completed | 13/41 [00:05<00:16, 1.73it/s]
Loading safetensors checkpoint shards: 34% Completed | 14/41 [00:06<00:14, 1.84it/s]
Loading safetensors checkpoint shards: 37% Completed | 15/41 [00:06<00:11, 2.17it/s]
Loading safetensors checkpoint shards: 39% Completed | 16/41 [00:06<00:09, 2.51it/s]
Loading safetensors checkpoint shards: 41% Completed | 17/41 [00:06<00:08, 2.73it/s]
Loading safetensors checkpoint shards: 44% Completed | 18/41 [00:07<00:07, 2.91it/s]
Loading safetensors checkpoint shards: 46% Completed | 19/41 [00:07<00:06, 3.16it/s]
Loading safetensors checkpoint shards: 49% Completed | 20/41 [00:08<00:11, 1.91it/s]
Loading safetensors checkpoint shards: 51% Completed | 21/41 [00:09<00:10, 1.97it/s]
Loading safetensors checkpoint shards: 54% Completed | 22/41 [00:09<00:08, 2.31it/s]
Loading safetensors checkpoint shards: 56% Completed | 23/41 [00:09<00:06, 2.63it/s]
Loading safetensors checkpoint shards: 59% Completed | 24/41 [00:09<00:05, 2.93it/s]
Loading safetensors checkpoint shards: 61% Completed | 25/41 [00:10<00:05, 3.17it/s]
Loading safetensors checkpoint shards: 63% Completed | 26/41 [00:10<00:06, 2.16it/s]
Loading safetensors checkpoint shards: 66% Completed | 27/41 [00:11<00:09, 1.51it/s]
Loading safetensors checkpoint shards: 68% Completed | 28/41 [00:12<00:07, 1.64it/s]
(Worker_PP3 pid=262901) INFO 09-11 22:00:35 [default_loader.py:268] Loading weights took 12.69 seconds
Loading safetensors checkpoint shards: 71% Completed | 29/41 [00:12<00:06, 1.98it/s]
Loading safetensors checkpoint shards: 73% Completed | 30/41 [00:12<00:04, 2.32it/s]
(Worker_PP3 pid=262901) INFO 09-11 22:00:35 [gpu_model_runner.py:2392] Model loading took 10.3913 GiB and 13.065484 seconds
Loading safetensors checkpoint shards: 76% Completed | 31/41 [00:13<00:03, 2.63it/s]
Loading safetensors checkpoint shards: 78% Completed | 32/41 [00:13<00:03, 2.91it/s]
Loading safetensors checkpoint shards: 80% Completed | 33/41 [00:13<00:02, 3.15it/s]
Loading safetensors checkpoint shards: 83% Completed | 34/41 [00:14<00:02, 3.34it/s]
Loading safetensors checkpoint shards: 85% Completed | 35/41 [00:14<00:03, 1.99it/s]
Loading safetensors checkpoint shards: 88% Completed | 36/41 [00:15<00:02, 2.06it/s]
Loading safetensors checkpoint shards: 90% Completed | 37/41 [00:15<00:01, 2.41it/s]
(Worker_PP2 pid=262900) INFO 09-11 22:00:38 [default_loader.py:268] Loading weights took 15.68 seconds
Loading safetensors checkpoint shards: 93% Completed | 38/41 [00:15<00:01, 2.72it/s]
(Worker_PP2 pid=262900) INFO 09-11 22:00:38 [gpu_model_runner.py:2392] Model loading took 25.7324 GiB and 16.077111 seconds
Loading safetensors checkpoint shards: 95% Completed | 39/41 [00:16<00:00, 2.99it/s]
Loading safetensors checkpoint shards: 98% Completed | 40/41 [00:16<00:00, 3.20it/s]
Loading safetensors checkpoint shards: 100% Completed | 41/41 [00:16<00:00, 3.38it/s]
Loading safetensors checkpoint shards: 100% Completed | 41/41 [00:16<00:00, 2.45it/s]
(Worker_PP0 pid=262898)
(Worker_PP0 pid=262898) INFO 09-11 22:00:39 [default_loader.py:268] Loading weights took 16.72 seconds
(Worker_PP0 pid=262898) INFO 09-11 22:00:39 [gpu_model_runner.py:2392] Model loading took 25.7324 GiB and 17.015547 seconds
(Worker_PP1 pid=262899) INFO 09-11 22:00:52 [default_loader.py:268] Loading weights took 29.60 seconds
(Worker_PP1 pid=262899) INFO 09-11 22:00:52 [gpu_model_runner.py:2392] Model loading took 90.1555 GiB and 30.043717 seconds
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654] WorkerProc hit an exception.
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654] Traceback (most recent call last):
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/v1/executor/multiproc_executor.py", line 649, in worker_busy_loop
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     output = func(*args, **kwargs)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]              ^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return func(*args, **kwargs)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_worker.py", line 263, in determine_available_memory
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     self.model_runner.profile_run()
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_model_runner.py", line 3031, in profile_run
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     = self._dummy_run(self.max_num_tokens, is_profile=True)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return func(*args, **kwargs)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_model_runner.py", line 2809, in _dummy_run
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     outputs = self.model(
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]               ^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/compilation/cuda_graph.py", line 119, in __call__
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return self.runnable(*args, **kwargs)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return self._call_impl(*args, **kwargs)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return forward_call(*args, **kwargs)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 1165, in forward
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     hidden_states = self.model(input_ids, positions, intermediate_tensors,
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/compilation/decorators.py", line 305, in __call__
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     output = self.compiled_callable(*args, **kwargs)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py", line 736, in compile_wrapper
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return fn(*args, **kwargs)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 1495, in __call__
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return self._torchdynamo_orig_callable(
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 629, in __call__
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return _compile(
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 1111, in _compile
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     guarded_code = compile_inner(code, one_graph, hooks, transform)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_utils_internal.py", line 97, in wrapper_function
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return function(*args, **kwargs)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 793, in compile_inner
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return _compile_inner(code, one_graph, hooks, transform)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 832, in _compile_inner
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     out_code = transform_code_object(code, transform)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/bytecode_transformation.py", line 1424, in transform_code_object
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     transformations(instructions, code_options)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 267, in _fn
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return fn(*args, **kwargs)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 753, in transform
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     tracer.run()
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/symbolic_convert.py", line 3497, in run
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     super().run()
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/symbolic_convert.py", line 1363, in run
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     while self.step():
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]           ^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/symbolic_convert.py", line 1267, in step
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     self.dispatch_table[inst.opcode](self, inst)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/symbolic_convert.py", line 2528, in UNPACK_SEQUENCE
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     val = seq.unpack_var_sequence(self, idxes=range(inst.argval))  # type: ignore[arg-type]
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/variables/lazy.py", line 201, in realize_and_forward
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return getattr(self.realize(), name)(*args, **kwargs)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/variables/tensor.py", line 592, in unpack_var_sequence
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     assert len(idxes) == length, (
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654] AssertionError: Can't unpack a tensor of 2048 rows into a tuple of 2 elements.
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654] from user code:
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 945, in forward
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     hidden_states, residual = layer(
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654] Traceback (most recent call last):
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/v1/executor/multiproc_executor.py", line 649, in worker_busy_loop
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     output = func(*args, **kwargs)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]              ^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return func(*args, **kwargs)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_worker.py", line 263, in determine_available_memory
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     self.model_runner.profile_run()
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_model_runner.py", line 3031, in profile_run
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     = self._dummy_run(self.max_num_tokens, is_profile=True)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return func(*args, **kwargs)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_model_runner.py", line 2809, in _dummy_run
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     outputs = self.model(
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]               ^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/compilation/cuda_graph.py", line 119, in __call__
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return self.runnable(*args, **kwargs)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return self._call_impl(*args, **kwargs)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return forward_call(*args, **kwargs)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 1165, in forward
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     hidden_states = self.model(input_ids, positions, intermediate_tensors,
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/compilation/decorators.py", line 305, in __call__
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     output = self.compiled_callable(*args, **kwargs)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py", line 736, in compile_wrapper
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return fn(*args, **kwargs)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 1495, in __call__
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return self._torchdynamo_orig_callable(
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 629, in __call__
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return _compile(
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 1111, in _compile
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     guarded_code = compile_inner(code, one_graph, hooks, transform)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_utils_internal.py", line 97, in wrapper_function
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return function(*args, **kwargs)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 793, in compile_inner
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return _compile_inner(code, one_graph, hooks, transform)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 832, in _compile_inner
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     out_code = transform_code_object(code, transform)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/bytecode_transformation.py", line 1424, in transform_code_object
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     transformations(instructions, code_options)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 267, in _fn
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return fn(*args, **kwargs)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 753, in transform
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     tracer.run()
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/symbolic_convert.py", line 3497, in run
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     super().run()
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/symbolic_convert.py", line 1363, in run
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     while self.step():
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]           ^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/symbolic_convert.py", line 1267, in step
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     self.dispatch_table[inst.opcode](self, inst)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/symbolic_convert.py", line 2528, in UNPACK_SEQUENCE
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     val = seq.unpack_var_sequence(self, idxes=range(inst.argval))  # type: ignore[arg-type]
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/variables/lazy.py", line 201, in realize_and_forward
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return getattr(self.realize(), name)(*args, **kwargs)
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/variables/tensor.py", line 592, in unpack_var_sequence
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     assert len(idxes) == length, (
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654] AssertionError: Can't unpack a tensor of 2048 rows into a tuple of 2 elements.
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654] from user code:
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 945, in forward
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     hidden_states, residual = layer(
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]
(Worker_PP2 pid=262900) ERROR 09-11 22:00:52 [multiproc_executor.py:654]
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] WorkerProc hit an exception.
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] Traceback (most recent call last):
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/v1/executor/multiproc_executor.py", line 649, in worker_busy_loop
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     output = func(*args, **kwargs)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]              ^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return func(*args, **kwargs)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_worker.py", line 263, in determine_available_memory
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     self.model_runner.profile_run()
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_model_runner.py", line 3031, in profile_run
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     = self._dummy_run(self.max_num_tokens, is_profile=True)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return func(*args, **kwargs)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_model_runner.py", line 2809, in _dummy_run
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     outputs = self.model(
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]               ^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/compilation/cuda_graph.py", line 119, in __call__
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return self.runnable(*args, **kwargs)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return self._call_impl(*args, **kwargs)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return forward_call(*args, **kwargs)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 1165, in forward
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     hidden_states = self.model(input_ids, positions, intermediate_tensors,
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/compilation/decorators.py", line 305, in __call__
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     output = self.compiled_callable(*args, **kwargs)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py", line 736, in compile_wrapper
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return fn(*args, **kwargs)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 1495, in __call__
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return self._torchdynamo_orig_callable(
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 629, in __call__
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return _compile(
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 1111, in _compile
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     guarded_code = compile_inner(code, one_graph, hooks, transform)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_utils_internal.py", line 97, in wrapper_function
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return function(*args, **kwargs)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 793, in compile_inner
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return _compile_inner(code, one_graph, hooks, transform)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 832, in _compile_inner
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     out_code = transform_code_object(code, transform)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/bytecode_transformation.py", line 1424, in transform_code_object
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     transformations(instructions, code_options)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 267, in _fn
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return fn(*args, **kwargs)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 753, in transform
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     tracer.run()
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/symbolic_convert.py", line 3497, in run
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     super().run()
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/symbolic_convert.py", line 1363, in run
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     while self.step():
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]           ^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/symbolic_convert.py", line 1267, in step
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     self.dispatch_table[inst.opcode](self, inst)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/symbolic_convert.py", line 2528, in UNPACK_SEQUENCE
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     val = seq.unpack_var_sequence(self, idxes=range(inst.argval))  # type: ignore[arg-type]
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/variables/lazy.py", line 201, in realize_and_forward
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return getattr(self.realize(), name)(*args, **kwargs)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/variables/tensor.py", line 592, in unpack_var_sequence
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     assert len(idxes) == length, (
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] AssertionError: Can't unpack a tensor of 2048 rows into a tuple of 2 elements.
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] from user code:
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 945, in forward
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     hidden_states, residual = layer(
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] Traceback (most recent call last):
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/v1/executor/multiproc_executor.py", line 649, in worker_busy_loop
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     output = func(*args, **kwargs)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]              ^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return func(*args, **kwargs)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_worker.py", line 263, in determine_available_memory
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     self.model_runner.profile_run()
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_model_runner.py", line 3031, in profile_run
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     = self._dummy_run(self.max_num_tokens, is_profile=True)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return func(*args, **kwargs)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_model_runner.py", line 2809, in _dummy_run
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     outputs = self.model(
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]               ^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/compilation/cuda_graph.py", line 119, in __call__
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return self.runnable(*args, **kwargs)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return self._call_impl(*args, **kwargs)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return forward_call(*args, **kwargs)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 1165, in forward
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     hidden_states = self.model(input_ids, positions, intermediate_tensors,
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/vllm/compilation/decorators.py", line 305, in __call__
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     output = self.compiled_callable(*args, **kwargs)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py", line 736, in compile_wrapper
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]     return fn(*args, **kwargs)
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]            ^^^^^^^^^^^^^^^^^^^
(Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654]   File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 1495, in __call__
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] return self._torchdynamo_orig_callable( | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 629, in __call__ | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] return _compile( | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] ^^^^^^^^^ | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 1111, in _compile | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] guarded_code = compile_inner(code, one_graph, hooks, transform) | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_utils_internal.py", line 97, in wrapper_function | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] return function(*args, **kwargs) | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 793, in compile_inner | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] return _compile_inner(code, one_graph, hooks, transform) | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 832, in _compile_inner | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] out_code = transform_code_object(code, transform) | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/bytecode_transformation.py", line 1424, in transform_code_object | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] transformations(instructions, code_options) | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 267, in _fn | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] return fn(*args, **kwargs) | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/convert_frame.py", line 753, in transform | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] tracer.run() | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/symbolic_convert.py", line 3497, in run | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] super().run() | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/symbolic_convert.py", line 1363, in run | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] while self.step(): | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] ^^^^^^^^^^^ | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/symbolic_convert.py", line 1267, in step | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] self.dispatch_table[inst.opcode](self, inst) | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/symbolic_convert.py", line 2528, in UNPACK_SEQUENCE | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] val = seq.unpack_var_sequence(self, idxes=range(inst.argval)) # type: ignore[arg-type] | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/variables/lazy.py", line 201, in realize_and_forward | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] return getattr(self.realize(), name)(*args, **kwargs) | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/_dynamo/variables/tensor.py", line 592, in unpack_var_sequence | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] assert len(idxes) == length, ( | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] AssertionError: Can't unpack a tensor of 2048 rows into a tuple of 2 elements. | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] from user code: | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 945, in forward | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] hidden_states, residual = layer( | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] | |
| (Worker_PP1 pid=262899) ERROR 09-11 22:00:52 [multiproc_executor.py:654] | |
| (Worker_PP3 pid=262901) ERROR 09-11 22:00:52 [multiproc_executor.py:654] WorkerProc hit an exception. [traceback identical to Worker_PP1 above; same AssertionError: Can't unpack a tensor of 2048 rows into a tuple of 2 elements., raised from qwen3_next.py line 945] | |
| (Worker_PP0 pid=262898) ERROR 09-11 22:00:54 [multiproc_executor.py:654] WorkerProc hit an exception. [traceback identical to Worker_PP1 above; same AssertionError: Can't unpack a tensor of 2048 rows into a tuple of 2 elements., raised from qwen3_next.py line 945] | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] EngineCore failed to start. | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] Traceback (most recent call last): | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] File "/home/ubuntuai/vllm_source/vllm/v1/engine/core.py", line 709, in run_engine_core | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] engine_core = EngineCoreProc(*args, **kwargs) | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] File "/home/ubuntuai/vllm_source/vllm/v1/engine/core.py", line 505, in __init__ | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] super().__init__(vllm_config, executor_class, log_stats, | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] File "/home/ubuntuai/vllm_source/vllm/v1/engine/core.py", line 91, in __init__ | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] self._initialize_kv_caches(vllm_config) | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] File "/home/ubuntuai/vllm_source/vllm/v1/engine/core.py", line 183, in _initialize_kv_caches | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] self.model_executor.determine_available_memory()) | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] File "/home/ubuntuai/vllm_source/vllm/v1/executor/abstract.py", line 84, in determine_available_memory | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] return self.collective_rpc("determine_available_memory") | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] File "/home/ubuntuai/vllm_source/vllm/v1/executor/multiproc_executor.py", line 257, in collective_rpc | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] result = result.result() | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] ^^^^^^^^^^^^^^^ | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] File "/usr/lib/python3.12/concurrent/futures/_base.py", line 456, in result | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] return self.__get_result() | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] ^^^^^^^^^^^^^^^^^^^ | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] File "/usr/lib/python3.12/concurrent/futures/_base.py", line 401, in __get_result | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] raise self._exception | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] File "/usr/lib/python3.12/concurrent/futures/thread.py", line 59, in run | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] result = self.fn(*self.args, **self.kwargs) | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] File "/home/ubuntuai/vllm_source/vllm/v1/executor/multiproc_executor.py", line 243, in get_response | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] raise RuntimeError( | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] RuntimeError: Worker failed with error 'Can't unpack a tensor of 2048 rows into a tuple of 2 elements. | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] from user code: | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 945, in forward | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] hidden_states, residual = layer( | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:54 [core.py:718] ', please check the stack trace above for the root cause | |
| (EngineCore_DP0 pid=262791) ERROR 09-11 22:00:55 [multiproc_executor.py:149] Worker proc VllmWorker-2 died unexpectedly, shutting down executor. | |
| (EngineCore_DP0 pid=262791) Process EngineCore_DP0: | |
| (EngineCore_DP0 pid=262791) Traceback (most recent call last): | |
| (EngineCore_DP0 pid=262791) File "/usr/lib/python3.12/multiprocessing/process.py", line 314, in _bootstrap | |
| (EngineCore_DP0 pid=262791) self.run() | |
| (EngineCore_DP0 pid=262791) File "/usr/lib/python3.12/multiprocessing/process.py", line 108, in run | |
| (EngineCore_DP0 pid=262791) self._target(*self._args, **self._kwargs) | |
| (EngineCore_DP0 pid=262791) File "/home/ubuntuai/vllm_source/vllm/v1/engine/core.py", line 722, in run_engine_core | |
| (EngineCore_DP0 pid=262791) raise e | |
| (EngineCore_DP0 pid=262791) File "/home/ubuntuai/vllm_source/vllm/v1/engine/core.py", line 709, in run_engine_core | |
| (EngineCore_DP0 pid=262791) engine_core = EngineCoreProc(*args, **kwargs) | |
| (EngineCore_DP0 pid=262791) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (EngineCore_DP0 pid=262791) File "/home/ubuntuai/vllm_source/vllm/v1/engine/core.py", line 505, in __init__ | |
| (EngineCore_DP0 pid=262791) super().__init__(vllm_config, executor_class, log_stats, | |
| (EngineCore_DP0 pid=262791) File "/home/ubuntuai/vllm_source/vllm/v1/engine/core.py", line 91, in __init__ | |
| (EngineCore_DP0 pid=262791) self._initialize_kv_caches(vllm_config) | |
| (EngineCore_DP0 pid=262791) File "/home/ubuntuai/vllm_source/vllm/v1/engine/core.py", line 183, in _initialize_kv_caches | |
| (EngineCore_DP0 pid=262791) self.model_executor.determine_available_memory()) | |
| (EngineCore_DP0 pid=262791) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (EngineCore_DP0 pid=262791) File "/home/ubuntuai/vllm_source/vllm/v1/executor/abstract.py", line 84, in determine_available_memory | |
| (EngineCore_DP0 pid=262791) return self.collective_rpc("determine_available_memory") | |
| (EngineCore_DP0 pid=262791) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (EngineCore_DP0 pid=262791) File "/home/ubuntuai/vllm_source/vllm/v1/executor/multiproc_executor.py", line 257, in collective_rpc | |
| (EngineCore_DP0 pid=262791) result = result.result() | |
| (EngineCore_DP0 pid=262791) ^^^^^^^^^^^^^^^ | |
| (EngineCore_DP0 pid=262791) File "/usr/lib/python3.12/concurrent/futures/_base.py", line 456, in result | |
| (EngineCore_DP0 pid=262791) return self.__get_result() | |
| (EngineCore_DP0 pid=262791) ^^^^^^^^^^^^^^^^^^^ | |
| (EngineCore_DP0 pid=262791) File "/usr/lib/python3.12/concurrent/futures/_base.py", line 401, in __get_result | |
| (EngineCore_DP0 pid=262791) raise self._exception | |
| (EngineCore_DP0 pid=262791) File "/usr/lib/python3.12/concurrent/futures/thread.py", line 59, in run | |
| (EngineCore_DP0 pid=262791) result = self.fn(*self.args, **self.kwargs) | |
| (EngineCore_DP0 pid=262791) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (EngineCore_DP0 pid=262791) File "/home/ubuntuai/vllm_source/vllm/v1/executor/multiproc_executor.py", line 243, in get_response | |
| (EngineCore_DP0 pid=262791) raise RuntimeError( | |
| (EngineCore_DP0 pid=262791) RuntimeError: Worker failed with error 'Can't unpack a tensor of 2048 rows into a tuple of 2 elements. | |
| (EngineCore_DP0 pid=262791) | |
| (EngineCore_DP0 pid=262791) from user code: | |
| (EngineCore_DP0 pid=262791) File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 945, in forward | |
| (EngineCore_DP0 pid=262791) hidden_states, residual = layer( | |
| (EngineCore_DP0 pid=262791) ', please check the stack trace above for the root cause | |
| (APIServer pid=262524) Traceback (most recent call last): | |
| (APIServer pid=262524) File "/home/ubuntuai/vllm_source/.venv/bin/vllm", line 10, in <module> | |
| (APIServer pid=262524) sys.exit(main()) | |
| (APIServer pid=262524) ^^^^^^ | |
| (APIServer pid=262524) File "/home/ubuntuai/vllm_source/vllm/entrypoints/cli/main.py", line 54, in main | |
| (APIServer pid=262524) args.dispatch_function(args) | |
| (APIServer pid=262524) File "/home/ubuntuai/vllm_source/vllm/entrypoints/cli/serve.py", line 50, in cmd | |
| (APIServer pid=262524) uvloop.run(run_server(args)) | |
| (APIServer pid=262524) File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/uvloop/__init__.py", line 109, in run | |
| (APIServer pid=262524) return __asyncio.run( | |
| (APIServer pid=262524) ^^^^^^^^^^^^^^ | |
| (APIServer pid=262524) File "/usr/lib/python3.12/asyncio/runners.py", line 195, in run | |
| (APIServer pid=262524) return runner.run(main) | |
| (APIServer pid=262524) ^^^^^^^^^^^^^^^^ | |
| (APIServer pid=262524) File "/usr/lib/python3.12/asyncio/runners.py", line 118, in run | |
| (APIServer pid=262524) return self._loop.run_until_complete(task) | |
| (APIServer pid=262524) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (APIServer pid=262524) File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete | |
| (APIServer pid=262524) File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/uvloop/__init__.py", line 61, in wrapper | |
| (APIServer pid=262524) return await main | |
| (APIServer pid=262524) ^^^^^^^^^^ | |
| (APIServer pid=262524) File "/home/ubuntuai/vllm_source/vllm/entrypoints/openai/api_server.py", line 1941, in run_server | |
| (APIServer pid=262524) await run_server_worker(listen_address, sock, args, **uvicorn_kwargs) | |
| (APIServer pid=262524) File "/home/ubuntuai/vllm_source/vllm/entrypoints/openai/api_server.py", line 1961, in run_server_worker | |
| (APIServer pid=262524) async with build_async_engine_client( | |
| (APIServer pid=262524) ^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (APIServer pid=262524) File "/usr/lib/python3.12/contextlib.py", line 210, in __aenter__ | |
| (APIServer pid=262524) return await anext(self.gen) | |
| (APIServer pid=262524) ^^^^^^^^^^^^^^^^^^^^^ | |
| (APIServer pid=262524) File "/home/ubuntuai/vllm_source/vllm/entrypoints/openai/api_server.py", line 179, in build_async_engine_client | |
| (APIServer pid=262524) async with build_async_engine_client_from_engine_args( | |
| (APIServer pid=262524) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (APIServer pid=262524) File "/usr/lib/python3.12/contextlib.py", line 210, in __aenter__ | |
| (APIServer pid=262524) return await anext(self.gen) | |
| (APIServer pid=262524) ^^^^^^^^^^^^^^^^^^^^^ | |
| (APIServer pid=262524) File "/home/ubuntuai/vllm_source/vllm/entrypoints/openai/api_server.py", line 221, in build_async_engine_client_from_engine_args | |
| (APIServer pid=262524) async_llm = AsyncLLM.from_vllm_config( | |
| (APIServer pid=262524) ^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (APIServer pid=262524) File "/home/ubuntuai/vllm_source/vllm/utils/__init__.py", line 1589, in inner | |
| (APIServer pid=262524) return fn(*args, **kwargs) | |
| (APIServer pid=262524) ^^^^^^^^^^^^^^^^^^^ | |
| (APIServer pid=262524) File "/home/ubuntuai/vllm_source/vllm/v1/engine/async_llm.py", line 205, in from_vllm_config | |
| (APIServer pid=262524) return cls( | |
| (APIServer pid=262524) ^^^^ | |
| (APIServer pid=262524) File "/home/ubuntuai/vllm_source/vllm/v1/engine/async_llm.py", line 129, in __init__ | |
| (APIServer pid=262524) self.engine_core = EngineCoreClient.make_async_mp_client( | |
| (APIServer pid=262524) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (APIServer pid=262524) File "/home/ubuntuai/vllm_source/vllm/v1/engine/core_client.py", line 102, in make_async_mp_client | |
| (APIServer pid=262524) return AsyncMPClient(*client_args) | |
| (APIServer pid=262524) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (APIServer pid=262524) File "/home/ubuntuai/vllm_source/vllm/v1/engine/core_client.py", line 769, in __init__ | |
| (APIServer pid=262524) super().__init__( | |
| (APIServer pid=262524) File "/home/ubuntuai/vllm_source/vllm/v1/engine/core_client.py", line 448, in __init__ | |
| (APIServer pid=262524) with launch_core_engines(vllm_config, executor_class, | |
| (APIServer pid=262524) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (APIServer pid=262524) File "/usr/lib/python3.12/contextlib.py", line 144, in __exit__ | |
| (APIServer pid=262524) next(self.gen) | |
| (APIServer pid=262524) File "/home/ubuntuai/vllm_source/vllm/v1/engine/utils.py", line 729, in launch_core_engines | |
| (APIServer pid=262524) wait_for_engine_startup( | |
| (APIServer pid=262524) File "/home/ubuntuai/vllm_source/vllm/v1/engine/utils.py", line 782, in wait_for_engine_startup | |
| (APIServer pid=262524) raise RuntimeError("Engine core initialization failed. " | |
| (APIServer pid=262524) RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): {} |
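
For context on the failure: Dynamo is tracing the two-target unpack `hidden_states, residual = layer(...)` in qwen3_next.py and finds a single tensor with 2048 rows where a `(hidden_states, residual)` tuple was expected, so its UNPACK_SEQUENCE handler asserts. Below is a minimal sketch of that same unpack mismatch outside vLLM — the `layer_ok`/`layer_bad` functions are hypothetical stand-ins, not vLLM code, and the 2048 is just there to mirror the row count in the error message:

```python
import torch

def layer_ok(x, residual):
    # What the caller expects: a (hidden_states, residual) tuple.
    return x + 1, residual

def layer_bad(x, residual):
    # Returns a single [num_tokens, hidden] tensor instead of a tuple.
    return x + residual

x = torch.randn(2048, 16)
res = torch.zeros_like(x)

hidden_states, residual = layer_ok(x, res)       # unpacks fine

try:
    hidden_states, residual = layer_bad(x, res)  # 2 targets, 2048 rows
except ValueError as e:
    # Eager PyTorch raises "too many values to unpack (expected 2)".
    # Under torch.compile the same mismatch surfaces as Dynamo's
    # "Can't unpack a tensor of 2048 rows into a tuple of 2 elements."
    print(e)
```

Purely as a diagnostic guess, not a confirmed fix: running the same command with `--enforce-eager` would skip the torch.compile path and show whether the mismatch only appears under the Dynamo trace or also in eager execution.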