error enforce eager
VLLM_COMPILE_LEVEL=0 TORCHDYNAMO_VERBOSE=1 CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=0,2,4,1 VLLM_PP_LAYER_PARTITION="8,29,8,3" VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 vllm serve /mnt/llms/models/Qwen/Qwen3-Next-80B-A3B-Instruct --port 8000 --max-model-len 8192 -pp 4 --enforce-eager
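For context: this launch pins four GPUs in PCI-bus order, runs pipeline parallelism across them (-pp 4) in eager mode with compilation disabled, caps the context at 8192 tokens, and overrides the default layer split via VLLM_PP_LAYER_PARTITION="8,29,8,3". A quick sanity-check sketch of that partition string, assuming (as the variable name suggests) it lists how many decoder layers go to each pipeline rank; the uneven split is consistent with the very different per-rank model-load sizes reported further down:

# Illustrative sketch, not vLLM code: check the custom pipeline partition.
partition = [int(n) for n in "8,29,8,3".split(",")]
print(partition)       # [8, 29, 8, 3] -> layers assigned to pipeline ranks 0..3
print(sum(partition))  # 48; this should equal the model's total number of decoder layers

The full startup log as captured follows.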
| INFO 09-11 22:50:23 [__init__.py:216] Automatically detected platform cuda. | |
| (APIServer pid=290069) INFO 09-11 22:50:25 [api_server.py:1896] vLLM API server version 0.10.2rc2.dev313+g79ac59f32 | |
| (APIServer pid=290069) INFO 09-11 22:50:25 [utils.py:328] non-default args: {'model_tag': '/mnt/llms/models/Qwen/Qwen3-Next-80B-A3B-Instruct', 'model': '/mnt/llms/models/Qwen/Qwen3-Next-80B-A3B-Instruct', 'max_model_len': 8192, 'enforce_eager': True, 'pipeline_parallel_size': 4} | |
| (APIServer pid=290069) INFO 09-11 22:50:33 [__init__.py:742] Resolved architecture: Qwen3NextForCausalLM | |
| (APIServer pid=290069) `torch_dtype` is deprecated! Use `dtype` instead! | |
| (APIServer pid=290069) INFO 09-11 22:50:33 [__init__.py:1815] Using max model len 8192 | |
| (APIServer pid=290069) INFO 09-11 22:50:33 [scheduler.py:222] Chunked prefill is enabled with max_num_batched_tokens=2048. | |
| (APIServer pid=290069) INFO 09-11 22:50:33 [config.py:310] Hybrid or mamba-based model detected: disabling prefix caching since it is not yet supported. | |
| (APIServer pid=290069) INFO 09-11 22:50:33 [config.py:321] Hybrid or mamba-based model detected: setting cudagraph mode to FULL_AND_PIECEWISE in order to optimize performance. | |
| (APIServer pid=290069) INFO 09-11 22:50:34 [config.py:390] Setting attention block size to 544 tokens to ensure that attention page size is >= mamba page size. | |
| (APIServer pid=290069) INFO 09-11 22:50:34 [config.py:411] Padding mamba page size by 1.49% to ensure that mamba page size and attention page size are exactly equal. | |
| (APIServer pid=290069) INFO 09-11 22:50:34 [__init__.py:3400] Cudagraph is disabled under eager mode | |
| INFO 09-11 22:50:38 [__init__.py:216] Automatically detected platform cuda. | |
| (EngineCore_DP0 pid=290315) INFO 09-11 22:50:41 [core.py:654] Waiting for init message from front-end. | |
| (EngineCore_DP0 pid=290315) INFO 09-11 22:50:41 [core.py:76] Initializing a V1 LLM engine (v0.10.2rc2.dev313+g79ac59f32) with config: model='/mnt/llms/models/Qwen/Qwen3-Next-80B-A3B-Instruct', speculative_config=None, tokenizer='/mnt/llms/models/Qwen/Qwen3-Next-80B-A3B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=8192, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=4, data_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None), seed=0, served_model_name=/mnt/llms/models/Qwen/Qwen3-Next-80B-A3B-Instruct, enable_prefix_caching=False, chunked_prefill_enabled=True, use_async_output_proc=False, pooler_config=None, compilation_config={"level":0,"debug_dump_path":"","cache_dir":"","backend":"","custom_ops":[],"splitting_ops":null,"use_inductor":true,"compile_sizes":[],"inductor_compile_config":{"enable_auto_functionalized_v2":false},"inductor_passes":{},"cudagraph_mode":0,"use_cudagraph":true,"cudagraph_num_of_warmups":0,"cudagraph_capture_sizes":[],"cudagraph_copy_inputs":false,"full_cuda_graph":false,"pass_config":{},"max_capture_size":0,"local_cache_dir":null} | |
| (EngineCore_DP0 pid=290315) WARNING 09-11 22:50:41 [multiproc_worker_utils.py:273] Reducing Torch parallelism from 24 threads to 1 to avoid unnecessary CPU contention. Set OMP_NUM_THREADS in the external environment to tune this value as needed. | |
| (EngineCore_DP0 pid=290315) INFO 09-11 22:50:41 [shm_broadcast.py:289] vLLM message queue communication handle: Handle(local_reader_ranks=[0, 1, 2, 3], buffer_handle=(4, 16777216, 10, 'psm_db6bde43'), local_subscribe_addr='ipc:///tmp/9618ebbb-d35b-4984-a78d-2c31583be2f6', remote_subscribe_addr=None, remote_addr_ipv6=False) | |
| INFO 09-11 22:50:44 [__init__.py:216] Automatically detected platform cuda. | |
| INFO 09-11 22:50:44 [__init__.py:216] Automatically detected platform cuda. | |
| INFO 09-11 22:50:44 [__init__.py:216] Automatically detected platform cuda. | |
| INFO 09-11 22:50:44 [__init__.py:216] Automatically detected platform cuda. | |
| INFO 09-11 22:50:48 [shm_broadcast.py:289] vLLM message queue communication handle: Handle(local_reader_ranks=[0], buffer_handle=(1, 10485760, 10, 'psm_6a6affa3'), local_subscribe_addr='ipc:///tmp/854a3a94-1fea-44bb-b272-2130e3efb27d', remote_subscribe_addr=None, remote_addr_ipv6=False) | |
| INFO 09-11 22:50:48 [shm_broadcast.py:289] vLLM message queue communication handle: Handle(local_reader_ranks=[0], buffer_handle=(1, 10485760, 10, 'psm_57609887'), local_subscribe_addr='ipc:///tmp/b4d0ebcc-443f-4e95-9751-7856f0a86de7', remote_subscribe_addr=None, remote_addr_ipv6=False) | |
| INFO 09-11 22:50:48 [shm_broadcast.py:289] vLLM message queue communication handle: Handle(local_reader_ranks=[0], buffer_handle=(1, 10485760, 10, 'psm_d78f89d2'), local_subscribe_addr='ipc:///tmp/37821843-8c36-4c62-aef9-c9215cac7021', remote_subscribe_addr=None, remote_addr_ipv6=False) | |
| INFO 09-11 22:50:48 [shm_broadcast.py:289] vLLM message queue communication handle: Handle(local_reader_ranks=[0], buffer_handle=(1, 10485760, 10, 'psm_cb352bd4'), local_subscribe_addr='ipc:///tmp/261ab28f-8978-45f1-bf86-89a8580aff47', remote_subscribe_addr=None, remote_addr_ipv6=False) | |
| [W911 22:50:48.239283851 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:48121 (errno: 97 - Address family not supported by protocol). | |
| [W911 22:50:48.241025996 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:48121 (errno: 97 - Address family not supported by protocol). | |
| [W911 22:50:48.265629535 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:48121 (errno: 97 - Address family not supported by protocol). | |
| [W911 22:50:48.325850235 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:48121 (errno: 97 - Address family not supported by protocol). | |
| [W911 22:50:48.326415866 ProcessGroupNCCL.cpp:981] Warning: TORCH_NCCL_AVOID_RECORD_STREAMS is the default now, this environment variable is thus deprecated. (function operator()) | |
| [W911 22:50:49.573117444 ProcessGroupNCCL.cpp:981] Warning: TORCH_NCCL_AVOID_RECORD_STREAMS is the default now, this environment variable is thus deprecated. (function operator()) | |
| [W911 22:50:49.581194306 ProcessGroupNCCL.cpp:981] Warning: TORCH_NCCL_AVOID_RECORD_STREAMS is the default now, this environment variable is thus deprecated. (function operator()) | |
| [W911 22:50:49.585772877 ProcessGroupNCCL.cpp:981] Warning: TORCH_NCCL_AVOID_RECORD_STREAMS is the default now, this environment variable is thus deprecated. (function operator()) | |
| [Gloo] Rank 0 is connected to 3 peer ranks. Expected number of connected peer ranks is : 3 | |
| [Gloo] Rank 1 is connected to 3 peer ranks. Expected number of connected peer ranks is : 3 | |
| [Gloo] Rank 3 is connected to 3 peer ranks. Expected number of connected peer ranks is : 3 | |
| [Gloo] Rank 2 is connected to 3 peer ranks. Expected number of connected peer ranks is : 3 | |
| [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 | |
| [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 | |
| [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 | |
| [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 | |
| [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 | |
| [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 | |
| [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 | |
| [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 | |
| [Gloo] Rank 2 is connected to 3 peer ranks. Expected number of connected peer ranks is : 3 | |
| [Gloo] Rank 0 is connected to 3 peer ranks. Expected number of connected peer ranks is : 3 | |
| [Gloo] Rank 3 is connected to 3 peer ranks. Expected number of connected peer ranks is : 3 | |
| [Gloo] Rank 1 is connected to 3 peer ranks. Expected number of connected peer ranks is : 3 | |
| INFO 09-11 22:50:49 [__init__.py:1433] Found nccl from library libnccl.so.2 | |
| INFO 09-11 22:50:49 [__init__.py:1433] Found nccl from library libnccl.so.2 | |
| INFO 09-11 22:50:49 [pynccl.py:70] vLLM is using nccl==2.27.3 | |
| INFO 09-11 22:50:49 [pynccl.py:70] vLLM is using nccl==2.27.3 | |
| INFO 09-11 22:50:49 [__init__.py:1433] Found nccl from library libnccl.so.2 | |
| INFO 09-11 22:50:49 [__init__.py:1433] Found nccl from library libnccl.so.2 | |
| INFO 09-11 22:50:49 [pynccl.py:70] vLLM is using nccl==2.27.3 | |
| INFO 09-11 22:50:49 [pynccl.py:70] vLLM is using nccl==2.27.3 | |
| WARNING 09-11 22:50:49 [symm_mem.py:58] SymmMemCommunicator: Device capability 12.0 not supported, communicator is not available. | |
| WARNING 09-11 22:50:49 [symm_mem.py:58] SymmMemCommunicator: Device capability 12.0 not supported, communicator is not available. | |
| WARNING 09-11 22:50:49 [symm_mem.py:58] SymmMemCommunicator: Device capability 12.0 not supported, communicator is not available. | |
| WARNING 09-11 22:50:49 [symm_mem.py:58] SymmMemCommunicator: Device capability 12.0 not supported, communicator is not available. | |
| [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 | |
| [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 | |
| [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 | |
| [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 | |
| [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 | |
| INFO 09-11 22:50:49 [parallel_state.py:1165] rank 3 in world size 4 is assigned as DP rank 0, PP rank 3, TP rank 0, EP rank 0 | |
| [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 | |
| INFO 09-11 22:50:49 [parallel_state.py:1165] rank 2 in world size 4 is assigned as DP rank 0, PP rank 2, TP rank 0, EP rank 0 | |
| [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 | |
| [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 | |
| INFO 09-11 22:50:49 [parallel_state.py:1165] rank 0 in world size 4 is assigned as DP rank 0, PP rank 0, TP rank 0, EP rank 0 | |
| INFO 09-11 22:50:49 [parallel_state.py:1165] rank 1 in world size 4 is assigned as DP rank 0, PP rank 1, TP rank 0, EP rank 0 | |
| WARNING 09-11 22:50:49 [topk_topp_sampler.py:69] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer. | |
| WARNING 09-11 22:50:49 [topk_topp_sampler.py:69] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer. | |
| WARNING 09-11 22:50:49 [topk_topp_sampler.py:69] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer. | |
| WARNING 09-11 22:50:49 [topk_topp_sampler.py:69] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer. | |
| (Worker_PP3 pid=290448) INFO 09-11 22:50:49 [gpu_model_runner.py:2338] Starting to load model /mnt/llms/models/Qwen/Qwen3-Next-80B-A3B-Instruct... | |
| (Worker_PP2 pid=290447) INFO 09-11 22:50:49 [gpu_model_runner.py:2338] Starting to load model /mnt/llms/models/Qwen/Qwen3-Next-80B-A3B-Instruct... | |
| (Worker_PP1 pid=290446) INFO 09-11 22:50:49 [gpu_model_runner.py:2338] Starting to load model /mnt/llms/models/Qwen/Qwen3-Next-80B-A3B-Instruct... | |
| (Worker_PP0 pid=290445) INFO 09-11 22:50:49 [gpu_model_runner.py:2338] Starting to load model /mnt/llms/models/Qwen/Qwen3-Next-80B-A3B-Instruct... | |
| (Worker_PP3 pid=290448) INFO 09-11 22:50:50 [gpu_model_runner.py:2370] Loading model from scratch... | |
| (Worker_PP1 pid=290446) INFO 09-11 22:50:50 [gpu_model_runner.py:2370] Loading model from scratch... | |
| (Worker_PP2 pid=290447) INFO 09-11 22:50:50 [gpu_model_runner.py:2370] Loading model from scratch... | |
| (Worker_PP3 pid=290448) `torch_dtype` is deprecated! Use `dtype` instead! | |
| (Worker_PP0 pid=290445) INFO 09-11 22:50:50 [gpu_model_runner.py:2370] Loading model from scratch... | |
| (Worker_PP1 pid=290446) `torch_dtype` is deprecated! Use `dtype` instead! | |
| (Worker_PP0 pid=290445) `torch_dtype` is deprecated! Use `dtype` instead! | |
| (Worker_PP2 pid=290447) `torch_dtype` is deprecated! Use `dtype` instead! | |
| (Worker_PP3 pid=290448) INFO 09-11 22:50:50 [cuda.py:353] Using Flash Attention backend on V1 engine. | |
| (Worker_PP0 pid=290445) INFO 09-11 22:50:50 [cuda.py:353] Using Flash Attention backend on V1 engine. | |
| (Worker_PP1 pid=290446) INFO 09-11 22:50:50 [cuda.py:353] Using Flash Attention backend on V1 engine. | |
| (Worker_PP2 pid=290447) INFO 09-11 22:50:50 [cuda.py:353] Using Flash Attention backend on V1 engine. | |
| Loading safetensors checkpoint shards: 0% Completed | 0/41 [00:00<?, ?it/s] | |
| Loading safetensors checkpoint shards: 2% Completed | 1/41 [00:00<00:38, 1.04it/s] | |
| Loading safetensors checkpoint shards: 5% Completed | 2/41 [00:01<00:24, 1.56it/s] | |
| Loading safetensors checkpoint shards: 7% Completed | 3/41 [00:01<00:17, 2.15it/s] | |
| Loading safetensors checkpoint shards: 10% Completed | 4/41 [00:01<00:14, 2.61it/s] | |
| Loading safetensors checkpoint shards: 12% Completed | 5/41 [00:02<00:12, 2.97it/s] | |
| Loading safetensors checkpoint shards: 17% Completed | 7/41 [00:02<00:08, 4.15it/s] | |
| Loading safetensors checkpoint shards: 20% Completed | 8/41 [00:02<00:08, 4.12it/s] | |
| Loading safetensors checkpoint shards: 22% Completed | 9/41 [00:02<00:07, 4.01it/s] | |
| Loading safetensors checkpoint shards: 24% Completed | 10/41 [00:04<00:14, 2.09it/s] | |
| Loading safetensors checkpoint shards: 27% Completed | 11/41 [00:04<00:14, 2.12it/s] | |
| Loading safetensors checkpoint shards: 29% Completed | 12/41 [00:04<00:11, 2.46it/s] | |
| Loading safetensors checkpoint shards: 32% Completed | 13/41 [00:05<00:16, 1.74it/s] | |
| Loading safetensors checkpoint shards: 34% Completed | 14/41 [00:06<00:14, 1.85it/s] | |
| Loading safetensors checkpoint shards: 37% Completed | 15/41 [00:06<00:11, 2.20it/s] | |
| Loading safetensors checkpoint shards: 39% Completed | 16/41 [00:06<00:09, 2.53it/s] | |
| Loading safetensors checkpoint shards: 41% Completed | 17/41 [00:06<00:08, 2.76it/s] | |
| Loading safetensors checkpoint shards: 44% Completed | 18/41 [00:07<00:07, 2.94it/s] | |
| Loading safetensors checkpoint shards: 46% Completed | 19/41 [00:07<00:06, 3.18it/s] | |
| Loading safetensors checkpoint shards: 49% Completed | 20/41 [00:08<00:10, 1.92it/s] | |
| Loading safetensors checkpoint shards: 51% Completed | 21/41 [00:08<00:10, 1.99it/s] | |
| Loading safetensors checkpoint shards: 54% Completed | 22/41 [00:09<00:08, 2.34it/s] | |
| Loading safetensors checkpoint shards: 56% Completed | 23/41 [00:09<00:06, 2.66it/s] | |
| Loading safetensors checkpoint shards: 59% Completed | 24/41 [00:09<00:05, 2.95it/s] | |
| Loading safetensors checkpoint shards: 61% Completed | 25/41 [00:09<00:04, 3.20it/s] | |
| Loading safetensors checkpoint shards: 63% Completed | 26/41 [00:10<00:06, 2.19it/s] | |
| Loading safetensors checkpoint shards: 66% Completed | 27/41 [00:11<00:09, 1.53it/s] | |
| Loading safetensors checkpoint shards: 68% Completed | 28/41 [00:12<00:07, 1.66it/s] | |
| Loading safetensors checkpoint shards: 71% Completed | 29/41 [00:12<00:05, 2.00it/s] | |
| (Worker_PP3 pid=290448) INFO 09-11 22:51:03 [default_loader.py:268] Loading weights took 12.66 seconds | |
| Loading safetensors checkpoint shards: 73% Completed | 30/41 [00:12<00:04, 2.35it/s] | |
| (Worker_PP3 pid=290448) INFO 09-11 22:51:03 [gpu_model_runner.py:2392] Model loading took 10.3913 GiB and 13.020506 seconds | |
| Loading safetensors checkpoint shards: 76% Completed | 31/41 [00:13<00:03, 2.66it/s] | |
| Loading safetensors checkpoint shards: 78% Completed | 32/41 [00:13<00:03, 2.94it/s] | |
| Loading safetensors checkpoint shards: 80% Completed | 33/41 [00:13<00:02, 3.18it/s] | |
| Loading safetensors checkpoint shards: 83% Completed | 34/41 [00:13<00:02, 3.36it/s] | |
| Loading safetensors checkpoint shards: 85% Completed | 35/41 [00:14<00:02, 2.00it/s] | |
| Loading safetensors checkpoint shards: 88% Completed | 36/41 [00:15<00:02, 2.08it/s] | |
| Loading safetensors checkpoint shards: 90% Completed | 37/41 [00:15<00:01, 2.43it/s] | |
| (Worker_PP2 pid=290447) INFO 09-11 22:51:06 [default_loader.py:268] Loading weights took 15.70 seconds | |
| Loading safetensors checkpoint shards: 93% Completed | 38/41 [00:15<00:01, 2.75it/s] | |
| Loading safetensors checkpoint shards: 95% Completed | 39/41 [00:16<00:00, 3.01it/s] | |
| (Worker_PP2 pid=290447) INFO 09-11 22:51:06 [gpu_model_runner.py:2392] Model loading took 25.7324 GiB and 16.074549 seconds | |
| Loading safetensors checkpoint shards: 98% Completed | 40/41 [00:16<00:00, 3.22it/s] | |
| Loading safetensors checkpoint shards: 100% Completed | 41/41 [00:16<00:00, 3.40it/s] | |
| Loading safetensors checkpoint shards: 100% Completed | 41/41 [00:16<00:00, 2.47it/s] | |
| (Worker_PP0 pid=290445) | |
| (Worker_PP0 pid=290445) INFO 09-11 22:51:06 [default_loader.py:268] Loading weights took 16.59 seconds | |
| (Worker_PP0 pid=290445) INFO 09-11 22:51:07 [gpu_model_runner.py:2392] Model loading took 25.7324 GiB and 16.873340 seconds | |
| (Worker_PP1 pid=290446) INFO 09-11 22:51:20 [default_loader.py:268] Loading weights took 29.81 seconds | |
| (Worker_PP1 pid=290446) INFO 09-11 22:51:20 [gpu_model_runner.py:2392] Model loading took 90.1555 GiB and 30.245101 seconds | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] WorkerProc hit an exception. | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] Traceback (most recent call last): | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/executor/multiproc_executor.py", line 649, in worker_busy_loop | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] output = func(*args, **kwargs) | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return func(*args, **kwargs) | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_worker.py", line 263, in determine_available_memory | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] self.model_runner.profile_run() | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_model_runner.py", line 3031, in profile_run | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] = self._dummy_run(self.max_num_tokens, is_profile=True) | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return func(*args, **kwargs) | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_model_runner.py", line 2809, in _dummy_run | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] outputs = self.model( | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^ | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return self._call_impl(*args, **kwargs) | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return forward_call(*args, **kwargs) | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 1165, in forward | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] hidden_states = self.model(input_ids, positions, intermediate_tensors, | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/compilation/decorators.py", line 223, in __call__ | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return self.forward(*args, **kwargs) | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 945, in forward | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] hidden_states, residual = layer( | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ValueError: too many values to unpack (expected 2) | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] Traceback (most recent call last): | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/executor/multiproc_executor.py", line 649, in worker_busy_loop | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] output = func(*args, **kwargs) | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return func(*args, **kwargs) | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_worker.py", line 263, in determine_available_memory | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] self.model_runner.profile_run() | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_model_runner.py", line 3031, in profile_run | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] = self._dummy_run(self.max_num_tokens, is_profile=True) | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return func(*args, **kwargs) | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_model_runner.py", line 2809, in _dummy_run | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] outputs = self.model( | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^ | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return self._call_impl(*args, **kwargs) | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return forward_call(*args, **kwargs) | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 1165, in forward | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] hidden_states = self.model(input_ids, positions, intermediate_tensors, | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/compilation/decorators.py", line 223, in __call__ | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return self.forward(*args, **kwargs) | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 945, in forward | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] hidden_states, residual = layer( | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ValueError: too many values to unpack (expected 2) | |
| (Worker_PP3 pid=290448) ERROR 09-11 22:51:20 [multiproc_executor.py:654] | |
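A note on the failure (not part of the original log): every pipeline worker dies at the same point. The crash happens during the memory-profiling dummy run (determine_available_memory -> profile_run -> _dummy_run), where qwen3_next.py line 945 unpacks the decoder layer's output as hidden_states, residual = layer(...). The ValueError simply means the layer call returned more than two values in this configuration. A minimal, purely illustrative reproduction of that error pattern (hypothetical names, not vLLM code):

# Illustrative only: a callable expected to return two values that, in some
# configuration, returns three -- the caller's two-value unpack then fails.
def layer(hidden_states, residual):
    extra = None  # e.g. an additional per-layer output the caller does not expect
    return hidden_states, residual, extra

try:
    hidden_states, residual = layer("h", "r")
except ValueError as err:
    print(err)  # too many values to unpack (expected 2)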
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] WorkerProc hit an exception. | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] Traceback (most recent call last): | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/executor/multiproc_executor.py", line 649, in worker_busy_loop | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] output = func(*args, **kwargs) | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return func(*args, **kwargs) | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_worker.py", line 263, in determine_available_memory | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] self.model_runner.profile_run() | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_model_runner.py", line 3031, in profile_run | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] = self._dummy_run(self.max_num_tokens, is_profile=True) | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return func(*args, **kwargs) | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_model_runner.py", line 2809, in _dummy_run | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] outputs = self.model( | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^ | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return self._call_impl(*args, **kwargs) | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return forward_call(*args, **kwargs) | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 1165, in forward | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] hidden_states = self.model(input_ids, positions, intermediate_tensors, | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/compilation/decorators.py", line 223, in __call__ | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return self.forward(*args, **kwargs) | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 945, in forward | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] hidden_states, residual = layer( | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ValueError: too many values to unpack (expected 2) | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] Traceback (most recent call last): | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/executor/multiproc_executor.py", line 649, in worker_busy_loop | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] output = func(*args, **kwargs) | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return func(*args, **kwargs) | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_worker.py", line 263, in determine_available_memory | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] self.model_runner.profile_run() | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_model_runner.py", line 3031, in profile_run | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] = self._dummy_run(self.max_num_tokens, is_profile=True) | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return func(*args, **kwargs) | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_model_runner.py", line 2809, in _dummy_run | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] outputs = self.model( | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^ | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return self._call_impl(*args, **kwargs) | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return forward_call(*args, **kwargs) | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 1165, in forward | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] hidden_states = self.model(input_ids, positions, intermediate_tensors, | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/compilation/decorators.py", line 223, in __call__ | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return self.forward(*args, **kwargs) | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 945, in forward | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] hidden_states, residual = layer( | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ValueError: too many values to unpack (expected 2) | |
| (Worker_PP2 pid=290447) ERROR 09-11 22:51:20 [multiproc_executor.py:654] | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] WorkerProc hit an exception. | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] Traceback (most recent call last): | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/executor/multiproc_executor.py", line 649, in worker_busy_loop | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] output = func(*args, **kwargs) | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return func(*args, **kwargs) | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_worker.py", line 263, in determine_available_memory | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] self.model_runner.profile_run() | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_model_runner.py", line 3031, in profile_run | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] = self._dummy_run(self.max_num_tokens, is_profile=True) | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return func(*args, **kwargs) | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_model_runner.py", line 2809, in _dummy_run | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] outputs = self.model( | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^ | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return self._call_impl(*args, **kwargs) | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return forward_call(*args, **kwargs) | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 1165, in forward | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] hidden_states = self.model(input_ids, positions, intermediate_tensors, | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/compilation/decorators.py", line 223, in __call__ | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return self.forward(*args, **kwargs) | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 945, in forward | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] hidden_states, residual = layer( | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ValueError: too many values to unpack (expected 2) | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] Traceback (most recent call last): | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/executor/multiproc_executor.py", line 649, in worker_busy_loop | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] output = func(*args, **kwargs) | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return func(*args, **kwargs) | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_worker.py", line 263, in determine_available_memory | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] self.model_runner.profile_run() | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_model_runner.py", line 3031, in profile_run | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] = self._dummy_run(self.max_num_tokens, is_profile=True) | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return func(*args, **kwargs) | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_model_runner.py", line 2809, in _dummy_run | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] outputs = self.model( | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^ | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return self._call_impl(*args, **kwargs) | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return forward_call(*args, **kwargs) | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 1165, in forward | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] hidden_states = self.model(input_ids, positions, intermediate_tensors, | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/compilation/decorators.py", line 223, in __call__ | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] return self.forward(*args, **kwargs) | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 945, in forward | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] hidden_states, residual = layer( | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] ValueError: too many values to unpack (expected 2) | |
| (Worker_PP1 pid=290446) ERROR 09-11 22:51:20 [multiproc_executor.py:654] | |
| (Worker_PP0 pid=290445) WARNING 09-11 22:51:21 [fused_moe.py:727] Using default MoE config. Performance might be sub-optimal! Config file not found at ['/home/ubuntuai/vllm_source/vllm/model_executor/layers/fused_moe/configs/E=512,N=512,device_name=NVIDIA_GeForce_RTX_5090.json'] | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] WorkerProc hit an exception. | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] Traceback (most recent call last): | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/executor/multiproc_executor.py", line 649, in worker_busy_loop | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] output = func(*args, **kwargs) | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] return func(*args, **kwargs) | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_worker.py", line 263, in determine_available_memory | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] self.model_runner.profile_run() | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_model_runner.py", line 3031, in profile_run | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] = self._dummy_run(self.max_num_tokens, is_profile=True) | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] return func(*args, **kwargs) | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_model_runner.py", line 2809, in _dummy_run | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] outputs = self.model( | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] ^^^^^^^^^^^ | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] return self._call_impl(*args, **kwargs) | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] return forward_call(*args, **kwargs) | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 1165, in forward | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] hidden_states = self.model(input_ids, positions, intermediate_tensors, | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/compilation/decorators.py", line 223, in __call__ | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] return self.forward(*args, **kwargs) | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 945, in forward | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] hidden_states, residual = layer( | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] ValueError: too many values to unpack (expected 2) | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] Traceback (most recent call last): | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/executor/multiproc_executor.py", line 649, in worker_busy_loop | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] output = func(*args, **kwargs) | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] return func(*args, **kwargs) | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_worker.py", line 263, in determine_available_memory | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] self.model_runner.profile_run() | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_model_runner.py", line 3031, in profile_run | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] = self._dummy_run(self.max_num_tokens, is_profile=True) | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] return func(*args, **kwargs) | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/v1/worker/gpu_model_runner.py", line 2809, in _dummy_run | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] outputs = self.model( | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] ^^^^^^^^^^^ | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] return self._call_impl(*args, **kwargs) | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] return forward_call(*args, **kwargs) | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 1165, in forward | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] hidden_states = self.model(input_ids, positions, intermediate_tensors, | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/compilation/decorators.py", line 223, in __call__ | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] return self.forward(*args, **kwargs) | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] File "/home/ubuntuai/vllm_source/vllm/model_executor/models/qwen3_next.py", line 945, in forward | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] hidden_states, residual = layer( | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] ^^^^^^^^^^^^^^^^^^^^^^^ | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] ValueError: too many values to unpack (expected 2) | |
| (Worker_PP0 pid=290445) ERROR 09-11 22:51:23 [multiproc_executor.py:654] | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] EngineCore failed to start. | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] Traceback (most recent call last): | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] File "/home/ubuntuai/vllm_source/vllm/v1/engine/core.py", line 709, in run_engine_core | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] engine_core = EngineCoreProc(*args, **kwargs) | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] File "/home/ubuntuai/vllm_source/vllm/v1/engine/core.py", line 505, in __init__ | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] super().__init__(vllm_config, executor_class, log_stats, | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] File "/home/ubuntuai/vllm_source/vllm/v1/engine/core.py", line 91, in __init__ | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] self._initialize_kv_caches(vllm_config) | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] File "/home/ubuntuai/vllm_source/vllm/v1/engine/core.py", line 183, in _initialize_kv_caches | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] self.model_executor.determine_available_memory()) | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] File "/home/ubuntuai/vllm_source/vllm/v1/executor/abstract.py", line 84, in determine_available_memory | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] return self.collective_rpc("determine_available_memory") | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] File "/home/ubuntuai/vllm_source/vllm/v1/executor/multiproc_executor.py", line 257, in collective_rpc | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] result = result.result() | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] ^^^^^^^^^^^^^^^ | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] File "/usr/lib/python3.12/concurrent/futures/_base.py", line 456, in result | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] return self.__get_result() | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] ^^^^^^^^^^^^^^^^^^^ | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] File "/usr/lib/python3.12/concurrent/futures/_base.py", line 401, in __get_result | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] raise self._exception | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] File "/usr/lib/python3.12/concurrent/futures/thread.py", line 59, in run | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] result = self.fn(*self.args, **self.kwargs) | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] File "/home/ubuntuai/vllm_source/vllm/v1/executor/multiproc_executor.py", line 243, in get_response | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] raise RuntimeError( | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:23 [core.py:718] RuntimeError: Worker failed with error 'too many values to unpack (expected 2)', please check the stack trace above for the root cause | |
| (EngineCore_DP0 pid=290315) ERROR 09-11 22:51:24 [multiproc_executor.py:149] Worker proc VllmWorker-2 died unexpectedly, shutting down executor. | |
| (EngineCore_DP0 pid=290315) Process EngineCore_DP0: | |
| (EngineCore_DP0 pid=290315) Traceback (most recent call last): | |
| (EngineCore_DP0 pid=290315) File "/usr/lib/python3.12/multiprocessing/process.py", line 314, in _bootstrap | |
| (EngineCore_DP0 pid=290315) self.run() | |
| (EngineCore_DP0 pid=290315) File "/usr/lib/python3.12/multiprocessing/process.py", line 108, in run | |
| (EngineCore_DP0 pid=290315) self._target(*self._args, **self._kwargs) | |
| (EngineCore_DP0 pid=290315) File "/home/ubuntuai/vllm_source/vllm/v1/engine/core.py", line 722, in run_engine_core | |
| (EngineCore_DP0 pid=290315) raise e | |
| (EngineCore_DP0 pid=290315) File "/home/ubuntuai/vllm_source/vllm/v1/engine/core.py", line 709, in run_engine_core | |
| (EngineCore_DP0 pid=290315) engine_core = EngineCoreProc(*args, **kwargs) | |
| (EngineCore_DP0 pid=290315) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (EngineCore_DP0 pid=290315) File "/home/ubuntuai/vllm_source/vllm/v1/engine/core.py", line 505, in __init__ | |
| (EngineCore_DP0 pid=290315) super().__init__(vllm_config, executor_class, log_stats, | |
| (EngineCore_DP0 pid=290315) File "/home/ubuntuai/vllm_source/vllm/v1/engine/core.py", line 91, in __init__ | |
| (EngineCore_DP0 pid=290315) self._initialize_kv_caches(vllm_config) | |
| (EngineCore_DP0 pid=290315) File "/home/ubuntuai/vllm_source/vllm/v1/engine/core.py", line 183, in _initialize_kv_caches | |
| (EngineCore_DP0 pid=290315) self.model_executor.determine_available_memory()) | |
| (EngineCore_DP0 pid=290315) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (EngineCore_DP0 pid=290315) File "/home/ubuntuai/vllm_source/vllm/v1/executor/abstract.py", line 84, in determine_available_memory | |
| (EngineCore_DP0 pid=290315) return self.collective_rpc("determine_available_memory") | |
| (EngineCore_DP0 pid=290315) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (EngineCore_DP0 pid=290315) File "/home/ubuntuai/vllm_source/vllm/v1/executor/multiproc_executor.py", line 257, in collective_rpc | |
| (EngineCore_DP0 pid=290315) result = result.result() | |
| (EngineCore_DP0 pid=290315) ^^^^^^^^^^^^^^^ | |
| (EngineCore_DP0 pid=290315) File "/usr/lib/python3.12/concurrent/futures/_base.py", line 456, in result | |
| (EngineCore_DP0 pid=290315) return self.__get_result() | |
| (EngineCore_DP0 pid=290315) ^^^^^^^^^^^^^^^^^^^ | |
| (EngineCore_DP0 pid=290315) File "/usr/lib/python3.12/concurrent/futures/_base.py", line 401, in __get_result | |
| (EngineCore_DP0 pid=290315) raise self._exception | |
| (EngineCore_DP0 pid=290315) File "/usr/lib/python3.12/concurrent/futures/thread.py", line 59, in run | |
| (EngineCore_DP0 pid=290315) result = self.fn(*self.args, **self.kwargs) | |
| (EngineCore_DP0 pid=290315) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (EngineCore_DP0 pid=290315) File "/home/ubuntuai/vllm_source/vllm/v1/executor/multiproc_executor.py", line 243, in get_response | |
| (EngineCore_DP0 pid=290315) raise RuntimeError( | |
| (EngineCore_DP0 pid=290315) RuntimeError: Worker failed with error 'too many values to unpack (expected 2)', please check the stack trace above for the root cause | |
| (APIServer pid=290069) Traceback (most recent call last): | |
| (APIServer pid=290069) File "/home/ubuntuai/vllm_source/.venv/bin/vllm", line 10, in <module> | |
| (APIServer pid=290069) sys.exit(main()) | |
| (APIServer pid=290069) ^^^^^^ | |
| (APIServer pid=290069) File "/home/ubuntuai/vllm_source/vllm/entrypoints/cli/main.py", line 54, in main | |
| (APIServer pid=290069) args.dispatch_function(args) | |
| (APIServer pid=290069) File "/home/ubuntuai/vllm_source/vllm/entrypoints/cli/serve.py", line 50, in cmd | |
| (APIServer pid=290069) uvloop.run(run_server(args)) | |
| (APIServer pid=290069) File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/uvloop/__init__.py", line 109, in run | |
| (APIServer pid=290069) return __asyncio.run( | |
| (APIServer pid=290069) ^^^^^^^^^^^^^^ | |
| (APIServer pid=290069) File "/usr/lib/python3.12/asyncio/runners.py", line 195, in run | |
| (APIServer pid=290069) return runner.run(main) | |
| (APIServer pid=290069) ^^^^^^^^^^^^^^^^ | |
| (APIServer pid=290069) File "/usr/lib/python3.12/asyncio/runners.py", line 118, in run | |
| (APIServer pid=290069) return self._loop.run_until_complete(task) | |
| (APIServer pid=290069) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (APIServer pid=290069) File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete | |
| (APIServer pid=290069) File "/home/ubuntuai/vllm_source/.venv/lib/python3.12/site-packages/uvloop/__init__.py", line 61, in wrapper | |
| (APIServer pid=290069) return await main | |
| (APIServer pid=290069) ^^^^^^^^^^ | |
| (APIServer pid=290069) File "/home/ubuntuai/vllm_source/vllm/entrypoints/openai/api_server.py", line 1941, in run_server | |
| (APIServer pid=290069) await run_server_worker(listen_address, sock, args, **uvicorn_kwargs) | |
| (APIServer pid=290069) File "/home/ubuntuai/vllm_source/vllm/entrypoints/openai/api_server.py", line 1961, in run_server_worker | |
| (APIServer pid=290069) async with build_async_engine_client( | |
| (APIServer pid=290069) ^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (APIServer pid=290069) File "/usr/lib/python3.12/contextlib.py", line 210, in __aenter__ | |
| (APIServer pid=290069) return await anext(self.gen) | |
| (APIServer pid=290069) ^^^^^^^^^^^^^^^^^^^^^ | |
| (APIServer pid=290069) File "/home/ubuntuai/vllm_source/vllm/entrypoints/openai/api_server.py", line 179, in build_async_engine_client | |
| (APIServer pid=290069) async with build_async_engine_client_from_engine_args( | |
| (APIServer pid=290069) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (APIServer pid=290069) File "/usr/lib/python3.12/contextlib.py", line 210, in __aenter__ | |
| (APIServer pid=290069) return await anext(self.gen) | |
| (APIServer pid=290069) ^^^^^^^^^^^^^^^^^^^^^ | |
| (APIServer pid=290069) File "/home/ubuntuai/vllm_source/vllm/entrypoints/openai/api_server.py", line 221, in build_async_engine_client_from_engine_args | |
| (APIServer pid=290069) async_llm = AsyncLLM.from_vllm_config( | |
| (APIServer pid=290069) ^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (APIServer pid=290069) File "/home/ubuntuai/vllm_source/vllm/utils/__init__.py", line 1589, in inner | |
| (APIServer pid=290069) return fn(*args, **kwargs) | |
| (APIServer pid=290069) ^^^^^^^^^^^^^^^^^^^ | |
| (APIServer pid=290069) File "/home/ubuntuai/vllm_source/vllm/v1/engine/async_llm.py", line 205, in from_vllm_config | |
| (APIServer pid=290069) return cls( | |
| (APIServer pid=290069) ^^^^ | |
| (APIServer pid=290069) File "/home/ubuntuai/vllm_source/vllm/v1/engine/async_llm.py", line 129, in __init__ | |
| (APIServer pid=290069) self.engine_core = EngineCoreClient.make_async_mp_client( | |
| (APIServer pid=290069) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (APIServer pid=290069) File "/home/ubuntuai/vllm_source/vllm/v1/engine/core_client.py", line 102, in make_async_mp_client | |
| (APIServer pid=290069) return AsyncMPClient(*client_args) | |
| (APIServer pid=290069) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (APIServer pid=290069) File "/home/ubuntuai/vllm_source/vllm/v1/engine/core_client.py", line 769, in __init__ | |
| (APIServer pid=290069) super().__init__( | |
| (APIServer pid=290069) File "/home/ubuntuai/vllm_source/vllm/v1/engine/core_client.py", line 448, in __init__ | |
| (APIServer pid=290069) with launch_core_engines(vllm_config, executor_class, | |
| (APIServer pid=290069) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| (APIServer pid=290069) File "/usr/lib/python3.12/contextlib.py", line 144, in __exit__ | |
| (APIServer pid=290069) next(self.gen) | |
| (APIServer pid=290069) File "/home/ubuntuai/vllm_source/vllm/v1/engine/utils.py", line 729, in launch_core_engines | |
| (APIServer pid=290069) wait_for_engine_startup( | |
| (APIServer pid=290069) File "/home/ubuntuai/vllm_source/vllm/v1/engine/utils.py", line 782, in wait_for_engine_startup | |
| (APIServer pid=290069) raise RuntimeError("Engine core initialization failed. " | |
| (APIServer pid=290069) RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): { |
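Note (not part of the log): the root cause reported by the worker is the `ValueError: too many values to unpack (expected 2)` raised at the `hidden_states, residual = layer(...)` call in `vllm/model_executor/models/qwen3_next.py:945`, which then propagates through `profile_run` / `determine_available_memory` and aborts engine-core startup. As a hedged illustration of the generic Python failure mode only, the sketch below reproduces the same exception; the function names and return values are hypothetical stand-ins, not the actual vLLM `Qwen3Next` decoder-layer code.

```python
# Illustrative sketch only: reproduces the Python error seen in the traceback
# ("too many values to unpack (expected 2)"). The layer functions below are
# hypothetical stand-ins, NOT vLLM's Qwen3Next implementation.
import torch


def layer_returning_two(x: torch.Tensor, residual: torch.Tensor):
    # Contract expected at the call site: return exactly (hidden_states, residual).
    return x + 1, residual


def layer_returning_three(x: torch.Tensor, residual: torch.Tensor):
    # If some code path returns an extra value, two-name unpacking fails.
    return x + 1, residual, {"aux": True}


x = torch.zeros(4)
res = torch.zeros(4)

hidden_states, residual = layer_returning_two(x, res)  # unpacks fine

try:
    hidden_states, residual = layer_returning_three(x, res)  # same error as the log
except ValueError as e:
    print(e)  # -> too many values to unpack (expected 2)
```

In other words, during the dummy profiling forward pass the decoder layer returned more (or differently shaped) values than the two the caller unpacks; why it does so in this configuration is not determinable from the log alone.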