Runs on Slurm.

- Launch script: `sbatch run.sh`
- Test script: `bash test.sh`
`qwq_function_tool_test.py` asks the model to call a mock `query_weather` tool for a random city, feeds the tool result back, and prints the full conversation:

```python
from qwen_agent.llm import get_chat_model

import os
import random
import json

api_key = os.environ["APIKEY"]
server_addr = os.environ["SERVER_ADDR"]
server_port = os.environ["SERVER_PORT"]
model_name = os.environ["MODEL_NAME"]

user_content_tp = "请使用 query_weather 工具查询{}的天气,并返回结果。"

base_url = f"http://{server_addr}:{server_port}/v1"
llm = get_chat_model({
    "model": model_name,
    "model_server": base_url,
    "api_key": api_key,
    "generate_cfg": {
        "extra_body": {
            "chat_template_kwargs": {"enable_thinking": False}  # default to True
        }
    }
})


def get_weather(need_city):
    # Mock tool: return a canned weather string per city.
    weather = "晴天"
    if need_city.startswith("北京"):
        weather = "阴天"
    elif need_city.startswith("上海"):
        weather = "多云"
    elif need_city.startswith("广州"):
        weather = "小雨"
    elif need_city.startswith("深圳"):
        weather = "大雨"
    elif need_city.startswith("杭州"):
        weather = "雷阵雨"
    elif need_city.startswith("成都"):
        weather = "小雪"
    elif need_city.startswith("武汉"):
        weather = "大雪"
    elif need_city.startswith("西安"):
        weather = "暴风雪"
    elif need_city.startswith("重庆"):
        weather = "大风"
    elif need_city.startswith("南京"):
        weather = "雾霾"
    elif need_city.startswith("天津"):
        weather = "沙尘暴"
    elif need_city.startswith("苏州"):
        weather = "冰雹"
    elif need_city.startswith("长沙"):
        weather = "霜冻"
    return weather


def get_random_city():
    cities = [
        "北京", "上海", "广州", "深圳", "杭州", "成都", "武汉",
        "西安", "重庆", "南京", "天津", "苏州", "长沙",
    ]
    return random.choice(cities)


def get_tool():
    return {
        "type": "function",
        "function": {
            "name": "query_weather",
            "description": "Get the weather of a location; the user should supply a location first",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "type": "string",
                        "description": "The city, e.g. Beijing"
                    }
                },
                "required": ["city"]
            }
        }
    }


def chat_completion_request(messages, tools=None):
    # llm.chat streams incremental responses; keep only the final batch.
    responses = []
    for responses in llm.chat(
        messages=messages,
        functions=tools,
    ):
        pass
    messages.extend(responses)
    return messages


def main():
    messages = [
        {
            "role": "user",
            "content": user_content_tp.format(get_random_city())
        }
    ]
    print(f"本次请求: {messages[0]['content']}")
    res = chat_completion_request(messages=messages, tools=[get_tool()])
    # Iterate over a snapshot: the loop body appends to this same list.
    for res_msg in list(res):
        if fn_call := res_msg.get("function_call", None):
            fn_name: str = fn_call["name"]
            fn_args: dict = json.loads(fn_call["arguments"])
            fn_res: str = get_weather(fn_args["city"])
            # Return the tool result to the model and request the final answer.
            messages.append({
                "role": "function",
                "name": fn_name,
                "content": fn_res,
            })
            res = chat_completion_request(messages=messages, tools=[get_tool()])
    print(json.dumps(res, ensure_ascii=False, indent=4))


if __name__ == "__main__":
    for i in range(100):
        main()
```
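For comparison, the same request can be made without qwen_agent through the OpenAI-compatible endpoint vLLM exposes. Below is a minimal sketch using the official `openai` Python client (>=1.0, assumed installed). Note that parsed `tool_calls` only come back if the server is launched with tool-call parsing enabled (e.g. vLLM's `--enable-auto-tool-choice --tool-call-parser hermes`), which run.sh below does not do; qwen_agent instead handles function calling on the client side, which is why the test above uses it.

```python
import os
from openai import OpenAI

# Same connection settings as qwq_function_tool_test.py.
client = OpenAI(
    base_url=f"http://{os.environ['SERVER_ADDR']}:{os.environ['SERVER_PORT']}/v1",
    api_key=os.environ["APIKEY"],
)

resp = client.chat.completions.create(
    model=os.environ["MODEL_NAME"],
    messages=[{"role": "user", "content": "请使用 query_weather 工具查询北京的天气,并返回结果。"}],
    tools=[{
        "type": "function",
        "function": {
            "name": "query_weather",
            "description": "Get the weather of a location; the user should supply a location first",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {"type": "string", "description": "The city, e.g. Beijing"}
                },
                "required": ["city"],
            },
        },
    }],
)

# With server-side tool parsing enabled, the call appears here instead of in content.
print(resp.choices[0].message.tool_calls)
```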
`run.sh` starts the vLLM server with the BF16 QwQ-32B checkpoint on 4 GPUs (TP=4):

```bash
#!/bin/bash
#SBATCH --gpus=4
#SBATCH -p gpu_4090

# Start server with BF16 model on 4 GPUs using TP=4
source /data/apps/miniforge3/25.11.0-1/etc/profile.d/conda.sh
conda activate qwen_py310-new

tensor_parallel_size=4
pipeline_parallel_size=1
max_model_len=4096
max_num_batched_tokens=65535
max_num_seqs=128
ckpt_path="./QwQ-32B"

vllm serve \
    "$ckpt_path" \
    --host 0.0.0.0 \
    --trust-remote-code \
    --tensor-parallel-size ${tensor_parallel_size} \
    --pipeline-parallel-size ${pipeline_parallel_size} \
    --max-model-len ${max_model_len} \
    --max-num-batched-tokens ${max_num_batched_tokens} \
    --gpu-memory-utilization 0.9 \
    --disable-log-requests \
    --max-num-seqs ${max_num_seqs} \
    --api-key '123456' \
    --reasoning-parser qwen3
```
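QwQ-32B takes a while to load, so the Slurm job can be RUNNING long before the server answers requests. Below is a small readiness poll, sketched in Python with `requests` (assumed installed) against the `/v1/models` endpoint; the node name is a placeholder, and port 8000 is vLLM's default since run.sh passes no `--port`:

```python
import time
import requests

node = "gpu-node-01"  # placeholder: substitute the node squeue reports for the job
url = f"http://{node}:8000/v1/models"
headers = {"Authorization": "Bearer 123456"}  # matches --api-key in run.sh

# Poll until the model list answers, i.e. the weights have finished loading.
for _ in range(120):
    try:
        r = requests.get(url, headers=headers, timeout=5)
        if r.status_code == 200:
            print("server ready:", [m["id"] for m in r.json()["data"]])
            break
    except requests.ConnectionError:
        pass
    time.sleep(10)
else:
    raise SystemExit("server did not come up within 20 minutes")
```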
`test.sh` locates the job's node, checks that the server port is open, and runs the test:

```bash
#!/bin/bash
# Reference: https://qwen.readthedocs.io/en/latest/framework/function_call.html#vllm

# Node of the most recent job: column 8 (NODELIST) of the default squeue layout.
SERVER_ADDR=$(squeue | tail -n1 | awk '{print $8}')

ssh "$SERVER_ADDR" "lsof -i:8000"
if [ $? -eq 0 ]; then
    echo "Server is up"
else
    echo "Server is not running, aborting the test"
    exit 1
fi

source /data/apps/miniforge3/25.11.0-1/etc/profile.d/conda.sh
conda activate qwen_py310-new

SERVER_ADDR=${SERVER_ADDR} SERVER_PORT=8000 APIKEY=123456 MODEL_NAME='./QwQ-32B' python qwq_function_tool_test.py
```
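The `squeue | tail -n1 | awk '{print $8}'` pipeline relies on the default squeue column layout and on there being exactly one relevant job. The same check can be made a little more explicit; here is a sketch in Python using standard squeue format flags, probing the port directly instead of running lsof over ssh (this assumes the compute node is reachable from wherever the test runs):

```python
import os
import socket
import subprocess

# List the nodelists of this user's running jobs, no header, one per line.
out = subprocess.run(
    ["squeue", "-h", "-u", os.environ["USER"], "-t", "RUNNING", "-o", "%N"],
    capture_output=True, text=True, check=True,
).stdout.split()
node = out[-1]  # same "take the last job" choice as test.sh

# Probe the vLLM port directly rather than ssh + lsof.
try:
    socket.create_connection((node, 8000), timeout=3).close()
    print(f"server is up on {node}:8000")
except OSError:
    raise SystemExit("server not running, aborting the test")
```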