Runs on Slurm.

- Launch script: `sbatch run.sh`
- Test script: `bash test.sh`
`qwq_function_tool_test.py` asks the model to call a mock `query_weather` tool for a random city, feeds the tool result back, and prints the full conversation:

```python
from qwen_agent.llm import get_chat_model

import os
import random
import json

api_key = os.environ["APIKEY"]
server_addr = os.environ["SERVER_ADDR"]
server_port = os.environ["SERVER_PORT"]
model_name = os.environ["MODEL_NAME"]

user_content_tp = "请使用 query_weather 工具查询{}的天气,并返回结果。"

base_url = f"http://{server_addr}:{server_port}/v1"
llm = get_chat_model({
    "model": model_name,
    "model_server": base_url,
    "api_key": api_key,
    "generate_cfg": {
        "extra_body": {
            "chat_template_kwargs": {"enable_thinking": False}  # default to True
        }
    }
})


def get_weather(need_city):
    # Mock tool: return a canned weather string per city.
    weather = "晴天"
    if need_city.startswith("北京"):
        weather = "阴天"
    elif need_city.startswith("上海"):
        weather = "多云"
    elif need_city.startswith("广州"):
        weather = "小雨"
    elif need_city.startswith("深圳"):
        weather = "大雨"
    elif need_city.startswith("杭州"):
        weather = "雷阵雨"
    elif need_city.startswith("成都"):
        weather = "小雪"
    elif need_city.startswith("武汉"):
        weather = "大雪"
    elif need_city.startswith("西安"):
        weather = "暴风雪"
    elif need_city.startswith("重庆"):
        weather = "大风"
    elif need_city.startswith("南京"):
        weather = "雾霾"
    elif need_city.startswith("天津"):
        weather = "沙尘暴"
    elif need_city.startswith("苏州"):
        weather = "冰雹"
    elif need_city.startswith("长沙"):
        weather = "霜冻"
    return weather


def get_random_city():
    cities = [
        "北京", "上海", "广州", "深圳", "杭州", "成都", "武汉",
        "西安", "重庆", "南京", "天津", "苏州", "长沙",
    ]
    return random.choice(cities)


def get_tool():
    return {
        "type": "function",
        "function": {
            "name": "query_weather",
            "description": "Get the weather of a location; the user should supply a location first",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "type": "string",
                        "description": "The city, e.g. Beijing"
                    }
                },
                "required": ["city"]
            }
        }
    }


def chat_completion_request(messages, tools=None):
    # llm.chat streams incremental responses; keep only the final batch.
    responses = []
    for responses in llm.chat(
        messages=messages,
        functions=tools,
    ):
        pass
    messages.extend(responses)
    return messages


def main():
    messages = [
        {
            "role": "user",
            "content": user_content_tp.format(get_random_city())
        }
    ]
    print(f"本次请求: {messages[0]['content']}")
    res = chat_completion_request(messages=messages, tools=[get_tool()])
    # Iterate over a snapshot: the loop body appends to this same list.
    for res_msg in list(res):
        if fn_call := res_msg.get("function_call", None):
            fn_name: str = fn_call["name"]
            fn_args: dict = json.loads(fn_call["arguments"])
            fn_res: str = get_weather(fn_args["city"])
            # Return the tool result to the model and request the final answer.
            messages.append({
                "role": "function",
                "name": fn_name,
                "content": fn_res,
            })
            res = chat_completion_request(messages=messages, tools=[get_tool()])
    print(json.dumps(res, ensure_ascii=False, indent=4))


if __name__ == "__main__":
    for i in range(100):
        main()
```
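For comparison, the same request can be made without qwen_agent through the OpenAI-compatible endpoint vLLM exposes. Below is a minimal sketch using the official `openai` Python client (>=1.0, assumed installed). Note that parsed `tool_calls` only come back if the server is launched with tool-call parsing enabled (e.g. vLLM's `--enable-auto-tool-choice --tool-call-parser hermes`), which run.sh below does not do; qwen_agent instead handles function calling on the client side, which is why the test above uses it.

```python
import os
from openai import OpenAI

# Same connection settings as qwq_function_tool_test.py.
client = OpenAI(
    base_url=f"http://{os.environ['SERVER_ADDR']}:{os.environ['SERVER_PORT']}/v1",
    api_key=os.environ["APIKEY"],
)

resp = client.chat.completions.create(
    model=os.environ["MODEL_NAME"],
    messages=[{"role": "user", "content": "请使用 query_weather 工具查询北京的天气,并返回结果。"}],
    tools=[{
        "type": "function",
        "function": {
            "name": "query_weather",
            "description": "Get the weather of a location; the user should supply a location first",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {"type": "string", "description": "The city, e.g. Beijing"}
                },
                "required": ["city"],
            },
        },
    }],
)

# With server-side tool parsing enabled, the call appears here instead of in content.
print(resp.choices[0].message.tool_calls)
```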
`run.sh` starts the vLLM server with the BF16 QwQ-32B checkpoint on 4 GPUs (TP=4):

```bash
#!/bin/bash
#SBATCH --gpus=4
#SBATCH -p gpu_4090

# Start server with BF16 model on 4 GPUs using TP=4
source /data/apps/miniforge3/25.11.0-1/etc/profile.d/conda.sh
conda activate qwen_py310-new

tensor_parallel_size=4
pipeline_parallel_size=1
max_model_len=4096
max_num_batched_tokens=65535
max_num_seqs=128
ckpt_path="./QwQ-32B"

vllm serve \
    "$ckpt_path" \
    --host 0.0.0.0 \
    --trust-remote-code \
    --tensor-parallel-size ${tensor_parallel_size} \
    --pipeline-parallel-size ${pipeline_parallel_size} \
    --max-model-len ${max_model_len} \
    --max-num-batched-tokens ${max_num_batched_tokens} \
    --gpu-memory-utilization 0.9 \
    --disable-log-requests \
    --max-num-seqs ${max_num_seqs} \
    --api-key '123456' \
    --reasoning-parser qwen3
```
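QwQ-32B takes a while to load, so the Slurm job can be RUNNING long before the server answers requests. Below is a small readiness poll, sketched in Python with `requests` (assumed installed) against the `/v1/models` endpoint; the node name is a placeholder, and port 8000 is vLLM's default since run.sh passes no `--port`:

```python
import time
import requests

node = "gpu-node-01"  # placeholder: substitute the node squeue reports for the job
url = f"http://{node}:8000/v1/models"
headers = {"Authorization": "Bearer 123456"}  # matches --api-key in run.sh

# Poll until the model list answers, i.e. the weights have finished loading.
for _ in range(120):
    try:
        r = requests.get(url, headers=headers, timeout=5)
        if r.status_code == 200:
            print("server ready:", [m["id"] for m in r.json()["data"]])
            break
    except requests.ConnectionError:
        pass
    time.sleep(10)
else:
    raise SystemExit("server did not come up within 20 minutes")
```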
`test.sh` locates the job's node, checks that the server port is open, and runs the test:

```bash
#!/bin/bash
# Reference: https://qwen.readthedocs.io/en/latest/framework/function_call.html#vllm

# Node of the most recent job: column 8 (NODELIST) of the default squeue layout.
SERVER_ADDR=$(squeue | tail -n1 | awk '{print $8}')

ssh "$SERVER_ADDR" "lsof -i:8000"
if [ $? -eq 0 ]; then
    echo "Server is up"
else
    echo "Server is not running, aborting the test"
    exit 1
fi

source /data/apps/miniforge3/25.11.0-1/etc/profile.d/conda.sh
conda activate qwen_py310-new

SERVER_ADDR=${SERVER_ADDR} SERVER_PORT=8000 APIKEY=123456 MODEL_NAME='./QwQ-32B' python qwq_function_tool_test.py
```
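The `squeue | tail -n1 | awk '{print $8}'` pipeline relies on the default squeue column layout and on there being exactly one relevant job. The same check can be made a little more explicit; here is a sketch in Python using standard squeue format flags, probing the port directly instead of running lsof over ssh (this assumes the compute node is reachable from wherever the test runs):

```python
import os
import socket
import subprocess

# List the nodelists of this user's running jobs, no header, one per line.
out = subprocess.run(
    ["squeue", "-h", "-u", os.environ["USER"], "-t", "RUNNING", "-o", "%N"],
    capture_output=True, text=True, check=True,
).stdout.split()
node = out[-1]  # same "take the last job" choice as test.sh

# Probe the vLLM port directly rather than ssh + lsof.
try:
    socket.create_connection((node, 8000), timeout=3).close()
    print(f"server is up on {node}:8000")
except OSError:
    raise SystemExit("server not running, aborting the test")
```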