Skip to content

Instantly share code, notes, and snippets.

@bbrowning
Created September 11, 2025 17:27
Show Gist options
  • Select an option

  • Save bbrowning/428b6a26f9f419ae672f38621eaf819c to your computer and use it in GitHub Desktop.

Select an option

Save bbrowning/428b6a26f9f419ae672f38621eaf819c to your computer and use it in GitHub Desktop.
Llguidance, vLLM guided_grammar, and Hermes models
import json
from openai import OpenAI
def hermes_grammar_from_tools(tools: list[dict]) -> str:
    """Assemble the complete grammar text for Hermes-style tool calls.

    The ``start`` rule accepts zero or more per-tool rules (``fun_<name>``,
    one alternative per entry in ``tools``) followed by ``f_end``, which is
    plain ``TEXT``. The shared terminals (``TEXT`` and the ``<tool_call>`` /
    ``</tool_call>`` tokens) are declared here; the per-tool rules are
    appended from ``tools_to_json_grammar``.

    Args:
        tools: Tool definitions; each must provide ``["function"]["name"]``.

    Returns:
        The grammar as a single newline-separated string.
    """
    # One alternative per tool, e.g. "fun_a | fun_b".
    alternatives = " | ".join(
        f"fun_{entry['function']['name']}" for entry in tools
    )
    shared_rules = [
        f"start: ({alternatives})* f_end\n",
        "f_end: TEXT\n",
        # TEXT matches any run of characters, newlines included.
        "TEXT: /(.|\\n)*/\n",
        'TOOL_CALL_START_TOKEN: "<tool_call>"\n',
        'TOOL_CALL_END_TOKEN: "</tool_call>"\n',
    ]
    return "".join(shared_rules) + tools_to_json_grammar(tools)
def tools_to_json_grammar(tools: list[dict]) -> str:
    """Return the per-tool grammar rules for Hermes-style tool calls.

    Two rules are emitted for every tool:

    * ``fun_<name>_lz[lazy]`` -- ``TEXT`` followed by
      ``TOOL_CALL_START_TOKEN``.
    * ``fun_<name>`` -- the lazy rule, a newline, a ``%json`` schema
      describing ``{"name": <const>, "arguments": {...}}``, another newline
      and ``TOOL_CALL_END_TOKEN``.

    ``TEXT`` and the two token terminals are referenced but not declared
    here; ``hermes_grammar_from_tools`` declares them.

    Args:
        tools: Tool definitions. Each must provide ``["function"]["name"]``.
            ``["function"]["parameters"]`` and its ``"properties"`` /
            ``"required"`` members are optional: a missing ``parameters`` or
            ``properties`` is treated as an empty property map, and a missing
            ``required`` is simply left out of the schema.

    Returns:
        The concatenated rules (empty string for an empty ``tools`` list).

    Raises:
        KeyError: If a tool lacks ``"function"`` or the function lacks
            ``"name"``.
    """
    rules = []
    for tool in tools:
        function = tool["function"]
        name = function["name"]
        # NOTE(review): the tool name is used verbatim in the rule name; names
        # containing characters that are not valid in a grammar rule
        # identifier would presumably yield an invalid grammar -- confirm.
        rule_name = f"fun_{name}"

        # Tolerate tools that declare no parameters or no required list
        # instead of failing with KeyError.
        parameters = function.get("parameters") or {}
        properties = parameters.get("properties") or {}
        required = parameters.get("required")

        argument_members = [
            '"type": "object"',
            f'"properties": {json.dumps(properties)}',
        ]
        if required is not None:
            argument_members.append(f'"required": {json.dumps(required)}')
        arguments_body = ",".join(argument_members)

        # The separators below intentionally reproduce the original layout
        # (no space after the commas between top-level members).
        schema = "".join(
            [
                '{"type": "object",',
                '"properties": {',
                # json.dumps escapes quotes/backslashes in the tool name.
                f'"name": {{"const": {json.dumps(name)}}},',
                f'"arguments": {{{arguments_body}}}',
                "},",
                '"required": ["name", "arguments"]',
                "}",
            ]
        )

        rules.append(f"{rule_name}_lz[lazy]: TEXT TOOL_CALL_START_TOKEN\n")
        rules.append(
            f'{rule_name}: {rule_name}_lz "\\n" %json {schema} "\\n" TOOL_CALL_END_TOKEN\n'
        )
    return "".join(rules)
def main():
    """Send a two-part question to a local OpenAI-compatible server.

    Three function tools are declared (weather, sunset time, stock prices),
    a grammar is generated from them with ``hermes_grammar_from_tools`` and
    passed as ``guided_grammar`` in the request's ``extra_body``. The raw
    response object is printed.
    """

    def function_tool(name, description, properties, required):
        # Wrap one function description in the tool layout sent to the API.
        return {
            "type": "function",
            "function": {
                "name": name,
                "description": description,
                "parameters": {
                    "type": "object",
                    "properties": properties,
                    "required": required,
                },
            },
        }

    # The API key is a placeholder; the base URL points at localhost.
    client = OpenAI(
        api_key="dummy",
        base_url="http://localhost:8000/v1",
    )

    question = (
        "What is the temperature in Boston today in fahrenheit? "
        "And when does the sun set?"
    )

    tools = [
        function_tool(
            "get_weather",
            "Get the current weather in a given location",
            {
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA",
                },
                "unit": {
                    "type": "string",
                    "enum": ["celsius", "fahrenheit"],
                    "description": "The unit of temperature",
                },
            },
            ["location", "unit"],
        ),
        function_tool(
            "get_time_of_sunset",
            "Get the time of sunset in a given location",
            {
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA",
                },
            },
            ["location"],
        ),
        function_tool(
            "get_stock_prices",
            "Get the price of a stock",
            {
                "ticker": {
                    "type": "string",
                    "description": "The stock ticker, e.g. AAPL",
                },
                "currency": {
                    "type": "string",
                    "description": "The currency, e.g. USD",
                    "default": "USD",
                },
            },
            ["ticker"],
        ),
    ]

    # Constrain generation with the grammar derived from the same tool list.
    grammar = hermes_grammar_from_tools(tools)

    response = client.chat.completions.create(
        model="Qwen/Qwen3-0.6B",
        messages=[{"role": "user", "content": question}],
        tools=tools,
        tool_choice="auto",
        extra_body={"guided_grammar": grammar},
    )
    print(response)
# Script entry point: run the demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment