Skip to content

Instantly share code, notes, and snippets.

@shunting314
Created December 11, 2025 19:53
Show Gist options
  • Select an option

  • Save shunting314/9b1861d65d283417c4c5e483b5fd6bb5 to your computer and use it in GitHub Desktop.

Select an option

Save shunting314/9b1861d65d283417c4c5e483b5fd6bb5 to your computer and use it in GitHub Desktop.
import torch
from torch import nn
from torch import distributed
import contextlib
import os
from vllm import LLM, SamplingParams
# vLLM runtime switches, set through the process environment before the engine
# is constructed.
# NOTE(review): presumably multiprocessing is disabled so the engine runs in
# this process and the profiler below can observe it — confirm against vLLM docs.
os.environ.update({"VLLM_ENABLE_V1_MULTIPROCESSING": "0"})
# Respect an attention backend chosen by the caller's environment; otherwise
# fall back to FLEX_ATTENTION.
os.environ.setdefault("VLLM_ATTENTION_BACKEND", "FLEX_ATTENTION")
class script_args:
    """Static configuration for this script.

    The class is used as a plain namespace: its attributes are read directly
    off the class by the ``__main__`` section and it is never instantiated.
    """

    # Model identifier passed to ``LLM(model=...)``.
    # Alternative kept from the original script:
    # model_name = "Qwen/Qwen3-0.6B"
    model_name = "meta-llama/Meta-Llama-3-8B"

    # When True, a warmup generation is run first and the measured generation
    # is wrapped in ``torch.profiler.profile``; the trace is exported afterwards.
    profile = True

    # When True, no explicit compilation config is passed to ``LLM`` (None);
    # when False, a CompilationConfig with cudagraph mode and compilation mode
    # both set to NONE is passed instead.
    compile = True
def main(trace_path="/tmp/profile.json"):
    """Generate completions with vLLM, optionally under the PyTorch profiler.

    Behavior is driven by ``script_args``:

    * ``script_args.compile`` selects between passing no compilation config
      (None) and an explicit config with cudagraph mode and compilation mode
      both set to NONE.
    * ``script_args.profile`` adds one un-profiled warmup generation, wraps
      the measured generation in ``torch.profiler.profile(with_stack=True)``,
      and exports a Chrome trace afterwards.

    Args:
        trace_path: Destination file for the Chrome trace; only used when
            ``script_args.profile`` is true.
    """
    if script_args.compile:
        compilation_config = None
    else:
        # Imported lazily: these names are only needed on this path.
        from vllm.config import CompilationConfig, CUDAGraphMode, CompilationMode

        compilation_config = CompilationConfig(
            cudagraph_mode=CUDAGraphMode.NONE,
            mode=CompilationMode.NONE,
        )

    llm = LLM(model=script_args.model_name, compilation_config=compilation_config)
    sampling_params = SamplingParams(temperature=0.7, max_tokens=128)
    # sampling_params = SamplingParams(temperature=0, max_tokens=128)

    # The same prompt is submitted four times in one batch.
    requests = [
        # "Tell me a joke.",
        "How to estimate the value of pi in mathematics?",
        "How to estimate the value of pi in mathematics?",
        "How to estimate the value of pi in mathematics?",
        "How to estimate the value of pi in mathematics?",
        # "How does quicksort works?",
    ]

    if script_args.profile:
        # Warm up first so the profiled run below is not the very first
        # generation call.
        llm.generate(requests, sampling_params)
        profile = torch.profiler.profile(with_stack=True)
    else:
        profile = contextlib.nullcontext()

    with profile:
        outputs = llm.generate(requests, sampling_params)

    assert len(outputs) == len(requests), (
        f"expected {len(requests)} outputs, got {len(outputs)}"
    )
    for i, output in enumerate(outputs):
        print(f"Response for request {i}: {output.outputs[0].text}")

    if script_args.profile:
        # Export only after the profiler context has exited.
        profile.export_chrome_trace(trace_path)
        print(f"Profile written to {trace_path}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment