Skip to content

Instantly share code, notes, and snippets.

@alexeldeib
Created January 15, 2026 06:21
Show Gist options
  • Select an option

  • Save alexeldeib/8b40ba5bcfa8711fb18eba994fe2c03a to your computer and use it in GitHub Desktop.

Select an option

Save alexeldeib/8b40ba5bcfa8711fb18eba994fe2c03a to your computer and use it in GitHub Desktop.
nodejs openai load tester (with love from claude)
#!/usr/bin/env node
import { program } from "commander";
import OpenAI from "openai";
import pLimit from "p-limit";
import { readFileSync, writeFileSync } from "fs";
import { extname } from "path";
import { Agent, request } from "undici";
// ─────────────────────────────────────────────────────────────────────────────
// CLI Setup
// ─────────────────────────────────────────────────────────────────────────────
// Define and parse CLI flags. This runs at module load: `.parse()` reads
// process.argv, and commander exits the process itself on missing
// required options. `--replacement`/`--no-replacement` form a negatable
// boolean pair (opts.replacement defaults to true).
program
  .name("load_test")
  .description("Load test an OpenAI-compatible inference endpoint")
  .option("--prompts <path>", "Path to prompts.json file")
  .option("--synthetic", "Use synthetic prompts (like vllm bench)")
  .option("--input-tokens <n>", "Input tokens for synthetic prompts", parseInt, 1500)
  .option("--output-tokens <n>", "Output tokens for synthetic prompts", parseInt, 500)
  .requiredOption("--model <model>", "Model name to use")
  .requiredOption("--url <url>", "Base URL for API")
  .requiredOption("--token <token>", "Bearer token / API key")
  .requiredOption("--project <project>", "OpenAI-Project header value")
  .requiredOption("--concurrency <n>", "Number of concurrent workers", parseInt)
  .option("--total-requests <n>", "Stop after N total requests", parseInt)
  .option("--duration <seconds>", "Stop after N seconds", parseFloat)
  .option("--timeout <seconds>", "Per-request timeout in seconds", parseFloat, 30)
  .option("--replacement", "Sample with replacement (default)", true)
  .option("--no-replacement", "Sample without replacement (cycles on exhaustion)")
  .option("--output <path>", "Output file path (CSV or JSON based on extension)")
  .option("--reasoning-effort <level>", "Reasoning effort: low, medium, high", "low")
  .option("--temperature <n>", "Temperature for sampling", parseFloat, 1)
  .option("--max-tokens <n>", "Maximum tokens in response", parseInt, 8192)
  .option("--raw", "Use raw HTTP with connection pooling (bypass OpenAI SDK)")
  .option("--target-input-tokens <n>", "Truncate/pad real prompts to this token count", parseInt)
  .parse();
const opts = program.opts();
// Cross-flag validation commander cannot express: at least one stop
// condition, and exactly one prompt source.
if (!opts.totalRequests && !opts.duration) {
  console.error("Error: Must specify at least one of --total-requests or --duration");
  process.exit(1);
}
if (!opts.prompts && !opts.synthetic) {
  console.error("Error: Must specify either --prompts <path> or --synthetic");
  process.exit(1);
}
if (opts.prompts && opts.synthetic) {
  console.error("Error: Cannot use both --prompts and --synthetic");
  process.exit(1);
}
// ─────────────────────────────────────────────────────────────────────────────
// Synthetic Prompt Generation (matches vllm bench style)
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Build a single-turn chat prompt whose user content is approximately
 * `inputTokens` tokens long, estimated at ~4 characters per token.
 * The body is random filler sentences drawn from common English words,
 * which keeps the text vaguely realistic (matches vllm bench style).
 *
 * @param {number} inputTokens - approximate token budget for the prompt
 * @returns {Array<{role: string, content: string}>} one-message conversation
 */
function generateSyntheticPrompt(inputTokens) {
  const vocabulary = [
    "the", "be", "to", "of", "and", "a", "in", "that", "have", "I",
    "it", "for", "not", "on", "with", "he", "as", "you", "do", "at",
    "this", "but", "his", "by", "from", "they", "we", "say", "her", "she",
    "or", "an", "will", "my", "one", "all", "would", "there", "their", "what",
    "so", "up", "out", "if", "about", "who", "get", "which", "go", "me",
    "when", "make", "can", "like", "time", "no", "just", "him", "know", "take",
    "people", "into", "year", "your", "good", "some", "could", "them", "see", "other",
    "than", "then", "now", "look", "only", "come", "its", "over", "think", "also",
    "back", "after", "use", "two", "how", "our", "work", "first", "well", "way",
    "even", "new", "want", "because", "any", "these", "give", "day", "most", "us",
  ];
  // ~4 characters per token is a rough heuristic for English text.
  const charBudget = inputTokens * 4;
  let text = "Please analyze the following text and provide a detailed summary:\n\n";
  while (text.length < charBudget) {
    // Random sentence of 10-24 words, capitalized like real prose.
    const wordCount = 10 + Math.floor(Math.random() * 15);
    const picked = Array.from(
      { length: wordCount },
      () => vocabulary[Math.floor(Math.random() * vocabulary.length)]
    );
    picked[0] = picked[0].charAt(0).toUpperCase() + picked[0].slice(1);
    text += `${picked.join(" ")}. `;
  }
  // Clip to the budget so every prompt has a consistent estimated size.
  return [{ role: "user", content: text.slice(0, charBudget) }];
}
/**
 * Pre-generate a fixed pool of synthetic prompts and return a sampler
 * that picks one uniformly at random on each call, avoiding the cost
 * of generating a fresh prompt per request.
 *
 * @param {number} inputTokens - token budget passed to the generator
 * @returns {() => Array<object>} zero-arg sampler returning a conversation
 */
function createSyntheticSampler(inputTokens) {
  const POOL_SIZE = 100;
  const pool = Array.from({ length: POOL_SIZE }, () =>
    generateSyntheticPrompt(inputTokens)
  );
  return () => pool[Math.floor(Math.random() * pool.length)];
}
// ─────────────────────────────────────────────────────────────────────────────
// Load and Validate Prompts
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Read prompts from a JSON file and keep only well-formed conversations.
 * A valid prompt is an array of messages, each with string `role` and
 * string `content`. Exits the process if the file is not a JSON array.
 *
 * @param {string} path - path to a prompts.json file
 * @returns {Array<Array<{role: string, content: string}>>} valid prompts
 */
function loadPrompts(path) {
  console.log(`Loading prompts from ${path}...`);
  const data = JSON.parse(readFileSync(path, "utf-8"));
  if (!Array.isArray(data)) {
    console.error("Error: prompts.json must be an array");
    process.exit(1);
  }
  // A message must be a non-null object with string role and content.
  const isMessage = (msg) =>
    msg &&
    typeof msg === "object" &&
    typeof msg.role === "string" &&
    typeof msg.content === "string";
  const validPrompts = data.filter(
    (item) => Array.isArray(item) && item.every(isMessage)
  );
  const invalidCount = data.length - validPrompts.length;
  console.log(`Loaded ${validPrompts.length} valid prompts (${invalidCount} invalid skipped)`);
  return validPrompts;
}
// ─────────────────────────────────────────────────────────────────────────────
// Prompt Length Normalization
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Heuristic token count for English text: ~4 characters per token,
 * rounded up so non-empty text never estimates to zero tokens.
 *
 * @param {string} text
 * @returns {number} estimated token count
 */
function estimateTokens(text) {
  return Math.ceil(text.length * 0.25);
}
/**
 * Return a copy of `messages` with its LAST user message resized to
 * roughly `targetTokens` tokens (~4 chars/token): longer content is
 * truncated, shorter content is repeated (joined by blank lines) until
 * it reaches the target. Conversations with no user turn, or whose
 * user message is already at the target estimate, are returned as-is.
 *
 * @param {Array<{role: string, content: string}>} messages
 * @param {number} targetTokens
 * @returns {Array<{role: string, content: string}>} (possibly new) array
 */
function normalizePromptLength(messages, targetTokens) {
  const idx = messages.findLastIndex((m) => m.role === "user");
  if (idx === -1) return messages;
  const original = messages[idx].content;
  const estimated = estimateTokens(original);
  if (estimated === targetTokens) return messages;
  const targetChars = targetTokens * 4;
  let resized;
  if (estimated > targetTokens) {
    // Too long: hard truncate at the character budget.
    resized = original.slice(0, targetChars);
  } else {
    // Too short: repeat the content with "\n\n" separators, then clip.
    const parts = [original];
    let total = original.length;
    while (total < targetChars) {
      parts.push(original);
      total += original.length + 2; // +2 for the "\n\n" separator
    }
    resized = parts.join("\n\n").slice(0, targetChars);
  }
  // Copy-on-write: leave the caller's array and message objects intact.
  const updated = [...messages];
  updated[idx] = { ...messages[idx], content: resized };
  return updated;
}
/**
 * Normalize every prompt to ~targetTokens tokens and log summary stats
 * (avg/min/max estimated tokens of the resized user message).
 *
 * @param {Array<Array<object>>} prompts - conversations to normalize
 * @param {number} targetTokens - target token count per user message
 * @returns {Array<Array<object>>} normalized copies of the prompts
 */
function normalizeAllPrompts(prompts, targetTokens) {
  console.log(`Normalizing prompts to ~${targetTokens} tokens...`);
  const normalized = prompts.map((p) => normalizePromptLength(p, targetTokens));
  // Report stats on the LAST user message — that is the one
  // normalizePromptLength resizes (it uses findLastIndex). Using find()
  // here previously measured the FIRST user message, which is untouched
  // in multi-turn conversations, so the reported stats could be wrong.
  const lengths = normalized.map((p) => {
    const userMsg = p.findLast((m) => m.role === "user");
    return userMsg ? estimateTokens(userMsg.content) : 0;
  });
  const avg = lengths.reduce((a, b) => a + b, 0) / lengths.length;
  const min = Math.min(...lengths);
  const max = Math.max(...lengths);
  console.log(`Normalized: avg=${avg.toFixed(0)}, min=${min}, max=${max} tokens`);
  return normalized;
}
// ─────────────────────────────────────────────────────────────────────────────
// Sampling
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Build a prompt sampler over a fixed prompt list.
 * With replacement: uniform random pick on every call.
 * Without replacement: one Fisher-Yates-shuffled pass over all prompts,
 * reshuffling once the pool is exhausted so sampling never stops.
 *
 * @param {Array} prompts - non-empty list of prompts
 * @param {boolean} withReplacement
 * @returns {() => *} zero-arg sampler
 */
function createSampler(prompts, withReplacement) {
  if (withReplacement) {
    return () => prompts[Math.floor(Math.random() * prompts.length)];
  }
  // Fisher-Yates shuffle on a copy; the source list is never mutated.
  const fisherYates = (arr) => {
    const out = [...arr];
    for (let i = out.length - 1; i > 0; i--) {
      const j = Math.floor(Math.random() * (i + 1));
      const tmp = out[i];
      out[i] = out[j];
      out[j] = tmp;
    }
    return out;
  };
  let pool = [];
  let cursor = 0;
  return () => {
    if (cursor >= pool.length) {
      pool = fisherYates(prompts);
      cursor = 0;
    }
    return pool[cursor++];
  };
}
// ─────────────────────────────────────────────────────────────────────────────
// Statistics
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Nearest-rank percentile of an unsorted numeric array.
 * Does not mutate the input; returns 0 for an empty array.
 *
 * @param {number[]} arr - samples
 * @param {number} p - percentile in [0, 100]
 * @returns {number} the value at the nearest rank
 */
function percentile(arr, p) {
  if (arr.length === 0) return 0;
  const sorted = [...arr].sort((a, b) => a - b);
  // Nearest-rank: ceil(p% of n), converted to a 0-based index and
  // clamped at 0 so p=0 maps to the minimum.
  const rank = Math.ceil((p / 100) * sorted.length) - 1;
  return sorted[rank < 0 ? 0 : rank];
}
/**
 * Render a millisecond value with two decimal places for display.
 *
 * @param {number} ms
 * @returns {string}
 */
function formatMs(ms) {
  const decimals = 2;
  return ms.toFixed(decimals);
}
/**
 * Print an ASCII latency histogram to stdout.
 * Buckets evenly span [min, max]; bar length is scaled relative to the
 * most populated bucket. No-op on empty input; prints a single summary
 * line when every latency is identical (zero range).
 *
 * @param {number[]} latencies - latency samples in milliseconds
 * @param {number} [bucketCount=20] - number of histogram buckets
 */
function printHistogram(latencies, bucketCount = 20) {
  if (latencies.length === 0) return;
  const sorted = [...latencies].sort((a, b) => a - b);
  const lo = sorted[0];
  const hi = sorted[sorted.length - 1];
  const span = hi - lo;
  if (span === 0) {
    console.log(` All requests: ${lo.toFixed(0)} ms`);
    return;
  }
  const width = span / bucketCount;
  const counts = new Array(bucketCount).fill(0);
  for (const lat of latencies) {
    // Clamp so the exact maximum lands in the last bucket.
    counts[Math.min(Math.floor((lat - lo) / width), bucketCount - 1)]++;
  }
  const tallest = Math.max(...counts);
  const barWidth = 40;
  console.log("");
  console.log(" Latency Distribution:");
  console.log(` ${lo.toFixed(0)}ms ${"─".repeat(barWidth + 10)} ${hi.toFixed(0)}ms`);
  console.log("");
  counts.forEach((count, i) => {
    const from = lo + i * width;
    const to = lo + (i + 1) * width;
    const barLen = tallest > 0 ? Math.round((count / tallest) * barWidth) : 0;
    const bar = "█".repeat(barLen);
    const label = `${(from / 1000).toFixed(1)}s-${(to / 1000).toFixed(1)}s`;
    const countStr = count.toString().padStart(4);
    console.log(` ${label.padEnd(13)} │${bar.padEnd(barWidth)}│ ${countStr}`);
  });
  console.log("");
}
// ─────────────────────────────────────────────────────────────────────────────
// Raw HTTP Client (matching vllm's aiohttp TCPConnector settings)
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Build a raw HTTP request function backed by an undici Agent whose
 * connection pool is sized to the worker concurrency, mirroring the
 * aiohttp TCPConnector settings that vllm's bench client uses.
 *
 * @param {string} baseUrl - API base URL ("/chat/completions" is appended)
 * @param {string} token - bearer token for the Authorization header
 * @param {string} project - value for the OpenAI-Project header
 * @param {number} concurrency - max pooled connections
 * @param {number} timeoutSec - per-request headers/body timeout, seconds
 * @returns {Function} async (messages, model, reasoningEffort, temperature,
 *   maxTokens) resolving to the parsed JSON response; throws an Error with
 *   a `status` property on HTTP >= 400.
 */
function createRawClient(baseUrl, token, project, concurrency, timeoutSec) {
  const MS = 1000;
  const agent = new Agent({
    connections: concurrency, // pool cap, like aiohttp's `limit`
    pipelining: 1, // plain HTTP/1.1, no pipelining depth
    keepAliveTimeout: 60 * MS, // 60s keep-alive, matching aiohttp
    keepAliveMaxTimeout: 600 * MS, // hard ceiling on keep-alive
    connect: {
      timeout: 30 * MS, // TCP/TLS connect timeout
      rejectUnauthorized: true, // keep TLS certificate verification on
    },
  });
  const endpoint = `${baseUrl}/chat/completions`;
  return async function makeRawRequest(messages, model, reasoningEffort, temperature, maxTokens) {
    const payload = {
      model,
      messages,
      reasoning_effort: reasoningEffort,
      temperature,
      max_tokens: maxTokens,
    };
    const { statusCode, body: responseBody } = await request(endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "Authorization": `Bearer ${token}`,
        "OpenAI-Project": project,
      },
      body: JSON.stringify(payload),
      dispatcher: agent,
      bodyTimeout: timeoutSec * MS,
      headersTimeout: timeoutSec * MS,
    });
    // Drain the body fully so the connection can be returned to the pool.
    const chunks = [];
    for await (const chunk of responseBody) {
      chunks.push(chunk);
    }
    const responseText = Buffer.concat(chunks).toString("utf-8");
    if (statusCode >= 400) {
      const error = new Error(`HTTP ${statusCode}: ${responseText.slice(0, 200)}`);
      error.status = statusCode;
      throw error;
    }
    return JSON.parse(responseText);
  };
}
// ─────────────────────────────────────────────────────────────────────────────
// Main Execution
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Entry point: wires up prompt sampling, the HTTP client (OpenAI SDK or
 * raw undici), the concurrency-limited request loop, live progress
 * display, final statistics, and optional CSV/JSON result export.
 * Reads configuration exclusively from the module-level `opts`.
 */
async function main() {
  let sampler;
  let promptSource;
  let effectiveMaxTokens = opts.maxTokens;
  if (opts.synthetic) {
    promptSource = `SYNTHETIC (${opts.inputTokens} input tokens, ${opts.outputTokens} output tokens)`;
    sampler = createSyntheticSampler(opts.inputTokens);
    effectiveMaxTokens = opts.outputTokens; // Use output-tokens for synthetic mode
    console.log(`Generating synthetic prompts...`);
  } else {
    let prompts = loadPrompts(opts.prompts);
    if (prompts.length === 0) {
      console.error("Error: No valid prompts found");
      process.exit(1);
    }
    // Normalize prompt lengths if requested
    if (opts.targetInputTokens) {
      prompts = normalizeAllPrompts(prompts, opts.targetInputTokens);
      promptSource = `FILE (${prompts.length} prompts, normalized to ~${opts.targetInputTokens} tokens)`;
    } else {
      // Report original prompt sizes
      // NOTE(review): this measures the FIRST user message via find(),
      // while normalizePromptLength resizes the LAST one
      // (findLastIndex) — for multi-turn prompts these can differ;
      // confirm which message these stats should describe.
      const lengths = prompts.map(p => {
        const userMsg = p.find(m => m.role === "user");
        return userMsg ? estimateTokens(userMsg.content) : 0;
      });
      const avg = lengths.reduce((a, b) => a + b, 0) / lengths.length;
      console.log(`Original prompt sizes: avg=${avg.toFixed(0)} tokens`);
      promptSource = `FILE (${prompts.length} prompts, avg ~${avg.toFixed(0)} tokens)`;
    }
    sampler = createSampler(prompts, opts.replacement);
  }
  const mode = opts.raw ? "RAW (undici + connection pool)" : "SDK (OpenAI JS)";
  console.log(`Mode: ${mode}`);
  console.log(`Prompts: ${promptSource}`);
  console.log(`Concurrency: ${opts.concurrency}`);
  console.log("");
  // Create the appropriate client: exactly one of sdkClient/rawRequest
  // is non-null for the rest of the run.
  let sdkClient = null;
  let rawRequest = null;
  if (opts.raw) {
    rawRequest = createRawClient(
      opts.url,
      opts.token,
      opts.project,
      opts.concurrency,
      opts.timeout
    );
  } else {
    sdkClient = new OpenAI({
      apiKey: opts.token,
      baseURL: opts.url,
      defaultHeaders: {
        "OpenAI-Project": opts.project,
      },
      timeout: opts.timeout * 1000,
    });
  }
  const limit = pLimit(opts.concurrency);
  // Metrics (shared mutable state; safe because Node is single-threaded)
  const results = [];
  let completed = 0;
  let successes = 0;
  let failures = 0;
  const errorBreakdown = { timeout: 0, "4xx": 0, "5xx": 0, other: 0 };
  const startTime = Date.now();
  let stopping = false;
  const abortController = new AbortController();
  // Progress display: one status line, rewritten in place (\r) twice a second.
  const progressInterval = setInterval(() => {
    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
    const rps = (completed / (Date.now() - startTime)) * 1000;
    process.stdout.write(
      `\rCompleted: ${completed} | Errors: ${failures} | Elapsed: ${elapsed}s | RPS: ${rps.toFixed(1)} `
    );
  }, 500);
  // Check termination conditions: SIGINT, request budget, or time budget.
  function shouldStop() {
    if (stopping) return true;
    if (opts.totalRequests && completed >= opts.totalRequests) return true;
    if (opts.duration && (Date.now() - startTime) / 1000 >= opts.duration) return true;
    return false;
  }
  // Single request worker: samples a prompt, issues one request
  // (streaming via the SDK so TTFT can be measured; raw mode measures
  // end-to-end only), classifies any failure, and records one result row.
  // Aborted requests (SIGINT) return early and are not counted at all.
  async function makeRequest() {
    if (shouldStop()) return;
    const messages = sampler();
    const requestStart = Date.now();
    let status = "success";
    let errorType = null;
    let error = null;
    let ttftMs = null;
    let firstTokenTime = null;
    let inputTokens = null;
    let outputTokens = null;
    try {
      if (opts.raw) {
        // Raw mode doesn't support streaming yet - just measure E2E
        await rawRequest(
          messages,
          opts.model,
          opts.reasoningEffort,
          opts.temperature,
          effectiveMaxTokens
        );
      } else {
        // Use streaming to capture TTFT
        const stream = await sdkClient.chat.completions.create(
          {
            messages,
            model: opts.model,
            reasoning_effort: opts.reasoningEffort,
            temperature: opts.temperature,
            max_tokens: effectiveMaxTokens,
            stream: true,
            stream_options: { include_usage: true },
          },
          { signal: abortController.signal }
        );
        // Iterate through the stream
        for await (const chunk of stream) {
          // Record TTFT on first chunk with content
          if (firstTokenTime === null && chunk.choices?.[0]?.delta?.content) {
            firstTokenTime = Date.now();
            ttftMs = firstTokenTime - requestStart;
          }
          // Capture usage from final chunk
          if (chunk.usage) {
            inputTokens = chunk.usage.prompt_tokens;
            outputTokens = chunk.usage.completion_tokens;
          }
        }
      }
      successes++;
    } catch (err) {
      // Ignore abort errors - these are expected during SIGINT
      if (err.name === "AbortError" || abortController.signal.aborted) {
        return;
      }
      failures++;
      status = "error";
      error = err.message;
      // Still record TTFT if we got it before the error
      if (firstTokenTime !== null) {
        ttftMs = firstTokenTime - requestStart;
      }
      // Classify the failure for the error-breakdown summary.
      if (
        err.code === "ETIMEDOUT" ||
        err.code === "ECONNABORTED" ||
        err.code === "UND_ERR_HEADERS_TIMEOUT" ||
        err.code === "UND_ERR_BODY_TIMEOUT" ||
        err.message?.includes("timeout")
      ) {
        errorType = "timeout";
        errorBreakdown.timeout++;
      } else if (err.status >= 400 && err.status < 500) {
        errorType = "4xx";
        errorBreakdown["4xx"]++;
      } else if (err.status >= 500) {
        errorType = "5xx";
        errorBreakdown["5xx"]++;
      } else {
        errorType = "other";
        errorBreakdown.other++;
      }
    }
    const latency = Date.now() - requestStart;
    // Decode time = total latency minus prefill (TTFT), when TTFT was seen.
    const decodeMs = ttftMs !== null ? latency - ttftMs : null;
    completed++;
    results.push({
      requestId: completed,
      timestamp: new Date(requestStart).toISOString(),
      latencyMs: latency,
      ttftMs,
      decodeMs,
      inputTokens,
      outputTokens,
      status,
      errorType,
      error,
    });
  }
  // Ctrl+C handler: first SIGINT aborts in-flight requests gracefully,
  // second SIGINT force-exits.
  process.on("SIGINT", () => {
    if (stopping) {
      // Second SIGINT - force exit
      console.log("\n\nForce exiting...");
      process.exit(1);
    }
    console.log("\n\nReceived SIGINT, aborting in-flight requests...");
    stopping = true;
    abortController.abort();
  });
  // Spawn workers
  const workers = [];
  const maxRequests = opts.totalRequests || Infinity;
  // Keep spawning requests until termination
  // NOTE(review): once `workers.length` reaches maxRequests this loop
  // spins on setImmediate until shouldStop() flips; likely benign, but
  // worth confirming CPU impact on long runs.
  async function spawnRequests() {
    while (!shouldStop()) {
      if (workers.length < maxRequests) {
        const task = limit(makeRequest);
        workers.push(task);
        // Don't await here - let p-limit manage concurrency
        task.catch(() => {}); // Swallow rejections, we track them in makeRequest
      }
      // Small yield to prevent tight loop
      await new Promise((resolve) => setImmediate(resolve));
    }
  }
  // Run until done
  await spawnRequests();
  // Wait for all in-flight to complete (or be cancelled)
  await Promise.allSettled(workers);
  clearInterval(progressInterval);
  console.log("\n");
  // ─────────────────────────────────────────────────────────────────────────────
  // Final Statistics
  // ─────────────────────────────────────────────────────────────────────────────
  const totalTime = (Date.now() - startTime) / 1000;
  const latencies = results.filter((r) => r.status === "success").map((r) => r.latencyMs);
  console.log("═══════════════════════════════════════════════════════════════════");
  console.log("                        LOAD TEST RESULTS                          ");
  console.log("═══════════════════════════════════════════════════════════════════");
  console.log(`Mode: ${mode}`);
  console.log(`Prompts: ${promptSource}`);
  console.log(`Total Requests: ${completed}`);
  console.log(`Successes: ${successes}`);
  console.log(`Failures: ${failures}`);
  // NOTE(review): if zero requests completed this prints NaN% — confirm
  // whether a guard is wanted.
  console.log(`Success Rate: ${((successes / completed) * 100).toFixed(2)}%`);
  console.log(`Total Duration: ${totalTime.toFixed(2)}s`);
  console.log(`Throughput: ${(completed / totalTime).toFixed(2)} req/s`);
  console.log("───────────────────────────────────────────────────────────────────");
  console.log("Error Breakdown:");
  console.log(`  Timeouts: ${errorBreakdown.timeout}`);
  console.log(`  4xx Errors: ${errorBreakdown["4xx"]}`);
  console.log(`  5xx Errors: ${errorBreakdown["5xx"]}`);
  console.log(`  Other: ${errorBreakdown.other}`);
  console.log("───────────────────────────────────────────────────────────────────");
  console.log("Latency (successful requests only):");
  if (latencies.length > 0) {
    const ttfts = results.filter((r) => r.ttftMs !== null).map((r) => r.ttftMs);
    const decodeTimes = results.filter((r) => r.decodeMs !== null).map((r) => r.decodeMs);
    console.log(`  Min: ${formatMs(Math.min(...latencies))} ms`);
    console.log(`  Max: ${formatMs(Math.max(...latencies))} ms`);
    console.log(`  Mean: ${formatMs(latencies.reduce((a, b) => a + b, 0) / latencies.length)} ms`);
    console.log(`  P50: ${formatMs(percentile(latencies, 50))} ms`);
    console.log(`  P90: ${formatMs(percentile(latencies, 90))} ms`);
    console.log(`  P99: ${formatMs(percentile(latencies, 99))} ms`);
    if (ttfts.length > 0) {
      const meanTtft = ttfts.reduce((a, b) => a + b, 0) / ttfts.length;
      const meanDecode = decodeTimes.length > 0 ? decodeTimes.reduce((a, b) => a + b, 0) / decodeTimes.length : 0;
      console.log(`  Mean TTFT: ${formatMs(meanTtft)} ms (prefill)`);
      console.log(`  Mean Decode: ${formatMs(meanDecode)} ms (generation)`);
    }
    // Token stats (from usage reported in the stream's final chunk)
    const inputToks = results.filter((r) => r.inputTokens !== null).map((r) => r.inputTokens);
    const outputToks = results.filter((r) => r.outputTokens !== null).map((r) => r.outputTokens);
    if (inputToks.length > 0) {
      console.log(`  P50 Input Toks: ${percentile(inputToks, 50).toFixed(0)}`);
      console.log(`  P90 Input Toks: ${percentile(inputToks, 90).toFixed(0)}`);
      console.log(`  P50 Output Toks: ${percentile(outputToks, 50).toFixed(0)}`);
      console.log(`  P90 Output Toks: ${percentile(outputToks, 90).toFixed(0)}`);
    }
    printHistogram(latencies);
  } else {
    console.log("  No successful requests to measure");
  }
  // Time to Nth result (sorted by completion time)
  // timestamp field records request START, so completion = timestamp + latencyMs
  const successResults = results
    .filter((r) => r.status === "success")
    .map((r) => ({
      ...r,
      completionTime: new Date(r.timestamp).getTime() + r.latencyMs,
    }))
    .sort((a, b) => a.completionTime - b.completionTime);
  if (successResults.length >= 30) {
    console.log("───────────────────────────────────────────────────────────────────");
    console.log("Time to Nth Result (from test start):");
    const time30 = (successResults[29].completionTime - startTime) / 1000;
    console.log(`  30th: ${time30.toFixed(2)}s`);
    if (successResults.length >= 150) {
      const time150 = (successResults[149].completionTime - startTime) / 1000;
      console.log(`  150th: ${time150.toFixed(2)}s`);
    }
    if (successResults.length >= 400) {
      const time400 = (successResults[399].completionTime - startTime) / 1000;
      console.log(`  400th: ${time400.toFixed(2)}s`);
    }
  }
  console.log("═══════════════════════════════════════════════════════════════════");
  // ─────────────────────────────────────────────────────────────────────────────
  // Output File
  // ─────────────────────────────────────────────────────────────────────────────
  if (opts.output) {
    const ext = extname(opts.output).toLowerCase();
    if (ext === ".json") {
      writeFileSync(opts.output, JSON.stringify(results, null, 2));
      console.log(`\nResults written to ${opts.output}`);
    } else {
      // Default to CSV
      // Commas inside error messages are replaced with ";" to keep the
      // row parseable; no other CSV escaping is performed.
      const header = "requestId,timestamp,latencyMs,ttftMs,decodeMs,inputTokens,outputTokens,status,errorType,error\n";
      const rows = results
        .map(
          (r) =>
            `${r.requestId},${r.timestamp},${r.latencyMs},${r.ttftMs ?? ""},${r.decodeMs ?? ""},${r.inputTokens ?? ""},${r.outputTokens ?? ""},${r.status},${r.errorType || ""},${(r.error || "").replace(/,/g, ";")}`
        )
        .join("\n");
      writeFileSync(opts.output, header + rows);
      console.log(`\nResults written to ${opts.output}`);
    }
  }
}
// Top-level entry: any unhandled rejection escaping main() is fatal.
main().catch((err) => {
  console.error("Fatal error:", err);
  process.exit(1);
});
{
"name": "inference-load-tester",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "inference-load-tester",
"version": "1.0.0",
"dependencies": {
"commander": "^12.1.0",
"openai": "^4.77.0",
"p-limit": "^6.1.0",
"undici": "^7.2.0"
}
},
"node_modules/@types/node": {
"version": "18.19.130",
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.130.tgz",
"integrity": "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg==",
"license": "MIT",
"dependencies": {
"undici-types": "~5.26.4"
}
},
"node_modules/@types/node-fetch": {
"version": "2.6.13",
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.13.tgz",
"integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==",
"license": "MIT",
"dependencies": {
"@types/node": "*",
"form-data": "^4.0.4"
}
},
"node_modules/abort-controller": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz",
"integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==",
"license": "MIT",
"dependencies": {
"event-target-shim": "^5.0.0"
},
"engines": {
"node": ">=6.5"
}
},
"node_modules/agentkeepalive": {
"version": "4.6.0",
"resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.6.0.tgz",
"integrity": "sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==",
"license": "MIT",
"dependencies": {
"humanize-ms": "^1.2.1"
},
"engines": {
"node": ">= 8.0.0"
}
},
"node_modules/asynckit": {
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==",
"license": "MIT"
},
"node_modules/call-bind-apply-helpers": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz",
"integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==",
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0",
"function-bind": "^1.1.2"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/combined-stream": {
"version": "1.0.8",
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
"license": "MIT",
"dependencies": {
"delayed-stream": "~1.0.0"
},
"engines": {
"node": ">= 0.8"
}
},
"node_modules/commander": {
"version": "12.1.0",
"resolved": "https://registry.npmjs.org/commander/-/commander-12.1.0.tgz",
"integrity": "sha512-Vw8qHK3bZM9y/P10u3Vib8o/DdkvA2OtPtZvD871QKjy74Wj1WSKFILMPRPSdUSx5RFK1arlJzEtA4PkFgnbuA==",
"license": "MIT",
"engines": {
"node": ">=18"
}
},
"node_modules/delayed-stream": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
"license": "MIT",
"engines": {
"node": ">=0.4.0"
}
},
"node_modules/dunder-proto": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
"integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==",
"license": "MIT",
"dependencies": {
"call-bind-apply-helpers": "^1.0.1",
"es-errors": "^1.3.0",
"gopd": "^1.2.0"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-define-property": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz",
"integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-errors": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
"integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-object-atoms": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz",
"integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==",
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-set-tostringtag": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz",
"integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==",
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0",
"get-intrinsic": "^1.2.6",
"has-tostringtag": "^1.0.2",
"hasown": "^2.0.2"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/event-target-shim": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz",
"integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==",
"license": "MIT",
"engines": {
"node": ">=6"
}
},
"node_modules/form-data": {
"version": "4.0.5",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz",
"integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==",
"license": "MIT",
"dependencies": {
"asynckit": "^0.4.0",
"combined-stream": "^1.0.8",
"es-set-tostringtag": "^2.1.0",
"hasown": "^2.0.2",
"mime-types": "^2.1.12"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/form-data-encoder": {
"version": "1.7.2",
"resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz",
"integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==",
"license": "MIT"
},
"node_modules/formdata-node": {
"version": "4.4.1",
"resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz",
"integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==",
"license": "MIT",
"dependencies": {
"node-domexception": "1.0.0",
"web-streams-polyfill": "4.0.0-beta.3"
},
"engines": {
"node": ">= 12.20"
}
},
"node_modules/function-bind": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
"integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==",
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/get-intrinsic": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz",
"integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==",
"license": "MIT",
"dependencies": {
"call-bind-apply-helpers": "^1.0.2",
"es-define-property": "^1.0.1",
"es-errors": "^1.3.0",
"es-object-atoms": "^1.1.1",
"function-bind": "^1.1.2",
"get-proto": "^1.0.1",
"gopd": "^1.2.0",
"has-symbols": "^1.1.0",
"hasown": "^2.0.2",
"math-intrinsics": "^1.1.0"
},
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/get-proto": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz",
"integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==",
"license": "MIT",
"dependencies": {
"dunder-proto": "^1.0.1",
"es-object-atoms": "^1.0.0"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/gopd": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz",
"integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/has-symbols": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz",
"integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/has-tostringtag": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz",
"integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==",
"license": "MIT",
"dependencies": {
"has-symbols": "^1.0.3"
},
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/hasown": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz",
"integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==",
"license": "MIT",
"dependencies": {
"function-bind": "^1.1.2"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/humanize-ms": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz",
"integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==",
"license": "MIT",
"dependencies": {
"ms": "^2.0.0"
}
},
"node_modules/math-intrinsics": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
"integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
}
},
"node_modules/mime-db": {
"version": "1.52.0",
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
"license": "MIT",
"engines": {
"node": ">= 0.6"
}
},
"node_modules/mime-types": {
"version": "2.1.35",
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
"license": "MIT",
"dependencies": {
"mime-db": "1.52.0"
},
"engines": {
"node": ">= 0.6"
}
},
"node_modules/ms": {
"version": "2.1.3",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
"integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
"license": "MIT"
},
"node_modules/node-domexception": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz",
"integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==",
"deprecated": "Use your platform's native DOMException instead",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/jimmywarting"
},
{
"type": "github",
"url": "https://paypal.me/jimmywarting"
}
],
"license": "MIT",
"engines": {
"node": ">=10.5.0"
}
},
"node_modules/node-fetch": {
"version": "2.7.0",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
"integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
"license": "MIT",
"dependencies": {
"whatwg-url": "^5.0.0"
},
"engines": {
"node": "4.x || >=6.0.0"
},
"peerDependencies": {
"encoding": "^0.1.0"
},
"peerDependenciesMeta": {
"encoding": {
"optional": true
}
}
},
"node_modules/openai": {
"version": "4.104.0",
"resolved": "https://registry.npmjs.org/openai/-/openai-4.104.0.tgz",
"integrity": "sha512-p99EFNsA/yX6UhVO93f5kJsDRLAg+CTA2RBqdHK4RtK8u5IJw32Hyb2dTGKbnnFmnuoBv5r7Z2CURI9sGZpSuA==",
"license": "Apache-2.0",
"dependencies": {
"@types/node": "^18.11.18",
"@types/node-fetch": "^2.6.4",
"abort-controller": "^3.0.0",
"agentkeepalive": "^4.2.1",
"form-data-encoder": "1.7.2",
"formdata-node": "^4.3.2",
"node-fetch": "^2.6.7"
},
"bin": {
"openai": "bin/cli"
},
"peerDependencies": {
"ws": "^8.18.0",
"zod": "^3.23.8"
},
"peerDependenciesMeta": {
"ws": {
"optional": true
},
"zod": {
"optional": true
}
}
},
"node_modules/p-limit": {
"version": "6.2.0",
"resolved": "https://registry.npmjs.org/p-limit/-/p-limit-6.2.0.tgz",
"integrity": "sha512-kuUqqHNUqoIWp/c467RI4X6mmyuojY5jGutNU0wVTmEOOfcuwLqyMVoAi9MKi2Ak+5i9+nhmrK4ufZE8069kHA==",
"license": "MIT",
"dependencies": {
"yocto-queue": "^1.1.1"
},
"engines": {
"node": ">=18"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/tr46": {
"version": "0.0.3",
"resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==",
"license": "MIT"
},
"node_modules/undici": {
"version": "7.18.2",
"resolved": "https://registry.npmjs.org/undici/-/undici-7.18.2.tgz",
"integrity": "sha512-y+8YjDFzWdQlSE9N5nzKMT3g4a5UBX1HKowfdXh0uvAnTaqqwqB92Jt4UXBAeKekDs5IaDKyJFR4X1gYVCgXcw==",
"license": "MIT",
"engines": {
"node": ">=20.18.1"
}
},
"node_modules/undici-types": {
"version": "5.26.5",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
"license": "MIT"
},
"node_modules/web-streams-polyfill": {
"version": "4.0.0-beta.3",
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz",
"integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==",
"license": "MIT",
"engines": {
"node": ">= 14"
}
},
"node_modules/webidl-conversions": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
"integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==",
"license": "BSD-2-Clause"
},
"node_modules/whatwg-url": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
"integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
"license": "MIT",
"dependencies": {
"tr46": "~0.0.3",
"webidl-conversions": "^3.0.0"
}
},
"node_modules/yocto-queue": {
"version": "1.2.2",
"resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.2.2.tgz",
"integrity": "sha512-4LCcse/U2MHZ63HAJVE+v71o7yOdIe4cZ70Wpf8D/IyjDKYQLV5GD46B+hSTjJsvV5PztjvHoU580EftxjDZFQ==",
"license": "MIT",
"engines": {
"node": ">=12.20"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
}
}
}
{
"name": "inference-load-tester",
"version": "1.0.0",
"description": "Load testing tool for OpenAI-compatible inference endpoints",
"type": "module",
"main": "load_test.js",
"scripts": {
"start": "node load_test.js"
},
"dependencies": {
"openai": "^4.77.0",
"p-limit": "^6.1.0",
"commander": "^12.1.0",
"undici": "^7.2.0"
}
}
@alexeldeib
Copy link
Author

Usage example:

node load_test.js \
  --prompts /Users/aeldeib/Downloads/prompts.json \
  --model openai/gpt-oss-120b \
  --url https://api.inference.wandb.ai/v1 \
  --token "xxx" \
  --project wandb/inference-test \
  --concurrency 400 \
  --total-requests 400 \
  --timeout 180 \
  --output results-real-orig-c100.csv \
  --max-tokens 250 --target-input-tokens 500 # optional; if neither is specified, the raw prompts are used as-is and output length is unconstrained

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment