Created
January 15, 2026 06:21
-
-
Save alexeldeib/8b40ba5bcfa8711fb18eba994fe2c03a to your computer and use it in GitHub Desktop.
nodejs openai load tester (with love from claude)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env node | |
| import { program } from "commander"; | |
| import OpenAI from "openai"; | |
| import pLimit from "p-limit"; | |
| import { readFileSync, writeFileSync } from "fs"; | |
| import { extname } from "path"; | |
| import { Agent, request } from "undici"; | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| // CLI Setup | |
| // ───────────────────────────────────────────────────────────────────────────── | |
/**
 * Parse a CLI option value as a base-10 integer.
 *
 * Commander calls custom option parsers as `parser(value, previous)`. Passing
 * the global `parseInt` directly therefore feeds the previous/default value in
 * as the radix (e.g. `parseInt("2000", 1500)`), which yields NaN for every
 * integer option that declares a default. This wrapper ignores the second
 * argument and always parses in base 10.
 *
 * @param {string} value - Raw option text from the command line.
 * @returns {number} The parsed integer. Exits the process with code 1 when
 *   the text is not an integer.
 */
function parseIntegerOption(value) {
  const parsed = Number.parseInt(value, 10);
  if (Number.isNaN(parsed)) {
    console.error(`Error: Expected an integer but received "${value}"`);
    process.exit(1);
  }
  return parsed;
}

program
  .name("load_test")
  .description("Load test an OpenAI-compatible inference endpoint")
  .option("--prompts <path>", "Path to prompts.json file")
  .option("--synthetic", "Use synthetic prompts (like vllm bench)")
  .option("--input-tokens <n>", "Input tokens for synthetic prompts", parseIntegerOption, 1500)
  .option("--output-tokens <n>", "Output tokens for synthetic prompts", parseIntegerOption, 500)
  .requiredOption("--model <model>", "Model name to use")
  .requiredOption("--url <url>", "Base URL for API")
  .requiredOption("--token <token>", "Bearer token / API key")
  .requiredOption("--project <project>", "OpenAI-Project header value")
  .requiredOption("--concurrency <n>", "Number of concurrent workers", parseIntegerOption)
  .option("--total-requests <n>", "Stop after N total requests", parseIntegerOption)
  // parseFloat ignores its second argument, so it is safe to pass directly.
  .option("--duration <seconds>", "Stop after N seconds", parseFloat)
  .option("--timeout <seconds>", "Per-request timeout in seconds", parseFloat, 30)
  .option("--replacement", "Sample with replacement (default)", true)
  .option("--no-replacement", "Sample without replacement (cycles on exhaustion)")
  .option("--output <path>", "Output file path (CSV or JSON based on extension)")
  .option("--reasoning-effort <level>", "Reasoning effort: low, medium, high", "low")
  .option("--temperature <n>", "Temperature for sampling", parseFloat, 1)
  .option("--max-tokens <n>", "Maximum tokens in response", parseIntegerOption, 8192)
  .option("--raw", "Use raw HTTP with connection pooling (bypass OpenAI SDK)")
  .option("--target-input-tokens <n>", "Truncate/pad real prompts to this token count", parseIntegerOption)
  .parse();

const opts = program.opts();

// A run needs at least one termination condition, otherwise it never ends.
if (!opts.totalRequests && !opts.duration) {
  console.error("Error: Must specify at least one of --total-requests or --duration");
  process.exit(1);
}

// Exactly one prompt source must be selected.
if (!opts.prompts && !opts.synthetic) {
  console.error("Error: Must specify either --prompts <path> or --synthetic");
  process.exit(1);
}
if (opts.prompts && opts.synthetic) {
  console.error("Error: Cannot use both --prompts and --synthetic");
  process.exit(1);
}

// Zero or negative concurrency would start no workers at all.
if (opts.concurrency < 1) {
  console.error("Error: --concurrency must be at least 1");
  process.exit(1);
}
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Synthetic Prompt Generation (matches vllm bench style) | |
| // ───────────────────────────────────────────────────────────────────────────── | |
/**
 * Build one synthetic single-turn conversation of roughly `inputTokens` tokens.
 *
 * Size is approximated as 4 characters per token. The text is a fixed
 * instruction header followed by random "sentences" of 10-24 common English
 * words, cut to exactly `inputTokens * 4` characters.
 *
 * @param {number} inputTokens - Approximate prompt size in tokens.
 * @returns {{role: string, content: string}[]} A one-element messages array
 *   holding a single user message.
 */
function generateSyntheticPrompt(inputTokens) {
  const vocabulary = [
    "the be to of and a in that have I",
    "it for not on with he as you do at",
    "this but his by from they we say her she",
    "or an will my one all would there their what",
    "so up out if about who get which go me",
    "when make can like time no just him know take",
    "people into year your good some could them see other",
    "than then now look only come its over think also",
    "back after use two how our work first well way",
    "even new want because any these give day most us",
  ].flatMap((row) => row.split(" "));

  const pickWord = () => vocabulary[Math.floor(Math.random() * vocabulary.length)];

  // 4 characters per token is the rough estimate used throughout this file.
  const charBudget = inputTokens * 4;
  let text = "Please analyze the following text and provide a detailed summary:\n\n";

  while (text.length < charBudget) {
    // Sentence length is drawn first, then each word, one random draw apiece.
    const wordCount = 10 + Math.floor(Math.random() * 15);
    const [first, ...rest] = Array.from({ length: wordCount }, pickWord);
    const capitalized = first.charAt(0).toUpperCase() + first.slice(1);
    text += [capitalized, ...rest].join(" ") + ". ";
  }

  // The final sentence usually overshoots the budget; cut back to it.
  return [{ role: "user", content: text.slice(0, charBudget) }];
}
/**
 * Pre-generate a pool of 100 synthetic prompts and return a sampler over it.
 *
 * Generating the pool once up front keeps prompt construction out of the
 * per-request path.
 *
 * @param {number} inputTokens - Approximate size of each prompt in tokens.
 * @returns {() => {role: string, content: string}[]} Function returning a
 *   uniformly random prompt from the pool on each call.
 */
function createSyntheticSampler(inputTokens) {
  const POOL_SIZE = 100;
  const prompts = Array.from({ length: POOL_SIZE }, () => generateSyntheticPrompt(inputTokens));

  return function sampleSyntheticPrompt() {
    const pick = Math.floor(Math.random() * prompts.length);
    return prompts[pick];
  };
}
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Load and Validate Prompts | |
| // ───────────────────────────────────────────────────────────────────────────── | |
/**
 * Load conversations from a JSON file and keep only the well-formed ones.
 *
 * The file must contain a JSON array. Each element is one conversation: a
 * non-empty array of message objects whose `role` and `content` are strings.
 * Anything else is counted as invalid and skipped. Empty conversations are
 * rejected as well; `every()` is vacuously true for `[]`, so without the
 * explicit length check they would be accepted and later sent as requests
 * with no messages.
 *
 * @param {string} path - Path to the prompts JSON file.
 * @returns {Array<Array<{role: string, content: string}>>} Valid conversations.
 *   Exits the process with code 1 when the top-level value is not an array.
 * @throws {Error} If the file cannot be read or is not valid JSON.
 */
function loadPrompts(path) {
  console.log(`Loading prompts from ${path}...`);
  const raw = readFileSync(path, "utf-8");
  const data = JSON.parse(raw);
  if (!Array.isArray(data)) {
    console.error("Error: prompts.json must be an array");
    process.exit(1);
  }

  const isValidMessage = (msg) =>
    Boolean(msg) &&
    typeof msg === "object" &&
    typeof msg.role === "string" &&
    typeof msg.content === "string";

  const isValidConversation = (item) =>
    Array.isArray(item) && item.length > 0 && item.every(isValidMessage);

  const validPrompts = data.filter(isValidConversation);
  const invalidCount = data.length - validPrompts.length;

  console.log(`Loaded ${validPrompts.length} valid prompts (${invalidCount} invalid skipped)`);
  return validPrompts;
}
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Prompt Length Normalization | |
| // ───────────────────────────────────────────────────────────────────────────── | |
/**
 * Estimate the token count of a string at roughly 4 characters per token.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Character count divided by 4, rounded up.
 */
function estimateTokens(text) {
  const CHARS_PER_TOKEN = 4;
  const { length } = text;
  return Math.ceil(length / CHARS_PER_TOKEN);
}
/**
 * Resize the last user message of a conversation to about `targetTokens`.
 *
 * Longer content is truncated to `targetTokens * 4` characters. Shorter
 * content is repeated (joined by blank lines) until it reaches that length and
 * then cut to it. The input array and its message objects are never mutated.
 *
 * @param {{role: string, content: string}[]} messages - Conversation to resize.
 * @param {number} targetTokens - Desired estimated token count.
 * @returns {{role: string, content: string}[]} The original array when there
 *   is no user message or the estimate already equals the target; otherwise a
 *   new array with a replaced user message.
 */
function normalizePromptLength(messages, targetTokens) {
  const lastUserAt = messages.findLastIndex((msg) => msg.role === "user");
  if (lastUserAt === -1) {
    return messages;
  }

  const original = messages[lastUserAt];
  const estimated = estimateTokens(original.content);
  const needsTruncation = estimated > targetTokens;
  const needsPadding = estimated < targetTokens;
  if (!needsTruncation && !needsPadding) {
    return messages;
  }

  const charBudget = targetTokens * 4;
  let text = original.content;
  if (needsPadding) {
    // Keep appending whole copies until the budget is covered.
    while (text.length < charBudget) {
      text += "\n\n" + original.content;
    }
  }

  // Both paths finish by cutting to the exact character budget.
  const resized = Array.from(messages);
  resized[lastUserAt] = { ...original, content: text.slice(0, charBudget) };
  return resized;
}
/**
 * Normalize every conversation to about `targetTokens` and log size stats.
 *
 * The stats are taken from the LAST user message of each conversation, which
 * is the message `normalizePromptLength` rewrites. Measuring the first user
 * message would report the size of a message that was never normalized in
 * multi-turn conversations.
 *
 * @param {Array<{role: string, content: string}[]>} prompts - Conversations.
 * @param {number} targetTokens - Desired estimated tokens per user message.
 * @returns {Array<{role: string, content: string}[]>} Normalized conversations,
 *   in the same order as the input.
 */
function normalizeAllPrompts(prompts, targetTokens) {
  console.log(`Normalizing prompts to ~${targetTokens} tokens...`);
  const normalized = prompts.map((p) => normalizePromptLength(p, targetTokens));

  // Report stats
  const lengths = normalized.map((p) => {
    const userMsg = p.findLast((m) => m.role === "user");
    return userMsg ? estimateTokens(userMsg.content) : 0;
  });

  // Single pass instead of Math.min(...lengths): spreading a very large array
  // into a call can exceed the engine's argument limit. The zero defaults
  // keep the report free of NaN/Infinity for an empty prompt list.
  let total = 0;
  let min = lengths.length > 0 ? lengths[0] : 0;
  let max = min;
  for (const len of lengths) {
    total += len;
    if (len < min) min = len;
    if (len > max) max = len;
  }
  const avg = lengths.length > 0 ? total / lengths.length : 0;

  console.log(`Normalized: avg=${avg.toFixed(0)}, min=${min}, max=${max} tokens`);
  return normalized;
}
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Sampling | |
| // ───────────────────────────────────────────────────────────────────────────── | |
/**
 * Create a function that yields one prompt per call.
 *
 * With replacement, every call picks uniformly at random. Without
 * replacement, the prompts are shuffled (Fisher-Yates) and dealt in order;
 * when the deck runs out it is reshuffled and dealing starts again.
 *
 * @param {Array} prompts - Prompts to sample from.
 * @param {boolean} withReplacement - Whether to sample independently per call.
 * @returns {() => *} Sampler returning one element of `prompts`.
 */
function createSampler(prompts, withReplacement) {
  const randomBelow = (bound) => Math.floor(Math.random() * bound);

  if (withReplacement) {
    return () => prompts[randomBelow(prompts.length)];
  }

  // Returns a shuffled copy; the caller's array is left untouched.
  const shuffledCopy = () => {
    const deck = Array.from(prompts);
    let i = deck.length - 1;
    while (i > 0) {
      const j = randomBelow(i + 1);
      const held = deck[i];
      deck[i] = deck[j];
      deck[j] = held;
      i -= 1;
    }
    return deck;
  };

  let deck = [];
  let cursor = 0;
  return function dealNext() {
    if (cursor >= deck.length) {
      deck = shuffledCopy();
      cursor = 0;
    }
    const next = deck[cursor];
    cursor += 1;
    return next;
  };
}
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Statistics | |
| // ───────────────────────────────────────────────────────────────────────────── | |
/**
 * Nearest-rank percentile of a list of numbers.
 *
 * @param {number[]} arr - Samples, in any order; not mutated.
 * @param {number} p - Percentile in the range 0-100.
 * @returns {number} The sample at rank `ceil(p / 100 * n)`, clamped to the
 *   first sample, or 0 for an empty list.
 */
function percentile(arr, p) {
  const count = arr.length;
  if (count === 0) {
    return 0;
  }
  const ascending = Array.from(arr).sort((x, y) => x - y);
  const rank = Math.ceil((p / 100) * count) - 1;
  // p = 0 yields rank -1; clamp so it maps to the smallest sample.
  return ascending[Math.max(0, rank)];
}
/**
 * Format a millisecond value with exactly two decimal places.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Fixed-point text, e.g. "12.00".
 */
function formatMs(ms) {
  const DECIMALS = 2;
  const formatted = ms.toFixed(DECIMALS);
  return formatted;
}
/**
 * Print a text histogram of latencies to stdout.
 *
 * The range between the smallest and largest latency is divided into
 * `bucketCount` equal-width buckets. Each bucket is drawn as a bar scaled
 * against the fullest bucket (40 characters wide). Nothing is printed for an
 * empty list, and a single summary line is printed when all values are equal.
 *
 * @param {number[]} latencies - Latencies in milliseconds; not mutated.
 * @param {number} [bucketCount=20] - Number of histogram buckets.
 * @returns {void}
 */
function printHistogram(latencies, bucketCount = 20) {
  if (latencies.length === 0) {
    return;
  }

  const ascending = Array.from(latencies).sort((x, y) => x - y);
  const min = ascending[0];
  const max = ascending[ascending.length - 1];
  const span = max - min;
  if (span === 0) {
    // Equal-width buckets are meaningless when every value is identical.
    console.log(` All requests: ${min.toFixed(0)} ms`);
    return;
  }

  const bucketSize = span / bucketCount;
  const counts = new Array(bucketCount).fill(0);
  latencies.forEach((value) => {
    // The maximum value would land one past the end; fold it into the last bucket.
    const slot = Math.min(Math.floor((value - min) / bucketSize), bucketCount - 1);
    counts[slot] += 1;
  });

  const tallest = Math.max(...counts);
  const barWidth = 40;
  const rule = "─".repeat(barWidth + 10);

  console.log("");
  console.log(" Latency Distribution:");
  console.log(` ${min.toFixed(0)}ms ${rule} ${max.toFixed(0)}ms`);
  console.log("");

  let bucket = 0;
  while (bucket < bucketCount) {
    const fromMs = min + bucket * bucketSize;
    const toMs = min + (bucket + 1) * bucketSize;
    const count = counts[bucket];
    const filled = tallest > 0 ? Math.round((count / tallest) * barWidth) : 0;
    const bar = "█".repeat(filled).padEnd(barWidth);
    const label = `${(fromMs / 1000).toFixed(1)}s-${(toMs / 1000).toFixed(1)}s`.padEnd(13);
    const countStr = count.toString().padStart(4);
    console.log(` ${label} │${bar}│ ${countStr}`);
    bucket += 1;
  }

  console.log("");
}
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Raw HTTP Client (matching vllm's aiohttp TCPConnector settings) | |
| // ───────────────────────────────────────────────────────────────────────────── | |
/**
 * Build a chat-completions request function backed by a pooled undici Agent.
 *
 * @param {string} baseUrl - API base URL; trailing slashes are ignored.
 * @param {string} token - Bearer token sent in the Authorization header.
 * @param {string} project - Value for the OpenAI-Project header.
 * @param {number} concurrency - Maximum pooled connections.
 * @param {number} timeoutSec - Value, in seconds, used for both the headers
 *   timeout and the body timeout of each request.
 * @returns {(messages: object[], model: string, reasoningEffort: string,
 *   temperature: number, maxTokens: number, signal?: AbortSignal) => Promise<object>}
 *   Function that POSTs one non-streaming completion and resolves with the
 *   parsed JSON body. It rejects with an Error carrying `.status` for HTTP
 *   status >= 400, and with undici/JSON errors otherwise.
 */
function createRawClient(baseUrl, token, project, concurrency, timeoutSec) {
  // Create undici Agent with connection pooling similar to aiohttp TCPConnector
  const agent = new Agent({
    connections: concurrency, // Max connections (like aiohttp limit)
    pipelining: 1, // One in-flight request per connection
    keepAliveTimeout: 60 * 1000, // 60s keep-alive (like aiohttp)
    keepAliveMaxTimeout: 600 * 1000, // Max keep-alive
    connect: {
      timeout: 30 * 1000, // Connection timeout
      rejectUnauthorized: true, // Verify SSL
    },
  });

  // Strip trailing slashes so ".../v1/" does not become ".../v1//chat/completions".
  const endpoint = `${baseUrl.replace(/\/+$/, "")}/chat/completions`;
  const timeoutMs = timeoutSec * 1000;
  const headers = {
    "Content-Type": "application/json",
    "Authorization": `Bearer ${token}`,
    "OpenAI-Project": project,
  };

  return async function makeRawRequest(
    messages,
    model,
    reasoningEffort,
    temperature,
    maxTokens,
    signal
  ) {
    const body = JSON.stringify({
      model,
      messages,
      reasoning_effort: reasoningEffort,
      temperature,
      max_tokens: maxTokens,
    });

    const { statusCode, body: responseBody } = await request(endpoint, {
      method: "POST",
      headers,
      body,
      dispatcher: agent,
      bodyTimeout: timeoutMs,
      headersTimeout: timeoutMs,
      // Optional: lets the caller cancel in-flight requests (e.g. on SIGINT).
      ...(signal ? { signal } : {}),
    });

    // Always drain the body, even on errors, so the connection returns to the pool.
    const responseText = await responseBody.text();

    if (statusCode >= 400) {
      const error = new Error(`HTTP ${statusCode}: ${responseText.slice(0, 200)}`);
      error.status = statusCode;
      throw error;
    }
    return JSON.parse(responseText);
  };
}
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Main Execution | |
| // ───────────────────────────────────────────────────────────────────────────── | |
/**
 * Sort a failed request's error into one of the report's error buckets.
 *
 * Timeouts are recognised by well-known error codes or by a message that
 * mentions a timeout in any common spelling ("timeout", "timed out", ...).
 * The OpenAI SDK reports timeouts as "Request timed out.", which a plain
 * `includes("timeout")` check never matches.
 *
 * @param {Error & {code?: string, status?: number}} err - The caught error.
 * @returns {"timeout" | "4xx" | "5xx" | "other"} Bucket name.
 */
function classifyRequestError(err) {
  const timeoutCodes = ["ETIMEDOUT", "ECONNABORTED", "UND_ERR_HEADERS_TIMEOUT", "UND_ERR_BODY_TIMEOUT"];
  const message = String(err.message ?? "");
  if (timeoutCodes.includes(err.code) || /tim(?:e|ed)[\s-]?out/i.test(message)) {
    return "timeout";
  }
  if (err.status >= 400 && err.status < 500) {
    return "4xx";
  }
  if (err.status >= 500) {
    return "5xx";
  }
  return "other";
}

/**
 * Render one value as a CSV field, quoting it when needed (RFC 4180 style).
 *
 * @param {*} value - Field value; null/undefined become an empty field.
 * @returns {string} Field text, wrapped in double quotes (with embedded quotes
 *   doubled) when it contains a comma, a quote, or a line break.
 */
function csvField(value) {
  const text = value == null ? "" : String(value);
  return /[",\r\n]/.test(text) ? `"${text.replaceAll('"', '""')}"` : text;
}

/**
 * Run the load test: build the prompt sampler and client, drive requests with
 * `--concurrency` workers until a stop condition is met, then print the report
 * and optionally write per-request results to `--output`.
 *
 * Reads the module-level `opts`. The first SIGINT aborts in-flight requests
 * and still prints the report; a second SIGINT exits immediately.
 *
 * @returns {Promise<void>}
 */
async function main() {
  let sampler;
  let promptSource;
  let effectiveMaxTokens = opts.maxTokens;

  if (opts.synthetic) {
    console.log(`Generating synthetic prompts...`);
    promptSource = `SYNTHETIC (${opts.inputTokens} input tokens, ${opts.outputTokens} output tokens)`;
    sampler = createSyntheticSampler(opts.inputTokens);
    effectiveMaxTokens = opts.outputTokens; // Use output-tokens for synthetic mode
  } else {
    let prompts = loadPrompts(opts.prompts);
    if (prompts.length === 0) {
      console.error("Error: No valid prompts found");
      process.exit(1);
    }
    // Normalize prompt lengths if requested
    if (opts.targetInputTokens) {
      prompts = normalizeAllPrompts(prompts, opts.targetInputTokens);
      promptSource = `FILE (${prompts.length} prompts, normalized to ~${opts.targetInputTokens} tokens)`;
    } else {
      // Report original prompt sizes
      const lengths = prompts.map((p) => {
        const userMsg = p.find((m) => m.role === "user");
        return userMsg ? estimateTokens(userMsg.content) : 0;
      });
      const avg = lengths.reduce((a, b) => a + b, 0) / lengths.length;
      console.log(`Original prompt sizes: avg=${avg.toFixed(0)} tokens`);
      promptSource = `FILE (${prompts.length} prompts, avg ~${avg.toFixed(0)} tokens)`;
    }
    sampler = createSampler(prompts, opts.replacement);
  }

  const mode = opts.raw ? "RAW (undici + connection pool)" : "SDK (OpenAI JS)";
  console.log(`Mode: ${mode}`);
  console.log(`Prompts: ${promptSource}`);
  console.log(`Concurrency: ${opts.concurrency}`);
  console.log("");

  // Create the appropriate client
  let sdkClient = null;
  let rawRequest = null;
  if (opts.raw) {
    rawRequest = createRawClient(opts.url, opts.token, opts.project, opts.concurrency, opts.timeout);
  } else {
    sdkClient = new OpenAI({
      apiKey: opts.token,
      baseURL: opts.url,
      defaultHeaders: {
        "OpenAI-Project": opts.project,
      },
      timeout: opts.timeout * 1000,
      // The SDK retries failed requests by default. For a load test that would
      // hide failures and fold several attempts into one measured latency.
      maxRetries: 0,
    });
  }

  // Metrics
  const results = [];
  let started = 0;
  let completed = 0;
  let successes = 0;
  let failures = 0;
  const errorBreakdown = { timeout: 0, "4xx": 0, "5xx": 0, other: 0 };
  const startTime = Date.now();
  let stopping = false;
  const abortController = new AbortController();
  const maxRequests = opts.totalRequests || Infinity;

  // Check termination conditions
  function shouldStop() {
    if (stopping) return true;
    if (opts.totalRequests && completed >= opts.totalRequests) return true;
    if (opts.duration && (Date.now() - startTime) / 1000 >= opts.duration) return true;
    return false;
  }

  // Issue one request and record its outcome. Never rejects.
  async function makeRequest() {
    const messages = sampler();
    const requestStart = Date.now();
    let status = "success";
    let errorType = null;
    let error = null;
    let ttftMs = null;
    let inputTokens = null;
    let outputTokens = null;

    try {
      if (opts.raw) {
        // Raw mode doesn't support streaming yet - just measure E2E.
        // The abort signal is passed as a trailing argument so SIGINT can
        // cancel raw requests too.
        await rawRequest(
          messages,
          opts.model,
          opts.reasoningEffort,
          opts.temperature,
          effectiveMaxTokens,
          abortController.signal
        );
      } else {
        // Use streaming to capture TTFT
        const stream = await sdkClient.chat.completions.create(
          {
            messages,
            model: opts.model,
            reasoning_effort: opts.reasoningEffort,
            temperature: opts.temperature,
            max_tokens: effectiveMaxTokens,
            stream: true,
            stream_options: { include_usage: true },
          },
          { signal: abortController.signal }
        );
        for await (const chunk of stream) {
          // Record TTFT on first chunk with content
          if (ttftMs === null && chunk.choices?.[0]?.delta?.content) {
            ttftMs = Date.now() - requestStart;
          }
          // Capture usage from final chunk
          if (chunk.usage) {
            inputTokens = chunk.usage.prompt_tokens;
            outputTokens = chunk.usage.completion_tokens;
          }
        }
      }
      successes++;
    } catch (err) {
      // Ignore abort errors - these are expected during SIGINT
      if (err.name === "AbortError" || abortController.signal.aborted) {
        return;
      }
      failures++;
      status = "error";
      error = err.message;
      errorType = classifyRequestError(err);
      errorBreakdown[errorType]++;
    }

    const latency = Date.now() - requestStart;
    const decodeMs = ttftMs !== null ? latency - ttftMs : null;
    completed++;
    results.push({
      requestId: completed,
      timestamp: new Date(requestStart).toISOString(),
      latencyMs: latency,
      ttftMs,
      decodeMs,
      inputTokens,
      outputTokens,
      status,
      errorType,
      error,
    });
  }

  // Each worker issues requests back-to-back. This keeps exactly
  // `concurrency` requests in flight without queueing work ahead of time and
  // without polling the event loop while waiting.
  async function workerLoop() {
    while (!shouldStop() && started < maxRequests) {
      started++;
      await makeRequest();
    }
  }

  // Ctrl+C handler
  function onSigint() {
    if (stopping) {
      // Second SIGINT - force exit
      console.log("\n\nForce exiting...");
      process.exit(1);
    }
    console.log("\n\nReceived SIGINT, aborting in-flight requests...");
    stopping = true;
    abortController.abort();
  }
  process.on("SIGINT", onSigint);

  // Progress display
  const progressInterval = setInterval(() => {
    const elapsedMs = Date.now() - startTime;
    const elapsed = (elapsedMs / 1000).toFixed(1);
    const rps = (completed / elapsedMs) * 1000;
    process.stdout.write(
      `\rCompleted: ${completed} | Errors: ${failures} | Elapsed: ${elapsed}s | RPS: ${rps.toFixed(1)} `
    );
  }, 500);

  try {
    // Run until every worker has observed a stop condition.
    await Promise.allSettled(Array.from({ length: opts.concurrency }, () => workerLoop()));
  } finally {
    clearInterval(progressInterval);
    process.off("SIGINT", onSigint);
  }
  console.log("\n");

  // ─────────────────────────────────────────────────────────────────────────────
  // Final Statistics
  // ─────────────────────────────────────────────────────────────────────────────
  const totalTime = (Date.now() - startTime) / 1000;
  const succeeded = results.filter((r) => r.status === "success");
  const latencies = succeeded.map((r) => r.latencyMs);
  const successRate = completed > 0 ? (successes / completed) * 100 : 0;
  const mean = (values) => values.reduce((a, b) => a + b, 0) / values.length;

  console.log("═══════════════════════════════════════════════════════════════════");
  console.log(" LOAD TEST RESULTS ");
  console.log("═══════════════════════════════════════════════════════════════════");
  console.log(`Mode: ${mode}`);
  console.log(`Prompts: ${promptSource}`);
  console.log(`Total Requests: ${completed}`);
  console.log(`Successes: ${successes}`);
  console.log(`Failures: ${failures}`);
  console.log(`Success Rate: ${successRate.toFixed(2)}%`);
  console.log(`Total Duration: ${totalTime.toFixed(2)}s`);
  console.log(`Throughput: ${(completed / totalTime).toFixed(2)} req/s`);
  console.log("───────────────────────────────────────────────────────────────────");
  console.log("Error Breakdown:");
  console.log(` Timeouts: ${errorBreakdown.timeout}`);
  console.log(` 4xx Errors: ${errorBreakdown["4xx"]}`);
  console.log(` 5xx Errors: ${errorBreakdown["5xx"]}`);
  console.log(` Other: ${errorBreakdown.other}`);
  console.log("───────────────────────────────────────────────────────────────────");
  console.log("Latency (successful requests only):");

  if (latencies.length > 0) {
    // Everything in this section is drawn from successful requests only, to
    // match the heading.
    const ttfts = succeeded.filter((r) => r.ttftMs !== null).map((r) => r.ttftMs);
    const decodeTimes = succeeded.filter((r) => r.decodeMs !== null).map((r) => r.decodeMs);

    // Sort once for min/max; Math.min(...latencies) can exceed the engine's
    // argument limit on very large runs.
    const sortedLatencies = [...latencies].sort((a, b) => a - b);
    console.log(` Min: ${formatMs(sortedLatencies[0])} ms`);
    console.log(` Max: ${formatMs(sortedLatencies[sortedLatencies.length - 1])} ms`);
    console.log(` Mean: ${formatMs(mean(latencies))} ms`);
    console.log(` P50: ${formatMs(percentile(latencies, 50))} ms`);
    console.log(` P90: ${formatMs(percentile(latencies, 90))} ms`);
    console.log(` P99: ${formatMs(percentile(latencies, 99))} ms`);

    if (ttfts.length > 0) {
      const meanTtft = mean(ttfts);
      const meanDecode = decodeTimes.length > 0 ? mean(decodeTimes) : 0;
      console.log(` Mean TTFT: ${formatMs(meanTtft)} ms (prefill)`);
      console.log(` Mean Decode: ${formatMs(meanDecode)} ms (generation)`);
    }

    // Token stats
    const inputToks = succeeded.filter((r) => r.inputTokens !== null).map((r) => r.inputTokens);
    const outputToks = succeeded.filter((r) => r.outputTokens !== null).map((r) => r.outputTokens);
    if (inputToks.length > 0) {
      console.log(` P50 Input Toks: ${percentile(inputToks, 50).toFixed(0)}`);
      console.log(` P90 Input Toks: ${percentile(inputToks, 90).toFixed(0)}`);
      console.log(` P50 Output Toks: ${percentile(outputToks, 50).toFixed(0)}`);
      console.log(` P90 Output Toks: ${percentile(outputToks, 90).toFixed(0)}`);
    }

    printHistogram(latencies);
  } else {
    console.log(" No successful requests to measure");
  }

  // Time to Nth result (sorted by completion time)
  // timestamp field records request START, so completion = timestamp + latencyMs
  const successResults = succeeded
    .map((r) => ({
      ...r,
      completionTime: new Date(r.timestamp).getTime() + r.latencyMs,
    }))
    .sort((a, b) => a.completionTime - b.completionTime);
  const milestones = [30, 150, 400].filter((n) => successResults.length >= n);
  if (milestones.length > 0) {
    console.log("───────────────────────────────────────────────────────────────────");
    console.log("Time to Nth Result (from test start):");
    for (const n of milestones) {
      const seconds = (successResults[n - 1].completionTime - startTime) / 1000;
      console.log(` ${n}th: ${seconds.toFixed(2)}s`);
    }
  }
  console.log("═══════════════════════════════════════════════════════════════════");

  // ─────────────────────────────────────────────────────────────────────────────
  // Output File
  // ─────────────────────────────────────────────────────────────────────────────
  if (opts.output) {
    const ext = extname(opts.output).toLowerCase();
    if (ext === ".json") {
      writeFileSync(opts.output, JSON.stringify(results, null, 2));
    } else {
      // Default to CSV. Fields are quoted when needed so error messages with
      // commas, quotes or line breaks cannot corrupt the row structure.
      const columns = [
        "requestId",
        "timestamp",
        "latencyMs",
        "ttftMs",
        "decodeMs",
        "inputTokens",
        "outputTokens",
        "status",
        "errorType",
        "error",
      ];
      const header = `${columns.join(",")}\n`;
      const rows = results
        .map((r) => columns.map((column) => csvField(r[column])).join(","))
        .join("\n");
      writeFileSync(opts.output, header + rows);
    }
    console.log(`\nResults written to ${opts.output}`);
  }
}
// Entry point: anything that escapes main() is reported and ends the process
// with a non-zero exit code.
main().catch((fatal) => {
  console.error("Fatal error:", fatal);
  process.exit(1);
});
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "name": "inference-load-tester", | |
| "version": "1.0.0", | |
| "lockfileVersion": 3, | |
| "requires": true, | |
| "packages": { | |
| "": { | |
| "name": "inference-load-tester", | |
| "version": "1.0.0", | |
| "dependencies": { | |
| "commander": "^12.1.0", | |
| "openai": "^4.77.0", | |
| "p-limit": "^6.1.0", | |
| "undici": "^7.2.0" | |
| } | |
| }, | |
| "node_modules/@types/node": { | |
| "version": "18.19.130", | |
| "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.130.tgz", | |
| "integrity": "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg==", | |
| "license": "MIT", | |
| "dependencies": { | |
| "undici-types": "~5.26.4" | |
| } | |
| }, | |
| "node_modules/@types/node-fetch": { | |
| "version": "2.6.13", | |
| "resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.13.tgz", | |
| "integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==", | |
| "license": "MIT", | |
| "dependencies": { | |
| "@types/node": "*", | |
| "form-data": "^4.0.4" | |
| } | |
| }, | |
| "node_modules/abort-controller": { | |
| "version": "3.0.0", | |
| "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz", | |
| "integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==", | |
| "license": "MIT", | |
| "dependencies": { | |
| "event-target-shim": "^5.0.0" | |
| }, | |
| "engines": { | |
| "node": ">=6.5" | |
| } | |
| }, | |
| "node_modules/agentkeepalive": { | |
| "version": "4.6.0", | |
| "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.6.0.tgz", | |
| "integrity": "sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==", | |
| "license": "MIT", | |
| "dependencies": { | |
| "humanize-ms": "^1.2.1" | |
| }, | |
| "engines": { | |
| "node": ">= 8.0.0" | |
| } | |
| }, | |
| "node_modules/asynckit": { | |
| "version": "0.4.0", | |
| "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", | |
| "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", | |
| "license": "MIT" | |
| }, | |
| "node_modules/call-bind-apply-helpers": { | |
| "version": "1.0.2", | |
| "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", | |
| "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", | |
| "license": "MIT", | |
| "dependencies": { | |
| "es-errors": "^1.3.0", | |
| "function-bind": "^1.1.2" | |
| }, | |
| "engines": { | |
| "node": ">= 0.4" | |
| } | |
| }, | |
| "node_modules/combined-stream": { | |
| "version": "1.0.8", | |
| "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", | |
| "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", | |
| "license": "MIT", | |
| "dependencies": { | |
| "delayed-stream": "~1.0.0" | |
| }, | |
| "engines": { | |
| "node": ">= 0.8" | |
| } | |
| }, | |
| "node_modules/commander": { | |
| "version": "12.1.0", | |
| "resolved": "https://registry.npmjs.org/commander/-/commander-12.1.0.tgz", | |
| "integrity": "sha512-Vw8qHK3bZM9y/P10u3Vib8o/DdkvA2OtPtZvD871QKjy74Wj1WSKFILMPRPSdUSx5RFK1arlJzEtA4PkFgnbuA==", | |
| "license": "MIT", | |
| "engines": { | |
| "node": ">=18" | |
| } | |
| }, | |
| "node_modules/delayed-stream": { | |
| "version": "1.0.0", | |
| "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", | |
| "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", | |
| "license": "MIT", | |
| "engines": { | |
| "node": ">=0.4.0" | |
| } | |
| }, | |
| "node_modules/dunder-proto": { | |
| "version": "1.0.1", | |
| "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", | |
| "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", | |
| "license": "MIT", | |
| "dependencies": { | |
| "call-bind-apply-helpers": "^1.0.1", | |
| "es-errors": "^1.3.0", | |
| "gopd": "^1.2.0" | |
| }, | |
| "engines": { | |
| "node": ">= 0.4" | |
| } | |
| }, | |
| "node_modules/es-define-property": { | |
| "version": "1.0.1", | |
| "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", | |
| "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", | |
| "license": "MIT", | |
| "engines": { | |
| "node": ">= 0.4" | |
| } | |
| }, | |
| "node_modules/es-errors": { | |
| "version": "1.3.0", | |
| "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", | |
| "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", | |
| "license": "MIT", | |
| "engines": { | |
| "node": ">= 0.4" | |
| } | |
| }, | |
| "node_modules/es-object-atoms": { | |
| "version": "1.1.1", | |
| "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", | |
| "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", | |
| "license": "MIT", | |
| "dependencies": { | |
| "es-errors": "^1.3.0" | |
| }, | |
| "engines": { | |
| "node": ">= 0.4" | |
| } | |
| }, | |
| "node_modules/es-set-tostringtag": { | |
| "version": "2.1.0", | |
| "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", | |
| "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", | |
| "license": "MIT", | |
| "dependencies": { | |
| "es-errors": "^1.3.0", | |
| "get-intrinsic": "^1.2.6", | |
| "has-tostringtag": "^1.0.2", | |
| "hasown": "^2.0.2" | |
| }, | |
| "engines": { | |
| "node": ">= 0.4" | |
| } | |
| }, | |
| "node_modules/event-target-shim": { | |
| "version": "5.0.1", | |
| "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz", | |
| "integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==", | |
| "license": "MIT", | |
| "engines": { | |
| "node": ">=6" | |
| } | |
| }, | |
| "node_modules/form-data": { | |
| "version": "4.0.5", | |
| "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz", | |
| "integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==", | |
| "license": "MIT", | |
| "dependencies": { | |
| "asynckit": "^0.4.0", | |
| "combined-stream": "^1.0.8", | |
| "es-set-tostringtag": "^2.1.0", | |
| "hasown": "^2.0.2", | |
| "mime-types": "^2.1.12" | |
| }, | |
| "engines": { | |
| "node": ">= 6" | |
| } | |
| }, | |
| "node_modules/form-data-encoder": { | |
| "version": "1.7.2", | |
| "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz", | |
| "integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==", | |
| "license": "MIT" | |
| }, | |
| "node_modules/formdata-node": { | |
| "version": "4.4.1", | |
| "resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz", | |
| "integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==", | |
| "license": "MIT", | |
| "dependencies": { | |
| "node-domexception": "1.0.0", | |
| "web-streams-polyfill": "4.0.0-beta.3" | |
| }, | |
| "engines": { | |
| "node": ">= 12.20" | |
| } | |
| }, | |
| "node_modules/function-bind": { | |
| "version": "1.1.2", | |
| "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", | |
| "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", | |
| "license": "MIT", | |
| "funding": { | |
| "url": "https://github.com/sponsors/ljharb" | |
| } | |
| }, | |
| "node_modules/get-intrinsic": { | |
| "version": "1.3.0", | |
| "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", | |
| "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", | |
| "license": "MIT", | |
| "dependencies": { | |
| "call-bind-apply-helpers": "^1.0.2", | |
| "es-define-property": "^1.0.1", | |
| "es-errors": "^1.3.0", | |
| "es-object-atoms": "^1.1.1", | |
| "function-bind": "^1.1.2", | |
| "get-proto": "^1.0.1", | |
| "gopd": "^1.2.0", | |
| "has-symbols": "^1.1.0", | |
| "hasown": "^2.0.2", | |
| "math-intrinsics": "^1.1.0" | |
| }, | |
| "engines": { | |
| "node": ">= 0.4" | |
| }, | |
| "funding": { | |
| "url": "https://github.com/sponsors/ljharb" | |
| } | |
| }, | |
| "node_modules/get-proto": { | |
| "version": "1.0.1", | |
| "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", | |
| "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", | |
| "license": "MIT", | |
| "dependencies": { | |
| "dunder-proto": "^1.0.1", | |
| "es-object-atoms": "^1.0.0" | |
| }, | |
| "engines": { | |
| "node": ">= 0.4" | |
| } | |
| }, | |
| "node_modules/gopd": { | |
| "version": "1.2.0", | |
| "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", | |
| "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", | |
| "license": "MIT", | |
| "engines": { | |
| "node": ">= 0.4" | |
| }, | |
| "funding": { | |
| "url": "https://github.com/sponsors/ljharb" | |
| } | |
| }, | |
| "node_modules/has-symbols": { | |
| "version": "1.1.0", | |
| "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", | |
| "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", | |
| "license": "MIT", | |
| "engines": { | |
| "node": ">= 0.4" | |
| }, | |
| "funding": { | |
| "url": "https://github.com/sponsors/ljharb" | |
| } | |
| }, | |
| "node_modules/has-tostringtag": { | |
| "version": "1.0.2", | |
| "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", | |
| "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", | |
| "license": "MIT", | |
| "dependencies": { | |
| "has-symbols": "^1.0.3" | |
| }, | |
| "engines": { | |
| "node": ">= 0.4" | |
| }, | |
| "funding": { | |
| "url": "https://github.com/sponsors/ljharb" | |
| } | |
| }, | |
| "node_modules/hasown": { | |
| "version": "2.0.2", | |
| "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", | |
| "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", | |
| "license": "MIT", | |
| "dependencies": { | |
| "function-bind": "^1.1.2" | |
| }, | |
| "engines": { | |
| "node": ">= 0.4" | |
| } | |
| }, | |
| "node_modules/humanize-ms": { | |
| "version": "1.2.1", | |
| "resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz", | |
| "integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==", | |
| "license": "MIT", | |
| "dependencies": { | |
| "ms": "^2.0.0" | |
| } | |
| }, | |
| "node_modules/math-intrinsics": { | |
| "version": "1.1.0", | |
| "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", | |
| "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", | |
| "license": "MIT", | |
| "engines": { | |
| "node": ">= 0.4" | |
| } | |
| }, | |
| "node_modules/mime-db": { | |
| "version": "1.52.0", | |
| "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", | |
| "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", | |
| "license": "MIT", | |
| "engines": { | |
| "node": ">= 0.6" | |
| } | |
| }, | |
| "node_modules/mime-types": { | |
| "version": "2.1.35", | |
| "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", | |
| "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", | |
| "license": "MIT", | |
| "dependencies": { | |
| "mime-db": "1.52.0" | |
| }, | |
| "engines": { | |
| "node": ">= 0.6" | |
| } | |
| }, | |
| "node_modules/ms": { | |
| "version": "2.1.3", | |
| "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", | |
| "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", | |
| "license": "MIT" | |
| }, | |
| "node_modules/node-domexception": { | |
| "version": "1.0.0", | |
| "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", | |
| "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==", | |
| "deprecated": "Use your platform's native DOMException instead", | |
| "funding": [ | |
| { | |
| "type": "github", | |
| "url": "https://github.com/sponsors/jimmywarting" | |
| }, | |
| { | |
| "type": "github", | |
| "url": "https://paypal.me/jimmywarting" | |
| } | |
| ], | |
| "license": "MIT", | |
| "engines": { | |
| "node": ">=10.5.0" | |
| } | |
| }, | |
| "node_modules/node-fetch": { | |
| "version": "2.7.0", | |
| "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", | |
| "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", | |
| "license": "MIT", | |
| "dependencies": { | |
| "whatwg-url": "^5.0.0" | |
| }, | |
| "engines": { | |
| "node": "4.x || >=6.0.0" | |
| }, | |
| "peerDependencies": { | |
| "encoding": "^0.1.0" | |
| }, | |
| "peerDependenciesMeta": { | |
| "encoding": { | |
| "optional": true | |
| } | |
| } | |
| }, | |
| "node_modules/openai": { | |
| "version": "4.104.0", | |
| "resolved": "https://registry.npmjs.org/openai/-/openai-4.104.0.tgz", | |
| "integrity": "sha512-p99EFNsA/yX6UhVO93f5kJsDRLAg+CTA2RBqdHK4RtK8u5IJw32Hyb2dTGKbnnFmnuoBv5r7Z2CURI9sGZpSuA==", | |
| "license": "Apache-2.0", | |
| "dependencies": { | |
| "@types/node": "^18.11.18", | |
| "@types/node-fetch": "^2.6.4", | |
| "abort-controller": "^3.0.0", | |
| "agentkeepalive": "^4.2.1", | |
| "form-data-encoder": "1.7.2", | |
| "formdata-node": "^4.3.2", | |
| "node-fetch": "^2.6.7" | |
| }, | |
| "bin": { | |
| "openai": "bin/cli" | |
| }, | |
| "peerDependencies": { | |
| "ws": "^8.18.0", | |
| "zod": "^3.23.8" | |
| }, | |
| "peerDependenciesMeta": { | |
| "ws": { | |
| "optional": true | |
| }, | |
| "zod": { | |
| "optional": true | |
| } | |
| } | |
| }, | |
| "node_modules/p-limit": { | |
| "version": "6.2.0", | |
| "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-6.2.0.tgz", | |
| "integrity": "sha512-kuUqqHNUqoIWp/c467RI4X6mmyuojY5jGutNU0wVTmEOOfcuwLqyMVoAi9MKi2Ak+5i9+nhmrK4ufZE8069kHA==", | |
| "license": "MIT", | |
| "dependencies": { | |
| "yocto-queue": "^1.1.1" | |
| }, | |
| "engines": { | |
| "node": ">=18" | |
| }, | |
| "funding": { | |
| "url": "https://github.com/sponsors/sindresorhus" | |
| } | |
| }, | |
| "node_modules/tr46": { | |
| "version": "0.0.3", | |
| "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", | |
| "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", | |
| "license": "MIT" | |
| }, | |
| "node_modules/undici": { | |
| "version": "7.18.2", | |
| "resolved": "https://registry.npmjs.org/undici/-/undici-7.18.2.tgz", | |
| "integrity": "sha512-y+8YjDFzWdQlSE9N5nzKMT3g4a5UBX1HKowfdXh0uvAnTaqqwqB92Jt4UXBAeKekDs5IaDKyJFR4X1gYVCgXcw==", | |
| "license": "MIT", | |
| "engines": { | |
| "node": ">=20.18.1" | |
| } | |
| }, | |
| "node_modules/undici-types": { | |
| "version": "5.26.5", | |
| "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", | |
| "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", | |
| "license": "MIT" | |
| }, | |
| "node_modules/web-streams-polyfill": { | |
| "version": "4.0.0-beta.3", | |
| "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz", | |
| "integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==", | |
| "license": "MIT", | |
| "engines": { | |
| "node": ">= 14" | |
| } | |
| }, | |
| "node_modules/webidl-conversions": { | |
| "version": "3.0.1", | |
| "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", | |
| "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", | |
| "license": "BSD-2-Clause" | |
| }, | |
| "node_modules/whatwg-url": { | |
| "version": "5.0.0", | |
| "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", | |
| "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", | |
| "license": "MIT", | |
| "dependencies": { | |
| "tr46": "~0.0.3", | |
| "webidl-conversions": "^3.0.0" | |
| } | |
| }, | |
| "node_modules/yocto-queue": { | |
| "version": "1.2.2", | |
| "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.2.2.tgz", | |
| "integrity": "sha512-4LCcse/U2MHZ63HAJVE+v71o7yOdIe4cZ70Wpf8D/IyjDKYQLV5GD46B+hSTjJsvV5PztjvHoU580EftxjDZFQ==", | |
| "license": "MIT", | |
| "engines": { | |
| "node": ">=12.20" | |
| }, | |
| "funding": { | |
| "url": "https://github.com/sponsors/sindresorhus" | |
| } | |
| } | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "name": "inference-load-tester", | |
| "version": "1.0.0", | |
| "description": "Load testing tool for OpenAI-compatible inference endpoints", | |
| "type": "module", | |
| "main": "load_test.js", | |
| "scripts": { | |
| "start": "node load_test.js" | |
| }, | |
| "dependencies": { | |
| "openai": "^4.77.0", | |
| "p-limit": "^6.1.0", | |
| "commander": "^12.1.0", | |
| "undici": "^7.2.0" | |
| } | |
| } |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
usage e.g.