Skip to content

Instantly share code, notes, and snippets.

@alexeldeib
Created January 15, 2026 06:21
Show Gist options
  • Select an option

  • Save alexeldeib/8b40ba5bcfa8711fb18eba994fe2c03a to your computer and use it in GitHub Desktop.

Select an option

Save alexeldeib/8b40ba5bcfa8711fb18eba994fe2c03a to your computer and use it in GitHub Desktop.
nodejs openai load tester (with love from claude)
#!/usr/bin/env node
import { program } from "commander";
import OpenAI from "openai";
import pLimit from "p-limit";
import { readFileSync, writeFileSync } from "fs";
import { extname } from "path";
import { Agent, request } from "undici";
// ─────────────────────────────────────────────────────────────────────────────
// CLI Setup
// ─────────────────────────────────────────────────────────────────────────────
// Define and parse CLI flags. This runs at module load: `.parse()` reads
// process.argv, and commander exits the process itself on missing
// required options. `--replacement`/`--no-replacement` form a negatable
// boolean pair (opts.replacement defaults to true).
program
  .name("load_test")
  .description("Load test an OpenAI-compatible inference endpoint")
  .option("--prompts <path>", "Path to prompts.json file")
  .option("--synthetic", "Use synthetic prompts (like vllm bench)")
  .option("--input-tokens <n>", "Input tokens for synthetic prompts", parseInt, 1500)
  .option("--output-tokens <n>", "Output tokens for synthetic prompts", parseInt, 500)
  .requiredOption("--model <model>", "Model name to use")
  .requiredOption("--url <url>", "Base URL for API")
  .requiredOption("--token <token>", "Bearer token / API key")
  .requiredOption("--project <project>", "OpenAI-Project header value")
  .requiredOption("--concurrency <n>", "Number of concurrent workers", parseInt)
  .option("--total-requests <n>", "Stop after N total requests", parseInt)
  .option("--duration <seconds>", "Stop after N seconds", parseFloat)
  .option("--timeout <seconds>", "Per-request timeout in seconds", parseFloat, 30)
  .option("--replacement", "Sample with replacement (default)", true)
  .option("--no-replacement", "Sample without replacement (cycles on exhaustion)")
  .option("--output <path>", "Output file path (CSV or JSON based on extension)")
  .option("--reasoning-effort <level>", "Reasoning effort: low, medium, high", "low")
  .option("--temperature <n>", "Temperature for sampling", parseFloat, 1)
  .option("--max-tokens <n>", "Maximum tokens in response", parseInt, 8192)
  .option("--raw", "Use raw HTTP with connection pooling (bypass OpenAI SDK)")
  .option("--target-input-tokens <n>", "Truncate/pad real prompts to this token count", parseInt)
  .parse();
const opts = program.opts();
// Cross-flag validation commander cannot express: at least one stop
// condition, and exactly one prompt source.
if (!opts.totalRequests && !opts.duration) {
  console.error("Error: Must specify at least one of --total-requests or --duration");
  process.exit(1);
}
if (!opts.prompts && !opts.synthetic) {
  console.error("Error: Must specify either --prompts <path> or --synthetic");
  process.exit(1);
}
if (opts.prompts && opts.synthetic) {
  console.error("Error: Cannot use both --prompts and --synthetic");
  process.exit(1);
}
// ─────────────────────────────────────────────────────────────────────────────
// Synthetic Prompt Generation (matches vllm bench style)
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Build a single-turn chat prompt whose user content is approximately
 * `inputTokens` tokens long, estimated at ~4 characters per token.
 * The body is random filler sentences drawn from common English words,
 * which keeps the text vaguely realistic (matches vllm bench style).
 *
 * @param {number} inputTokens - approximate token budget for the prompt
 * @returns {Array<{role: string, content: string}>} one-message conversation
 */
function generateSyntheticPrompt(inputTokens) {
  const vocabulary = [
    "the", "be", "to", "of", "and", "a", "in", "that", "have", "I",
    "it", "for", "not", "on", "with", "he", "as", "you", "do", "at",
    "this", "but", "his", "by", "from", "they", "we", "say", "her", "she",
    "or", "an", "will", "my", "one", "all", "would", "there", "their", "what",
    "so", "up", "out", "if", "about", "who", "get", "which", "go", "me",
    "when", "make", "can", "like", "time", "no", "just", "him", "know", "take",
    "people", "into", "year", "your", "good", "some", "could", "them", "see", "other",
    "than", "then", "now", "look", "only", "come", "its", "over", "think", "also",
    "back", "after", "use", "two", "how", "our", "work", "first", "well", "way",
    "even", "new", "want", "because", "any", "these", "give", "day", "most", "us",
  ];
  // ~4 characters per token is a rough heuristic for English text.
  const charBudget = inputTokens * 4;
  let text = "Please analyze the following text and provide a detailed summary:\n\n";
  while (text.length < charBudget) {
    // Random sentence of 10-24 words, capitalized like real prose.
    const wordCount = 10 + Math.floor(Math.random() * 15);
    const picked = Array.from(
      { length: wordCount },
      () => vocabulary[Math.floor(Math.random() * vocabulary.length)]
    );
    picked[0] = picked[0].charAt(0).toUpperCase() + picked[0].slice(1);
    text += `${picked.join(" ")}. `;
  }
  // Clip to the budget so every prompt has a consistent estimated size.
  return [{ role: "user", content: text.slice(0, charBudget) }];
}
/**
 * Pre-generate a fixed pool of synthetic prompts and return a sampler
 * that picks one uniformly at random on each call, avoiding the cost
 * of generating a fresh prompt per request.
 *
 * @param {number} inputTokens - token budget passed to the generator
 * @returns {() => Array<object>} zero-arg sampler returning a conversation
 */
function createSyntheticSampler(inputTokens) {
  const POOL_SIZE = 100;
  const pool = Array.from({ length: POOL_SIZE }, () =>
    generateSyntheticPrompt(inputTokens)
  );
  return () => pool[Math.floor(Math.random() * pool.length)];
}
// ─────────────────────────────────────────────────────────────────────────────
// Load and Validate Prompts
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Read prompts from a JSON file and keep only well-formed conversations.
 * A valid prompt is an array of messages, each with string `role` and
 * string `content`. Exits the process if the file is not a JSON array.
 *
 * @param {string} path - path to a prompts.json file
 * @returns {Array<Array<{role: string, content: string}>>} valid prompts
 */
function loadPrompts(path) {
  console.log(`Loading prompts from ${path}...`);
  const data = JSON.parse(readFileSync(path, "utf-8"));
  if (!Array.isArray(data)) {
    console.error("Error: prompts.json must be an array");
    process.exit(1);
  }
  // A message must be a non-null object with string role and content.
  const isMessage = (msg) =>
    msg &&
    typeof msg === "object" &&
    typeof msg.role === "string" &&
    typeof msg.content === "string";
  const validPrompts = data.filter(
    (item) => Array.isArray(item) && item.every(isMessage)
  );
  const invalidCount = data.length - validPrompts.length;
  console.log(`Loaded ${validPrompts.length} valid prompts (${invalidCount} invalid skipped)`);
  return validPrompts;
}
// ─────────────────────────────────────────────────────────────────────────────
// Prompt Length Normalization
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Heuristic token count for English text: ~4 characters per token,
 * rounded up so non-empty text never estimates to zero tokens.
 *
 * @param {string} text
 * @returns {number} estimated token count
 */
function estimateTokens(text) {
  return Math.ceil(text.length * 0.25);
}
/**
 * Return a copy of `messages` with its LAST user message resized to
 * roughly `targetTokens` tokens (~4 chars/token): longer content is
 * truncated, shorter content is repeated (joined by blank lines) until
 * it reaches the target. Conversations with no user turn, or whose
 * user message is already at the target estimate, are returned as-is.
 *
 * @param {Array<{role: string, content: string}>} messages
 * @param {number} targetTokens
 * @returns {Array<{role: string, content: string}>} (possibly new) array
 */
function normalizePromptLength(messages, targetTokens) {
  const idx = messages.findLastIndex((m) => m.role === "user");
  if (idx === -1) return messages;
  const original = messages[idx].content;
  const estimated = estimateTokens(original);
  if (estimated === targetTokens) return messages;
  const targetChars = targetTokens * 4;
  let resized;
  if (estimated > targetTokens) {
    // Too long: hard truncate at the character budget.
    resized = original.slice(0, targetChars);
  } else {
    // Too short: repeat the content with "\n\n" separators, then clip.
    const parts = [original];
    let total = original.length;
    while (total < targetChars) {
      parts.push(original);
      total += original.length + 2; // +2 for the "\n\n" separator
    }
    resized = parts.join("\n\n").slice(0, targetChars);
  }
  // Copy-on-write: leave the caller's array and message objects intact.
  const updated = [...messages];
  updated[idx] = { ...messages[idx], content: resized };
  return updated;
}
/**
 * Normalize every prompt to ~targetTokens tokens and log summary stats
 * (avg/min/max estimated tokens of the resized user message).
 *
 * @param {Array<Array<object>>} prompts - conversations to normalize
 * @param {number} targetTokens - target token count per user message
 * @returns {Array<Array<object>>} normalized copies of the prompts
 */
function normalizeAllPrompts(prompts, targetTokens) {
  console.log(`Normalizing prompts to ~${targetTokens} tokens...`);
  const normalized = prompts.map((p) => normalizePromptLength(p, targetTokens));
  // Report stats on the LAST user message — that is the one
  // normalizePromptLength resizes (it uses findLastIndex). Using find()
  // here previously measured the FIRST user message, which is untouched
  // in multi-turn conversations, so the reported stats could be wrong.
  const lengths = normalized.map((p) => {
    const userMsg = p.findLast((m) => m.role === "user");
    return userMsg ? estimateTokens(userMsg.content) : 0;
  });
  const avg = lengths.reduce((a, b) => a + b, 0) / lengths.length;
  const min = Math.min(...lengths);
  const max = Math.max(...lengths);
  console.log(`Normalized: avg=${avg.toFixed(0)}, min=${min}, max=${max} tokens`);
  return normalized;
}
// ─────────────────────────────────────────────────────────────────────────────
// Sampling
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Build a prompt sampler over a fixed prompt list.
 * With replacement: uniform random pick on every call.
 * Without replacement: one Fisher-Yates-shuffled pass over all prompts,
 * reshuffling once the pool is exhausted so sampling never stops.
 *
 * @param {Array} prompts - non-empty list of prompts
 * @param {boolean} withReplacement
 * @returns {() => *} zero-arg sampler
 */
function createSampler(prompts, withReplacement) {
  if (withReplacement) {
    return () => prompts[Math.floor(Math.random() * prompts.length)];
  }
  // Fisher-Yates shuffle on a copy; the source list is never mutated.
  const fisherYates = (arr) => {
    const out = [...arr];
    for (let i = out.length - 1; i > 0; i--) {
      const j = Math.floor(Math.random() * (i + 1));
      const tmp = out[i];
      out[i] = out[j];
      out[j] = tmp;
    }
    return out;
  };
  let pool = [];
  let cursor = 0;
  return () => {
    if (cursor >= pool.length) {
      pool = fisherYates(prompts);
      cursor = 0;
    }
    return pool[cursor++];
  };
}
// ─────────────────────────────────────────────────────────────────────────────
// Statistics
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Nearest-rank percentile of an unsorted numeric array.
 * Does not mutate the input; returns 0 for an empty array.
 *
 * @param {number[]} arr - samples
 * @param {number} p - percentile in [0, 100]
 * @returns {number} the value at the nearest rank
 */
function percentile(arr, p) {
  if (arr.length === 0) return 0;
  const sorted = [...arr].sort((a, b) => a - b);
  // Nearest-rank: ceil(p% of n), converted to a 0-based index and
  // clamped at 0 so p=0 maps to the minimum.
  const rank = Math.ceil((p / 100) * sorted.length) - 1;
  return sorted[rank < 0 ? 0 : rank];
}
/**
 * Render a millisecond value with two decimal places for display.
 *
 * @param {number} ms
 * @returns {string}
 */
function formatMs(ms) {
  const decimals = 2;
  return ms.toFixed(decimals);
}
/**
 * Print an ASCII latency histogram to stdout.
 * Buckets evenly span [min, max]; bar length is scaled relative to the
 * most populated bucket. No-op on empty input; prints a single summary
 * line when every latency is identical (zero range).
 *
 * @param {number[]} latencies - latency samples in milliseconds
 * @param {number} [bucketCount=20] - number of histogram buckets
 */
function printHistogram(latencies, bucketCount = 20) {
  if (latencies.length === 0) return;
  const sorted = [...latencies].sort((a, b) => a - b);
  const lo = sorted[0];
  const hi = sorted[sorted.length - 1];
  const span = hi - lo;
  if (span === 0) {
    console.log(` All requests: ${lo.toFixed(0)} ms`);
    return;
  }
  const width = span / bucketCount;
  const counts = new Array(bucketCount).fill(0);
  for (const lat of latencies) {
    // Clamp so the exact maximum lands in the last bucket.
    counts[Math.min(Math.floor((lat - lo) / width), bucketCount - 1)]++;
  }
  const tallest = Math.max(...counts);
  const barWidth = 40;
  console.log("");
  console.log(" Latency Distribution:");
  console.log(` ${lo.toFixed(0)}ms ${"─".repeat(barWidth + 10)} ${hi.toFixed(0)}ms`);
  console.log("");
  counts.forEach((count, i) => {
    const from = lo + i * width;
    const to = lo + (i + 1) * width;
    const barLen = tallest > 0 ? Math.round((count / tallest) * barWidth) : 0;
    const bar = "█".repeat(barLen);
    const label = `${(from / 1000).toFixed(1)}s-${(to / 1000).toFixed(1)}s`;
    const countStr = count.toString().padStart(4);
    console.log(` ${label.padEnd(13)} │${bar.padEnd(barWidth)}│ ${countStr}`);
  });
  console.log("");
}
// ─────────────────────────────────────────────────────────────────────────────
// Raw HTTP Client (matching vllm's aiohttp TCPConnector settings)
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Build a raw HTTP request function backed by an undici Agent whose
 * connection pool is sized to the worker concurrency, mirroring the
 * aiohttp TCPConnector settings that vllm's bench client uses.
 *
 * @param {string} baseUrl - API base URL ("/chat/completions" is appended)
 * @param {string} token - bearer token for the Authorization header
 * @param {string} project - value for the OpenAI-Project header
 * @param {number} concurrency - max pooled connections
 * @param {number} timeoutSec - per-request headers/body timeout, seconds
 * @returns {Function} async (messages, model, reasoningEffort, temperature,
 *   maxTokens) resolving to the parsed JSON response; throws an Error with
 *   a `status` property on HTTP >= 400.
 */
function createRawClient(baseUrl, token, project, concurrency, timeoutSec) {
  const MS = 1000;
  const agent = new Agent({
    connections: concurrency, // pool cap, like aiohttp's `limit`
    pipelining: 1, // plain HTTP/1.1, no pipelining depth
    keepAliveTimeout: 60 * MS, // 60s keep-alive, matching aiohttp
    keepAliveMaxTimeout: 600 * MS, // hard ceiling on keep-alive
    connect: {
      timeout: 30 * MS, // TCP/TLS connect timeout
      rejectUnauthorized: true, // keep TLS certificate verification on
    },
  });
  const endpoint = `${baseUrl}/chat/completions`;
  return async function makeRawRequest(messages, model, reasoningEffort, temperature, maxTokens) {
    const payload = {
      model,
      messages,
      reasoning_effort: reasoningEffort,
      temperature,
      max_tokens: maxTokens,
    };
    const { statusCode, body: responseBody } = await request(endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "Authorization": `Bearer ${token}`,
        "OpenAI-Project": project,
      },
      body: JSON.stringify(payload),
      dispatcher: agent,
      bodyTimeout: timeoutSec * MS,
      headersTimeout: timeoutSec * MS,
    });
    // Drain the body fully so the connection can be returned to the pool.
    const chunks = [];
    for await (const chunk of responseBody) {
      chunks.push(chunk);
    }
    const responseText = Buffer.concat(chunks).toString("utf-8");
    if (statusCode >= 400) {
      const error = new Error(`HTTP ${statusCode}: ${responseText.slice(0, 200)}`);
      error.status = statusCode;
      throw error;
    }
    return JSON.parse(responseText);
  };
}
// ─────────────────────────────────────────────────────────────────────────────
// Main Execution
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Entry point: wires up prompt sampling, the HTTP client (OpenAI SDK or
 * raw undici), the concurrency-limited request loop, live progress
 * display, final statistics, and optional CSV/JSON result export.
 * Reads configuration exclusively from the module-level `opts`.
 */
async function main() {
  let sampler;
  let promptSource;
  let effectiveMaxTokens = opts.maxTokens;
  if (opts.synthetic) {
    promptSource = `SYNTHETIC (${opts.inputTokens} input tokens, ${opts.outputTokens} output tokens)`;
    sampler = createSyntheticSampler(opts.inputTokens);
    effectiveMaxTokens = opts.outputTokens; // Use output-tokens for synthetic mode
    console.log(`Generating synthetic prompts...`);
  } else {
    let prompts = loadPrompts(opts.prompts);
    if (prompts.length === 0) {
      console.error("Error: No valid prompts found");
      process.exit(1);
    }
    // Normalize prompt lengths if requested
    if (opts.targetInputTokens) {
      prompts = normalizeAllPrompts(prompts, opts.targetInputTokens);
      promptSource = `FILE (${prompts.length} prompts, normalized to ~${opts.targetInputTokens} tokens)`;
    } else {
      // Report original prompt sizes
      // NOTE(review): this measures the FIRST user message via find(),
      // while normalizePromptLength resizes the LAST one
      // (findLastIndex) — for multi-turn prompts these can differ;
      // confirm which message these stats should describe.
      const lengths = prompts.map(p => {
        const userMsg = p.find(m => m.role === "user");
        return userMsg ? estimateTokens(userMsg.content) : 0;
      });
      const avg = lengths.reduce((a, b) => a + b, 0) / lengths.length;
      console.log(`Original prompt sizes: avg=${avg.toFixed(0)} tokens`);
      promptSource = `FILE (${prompts.length} prompts, avg ~${avg.toFixed(0)} tokens)`;
    }
    sampler = createSampler(prompts, opts.replacement);
  }
  const mode = opts.raw ? "RAW (undici + connection pool)" : "SDK (OpenAI JS)";
  console.log(`Mode: ${mode}`);
  console.log(`Prompts: ${promptSource}`);
  console.log(`Concurrency: ${opts.concurrency}`);
  console.log("");
  // Create the appropriate client: exactly one of sdkClient/rawRequest
  // is non-null for the rest of the run.
  let sdkClient = null;
  let rawRequest = null;
  if (opts.raw) {
    rawRequest = createRawClient(
      opts.url,
      opts.token,
      opts.project,
      opts.concurrency,
      opts.timeout
    );
  } else {
    sdkClient = new OpenAI({
      apiKey: opts.token,
      baseURL: opts.url,
      defaultHeaders: {
        "OpenAI-Project": opts.project,
      },
      timeout: opts.timeout * 1000,
    });
  }
  const limit = pLimit(opts.concurrency);
  // Metrics (shared mutable state; safe because Node is single-threaded)
  const results = [];
  let completed = 0;
  let successes = 0;
  let failures = 0;
  const errorBreakdown = { timeout: 0, "4xx": 0, "5xx": 0, other: 0 };
  const startTime = Date.now();
  let stopping = false;
  const abortController = new AbortController();
  // Progress display: one status line, rewritten in place (\r) twice a second.
  const progressInterval = setInterval(() => {
    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
    const rps = (completed / (Date.now() - startTime)) * 1000;
    process.stdout.write(
      `\rCompleted: ${completed} | Errors: ${failures} | Elapsed: ${elapsed}s | RPS: ${rps.toFixed(1)} `
    );
  }, 500);
  // Check termination conditions: SIGINT, request budget, or time budget.
  function shouldStop() {
    if (stopping) return true;
    if (opts.totalRequests && completed >= opts.totalRequests) return true;
    if (opts.duration && (Date.now() - startTime) / 1000 >= opts.duration) return true;
    return false;
  }
  // Single request worker: samples a prompt, issues one request
  // (streaming via the SDK so TTFT can be measured; raw mode measures
  // end-to-end only), classifies any failure, and records one result row.
  // Aborted requests (SIGINT) return early and are not counted at all.
  async function makeRequest() {
    if (shouldStop()) return;
    const messages = sampler();
    const requestStart = Date.now();
    let status = "success";
    let errorType = null;
    let error = null;
    let ttftMs = null;
    let firstTokenTime = null;
    let inputTokens = null;
    let outputTokens = null;
    try {
      if (opts.raw) {
        // Raw mode doesn't support streaming yet - just measure E2E
        await rawRequest(
          messages,
          opts.model,
          opts.reasoningEffort,
          opts.temperature,
          effectiveMaxTokens
        );
      } else {
        // Use streaming to capture TTFT
        const stream = await sdkClient.chat.completions.create(
          {
            messages,
            model: opts.model,
            reasoning_effort: opts.reasoningEffort,
            temperature: opts.temperature,
            max_tokens: effectiveMaxTokens,
            stream: true,
            stream_options: { include_usage: true },
          },
          { signal: abortController.signal }
        );
        // Iterate through the stream
        for await (const chunk of stream) {
          // Record TTFT on first chunk with content
          if (firstTokenTime === null && chunk.choices?.[0]?.delta?.content) {
            firstTokenTime = Date.now();
            ttftMs = firstTokenTime - requestStart;
          }
          // Capture usage from final chunk
          if (chunk.usage) {
            inputTokens = chunk.usage.prompt_tokens;
            outputTokens = chunk.usage.completion_tokens;
          }
        }
      }
      successes++;
    } catch (err) {
      // Ignore abort errors - these are expected during SIGINT
      if (err.name === "AbortError" || abortController.signal.aborted) {
        return;
      }
      failures++;
      status = "error";
      error = err.message;
      // Still record TTFT if we got it before the error
      if (firstTokenTime !== null) {
        ttftMs = firstTokenTime - requestStart;
      }
      // Classify the failure for the error-breakdown summary.
      if (
        err.code === "ETIMEDOUT" ||
        err.code === "ECONNABORTED" ||
        err.code === "UND_ERR_HEADERS_TIMEOUT" ||
        err.code === "UND_ERR_BODY_TIMEOUT" ||
        err.message?.includes("timeout")
      ) {
        errorType = "timeout";
        errorBreakdown.timeout++;
      } else if (err.status >= 400 && err.status < 500) {
        errorType = "4xx";
        errorBreakdown["4xx"]++;
      } else if (err.status >= 500) {
        errorType = "5xx";
        errorBreakdown["5xx"]++;
      } else {
        errorType = "other";
        errorBreakdown.other++;
      }
    }
    const latency = Date.now() - requestStart;
    // Decode time = total latency minus prefill (TTFT), when TTFT was seen.
    const decodeMs = ttftMs !== null ? latency - ttftMs : null;
    completed++;
    results.push({
      requestId: completed,
      timestamp: new Date(requestStart).toISOString(),
      latencyMs: latency,
      ttftMs,
      decodeMs,
      inputTokens,
      outputTokens,
      status,
      errorType,
      error,
    });
  }
  // Ctrl+C handler: first SIGINT aborts in-flight requests gracefully,
  // second SIGINT force-exits.
  process.on("SIGINT", () => {
    if (stopping) {
      // Second SIGINT - force exit
      console.log("\n\nForce exiting...");
      process.exit(1);
    }
    console.log("\n\nReceived SIGINT, aborting in-flight requests...");
    stopping = true;
    abortController.abort();
  });
  // Spawn workers
  const workers = [];
  const maxRequests = opts.totalRequests || Infinity;
  // Keep spawning requests until termination
  // NOTE(review): once `workers.length` reaches maxRequests this loop
  // spins on setImmediate until shouldStop() flips; likely benign, but
  // worth confirming CPU impact on long runs.
  async function spawnRequests() {
    while (!shouldStop()) {
      if (workers.length < maxRequests) {
        const task = limit(makeRequest);
        workers.push(task);
        // Don't await here - let p-limit manage concurrency
        task.catch(() => {}); // Swallow rejections, we track them in makeRequest
      }
      // Small yield to prevent tight loop
      await new Promise((resolve) => setImmediate(resolve));
    }
  }
  // Run until done
  await spawnRequests();
  // Wait for all in-flight to complete (or be cancelled)
  await Promise.allSettled(workers);
  clearInterval(progressInterval);
  console.log("\n");
  // ─────────────────────────────────────────────────────────────────────────────
  // Final Statistics
  // ─────────────────────────────────────────────────────────────────────────────
  const totalTime = (Date.now() - startTime) / 1000;
  const latencies = results.filter((r) => r.status === "success").map((r) => r.latencyMs);
  console.log("═══════════════════════════════════════════════════════════════════");
  console.log("                        LOAD TEST RESULTS                          ");
  console.log("═══════════════════════════════════════════════════════════════════");
  console.log(`Mode: ${mode}`);
  console.log(`Prompts: ${promptSource}`);
  console.log(`Total Requests: ${completed}`);
  console.log(`Successes: ${successes}`);
  console.log(`Failures: ${failures}`);
  // NOTE(review): if zero requests completed this prints NaN% — confirm
  // whether a guard is wanted.
  console.log(`Success Rate: ${((successes / completed) * 100).toFixed(2)}%`);
  console.log(`Total Duration: ${totalTime.toFixed(2)}s`);
  console.log(`Throughput: ${(completed / totalTime).toFixed(2)} req/s`);
  console.log("───────────────────────────────────────────────────────────────────");
  console.log("Error Breakdown:");
  console.log(`  Timeouts: ${errorBreakdown.timeout}`);
  console.log(`  4xx Errors: ${errorBreakdown["4xx"]}`);
  console.log(`  5xx Errors: ${errorBreakdown["5xx"]}`);
  console.log(`  Other: ${errorBreakdown.other}`);
  console.log("───────────────────────────────────────────────────────────────────");
  console.log("Latency (successful requests only):");
  if (latencies.length > 0) {
    const ttfts = results.filter((r) => r.ttftMs !== null).map((r) => r.ttftMs);
    const decodeTimes = results.filter((r) => r.decodeMs !== null).map((r) => r.decodeMs);
    console.log(`  Min: ${formatMs(Math.min(...latencies))} ms`);
    console.log(`  Max: ${formatMs(Math.max(...latencies))} ms`);
    console.log(`  Mean: ${formatMs(latencies.reduce((a, b) => a + b, 0) / latencies.length)} ms`);
    console.log(`  P50: ${formatMs(percentile(latencies, 50))} ms`);
    console.log(`  P90: ${formatMs(percentile(latencies, 90))} ms`);
    console.log(`  P99: ${formatMs(percentile(latencies, 99))} ms`);
    if (ttfts.length > 0) {
      const meanTtft = ttfts.reduce((a, b) => a + b, 0) / ttfts.length;
      const meanDecode = decodeTimes.length > 0 ? decodeTimes.reduce((a, b) => a + b, 0) / decodeTimes.length : 0;
      console.log(`  Mean TTFT: ${formatMs(meanTtft)} ms (prefill)`);
      console.log(`  Mean Decode: ${formatMs(meanDecode)} ms (generation)`);
    }
    // Token stats (from usage reported in the stream's final chunk)
    const inputToks = results.filter((r) => r.inputTokens !== null).map((r) => r.inputTokens);
    const outputToks = results.filter((r) => r.outputTokens !== null).map((r) => r.outputTokens);
    if (inputToks.length > 0) {
      console.log(`  P50 Input Toks: ${percentile(inputToks, 50).toFixed(0)}`);
      console.log(`  P90 Input Toks: ${percentile(inputToks, 90).toFixed(0)}`);
      console.log(`  P50 Output Toks: ${percentile(outputToks, 50).toFixed(0)}`);
      console.log(`  P90 Output Toks: ${percentile(outputToks, 90).toFixed(0)}`);
    }
    printHistogram(latencies);
  } else {
    console.log("  No successful requests to measure");
  }
  // Time to Nth result (sorted by completion time)
  // timestamp field records request START, so completion = timestamp + latencyMs
  const successResults = results
    .filter((r) => r.status === "success")
    .map((r) => ({
      ...r,
      completionTime: new Date(r.timestamp).getTime() + r.latencyMs,
    }))
    .sort((a, b) => a.completionTime - b.completionTime);
  if (successResults.length >= 30) {
    console.log("───────────────────────────────────────────────────────────────────");
    console.log("Time to Nth Result (from test start):");
    const time30 = (successResults[29].completionTime - startTime) / 1000;
    console.log(`  30th: ${time30.toFixed(2)}s`);
    if (successResults.length >= 150) {
      const time150 = (successResults[149].completionTime - startTime) / 1000;
      console.log(`  150th: ${time150.toFixed(2)}s`);
    }
    if (successResults.length >= 400) {
      const time400 = (successResults[399].completionTime - startTime) / 1000;
      console.log(`  400th: ${time400.toFixed(2)}s`);
    }
  }
  console.log("═══════════════════════════════════════════════════════════════════");
  // ─────────────────────────────────────────────────────────────────────────────
  // Output File
  // ─────────────────────────────────────────────────────────────────────────────
  if (opts.output) {
    const ext = extname(opts.output).toLowerCase();
    if (ext === ".json") {
      writeFileSync(opts.output, JSON.stringify(results, null, 2));
      console.log(`\nResults written to ${opts.output}`);
    } else {
      // Default to CSV
      // Commas inside error messages are replaced with ";" to keep the
      // row parseable; no other CSV escaping is performed.
      const header = "requestId,timestamp,latencyMs,ttftMs,decodeMs,inputTokens,outputTokens,status,errorType,error\n";
      const rows = results
        .map(
          (r) =>
            `${r.requestId},${r.timestamp},${r.latencyMs},${r.ttftMs ?? ""},${r.decodeMs ?? ""},${r.inputTokens ?? ""},${r.outputTokens ?? ""},${r.status},${r.errorType || ""},${(r.error || "").replace(/,/g, ";")}`
        )
        .join("\n");
      writeFileSync(opts.output, header + rows);
      console.log(`\nResults written to ${opts.output}`);
    }
  }
}
// Top-level entry: any unhandled rejection escaping main() is fatal.
main().catch((err) => {
  console.error("Fatal error:", err);
  process.exit(1);
});
{
"name": "inference-load-tester",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "inference-load-tester",
"version": "1.0.0",
"dependencies": {
"commander": "^12.1.0",
"openai": "^4.77.0",
"p-limit": "^6.1.0",
"undici": "^7.2.0"
}
},
"node_modules/@types/node": {
"version": "18.19.130",
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.130.tgz",
"integrity": "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg==",
"license": "MIT",
"dependencies": {
"undici-types": "~5.26.4"
}
},
"node_modules/@types/node-fetch": {
"version": "2.6.13",
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.13.tgz",
"integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==",
"license": "MIT",
"dependencies": {
"@types/node": "*",
"form-data": "^4.0.4"
}
},
"node_modules/abort-controller": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz",
"integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==",
"license": "MIT",
"dependencies": {
"event-target-shim": "^5.0.0"
},
"engines": {
"node": ">=6.5"
}
},
"node_modules/agentkeepalive": {
"version": "4.6.0",
"resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.6.0.tgz",
"integrity": "sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==",
"license": "MIT",
"dependencies": {
"humanize-ms": "^1.2.1"
},
"engines": {
"node": ">= 8.0.0"
}
},
"node_modules/asynckit": {
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==",
"license": "MIT"
},
"node_modules/call-bind-apply-helpers": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz",
"integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==",
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0",
"function-bind": "^1.1.2"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/combined-stream": {
"version": "1.0.8",
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
"license": "MIT",
"dependencies": {
"delayed-stream": "~1.0.0"
},
"engines": {
"node": ">= 0.8"
}
},
"node_modules/commander": {
"version": "12.1.0",
"resolved": "https://registry.npmjs.org/commander/-/commander-12.1.0.tgz",
"integrity": "sha512-Vw8qHK3bZM9y/P10u3Vib8o/DdkvA2OtPtZvD871QKjy74Wj1WSKFILMPRPSdUSx5RFK1arlJzEtA4PkFgnbuA==",
"license": "MIT",
"engines": {
"node": ">=18"
}
},
"node_modules/delayed-stream": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
"license": "MIT",
"engines": {
"node": ">=0.4.0"
}
},
"node_modules/dunder-proto": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
"integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==",
"license": "MIT",
"dependencies": {
"call-bind-apply-helpers": "^1.0.1",
"es-errors": "^1.3.0",
"gopd": "^1.2.0"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-define-property": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz",
"integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-errors": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
"integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-object-atoms": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz",
"integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==",
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-set-tostringtag": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz",
"integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==",
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0",
"get-intrinsic": "^1.2.6",
"has-tostringtag": "^1.0.2",
"hasown": "^2.0.2"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/event-target-shim": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz",
"integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==",
"license": "MIT",
"engines": {
"node": ">=6"
}
},
"node_modules/form-data": {
"version": "4.0.5",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz",
"integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==",
"license": "MIT",
"dependencies": {
"asynckit": "^0.4.0",
"combined-stream": "^1.0.8",
"es-set-tostringtag": "^2.1.0",
"hasown": "^2.0.2",
"mime-types": "^2.1.12"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/form-data-encoder": {
"version": "1.7.2",
"resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz",
"integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==",
"license": "MIT"
},
"node_modules/formdata-node": {
"version": "4.4.1",
"resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz",
"integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==",
"license": "MIT",
"dependencies": {
"node-domexception": "1.0.0",
"web-streams-polyfill": "4.0.0-beta.3"
},
"engines": {
"node": ">= 12.20"
}
},
"node_modules/function-bind": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
"integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==",
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/get-intrinsic": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz",
"integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==",
"license": "MIT",
"dependencies": {
"call-bind-apply-helpers": "^1.0.2",
"es-define-property": "^1.0.1",
"es-errors": "^1.3.0",
"es-object-atoms": "^1.1.1",
"function-bind": "^1.1.2",
"get-proto": "^1.0.1",
"gopd": "^1.2.0",
"has-symbols": "^1.1.0",
"hasown": "^2.0.2",
"math-intrinsics": "^1.1.0"
},
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/get-proto": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz",
"integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==",
"license": "MIT",
"dependencies": {
"dunder-proto": "^1.0.1",
"es-object-atoms": "^1.0.0"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/gopd": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz",
"integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/has-symbols": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz",
"integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/has-tostringtag": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz",
"integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==",
"license": "MIT",
"dependencies": {
"has-symbols": "^1.0.3"
},
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/hasown": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz",
"integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==",
"license": "MIT",
"dependencies": {
"function-bind": "^1.1.2"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/humanize-ms": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz",
"integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==",
"license": "MIT",
"dependencies": {
"ms": "^2.0.0"
}
},
"node_modules/math-intrinsics": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
"integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
}
},
"node_modules/mime-db": {
"version": "1.52.0",
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
"license": "MIT",
"engines": {
"node": ">= 0.6"
}
},
"node_modules/mime-types": {
"version": "2.1.35",
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
"license": "MIT",
"dependencies": {
"mime-db": "1.52.0"
},
"engines": {
"node": ">= 0.6"
}
},
"node_modules/ms": {
"version": "2.1.3",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
"integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
"license": "MIT"
},
"node_modules/node-domexception": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz",
"integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==",
"deprecated": "Use your platform's native DOMException instead",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/jimmywarting"
},
{
"type": "github",
"url": "https://paypal.me/jimmywarting"
}
],
"license": "MIT",
"engines": {
"node": ">=10.5.0"
}
},
"node_modules/node-fetch": {
"version": "2.7.0",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
"integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
"license": "MIT",
"dependencies": {
"whatwg-url": "^5.0.0"
},
"engines": {
"node": "4.x || >=6.0.0"
},
"peerDependencies": {
"encoding": "^0.1.0"
},
"peerDependenciesMeta": {
"encoding": {
"optional": true
}
}
},
"node_modules/openai": {
"version": "4.104.0",
"resolved": "https://registry.npmjs.org/openai/-/openai-4.104.0.tgz",
"integrity": "sha512-p99EFNsA/yX6UhVO93f5kJsDRLAg+CTA2RBqdHK4RtK8u5IJw32Hyb2dTGKbnnFmnuoBv5r7Z2CURI9sGZpSuA==",
"license": "Apache-2.0",
"dependencies": {
"@types/node": "^18.11.18",
"@types/node-fetch": "^2.6.4",
"abort-controller": "^3.0.0",
"agentkeepalive": "^4.2.1",
"form-data-encoder": "1.7.2",
"formdata-node": "^4.3.2",
"node-fetch": "^2.6.7"
},
"bin": {
"openai": "bin/cli"
},
"peerDependencies": {
"ws": "^8.18.0",
"zod": "^3.23.8"
},
"peerDependenciesMeta": {
"ws": {
"optional": true
},
"zod": {
"optional": true
}
}
},
"node_modules/p-limit": {
"version": "6.2.0",
"resolved": "https://registry.npmjs.org/p-limit/-/p-limit-6.2.0.tgz",
"integrity": "sha512-kuUqqHNUqoIWp/c467RI4X6mmyuojY5jGutNU0wVTmEOOfcuwLqyMVoAi9MKi2Ak+5i9+nhmrK4ufZE8069kHA==",
"license": "MIT",
"dependencies": {
"yocto-queue": "^1.1.1"
},
"engines": {
"node": ">=18"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/tr46": {
"version": "0.0.3",
"resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==",
"license": "MIT"
},
"node_modules/undici": {
"version": "7.18.2",
"resolved": "https://registry.npmjs.org/undici/-/undici-7.18.2.tgz",
"integrity": "sha512-y+8YjDFzWdQlSE9N5nzKMT3g4a5UBX1HKowfdXh0uvAnTaqqwqB92Jt4UXBAeKekDs5IaDKyJFR4X1gYVCgXcw==",
"license": "MIT",
"engines": {
"node": ">=20.18.1"
}
},
"node_modules/undici-types": {
"version": "5.26.5",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
"license": "MIT"
},
"node_modules/web-streams-polyfill": {
"version": "4.0.0-beta.3",
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz",
"integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==",
"license": "MIT",
"engines": {
"node": ">= 14"
}
},
"node_modules/webidl-conversions": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
"integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==",
"license": "BSD-2-Clause"
},
"node_modules/whatwg-url": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
"integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
"license": "MIT",
"dependencies": {
"tr46": "~0.0.3",
"webidl-conversions": "^3.0.0"
}
},
"node_modules/yocto-queue": {
"version": "1.2.2",
"resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.2.2.tgz",
"integrity": "sha512-4LCcse/U2MHZ63HAJVE+v71o7yOdIe4cZ70Wpf8D/IyjDKYQLV5GD46B+hSTjJsvV5PztjvHoU580EftxjDZFQ==",
"license": "MIT",
"engines": {
"node": ">=12.20"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
}
}
}
{
"name": "inference-load-tester",
"version": "1.0.0",
"description": "Load testing tool for OpenAI-compatible inference endpoints",
"type": "module",
"main": "load_test.js",
"scripts": {
"start": "node load_test.js"
},
"dependencies": {
"openai": "^4.77.0",
"p-limit": "^6.1.0",
"commander": "^12.1.0",
"undici": "^7.2.0"
}
}
@alexeldeib
Copy link
Author

Usage example:

node load_test.js \
  --prompts /Users/aeldeib/Downloads/prompts.json \
  --model openai/gpt-oss-120b \
  --url https://api.inference.wandb.ai/v1 \
  --token "xxx" \
  --project wandb/inference-test \
  --concurrency 400 \
  --total-requests 400 \
  --timeout 180 \
  --output results-real-orig-c100.csv \
  --max-tokens 250 --target-input-tokens 500 # optional; if neither is specified, the raw prompts are used as-is and output length is unconstrained

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment