Created
March 11, 2026 02:42
-
-
Save rndmcnlly/6680b63aeb26b5ea9ffbb56dab3030dc to your computer and use it in GitHub Desktop.
Empirical test: OpenRouter 1h cache TTL -- prompt_cache_ttl vs cache_control.ttl
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # /// script | |
| # requires-python = ">=3.11" | |
| # dependencies = ["httpx", "python-dotenv"] | |
| # /// | |
| """ | |
| Empirical test: does OpenRouter's 1h cache TTL survive a 6-minute gap? | |
| Tests three approaches against anthropic/claude-sonnet-4 via OpenRouter: | |
| A) prompt_cache_ttl: 3600 (undocumented, used in PR #16850) | |
| B) cache_control.ttl: "1h" (documented per-message approach) | |
| C) baseline (no TTL extension, default 5-min expiry) | |
| Protocol for each approach: | |
| 1. Send request with ~5k tokens of system context -> expect cache WRITE | |
| 2. Wait 6 minutes (past 5-min default TTL) | |
| 3. Send same request again -> expect cache HIT only if TTL > 5 min | |
| Pass criteria: | |
| - A and/or B show cached_tokens > 0 on the second request | |
| - C shows cached_tokens == 0 on the second request (control) | |
| Usage: | |
| echo 'OPENROUTER_API_KEY=sk-or-...' > .env | |
| uv run --script test_cache_ttl.py | |
| Or: | |
| export OPENROUTER_API_KEY=sk-or-... | |
| uv run --script test_cache_ttl.py | |
| Docs: https://openrouter.ai/docs/guides/best-practices/prompt-caching | |
| """ | |
| import hashlib | |
| import os | |
| import sys | |
| import time | |
| from dotenv import load_dotenv | |
| import httpx | |
load_dotenv()  # pull OPENROUTER_API_KEY from a local .env, if one exists

# OpenRouter chat-completions endpoint and the model under test.
API = "https://openrouter.ai/api/v1/chat/completions"
MODEL = "anthropic/claude-sonnet-4"
DELAY = 6 * 60  # 6 minutes in seconds -- deliberately past the 5-min default cache TTL
RETRIES = 3  # attempts per request before giving up
TIMEOUT = 120  # per-request timeout, seconds

# ~5k tokens of deterministic filler (well above 2048 minimum for sonnet)
FILLER = ("The quick brown fox jumps over the lazy dog. " * 120 + "\n") * 10

key = os.environ.get("OPENROUTER_API_KEY")
if not key:
    # Fail fast: without a key every request below would 401.
    print("error: set OPENROUTER_API_KEY in env or .env file", file=sys.stderr)
    sys.exit(1)

# Shared headers for every request; Referer/Title identify this test to OpenRouter.
headers = {
    "Authorization": f"Bearer {key}",
    "Content-Type": "application/json",
    "HTTP-Referer": "https://github.com/anomalyco/opencode",
    "X-Title": "opencode-cache-ttl-test",
}
def tag(label: str) -> str:
    """Return a short (16 hex chars) session tag unique to *label* and this process."""
    seed = f"cache-ttl-test-{label}-{os.getpid()}"
    digest = hashlib.sha256(seed.encode())
    return digest.hexdigest()[:16]
def messages_plain():
    """Build the two-message prompt with no explicit cache_control markers."""
    system_text = f"You are a test assistant. Reference material:\n{FILLER}"
    return [
        {"role": "system", "content": system_text},
        {"role": "user", "content": "Say 'ok' and nothing else."},
    ]
def messages_with_cc(ttl: str | None = None):
    """Build the prompt with an explicit cache_control breakpoint on the system text.

    When *ttl* is given (e.g. "1h") it is attached to the cache_control object;
    otherwise only the bare {"type": "ephemeral"} marker is sent.
    """
    cache_control: dict = {"type": "ephemeral"}
    if ttl:
        cache_control["ttl"] = ttl
    system_block = {
        "type": "text",
        "text": f"You are a test assistant. Reference material:\n{FILLER}",
        "cache_control": cache_control,
    }
    return [
        {"role": "system", "content": [system_block]},
        {"role": "user", "content": "Say 'ok' and nothing else."},
    ]
| def send(label: str, body: dict) -> dict: | |
| for attempt in range(1, RETRIES + 1): | |
| try: | |
| print(f" [{label}] request (attempt {attempt}/{RETRIES})...", flush=True) | |
| r = httpx.post(API, headers=headers, json=body, timeout=TIMEOUT) | |
| r.raise_for_status() | |
| data = r.json() | |
| usage = data.get("usage", {}) | |
| details = usage.get("prompt_tokens_details", {}) | |
| cached = details.get("cached_tokens", 0) | |
| written = details.get("cache_write_tokens", 0) | |
| prompt = usage.get("prompt_tokens", 0) | |
| print(f" [{label}] prompt={prompt} cached={cached} written={written}") | |
| return {"prompt": prompt, "cached": cached, "written": written} | |
| except (httpx.TimeoutException, httpx.HTTPStatusError) as e: | |
| print(f" [{label}] attempt {attempt} failed: {e}", flush=True) | |
| if attempt == RETRIES: | |
| print(f" [{label}] all {RETRIES} attempts failed, aborting", file=sys.stderr) | |
| sys.exit(1) | |
| wait = 2 ** attempt | |
| print(f" [{label}] retrying in {wait}s...", flush=True) | |
| time.sleep(wait) | |
| assert False, "unreachable" | |
def approach_a(session: str) -> dict:
    """Request body for approach A: top-level prompt_cache_ttl=3600 (undocumented)."""
    body = {
        "model": MODEL,
        "messages": messages_plain(),
        "max_tokens": 8,
        "prompt_cache_key": session,
    }
    body["prompt_cache_ttl"] = 3600
    return body
def approach_b(session: str) -> dict:
    """Request body for approach B: per-message cache_control ttl='1h' (documented)."""
    return dict(
        model=MODEL,
        messages=messages_with_cc(ttl="1h"),
        max_tokens=8,
        prompt_cache_key=session,
    )
def approach_c(session: str) -> dict:
    """Request body for approach C (control): no TTL extension of any kind."""
    return dict(
        model=MODEL,
        messages=messages_plain(),
        max_tokens=8,
        prompt_cache_key=session,
    )
def run():
    """Drive the experiment: prime caches, wait past the default TTL, re-test, report."""
    approaches = {
        "A (prompt_cache_ttl=3600)": approach_a,
        "B (cache_control ttl=1h)": approach_b,
        "C (baseline, no TTL ext)": approach_c,
    }
    # One independent cache key per approach so they cannot share cache entries.
    sessions = {name: tag(name) for name in approaches}
    results: dict[str, dict] = {}

    # --- Round 1: prime the caches ---
    print("=== Round 1: priming caches ===", flush=True)
    for name, build in approaches.items():
        results[f"{name}_r1"] = send(f"{name} R1", build(sessions[name]))

    # --- Wait ---
    print(f"\n=== Waiting {DELAY}s ({DELAY // 60}m) to exceed 5-min default TTL ===", flush=True)
    remaining = DELAY
    while remaining > 0:
        if remaining % 60 == 0:
            print(f" {remaining // 60}m remaining...", flush=True)
        time.sleep(1)
        remaining -= 1

    # --- Round 2: test cache survival ---
    print("\n=== Round 2: testing cache survival after 6-min gap ===", flush=True)
    for name, build in approaches.items():
        results[f"{name}_r2"] = send(f"{name} R2", build(sessions[name]))

    # --- Verdict ---
    print("\n=== Results ===")
    print(f"{'Approach':<35} {'R1 cached':>10} {'R1 written':>11} {'R2 cached':>10} {'R2 written':>11} Verdict")
    print("-" * 100)
    hits: dict[str, bool] = {}
    for name in approaches:
        r1 = results[f"{name}_r1"]
        r2 = results[f"{name}_r2"]
        hits[name] = r2["cached"] > 0
        verdict = "CACHE HIT (TTL > 5m)" if hits[name] else "CACHE MISS (TTL <= 5m)"
        print(f"{name:<35} {r1['cached']:>10} {r1['written']:>11} {r2['cached']:>10} {r2['written']:>11} {verdict}")
    print()
    a_hit = hits["A (prompt_cache_ttl=3600)"]
    b_hit = hits["B (cache_control ttl=1h)"]
    c_hit = hits["C (baseline, no TTL ext)"]
    if c_hit:
        # The control surviving means the 6-min gap did not actually expire the cache.
        print("WARNING: baseline also got a cache hit -- 6 min may not have been enough,")
        print(" or OpenRouter changed default TTL. Results inconclusive.")
    elif a_hit and b_hit:
        print("Both A (undocumented) and B (documented) survived. Either approach works.")
    elif b_hit:
        print("Only B (documented cache_control ttl) survived. PR should use that approach.")
    elif a_hit:
        print("Only A (undocumented prompt_cache_ttl) survived. Surprising -- needs investigation.")
    else:
        print("Neither A nor B survived. 1h TTL may not be working as expected.")
| if __name__ == "__main__": | |
| run() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment