lizthegrey/index.html

## index.html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>LLM Performance vs Cost - Interactive Pareto Frontier</title>
    <script src="https://d3js.org/d3.v7.min.js"></script>
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
            margin: 0;
            padding: 20px;
            background: #1a1a1a;
        }

        .container {
            max-width: 1400px;
            margin: 0 auto;
            background: #2a2a2a;
            padding: 30px;
            border-radius: 8px;
            box-shadow: 0 2px 8px rgba(0,0,0,0.3);
        }

        h1 {
            margin-top: 0;
            color: #e0e0e0;
        }

        .controls {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
            gap: 20px;
            margin: 20px 0;
            padding: 20px;
            background: #333;
            border-radius: 4px;
        }

        .control-group {
            display: flex;
            flex-direction: column;
        }

        label {
            font-weight: 600;
            margin-bottom: 8px;
            color: #ccc;
            font-size: 14px;
        }

        input[type="range"] {
            width: 100%;
        }

        .value-display {
            color: #0066cc;
            font-weight: 600;
            margin-top: 4px;
            font-size: 13px;
        }

        .presets {
            display: flex;
            gap: 10px;
            flex-wrap: wrap;
        }

        button {
            padding: 8px 16px;
            border: 1px solid #555;
            background: #3a3a3a;
            color: #e0e0e0;
            border-radius: 4px;
            cursor: pointer;
            font-size: 13px;
            transition: all 0.2s;
        }

        button:hover {
            background: #4a4a4a;
        }

        button.active {
            background: #0066cc;
            color: white;
            border-color: #0066cc;
        }

        .chart-container {
            margin-top: 30px;
        }

        .tooltip {
            position: absolute;
            padding: 10px;
            background: rgba(0, 0, 0, 0.8);
            color: white;
            border-radius: 4px;
            pointer-events: none;
            font-size: 12px;
            opacity: 0;
            transition: opacity 0.2s;
        }

        .legend {
            display: flex;
            gap: 20px;
            margin-top: 20px;
            flex-wrap: wrap;
        }

        .legend-item {
            display: flex;
            align-items: center;
            gap: 8px;
            font-size: 13px;
            color: #ccc;
        }

        .legend-circle {
            width: 12px;
            height: 12px;
            border-radius: 50%;
        }

        .info {
            margin-top: 20px;
            padding: 15px;
            background: #333;
            border-left: 4px solid #0066cc;
            border-radius: 4px;
            font-size: 14px;
            color: #ccc;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>LLM Performance vs Cost: Interactive Pareto Frontier</h1>

        <div class="info">
            <strong>Why this matters:</strong> Google's Gemini 3 Flash chart assumes a fixed 5:1 output:input ratio with no caching.
            Real-world usage varies dramatically—coding agents often run 50:1+ ratios with 80-90% cache hits,
            completely changing the cost picture and Pareto frontier.<br><br>
            <strong>Note:</strong> Prices shown are for interactive/real-time usage. Batch processing (50% discount) is available
            from some providers but adds significant latency, making it unsuitable for the interactive use cases these Elo scores measure.
        </div>

        <div class="controls">
            <div class="control-group">
                <label>Benchmark</label>
                <div style="display: flex; gap: 10px;">
                    <label style="display: flex; align-items: center; gap: 6px; font-weight: normal;">
                        <input type="radio" name="benchmark" value="lmarena" checked style="width: auto; margin: 0;">
                        General Chat (LMArena)
                    </label>
                    <label style="display: flex; align-items: center; gap: 6px; font-weight: normal;">
                        <input type="radio" name="benchmark" value="coding" style="width: auto; margin: 0;">
                        Coding (SWE-Bench)
                    </label>
                </div>
            </div>

            <div class="control-group">
                <label for="inputOutputRatio">Input:Output Token Ratio</label>
                <input type="range" id="inputOutputRatio" min="0" max="100" value="5" step="1">
                <div class="value-display" id="ratioDisplay">1:5</div>
            </div>

            <div class="control-group">
                <label for="cacheHitRate">Cache Hit Rate (%)</label>
                <input type="range" id="cacheHitRate" min="0" max="100" value="0" step="5">
                <div class="value-display" id="cacheDisplay">0%</div>
            </div>

            <div class="control-group">
                <label for="thinkingOverhead">Thinking Token Overhead (%)</label>
                <input type="range" id="thinkingOverhead" min="0" max="300" value="100" step="25">
                <div class="value-display" id="thinkingDisplay">100%</div>
            </div>
        </div>

        <!-- Preset scenarios. Each button passes a key of the `presets` table inside
             applyPreset(); the text in parentheses mirrors the slider values that
             table applies. The ratio slider only expresses 1:N with integer N, so
             the Document Analysis preset (ratio 1) is labelled 1:1. -->
        <div class="presets">
            <button type="button" onclick="applyPreset('casual')">Casual Chat (1:1, no cache)</button>
            <button type="button" onclick="applyPreset('coding')">Coding Agent (1:50, 85% cache)</button>
            <button type="button" onclick="applyPreset('rag')">RAG System (1:10, 70% cache)</button>
            <button type="button" onclick="applyPreset('docs')">Document Analysis (1:1, 95% cache)</button>
            <button type="button" onclick="applyPreset('google')">Google's Scenario (1:5, no cache)</button>
        </div>

        <div class="chart-container">
            <svg id="chart"></svg>
        </div>

        <div class="legend" id="legend"></div>
    </div>

    <div class="tooltip" id="tooltip"></div>

    <script>
        // Model catalogue plotted on the chart. Fields of each entry:
        //   name, provider           - display label and the key into providerColors
        //   lmarenaElo               - LMArena Elo score (general chat)
        //   codingScore              - SWE-Bench Verified % or CodeClash Elo; null when the
        //                              model has no coding score (updateChart filters those out)
        //   inputPrice, outputPrice  - $ per million tokens
        //   cachePrice               - $ per million cached input tokens; null means
        //                              calculateCost bills every input token at inputPrice
        //   batchDiscount            - batch-discount flag; calculateCost does not read it
        //   thinkingMode             - 'full' = full thinking overhead, 'minimal' = reduced
        //                              overhead, null = no thinking
        // Pricing from https://ai.google.dev/gemini-api/docs/pricing (text only)
        // NOTE(review): that URL is the Gemini API pricing page; the source of the
        // Anthropic, xAI and OpenAI prices is not recorded here - TODO cite.
        const models = [
            { name: 'Gemini 3 Pro', provider: 'Google', lmarenaElo: 1490, codingScore: 76.2, inputPrice: 2.0, outputPrice: 12.0, cachePrice: 0.20, batchDiscount: false, thinkingMode: 'full' },
            { name: 'Gemini 3 Flash', provider: 'Google', lmarenaElo: 1480, codingScore: 76.2, inputPrice: 0.5, outputPrice: 3.0, cachePrice: 0.05, batchDiscount: false, thinkingMode: 'full' },
            { name: 'Grok 4.1 (thinking)', provider: 'xAI', lmarenaElo: 1477, codingScore: null, inputPrice: 3.0, outputPrice: 15.0, cachePrice: null, batchDiscount: false, thinkingMode: 'full' },
            { name: 'Opus 4.5 (thinking)', provider: 'Anthropic', lmarenaElo: 1470, codingScore: 80.9, inputPrice: 5.0, outputPrice: 25.0, cachePrice: 0.50, batchDiscount: true, thinkingMode: 'full' },
            { name: 'Opus 4.5', provider: 'Anthropic', lmarenaElo: 1467, codingScore: 74.6, inputPrice: 5.0, outputPrice: 25.0, cachePrice: 0.50, batchDiscount: true, thinkingMode: null },
            { name: 'Grok 4.1', provider: 'xAI', lmarenaElo: 1466, codingScore: null, inputPrice: 3.0, outputPrice: 15.0, cachePrice: null, batchDiscount: false, thinkingMode: null },
            { name: 'Gemini 3 Flash (thinking-min)', provider: 'Google', lmarenaElo: 1464, codingScore: null, inputPrice: 0.5, outputPrice: 3.0, cachePrice: 0.05, batchDiscount: false, thinkingMode: 'minimal' },
            { name: 'GPT-5.1 High', provider: 'OpenAI', lmarenaElo: 1458, codingScore: 76.3, inputPrice: 30.0, outputPrice: 60.0, cachePrice: null, batchDiscount: false, thinkingMode: 'full' },
            { name: 'Gemini 2.5 Pro', provider: 'Google', lmarenaElo: 1451, codingScore: 53.6, inputPrice: 1.25, outputPrice: 10.0, cachePrice: 0.125, batchDiscount: false, thinkingMode: 'full' },
            { name: 'Sonnet 4.5 (thinking)', provider: 'Anthropic', lmarenaElo: 1450, codingScore: 77.2, inputPrice: 3.0, outputPrice: 15.0, cachePrice: 0.30, batchDiscount: true, thinkingMode: 'full' },
            { name: 'Sonnet 4.5', provider: 'Anthropic', lmarenaElo: 1450, codingScore: 77.2, inputPrice: 3.0, outputPrice: 15.0, cachePrice: 0.30, batchDiscount: true, thinkingMode: null },
            { name: 'GPT-4o', provider: 'OpenAI', lmarenaElo: 1440, codingScore: null, inputPrice: 2.5, outputPrice: 10.0, cachePrice: null, batchDiscount: false, thinkingMode: 'full' },
            { name: 'Haiku 4.5', provider: 'Anthropic', lmarenaElo: 1420, codingScore: 73.3, inputPrice: 1.0, outputPrice: 5.0, cachePrice: 0.10, batchDiscount: true, thinkingMode: null },
            { name: 'Gemini 2.5 Flash', provider: 'Google', lmarenaElo: 1408, codingScore: null, inputPrice: 0.30, outputPrice: 2.50, cachePrice: 0.03, batchDiscount: false, thinkingMode: 'full' },
            { name: 'Gemini 2.5 Flash-Lite', provider: 'Google', lmarenaElo: 1378, codingScore: null, inputPrice: 0.10, outputPrice: 0.40, cachePrice: 0.01, batchDiscount: false, thinkingMode: 'minimal' },
            { name: 'Sonnet 3.5', provider: 'Anthropic', lmarenaElo: 1372, codingScore: null, inputPrice: 3.0, outputPrice: 15.0, cachePrice: 0.30, batchDiscount: true, thinkingMode: null },
            { name: 'Gemini 2.0 Flash', provider: 'Google', lmarenaElo: 1360, codingScore: null, inputPrice: 0.10, outputPrice: 0.40, cachePrice: 0.025, batchDiscount: false, thinkingMode: 'full' },
            { name: 'Haiku 3.5', provider: 'Anthropic', lmarenaElo: 1323, codingScore: null, inputPrice: 0.80, outputPrice: 4.0, cachePrice: 0.08, batchDiscount: true, thinkingMode: null },
            { name: 'Opus 3', provider: 'Anthropic', lmarenaElo: 1322, codingScore: null, inputPrice: 15.0, outputPrice: 75.0, cachePrice: 1.50, batchDiscount: true, thinkingMode: null },
            { name: 'Gemini 1.5 Flash', provider: 'Google', lmarenaElo: 1310, codingScore: null, inputPrice: 0.13, outputPrice: 0.38, cachePrice: null, batchDiscount: false, thinkingMode: 'full' },
            { name: 'Sonnet 3', provider: 'Anthropic', lmarenaElo: 1281, codingScore: null, inputPrice: 3.0, outputPrice: 15.0, cachePrice: 0.30, batchDiscount: true, thinkingMode: null },
            { name: 'Haiku 3', provider: 'Anthropic', lmarenaElo: 1261, codingScore: null, inputPrice: 0.25, outputPrice: 1.25, cachePrice: 0.03, batchDiscount: true, thinkingMode: null },
        ];

        // Provider colors: fill for each provider's chart circles and legend swatch,
        // keyed by the `provider` field of a model entry.
        const providerColors = {
            'Google': '#4285f4',
            'Anthropic': '#d4a373',
            'xAI': '#1da1f2',
            'OpenAI': '#10a37f'
        };

        // Chart dimensions: the <svg> is a fixed 1200x600; `width`/`height` are the
        // inner plotting area left after the margins are subtracted.
        const margin = { top: 20, right: 120, bottom: 60, left: 60 };
        const width = 1200 - margin.left - margin.right;
        const height = 600 - margin.top - margin.bottom;

        // Create SVG: size the #chart element to the full outer box, then work inside
        // a <g> shifted by the left/top margin so plot coordinates start at (0, 0).
        // `svg` therefore refers to that inner group, not the <svg> element itself.
        const svg = d3.select('#chart')
            .attr('width', width + margin.left + margin.right)
            .attr('height', height + margin.top + margin.bottom)
            .append('g')
            .attr('transform', `translate(${margin.left},${margin.top})`);

        // Scales - log scale INVERTED (expensive on left, cheap on right like Google's chart)
        // Only the pixel ranges are fixed here; updateChart() sets both domains.
        const xScale = d3.scaleLog().range([width, 0]);
        const yScale = d3.scaleLinear().range([height, 0]);

        // Axes: empty groups that updateChart() fills via d3.axisBottom / d3.axisLeft.
        // The x-axis group is moved to the bottom edge of the plotting area.
        const xAxis = svg.append('g')
            .attr('transform', `translate(0,${height})`);

        const yAxis = svg.append('g');

        // Axis labels
        // X label: static text centred under the x-axis.
        svg.append('text')
            .attr('x', width / 2)
            .attr('y', height + 50)
            .attr('text-anchor', 'middle')
            .style('font-size', '14px')
            .style('font-weight', '600')
            .style('fill', '#ccc')
            .text('$ Price per million tokens (log scale)');

        // Y label: rotated 90 degrees counter-clockwise, so `x` positions it along the
        // vertical axis. Kept in a variable because updateChart() rewrites its text
        // whenever the benchmark changes.
        const yAxisLabel = svg.append('text')
            .attr('transform', 'rotate(-90)')
            .attr('x', -height / 2)
            .attr('y', -45)
            .attr('text-anchor', 'middle')
            .style('font-size', '14px')
            .style('font-weight', '600')
            .style('fill', '#ccc')
            .text('LMArena Elo Score');

        // Pareto frontier line: a dashed red path with no geometry yet; updateChart()
        // sets its `d` attribute. It is appended before any circles or labels exist,
        // so those are drawn on top of it.
        const paretoLine = svg.append('path')
            .attr('fill', 'none')
            .attr('stroke', '#ff6b6b')
            .attr('stroke-width', 2)
            .attr('stroke-dasharray', '5,5');

        // Tooltip: the absolutely positioned #tooltip <div> outside the SVG, shown and
        // moved by the circle hover handlers in updateChart().
        const tooltip = d3.select('#tooltip');

        /**
         * Estimate the dollar cost of one workload for a single model.
         *
         * Prices on `model` are quoted per million tokens, so the token-weighted sums
         * are divided by 1,000,000 before being returned. Batch discounts are
         * deliberately ignored: batch processing adds significant latency and is
         * unsuitable for the interactive use cases this chart compares.
         *
         * @param {Object} model - Entry from `models`; reads inputPrice, outputPrice,
         *     cachePrice and thinkingMode.
         * @param {number} inputTokens - Input tokens in the workload.
         * @param {number} outputTokens - Output tokens before any thinking overhead.
         * @param {number} cacheHitRate - Percentage (0-100) of input tokens billed at
         *     the cache price, for models that have one.
         * @param {number} thinkingOverhead - Extra output tokens spent on thinking, as
         *     a percentage of outputTokens.
         * @returns {number} Cost in dollars.
         */
        function calculateCost(model, inputTokens, outputTokens, cacheHitRate, thinkingOverhead) {
            // `!= null` rejects both null and undefined, so an entry that simply omits
            // cachePrice is treated as "no cache pricing" instead of producing NaN.
            const hasCachePrice = model.cachePrice != null;
            const cachedTokens = hasCachePrice ? inputTokens * (cacheHitRate / 100) : 0;
            const regularInputTokens = inputTokens - cachedTokens;

            // Apply thinking token overhead based on the model's thinkingMode property.
            let effectiveOutputTokens = outputTokens;
            if (model.thinkingMode === 'full') {
                // Full thinking models: the whole slider overhead.
                effectiveOutputTokens = outputTokens * (1 + thinkingOverhead / 100);
            } else if (model.thinkingMode === 'minimal') {
                // Minimal thinking models: 25% of the slider overhead.
                effectiveOutputTokens = outputTokens * (1 + thinkingOverhead / 400);
            }
            // Non-thinking models (thinkingMode: null): no overhead applied.

            // Uncached input plus (possibly inflated) output, at per-million prices.
            let cost = (regularInputTokens * model.inputPrice + effectiveOutputTokens * model.outputPrice) / 1000000;

            // Cached input tokens are billed separately at the cache price.
            if (cachedTokens > 0) {
                cost += (cachedTokens * model.cachePrice) / 1000000;
            }

            return cost;
        }

        /**
         * Pick the Pareto-optimal points: those for which no other point is both
         * at most as expensive and higher scoring.
         *
         * Points are walked from cheapest to most expensive and a point is kept only
         * when its score beats every cheaper point seen so far. On the inverted x-axis
         * this traces the frontier from the lower right (cheap) to the upper left
         * (expensive).
         *
         * @param {Array<{cost: number, score: number}>} points - Candidate points; the
         *     array is not mutated.
         * @returns {Array} The frontier points (same object references), ordered by
         *     ascending cost.
         */
        function calculateParetoFrontier(points) {
            // Sort a copy by cost ascending. Equal costs are ordered best score first so
            // that a lower-scoring point at the same price cannot slip onto the
            // frontier just because it happened to come first in the input.
            const byCost = [...points].sort((a, b) => (a.cost - b.cost) || (b.score - a.score));

            const frontier = [];
            let bestScore = -Infinity;

            for (const point of byCost) {
                if (point.score > bestScore) {
                    frontier.push(point);
                    bestScore = point.score;
                }
            }

            return frontier;
        }

        /**
         * Recompute every model's cost from the current control values and redraw
         * the chart: scale domains, both axes, the Pareto line, the model circles and
         * their text labels. Takes no meaningful arguments and returns nothing; it
         * reads the sliders and the benchmark radio group from the DOM and writes to
         * the SVG elements created at script start-up.
         */
        function updateChart() {
            // Get current settings
            const inputOutputRatio = parseInt(document.getElementById('inputOutputRatio').value);
            const cacheHitRate = parseInt(document.getElementById('cacheHitRate').value);
            const thinkingOverhead = parseInt(document.getElementById('thinkingOverhead').value);
            const benchmark = document.querySelector('input[name="benchmark"]:checked').value;

            // Calculate costs for each model (per 1M total tokens)
            // The slider value N is the "1:N" shown next to it: N output tokens per
            // input token, so the input share is 1/(1+N) and the output share N/(1+N).
            const inputRatio = 1 / (1 + inputOutputRatio);
            const outputRatio = inputOutputRatio / (1 + inputOutputRatio);

            // The 1M budget covers input + visible output; calculateCost adds any
            // thinking overhead on top of the output share.
            const inputTokens = 1000000 * inputRatio;
            const outputTokens = 1000000 * outputRatio;

            // Copy each model, attaching its cost and the score for the selected benchmark
            const allDataPoints = models.map(model => ({
                ...model,
                cost: calculateCost(model, inputTokens, outputTokens, cacheHitRate, thinkingOverhead),
                score: benchmark === 'lmarena' ? model.lmarenaElo : model.codingScore
            }));

            // Filter out models without scores for selected benchmark
            const dataPoints = allDataPoints.filter(d => d.score !== null);

            // Update Y-axis label based on benchmark
            yAxisLabel.text(benchmark === 'lmarena' ? 'LMArena Elo Score (General Chat)' : 'SWE-Bench Verified Score (%)');

            // Update scales
            const minCost = d3.min(dataPoints, d => d.cost);
            const maxCost = d3.max(dataPoints, d => d.cost);
            const minScore = d3.min(dataPoints, d => d.score);
            const maxScore = d3.max(dataPoints, d => d.score);

            // Log scale needs positive values, add padding
            // (20% either side; the 0.01 floor keeps the lower bound above zero)
            xScale.domain([Math.max(minCost * 0.8, 0.01), maxCost * 1.2]);

            // Set Y-axis domain based on benchmark type: snap outward to multiples of
            // 50 for Elo scores and to multiples of 10 for the coding score.
            if (benchmark === 'lmarena') {
                yScale.domain([Math.floor(minScore / 50) * 50, Math.ceil(maxScore / 50) * 50]);
            } else {
                yScale.domain([Math.floor(minScore / 10) * 10, Math.ceil(maxScore / 10) * 10]);
            }

            // Update axes - let D3 handle log scale ticks properly
            // The trailing .call()s re-apply the dark-theme colours after each redraw.
            xAxis.transition().duration(500)
                .call(d3.axisBottom(xScale)
                    .ticks(10)
                    .tickFormat(d => {
                        // Format nicely for different scales: fewer decimals as values grow
                        if (d >= 1) return `$${d.toFixed(0)}`;
                        if (d >= 0.1) return `$${d.toFixed(1)}`;
                        return `$${d.toFixed(2)}`;
                    }))
                .call(g => g.selectAll('.tick text').style('fill', '#ccc'))
                .call(g => g.selectAll('.tick line').style('stroke', '#555'))
                .call(g => g.select('.domain').style('stroke', '#555'));

            yAxis.transition().duration(500)
                .call(d3.axisLeft(yScale).ticks(10))
                .call(g => g.selectAll('.tick text').style('fill', '#ccc'))
                .call(g => g.selectAll('.tick line').style('stroke', '#555'))
                .call(g => g.select('.domain').style('stroke', '#555'));

            // Calculate Pareto frontier
            const frontierPoints = calculateParetoFrontier(dataPoints);

            // Update Pareto line: straight segments through the frontier points
            const lineGenerator = d3.line()
                .x(d => xScale(d.cost))
                .y(d => yScale(d.score));

            paretoLine.transition().duration(500)
                .attr('d', lineGenerator(frontierPoints));

            // Update circles: join on model name so an existing circle is reused (and
            // animated to its new position) instead of being recreated.
            const circles = svg.selectAll('.model-circle')
                .data(dataPoints, d => d.name);

            // Models without a score for the selected benchmark lose their circle.
            circles.exit().remove();

            // Static attributes and hover handlers are set once, when a circle is created.
            const circlesEnter = circles.enter()
                .append('circle')
                .attr('class', 'model-circle')
                .attr('r', 6)
                .attr('opacity', 0.8)
                .attr('stroke', '#fff')
                .attr('stroke-width', 2)
                .on('mouseover', function(event, d) {
                    // Enlarge and fully opacify the hovered circle
                    d3.select(this)
                        .attr('r', 8)
                        .attr('opacity', 1);

                    // Re-read the benchmark at hover time: this handler was bound when
                    // the circle was first created and outlives benchmark switches.
                    const benchmark = document.querySelector('input[name="benchmark"]:checked').value;
                    const scoreLabel = benchmark === 'lmarena' ? 'LMArena Elo' : 'SWE-Bench';
                    tooltip
                        .style('opacity', 1)
                        .html(`
                            <strong>${d.name}</strong><br>
                            Provider: ${d.provider}<br>
                            ${scoreLabel}: ${d.score}<br>
                            Cost: $${d.cost.toFixed(2)}/M tokens<br>
                            ${d.cachePrice !== null ? 'Cache: ✓' : 'Cache: ✗'}
                        `)
                        .style('left', (event.pageX + 10) + 'px')
                        .style('top', (event.pageY - 10) + 'px');
                })
                .on('mouseout', function() {
                    // Restore the resting size/opacity and hide the tooltip
                    d3.select(this)
                        .attr('r', 6)
                        .attr('opacity', 0.8);

                    tooltip.style('opacity', 0);
                });

            // Animate new and existing circles to their current position and colour
            circles.merge(circlesEnter)
                .transition()
                .duration(500)
                .attr('cx', d => xScale(d.cost))
                .attr('cy', d => yScale(d.score))
                .attr('fill', d => providerColors[d.provider]);

            // Update labels for all models (make frontier models more prominent)
            const frontierSet = new Set(frontierPoints.map(d => d.name));

            const labels = svg.selectAll('.model-label')
                .data(dataPoints, d => d.name);

            labels.exit().remove();

            // Labels ignore pointer events so they never block hovering a circle
            const labelsEnter = labels.enter()
                .append('text')
                .attr('class', 'model-label')
                .attr('font-size', '11px')
                .style('pointer-events', 'none');

            // Position each label just right of its circle; frontier models get a
            // bolder, brighter, fully opaque label.
            labels.merge(labelsEnter)
                .transition()
                .duration(500)
                .attr('x', d => xScale(d.cost) + 10)
                .attr('y', d => yScale(d.score) + 4)
                .attr('font-weight', d => frontierSet.has(d.name) ? '600' : '400')
                .attr('fill', d => frontierSet.has(d.name) ? '#e0e0e0' : '#888')
                .attr('opacity', d => frontierSet.has(d.name) ? 1 : 0.6)
                .text(d => d.name);
        }

        /**
         * Move the ratio and cache sliders to a named scenario, then refresh the
         * value read-outs and the chart.
         *
         * @param {string} preset - One of 'casual', 'coding', 'rag', 'docs', 'google'.
         *     Any other name is ignored.
         */
        function applyPreset(preset) {
            // ratio: N of the slider's "1:N" display; cache: cache hit rate in percent;
            // batch: state for an optional batch-mode checkbox.
            // NOTE(review): the ratio slider only expresses 1:N with integer N, so an
            // input-heavy mix cannot be represented; `docs` uses ratio 1.
            const presets = {
                casual: { ratio: 1, cache: 0, batch: false },
                coding: { ratio: 50, cache: 85, batch: false },
                rag: { ratio: 10, cache: 70, batch: false },
                docs: { ratio: 1, cache: 95, batch: false },
                google: { ratio: 5, cache: 0, batch: false }
            };

            const config = presets[preset];
            if (!config) {
                // Unknown preset name: leave the controls untouched rather than throw.
                return;
            }

            document.getElementById('inputOutputRatio').value = config.ratio;
            document.getElementById('cacheHitRate').value = config.cache;

            // The page has no #batchMode control, so getElementById returns null here.
            // Writing `.checked` on null threw a TypeError that aborted the function
            // before the refresh calls below; only touch the checkbox if it exists.
            const batchToggle = document.getElementById('batchMode');
            if (batchToggle) {
                batchToggle.checked = config.batch;
            }

            updateDisplays();
            updateChart();
        }

        /**
         * Copy the three slider positions into the text read-outs beneath them:
         * the ratio as "1:N", the cache hit rate and thinking overhead as "N%".
         */
        function updateDisplays() {
            // Current integer position of the slider with the given id.
            const sliderValue = id => parseInt(document.getElementById(id).value);
            // Replace the text of the read-out element with the given id.
            const show = (id, text) => {
                document.getElementById(id).textContent = text;
            };

            const outputPerInput = sliderValue('inputOutputRatio');
            const cachePercent = sliderValue('cacheHitRate');
            const thinkingPercent = sliderValue('thinkingOverhead');

            show('ratioDisplay', `1:${outputPerInput}`);
            show('cacheDisplay', `${cachePercent}%`);
            show('thinkingDisplay', `${thinkingPercent}%`);
        }

        /**
         * Fill the #legend container with one entry per distinct provider, in the
         * order providers first appear in `models`: a coloured dot followed by the
         * provider name.
         */
        function createLegend() {
            const container = document.getElementById('legend');
            const uniqueProviders = Array.from(new Set(models.map(entry => entry.provider)));

            for (const providerName of uniqueProviders) {
                // Coloured dot, using the same colour as the provider's chart circles
                const swatch = document.createElement('div');
                swatch.className = 'legend-circle';
                swatch.style.backgroundColor = providerColors[providerName];

                const caption = document.createElement('span');
                caption.textContent = providerName;

                // Row wrapper holding the dot and the caption side by side
                const row = document.createElement('div');
                row.className = 'legend-item';
                row.appendChild(swatch);
                row.appendChild(caption);

                container.appendChild(row);
            }
        }

        // Event listeners
        // Dragging any of the three sliders refreshes the read-outs and the chart.
        const refreshFromSliders = () => {
            updateDisplays();
            updateChart();
        };

        for (const sliderId of ['inputOutputRatio', 'cacheHitRate', 'thinkingOverhead']) {
            document.getElementById(sliderId).addEventListener('input', refreshFromSliders);
        }

        // Switching benchmark only affects the chart, not the slider read-outs.
        for (const radio of document.querySelectorAll('input[name="benchmark"]')) {
            radio.addEventListener('change', updateChart);
        }

        // Initialize: build the legend once, then render the default slider state.
        createLegend();
        updateDisplays();
        updateChart();
    </script>
</body>
</html>
	<!DOCTYPE html>
	<html lang="en">
	<head>
	<meta charset="UTF-8">
	<meta name="viewport" content="width=device-width, initial-scale=1.0">
	<title>LLM Performance vs Cost - Interactive Pareto Frontier</title>
	<script src="https://d3js.org/d3.v7.min.js"></script>
	<style>
	body {
	font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
	margin: 0;
	padding: 20px;
	background: #1a1a1a;
	}

	.container {
	max-width: 1400px;
	margin: 0 auto;
	background: #2a2a2a;
	padding: 30px;
	border-radius: 8px;
	box-shadow: 0 2px 8px rgba(0,0,0,0.3);
	}

	h1 {
	margin-top: 0;
	color: #e0e0e0;
	}

	.controls {
	display: grid;
	grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
	gap: 20px;
	margin: 20px 0;
	padding: 20px;
	background: #333;
	border-radius: 4px;
	}

	.control-group {
	display: flex;
	flex-direction: column;
	}

	label {
	font-weight: 600;
	margin-bottom: 8px;
	color: #ccc;
	font-size: 14px;
	}

	input[type="range"] {
	width: 100%;
	}

	.value-display {
	color: #0066cc;
	font-weight: 600;
	margin-top: 4px;
	font-size: 13px;
	}

	.presets {
	display: flex;
	gap: 10px;
	flex-wrap: wrap;
	}

	button {
	padding: 8px 16px;
	border: 1px solid #555;
	background: #3a3a3a;
	color: #e0e0e0;
	border-radius: 4px;
	cursor: pointer;
	font-size: 13px;
	transition: all 0.2s;
	}

	button:hover {
	background: #4a4a4a;
	}

	button.active {
	background: #0066cc;
	color: white;
	border-color: #0066cc;
	}

	.chart-container {
	margin-top: 30px;
	}

	.tooltip {
	position: absolute;
	padding: 10px;
	background: rgba(0, 0, 0, 0.8);
	color: white;
	border-radius: 4px;
	pointer-events: none;
	font-size: 12px;
	opacity: 0;
	transition: opacity 0.2s;
	}

	.legend {
	display: flex;
	gap: 20px;
	margin-top: 20px;
	flex-wrap: wrap;
	}

	.legend-item {
	display: flex;
	align-items: center;
	gap: 8px;
	font-size: 13px;
	color: #ccc;
	}

	.legend-circle {
	width: 12px;
	height: 12px;
	border-radius: 50%;
	}

	.info {
	margin-top: 20px;
	padding: 15px;
	background: #333;
	border-left: 4px solid #0066cc;
	border-radius: 4px;
	font-size: 14px;
	color: #ccc;
	}
	</style>
	</head>
	<body>
	<div class="container">
	<h1>LLM Performance vs Cost: Interactive Pareto Frontier</h1>

	<div class="info">
	<strong>Why this matters:</strong> Google's Gemini 3 Flash chart assumes a fixed 5:1 output:input ratio with no caching.
	Real-world usage varies dramatically—coding agents often run 50:1+ ratios with 80-90% cache hits,
	completely changing the cost picture and Pareto frontier.<br><br>
	<strong>Note:</strong> Prices shown are for interactive/real-time usage. Batch processing (50% discount) is available
	from some providers but adds significant latency, making it unsuitable for the interactive use cases these Elo scores measure.
	</div>

	<div class="controls">
	<div class="control-group">
	<label>Benchmark</label>
	<div style="display: flex; gap: 10px;">
	<label style="display: flex; align-items: center; gap: 6px; font-weight: normal;">
	<input type="radio" name="benchmark" value="lmarena" checked style="width: auto; margin: 0;">
	General Chat (LMArena)
	</label>
	<label style="display: flex; align-items: center; gap: 6px; font-weight: normal;">
	<input type="radio" name="benchmark" value="coding" style="width: auto; margin: 0;">
	Coding (SWE-Bench)
	</label>
	</div>
	</div>

	<div class="control-group">
	<label for="inputOutputRatio">Input:Output Token Ratio</label>
	<input type="range" id="inputOutputRatio" min="0" max="100" value="5" step="1">
	<div class="value-display" id="ratioDisplay">1:5</div>
	</div>

	<div class="control-group">
	<label for="cacheHitRate">Cache Hit Rate (%)</label>
	<input type="range" id="cacheHitRate" min="0" max="100" value="0" step="5">
	<div class="value-display" id="cacheDisplay">0%</div>
	</div>

	<div class="control-group">
	<label for="thinkingOverhead">Thinking Token Overhead (%)</label>
	<input type="range" id="thinkingOverhead" min="0" max="300" value="100" step="25">
	<div class="value-display" id="thinkingDisplay">100%</div>
	</div>
	</div>

	<div class="presets">
	<button onclick="applyPreset('casual')">Casual Chat (1:1, no cache)</button>
	<button onclick="applyPreset('coding')">Coding Agent (1:50, 85% cache)</button>
	<button onclick="applyPreset('rag')">RAG System (1:10, 70% cache)</button>
	<button onclick="applyPreset('docs')">Document Analysis (100:1, 95% cache)</button>
	<button onclick="applyPreset('google')">Google's Scenario (1:5, no cache)</button>
	</div>

	<div class="chart-container">
	<svg id="chart"></svg>
	</div>

	<div class="legend" id="legend"></div>
	</div>

	<div class="tooltip" id="tooltip"></div>

	<script>
	// Model data with Elo scores and pricing (per million tokens)
	// Pricing from https://ai.google.dev/gemini-api/docs/pricing (text only)
	// thinkingMode: 'full' = full thinking overhead, 'minimal' = reduced overhead, null = no thinking
	// lmarenaElo: LMArena Elo score (general chat), codingScore: SWE-Bench Verified % or CodeClash Elo
	const models = [
	{ name: 'Gemini 3 Pro', provider: 'Google', lmarenaElo: 1490, codingScore: 76.2, inputPrice: 2.0, outputPrice: 12.0, cachePrice: 0.20, batchDiscount: false, thinkingMode: 'full' },
	{ name: 'Gemini 3 Flash', provider: 'Google', lmarenaElo: 1480, codingScore: 76.2, inputPrice: 0.5, outputPrice: 3.0, cachePrice: 0.05, batchDiscount: false, thinkingMode: 'full' },
	{ name: 'Grok 4.1 (thinking)', provider: 'xAI', lmarenaElo: 1477, codingScore: null, inputPrice: 3.0, outputPrice: 15.0, cachePrice: null, batchDiscount: false, thinkingMode: 'full' },
	{ name: 'Opus 4.5 (thinking)', provider: 'Anthropic', lmarenaElo: 1470, codingScore: 80.9, inputPrice: 5.0, outputPrice: 25.0, cachePrice: 0.50, batchDiscount: true, thinkingMode: 'full' },
	{ name: 'Opus 4.5', provider: 'Anthropic', lmarenaElo: 1467, codingScore: 74.6, inputPrice: 5.0, outputPrice: 25.0, cachePrice: 0.50, batchDiscount: true, thinkingMode: null },
	{ name: 'Grok 4.1', provider: 'xAI', lmarenaElo: 1466, codingScore: null, inputPrice: 3.0, outputPrice: 15.0, cachePrice: null, batchDiscount: false, thinkingMode: null },
	{ name: 'Gemini 3 Flash (thinking-min)', provider: 'Google', lmarenaElo: 1464, codingScore: null, inputPrice: 0.5, outputPrice: 3.0, cachePrice: 0.05, batchDiscount: false, thinkingMode: 'minimal' },
	{ name: 'GPT-5.1 High', provider: 'OpenAI', lmarenaElo: 1458, codingScore: 76.3, inputPrice: 30.0, outputPrice: 60.0, cachePrice: null, batchDiscount: false, thinkingMode: 'full' },
	{ name: 'Gemini 2.5 Pro', provider: 'Google', lmarenaElo: 1451, codingScore: 53.6, inputPrice: 1.25, outputPrice: 10.0, cachePrice: 0.125, batchDiscount: false, thinkingMode: 'full' },
	{ name: 'Sonnet 4.5 (thinking)', provider: 'Anthropic', lmarenaElo: 1450, codingScore: 77.2, inputPrice: 3.0, outputPrice: 15.0, cachePrice: 0.30, batchDiscount: true, thinkingMode: 'full' },
	{ name: 'Sonnet 4.5', provider: 'Anthropic', lmarenaElo: 1450, codingScore: 77.2, inputPrice: 3.0, outputPrice: 15.0, cachePrice: 0.30, batchDiscount: true, thinkingMode: null },
	{ name: 'GPT-4o', provider: 'OpenAI', lmarenaElo: 1440, codingScore: null, inputPrice: 2.5, outputPrice: 10.0, cachePrice: null, batchDiscount: false, thinkingMode: 'full' },
	{ name: 'Haiku 4.5', provider: 'Anthropic', lmarenaElo: 1420, codingScore: 73.3, inputPrice: 1.0, outputPrice: 5.0, cachePrice: 0.10, batchDiscount: true, thinkingMode: null },
	{ name: 'Gemini 2.5 Flash', provider: 'Google', lmarenaElo: 1408, codingScore: null, inputPrice: 0.30, outputPrice: 2.50, cachePrice: 0.03, batchDiscount: false, thinkingMode: 'full' },
	{ name: 'Gemini 2.5 Flash-Lite', provider: 'Google', lmarenaElo: 1378, codingScore: null, inputPrice: 0.10, outputPrice: 0.40, cachePrice: 0.01, batchDiscount: false, thinkingMode: 'minimal' },
	{ name: 'Sonnet 3.5', provider: 'Anthropic', lmarenaElo: 1372, codingScore: null, inputPrice: 3.0, outputPrice: 15.0, cachePrice: 0.30, batchDiscount: true, thinkingMode: null },
	{ name: 'Gemini 2.0 Flash', provider: 'Google', lmarenaElo: 1360, codingScore: null, inputPrice: 0.10, outputPrice: 0.40, cachePrice: 0.025, batchDiscount: false, thinkingMode: 'full' },
	{ name: 'Haiku 3.5', provider: 'Anthropic', lmarenaElo: 1323, codingScore: null, inputPrice: 0.80, outputPrice: 4.0, cachePrice: 0.08, batchDiscount: true, thinkingMode: null },
	{ name: 'Opus 3', provider: 'Anthropic', lmarenaElo: 1322, codingScore: null, inputPrice: 15.0, outputPrice: 75.0, cachePrice: 1.50, batchDiscount: true, thinkingMode: null },
	{ name: 'Gemini 1.5 Flash', provider: 'Google', lmarenaElo: 1310, codingScore: null, inputPrice: 0.13, outputPrice: 0.38, cachePrice: null, batchDiscount: false, thinkingMode: 'full' },
	{ name: 'Sonnet 3', provider: 'Anthropic', lmarenaElo: 1281, codingScore: null, inputPrice: 3.0, outputPrice: 15.0, cachePrice: 0.30, batchDiscount: true, thinkingMode: null },
	{ name: 'Haiku 3', provider: 'Anthropic', lmarenaElo: 1261, codingScore: null, inputPrice: 0.25, outputPrice: 1.25, cachePrice: 0.03, batchDiscount: true, thinkingMode: null },
	];

	// Colour assigned to each provider, keyed by the `provider` string on a model entry.
	// Read when filling chart circles and when painting legend swatches.
	const providerColors = {
		Google: '#4285f4',
		Anthropic: '#d4a373',
		xAI: '#1da1f2',
		OpenAI: '#10a37f'
	};

	// Plot geometry: the outer SVG is 1200x600; `width`/`height` describe the
	// inner drawing area that remains once the margins are taken off.
	const margin = { top: 20, right: 120, bottom: 60, left: 60 };
	const width = 1200 - (margin.left + margin.right);
	const height = 600 - (margin.top + margin.bottom);

	// Size the #chart element, then do all drawing inside a <g> shifted by the
	// top/left margins so (0, 0) is the plot's top-left corner.
	const svg = d3.select('#chart')
		.attr('width', margin.left + width + margin.right)
		.attr('height', margin.top + height + margin.bottom)
		.append('g')
		.attr('transform', `translate(${margin.left},${margin.top})`);

	// Scales. Domains are assigned later by updateChart().
	// The x range runs from `width` down to 0, so the log axis is INVERTED:
	// expensive on the left, cheap on the right (like Google's chart).
	const xScale = d3.scaleLog().range([width, 0]);
	const yScale = d3.scaleLinear().range([height, 0]);

	// Axis containers: x sits along the bottom edge, y along the left edge.
	const xAxis = svg.append('g').attr('transform', `translate(0,${height})`);
	const yAxis = svg.append('g');

	// X-axis caption, centred under the plot.
	svg.append('text')
		.style('font-size', '14px')
		.style('font-weight', '600')
		.style('fill', '#ccc')
		.attr('text-anchor', 'middle')
		.attr('x', width / 2)
		.attr('y', height + 50)
		.text('$ Price per million tokens (log scale)');

	// Y-axis caption, rotated to read bottom-to-top. A handle is kept because
	// updateChart() rewrites its text when the benchmark changes.
	const yAxisLabel = svg.append('text')
		.style('font-size', '14px')
		.style('font-weight', '600')
		.style('fill', '#ccc')
		.attr('text-anchor', 'middle')
		.attr('transform', 'rotate(-90)')
		.attr('x', -height / 2)
		.attr('y', -45)
		.text('LMArena Elo Score');

	// Dashed path that traces the Pareto frontier; its `d` is set by updateChart().
	const paretoLine = svg.append('path')
		.attr('fill', 'none')
		.attr('stroke', '#ff6b6b')
		.attr('stroke-width', 2)
		.attr('stroke-dasharray', '5,5');

	// Hover tooltip element, shown and positioned by the circle mouse handlers.
	const tooltip = d3.select('#tooltip');

	/**
	 * Estimate the dollar cost of a workload on one model.
	 *
	 * @param {Object} model - Model entry; reads `inputPrice`, `outputPrice`,
	 *     `cachePrice` (each in $ per million tokens) and `thinkingMode`.
	 * @param {number} inputTokens - Input tokens in the workload.
	 * @param {number} outputTokens - Visible output tokens in the workload.
	 * @param {number} cacheHitRate - Percentage (0-100) of input tokens billed at
	 *     the cache price; ignored when the model has no cache price.
	 * @param {number} thinkingOverhead - Extra output tokens, as a percentage of
	 *     `outputTokens`, charged to thinking models.
	 * @returns {number} Cost in dollars.
	 */
	function calculateCost(model, inputTokens, outputTokens, cacheHitRate, thinkingOverhead) {
		// `!= null` rejects both null and undefined, so an entry that simply omits
		// `cachePrice` is treated as "no caching" instead of producing a NaN cost.
		const supportsCache = model.cachePrice != null;
		const cachedTokens = supportsCache ? inputTokens * (cacheHitRate / 100) : 0;
		const regularInputTokens = inputTokens - cachedTokens;

		// Apply thinking token overhead based on the model's thinkingMode property.
		let effectiveOutputTokens = outputTokens;
		if (model.thinkingMode === 'full') {
			// Full thinking models: the whole slider overhead.
			effectiveOutputTokens = outputTokens * (1 + thinkingOverhead / 100);
		} else if (model.thinkingMode === 'minimal') {
			// Minimal thinking models: 25% of the slider overhead.
			effectiveOutputTokens = outputTokens * (1 + thinkingOverhead / 400);
		}
		// Non-thinking models (any other thinkingMode, e.g. null): no overhead.

		// Prices are per million tokens, so divide token counts by 1M.
		let cost = (regularInputTokens * model.inputPrice + effectiveOutputTokens * model.outputPrice) / 1000000;

		if (cachedTokens > 0) {
			cost += (cachedTokens * model.cachePrice) / 1000000;
		}

		// Note: Batch processing (50% discount) is not included as it adds
		// significant latency and is unsuitable for interactive use cases.

		return cost;
	}

	/**
	 * Select the points that are not beaten on both cost and score.
	 *
	 * Points are walked from cheapest to most expensive and a point is kept only
	 * when its score is strictly higher than every cheaper point's score. On the
	 * inverted x-axis this traces the frontier from lower-right to upper-left.
	 *
	 * @param {Array<{cost: number, score: number}>} points - Candidate points;
	 *     the array is not modified.
	 * @returns {Array} Frontier points, ordered by ascending cost.
	 */
	function calculateParetoFrontier(points) {
		// Sort by cost ASCENDING (cheap to expensive). Among points with the same
		// cost the highest score goes first; otherwise a lower-scoring point that
		// merely sorted ahead of an equally priced, better one would be kept even
		// though it is dominated.
		const sorted = [...points].sort((a, b) => (a.cost - b.cost) || (b.score - a.score));
		const frontier = [];
		let maxScore = -Infinity;

		for (const point of sorted) {
			if (point.score > maxScore) {
				frontier.push(point);
				maxScore = point.score;
			}
		}

		return frontier;
	}

	/**
	 * Redraw the chart from the current control values.
	 *
	 * Reads the three sliders and the selected benchmark, prices every model for
	 * a 1M-token workload, then updates the scale domains, both axes, the Pareto
	 * line, the model circles and the model labels (all with 500 ms transitions).
	 */
	function updateChart() {
		// --- Current settings -------------------------------------------------
		const sliderValue = id => parseInt(document.getElementById(id).value);
		const ratio = sliderValue('inputOutputRatio');
		const cacheHitRate = sliderValue('cacheHitRate');
		const thinkingOverhead = sliderValue('thinkingOverhead');
		const benchmark = document.querySelector('input[name="benchmark"]:checked').value;
		const usingLmarena = benchmark === 'lmarena';

		// --- Workload: 1M total tokens, 1 part input to `ratio` parts output ----
		// NOTE(review): applyPreset() pairs its largest ratios with high cache-hit
		// rates, which hints the slider may be meant as input-per-output — confirm.
		const inputTokens = 1000000 * (1 / (1 + ratio));
		const outputTokens = 1000000 * (ratio / (1 + ratio));

		// --- Data: price each model, attach the chosen benchmark's score, and
		// drop models that have no score for that benchmark ---------------------
		const dataPoints = models
			.map(model => ({
				...model,
				cost: calculateCost(model, inputTokens, outputTokens, cacheHitRate, thinkingOverhead),
				score: usingLmarena ? model.lmarenaElo : model.codingScore
			}))
			.filter(d => d.score !== null);

		yAxisLabel.text(usingLmarena ? 'LMArena Elo Score (General Chat)' : 'SWE-Bench Verified Score (%)');

		// --- Scales -----------------------------------------------------------
		const [minCost, maxCost] = d3.extent(dataPoints, d => d.cost);
		const [minScore, maxScore] = d3.extent(dataPoints, d => d.score);

		// A log scale needs a positive lower bound; pad both ends by 20%.
		xScale.domain([Math.max(minCost * 0.8, 0.01), maxCost * 1.2]);

		// Snap the y domain outward to a round step: 50 Elo points or 10 percent.
		const yStep = usingLmarena ? 50 : 10;
		yScale.domain([Math.floor(minScore / yStep) * yStep, Math.ceil(maxScore / yStep) * yStep]);

		// --- Axes -------------------------------------------------------------
		// Fewer decimals for larger amounts: $12, $0.5, $0.05.
		const formatDollars = d => {
			const decimals = d >= 1 ? 0 : (d >= 0.1 ? 1 : 2);
			return `$${d.toFixed(decimals)}`;
		};
		// Recolour the generated axis parts to suit the dark background.
		const paintAxis = g => {
			g.selectAll('.tick text').style('fill', '#ccc');
			g.selectAll('.tick line').style('stroke', '#555');
			g.select('.domain').style('stroke', '#555');
		};

		xAxis.transition().duration(500)
			.call(d3.axisBottom(xScale).ticks(10).tickFormat(formatDollars))
			.call(paintAxis);

		yAxis.transition().duration(500)
			.call(d3.axisLeft(yScale).ticks(10))
			.call(paintAxis);

		// --- Pareto frontier ----------------------------------------------------
		const frontierPoints = calculateParetoFrontier(dataPoints);
		const tracePath = d3.line()
			.x(d => xScale(d.cost))
			.y(d => yScale(d.score));

		paretoLine.transition().duration(500)
			.attr('d', tracePath(frontierPoints));

		// --- Circle hover handlers (D3 calls them with `this` = the circle) -----
		function showTooltip(event, d) {
			d3.select(this)
				.attr('r', 8)
				.attr('opacity', 1);

			// Handlers are attached only when a circle is first created, so the
			// benchmark is looked up again at hover time rather than captured.
			const hoveredBenchmark = document.querySelector('input[name="benchmark"]:checked').value;
			const scoreLabel = hoveredBenchmark === 'lmarena' ? 'LMArena Elo' : 'SWE-Bench';
			const cacheLabel = d.cachePrice !== null ? 'Cache: ✓' : 'Cache: ✗';
			tooltip
				.style('opacity', 1)
				.html(`
	<strong>${d.name}</strong><br>
	Provider: ${d.provider}<br>
	${scoreLabel}: ${d.score}<br>
	Cost: $${d.cost.toFixed(2)}/M tokens<br>
	${cacheLabel}
	`)
				.style('left', (event.pageX + 10) + 'px')
				.style('top', (event.pageY - 10) + 'px');
		}

		function hideTooltip() {
			d3.select(this)
				.attr('r', 6)
				.attr('opacity', 0.8);

			tooltip.style('opacity', 0);
		}

		// --- Circles: one per scored model, joined by model name ----------------
		const circles = svg.selectAll('.model-circle')
			.data(dataPoints, d => d.name);

		circles.exit().remove();

		const circlesEnter = circles.enter()
			.append('circle')
			.attr('class', 'model-circle')
			.attr('r', 6)
			.attr('opacity', 0.8)
			.attr('stroke', '#fff')
			.attr('stroke-width', 2)
			.on('mouseover', showTooltip)
			.on('mouseout', hideTooltip);

		circles.merge(circlesEnter)
			.transition()
			.duration(500)
			.attr('cx', d => xScale(d.cost))
			.attr('cy', d => yScale(d.score))
			.attr('fill', d => providerColors[d.provider]);

		// --- Labels: every model is labelled; frontier models stand out ---------
		const frontierNames = new Set(frontierPoints.map(d => d.name));
		const onFrontier = d => frontierNames.has(d.name);

		const labels = svg.selectAll('.model-label')
			.data(dataPoints, d => d.name);

		labels.exit().remove();

		const labelsEnter = labels.enter()
			.append('text')
			.attr('class', 'model-label')
			.attr('font-size', '11px')
			.style('pointer-events', 'none');

		labels.merge(labelsEnter)
			.transition()
			.duration(500)
			.attr('x', d => xScale(d.cost) + 10)
			.attr('y', d => yScale(d.score) + 4)
			.attr('font-weight', d => onFrontier(d) ? '600' : '400')
			.attr('fill', d => onFrontier(d) ? '#e0e0e0' : '#888')
			.attr('opacity', d => onFrontier(d) ? 1 : 0.6)
			.text(d => d.name);
	}

	/**
	 * Load a named preset into the ratio and cache sliders, then refresh the
	 * slider read-outs and the chart.
	 *
	 * @param {string} preset - One of 'casual', 'coding', 'rag', 'docs', 'google'.
	 *     Any other name is reported with console.warn and otherwise ignored.
	 */
	function applyPreset(preset) {
		const presets = {
			casual: { ratio: 1, cache: 0, batch: false },
			coding: { ratio: 50, cache: 85, batch: false },
			rag: { ratio: 10, cache: 70, batch: false },
			docs: { ratio: 1, cache: 95, batch: false },
			google: { ratio: 5, cache: 0, batch: false }
		};

		// Own-property check: an unknown name (or an inherited one such as
		// 'toString') must not be dereferenced as if it were a preset.
		if (!Object.prototype.hasOwnProperty.call(presets, preset)) {
			console.warn(`Unknown preset: ${preset}`);
			return;
		}

		const config = presets[preset];
		document.getElementById('inputOutputRatio').value = config.ratio;
		document.getElementById('cacheHitRate').value = config.cache;

		// Nothing else in this script reads or listens to #batchMode, so the
		// control may not be present in the page. Writing to a missing element
		// would throw and skip the refresh below, so it is set only when found.
		const batchToggle = document.getElementById('batchMode');
		if (batchToggle) {
			batchToggle.checked = config.batch;
		}

		updateDisplays();
		updateChart();
	}

	/**
	 * Copy each slider's current integer value into its read-out element:
	 * the ratio as "1:N", the cache hit rate and thinking overhead as "N%".
	 */
	function updateDisplays() {
		// [slider id, read-out id, formatter] for every control.
		const readouts = [
			['inputOutputRatio', 'ratioDisplay', v => `1:${v}`],
			['cacheHitRate', 'cacheDisplay', v => `${v}%`],
			['thinkingOverhead', 'thinkingDisplay', v => `${v}%`]
		];

		// Read all sliders first, then write all read-outs.
		const values = readouts.map(([sliderId]) => parseInt(document.getElementById(sliderId).value));

		readouts.forEach(([, displayId, format], index) => {
			document.getElementById(displayId).textContent = format(values[index]);
		});
	}

	/**
	 * Fill #legend with one entry per distinct provider found in `models`,
	 * in first-appearance order: a colour swatch followed by the provider name.
	 */
	function createLegend() {
		const legend = document.getElementById('legend');
		const newElement = (tag, props) => Object.assign(document.createElement(tag), props);

		// A Set keeps insertion order, so duplicates collapse without reordering.
		const providers = Array.from(new Set(models.map(({ provider }) => provider)));

		for (const provider of providers) {
			const item = newElement('div', { className: 'legend-item' });

			const swatch = newElement('div', { className: 'legend-circle' });
			swatch.style.backgroundColor = providerColors[provider];

			const label = newElement('span', { textContent: provider });

			item.appendChild(swatch);
			item.appendChild(label);
			legend.appendChild(item);
		}
	}

	// Event listeners
	// Each slider refreshes its read-out and the chart on every `input` event.
	for (const sliderId of ['inputOutputRatio', 'cacheHitRate', 'thinkingOverhead']) {
		document.getElementById(sliderId).addEventListener('input', () => {
			updateDisplays();
			updateChart();
		});
	}

	// Switching benchmark changes no read-out, so only the chart is redrawn.
	for (const radio of document.querySelectorAll('input[name="benchmark"]')) {
		radio.addEventListener('change', updateChart);
	}

	// Initialize: build the legend once, then render from the controls' starting values.
	createLegend();
	updateDisplays();
	updateChart();
	</script>
	</body>
	</html>
No results found