Skip to content

Instantly share code, notes, and snippets.

@iamwrm
Last active February 14, 2026 02:43
Show Gist options
  • Select an option

  • Save iamwrm/424132ac19c9934c46de39f089632fae to your computer and use it in GitHub Desktop.

Select an option

Save iamwrm/424132ac19c9934c46de39f089632fae to your computer and use it in GitHub Desktop.
GPT-4o Tokenizer - Static webpage using Transformers.js
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Tokenizer Playground</title>
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=IBM+Plex+Mono:wght@400;500&display=swap');
:root,
[data-theme="dark"] {
--bg: #0d0f14;
--surface: #13151c;
--surface-2: #191c26;
--border: #23273a;
--border-hover: #343a54;
--text: #e6edf3;
--text-muted: #7d8590;
--text-dim: #484f58;
--accent: #bc8cff;
--accent-dim: rgba(188, 140, 255, 0.12);
--accent-2: #d2a8ff;
--btn-text: #0d0f14;
--tooltip-bg: #2d333b;
--tooltip-text: #e6edf3;
--h1-from: #fff;
--noise-opacity: 0.03;
--mono: 'IBM Plex Mono', 'SF Mono', 'Fira Code', monospace;
--sans: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
--radius: 8px;
}
[data-theme="light"] {
--bg: #f8f9fb;
--surface: #ffffff;
--surface-2: #eef0f4;
--border: #d1d5de;
--border-hover: #b0b8c9;
--text: #1a1e2c;
--text-muted: #5c6370;
--text-dim: #9ca3af;
--accent: #7c3aed;
--accent-dim: rgba(124, 58, 237, 0.10);
--accent-2: #6d28d9;
--btn-text: #ffffff;
--tooltip-bg: #1e2030;
--tooltip-text: #e6edf3;
--h1-from: #1a1e2c;
--noise-opacity: 0.015;
}
* { box-sizing: border-box; margin: 0; padding: 0; }
body {
font-family: var(--sans);
background: var(--bg);
color: var(--text);
min-height: 100vh;
-webkit-font-smoothing: antialiased;
}
/* — Noise texture overlay — */
body::before {
content: '';
position: fixed;
inset: 0;
background: url("data:image/svg+xml,%3Csvg viewBox='0 0 256 256' xmlns='http://www.w3.org/2000/svg'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='0.9' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23n)' opacity='1'/%3E%3C/svg%3E");
opacity: var(--noise-opacity);
pointer-events: none;
z-index: 0;
}
.page {
position: relative;
z-index: 1;
max-width: 780px;
margin: 0 auto;
padding: 60px 24px 80px;
}
/* — Header — */
header { margin-bottom: 48px; }
.badge {
display: inline-flex;
align-items: center;
gap: 6px;
padding: 4px 12px;
border: 1px solid var(--border);
border-radius: 999px;
font-size: 0.75rem;
color: var(--text-muted);
margin-bottom: 20px;
background: var(--surface);
}
.badge .dot {
width: 6px; height: 6px;
border-radius: 50%;
background: #3fb950;
box-shadow: 0 0 6px #3fb95066;
}
h1 {
font-size: 2.4rem;
font-weight: 700;
letter-spacing: -0.03em;
line-height: 1.15;
background: linear-gradient(135deg, var(--h1-from) 0%, var(--accent-2) 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
}
.subtitle {
margin-top: 10px;
font-size: 1.05rem;
color: var(--text-muted);
line-height: 1.6;
}
.subtitle a {
color: var(--accent);
text-decoration: none;
border-bottom: 1px solid transparent;
transition: border-color 0.2s;
}
.subtitle a:hover { border-bottom-color: var(--accent); }
/* — Form controls — */
.field { margin-bottom: 20px; }
label {
display: block;
font-size: 0.8rem;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.06em;
color: var(--text-muted);
margin-bottom: 8px;
}
select, textarea {
width: 100%;
padding: 12px 14px;
font-size: 0.95rem;
font-family: var(--sans);
color: var(--text);
background: var(--surface);
border: 1px solid var(--border);
border-radius: var(--radius);
transition: border-color 0.2s, box-shadow 0.2s;
appearance: none;
}
select:focus, textarea:focus {
outline: none;
border-color: var(--accent);
box-shadow: 0 0 0 3px var(--accent-dim);
}
select {
background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='16' height='16' fill='%237d8590' viewBox='0 0 16 16'%3E%3Cpath d='M4.427 6.427l3.396 3.396a.25.25 0 00.354 0l3.396-3.396A.25.25 0 0011.396 6H4.604a.25.25 0 00-.177.427z'/%3E%3C/svg%3E");
background-repeat: no-repeat;
background-position: right 12px center;
cursor: pointer;
padding-right: 36px;
}
textarea {
font-family: var(--mono);
font-size: 0.9rem;
min-height: 130px;
resize: vertical;
line-height: 1.65;
}
/* — Button — */
.actions { display: flex; gap: 12px; align-items: center; }
button {
padding: 10px 24px;
font-size: 0.9rem;
font-weight: 600;
font-family: var(--sans);
border: none;
border-radius: var(--radius);
cursor: pointer;
transition: all 0.2s;
background: var(--accent);
color: var(--btn-text);
}
button:hover { filter: brightness(1.15); transform: translateY(-1px); }
button:active { transform: translateY(0); }
button:disabled {
background: var(--surface-2);
color: var(--text-dim);
cursor: not-allowed;
filter: none;
transform: none;
}
#status {
font-size: 0.85rem;
color: var(--text-muted);
}
.spinner {
display: inline-block;
width: 13px; height: 13px;
border: 2px solid var(--border);
border-top-color: var(--accent);
border-radius: 50%;
animation: spin 0.6s linear infinite;
vertical-align: middle;
margin-right: 5px;
}
@keyframes spin { to { transform: rotate(360deg); } }
/* — Stats bar — */
.stats-bar {
display: none;
gap: 24px;
margin-top: 28px;
padding: 14px 18px;
background: var(--surface);
border: 1px solid var(--border);
border-radius: var(--radius);
font-size: 0.85rem;
color: var(--text-muted);
}
.stats-bar.visible { display: flex; }
.stat-value {
font-family: var(--mono);
font-weight: 600;
color: var(--accent-2);
font-size: 1.1rem;
}
.stat-label { font-size: 0.75rem; margin-top: 2px; }
/* — Output — */
.output { display: none; margin-top: 28px; }
.output.visible { display: block; }
.section-title {
font-size: 0.8rem;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.06em;
color: var(--text-muted);
margin-bottom: 10px;
}
.section-title small {
font-weight: 400;
text-transform: none;
letter-spacing: 0;
color: var(--text-dim);
}
.token-grid {
display: flex;
flex-wrap: wrap;
gap: 4px;
padding: 14px;
background: var(--surface);
border: 1px solid var(--border);
border-radius: var(--radius);
line-height: 1.9;
min-height: 52px;
}
.tok {
display: inline-block;
padding: 2px 7px;
border-radius: 4px;
font-family: var(--mono);
font-size: 0.85rem;
cursor: default;
position: relative;
transition: filter 0.15s;
white-space: pre;
}
.tok:hover { filter: brightness(1.4); }
.tok::after {
content: attr(data-id);
position: absolute;
bottom: calc(100% + 4px);
left: 50%;
transform: translateX(-50%) scale(0.9);
background: var(--tooltip-bg);
color: var(--tooltip-text);
padding: 3px 8px;
border-radius: 5px;
font-size: 0.7rem;
white-space: nowrap;
opacity: 0;
pointer-events: none;
transition: opacity 0.15s, transform 0.15s;
z-index: 10;
box-shadow: 0 2px 8px rgba(0,0,0,0.4);
}
.tok:hover::after { opacity: 1; transform: translateX(-50%) scale(1); }
.ids-box {
margin-top: 16px;
padding: 14px;
background: var(--surface);
border: 1px solid var(--border);
border-radius: var(--radius);
font-family: var(--mono);
font-size: 0.8rem;
color: var(--text-muted);
word-break: break-all;
max-height: 180px;
overflow-y: auto;
line-height: 1.7;
}
/* — Divider — */
hr {
border: none;
border-top: 1px solid var(--border);
margin: 28px 0;
}
/* — Theme toggle — */
.theme-toggle {
position: fixed;
top: 20px;
right: 20px;
z-index: 100;
width: 40px; height: 40px;
border: 1px solid var(--border);
border-radius: 50%;
background: var(--surface);
color: var(--text-muted);
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
transition: border-color 0.2s, background 0.2s, transform 0.2s;
padding: 0;
font-size: 0;
}
.theme-toggle:hover {
border-color: var(--border-hover);
transform: scale(1.08);
}
.theme-toggle svg {
width: 18px; height: 18px;
fill: none;
stroke: currentColor;
stroke-width: 2;
stroke-linecap: round;
stroke-linejoin: round;
}
.theme-toggle .icon-sun { display: none; }
.theme-toggle .icon-moon { display: block; }
[data-theme="light"] .theme-toggle .icon-sun { display: block; }
[data-theme="light"] .theme-toggle .icon-moon { display: none; }
/* smooth transition on theme change
   A `transition` declaration replaces (never merges with) an earlier one of
   equal specificity, so each rule below restates the element's own
   interaction transitions next to the theme-change ones. */
body, .page, .badge, .stats-bar, .token-grid, .ids-box {
  transition: background 0.3s, color 0.3s, border-color 0.3s;
}
/* keeps the focus-ring animation declared on select/textarea */
select, textarea {
  transition: background 0.3s, color 0.3s, border-color 0.2s, box-shadow 0.2s;
}
/* keeps the hover brightness / lift animation declared on button */
button {
  transition: background 0.3s, color 0.3s, border-color 0.3s, filter 0.2s, transform 0.2s;
}
/* keeps the hover scale animation declared on .theme-toggle */
.theme-toggle {
  transition: background 0.3s, color 0.3s, border-color 0.2s, transform 0.2s;
}
/* keeps the tooltip fade / scale animation declared on .tok::after */
.tok::after {
  transition: background 0.3s, color 0.3s, opacity 0.15s, transform 0.15s;
}
/* scrollbar */
::-webkit-scrollbar { width: 6px; }
::-webkit-scrollbar-track { background: transparent; }
::-webkit-scrollbar-thumb { background: var(--border); border-radius: 3px; }
</style>
</head>
<body>
<button class="theme-toggle" id="theme-toggle" aria-label="Toggle theme">
<!-- Moon icon (shown in dark mode → click to go light) -->
<svg class="icon-moon" viewBox="0 0 24 24"><path d="M21 12.79A9 9 0 1 1 11.21 3a7 7 0 0 0 9.79 9.79z"/></svg>
<!-- Sun icon (shown in light mode → click to go dark) -->
<svg class="icon-sun" viewBox="0 0 24 24"><circle cx="12" cy="12" r="5"/><line x1="12" y1="1" x2="12" y2="3"/><line x1="12" y1="21" x2="12" y2="23"/><line x1="4.22" y1="4.22" x2="5.64" y2="5.64"/><line x1="18.36" y1="18.36" x2="19.78" y2="19.78"/><line x1="1" y1="12" x2="3" y2="12"/><line x1="21" y1="12" x2="23" y2="12"/><line x1="4.22" y1="19.78" x2="5.64" y2="18.36"/><line x1="18.36" y1="5.64" x2="19.78" y2="4.22"/></svg>
</button>
<div class="page">
<header>
<div class="badge"><span class="dot"></span> Runs entirely in your browser</div>
<h1>Tokenizer Playground</h1>
<p class="subtitle">
Visualize how LLMs break text into tokens. Powered by
<!-- rel="noopener noreferrer": the new tab must not get a window.opener handle to this page -->
<a href="https://huggingface.co/docs/transformers.js" target="_blank" rel="noopener noreferrer">Transformers.js</a>
</p>
</header>
<div class="field">
<label for="model-select">Tokenizer</label>
<select id="model-select">
<optgroup label="OpenAI">
<option value="Xenova/gpt-4o" selected>GPT-4o &nbsp;(o200k_base)</option>
<option value="Xenova/gpt-4">GPT-4 / 3.5 Turbo &nbsp;(cl100k_base)</option>
<option value="Xenova/gpt2">GPT-2 &nbsp;(bpe 50k)</option>
</optgroup>
<optgroup label="Meta">
<option value="Xenova/llama3-tokenizer">LLaMA 3 &nbsp;(128k vocab)</option>
<option value="Xenova/llama-160m">LLaMA 2 &nbsp;(32k vocab)</option>
</optgroup>
<optgroup label="Mistral">
<option value="Xenova/mistral-tokenizer-v3">Mistral v3 &nbsp;(Tekken)</option>
<option value="Xenova/mistral-tokenizer-v1">Mistral v1 &nbsp;(SentencePiece)</option>
</optgroup>
<optgroup label="Google">
<option value="Xenova/gemma-2-tokenizer">Gemma 2 &nbsp;(256k vocab)</option>
</optgroup>
<optgroup label="Microsoft">
<option value="Xenova/Phi-3-mini-4k-instruct">Phi-3 &nbsp;(32k vocab)</option>
</optgroup>
<optgroup label="Alibaba">
<option value="Xenova/Qwen1.5-0.5B">Qwen 1.5 &nbsp;(152k vocab)</option>
</optgroup>
<optgroup label="DeepSeek">
<option value="Xenova/deepseek-coder-1.3b-base">DeepSeek Coder &nbsp;(32k vocab)</option>
</optgroup>
<optgroup label="Anthropic">
<option value="Xenova/claude-tokenizer">Claude &nbsp;(bpe)</option>
</optgroup>
</select>
</div>
<div class="field">
<label for="input">Input Text</label>
<textarea id="input" placeholder="Type or paste text here…">Hello, world! This is a test of the tokenizer running entirely in your browser. 🚀</textarea>
</div>
<div class="actions">
<button id="btn" disabled>Loading…</button>
<span id="status"></span>
</div>
<div class="stats-bar" id="stats">
<div><div class="stat-value" id="token-count">0</div><div class="stat-label">Tokens</div></div>
<div><div class="stat-value" id="char-count">0</div><div class="stat-label">Characters</div></div>
<div><div class="stat-value" id="ratio">0</div><div class="stat-label">Chars / Token</div></div>
</div>
<div class="output" id="output">
<hr>
<div class="section-title">Tokens <small>— hover for ID</small></div>
<div class="token-grid" id="token-grid"></div>
<div class="section-title" style="margin-top:20px">Token IDs</div>
<div class="ids-box" id="ids-box"></div>
</div>
</div>
<script type="module">
import { AutoTokenizer } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3';
// — Theme toggle —
const root = document.documentElement;

// The only values the stylesheet's [data-theme] selectors and isDark() understand.
const THEMES = ['dark', 'light'];

/**
 * Read the persisted theme choice.
 * @returns {string|null} 'dark' or 'light', or null when nothing usable is
 *   stored or storage cannot be accessed.
 */
function readStoredTheme() {
  try {
    const value = localStorage.getItem('theme');
    return THEMES.includes(value) ? value : null;
  } catch (err) {
    // Storage access can throw (e.g. when blocked by browser settings). The
    // theme is cosmetic, so fall back rather than abort the whole module.
    console.warn('Theme preference could not be read:', err);
    return null;
  }
}

/**
 * Persist the theme choice on a best-effort basis.
 * @param {string} theme - 'dark' or 'light'.
 */
function storeTheme(theme) {
  try {
    localStorage.setItem('theme', theme);
  } catch (err) {
    console.warn('Theme preference could not be saved:', err);
  }
}

// Initial theme: a valid saved choice wins, otherwise follow the OS preference.
const stored = readStoredTheme();
const prefersDark = window.matchMedia('(prefers-color-scheme: dark)').matches;
root.setAttribute('data-theme', stored ?? (prefersDark ? 'dark' : 'light'));

document.getElementById('theme-toggle').addEventListener('click', () => {
  const next = root.getAttribute('data-theme') === 'dark' ? 'light' : 'dark';
  root.setAttribute('data-theme', next);
  storeTheme(next);
  // Token chips carry inline palette colours, so they are re-rendered after
  // the theme flips; skipped while nothing has been tokenized yet.
  if (lastIds.length) renderTokens();
});
/**
 * Report whether the dark theme is currently active.
 * @returns {boolean} true when the root element's data-theme is 'dark'.
 */
function isDark() {
  const activeTheme = root.getAttribute('data-theme');
  return activeTheme === 'dark';
}
// Token chip colours for the dark theme. Each entry is a
// [background, foreground] pair; renderTokens() cycles through the list by
// token index.
const PALETTE_DARK = [
['#2a1f3d','#c4b5fd'], ['#1a2e1a','#86efac'], ['#1e293b','#7dd3fc'],
['#312e2a','#fcd34d'], ['#2d1f2f','#f9a8d4'], ['#1a2a2a','#5eead4'],
['#2e2418','#fdba74'], ['#1e1e30','#a5b4fc'], ['#2a1a1a','#fca5a5'],
['#1a2e25','#6ee7b7'],
];
// Token chip colours for the light theme, in the same
// [background, foreground] layout and with the same number of entries.
const PALETTE_LIGHT = [
['#ede9fe','#5b21b6'], ['#dcfce7','#166534'], ['#dbeafe','#1e40af'],
['#fef9c3','#854d0e'], ['#fce7f3','#9d174d'], ['#ccfbf1','#115e59'],
['#ffedd5','#9a3412'], ['#e0e7ff','#3730a3'], ['#fee2e2','#991b1b'],
['#d1fae5','#065f46'],
];
/**
 * Pick the token colour table that matches the active theme.
 * @returns {string[][]} PALETTE_DARK in dark mode, PALETTE_LIGHT otherwise.
 */
function palette() {
  if (isDark()) {
    return PALETTE_DARK;
  }
  return PALETTE_LIGHT;
}
// Shorthand for a single querySelector lookup against the document.
const $ = (selector) => document.querySelector(selector);

// Elements the script reads from or writes to, looked up once by id.
const selectEl = document.getElementById('model-select');
const inputEl = document.getElementById('input');
const btn = document.getElementById('btn');
const statusEl = document.getElementById('status');
const statsEl = document.getElementById('stats');
const tokenGrid = document.getElementById('token-grid');
const idsBox = document.getElementById('ids-box');
const output = document.getElementById('output');

// Mutable state shared by the functions below.
let tokenizer = null;   // tokenizer instance, null until the first load succeeds
let currentModel = '';  // model id that `tokenizer` was loaded from
let lastIds = [];       // token ids produced by the most recent tokenize()
let lastText = '';      // input text read by the most recent tokenize()
// Identifies the most recent load request, so a slow download that finishes
// late cannot overwrite the result of a newer selection.
let loadRequestId = 0;

// Put the button and status line into the "ready to tokenize" state.
function showTokenizerReady() {
  statusEl.textContent = '';
  btn.disabled = false;
  btn.textContent = 'Tokenize';
}

/**
 * Load the tokenizer for `modelId` and make it the active one.
 *
 * While the download is running the button is disabled and a spinner is shown
 * in the status line. Failures are logged and reported in the status line;
 * they are not rethrown. If another call is made before this one settles,
 * this call's outcome is discarded.
 *
 * @param {string} modelId - Model id passed to AutoTokenizer.from_pretrained.
 * @returns {Promise<void>}
 */
async function loadTokenizer(modelId) {
  // Claim the "latest request" slot before anything else, so that choosing an
  // already-loaded model also supersedes a download still in flight.
  const requestId = ++loadRequestId;

  if (modelId === currentModel && tokenizer) {
    // Nothing to download, but an earlier failed or superseded load may have
    // left the button disabled, so restore the ready state explicitly.
    showTokenizerReady();
    return;
  }

  btn.disabled = true;
  btn.textContent = 'Loading…';
  // Build the status content from nodes instead of interpolating into HTML.
  const spinner = document.createElement('span');
  spinner.className = 'spinner';
  statusEl.replaceChildren(spinner, ` Downloading ${modelId}…`);

  try {
    const loaded = await AutoTokenizer.from_pretrained(modelId);
    if (requestId !== loadRequestId) return; // a newer request owns the UI
    tokenizer = loaded;
    currentModel = modelId;
    showTokenizerReady();
  } catch (e) {
    console.error(e);
    if (requestId !== loadRequestId) return; // a newer request owns the UI
    const reason = e instanceof Error ? e.message : String(e);
    statusEl.textContent = '❌ ' + reason;
    // Nothing is loading any more. The button stays disabled because the
    // selected model has no tokenizer, but it must not keep saying "Loading…".
    btn.textContent = 'Tokenize';
  }
}
/**
 * Repaint the token grid from the ids of the most recent tokenization.
 *
 * Each id is decoded on its own and shown as a coloured chip; colours cycle
 * through the palette of the active theme. Spaces, newlines and tabs are
 * replaced with visible marker glyphs, and the id is stored in `data-id`
 * (the stylesheet shows it as the hover tooltip). Does nothing when there are
 * no ids or no tokenizer.
 */
function renderTokens() {
  const nothingToDraw = lastIds.length === 0 || !tokenizer;
  if (nothingToDraw) return;

  const colours = palette();
  tokenGrid.innerHTML = '';

  for (const [index, id] of lastIds.entries()) {
    const text = tokenizer.decode([id], { skip_special_tokens: false });
    const [background, foreground] = colours[index % colours.length];

    const chip = document.createElement('span');
    chip.className = 'tok';
    chip.style.background = background;
    chip.style.color = foreground;
    // Make whitespace visible so every token has something to look at.
    chip.textContent = text
      .replaceAll(' ', '·')
      .replaceAll('\n', '↵')
      .replaceAll('\t', '⇥');
    chip.setAttribute('data-id', id);
    tokenGrid.appendChild(chip);
  }
}
/**
 * Tokenize the textarea contents with the loaded tokenizer, then refresh the
 * token grid, the raw id list and the statistics bar.
 *
 * Does nothing beyond recording the input text when the input is empty or no
 * tokenizer has been loaded yet.
 */
function tokenize() {
  lastText = inputEl.value;
  if (!lastText || !tokenizer) return;

  lastIds = Array.from(tokenizer.encode(lastText));
  renderTokens();
  idsBox.textContent = `[${lastIds.join(', ')}]`;

  // Count Unicode code points, not UTF-16 code units: `String.length` reports
  // 2 for characters outside the BMP (most emoji), which would inflate both
  // the "Characters" figure and the chars-per-token ratio.
  const charCount = [...lastText].length;
  const tokenCount = lastIds.length;

  $('#token-count').textContent = tokenCount;
  $('#char-count').textContent = charCount;
  $('#ratio').textContent = tokenCount ? (charCount / tokenCount).toFixed(2) : '—';

  // Both panels start hidden and are revealed once there is a result.
  statsEl.classList.add('visible');
  output.classList.add('visible');
}
// Events
// Load the matching tokenizer whenever a different model is picked.
// loadTokenizer reports its own failures in the status line, so the returned
// promise is deliberately not awaited here.
selectEl.addEventListener('change', () => {
  void loadTokenizer(selectEl.value);
});
btn.addEventListener('click', tokenize);
// Ctrl+Enter / Cmd+Enter in the textarea is a shortcut for the button.
inputEl.addEventListener('keydown', (e) => {
  const isSubmitShortcut = e.key === 'Enter' && (e.ctrlKey || e.metaKey);
  if (!isSubmitShortcut) return;
  // Mirror the button: while it is disabled (a tokenizer is being loaded) the
  // shortcut must not tokenize with the previously loaded model.
  if (btn.disabled) return;
  tokenize();
});
// Initial load
await loadTokenizer(selectEl.value);
</script>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment