Last active
February 14, 2026 02:43
-
-
Save iamwrm/424132ac19c9934c46de39f089632fae to your computer and use it in GitHub Desktop.
GPT-4o Tokenizer - Static webpage using Transformers.js
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <!DOCTYPE html> | |
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>Tokenizer Playground</title> | |
| <style> | |
| @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=IBM+Plex+Mono:wght@400;500&display=swap'); | |
| :root, | |
| [data-theme="dark"] { | |
| --bg: #0d0f14; | |
| --surface: #13151c; | |
| --surface-2: #191c26; | |
| --border: #23273a; | |
| --border-hover: #343a54; | |
| --text: #e6edf3; | |
| --text-muted: #7d8590; | |
| --text-dim: #484f58; | |
| --accent: #bc8cff; | |
| --accent-dim: rgba(188, 140, 255, 0.12); | |
| --accent-2: #d2a8ff; | |
| --btn-text: #0d0f14; | |
| --tooltip-bg: #2d333b; | |
| --tooltip-text: #e6edf3; | |
| --h1-from: #fff; | |
| --noise-opacity: 0.03; | |
| --mono: 'IBM Plex Mono', 'SF Mono', 'Fira Code', monospace; | |
| --sans: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif; | |
| --radius: 8px; | |
| } | |
| [data-theme="light"] { | |
| --bg: #f8f9fb; | |
| --surface: #ffffff; | |
| --surface-2: #eef0f4; | |
| --border: #d1d5de; | |
| --border-hover: #b0b8c9; | |
| --text: #1a1e2c; | |
| --text-muted: #5c6370; | |
| --text-dim: #9ca3af; | |
| --accent: #7c3aed; | |
| --accent-dim: rgba(124, 58, 237, 0.10); | |
| --accent-2: #6d28d9; | |
| --btn-text: #ffffff; | |
| --tooltip-bg: #1e2030; | |
| --tooltip-text: #e6edf3; | |
| --h1-from: #1a1e2c; | |
| --noise-opacity: 0.015; | |
| } | |
| * { box-sizing: border-box; margin: 0; padding: 0; } | |
| body { | |
| font-family: var(--sans); | |
| background: var(--bg); | |
| color: var(--text); | |
| min-height: 100vh; | |
| -webkit-font-smoothing: antialiased; | |
| } | |
| /* — Noise texture overlay — */ | |
| body::before { | |
| content: ''; | |
| position: fixed; | |
| inset: 0; | |
| background: url("data:image/svg+xml,%3Csvg viewBox='0 0 256 256' xmlns='http://www.w3.org/2000/svg'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='0.9' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23n)' opacity='1'/%3E%3C/svg%3E"); | |
| opacity: var(--noise-opacity); | |
| pointer-events: none; | |
| z-index: 0; | |
| } | |
| .page { | |
| position: relative; | |
| z-index: 1; | |
| max-width: 780px; | |
| margin: 0 auto; | |
| padding: 60px 24px 80px; | |
| } | |
| /* — Header — */ | |
| header { margin-bottom: 48px; } | |
| .badge { | |
| display: inline-flex; | |
| align-items: center; | |
| gap: 6px; | |
| padding: 4px 12px; | |
| border: 1px solid var(--border); | |
| border-radius: 999px; | |
| font-size: 0.75rem; | |
| color: var(--text-muted); | |
| margin-bottom: 20px; | |
| background: var(--surface); | |
| } | |
| .badge .dot { | |
| width: 6px; height: 6px; | |
| border-radius: 50%; | |
| background: #3fb950; | |
| box-shadow: 0 0 6px #3fb95066; | |
| } | |
| h1 { | |
| font-size: 2.4rem; | |
| font-weight: 700; | |
| letter-spacing: -0.03em; | |
| line-height: 1.15; | |
| background: linear-gradient(135deg, var(--h1-from) 0%, var(--accent-2) 100%); | |
| -webkit-background-clip: text; | |
| -webkit-text-fill-color: transparent; | |
| background-clip: text; | |
| } | |
| .subtitle { | |
| margin-top: 10px; | |
| font-size: 1.05rem; | |
| color: var(--text-muted); | |
| line-height: 1.6; | |
| } | |
| .subtitle a { | |
| color: var(--accent); | |
| text-decoration: none; | |
| border-bottom: 1px solid transparent; | |
| transition: border-color 0.2s; | |
| } | |
| .subtitle a:hover { border-bottom-color: var(--accent); } | |
| /* — Form controls — */ | |
| .field { margin-bottom: 20px; } | |
| label { | |
| display: block; | |
| font-size: 0.8rem; | |
| font-weight: 600; | |
| text-transform: uppercase; | |
| letter-spacing: 0.06em; | |
| color: var(--text-muted); | |
| margin-bottom: 8px; | |
| } | |
| select, textarea { | |
| width: 100%; | |
| padding: 12px 14px; | |
| font-size: 0.95rem; | |
| font-family: var(--sans); | |
| color: var(--text); | |
| background: var(--surface); | |
| border: 1px solid var(--border); | |
| border-radius: var(--radius); | |
| transition: border-color 0.2s, box-shadow 0.2s; | |
| appearance: none; | |
| } | |
| select:focus, textarea:focus { | |
| outline: none; | |
| border-color: var(--accent); | |
| box-shadow: 0 0 0 3px var(--accent-dim); | |
| } | |
| select { | |
| background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='16' height='16' fill='%237d8590' viewBox='0 0 16 16'%3E%3Cpath d='M4.427 6.427l3.396 3.396a.25.25 0 00.354 0l3.396-3.396A.25.25 0 0011.396 6H4.604a.25.25 0 00-.177.427z'/%3E%3C/svg%3E"); | |
| background-repeat: no-repeat; | |
| background-position: right 12px center; | |
| cursor: pointer; | |
| padding-right: 36px; | |
| } | |
| textarea { | |
| font-family: var(--mono); | |
| font-size: 0.9rem; | |
| min-height: 130px; | |
| resize: vertical; | |
| line-height: 1.65; | |
| } | |
| /* — Button — */ | |
| .actions { display: flex; gap: 12px; align-items: center; } | |
| button { | |
| padding: 10px 24px; | |
| font-size: 0.9rem; | |
| font-weight: 600; | |
| font-family: var(--sans); | |
| border: none; | |
| border-radius: var(--radius); | |
| cursor: pointer; | |
| transition: all 0.2s; | |
| background: var(--accent); | |
| color: var(--btn-text); | |
| } | |
| button:hover { filter: brightness(1.15); transform: translateY(-1px); } | |
| button:active { transform: translateY(0); } | |
| button:disabled { | |
| background: var(--surface-2); | |
| color: var(--text-dim); | |
| cursor: not-allowed; | |
| filter: none; | |
| transform: none; | |
| } | |
| #status { | |
| font-size: 0.85rem; | |
| color: var(--text-muted); | |
| } | |
| .spinner { | |
| display: inline-block; | |
| width: 13px; height: 13px; | |
| border: 2px solid var(--border); | |
| border-top-color: var(--accent); | |
| border-radius: 50%; | |
| animation: spin 0.6s linear infinite; | |
| vertical-align: middle; | |
| margin-right: 5px; | |
| } | |
| @keyframes spin { to { transform: rotate(360deg); } } | |
| /* — Stats bar — */ | |
| .stats-bar { | |
| display: none; | |
| gap: 24px; | |
| margin-top: 28px; | |
| padding: 14px 18px; | |
| background: var(--surface); | |
| border: 1px solid var(--border); | |
| border-radius: var(--radius); | |
| font-size: 0.85rem; | |
| color: var(--text-muted); | |
| } | |
| .stats-bar.visible { display: flex; } | |
| .stat-value { | |
| font-family: var(--mono); | |
| font-weight: 600; | |
| color: var(--accent-2); | |
| font-size: 1.1rem; | |
| } | |
| .stat-label { font-size: 0.75rem; margin-top: 2px; } | |
| /* — Output — */ | |
| .output { display: none; margin-top: 28px; } | |
| .output.visible { display: block; } | |
| .section-title { | |
| font-size: 0.8rem; | |
| font-weight: 600; | |
| text-transform: uppercase; | |
| letter-spacing: 0.06em; | |
| color: var(--text-muted); | |
| margin-bottom: 10px; | |
| } | |
| .section-title small { | |
| font-weight: 400; | |
| text-transform: none; | |
| letter-spacing: 0; | |
| color: var(--text-dim); | |
| } | |
| .token-grid { | |
| display: flex; | |
| flex-wrap: wrap; | |
| gap: 4px; | |
| padding: 14px; | |
| background: var(--surface); | |
| border: 1px solid var(--border); | |
| border-radius: var(--radius); | |
| line-height: 1.9; | |
| min-height: 52px; | |
| } | |
| .tok { | |
| display: inline-block; | |
| padding: 2px 7px; | |
| border-radius: 4px; | |
| font-family: var(--mono); | |
| font-size: 0.85rem; | |
| cursor: default; | |
| position: relative; | |
| transition: filter 0.15s; | |
| white-space: pre; | |
| } | |
| .tok:hover { filter: brightness(1.4); } | |
| .tok::after { | |
| content: attr(data-id); | |
| position: absolute; | |
| bottom: calc(100% + 4px); | |
| left: 50%; | |
| transform: translateX(-50%) scale(0.9); | |
| background: var(--tooltip-bg); | |
| color: var(--tooltip-text); | |
| padding: 3px 8px; | |
| border-radius: 5px; | |
| font-size: 0.7rem; | |
| white-space: nowrap; | |
| opacity: 0; | |
| pointer-events: none; | |
| transition: opacity 0.15s, transform 0.15s; | |
| z-index: 10; | |
| box-shadow: 0 2px 8px rgba(0,0,0,0.4); | |
| } | |
| .tok:hover::after { opacity: 1; transform: translateX(-50%) scale(1); } | |
| .ids-box { | |
| margin-top: 16px; | |
| padding: 14px; | |
| background: var(--surface); | |
| border: 1px solid var(--border); | |
| border-radius: var(--radius); | |
| font-family: var(--mono); | |
| font-size: 0.8rem; | |
| color: var(--text-muted); | |
| word-break: break-all; | |
| max-height: 180px; | |
| overflow-y: auto; | |
| line-height: 1.7; | |
| } | |
| /* — Divider — */ | |
| hr { | |
| border: none; | |
| border-top: 1px solid var(--border); | |
| margin: 28px 0; | |
| } | |
| /* — Theme toggle — */ | |
| .theme-toggle { | |
| position: fixed; | |
| top: 20px; | |
| right: 20px; | |
| z-index: 100; | |
| width: 40px; height: 40px; | |
| border: 1px solid var(--border); | |
| border-radius: 50%; | |
| background: var(--surface); | |
| color: var(--text-muted); | |
| cursor: pointer; | |
| display: flex; | |
| align-items: center; | |
| justify-content: center; | |
| transition: border-color 0.2s, background 0.2s, transform 0.2s; | |
| padding: 0; | |
| font-size: 0; | |
| } | |
| .theme-toggle:hover { | |
| border-color: var(--border-hover); | |
| transform: scale(1.08); | |
| } | |
| .theme-toggle svg { | |
| width: 18px; height: 18px; | |
| fill: none; | |
| stroke: currentColor; | |
| stroke-width: 2; | |
| stroke-linecap: round; | |
| stroke-linejoin: round; | |
| } | |
| .theme-toggle .icon-sun { display: none; } | |
| .theme-toggle .icon-moon { display: block; } | |
| [data-theme="light"] .theme-toggle .icon-sun { display: block; } | |
| [data-theme="light"] .theme-toggle .icon-moon { display: none; } | |
/*
 * Smooth transition on theme change.
 *
 * `transition` is a single property, so a later rule of equal specificity
 * replaces an earlier list instead of adding to it. Each selector below
 * therefore repeats the theme-fade properties AND the interaction properties
 * it animates elsewhere in this sheet (focus ring, hover lift, toggle scale,
 * tooltip fade), so that none of them is lost.
 */
body, .page, .badge, .stats-bar, .token-grid, .ids-box {
  transition: background 0.3s, color 0.3s, border-color 0.3s;
}
/* keep the focus-ring (box-shadow) animation */
select, textarea {
  transition: background 0.3s, color 0.3s, border-color 0.3s, box-shadow 0.2s;
}
/* keep the hover brightness / lift animation */
button {
  transition: background 0.3s, color 0.3s, border-color 0.3s, filter 0.2s, transform 0.2s;
}
/* keep the hover scale animation */
.theme-toggle {
  transition: background 0.3s, color 0.3s, border-color 0.3s, transform 0.2s;
}
/* keep the tooltip fade / scale-in animation */
.tok::after {
  transition: background 0.3s, color 0.3s, opacity 0.15s, transform 0.15s;
}
| /* scrollbar */ | |
| ::-webkit-scrollbar { width: 6px; } | |
| ::-webkit-scrollbar-track { background: transparent; } | |
| ::-webkit-scrollbar-thumb { background: var(--border); border-radius: 3px; } | |
| </style> | |
| </head> | |
| <body> | |
| <button class="theme-toggle" id="theme-toggle" aria-label="Toggle theme"> | |
| <!-- Moon icon (shown in dark mode → click to go light) --> | |
| <svg class="icon-moon" viewBox="0 0 24 24"><path d="M21 12.79A9 9 0 1 1 11.21 3a7 7 0 0 0 9.79 9.79z"/></svg> | |
| <!-- Sun icon (shown in light mode → click to go dark) --> | |
| <svg class="icon-sun" viewBox="0 0 24 24"><circle cx="12" cy="12" r="5"/><line x1="12" y1="1" x2="12" y2="3"/><line x1="12" y1="21" x2="12" y2="23"/><line x1="4.22" y1="4.22" x2="5.64" y2="5.64"/><line x1="18.36" y1="18.36" x2="19.78" y2="19.78"/><line x1="1" y1="12" x2="3" y2="12"/><line x1="21" y1="12" x2="23" y2="12"/><line x1="4.22" y1="19.78" x2="5.64" y2="18.36"/><line x1="18.36" y1="5.64" x2="19.78" y2="4.22"/></svg> | |
| </button> | |
| <div class="page"> | |
| <header> | |
| <div class="badge"><span class="dot"></span> Runs entirely in your browser</div> | |
| <h1>Tokenizer Playground</h1> | |
| <p class="subtitle"> | |
| Visualize how LLMs break text into tokens. Powered by | |
| <a href="https://huggingface.co/docs/transformers.js" target="_blank">Transformers.js</a> | |
| </p> | |
| </header> | |
| <div class="field"> | |
| <label for="model-select">Tokenizer</label> | |
| <select id="model-select"> | |
| <optgroup label="OpenAI"> | |
| <option value="Xenova/gpt-4o" selected>GPT-4o (o200k_base)</option> | |
| <option value="Xenova/gpt-4">GPT-4 / 3.5 Turbo (cl100k_base)</option> | |
| <option value="Xenova/gpt2">GPT-2 (bpe 50k)</option> | |
| </optgroup> | |
| <optgroup label="Meta"> | |
| <option value="Xenova/llama3-tokenizer">LLaMA 3 (128k vocab)</option> | |
| <option value="Xenova/llama-160m">LLaMA 2 (32k vocab)</option> | |
| </optgroup> | |
| <optgroup label="Mistral"> | |
| <option value="Xenova/mistral-tokenizer-v3">Mistral v3 (Tekken)</option> | |
| <option value="Xenova/mistral-tokenizer-v1">Mistral v1 (SentencePiece)</option> | |
| </optgroup> | |
| <optgroup label="Google"> | |
| <option value="Xenova/gemma-2-tokenizer">Gemma 2 (256k vocab)</option> | |
| </optgroup> | |
| <optgroup label="Microsoft"> | |
| <option value="Xenova/Phi-3-mini-4k-instruct">Phi-3 (32k vocab)</option> | |
| </optgroup> | |
| <optgroup label="Alibaba"> | |
| <option value="Xenova/Qwen1.5-0.5B">Qwen 1.5 (152k vocab)</option> | |
| </optgroup> | |
| <optgroup label="DeepSeek"> | |
| <option value="Xenova/deepseek-coder-1.3b-base">DeepSeek Coder (32k vocab)</option> | |
| </optgroup> | |
| <optgroup label="Anthropic"> | |
| <option value="Xenova/claude-tokenizer">Claude (bpe)</option> | |
| </optgroup> | |
| </select> | |
| </div> | |
| <div class="field"> | |
| <label for="input">Input Text</label> | |
| <textarea id="input" placeholder="Type or paste text here…">Hello, world! This is a test of the tokenizer running entirely in your browser. 🚀</textarea> | |
| </div> | |
| <div class="actions"> | |
| <button id="btn" disabled>Loading…</button> | |
| <span id="status"></span> | |
| </div> | |
| <div class="stats-bar" id="stats"> | |
| <div><div class="stat-value" id="token-count">0</div><div class="stat-label">Tokens</div></div> | |
| <div><div class="stat-value" id="char-count">0</div><div class="stat-label">Characters</div></div> | |
| <div><div class="stat-value" id="ratio">0</div><div class="stat-label">Chars / Token</div></div> | |
| </div> | |
| <div class="output" id="output"> | |
| <hr> | |
| <div class="section-title">Tokens <small>— hover for ID</small></div> | |
| <div class="token-grid" id="token-grid"></div> | |
| <div class="section-title" style="margin-top:20px">Token IDs</div> | |
| <div class="ids-box" id="ids-box"></div> | |
| </div> | |
| </div> | |
| <script type="module"> | |
| import { AutoTokenizer } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3'; | |
// — Theme toggle —
const root = document.documentElement;

// The only values the stylesheet and isDark() understand.
const THEMES = ['dark', 'light'];

/**
 * Read the persisted theme.
 * @returns {string|null} 'dark' or 'light', or null when nothing usable is
 *   stored (missing key, unexpected value, or storage is inaccessible).
 */
function readStoredTheme() {
  try {
    const value = localStorage.getItem('theme');
    return THEMES.includes(value) ? value : null;
  } catch {
    // Storage access can throw when the browser blocks site data; the theme
    // is cosmetic, so fall back to the OS preference instead of aborting.
    return null;
  }
}

/**
 * Persist the chosen theme. Best effort: a storage failure must not break
 * the toggle itself.
 * @param {string} theme - 'dark' or 'light'.
 */
function storeTheme(theme) {
  try {
    localStorage.setItem('theme', theme);
  } catch {
    // Ignored on purpose: the theme still applies for this page view.
  }
}

const stored = readStoredTheme();
const prefersDark = window.matchMedia('(prefers-color-scheme: dark)').matches;
root.setAttribute('data-theme', stored ?? (prefersDark ? 'dark' : 'light'));

document.getElementById('theme-toggle').addEventListener('click', () => {
  const next = root.getAttribute('data-theme') === 'dark' ? 'light' : 'dark';
  root.setAttribute('data-theme', next);
  storeTheme(next);
  // re-render tokens with correct palette if output is showing
  if (lastIds.length) renderTokens();
});
/**
 * Report whether the dark theme is the one currently applied.
 * @returns {boolean} true when the root element's data-theme is 'dark'.
 */
function isDark() {
  const activeTheme = root.getAttribute('data-theme');
  return activeTheme === 'dark';
}

// Token chip colours as [background, foreground] pairs, used on the dark theme.
const PALETTE_DARK = [
  ['#2a1f3d', '#c4b5fd'],
  ['#1a2e1a', '#86efac'],
  ['#1e293b', '#7dd3fc'],
  ['#312e2a', '#fcd34d'],
  ['#2d1f2f', '#f9a8d4'],
  ['#1a2a2a', '#5eead4'],
  ['#2e2418', '#fdba74'],
  ['#1e1e30', '#a5b4fc'],
  ['#2a1a1a', '#fca5a5'],
  ['#1a2e25', '#6ee7b7'],
];

// Same layout as PALETTE_DARK, used on the light theme.
const PALETTE_LIGHT = [
  ['#ede9fe', '#5b21b6'],
  ['#dcfce7', '#166534'],
  ['#dbeafe', '#1e40af'],
  ['#fef9c3', '#854d0e'],
  ['#fce7f3', '#9d174d'],
  ['#ccfbf1', '#115e59'],
  ['#ffedd5', '#9a3412'],
  ['#e0e7ff', '#3730a3'],
  ['#fee2e2', '#991b1b'],
  ['#d1fae5', '#065f46'],
];

/**
 * Pick the chip palette matching the active theme.
 * @returns {string[][]} list of [background, foreground] colour pairs.
 */
function palette() {
  if (isDark()) {
    return PALETTE_DARK;
  }
  return PALETTE_LIGHT;
}
// Shorthand for a single-element CSS selector lookup on the document.
const $ = (selector) => {
  return document.querySelector(selector);
};

// Module state: the active tokenizer, the model id it was loaded from, and
// the ids / text of the most recent tokenization.
let tokenizer = null;
let currentModel = '';
let lastIds = [];
let lastText = '';

// Form controls.
const selectEl = $('#model-select');
const inputEl = $('#input');
const btn = $('#btn');
const statusEl = $('#status');

// Result containers.
const statsEl = $('#stats');
const output = $('#output');
const tokenGrid = $('#token-grid');
const idsBox = $('#ids-box');
// Number of the most recent loadTokenizer() call. A download that finishes
// after a newer call was made compares its own number against this one and
// discards its result, so the last selection always wins.
let latestLoadRequest = 0;

/**
 * Download the tokenizer for `modelId` and make it the active one.
 *
 * While the download runs the Tokenize button is disabled and the status line
 * shows a spinner. If another call is made before this one settles, this
 * call's outcome (success or failure) is ignored. On success, results that
 * are already on screen are re-tokenized so they match the new tokenizer. On
 * failure the error is shown in the status line, the button stays disabled,
 * and no tokenizer is left active so the old one cannot be used under the
 * newly selected model's name.
 *
 * @param {string} modelId - model repository id passed to
 *   AutoTokenizer.from_pretrained, e.g. 'Xenova/gpt-4o'.
 * @returns {Promise<void>} settles once the request is finished or
 *   superseded; load errors are reported in the UI, not rethrown.
 */
async function loadTokenizer(modelId) {
  const requestId = ++latestLoadRequest;

  if (modelId === currentModel && tokenizer) {
    // Already active. A download that was just superseded may have left the
    // controls in their "loading" state, so restore them before returning.
    statusEl.textContent = '';
    btn.disabled = false;
    btn.textContent = 'Tokenize';
    return;
  }

  btn.disabled = true;
  btn.textContent = 'Loading…';
  // Build the status line from nodes rather than an HTML string so the model
  // id is always inserted as plain text.
  const spinner = document.createElement('span');
  spinner.className = 'spinner';
  statusEl.replaceChildren(spinner, ` Downloading ${modelId}…`);

  let loaded;
  try {
    loaded = await AutoTokenizer.from_pretrained(modelId);
  } catch (e) {
    if (requestId !== latestLoadRequest) return; // a newer request owns the UI
    tokenizer = null;
    currentModel = '';
    statusEl.textContent = `❌ ${e?.message ?? e}`;
    btn.textContent = 'Tokenize'; // stays disabled, but no longer claims to be loading
    console.error(e);
    return;
  }

  if (requestId !== latestLoadRequest) return; // superseded while downloading

  tokenizer = loaded;
  currentModel = modelId;
  statusEl.textContent = '';
  btn.disabled = false;
  btn.textContent = 'Tokenize';

  // Ids from the previous tokenizer are meaningless to the new one; refresh
  // the visible results instead of leaving them to be re-decoded wrongly.
  if (lastIds.length) tokenize();
}
/**
 * Rebuild the token chip grid from `lastIds`.
 *
 * Each id is decoded on its own (special tokens kept) and drawn as a
 * `span.tok`, coloured by cycling through the active theme's palette. The id
 * is stored in `data-id`, which the stylesheet displays as the hover tooltip.
 * Does nothing when there are no ids or no tokenizer is loaded.
 */
function renderTokens() {
  const nothingToRender = lastIds.length === 0 || !tokenizer;
  if (nothingToRender) return;

  const colours = palette();
  tokenGrid.innerHTML = '';

  lastIds.forEach((id, index) => {
    const piece = tokenizer.decode([id], { skip_special_tokens: false });
    const [background, foreground] = colours[index % colours.length];

    const chip = document.createElement('span');
    chip.className = 'tok';
    chip.style.background = background;
    chip.style.color = foreground;
    // Make whitespace visible: space → ·, newline → ↵, tab → ⇥
    chip.textContent = piece
      .replace(/ /g, '·')
      .replace(/\n/g, '↵')
      .replace(/\t/g, '⇥');
    chip.setAttribute('data-id', id);

    tokenGrid.appendChild(chip);
  });
}
/**
 * Tokenize the textarea contents with the active tokenizer and refresh the
 * token chips, the id list and the stats bar.
 *
 * Does nothing when no tokenizer is loaded. When the input is empty, the
 * previous results are cleared and hidden rather than left on screen.
 * "Characters" is counted in Unicode code points, so a symbol outside the
 * Basic Multilingual Plane (such as an emoji) counts once, not twice.
 */
function tokenize() {
  if (!tokenizer) return;

  lastText = inputEl.value;
  if (!lastText) {
    // Nothing to tokenize: drop stale results so they cannot be mistaken for
    // the output of the (now empty) input.
    lastIds = [];
    tokenGrid.textContent = '';
    idsBox.textContent = '';
    statsEl.classList.remove('visible');
    output.classList.remove('visible');
    return;
  }

  lastIds = Array.from(tokenizer.encode(lastText));

  // renderTokens() returns early on an empty id list, so clear the grid here
  // to make sure a zero-token result never keeps the previous chips.
  tokenGrid.textContent = '';
  renderTokens();
  idsBox.textContent = `[${lastIds.join(', ')}]`;

  const charCount = [...lastText].length; // code points, not UTF-16 units
  $('#token-count').textContent = lastIds.length;
  $('#char-count').textContent = charCount;
  $('#ratio').textContent = lastIds.length
    ? (charCount / lastIds.length).toFixed(2)
    : '—';

  statsEl.classList.add('visible');
  output.classList.add('visible');
}
// Events
// Switching the model starts a download; loadTokenizer reports its own errors.
selectEl.addEventListener('change', () => loadTokenizer(selectEl.value));
btn.addEventListener('click', tokenize);
// Ctrl+Enter / Cmd+Enter in the textarea is a shortcut for the button.
inputEl.addEventListener('keydown', (e) => {
  const isShortcut = e.key === 'Enter' && (e.ctrlKey || e.metaKey);
  if (!isShortcut) return;
  // Respect the button's disabled state: while a tokenizer is downloading the
  // previously loaded one is still in memory, and running it now would show
  // results that do not belong to the selected model.
  if (btn.disabled) return;
  tokenize();
});
// Initial load
await loadTokenizer(selectEl.value);
| </script> | |
| </body> | |
| </html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment