Created
March 8, 2026 22:06
-
-
Save andrewnc/46cb8445e22abe3700149afe8481a6cd to your computer and use it in GitHub Desktop.
Nano GPT BQN
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # nanoGPT-style single-file character model in pure BQN | |
| # trained on Tiny Shakespeare from Karpathy's nanoGPT/char-rnn data path | |
| # result shape: | |
| # ⟨ initial_loss, final_loss, prompt, target, direct_prediction, autoregressive_sample ⟩ | |
| # Hyperparameters. | |
| # t: number of tokens per training window; also the row count of the position table and the side of the causal mask. | |
| t ← 32 | |
| # c: embedding width (column count of the token and position tables). | |
| c ← 16 | |
| # h: hidden width of the feed-forward layer (w1 is c×h, w2 is h×c). | |
| h ← 32 | |
| # steps: how many times TrainStep is run. | |
| steps ← 4000 | |
| # lr: step size multiplied into each clipped gradient. | |
| lr ← 0.03 | |
| # sampleLen: number of tokens generated after training. | |
| sampleLen ← 64 | |
| # corpusPath: text file read with •FChars. | |
| # NOTE(review): a relative path is presumably resolved against the script's directory — confirm for your BQN implementation. | |
| corpusPath ← "CBQN/data/shakespeare_char/input.txt" | |
| # Linear-algebra helpers; every use in the visible code passes rank-2 arrays. | |
| # 𝕨 MatMul 𝕩: pairs each row of 𝕨 with all of 𝕩 and sums the products over the shared axis (matrix product). | |
| MatMul ← +˝∘×⎉1‿∞ | |
| # ColSum 𝕩: sum along the first axis (for a matrix, one total per column). | |
| ColSum ← +˝ | |
| # RowSum 𝕩: sum of each rank-1 cell (for a matrix, one total per row). | |
| RowSum ← +´⎉1 | |
| # RowMax 𝕩: maximum of each rank-1 cell (for a matrix, one maximum per row). | |
| RowMax ← ⌈´⎉1 | |
| # 𝕨 OneHot 𝕩: compare every index in 𝕩 with each of ↕𝕨, giving one row per index with a 1 in the column it names. | |
| OneHot ← {𝕩 =⌜ ↕𝕨} | |
| # Hash 𝕩: deterministic pseudo-random value in [0,1) — the fractional part of a scaled sine of 𝕩. | |
| Hash ← {1 | 43758.5453 × •math.Sin (12.9898 × 𝕩) + 78.233} | |
| # randOffset: how many hash inputs earlier Rand calls have already consumed. | |
| randOffset ← 0 | |
| # Rand 𝕩: array of shape 𝕩 filled with Hash values. | |
| # Each call continues the hash sequence where the previous one stopped. Without the offset, | |
| # calls with equally sized shapes returned the same values, so wq, wk, wv and wo all started identical. | |
| Rand ← {shape←𝕩 ⋄ n←×´⥊shape ⋄ r←shape ⥊ Hash randOffset+1+↕n ⋄ randOffset +↩ n ⋄ r} | |
| # 𝕨 Init 𝕩: array of shape 𝕩 with entries in [-𝕨, 𝕨) for positive 𝕨; used for the initial weights. | |
| Init ← {scale←𝕨 ⋄ shape←𝕩 ⋄ scale × (2×Rand shape)-1} | |
| # SoftmaxRows 𝕩: softmax over each row of a matrix. | |
| # The row maximum is subtracted before exponentiating, so the largest exponent in every row is 0. | |
| SoftmaxRows ← { | |
| shifted ← 𝕩 -˘ RowMax 𝕩 | |
| expd ← ⋆ shifted | |
| expd ÷˘ RowSum expd | |
| } | |
| # Clip1 𝕩: clamp every element of 𝕩 to the interval [¯1, 1]. | |
| Clip1 ← {¯1 ⌈ 1 ⌊ 𝕩} | |
| # Read the corpus and turn it into integer tokens. | |
| corpus ← •FChars corpusPath | |
| # vocab: the distinct characters, in order of first appearance. | |
| vocab ← ⍷ corpus | |
| # tokens: each character replaced by its position in vocab. | |
| tokens ← vocab ⊐ corpus | |
| # v: vocabulary size. | |
| v ← ≠ vocab | |
| # trainStarts: count of usable window starts; the largest start still leaves room for the target window shifted by one. | |
| trainStarts ← t -˜ ≠tokens | |
| # Trainable parameters. Weight matrices come from Init with scale 0.08; biases start at zero. | |
| # te: token embedding table, one length-c row per vocabulary entry. | |
| te ← 0.08 Init v‿c | |
| # pe: position embedding table, one length-c row per position in the window. | |
| pe ← 0.08 Init t‿c | |
| # wq, wk, wv, wo: c×c query, key, value and output projections of the attention layer. | |
| wq ← 0.08 Init c‿c | |
| wk ← 0.08 Init c‿c | |
| wv ← 0.08 Init c‿c | |
| wo ← 0.08 Init c‿c | |
| # w1, b1, w2, b2: feed-forward layer mapping width c to h and back to c. | |
| w1 ← 0.08 Init c‿h | |
| b1 ← h⥊0.0 | |
| w2 ← 0.08 Init h‿c | |
| b2 ← c⥊0.0 | |
| # wout, bout: output projection from width c to v logits. | |
| wout ← 0.08 Init c‿v | |
| bout ← v⥊0.0 | |
| # mask: t×t table with 1 where the row index is ≥ the column index, | |
| # so each position sees only itself and earlier positions. | |
| mask ← (↕t)≥⌜↕t | |
| # attScale: divisor applied to the attention scores. | |
| attScale ← √c | |
| # Forward 𝕩: run the model on a list of t token ids and return every intermediate value. | |
| # Result order: ⟨x0,q,k,val,s,a,ctx,o,x1,h1,g,m,y,logits⟩ — callers pick entries by position. | |
| Forward ← { | |
| # adds a bias list to every row of a matrix | |
| AddBias ← +⎉1‿∞ | |
| # token rows plus position rows | |
| emb ← pe + 𝕩⊏te | |
| qry ← emb MatMul wq | |
| key ← emb MatMul wk | |
| vals ← emb MatMul wv | |
| # scaled scores; entries outside the causal mask get ¯1e9 added | |
| blocked ← ¯1e9 × ¬mask | |
| scores ← blocked + (qry MatMul ⍉key) ÷ attScale | |
| att ← SoftmaxRows scores | |
| mixed ← att MatMul vals | |
| proj ← mixed MatMul wo | |
| # first residual connection | |
| res1 ← emb + proj | |
| pre ← (res1 MatMul w1) AddBias b1 | |
| act ← •math.Tanh pre | |
| mlp ← (act MatMul w2) AddBias b2 | |
| # second residual connection | |
| res2 ← res1 + mlp | |
| logits ← (res2 MatMul wout) AddBias bout | |
| ⟨emb,qry,key,vals,scores,att,mixed,proj,res1,pre,act,mlp,res2,logits⟩ | |
| } | |
| # TrainStep 𝕩: one gradient step on a randomly chosen window of t tokens. | |
| # Runs Forward, computes the cross-entropy loss against the next-token targets, back-propagates by hand, | |
| # and updates every parameter in place with an element-wise clipped gradient. Returns the loss before the update. | |
| TrainStep ← { | |
| # step index supplied by the caller; not used further in this block | |
| step ← 𝕩 | |
| # window start drawn with •rand.Range; targets are the inputs shifted by one token | |
| start ← •rand.Range trainStarts | |
| idx ← (start+↕t)⊏tokens | |
| tgt ← (start+1+↕t)⊏tokens | |
| idxOh ← v OneHot idx | |
| tgtOh ← v OneHot tgt | |
| # forward pass; entries are picked by their position in Forward's result list | |
| f ← Forward idx | |
| x0 ← 0⊑f | |
| q ← 1⊑f | |
| k ← 2⊑f | |
| val ← 3⊑f | |
| a ← 5⊑f | |
| ctx ← 6⊑f | |
| x1 ← 8⊑f | |
| g ← 10⊑f | |
| y ← 12⊑f | |
| logits ← 13⊑f | |
| # loss: mean over the t positions of the negative natural log of the probability given to the target | |
| probs ← SoftmaxRows logits | |
| picked ← RowSum probs × tgtOh | |
| loss ← (-+´⋆⁼picked) ÷ t | |
| # gradient of the mean loss with respect to the logits | |
| dlogits ← (probs - tgtOh) ÷ t | |
| # output projection | |
| dwout ← (⍉y) MatMul dlogits | |
| dbout ← ColSum dlogits | |
| dy ← dlogits MatMul ⍉wout | |
| # y ← x1 + m: the gradient flows unchanged into both summands | |
| dx1 ← dy | |
| dm ← dy | |
| # feed-forward layer; 1 - g×g is the derivative of tanh written in terms of its output g | |
| dw2 ← (⍉g) MatMul dm | |
| db2 ← ColSum dm | |
| dg ← dm MatMul ⍉w2 | |
| dh1 ← dg × (1 - g×g) | |
| dw1 ← (⍉x1) MatMul dh1 | |
| db1 ← ColSum dh1 | |
| dx1 +↩ dh1 MatMul ⍉w1 | |
| # x1 ← x0 + o: the gradient flows unchanged into both summands | |
| do ← dx1 | |
| dx0 ← dx1 | |
| # attention output projection and weighted sum of values | |
| dwo ← (⍉ctx) MatMul do | |
| dctx ← do MatMul ⍉wo | |
| da ← dctx MatMul ⍉val | |
| dval ← (⍉a) MatMul dctx | |
| # softmax backward per row, then zero the positions outside the causal mask | |
| rowDot ← RowSum da × a | |
| ds ← mask × (a × (da -˘ rowDot)) | |
| # scores were divided by attScale in Forward, so their gradients are too | |
| dq ← (ds MatMul k) ÷ attScale | |
| dk ← ((⍉ds) MatMul q) ÷ attScale | |
| dwq ← (⍉x0) MatMul dq | |
| dwk ← (⍉x0) MatMul dk | |
| dwv ← (⍉x0) MatMul dval | |
| dx0 +↩ (dq MatMul ⍉wq) + (dk MatMul ⍉wk) + (dval MatMul ⍉wv) | |
| # embeddings: the one-hot transpose sums the rows of dx0 that belong to the same token id | |
| dte ← (⍉idxOh) MatMul dx0 | |
| dpe ← dx0 | |
| # gradient descent update; Clip1 limits each gradient element to [¯1, 1] before scaling by lr | |
| te -↩ lr × (Clip1 dte) | |
| pe -↩ lr × (Clip1 dpe) | |
| wq -↩ lr × (Clip1 dwq) | |
| wk -↩ lr × (Clip1 dwk) | |
| wv -↩ lr × (Clip1 dwv) | |
| wo -↩ lr × (Clip1 dwo) | |
| w1 -↩ lr × (Clip1 dw1) | |
| b1 -↩ lr × (Clip1 db1) | |
| w2 -↩ lr × (Clip1 dw2) | |
| b2 -↩ lr × (Clip1 db2) | |
| wout -↩ lr × (Clip1 dwout) | |
| bout -↩ lr × (Clip1 dbout) | |
| loss | |
| } | |
| # Train: call TrainStep once per step index and keep the loss returned by each call. | |
| losses ← TrainStep¨ ↕steps | |
| # Pick a random window as the prompt; tgt is the same window shifted by one token. | |
| seedStart ← •rand.Range trainStarts | |
| seed ← (seedStart+↕t)⊏tokens | |
| tgt ← (seedStart+1+↕t)⊏tokens | |
| # One forward pass over the prompt; entry 13 of Forward's result is the logits table. | |
| ff ← Forward seed | |
| logits ← 13⊑ff | |
| # pred: for each position, the index of the largest logit in that row. | |
| pred ← (⊑∘⍒⎉1) logits | |
| # Greedy generation state: out collects every token id so far, ctx holds the t ids fed to the model next. | |
| out ← seed | |
| ctx ← seed | |
| # GenStep: append the highest-logit token for the last position to out and slide ctx forward. | |
| # The argument is ignored; mentioning 𝕩 makes the block a function, so its body runs on each call. | |
| GenStep ← {𝕩 ⋄ | |
| gg ← Forward ctx | |
| # index of the largest logit in the last row | |
| next ← ⊑⍒ ¯1⊏ 13⊑gg | |
| out ↩ out ∾ <next | |
| # keep only the last t ids as the next context | |
| ctx ↩ (-t)↑ out | |
| next | |
| } | |
| # Generate sampleLen tokens; the results accumulate in out. | |
| GenStep¨ ↕sampleLen | |
| # Result: first and last training loss, then prompt, target, per-position prediction and generated text as characters. | |
| ⟨0⊑losses, ¯1⊑losses, seed⊏vocab, tgt⊏vocab, pred⊏vocab, out⊏vocab⟩ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment