Skip to content

Instantly share code, notes, and snippets.

@andrewnc
Created March 8, 2026 22:06
Show Gist options
  • Select an option

  • Save andrewnc/46cb8445e22abe3700149afe8481a6cd to your computer and use it in GitHub Desktop.

Select an option

Save andrewnc/46cb8445e22abe3700149afe8481a6cd to your computer and use it in GitHub Desktop.
Nano GPT BQN
# nanoGPT-style single-file character-level model in pure BQN.
# Trained on the Tiny Shakespeare text used by Karpathy's nanoGPT / char-rnn.
# The final expression of the script is a 6-element list:
# ⟨ initial_loss, final_loss, prompt, target, direct_prediction, autoregressive_sample ⟩
# Hyperparameters and data location used by the rest of the script.
t ← 32  # window length: tokens per training window and per Forward call (pe is t×c, mask is t×t)
c ← 16  # embedding width: column count of te, pe and the c×c attention projections
h ← 32  # hidden width of the tanh layer (w1 is c×h, w2 is h×c)
steps ← 4000  # number of TrainStep calls
lr ← 0.03  # factor applied to every clipped gradient in TrainStep
sampleLen ← 64  # number of GenStep calls, i.e. tokens appended after the seed
corpusPath ← "CBQN/data/shakespeare_char/input.txt"  # text file read with •FChars
# Matrix product: pairs each row of 𝕨 with all of 𝕩, multiplies, and sums over the leading axis.
MatMul ← {𝕨 +˝∘×⎉1‿∞ 𝕩}
# Sum along the leading axis (for a matrix: one total per column).
ColSum ← {+˝𝕩}
# Sum of every rank-1 cell (for a matrix: one total per row).
RowSum ← {+´⎉1 𝕩}
# Maximum of every rank-1 cell (for a matrix: one maximum per row).
RowMax ← {⌈´⎉1 𝕩}
# 𝕨 OneHot 𝕩: one row per id in 𝕩, with a 1 in the column equal to that id and 0 elsewhere (𝕨 columns).
OneHot ← {𝕩 =⌜ ↕𝕨}
# Deterministic pseudo-random value in [0,1): fractional part of a scaled sine of the argument.
Hash ← {1 | 43758.5453 × •math.Sin 78.233 + 12.9898 × 𝕩}
# Position in the Hash index stream already consumed by earlier Rand calls.
randPos ← 0
# Rand shape: array of the given shape filled with Hash values in [0,1).
# Each call hashes a fresh, disjoint range of indices. Hashing 1+↕n on every call
# (the previous behaviour) made all arrays with the same element count identical,
# so e.g. every c‿c weight matrix started out as the same matrix.
# The first call still hashes 1+↕n, so its result is unchanged.
Rand ← {
  shape ← 𝕩
  n ← ×´⥊shape
  r ← shape ⥊ Hash randPos + 1 + ↕n
  randPos +↩ n   # advance so the next call does not reuse these indices
  r
}
# 𝕨 Init 𝕩: array of shape 𝕩 with Rand values rescaled from [0,1) to [-𝕨, 𝕨).
Init ← {𝕨 × ¯1 + 2 × Rand 𝕩}
# Row-wise softmax. The row maximum is subtracted before exponentiating so ⋆ never sees a positive argument.
SoftmaxRows ← {
  ex ← ⋆ 𝕩 -˘ RowMax 𝕩
  ex ÷˘ RowSum ex
}
# Clamp every element to the interval [-1, 1].
Clip1 ← ¯1 ⌈ 1 ⌊ ⊢
# Load the text and build a character-level vocabulary in order of first appearance.
corpus ← •FChars corpusPath
vocab ← ⍷ corpus                 # distinct characters, first occurrence order
tokens ← ⊐ corpus                # per character: its index in vocab
v ← ≠ vocab                      # vocabulary size
trainStarts ← (≠ corpus) - t     # window starts are drawn below this bound, leaving room for the shifted target window
# Trainable parameters, updated in place by TrainStep.
# Weight arrays come from Init with scale 0.08; bias vectors start at zero.
te ← 0.08 Init v‿c  # token embedding table: one c-wide row per vocabulary entry
pe ← 0.08 Init t‿c  # position embedding: one c-wide row per position in the window
wq ← 0.08 Init c‿c  # query projection
wk ← 0.08 Init c‿c  # key projection
wv ← 0.08 Init c‿c  # value projection
wo ← 0.08 Init c‿c  # projection applied to the attention output
w1 ← 0.08 Init c‿h  # first MLP layer weights (c → h)
b1 ← h⥊0.0  # first MLP layer bias
w2 ← 0.08 Init h‿c  # second MLP layer weights (h → c)
b2 ← c⥊0.0  # second MLP layer bias
wout ← 0.08 Init c‿v  # output weights: c features → one logit per vocabulary entry
bout ← v⥊0.0  # output bias
# Attention constants.
mask ← (↕t)≥⌜↕t  # t×t table, 1 where row index ≥ column index: a position may attend to itself and earlier positions
attScale ← √c  # attention scores are divided by this in Forward
# Forward pass for one window of token ids.
# 𝕩: list of t token ids (pe and mask fix the window length at t).
# Result: the 14 intermediates, in this order:
#   ⟨x0, q, k, val, s, a, ctx, o, x1, h1, g, m, y, logits⟩
# TrainStep and the sampling code pick entries out of this list by position.
Forward ← {
  AddBias ← +⎉1‿∞                       # add a bias vector to every row
  emb ← pe + 𝕩 ⊏ te                     # token embedding plus position embedding
  qry ← emb MatMul wq
  key ← emb MatMul wk
  vals ← emb MatMul wv
  # scaled scores; positions where mask is 0 get ¯1e9 added so softmax sends them to ~0
  scores ← (¯1e9 × ¬mask) + (qry MatMul ⍉key) ÷ attScale
  att ← SoftmaxRows scores
  mix ← att MatMul vals
  proj ← mix MatMul wo
  res1 ← emb + proj                     # first residual connection
  pre ← (res1 MatMul w1) AddBias b1
  act ← •math.Tanh pre
  mlp ← (act MatMul w2) AddBias b2
  res2 ← res1 + mlp                     # second residual connection
  lg ← (res2 MatMul wout) AddBias bout
  ⟨emb, qry, key, vals, scores, att, mix, proj, res1, pre, act, mlp, res2, lg⟩
}
# One SGD step on a randomly chosen window of the corpus.
# 𝕩: step number (not used in the computation).
# Effect: updates te, pe, wq, wk, wv, wo, w1, b1, w2, b2, wout, bout in place.
# Result: mean cross-entropy of the window, computed before the update.
TrainStep ← {
  stepNo ← 𝕩   # unused, but mentioning 𝕩 is what makes this block a function
  at ← •rand.Range trainStarts
  win ← at + ↕t
  idx ← win ⊏ tokens
  tgt ← (1 + win) ⊏ tokens              # targets are the inputs shifted by one position
  idxOh ← v OneHot idx
  tgtOh ← v OneHot tgt

  x0‿q‿k‿val‿s‿a‿ctx‿o‿x1‿h1‿g‿m‿y‿logits ← Forward idx

  # loss: negative mean log-probability of the target characters
  probs ← SoftmaxRows logits
  picked ← RowSum probs × tgtOh
  loss ← (- +´ ⋆⁼ picked) ÷ t

  # output layer
  dlogits ← (probs - tgtOh) ÷ t
  dwout ← (⍉y) MatMul dlogits
  dbout ← ColSum dlogits
  dy ← dlogits MatMul ⍉wout             # y = x1 + m, so this is the gradient of both x1 and m

  # MLP
  dw2 ← (⍉g) MatMul dy
  db2 ← ColSum dy
  dg ← dy MatMul ⍉w2
  dh1 ← dg × ¬ ×˜g                      # tanh derivative: 1 - g²
  dw1 ← (⍉x1) MatMul dh1
  db1 ← ColSum dh1
  dx1 ← dy + dh1 MatMul ⍉w1             # residual path plus MLP path; also the gradient of o

  # attention
  dwo ← (⍉ctx) MatMul dx1
  dctx ← dx1 MatMul ⍉wo
  da ← dctx MatMul ⍉val
  dval ← (⍉a) MatMul dctx
  rowDot ← RowSum da × a
  ds ← mask × a × da -˘ rowDot          # softmax backward, zeroed where attention was masked
  dq ← (ds MatMul k) ÷ attScale
  dk ← ((⍉ds) MatMul q) ÷ attScale
  x0T ← ⍉x0
  dwq ← x0T MatMul dq
  dwk ← x0T MatMul dk
  dwv ← x0T MatMul dval

  # embeddings: residual path plus the three projection paths
  dx0 ← dx1 + (dq MatMul ⍉wq) + (dk MatMul ⍉wk) + dval MatMul ⍉wv
  dte ← (⍉idxOh) MatMul dx0             # scatter each row's gradient onto its token's embedding row

  # SGD update with element-wise gradient clipping
  Delta ← {lr × Clip1 𝕩}
  te -↩ Delta dte
  pe -↩ Delta dx0
  wq -↩ Delta dwq
  wk -↩ Delta dwk
  wv -↩ Delta dwv
  wo -↩ Delta dwo
  w1 -↩ Delta dw1
  b1 -↩ Delta db1
  w2 -↩ Delta dw2
  b2 -↩ Delta db2
  wout -↩ Delta dwout
  bout -↩ Delta dbout
  loss
}
# Train, then evaluate on one more randomly chosen window.
losses ← TrainStep¨ ↕steps                 # per-step losses, in step order
seedStart ← •rand.Range trainStarts
seed ← tokens ⊏˜ seedStart + ↕t            # prompt window
tgt ← tokens ⊏˜ seedStart + 1 + ↕t         # the same window shifted by one: the true next characters
ff ← Forward seed
logits ← ¯1 ⊑ ff                           # logits are the last entry of Forward's result
pred ← ⊑∘⍒˘ logits                         # argmax of every row: predicted next token per position
# State for autoregressive generation: everything emitted so far, and the current model input.
out ← seed
ctx ← seed
# Generate one token greedily and append it to out.
# The argument is ignored. Reads ctx; updates out and ctx; returns the new token id.
GenStep ← {
  𝕩   # mentioning 𝕩 is what makes this block a function
  lastRow ← ¯1 ⊏ ¯1 ⊑ Forward ctx          # logits for the final position of the window
  next ← ⊑ ⍒ lastRow                       # index of the largest logit
  out ∾↩ next
  ctx ↩ (-t) ↑ out                         # slide the window: keep the last t tokens
  next
}
# Generate sampleLen tokens (results accumulate in out), then build the script's result:
# first and last training loss, followed by prompt, target, per-position prediction and
# prompt-plus-generated text, each decoded from token ids back to characters.
GenStep¨ ↕sampleLen
(0‿¯1 ⊏ losses) ∾ ⊏⟜vocab¨ ⟨seed, tgt, pred, out⟩
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment