Skip to content

Instantly share code, notes, and snippets.

@ryanwebster90
Last active November 13, 2025 12:28
Show Gist options
  • Select an option

  • Save ryanwebster90/083348a6e4c10000ecf10de79cf08cbf to your computer and use it in GitHub Desktop.

Select an option

Save ryanwebster90/083348a6e4c10000ecf10de79cf08cbf to your computer and use it in GitHub Desktop.
worddict_tm.c

usage: gcc -O3 -shared -fPIC tm_vm_wordsim.c -o libtm_vm_wordsim.so

python3 wordsim_main.py --in bb6_machines.txt --out bb6_cpp.csv --steps 10000000 --bits 16 --skip-py --batch-c --entry-timeout=128

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <time.h>
#ifdef _WIN32
#define EXPORT __declspec(dllexport)
#else
#define EXPORT __attribute__((visibility("default")))
#endif
typedef struct { uint8_t w; int8_t d; uint8_t ns; } TR;
/* status: 1=halt, 2=exit(left/right), 3=stuck (entry timeout) */
typedef struct { uint8_t status; uint32_t steps; uint64_t new_word; uint8_t ns; int8_t ex; uint8_t bp; } Entry;
typedef struct {
uint8_t used;
uint8_t s;
uint8_t side;
uint64_t word;
Entry val;
} HEntry;
typedef struct {
uint8_t nstates, word_bits;
uint32_t tape_words;
uint64_t *tape;
int32_t head;
uint8_t bitpos;
uint8_t state;
TR tr[32][2];
HEntry* table;
uint64_t cap;
uint64_t sz;
uint64_t fullmask;
uint8_t lastbit;
} TM;
/* time helper */
static inline double now_sec(void){
#ifdef CLOCK_MONOTONIC
struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts);
return (double)ts.tv_sec + (double)ts.tv_nsec*1e-9;
#else
return (double)clock() / (double)CLOCKS_PER_SEC;
#endif
}
static void* xmalloc(size_t n){ void* p=malloc(n); if(!p) abort(); return p; }
static uint64_t rotl64(uint64_t x, int r){ return (x<<r) | (x>>(64-r)); }
static uint64_t mix64(uint64_t x){
x ^= rotl64(x,25) ^ rotl64(x,47);
x *= 0x9ddfea08eb382d69ULL;
x ^= x >> 28;
x *= 0x9ddfea08eb382d69ULL;
x ^= x >> 28;
return x;
}
static uint64_t keyhash(uint8_t s,uint64_t w,uint8_t side){
uint64_t x = w ^ ((uint64_t)s<<1) ^ (uint64_t)side;
return mix64(x);
}
static inline int bitget_word(uint64_t w, uint8_t bp){ return (int)((w>>bp)&1ULL); }
static inline uint64_t bitset_word(uint64_t w, uint8_t bp, int v){
uint64_t m = (1ULL<<bp);
return v ? (w|m) : (w & ~m);
}
/* parse BB normal form */
static int parse_prog(TM* tm,const char* s){
memset(tm->tr,0,sizeof(tm->tr));
const char* p=s; int idx=0;
while(*p){
const char* q=p; while(*q && *q!='_') q++;
if((q-p)!=6) return 0;
char a0=p[0],a1=p[1],a2=p[2],b0=p[3],b1=p[4],b2=p[5];
if(a0=='-'&&a1=='-'&&a2=='-'){ tm->tr[idx][0]=(TR){255,0,25}; } else {
tm->tr[idx][0]=(TR){(uint8_t)(a0-'0'), (int8_t)((a1=='L')?-1:1), (uint8_t)((a2=='Z')?25:(a2-'A'))};
}
if(b0=='-'&&b1=='-'&&b2=='-'){ tm->tr[idx][1]=(TR){255,0,25}; } else {
tm->tr[idx][1]=(TR){(uint8_t)(b0-'0'), (int8_t)((b1=='L')?-1:1), (uint8_t)((b2=='Z')?25:(b2-'A'))};
}
if(*q==0) break;
p=q+1; idx++; if(idx>=32) break;
}
tm->nstates=(uint8_t)(idx+1);
return 1;
}
/* cache (open addressing) */
static void cache_init(TM* tm, uint64_t cap_pow2){
tm->cap = cap_pow2;
tm->sz = 0;
tm->table = (HEntry*)xmalloc(sizeof(HEntry)*cap_pow2);
memset(tm->table,0,sizeof(HEntry)*cap_pow2);
}
static void cache_clear(TM* tm){
memset(tm->table,0,sizeof(HEntry)*tm->cap);
tm->sz=0;
}
static int cache_probe(TM* tm, uint8_t s, uint64_t word, uint8_t side, Entry* out, int insert_if_missing){
uint64_t h = keyhash(s,word,side);
uint64_t i = h & (tm->cap-1);
for(uint64_t k=0;k<tm->cap;k++){
HEntry* e = &tm->table[i];
if(!e->used){
if(insert_if_missing){
e->used=1; e->s=s; e->side=side; e->word=word;
return (int)i;
} else {
return -1;
}
} else if(e->s==s && e->side==side && e->word==word){
if(out) *out = e->val;
return -2;
}
i = (i+1) & (tm->cap-1);
}
return -1;
}
static void cache_store_index(TM* tm, int idx, Entry v){
tm->table[idx].val = v;
tm->sz++;
}
static Entry make_entry(TM* tm, uint8_t s, uint64_t word, uint8_t side, uint32_t entry_timeout){
uint8_t bp = side? tm->lastbit : 0;
uint8_t cs = s;
uint64_t w = word;
uint32_t st=0;
while(1){
if(cs==25){ Entry e={1,st,(w & tm->fullmask),25,0,bp}; return e; }
if(entry_timeout && st>=entry_timeout){
Entry e={3,st,(w & tm->fullmask),cs,0,bp}; return e; /* stuck */
}
int r = bitget_word(w,bp);
TR t = tm->tr[cs][r];
if(t.w==255){ Entry e={1,st,(w & tm->fullmask),25,0,bp}; return e; }
w = bitset_word(w,bp, t.w?1:0);
if(t.d<0){
if(bp==0){ Entry e={2,st+1,(w & tm->fullmask),t.ns,-1,tm->lastbit}; return e; }
else { bp--; cs=t.ns; st++; continue; }
} else {
if(bp==tm->lastbit){ Entry e={2,st+1,(w & tm->fullmask),t.ns,1,0}; return e; }
else { bp++; cs=t.ns; st++; continue; }
}
}
}
/* step kernel */
static inline void single_step(TM* tm){
uint64_t w = tm->tape[tm->head];
int r = (int)((w>>tm->bitpos)&1ULL);
TR t = tm->tr[tm->state][r];
if(t.w==255){ tm->state=25; return; }
if(t.w) w |= (1ULL<<tm->bitpos);
else w &= ~(1ULL<<tm->bitpos);
tm->tape[tm->head] = (w & tm->fullmask);
if(t.d<0){
if(tm->bitpos==0){ tm->head-=1; tm->bitpos=tm->lastbit; }
else tm->bitpos-=1;
} else {
if(tm->bitpos==tm->lastbit){ tm->head+=1; tm->bitpos=0; }
else tm->bitpos+=1;
}
tm->state=t.ns;
}
/* API */
EXPORT TM* tm_create(uint32_t tape_words, uint8_t word_bits){
if(word_bits!=8 && word_bits!=16 && word_bits!=32 && word_bits!=64) word_bits=8;
TM* tm=(TM*)xmalloc(sizeof(TM));
memset(tm,0,sizeof(TM));
tm->tape_words=tape_words;
tm->word_bits=word_bits;
tm->tape=(uint64_t*)xmalloc(sizeof(uint64_t)*tape_words);
for(uint32_t i=0;i<tape_words;i++) tm->tape[i]=0;
tm->head=(int32_t)(tape_words/2);
tm->bitpos=0;
tm->state=0;
tm->lastbit=(uint8_t)(word_bits-1);
tm->fullmask = (word_bits==64)?~0ULL:((1ULL<<word_bits)-1ULL);
cache_init(tm, 1ULL<<20);
return tm;
}
EXPORT void tm_destroy(TM* tm){
if(!tm) return;
free(tm->tape);
free(tm->table);
free(tm);
}
EXPORT int tm_set_program(TM* tm,const char* prog){
return parse_prog(tm,prog);
}
EXPORT void tm_reset(TM* tm){
for(uint32_t i=0;i<tm->tape_words;i++) tm->tape[i]=0;
tm->head=(int32_t)(tm->tape_words/2);
tm->bitpos=0;
tm->state=0;
}
EXPORT uint64_t tm_run_wordsim_with_timeout(TM* tm, uint64_t steps, uint8_t promote, uint64_t cache_build_budget, uint32_t entry_timeout,
uint8_t* halted, uint64_t* out_hits, uint64_t* out_compressed, uint64_t* out_new){
uint64_t st=0, hits=0, compressed=0, created=0;
uint8_t allow_new = 1;
*halted=0;
while(st<steps && tm->state!=25){
if(tm->head<0 || (uint32_t)tm->head>=tm->tape_words){ *halted=3; break; }
if(promote && tm->bitpos>0 && tm->bitpos<tm->lastbit){
uint8_t distL = tm->bitpos;
uint8_t distR = tm->lastbit - tm->bitpos;
if(distL<=promote || distR<=promote){
single_step(tm); st++; continue;
}
}
if(tm->bitpos==0 || tm->bitpos==tm->lastbit){
uint8_t side = (tm->bitpos==tm->lastbit)?1:0;
uint64_t w = (tm->tape[tm->head] & tm->fullmask);
Entry val; int probe = cache_probe(tm, tm->state, w, side, &val, 0);
if(probe==-2){
/* found */
} else {
if(allow_new && created<cache_build_budget){
int place = cache_probe(tm, tm->state, w, side, NULL, 1);
Entry v = make_entry(tm, tm->state, w, side, entry_timeout);
cache_store_index(tm, place, v);
created++;
val = v;
if(created>=cache_build_budget) allow_new=0;
} else {
single_step(tm); st++; continue;
}
}
if(val.status==3){
single_step(tm); st++; continue;
}
if(st + val.steps > steps){
while(st<steps && tm->state!=25){ single_step(tm); st++; }
break;
}
tm->tape[tm->head] = (val.new_word & tm->fullmask);
st += val.steps; compressed += val.steps; hits++;
if(val.status==1){ tm->state=25; break; }
tm->state = val.ns;
if(val.ex<0){ tm->head-=1; tm->bitpos=val.bp; }
else { tm->head+=1; tm->bitpos=val.bp; }
} else {
single_step(tm); st++;
}
}
if(out_hits) *out_hits = hits;
if(out_compressed) *out_compressed = compressed;
if(out_new) *out_new = created;
if(tm->state==25) *halted=1;
return st;
}
EXPORT uint64_t tm_run_wordsim(TM* tm, uint64_t steps, uint8_t promote, uint64_t cache_build_budget,
uint8_t* halted, uint64_t* out_hits, uint64_t* out_compressed, uint64_t* out_new){
return tm_run_wordsim_with_timeout(tm, steps, promote, cache_build_budget, 0, halted, out_hits, out_compressed, out_new);
}
EXPORT uint32_t tm_popcount(TM* tm){
uint32_t s=0;
if(tm->word_bits==64){
for(uint32_t i=0;i<tm->tape_words;i++){
uint64_t x=tm->tape[i];
x = (x & 0x5555555555555555ULL) + ((x>>1) & 0x5555555555555555ULL);
x = (x & 0x3333333333333333ULL) + ((x>>2) & 0x3333333333333333ULL);
x = (x + (x>>4)) & 0x0F0F0F0F0F0F0F0FULL;
x = x + (x>>8);
x = x + (x>>16);
x = x + (x>>32);
s += (uint32_t)(x & 0x7F);
}
} else {
for(uint32_t i=0;i<tm->tape_words;i++){
uint64_t x=tm->tape[i] & tm->fullmask;
while(x){ s += (uint32_t)(x & 1ULL); x >>= 1; }
}
}
return s;
}
EXPORT void tm_run_wordsim_batch(uint32_t tape_words, uint8_t word_bits,
const char** progs, uint32_t n,
uint64_t steps, uint8_t promote,
uint64_t cache_build_budget, uint32_t entry_timeout,
uint8_t* halted_out, uint64_t* steps_out, uint32_t* pop_out,
double* time_out){
TM* tm = tm_create(tape_words, word_bits);
for(uint32_t i=0;i<n;i++){
const char* p = progs[i];
if ((i & 127u) == 0) fprintf(stderr,"[batch] sim %u/%u : %s\n",(unsigned)(i+1),(unsigned)n,p?p:"(null)");
if(!p){ halted_out[i]=2; steps_out[i]=0; pop_out[i]=0; time_out[i]=0.0; continue; }
tm_set_program(tm, p);
tm_reset(tm);
cache_clear(tm);
uint8_t halted=0;
uint64_t hits=0, comp=0, neu=0;
double t0 = now_sec();
uint64_t done = tm_run_wordsim_with_timeout(tm, steps, promote, cache_build_budget, entry_timeout,
&halted, &hits, &comp, &neu);
double t1 = now_sec();
halted_out[i]=halted;
steps_out[i]=done;
pop_out[i]=tm_popcount(tm);
time_out[i]=t1 - t0;
}
tm_destroy(tm);
}
#!/usr/bin/env python3
import os, sys, time, csv, ctypes, argparse, numpy as np
class TMRef:
def parse(self,s):
t={}; blks=s.split('_')
for i,blk in enumerate(blks):
a=blk[:3]; b=blk[3:]
for tr,j in ((a,0),(b,1)):
if tr=='---': t[(i,j)]=(255,0,25); continue
w=int(tr[0]); d=-1 if tr[1]=='L' else 1; ns=25 if tr[2]=='Z' else ord(tr[2])-65
t[(i,j)]=(w,d,ns)
return t,len(blks)
class WordSimPy:
def __init__(self,bits=64,promote=2,entry_max=65536):
self.bits=bits; self.last=bits-1
self.dtype=np.uint64 if bits==64 else {8:np.uint8,16:np.uint16,32:np.uint32}[bits]
self.cache={}; self.promote=promote; self.entry_max=entry_max
def _ensure(self,a,i,slack=4096):
if i<0:
pad=np.zeros((-i+slack,),dtype=self.dtype); a=np.concatenate([pad,a]); return a,i+pad.size
if i>=a.size:
pad=np.zeros((i-a.size+1+slack,),dtype=self.dtype); a=np.concatenate([a,pad]); return a,i
return a,i
def _step(self,a,i,bp,s,tr,mm):
r=(int(a[i])>>bp)&1; e=tr[(s,r)]
if e[0]==255: return a,i,bp,25,True,mm
w,d,ns=e
v=int(a[i]); m=(1<<bp)
if w: v|=m
else: v&=~m
a[i]=self.dtype(v)
absb=i*self.bits+bp
if mm[0] is None or absb<mm[0]: mm[0]=absb
if mm[1] is None or absb>mm[1]: mm[1]=absb
if d<0:
bp-=1
if bp<0: i-=1; bp=self.last
else:
bp+=1
if bp>self.last: i+=1; bp=0
return a,i,bp,ns,False,mm
def _entry(self,prog,s,word,side,tr):
k=(s,int(word),side)
if k in self.cache: return self.cache[k],False
a=np.array([word],dtype=self.dtype); i=0; bp=0 if side==0 else self.last; st=0; cs=s; halted=False
while cs!=25 and 0<=i<1 and not halted:
a,i,bp,cs,halted,_=self._step(a,i,bp,cs,tr,[None,None]); st+=0 if halted else 1
if st>=self.entry_max:
v=('stuck',st,int(a[0]),cs,0,bp); self.cache[k]=v; return v,True
if cs==25: v=('halt',st,int(a[0]),25,0,bp)
elif i<0: v=('left',st,int(a[0]),cs,-1,bp)
else: v=('right',st,int(a[0]),cs,1,bp)
self.cache[k]=v; return v,True
def run(self,prog,steps):
tr,_=TMRef().parse(prog)
a=np.zeros(4096,dtype=self.dtype); i=a.size//2; bp=0; s=0; st=0; halted=False
t0=time.perf_counter(); mm=[None,None]
while st<steps and s!=25 and not halted:
a,i=self._ensure(a,i)
if self.promote and 0<bp<self.last and min(bp,self.last-bp)<=self.promote:
a,i,bp,s,halted,mm=self._step(a,i,bp,s,tr,mm); st+=0 if halted else 1; continue
if bp in (0,self.last):
k=(s,int(a[i]),0 if bp==0 else 1)
if k in self.cache: status,u,neww,ns,ex,bp2=self.cache[k]
else: (status,u,neww,ns,ex,bp2),_=self._entry(prog,*k,tr)
if status not in ('halt','left','right'):
a,i,bp,s,halted,mm=self._step(a,i,bp,s,tr,mm); st+=0 if halted else 1; continue
if u>0 and u<=steps-st:
a[i]=self.dtype(neww); st+=u
if status=='halt': s=25; break
s=ns; i=(i-1 if ex<0 else i+1); bp=bp2
else:
a,i,bp,s,halted,mm=self._step(a,i,bp,s,tr,mm); st+=0 if halted else 1
else:
a,i,bp,s,halted,mm=self._step(a,i,bp,s,tr,mm); st+=0 if halted else 1
t=time.perf_counter()-t0
ones=0; mask=(1<<self.bits)-1 if self.bits<64 else (2**64-1)
for word in a:
v=int(word)&mask
while v: ones+= (v&1); v>>=1
space = 0 if mm[0] is None else (mm[1]-mm[0]+1)
return st, 1 if s==25 else 0, ones, space, t
class CTMW:
def __init__(self, bits, so):
self.L=ctypes.CDLL(so)
self.L.tm_create.restype=ctypes.c_void_p
self.L.tm_set_program.argtypes=[ctypes.c_void_p,ctypes.c_char_p]
self.L.tm_set_program.restype=ctypes.c_int
self.L.tm_run_wordsim_with_timeout.argtypes=[ctypes.c_void_p,ctypes.c_ulonglong,ctypes.c_uint8,ctypes.c_ulonglong,ctypes.c_uint32,ctypes.POINTER(ctypes.c_uint8),ctypes.POINTER(ctypes.c_ulonglong),ctypes.POINTER(ctypes.c_ulonglong),ctypes.POINTER(ctypes.c_ulonglong)]
self.L.tm_run_wordsim_with_timeout.restype=ctypes.c_ulonglong
self.L.tm_popcount.argtypes=[ctypes.c_void_p]
self.L.tm_popcount.restype=ctypes.c_uint32
self.L.tm_reset.argtypes=[ctypes.c_void_p]
self.vm=self.L.tm_create(ctypes.c_uint32(1<<20),ctypes.c_uint8(bits))
def set_prog(self,p):
ok=self.L.tm_set_program(self.vm,p.encode())
if not ok: raise RuntimeError("parse failed")
def reset(self): self.L.tm_reset(self.vm)
def run(self,steps,promote=2,budget=(1<<60),entry_timeout=65536):
halted=ctypes.c_uint8(0); hits=ctypes.c_ulonglong(0); comp=ctypes.c_ulonglong(0); newe=ctypes.c_ulonglong(0)
t0=time.perf_counter()
st=self.L.tm_run_wordsim_with_timeout(self.vm,ctypes.c_ulonglong(steps),ctypes.c_uint8(promote),ctypes.c_ulonglong(budget),ctypes.c_uint32(entry_timeout),ctypes.byref(halted),ctypes.byref(hits),ctypes.byref(comp),ctypes.byref(newe))
t=time.perf_counter()-t0
pc=self.L.tm_popcount(self.vm)
return st, int(halted.value), pc, -1, t
class CTMWBatch:
def __init__(self, so):
self.L=ctypes.CDLL(so)
self.L.tm_run_wordsim_batch.argtypes = [
ctypes.c_uint32, ctypes.c_uint8,
ctypes.POINTER(ctypes.c_char_p), ctypes.c_uint32,
ctypes.c_ulonglong, ctypes.c_uint8,
ctypes.c_ulonglong, ctypes.c_uint32,
ctypes.POINTER(ctypes.c_uint8),
ctypes.POINTER(ctypes.c_ulonglong),
ctypes.POINTER(ctypes.c_uint32),
ctypes.POINTER(ctypes.c_double)
]
def run_batch(self, programs, steps, bits=32, promote=2, entry_timeout=65536, tape_words=(1<<20)):
n=len(programs)
arr=(ctypes.c_char_p * n)(*[p.encode() for p in programs])
halted=(ctypes.c_uint8 * n)()
steps_out=(ctypes.c_ulonglong * n)()
pops=(ctypes.c_uint32 * n)()
times=(ctypes.c_double * n)()
self.L.tm_run_wordsim_batch(
ctypes.c_uint32(tape_words), ctypes.c_uint8(bits),
arr, ctypes.c_uint32(n),
ctypes.c_ulonglong(steps), ctypes.c_uint8(promote),
ctypes.c_ulonglong(1<<60), ctypes.c_uint32(entry_timeout),
halted, steps_out, pops, times
)
return [int(h) for h in halted], [int(s) for s in steps_out], [int(p) for p in pops], [float(t) for t in times]
def read_lines(path):
with open(path,'r') as f:
for line in f:
s=line.strip()
if s: yield s
def write_rows(out_csv, rows):
exists=os.path.exists(out_csv)
with open(out_csv, 'a' if exists else 'w', newline='') as f:
w=csv.writer(f)
if not exists: w.writerow(['method','machine','status','steps','sigma','space','time_s'])
for r in rows: w.writerow(r)
def summarize(in_csv, out_csv):
data={}
with open(in_csv,'r') as f:
rd=csv.DictReader(f)
for row in rd:
m=row['method']; data.setdefault(m,[]).append(float(row['time_s']))
with open(out_csv,'w',newline='') as f:
w=csv.writer(f); w.writerow(['method','n','median_s','mean_s','var_s2','total_steps'])
for m,ts in data.items():
n=len(ts); med=np.median(ts) if n else float('nan'); mean=float(np.mean(ts)) if n else float('nan')
var=float(np.var(ts, ddof=0)) if n else float('nan')
total=0
with open(in_csv,'r') as g:
rd=csv.DictReader(g)
for row in rd:
if row['method']==m:
try: total+=int(row['steps'])
except: pass
w.writerow([m,n,med,mean,var,total])
def main():
ap=argparse.ArgumentParser()
ap.add_argument('--in', dest='inp', required=True)
ap.add_argument('--out', dest='out', required=True)
ap.add_argument('--steps', type=int, default=50_000_000)
ap.add_argument('--bits', default='8,16,64')
ap.add_argument('--promote', type=int, default=2)
ap.add_argument('--lib', default='./libtm_vm_wordsim.so')
ap.add_argument('--skip-py', action='store_true')
ap.add_argument('--skip-c', action='store_true')
ap.add_argument('--entry-max', type=int, default=65536) # Python entry timeout
ap.add_argument('--entry-timeout', type=int, default=65536) # C entry timeout
ap.add_argument('--batch-c', action='store_true') # use C batch API (per-machine times)
args=ap.parse_args()
bits=[int(x) for x in args.bits.split(',') if x.strip()]
machines=list(read_lines(args.inp))
rows=[]
if not args.skip_py:
for machine in machines:
for b in bits:
sim=WordSimPy(bits=b, promote=args.promote, entry_max=args.entry_max)
st,h,pc,space,t=sim.run(machine,args.steps)
rows.append([f'word_py_u{b}', machine, 'Halt' if h else 'Unknown', st, pc, space, f'{t:.9f}'])
if not args.skip_c:
if args.batch_c:
cb=CTMWBatch(args.lib)
for b in bits:
h,st,pc,times = cb.run_batch(machines, args.steps, bits=b, promote=args.promote, entry_timeout=args.entry_timeout)
for m_i,m in enumerate(machines):
rows.append([f'word_c_u{b}', m, 'Halt' if h[m_i] else 'Unknown', st[m_i], pc[m_i], -1, f'{times[m_i]:.9f}'])
else:
for machine in machines:
for b in bits:
c=CTMW(b,args.lib); c.set_prog(machine); c.reset()
st,h,pc,space,t=c.run(args.steps, promote=args.promote, budget=(1<<60), entry_timeout=args.entry_timeout)
rows.append([f'word_c_u{b}', machine, 'Halt' if h else 'Unknown', st, pc, space, f'{t:.9f}'])
write_rows(args.out, rows)
summarize(args.out, args.out.replace('.csv','_comparison.csv'))
if __name__=='__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment