Skip to content

Instantly share code, notes, and snippets.

WOMIR APC Generation Profiling Results

Date: 2026-03-10 Block: 24171384, --apc-count 10

High-Level Timing

Phase Wall Time
APC generation (parallel) 57.1 min
@leonardoalt
leonardoalt / diff-hintstore-cu.diff
Last active March 4, 2026 22:19
CUDA diffs: WOMIR HintStore vs upstream OpenVM rv32im HintStore
Diff of extensions/womir_circuit/cuda/src/hintstore.cu vs upstream extensions/rv32im/circuit/cuda/src/hintstore.cu
WOMIR-specific changes:
- WomirExecutionState (has fp) instead of ExecutionState
- fp and fp_read_aux fields in record header and columns
- Timestamp delta is 4 per row (fp_read=+0, mem_ptr_read=+1, num_words_read=+2, write=+3) instead of 3
- womir_ prefix on kernel/extern names
- Different includes (histogram.cuh, offline_checker.cuh, womir/execution.cuh)
--- /home/leo/.cargo/git/checkouts/openvm-77dd23e285a1262c/72e9013/extensions/rv32im/circuit/cuda/src/hintstore.cu 2026-03-02 19:09:24.445511386 +0100
@leonardoalt
leonardoalt / diff-call-vs-loadstore-adapter.cuh.diff
Last active March 4, 2026 16:43
CUDA diffs for Call chip GPU tracegen vs LoadStore (closest implementation)
--- extensions/womir_circuit/cuda/include/womir/adapters/loadstore.cuh 2026-03-04 17:32:05.607231710 +0100
+++ extensions/womir_circuit/cuda/include/womir/adapters/call.cuh 2026-03-04 17:36:26.770791861 +0100
@@ -1,62 +1,69 @@
-// Adapted from <openvm>/extensions/rv32im/circuit/cuda/include/rv32im/adapters/loadstore.cuh
-// Main changes: adds frame pointer (fp) field, WomirExecutionState, fp_read_aux, timestamp +1 shift,
-// imm_lo/imm_hi instead of imm/imm_sign
+// Call adapter CUDA implementation for GPU tracegen.
+// Handles the 6 memory operations (reads/writes) and carry-chain arithmetic
+// for the Call/CallIndirect/Ret instructions.
#pragma once
@leonardoalt
leonardoalt / diff-jump-vs-alu-adapter.diff
Created March 4, 2026 16:22
CUDA diff: Jump vs ALU implementation (womir-openvm)
--- extensions/womir_circuit/cuda/include/womir/adapters/alu.cuh 2026-03-04 17:06:33.450505710 +0100
+++ extensions/womir_circuit/cuda/include/womir/adapters/jump.cuh 2026-03-04 17:10:52.534016146 +0100
@@ -1,9 +1,8 @@
-// Adapted from <openvm>/extensions/rv32im/circuit/cuda/include/rv32im/adapters/alu.cuh
-// Main changes: adds frame pointer (fp) field, WomirExecutionState, fp_read_aux, timestamp +1 shift
-// Diff: https://gist.github.com/leonardoalt/09fd3d60bd571851bb656dc53cec0a4b#file-diff-adapters-alu-cuh-diff
+// WOMIR Jump adapter for CUDA tracegen.
+// Reads FP + 1 register (condition/offset), no writes.
+// Simpler than the ALU adapter: single register read, no rs2, no write.
#pragma once
@leonardoalt
leonardoalt / diff-alu-adapter-vs-const32-cuh.diff
Created March 4, 2026 15:37
Diffs between Const32 and ALU CUDA tracegen implementations
--- extensions/womir_circuit/cuda/include/womir/adapters/alu.cuh 2026-03-04 16:22:21.368217432 +0100
+++ extensions/womir_circuit/cuda/include/womir/const32.cuh 2026-03-04 16:26:53.584878211 +0100
@@ -1,9 +1,9 @@
-// Adapted from <openvm>/extensions/rv32im/circuit/cuda/include/rv32im/adapters/alu.cuh
-// Main changes: adds frame pointer (fp) field, WomirExecutionState, fp_read_aux, timestamp +1 shift
-// Diff: https://gist.github.com/leonardoalt/09fd3d60bd571851bb656dc53cec0a4b#file-diff-adapters-alu-cuh-diff
+// CUDA tracegen for Const32 chip.
+// Unlike ALU chips, Const32 has no adapter+core split: it's a single unified structure.
#pragma once
@leonardoalt
leonardoalt / diff-adapters-alu.cuh.diff
Created March 3, 2026 08:17
Diffs of womir CUDA files vs OpenVM rv32im originals
--- /home/leo/devel/openvm/extensions/rv32im/circuit/cuda/include/rv32im/adapters/alu.cuh 2026-03-02 14:57:12.776007921 +0100
+++ /home/leo/devel/womir-openvm/extensions/womir_circuit/cuda/include/womir/adapters/alu.cuh 2026-03-02 15:50:33.500343846 +0100
@@ -7,63 +7,81 @@
using namespace riscv;
-template <typename T> struct Rv32BaseAluAdapterCols {
- ExecutionState<T> from_state; // { pub pc: T, pub timestamp: T}
+// WOMIR ExecutionState includes frame pointer (fp) between pc and timestamp.
+template <typename T> struct WomirExecutionState {
; fixed inputs
(declare-const imm_limbs_1 Int)
(declare-const imm_limbs_2 Int)
(declare-const imm_limbs_3 Int)
(declare-const from_pc Int)
; two versions of every variable and constraint, to check for nondeterminism
(declare-const pc_limbs_1 Int)
(declare-const pc_limbs_2 Int)
./INSTALL
+ cargo run --bin creusot-install --
Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.14s
Running `target/debug/creusot-install`
Building prelude...
Installing Why3 and Why3find...
Package creusot-deps does not exist, create as a NEW package? [y/n] y
The following additional pinnings are required by creusot-deps.dev:
- why3.git-9c6f at git+https://gitlab.inria.fr/why3/why3.git#9c6f54fbb
- why3-ide.git-9c6f at git+https://gitlab.inria.fr/why3/why3.git#9c6f54fbb
use std::machines::large_field::binary::Binary;
use std::machines::range::Bit2;
use std::machines::range::Bit6;
use std::machines::range::Bit7;
use std::machines::range::Byte;
use std::machines::range::Byte2;
use std::machines::binary::ByteBinary;
use std::machines::split::ByteCompare;
use std::machines::large_field::shift::ByteShift;
use std::machines::hash::poseidon_gl_memory::PoseidonGLMemory;
#![no_main]
#![no_std]
extern crate powdr_riscv_runtime;
#[no_mangle]
fn main() {
loop_test_matrix();
}