Skip to content

Instantly share code, notes, and snippets.

@opsec-ee
Last active March 16, 2026 06:34
Show Gist options
  • Select an option

  • Save opsec-ee/bd829c3781bcfaace131628f8f4e6689 to your computer and use it in GitHub Desktop.

Select an option

Save opsec-ee/bd829c3781bcfaace131628f8f4e6689 to your computer and use it in GitHub Desktop.
/* Update 3,16,2026 H.Overman
*
* Erdos-Selfridge Prime Categorization Engine v3.4
* Copyright (c) 2025 H.O <opsec.ee@pm.me>
*
* Elapsed: 0.0665 s
* Throughput: 15024489 primes/s
*
* Euman = integral(optimization/time) delta performance
*
* =======================================================================
* INVARIANT FOUNDATION (IFP)
* =======================================================================
*
* I1: sieve(limit) -> primes[] pure bit-packed generator
* I2: RAMstore(prime) -> index Feistel hash stored in slot,
* O(1) guaranteed, probe<=16
* I3: spf[n] -> factor O(1) direct array lookup,
* zero division
* I4: cat(p) -> u8 Gate-X propagated, never swallowed
* I5: memo[i] <-> 4-bit packed bijective store/recall
*
* =======================================================================
* CHANGES v3.3 -> v3.4
* =======================================================================
*
* W1 — uint16_t SPF (32MB vs 64MB, 2x cache density):
*
* Correctness proof:
* Composite n has smallest prime factor <= sqrt(n).
* For n <= SIEVE_LIMIT = 16,000,000:
* sqrt(16,000,000) = 4000 < 65,535 = UINT16_MAX.
* Therefore all composite SPF values fit in uint16_t.
*
* Prime sentinel = 0:
* MAP_ANONYMOUS provides zero-initialised pages — no explicit
* prime write needed. During build, composites get their smallest
* factor written; primes stay 0. During factorize_spf:
* f = g_spf[n] ? g_spf[n] : (uint32_t)n
* Branch is maximally predictable: composites dominate in p+1 walks.
*
* Cache impact:
* uint16_t: 32 entries per 64-byte cache line (was 16).
* 32MB table: 16,384 x 2MB huge pages vs 8,192 x 4KB pages
* post-madvise. TLB pressure halved vs v3.3.
*
* W2 — madvise(MADV_HUGEPAGE) on SPF mmap:
*
* Collapses TLB entries: 32MB / 4KB = 8192 entries (4K pages)
* -> 32MB / 2MB = 16 entries (huge pages, THP).
* i7 L2 TLB: 1536 entries (4K). SPF at 8192 entries overflows it
* significantly. At ~100 cycles per TLB miss, this is measurable.
* MADV_HUGEPAGE enables Transparent Huge Pages on Arch Linux.
*
* W3 — Software prefetch in main loop:
*
* __builtin_prefetch(&g_spf[g_primes[i + PREFETCH_DIST] + 1], 0, 1)
* Issues L1 prefetch hint for the SPF entry of the future prime's p+1.
* Covers ~40 cycle L3 latency at 13.8M primes/s per-prime ~72 cycles.
* PREFETCH_DIST = 8: 8 * 72 cycles = 576 cycles lookahead >= L3 lat.
* Locality 1 (L2): SPF hot entries promoted to L2 before access.
*
* Also prefetches memo word for future prime — sequential access,
* hardware prefetcher likely handles it, but explicit hint costs zero.
*
* spf_build simplification:
* v3.3: g_spf[p] = p unconditional + loop from p+p.
* v3.4: g_spf[p] stays 0 (prime sentinel = MAP_ANONYMOUS zero).
* Loop from p+p with g_spf[m]==0 guard — first writer wins.
* Eliminates 1M explicit prime writes to spf table.
*
* All v3.3 bug fixes carried forward:
* BUG-1: CATEGORY_GATE_X = 0x0F (4-bit, no truncation)
* BUG-2: assert on rs_build probe cap
* PERF-2: __builtin_expect on TRACK_FIRST_N_PRIMES branch
* MICRO: spf_build starts inner loop at p+p
*
* Build:
* gcc -std=c23 -O3 -march=native -flto -Wall -Wextra -DNDEBUG \
* -funroll-loops prime_ee.c -o prime_engine
*/
#define _GNU_SOURCE /* madvise, MADV_HUGEPAGE */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <inttypes.h>
#include <sys/mman.h>
#include <assert.h>
/* =================================================================
* CONFIGURATION
* ================================================================= */
constexpr uint32_t SIEVE_LIMIT = 16000000u;
constexpr uint32_t MAX_PRIMES = 1000000u;
/*
* CATEGORY_GATE_X = 0x0F: fits in 4-bit memo slot exactly.
* Valid category range: 1..14. Sentinel 15 reserved.
* static_assert enforces no collision at build time.
*/
constexpr uint8_t CATEGORY_GATE_X = 0x0Fu;
constexpr uint32_t MAX_CATEGORIES = 15u;
static_assert(MAX_CATEGORIES <= 15u,
"category 15 (0x0F) is reserved for CATEGORY_GATE_X sentinel");
constexpr uint32_t FACTORIZATION_BUFFER = 31u;
constexpr uint32_t TRACK_FIRST_N_PRIMES = 200u;
constexpr uint32_t MAX_PRIMES_PER_CATEGORY = 100u;
/*
* RAMstore: 2^21 slots, load ~47.7% at 1M primes.
* OPT-335: HASH_MASK eliminates modulo.
*/
constexpr uint32_t HASH_BITS = 21u;
constexpr uint32_t HASH_SIZE = 1u << HASH_BITS;
constexpr uint32_t HASH_MASK = HASH_SIZE - 1u;
constexpr uint32_t RS_PROBE_MAX = 16u;
/*
* Software prefetch lookahead distance (main loop).
* 8 primes ahead at ~72 cycles/prime = ~576 cycles = L3 latency.
* Tune up/down based on measured throughput.
*/
constexpr uint32_t PREFETCH_DIST = 8u;
/* =================================================================
* 144,000 RATIO SYSTEM — exact rational timing, no IEEE 754
*
* {{0 [ ticks:u64 (AS/.\IS) ee_ratio_t{num,den} ] 1}}
* ================================================================= */
typedef struct {
uint64_t num;
uint64_t den;
} ee_ratio_t;
static inline ee_ratio_t ee_ratio_elapsed(clock_t start, clock_t end)
{
return (ee_ratio_t){
.num = (uint64_t)(end - start),
.den = (uint64_t)CLOCKS_PER_SEC
};
}
static inline uint64_t ee_ratio_secs(ee_ratio_t r)
{
return r.den ? r.num / r.den : 0u;
}
static inline uint64_t ee_ratio_frac10k(ee_ratio_t r)
{
if (!r.den) return 0u;
return ((r.num % r.den) * 10000u) / r.den;
}
static inline uint64_t ee_ratio_throughput(uint32_t count, ee_ratio_t elapsed)
{
if (!elapsed.num) return 0u;
return ((uint64_t)count * elapsed.den) / elapsed.num;
}
/* =================================================================
* euman_hash64 — 36-bit Feistel bijective slot function
*
* ONE definition. TWO call sites: rs_build(), rs_lookup().
* No other hash in this file.
*
* Proof: euman_hashsec 4B bitmap, zero collisions.
* E[probes] = |T|/|S| = 2^36 / 62^6 approx 1.21.
*
* {{0 [ prime:u64 (AS/.\IS) slot:u32 ] 1}}
* Master key = euman_hashsec SECRET_KEY.
* ================================================================= */
constexpr uint64_t FEISTEL_MASTER = UINT64_C(0x1234567890ABCDEF);
constexpr uint64_t FEISTEL_MASK18 = 0x3FFFFull;
constexpr uint64_t FEISTEL_K0 = (FEISTEL_MASTER) & FEISTEL_MASK18;
constexpr uint64_t FEISTEL_K1 = (FEISTEL_MASTER >> 9) & FEISTEL_MASK18;
constexpr uint64_t FEISTEL_K2 = (FEISTEL_MASTER >> 18) & FEISTEL_MASK18;
constexpr uint64_t FEISTEL_K3 = (FEISTEL_MASTER >> 27) & FEISTEL_MASK18;
static inline uint64_t feistel_round(uint64_t half, uint64_t rk)
{
half ^= rk;
half *= UINT64_C(0x9E3779B97);
half ^= half >> 9;
return half & FEISTEL_MASK18;
}
static inline uint64_t feistel_encrypt_36(uint64_t v)
{
uint64_t L = (v >> 18) & FEISTEL_MASK18;
uint64_t R = v & FEISTEL_MASK18;
uint64_t t;
t = R; R = L ^ feistel_round(R, FEISTEL_K0); L = t;
t = R; R = L ^ feistel_round(R, FEISTEL_K1); L = t;
t = R; R = L ^ feistel_round(R, FEISTEL_K2); L = t;
t = R; R = L ^ feistel_round(R, FEISTEL_K3); L = t;
return (L << 18) | R;
}
static inline uint32_t ee_hash64_slot(uint64_t prime)
{
return (uint32_t)(feistel_encrypt_36(prime) & HASH_MASK);
}
/* =================================================================
* RAMSTORE SLOT — I2
*
* 16 bytes, alignas(64): 4 slots per cache line.
* prime==0 sentinel: 0 is never a valid prime.
* Key stored in slot: no g_primes[] dereference on hit.
*
* {{0 [ prime:u64 (AS/.\IS) index:u32 ] 1}}
* Z: prime == 0
* X: not found within RS_PROBE_MAX probes -> UINT32_MAX
* 1: g_rs[s].prime == prime -> return g_rs[s].index
* ================================================================= */
typedef struct {
uint64_t prime;
uint32_t index;
uint32_t pad;
} prime_rs_slot_t;
static_assert(sizeof(prime_rs_slot_t) == 16,
"prime_rs_slot_t must be 16 bytes");
alignas(64) static prime_rs_slot_t g_rs[HASH_SIZE];
/* =================================================================
* EXBOXING-INSPIRED STRUCT PACKING
*
* prime_factor_t — one u64, register-width, zero padding:
* bits[55:0] = factor value (25 bits sufficient for n <= 16M)
* bits[63:56] = power (8 bits; max power(2) for n<=16M = 24)
*
* factorization_t.status — one u8:
* bits[4:0] = count (max 31 = FACTORIZATION_BUFFER)
* bit[7] = gate_x
*
* Total factorization_t = 31*8 + 1 = 249 bytes, 4 cache lines.
*
* {{0 [ (factor,power) (AS/.\IS) packed:u64 ] 1}}
* ================================================================= */
typedef struct {
uint64_t packed;
} prime_factor_t;
static inline prime_factor_t pf_make(uint64_t factor, uint8_t power)
{
return (prime_factor_t){
.packed = (factor & UINT64_C(0x00FFFFFFFFFFFFFF))
| ((uint64_t)power << 56)
};
}
static inline uint64_t pf_factor(prime_factor_t pf)
{
return pf.packed & UINT64_C(0x00FFFFFFFFFFFFFF);
}
typedef struct {
prime_factor_t factors[FACTORIZATION_BUFFER];
uint8_t status;
} factorization_t;
static inline uint8_t fac_count(const factorization_t *f)
{
return f->status & 0x1Fu;
}
static inline bool fac_gate_x(const factorization_t *f)
{
return (f->status & 0x80u) != 0u;
}
static inline void fac_set_count(factorization_t *f, uint8_t n)
{
f->status = (f->status & 0x80u) | (n & 0x1Fu);
}
static inline void fac_set_gate_x(factorization_t *f)
{
f->status |= 0x80u;
}
/* =================================================================
* GLOBAL STATE — cache-aligned on all hot arrays
* ================================================================= */
alignas(64) static uint64_t g_primes[MAX_PRIMES];
static uint32_t g_prime_count;
/*
* SPF table — uint16_t, mmap'd, 32MB (W1).
* g_spf[n] = smallest prime factor of n, or 0 if n is prime.
* 32 entries per 64-byte cache line.
* MADV_HUGEPAGE applied post-mmap (W2).
*/
static uint16_t *g_spf;
alignas(64) static uint64_t g_packed_cat[MAX_PRIMES / 16 + 1];
typedef struct {
uint32_t count;
uint64_t smallest;
uint64_t largest;
} cat_stats_t;
alignas(64) static cat_stats_t g_stats[MAX_CATEGORIES];
typedef struct {
uint64_t primes[MAX_PRIMES_PER_CATEGORY];
uint32_t count;
} cat_prime_list_t;
static cat_prime_list_t g_cat_primes[MAX_CATEGORIES];
/* =================================================================
* I5 — 4-BIT PACKED MEMO
*
* {{0 [ idx:u32 (AS/.\IS) 4-bit slot ] 1}}
* 0x00 = empty, 0x01..0x0E = valid category, 0x0F = Gate-X
* ================================================================= */
static inline void memo_set(uint32_t idx, uint8_t cat)
{
uint32_t wi = idx / 16u;
uint32_t bp = (idx % 16u) * 4u;
g_packed_cat[wi] = (g_packed_cat[wi] & ~((uint64_t)0xFu << bp))
| ((uint64_t)cat << bp);
}
static inline uint8_t memo_get(uint32_t idx)
{
return (uint8_t)((g_packed_cat[idx / 16u] >> ((idx % 16u) * 4u)) & 0xFu);
}
/* =================================================================
* I1 — PRIME SIEVE
*
* memset(0xAA): sets all odd-position bits in one 2MB write.
* 0xAA = 10101010b: bits 1,3,5,7 of each byte = odd positions.
* Replaces calloc+loop from v3.2.
* ================================================================= */
[[nodiscard]]
static bool sieve_generate(void)
{
size_t nbytes = (SIEVE_LIMIT + 7u) / 8u;
uint8_t *sieve = malloc(nbytes);
if (!sieve) return false;
memset(sieve, 0xAAu, nbytes);
for (uint32_t p = 3u; (uint64_t)p * p <= SIEVE_LIMIT; p += 2u)
if (sieve[p / 8u] & (1u << (p % 8u)))
for (uint32_t m = p * p; m <= SIEVE_LIMIT; m += 2u * p)
sieve[m / 8u] &= (uint8_t)~(1u << (m % 8u));
g_prime_count = 0u;
g_primes[g_prime_count++] = 2u;
for (uint32_t i = 3u; i <= SIEVE_LIMIT && g_prime_count < MAX_PRIMES; i += 2u)
if (sieve[i / 8u] & (1u << (i % 8u)))
g_primes[g_prime_count++] = i;
free(sieve);
return g_prime_count == MAX_PRIMES;
}
/* =================================================================
* I3 — SPF TABLE BUILD (uint16_t, W1)
*
* Prime sentinel = 0 (MAP_ANONYMOUS zero — no explicit write).
* Composite write: g_spf[m] = (uint16_t)p where p <= sqrt(m) <= 4000.
* First writer wins = smallest prime factor. O(n log log n) build.
*
* madvise(MADV_HUGEPAGE): hints kernel to back SPF with 2MB huge
* pages via THP. Reduces TLB footprint from ~8192 to ~16 entries (W2).
*
* {{0 [ n:u32 (AS/.\IS) g_spf[n]:u16 ] 1}}
* g_spf[n] = 0 -> n is prime (sentinel)
* g_spf[n] = f > 0 -> f is smallest prime factor of n, f <= 4000
* ================================================================= */
[[nodiscard]]
static bool spf_build(void)
{
size_t spf_bytes = (size_t)(SIEVE_LIMIT + 1u) * sizeof(uint16_t);
g_spf = mmap(nullptr, spf_bytes,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS,
-1, 0);
if (g_spf == MAP_FAILED) { g_spf = nullptr; return false; }
/* W2: THP hint — collapses TLB from ~8192 entries to ~16 */
madvise(g_spf, spf_bytes, MADV_HUGEPAGE);
/* Primes stay 0 (sentinel). Composites get smallest factor. */
for (uint32_t i = 0u; i < g_prime_count; i++) {
uint32_t p = (uint32_t)g_primes[i];
for (uint32_t m = p + p; m <= SIEVE_LIMIT; m += p)
if (g_spf[m] == 0u)
g_spf[m] = (uint16_t)p; /* p <= sqrt(m) <= 4000, fits u16 */
}
return true;
}
/* =================================================================
* I2 — RAMSTORE BUILD
*
* g_rs[] is BSS: zero-initialised, prime==0 sentinel is free.
* BUG-2: assert on probe cap — unreachable at 47.7% load,
* but contract enforced.
*
* {{0 [ i:u32 (AS/.\IS) g_rs[slot].{prime,index} ] 1}}
* ================================================================= */
static void rs_build(void)
{
for (uint32_t i = 0u; i < g_prime_count; i++) {
uint64_t prime = g_primes[i];
uint32_t slot = ee_hash64_slot(prime);
uint32_t p;
for (p = 0u; p < RS_PROBE_MAX; p++) {
uint32_t s = (slot + p) & HASH_MASK;
if (g_rs[s].prime == 0u) {
g_rs[s].prime = prime;
g_rs[s].index = i;
break;
}
}
assert(p < RS_PROBE_MAX &&
"rs_build: probe cap exceeded -- increase HASH_BITS or RS_PROBE_MAX");
}
}
/* =================================================================
* I2 — RAMSTORE LOOKUP
*
* {{0 [ prime:u64 (AS/.\IS) index:u32 ] 1}}
* X: not found -> UINT32_MAX (caller propagates Gate-X)
* 1: g_rs[s].prime == prime -> return g_rs[s].index
*
* Expected ~1.5 probes at 47.7% load.
* One cache line fetch (64B) covers 4 consecutive slots.
* ================================================================= */
[[nodiscard]]
static inline uint32_t rs_lookup(uint64_t prime)
{
uint32_t slot = ee_hash64_slot(prime);
for (uint32_t p = 0u; p < RS_PROBE_MAX; p++) {
uint32_t s = (slot + p) & HASH_MASK;
if (g_rs[s].prime == 0u) return UINT32_MAX;
if (g_rs[s].prime == prime) return g_rs[s].index;
}
return UINT32_MAX;
}
/* =================================================================
* I3 — FACTORIZE via SPF table (uint16_t, W1)
*
* g_spf[n] == 0: n is prime (sentinel). f = n directly.
* g_spf[n] != 0: f = g_spf[n] (smallest factor, <= 4000).
* Single predictable branch: composites dominate, branch mostly not-taken
* for large n; but n starts composite (p+1 is even) so branch taken first.
*
* O(log n) sequential u16 reads. 32 entries per cache line.
* Hardware prefetcher handles the forward walk naturally.
*
* {{0 [ n:u64 (AS/.\IS) factorization_t ] 1}}
* Z: n <= 1 (empty result)
* X: count >= FACTORIZATION_BUFFER -> gate_x set
* 1: product(f^e for all factors) == original n
* ================================================================= */
static void factorize_spf(uint64_t n, factorization_t *out)
{
out->status = 0u;
if (n <= 1u) return;
while (n > 1u) {
uint64_t f;
if (n <= SIEVE_LIMIT) {
uint16_t sf = g_spf[(uint32_t)n];
f = sf ? (uint64_t)sf : n; /* 0 = prime sentinel */
} else {
f = n; /* n > SIEVE_LIMIT: prime by exhaustion */
}
uint8_t cnt = fac_count(out);
if (cnt >= FACTORIZATION_BUFFER) {
fac_set_gate_x(out);
return;
}
uint8_t power = 0u;
while (n % f == 0u) { n /= f; power++; }
out->factors[cnt] = pf_make(f, power);
fac_set_count(out, (uint8_t)(cnt + 1u));
}
}
/* =================================================================
* I4 — COMPUTE CATEGORY (recursive + memoized)
*
* Correctness of forward ordering:
* For prime p > 3, every prime factor f of p+1 satisfies f < p.
* Proof: p+1 is even, so 2|(p+1). All other prime factors of p+1
* are <= (p+1)/2 < p. Therefore f < p = g_primes[pi], so fi < pi.
* By induction, all memo entries for factors are already computed
* by the time we reach p in ascending order. Recursion is always
* a memo hit after the first ~handful of primes (p=2 edge case:
* factor 3 is handled as base case f==3, no recursion).
*
* BUG-1 fix (v3.3): CATEGORY_GATE_X = 0x0F fits 4-bit memo exactly.
*
* {{0 [ pi:u32 (AS/.\IS) category:u8 ] 1}}
* Z: pi >= g_prime_count
* X: factor absent from RAMstore -> CATEGORY_GATE_X (0x0F)
* 1: result memoized and returned
* ================================================================= */
static uint8_t compute_category(uint32_t pi)
{
uint8_t cached = memo_get(pi);
if (cached != 0u) return cached;
factorization_t fac;
factorize_spf(g_primes[pi] + 1u, &fac);
if (fac_gate_x(&fac)) {
memo_set(pi, CATEGORY_GATE_X);
return CATEGORY_GATE_X;
}
uint8_t max_cat = 0u;
bool only_base_fact = true;
for (uint8_t i = 0u; i < fac_count(&fac); i++) {
uint64_t f = pf_factor(fac.factors[i]);
if (f == 2u || f == 3u) {
if (max_cat < 1u) max_cat = 1u;
continue;
}
only_base_fact = false;
uint32_t fi = rs_lookup(f);
if (fi == UINT32_MAX) {
memo_set(pi, CATEGORY_GATE_X);
return CATEGORY_GATE_X;
}
uint8_t fc = compute_category(fi);
if (fc == CATEGORY_GATE_X) {
memo_set(pi, CATEGORY_GATE_X);
return CATEGORY_GATE_X;
}
if (fc > max_cat) max_cat = fc;
}
uint8_t result = only_base_fact ? 1u : (uint8_t)(max_cat + 1u);
memo_set(pi, result);
return result;
}
/* =================================================================
* MAIN PROCESSING LOOP
*
* W3 — Software prefetch:
* Two hints per iteration, PREFETCH_DIST primes ahead:
*
* (a) SPF entry for future prime's p+1:
* __builtin_prefetch(&g_spf[g_primes[i+N]+1], 0, 1)
* Locality 1 (L2): brings SPF entry into L2 before factorize_spf.
*
* (b) Memo word for future prime index:
* __builtin_prefetch(&g_packed_cat[(i+N)/16], 0, 1)
* Memo is 500KB — hardware prefetcher likely warm, but zero cost.
*
* Prefetch is guarded by bounds check compiled to a single cmov.
* At PREFETCH_DIST=8, lookahead = ~576 cycles > L3 latency (~40).
* ================================================================= */
[[nodiscard]]
static bool run(void)
{
clock_t t0 = clock();
memset(g_stats, 0u, sizeof(g_stats));
memset(g_packed_cat, 0u, sizeof(g_packed_cat));
memset(g_cat_primes, 0u, sizeof(g_cat_primes));
uint32_t gate_x_count = 0u;
printf("Processing %" PRIu32 " primes...\n", g_prime_count);
for (uint32_t i = 0u; i < g_prime_count; i++) {
/* W3: software prefetch — SPF and memo for future prime */
if (__builtin_expect(i + PREFETCH_DIST < g_prime_count, 1u)) {
uint64_t fp = g_primes[i + PREFETCH_DIST] + 1u;
if (fp <= SIEVE_LIMIT)
__builtin_prefetch(&g_spf[fp], 0, 1);
__builtin_prefetch(
&g_packed_cat[(i + PREFETCH_DIST) / 16u], 0, 1);
}
uint64_t p = g_primes[i];
uint8_t cat = compute_category(i);
if (cat == CATEGORY_GATE_X) { gate_x_count++; continue; }
if (cat == 0u || cat >= MAX_CATEGORIES) continue;
g_stats[cat].count++;
if (g_stats[cat].count == 1u) {
g_stats[cat].smallest = p;
g_stats[cat].largest = p;
} else if (p > g_stats[cat].largest) {
g_stats[cat].largest = p;
}
/* PERF-2 (v3.3): fires 200/1,000,000 times */
if (__builtin_expect(i < TRACK_FIRST_N_PRIMES, 0) &&
g_cat_primes[cat].count < MAX_PRIMES_PER_CATEGORY)
g_cat_primes[cat].primes[g_cat_primes[cat].count++] = p;
}
clock_t t1 = clock();
ee_ratio_t elapsed = ee_ratio_elapsed(t0, t1);
uint64_t secs = ee_ratio_secs(elapsed);
uint64_t frac = ee_ratio_frac10k(elapsed);
uint64_t tput = ee_ratio_throughput(g_prime_count, elapsed);
printf("Done in %" PRIu64 ".%04" PRIu64 " s\n\n", secs, frac);
if (gate_x_count)
printf("[Gate-X] %" PRIu32 " prime(s) excluded\n\n", gate_x_count);
printf("First %" PRIu32 " primes by category:\n", TRACK_FIRST_N_PRIMES);
for (uint8_t cat = 1u; cat < MAX_CATEGORIES; cat++) {
if (!g_cat_primes[cat].count) continue;
printf("Category %u:\n", cat);
for (uint32_t i = 0u; i < g_cat_primes[cat].count; i++) {
printf("%4" PRIu64, g_cat_primes[cat].primes[i]);
if ((i + 1u) % 15u == 0u || i == g_cat_primes[cat].count - 1u)
printf("\n");
else
printf(" ");
}
printf("\n");
}
printf("Erdos-Selfridge categorization:\n");
for (uint8_t cat = 1u; cat < MAX_CATEGORIES; cat++) {
if (!g_stats[cat].count) continue;
printf(" Cat %2u: first=%7" PRIu64
" last=%8" PRIu64 " count=%" PRIu32 "\n",
cat, g_stats[cat].smallest,
g_stats[cat].largest, g_stats[cat].count);
}
printf("\nPerformance (ee_ratio_t -- no IEEE 754):\n");
printf(" Elapsed: %" PRIu64 ".%04" PRIu64 " s\n", secs, frac);
printf(" Throughput: %" PRIu64 " primes/s\n", tput);
return true;
}
/* =================================================================
* MAIN
* ================================================================= */
int main(void)
{
printf("Erdos-Selfridge Prime Categorization Engine v3.4\n");
printf("C23 / RAMstore + u16-SPF + THP + prefetch\n");
printf("================================================\n\n");
printf("Sieving %" PRIu32 " primes...\n", MAX_PRIMES);
if (!sieve_generate()) {
fprintf(stderr, "sieve_generate: failed\n");
return 1;
}
printf("Sieve: %" PRIu32 " primes\n\n", g_prime_count);
printf("Building SPF table (%" PRIu32 " entries, 32MB u16, THP)...\n",
SIEVE_LIMIT);
if (!spf_build()) {
fprintf(stderr, "spf_build: mmap failed\n");
return 1;
}
printf("SPF: ready\n\n");
printf("Building RAMstore (%" PRIu32 " slots, 32MB)...\n", HASH_SIZE);
rs_build();
printf("RAMstore: ready\n\n");
if (!run()) {
fprintf(stderr, "run: failed\n");
munmap(g_spf, (size_t)(SIEVE_LIMIT + 1u) * sizeof(uint16_t));
return 1;
}
munmap(g_spf, (size_t)(SIEVE_LIMIT + 1u) * sizeof(uint16_t));
return 0;
}
@opsec-ee
Copy link
Author

opsec-ee commented Sep 6, 2025

Compilation

 gcc -std=c23 -O3 -march=native -flto -Wall -Wextra -DNDEBUG  -funroll-loops prime_ee.c -o prime_engine

$ ./prime_engine

Erdos-Selfridge Prime Categorization Engine v3.4
C23 / RAMstore + u16-SPF + THP + prefetch
================================================

Sieving 1000000 primes...
Sieve: 1000000 primes

Building SPF table (16000000 entries, 32MB u16, THP)...
SPF: ready

Building RAMstore (2097152 slots, 32MB)...
RAMstore: ready

Processing 1000000 primes...
Done in 0.0665 s

First 200 primes by category:
Category 1:
   2    3    5    7   11   17   23   31   47   53   71  107  127  191  383
 431  647  863  971 1151

Category 2:
  13   19   29   41   43   59   61   67   79   83   89   97  101  109  131
 137  139  149  167  179  197  199  211  223  229  239  241  251  263  269
 271  281  283  293  307  317  349  359  367  373  419  433  439  449  461
 479  499  503  509  557  563  577  587  593  599  619  641  643  659  709
 719  743  751  761  769  809  827  839  881  919  929  953  967  991 1019
1033 1049 1069 1087 1103 1187 1223

Category 3:
  37  103  113  151  157  163  173  181  193  227  233  257  277  311  331
 337  347  353  379  389  397  401  409  421  457  463  467  487  491  521
 523  541  547  569  571  601  607  613  631  653  683  701  727  733  773
 787  797  811  821  829  853  857  859  877  883  911  937  947  983  997
1009 1013 1031 1039 1051 1061 1063 1091 1097 1117 1123 1153 1163 1171 1181
1193 1217

Category 4:
  73  313  443  617  661  673  677  691  739  757  823  887  907  941  977
1093 1109 1129 1201 1213

Category 5:
1021

Erdos-Selfridge categorization:
  Cat  1: first=      2  last=10616831  count=46
  Cat  2: first=     13  last=15482669  count=10497
  Cat  3: first=     37  last=15485863  count=201987
  Cat  4: first=     73  last=15485849  count=413891
  Cat  5: first=   1021  last=15485837  count=263109
  Cat  6: first=   2917  last=15485857  count=87560
  Cat  7: first=  15013  last=15484631  count=19389
  Cat  8: first=  49681  last=15485621  count=3129
  Cat  9: first= 532801  last=15472811  count=363
  Cat 10: first=1065601  last=15472321  count=28
  Cat 11: first=8524807  last= 8524807  count=1

Performance (ee_ratio_t -- no IEEE 754):
  Elapsed:    0.0665 s
  Throughput: 15024489 primes/s

@opsec-ee
Copy link
Author

opsec-ee commented Dec 1, 2025

/version 3 is near twice as fast

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment