Skip to content

Instantly share code, notes, and snippets.

@opsec-ee
Last active March 16, 2026 06:34
Show Gist options
  • Select an option

  • Save opsec-ee/bd829c3781bcfaace131628f8f4e6689 to your computer and use it in GitHub Desktop.

Select an option

Save opsec-ee/bd829c3781bcfaace131628f8f4e6689 to your computer and use it in GitHub Desktop.
/* Update 3,16,2026 H.Overman
*
* Erdos-Selfridge Prime Categorization Engine v3.4
* Copyright (c) 2025 H.O <opsec.ee@pm.me>
*
* Elapsed: 0.0665 s
* Throughput: 15024489 primes/s
*
* Euman = integral(optimization/time) delta performance
*
* =======================================================================
* INVARIANT FOUNDATION (IFP)
* =======================================================================
*
* I1: sieve(limit) -> primes[] pure bit-packed generator
* I2: RAMstore(prime) -> index Feistel hash stored in slot,
* O(1) guaranteed, probe<=16
* I3: spf[n] -> factor O(1) direct array lookup,
* zero division
* I4: cat(p) -> u8 Gate-X propagated, never swallowed
* I5: memo[i] <-> 4-bit packed bijective store/recall
*
* =======================================================================
* CHANGES v3.3 -> v3.4
* =======================================================================
*
* W1 — uint16_t SPF (32MB vs 64MB, 2x cache density):
*
* Correctness proof:
* Composite n has smallest prime factor <= sqrt(n).
* For n <= SIEVE_LIMIT = 16,000,000:
* sqrt(16,000,000) = 4000 < 65,535 = UINT16_MAX.
* Therefore all composite SPF values fit in uint16_t.
*
* Prime sentinel = 0:
* MAP_ANONYMOUS provides zero-initialised pages — no explicit
* prime write needed. During build, composites get their smallest
* factor written; primes stay 0. During factorize_spf:
* f = g_spf[n] ? g_spf[n] : (uint32_t)n
* Branch is maximally predictable: composites dominate in p+1 walks.
*
* Cache impact:
* uint16_t: 32 entries per 64-byte cache line (was 16).
* 32MB table: 16 x 2MB huge pages vs 8,192 x 4KB pages
* post-madvise. TLB pressure halved vs v3.3.
*
* W2 — madvise(MADV_HUGEPAGE) on SPF mmap:
*
* Collapses TLB entries: 32MB / 4KB = 8192 entries (4K pages)
* -> 32MB / 2MB = 16 entries (huge pages, THP).
* i7 L2 TLB: 1536 entries (4K). SPF at 8192 entries overflows it
* significantly. At ~100 cycles per TLB miss, this is measurable.
* MADV_HUGEPAGE enables Transparent Huge Pages on Arch Linux.
*
* W3 — Software prefetch in main loop:
*
* __builtin_prefetch(&g_spf[g_primes[i + PREFETCH_DIST] + 1], 0, 1)
* Issues L1 prefetch hint for the SPF entry of the future prime's p+1.
* Covers ~40 cycle L3 latency at 13.8M primes/s per-prime ~72 cycles.
* PREFETCH_DIST = 8: 8 * 72 cycles = 576 cycles lookahead >= L3 lat.
* Locality 1 (L2): SPF hot entries promoted to L2 before access.
*
* Also prefetches memo word for future prime — sequential access,
* hardware prefetcher likely handles it, but explicit hint costs zero.
*
* spf_build simplification:
* v3.3: g_spf[p] = p unconditional + loop from p+p.
* v3.4: g_spf[p] stays 0 (prime sentinel = MAP_ANONYMOUS zero).
* Loop from p+p with g_spf[m]==0 guard — first writer wins.
* Eliminates 1M explicit prime writes to spf table.
*
* All v3.3 bug fixes carried forward:
* BUG-1: CATEGORY_GATE_X = 0x0F (4-bit, no truncation)
* BUG-2: assert on rs_build probe cap
* PERF-2: __builtin_expect on TRACK_FIRST_N_PRIMES branch
* MICRO: spf_build starts inner loop at p+p
*
* Build:
* gcc -std=c23 -O3 -march=native -flto -Wall -Wextra -DNDEBUG \
* -funroll-loops prime_ee.c -o prime_engine
*/
#define _GNU_SOURCE /* madvise, MADV_HUGEPAGE */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <inttypes.h>
#include <sys/mman.h>
#include <assert.h>
/* =================================================================
 * CONFIGURATION
 * ================================================================= */
/* Sieve upper bound; also the highest valid index into the SPF table. */
constexpr uint32_t SIEVE_LIMIT = 16000000u;
/* Exactly this many primes are collected; sieve_generate fails otherwise. */
constexpr uint32_t MAX_PRIMES = 1000000u;
/*
 * CATEGORY_GATE_X = 0x0F: fits in 4-bit memo slot exactly.
 * Valid category range: 1..14. Sentinel 15 reserved.
 * static_assert enforces no collision at build time.
 */
constexpr uint8_t CATEGORY_GATE_X = 0x0Fu;
constexpr uint32_t MAX_CATEGORIES = 15u;
static_assert(MAX_CATEGORIES <= 15u,
    "category 15 (0x0F) is reserved for CATEGORY_GATE_X sentinel");
/* Max (factor, power) pairs per factorization; matches the 5-bit count
 * field in factorization_t.status. */
constexpr uint32_t FACTORIZATION_BUFFER = 31u;
/* Only the first N primes are sampled into the per-category lists. */
constexpr uint32_t TRACK_FIRST_N_PRIMES = 200u;
constexpr uint32_t MAX_PRIMES_PER_CATEGORY = 100u;
/*
 * RAMstore: 2^21 slots, load ~47.7% at 1M primes.
 * OPT-335: HASH_MASK eliminates modulo.
 */
constexpr uint32_t HASH_BITS = 21u;
constexpr uint32_t HASH_SIZE = 1u << HASH_BITS;
constexpr uint32_t HASH_MASK = HASH_SIZE - 1u;
/* Linear-probe cap for RAMstore insert/lookup (I2 contract). */
constexpr uint32_t RS_PROBE_MAX = 16u;
/*
 * Software prefetch lookahead distance (main loop).
 * 8 primes ahead at ~72 cycles/prime = ~576 cycles = L3 latency.
 * Tune up/down based on measured throughput.
 */
constexpr uint32_t PREFETCH_DIST = 8u;
/* =================================================================
 * 144,000 RATIO SYSTEM — exact rational timing, no IEEE 754
 *
 * Elapsed time is carried as an exact rational num/den (clock ticks
 * over CLOCKS_PER_SEC); all derived values use integer arithmetic.
 * ================================================================= */
typedef struct {
    uint64_t num;
    uint64_t den;
} ee_ratio_t;

/* Wrap a clock() interval as the exact rational ticks / CLOCKS_PER_SEC. */
static inline ee_ratio_t ee_ratio_elapsed(clock_t start, clock_t end)
{
    ee_ratio_t ratio;
    ratio.num = (uint64_t)(end - start);
    ratio.den = (uint64_t)CLOCKS_PER_SEC;
    return ratio;
}

/* Whole seconds of the interval (0 for a zero denominator). */
static inline uint64_t ee_ratio_secs(ee_ratio_t r)
{
    if (r.den == 0u) return 0u;
    return r.num / r.den;
}

/* Fractional part scaled to 1/10000ths of a second (four decimals). */
static inline uint64_t ee_ratio_frac10k(ee_ratio_t r)
{
    if (r.den == 0u) return 0u;
    uint64_t remainder = r.num % r.den;
    return (remainder * 10000u) / r.den;
}

/* Items per second: count * den / num (0 for a zero-tick interval). */
static inline uint64_t ee_ratio_throughput(uint32_t count, ee_ratio_t elapsed)
{
    if (elapsed.num == 0u) return 0u;
    return ((uint64_t)count * elapsed.den) / elapsed.num;
}
/* =================================================================
 * euman_hash64 — 36-bit Feistel bijective slot function
 *
 * The ONLY hash in this file; called from rs_build() and rs_lookup().
 * Four balanced Feistel rounds on an 18|18-bit split form a bijection
 * on 36-bit inputs, so distinct primes never collide before the final
 * HASH_MASK reduction to a slot.
 * ================================================================= */
constexpr uint64_t FEISTEL_MASTER = UINT64_C(0x1234567890ABCDEF);
constexpr uint64_t FEISTEL_MASK18 = 0x3FFFFull;
/* Four 18-bit round keys carved out of the master key. */
constexpr uint64_t FEISTEL_K0 = (FEISTEL_MASTER) & FEISTEL_MASK18;
constexpr uint64_t FEISTEL_K1 = (FEISTEL_MASTER >> 9) & FEISTEL_MASK18;
constexpr uint64_t FEISTEL_K2 = (FEISTEL_MASTER >> 18) & FEISTEL_MASK18;
constexpr uint64_t FEISTEL_K3 = (FEISTEL_MASTER >> 27) & FEISTEL_MASK18;

/* Round function F: key-mix, multiply by a golden-ratio-derived odd
 * constant, xor-fold, truncate to 18 bits. F itself need not be
 * invertible — the Feistel network is bijective regardless. */
static inline uint64_t feistel_round(uint64_t half, uint64_t rk)
{
    uint64_t x = half ^ rk;
    x *= UINT64_C(0x9E3779B97);
    x ^= x >> 9;
    return x & FEISTEL_MASK18;
}

/* Four-round balanced Feistel permutation of the low 36 bits of v. */
static inline uint64_t feistel_encrypt_36(uint64_t v)
{
    static const uint64_t rk[4] = {
        FEISTEL_K0, FEISTEL_K1, FEISTEL_K2, FEISTEL_K3
    };
    uint64_t left = (v >> 18) & FEISTEL_MASK18;
    uint64_t right = v & FEISTEL_MASK18;
    for (int round = 0; round < 4; round++) {
        /* new L = old R; new R = old L ^ F(old R, key) */
        uint64_t next_right = left ^ feistel_round(right, rk[round]);
        left = right;
        right = next_right;
    }
    return (left << 18) | right;
}

/* Map a prime to its home slot in g_rs[]. */
static inline uint32_t ee_hash64_slot(uint64_t prime)
{
    return (uint32_t)(feistel_encrypt_36(prime) & HASH_MASK);
}
/* =================================================================
 * RAMSTORE SLOT — I2
 *
 * 16 bytes, alignas(64): 4 slots per cache line.
 * prime==0 sentinel: 0 is never a valid prime.
 * Key stored in slot: no g_primes[] dereference on hit.
 *
 * {{0 [ prime:u64 (AS/.\IS) index:u32 ] 1}}
 * Z: prime == 0
 * X: not found within RS_PROBE_MAX probes -> UINT32_MAX
 * 1: g_rs[s].prime == prime -> return g_rs[s].index
 * ================================================================= */
typedef struct {
    uint64_t prime;  /* key; 0 marks an empty slot */
    uint32_t index;  /* index of this prime in g_primes[] */
    uint32_t pad;    /* explicit tail padding: keeps sizeof == 16 */
} prime_rs_slot_t;
static_assert(sizeof(prime_rs_slot_t) == 16,
    "prime_rs_slot_t must be 16 bytes");
/* BSS zero-init: every slot starts empty (prime == 0) for free. */
alignas(64) static prime_rs_slot_t g_rs[HASH_SIZE];
/* =================================================================
 * EXBOXING-INSPIRED STRUCT PACKING
 *
 * prime_factor_t — a single register-width u64, zero padding:
 *   bits[55:0]  = factor value (25 bits suffice for n <= 16M)
 *   bits[63:56] = power (8 bits; max power of 2 for n <= 16M is 24)
 * ================================================================= */
#define PF_VALUE_MASK UINT64_C(0x00FFFFFFFFFFFFFF) /* bits[55:0] */

typedef struct {
    uint64_t packed;
} prime_factor_t;

/* Pack (factor, power) into one u64: power goes in the top byte. */
static inline prime_factor_t pf_make(uint64_t factor, uint8_t power)
{
    prime_factor_t pf;
    pf.packed = ((uint64_t)power << 56) | (factor & PF_VALUE_MASK);
    return pf;
}

/* Unpack the factor value (low 56 bits). */
static inline uint64_t pf_factor(prime_factor_t pf)
{
    return pf.packed & PF_VALUE_MASK;
}
/* Factorization result: up to FACTORIZATION_BUFFER (factor, power)
 * pairs plus a one-byte status word.
 *   status bits[4:0] = pair count (max 31)
 *   status bit[7]    = Gate-X overflow flag */
typedef struct {
    prime_factor_t factors[FACTORIZATION_BUFFER];
    uint8_t status;
} factorization_t;

/* Number of (factor, power) pairs currently stored. */
static inline uint8_t fac_count(const factorization_t *f)
{
    return (uint8_t)(f->status & 0x1Fu);
}

/* True when the factorization overflowed and was abandoned (Gate-X). */
static inline bool fac_gate_x(const factorization_t *f)
{
    return (f->status & 0x80u) ? true : false;
}

/* Store a new pair count while leaving the Gate-X flag untouched. */
static inline void fac_set_count(factorization_t *f, uint8_t n)
{
    uint8_t gate_bit = (uint8_t)(f->status & 0x80u);
    f->status = (uint8_t)(gate_bit | (n & 0x1Fu));
}

/* Raise the Gate-X overflow flag. */
static inline void fac_set_gate_x(factorization_t *f)
{
    f->status = (uint8_t)(f->status | 0x80u);
}
/* =================================================================
 * GLOBAL STATE — cache-aligned on all hot arrays
 * ================================================================= */
/* The first MAX_PRIMES primes, ascending (filled by sieve_generate). */
alignas(64) static uint64_t g_primes[MAX_PRIMES];
static uint32_t g_prime_count; /* number of valid entries in g_primes */
/*
 * SPF table — uint16_t, mmap'd, 32MB (W1).
 * g_spf[n] = smallest prime factor of n, or 0 if n is prime.
 * 32 entries per 64-byte cache line.
 * MADV_HUGEPAGE applied post-mmap (W2).
 */
static uint16_t *g_spf;
/* I5 memo: 16 x 4-bit category nibbles per u64 word (memo_set/memo_get). */
alignas(64) static uint64_t g_packed_cat[MAX_PRIMES / 16 + 1];
/* Per-category aggregates, filled by run(). */
typedef struct {
    uint32_t count;    /* primes assigned to this category */
    uint64_t smallest; /* first (smallest) prime seen */
    uint64_t largest;  /* last (largest) prime seen */
} cat_stats_t;
alignas(64) static cat_stats_t g_stats[MAX_CATEGORIES];
/* Sample lists: primes among the first TRACK_FIRST_N_PRIMES, per category. */
typedef struct {
    uint64_t primes[MAX_PRIMES_PER_CATEGORY];
    uint32_t count;
} cat_prime_list_t;
static cat_prime_list_t g_cat_primes[MAX_CATEGORIES];
/* =================================================================
 * I5 — 4-BIT PACKED MEMO
 *
 * 16 category nibbles per uint64_t word of g_packed_cat.
 * 0x00 = empty, 0x01..0x0E = valid category, 0x0F = Gate-X
 * ================================================================= */
/* Write the 4-bit category for prime index idx (overwrites old value). */
static inline void memo_set(uint32_t idx, uint8_t cat)
{
    uint32_t word = idx >> 4;          /* idx / 16 */
    uint32_t shift = (idx & 15u) * 4u; /* nibble position within word */
    uint64_t cleared = g_packed_cat[word] & ~(UINT64_C(0xF) << shift);
    g_packed_cat[word] = cleared | ((uint64_t)cat << shift);
}

/* Read the 4-bit category for prime index idx (0 = not yet computed). */
static inline uint8_t memo_get(uint32_t idx)
{
    uint32_t word = idx >> 4;
    uint32_t shift = (idx & 15u) * 4u;
    return (uint8_t)((g_packed_cat[word] >> shift) & 0xFu);
}
/* =================================================================
 * I1 — PRIME SIEVE
 *
 * Odd-only bitmap: memset(0xAA) sets bits 1,3,5,7 of every byte in
 * one pass, i.e. marks every odd index "potentially prime". Even
 * indices stay clear and are never consulted (2 is emitted directly).
 *
 * v3.4.1 BUG FIX (heap overflow): both the crossing loop and the
 * collection loop index the bitmap up to and including SIEVE_LIMIT
 * (`m <= SIEVE_LIMIT`, `i <= SIEVE_LIMIT`), which touches byte
 * SIEVE_LIMIT/8 = 2,000,000. The previous (SIEVE_LIMIT + 7) / 8
 * allocation was exactly 2,000,000 bytes (bits 0..SIEVE_LIMIT-1),
 * so the last accesses read/wrote one byte past the buffer.
 *
 * Returns: true iff exactly MAX_PRIMES primes were collected into
 * g_primes[] (ascending); false on allocation failure or shortfall.
 * ================================================================= */
[[nodiscard]]
static bool sieve_generate(void)
{
    /* SIEVE_LIMIT/8 + 1 bytes: covers bit index SIEVE_LIMIT inclusive. */
    size_t nbytes = (size_t)SIEVE_LIMIT / 8u + 1u;
    uint8_t *sieve = malloc(nbytes);
    if (!sieve) return false;
    memset(sieve, 0xAAu, nbytes);
    /* Cross off odd composites: start at p*p, step 2p to stay odd. */
    for (uint32_t p = 3u; (uint64_t)p * p <= SIEVE_LIMIT; p += 2u)
        if (sieve[p / 8u] & (1u << (p % 8u)))
            for (uint32_t m = p * p; m <= SIEVE_LIMIT; m += 2u * p)
                sieve[m / 8u] &= (uint8_t)~(1u << (m % 8u));
    /* Collect: 2 first, then surviving odd indices in ascending order.
     * Index 1 is never visited (loop starts at 3). */
    g_prime_count = 0u;
    g_primes[g_prime_count++] = 2u;
    for (uint32_t i = 3u; i <= SIEVE_LIMIT && g_prime_count < MAX_PRIMES; i += 2u)
        if (sieve[i / 8u] & (1u << (i % 8u)))
            g_primes[g_prime_count++] = i;
    free(sieve);
    /* Contract: exactly MAX_PRIMES primes must exist below SIEVE_LIMIT. */
    return g_prime_count == MAX_PRIMES;
}
/* =================================================================
 * I3 — SPF TABLE BUILD (uint16_t, W1)
 *
 * Prime sentinel = 0 (MAP_ANONYMOUS zero-fill — no explicit write).
 * Composite write: g_spf[m] = (uint16_t)p where p <= sqrt(m) <= 4000.
 * First writer wins = smallest prime factor.
 *
 * madvise(MADV_HUGEPAGE): hints kernel to back SPF with 2MB huge
 * pages via THP. Reduces TLB footprint from ~8192 to ~16 entries (W2).
 *
 * v3.4.1 PERF FIX: the smallest m with spf(m) == p is p*p (m = p*k
 * with k >= p), so the inner walk starts at p*p and the outer loop
 * stops once p*p > SIEVE_LIMIT — primes above sqrt(SIEVE_LIMIT) = 4000
 * can never be a smallest factor in range. v3.4 walked m = p+p,...
 * for all 1,000,000 primes; for every p > 4000 each write failed the
 * first-writer guard, a pure wasted traversal of the table.
 *
 * g_spf[n] = 0      -> n is prime (sentinel)
 * g_spf[n] = f > 0  -> f is the smallest prime factor of n, f <= 4000
 * Returns: false iff the mmap failed.
 * ================================================================= */
[[nodiscard]]
static bool spf_build(void)
{
    size_t spf_bytes = (size_t)(SIEVE_LIMIT + 1u) * sizeof(uint16_t);
    g_spf = mmap(nullptr, spf_bytes,
                 PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS,
                 -1, 0);
    if (g_spf == MAP_FAILED) { g_spf = nullptr; return false; }
    /* W2: THP hint — advisory only, failure is harmless. */
    (void)madvise(g_spf, spf_bytes, MADV_HUGEPAGE);
    /* Primes stay 0 (sentinel). Composites get their smallest factor. */
    for (uint32_t i = 0u; i < g_prime_count; i++) {
        uint32_t p = (uint32_t)g_primes[i];
        uint64_t sq = (uint64_t)p * p;
        if (sq > SIEVE_LIMIT) break; /* g_primes is ascending */
        for (uint32_t m = (uint32_t)sq; m <= SIEVE_LIMIT; m += p)
            if (g_spf[m] == 0u)          /* first writer = smallest factor */
                g_spf[m] = (uint16_t)p;  /* p <= 4000, fits uint16_t */
    }
    return true;
}
/* =================================================================
 * I2 — RAMSTORE BUILD
 *
 * Inserts every prime into g_rs[] via linear probing from its
 * Feistel home slot. g_rs[] is BSS: zero-initialised, so the
 * prime==0 empty sentinel comes for free.
 *
 * v3.4.1 FIX (BUG-2 hardening): the probe-cap contract was enforced
 * with assert(), but the documented build uses -DNDEBUG, which
 * compiles the assert away. A probe overflow would then silently
 * drop the prime from the RAMstore, surfacing later only as
 * spurious Gate-X categorizations. The check is now an unconditional
 * runtime check that reports the offending prime and aborts.
 * ================================================================= */
static void rs_build(void)
{
    for (uint32_t i = 0u; i < g_prime_count; i++) {
        uint64_t prime = g_primes[i];
        uint32_t slot = ee_hash64_slot(prime);
        bool stored = false;
        for (uint32_t p = 0u; p < RS_PROBE_MAX; p++) {
            uint32_t s = (slot + p) & HASH_MASK;
            if (g_rs[s].prime == 0u) {
                g_rs[s].prime = prime;
                g_rs[s].index = i;
                stored = true;
                break;
            }
        }
        /* Unreachable at ~47.7% load, but enforced even under NDEBUG. */
        if (!stored) {
            fprintf(stderr,
                "rs_build: probe cap exceeded at prime %" PRIu64
                " -- increase HASH_BITS or RS_PROBE_MAX\n", prime);
            exit(EXIT_FAILURE);
        }
    }
}
/* =================================================================
* I2 — RAMSTORE LOOKUP
*
* {{0 [ prime:u64 (AS/.\IS) index:u32 ] 1}}
* X: not found -> UINT32_MAX (caller propagates Gate-X)
* 1: g_rs[s].prime == prime -> return g_rs[s].index
*
* Expected ~1.5 probes at 47.7% load.
* One cache line fetch (64B) covers 4 consecutive slots.
* ================================================================= */
[[nodiscard]]
static inline uint32_t rs_lookup(uint64_t prime)
{
uint32_t slot = ee_hash64_slot(prime);
for (uint32_t p = 0u; p < RS_PROBE_MAX; p++) {
uint32_t s = (slot + p) & HASH_MASK;
if (g_rs[s].prime == 0u) return UINT32_MAX;
if (g_rs[s].prime == prime) return g_rs[s].index;
}
return UINT32_MAX;
}
/* =================================================================
 * I3 — FACTORIZE via SPF table (uint16_t, W1)
 *
 * Peels smallest prime factors off n using O(1) table lookups:
 *   g_spf[n] == 0 -> n itself is prime (sentinel), factor = n
 *   g_spf[n] != 0 -> smallest prime factor of n (<= 4000)
 * n > SIEVE_LIMIT is treated as prime (defensive bound).
 *
 * Result: out->factors[0..count-1] hold (factor, power) pairs whose
 * product factor^power reproduces the original n. The Gate-X flag is
 * raised instead of overflowing the pair buffer.
 * n <= 1 yields an empty factorization (status == 0).
 * ================================================================= */
static void factorize_spf(uint64_t n, factorization_t *out)
{
    out->status = 0u;
    if (n <= 1u) return;
    do {
        /* Smallest prime factor of the remaining cofactor. */
        uint64_t f = n;
        if (n <= SIEVE_LIMIT) {
            uint16_t sf = g_spf[(uint32_t)n];
            if (sf != 0u) f = (uint64_t)sf;
        }
        uint8_t slot = fac_count(out);
        if (slot >= FACTORIZATION_BUFFER) {
            fac_set_gate_x(out); /* overflow: abandon, never truncate */
            return;
        }
        /* Strip every power of f; f divides n by construction. */
        uint8_t exponent = 0u;
        do { n /= f; exponent++; } while (n % f == 0u);
        out->factors[slot] = pf_make(f, exponent);
        fac_set_count(out, (uint8_t)(slot + 1u));
    } while (n > 1u);
}
/* =================================================================
 * I4 — COMPUTE CATEGORY (recursive + memoized)
 *
 * Correctness of forward ordering:
 * For prime p > 3, every prime factor f of p+1 satisfies f < p.
 * Proof: p+1 is even, so 2|(p+1). All other prime factors of p+1
 * are <= (p+1)/2 < p. Therefore f < p = g_primes[pi], so fi < pi.
 * By induction, all memo entries for factors are already computed
 * by the time we reach p in ascending order. Recursion is always
 * a memo hit after the first ~handful of primes (p=2 edge case:
 * factor 3 is handled as base case f==3, no recursion).
 *
 * BUG-1 fix (v3.3): CATEGORY_GATE_X = 0x0F fits 4-bit memo exactly.
 *
 * Returns: category 1..14, or CATEGORY_GATE_X (0x0F) when the
 * factorization overflowed or a factor is absent from the RAMstore.
 * The result is memoized before returning, so each index is
 * computed at most once.
 * ================================================================= */
static uint8_t compute_category(uint32_t pi)
{
    /* Memo hit: 0 means "not yet computed" (valid results are >= 1). */
    uint8_t cached = memo_get(pi);
    if (cached != 0u) return cached;
    factorization_t fac;
    factorize_spf(g_primes[pi] + 1u, &fac);
    /* Factorization overflow propagates as Gate-X (I4: never swallowed). */
    if (fac_gate_x(&fac)) {
        memo_set(pi, CATEGORY_GATE_X);
        return CATEGORY_GATE_X;
    }
    uint8_t max_cat = 0u;
    bool only_base_fact = true; /* stays true while only 2s and 3s seen */
    for (uint8_t i = 0u; i < fac_count(&fac); i++) {
        uint64_t f = pf_factor(fac.factors[i]);
        /* Base case: factors 2 and 3 contribute category 1 directly. */
        if (f == 2u || f == 3u) {
            if (max_cat < 1u) max_cat = 1u;
            continue;
        }
        only_base_fact = false;
        /* Map factor back to its prime index; absence -> Gate-X. */
        uint32_t fi = rs_lookup(f);
        if (fi == UINT32_MAX) {
            memo_set(pi, CATEGORY_GATE_X);
            return CATEGORY_GATE_X;
        }
        /* fi < pi (see banner proof): effectively always a memo hit. */
        uint8_t fc = compute_category(fi);
        if (fc == CATEGORY_GATE_X) {
            memo_set(pi, CATEGORY_GATE_X);
            return CATEGORY_GATE_X;
        }
        if (fc > max_cat) max_cat = fc;
    }
    /* Category = 1 for pure {2,3} factorizations, else 1 + deepest factor. */
    uint8_t result = only_base_fact ? 1u : (uint8_t)(max_cat + 1u);
    memo_set(pi, result);
    return result;
}
/* =================================================================
 * MAIN PROCESSING LOOP
 *
 * Categorizes every prime in g_primes[], accumulating per-category
 * stats (g_stats) and sample lists (g_cat_primes), then prints the
 * report. Timing uses the exact-rational ee_ratio_t helpers; note
 * clock() measures CPU time, not wall time.
 *
 * W3 — Software prefetch:
 * Two hints per iteration, PREFETCH_DIST primes ahead:
 *
 * (a) SPF entry for future prime's p+1:
 *     __builtin_prefetch(&g_spf[g_primes[i+N]+1], 0, 1)
 *     Locality 1 (L2): brings SPF entry into L2 before factorize_spf.
 *
 * (b) Memo word for future prime index:
 *     __builtin_prefetch(&g_packed_cat[(i+N)/16], 0, 1)
 *     Memo is sequential — hardware prefetcher likely warm, but the
 *     explicit hint costs nothing.
 * ================================================================= */
[[nodiscard]]
static bool run(void)
{
    clock_t t0 = clock();
    /* Reset all accumulators so run() is idempotent. */
    memset(g_stats, 0u, sizeof(g_stats));
    memset(g_packed_cat, 0u, sizeof(g_packed_cat));
    memset(g_cat_primes, 0u, sizeof(g_cat_primes));
    uint32_t gate_x_count = 0u;
    printf("Processing %" PRIu32 " primes...\n", g_prime_count);
    for (uint32_t i = 0u; i < g_prime_count; i++) {
        /* W3: software prefetch — SPF and memo for a future prime.
         * Bounds-guarded; only the last PREFETCH_DIST iterations skip it. */
        if (__builtin_expect(i + PREFETCH_DIST < g_prime_count, 1u)) {
            uint64_t fp = g_primes[i + PREFETCH_DIST] + 1u;
            if (fp <= SIEVE_LIMIT)
                __builtin_prefetch(&g_spf[fp], 0, 1);
            __builtin_prefetch(
                &g_packed_cat[(i + PREFETCH_DIST) / 16u], 0, 1);
        }
        uint64_t p = g_primes[i];
        uint8_t cat = compute_category(i);
        /* Gate-X primes are counted but excluded from stats. */
        if (cat == CATEGORY_GATE_X) { gate_x_count++; continue; }
        if (cat == 0u || cat >= MAX_CATEGORIES) continue;
        g_stats[cat].count++;
        /* Primes arrive ascending: first seen = smallest, latest = largest. */
        if (g_stats[cat].count == 1u) {
            g_stats[cat].smallest = p;
            g_stats[cat].largest = p;
        } else if (p > g_stats[cat].largest) {
            g_stats[cat].largest = p;
        }
        /* PERF-2 (v3.3): fires 200/1,000,000 times — hinted unlikely. */
        if (__builtin_expect(i < TRACK_FIRST_N_PRIMES, 0) &&
            g_cat_primes[cat].count < MAX_PRIMES_PER_CATEGORY)
            g_cat_primes[cat].primes[g_cat_primes[cat].count++] = p;
    }
    clock_t t1 = clock();
    ee_ratio_t elapsed = ee_ratio_elapsed(t0, t1);
    uint64_t secs = ee_ratio_secs(elapsed);
    uint64_t frac = ee_ratio_frac10k(elapsed);
    uint64_t tput = ee_ratio_throughput(g_prime_count, elapsed);
    printf("Done in %" PRIu64 ".%04" PRIu64 " s\n\n", secs, frac);
    if (gate_x_count)
        printf("[Gate-X] %" PRIu32 " prime(s) excluded\n\n", gate_x_count);
    /* Sample lists: 15 primes per output row. */
    printf("First %" PRIu32 " primes by category:\n", TRACK_FIRST_N_PRIMES);
    for (uint8_t cat = 1u; cat < MAX_CATEGORIES; cat++) {
        if (!g_cat_primes[cat].count) continue;
        printf("Category %u:\n", cat);
        for (uint32_t i = 0u; i < g_cat_primes[cat].count; i++) {
            printf("%4" PRIu64, g_cat_primes[cat].primes[i]);
            if ((i + 1u) % 15u == 0u || i == g_cat_primes[cat].count - 1u)
                printf("\n");
            else
                printf(" ");
        }
        printf("\n");
    }
    printf("Erdos-Selfridge categorization:\n");
    for (uint8_t cat = 1u; cat < MAX_CATEGORIES; cat++) {
        if (!g_stats[cat].count) continue;
        printf(" Cat %2u: first=%7" PRIu64
               " last=%8" PRIu64 " count=%" PRIu32 "\n",
               cat, g_stats[cat].smallest,
               g_stats[cat].largest, g_stats[cat].count);
    }
    printf("\nPerformance (ee_ratio_t -- no IEEE 754):\n");
    printf(" Elapsed: %" PRIu64 ".%04" PRIu64 " s\n", secs, frac);
    printf(" Throughput: %" PRIu64 " primes/s\n", tput);
    return true;
}
/* =================================================================
 * MAIN
 *
 * Pipeline: sieve -> SPF table -> RAMstore -> categorization run.
 * The SPF mapping is released on every exit path after spf_build
 * succeeds (single cleanup point via rc).
 * ================================================================= */
int main(void)
{
    printf("Erdos-Selfridge Prime Categorization Engine v3.4\n");
    printf("C23 / RAMstore + u16-SPF + THP + prefetch\n");
    printf("================================================\n\n");
    printf("Sieving %" PRIu32 " primes...\n", MAX_PRIMES);
    if (!sieve_generate()) {
        fprintf(stderr, "sieve_generate: failed\n");
        return 1;
    }
    printf("Sieve: %" PRIu32 " primes\n\n", g_prime_count);
    printf("Building SPF table (%" PRIu32 " entries, 32MB u16, THP)...\n",
           SIEVE_LIMIT);
    if (!spf_build()) {
        fprintf(stderr, "spf_build: mmap failed\n");
        return 1;
    }
    printf("SPF: ready\n\n");
    printf("Building RAMstore (%" PRIu32 " slots, 32MB)...\n", HASH_SIZE);
    rs_build();
    printf("RAMstore: ready\n\n");
    int rc = 0;
    if (!run()) {
        fprintf(stderr, "run: failed\n");
        rc = 1;
    }
    munmap(g_spf, (size_t)(SIEVE_LIMIT + 1u) * sizeof(uint16_t));
    return rc;
}
@opsec-ee
Copy link
Author

opsec-ee commented Dec 1, 2025

/version 3 is near twice as fast

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment