Last active
January 26, 2026 20:03
-
-
Save jzakiya/b8316f22efb82cb636cff2ad0ab02530 to your computer and use it in GitHub Desktop.
Primes generator, using multi-threaded SSoZ (Segmented Sieve of Zakiya), written in Crystal
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # This Crystal source file is a multiple threaded implementation to perform an | |
| # extremely fast Segmented Sieve of Zakiya (SSoZ) to find Primes <= N. | |
| # Compile as: $ crystal build --release -Dpreview_mt --mcpu native primes_ssoz.cr | |
| # To reduce binary size do: $ strip primes_ssoz | |
| # Thread workers default to 4, set to system max for optimum performance. | |
| # Single val: $ CRYSTAL_WORKERS=8 ./primes_ssoz val1 | |
| # Range vals: $ CRYSTAL_WORKERS=8 ./primes_ssoz val1 val2 | |
| # val1 and val2 can be entered as either: 123456789 or 123_456_789 | |
| # Mathematical and technical basis for implementation are explained here: | |
| # https://www.academia.edu/37952623/The_Use_of_Prime_Generators_to_Implement_Fast_r | |
| # Twin_Primes_Sieve_of_Zakiya_SoZ_Applications_to_Number_Theory_and_Implications_ | |
| # for_the_Riemann_Hypotheses | |
| # https://www.academia.edu/7583194/The_Segmented_Sieve_of_Zakiya_SSoZ_ | |
| # https://www.academia.edu/19786419/PRIMES-UTILS_HANDBOOK | |
| # https://www.academia.edu/81206391/Twin_Primes_Segmented_Sieve_of_Zakiya_SSoZ_Explained | |
| # This source code, and its updates, can be found here: | |
| # https://gist.github.com/jzakiya/b8316f22efb82cb636cff2ad0ab02530 | |
| # This code is provided free and subject to copyright and terms of the | |
| # GNU General Public License Version 3, GPLv3, or greater. | |
| # License copy/terms are here: http://www.gnu.org/licenses/ | |
| # Copyright (c) 2017-2026; Jabari Zakiya -- jzakiya at gmail dot com | |
| # Last update: 2026/01/25 | |
| # Compute modular inverse a^-1 to base m, e.g. a*(a^-1) mod m = 1 | |
| def modinv(a0, m0) | |
| return 1 if m0 == 1 | |
| a, m = a0, m0 | |
| x0, inv = 0, 1 | |
| while a > 1 | |
| inv &-= (a // m) &* x0 | |
| a, m = m, a % m | |
| x0, inv = inv, x0 | |
| end | |
| inv &+= m0 if inv < 0 | |
| inv | |
| end | |
| def gen_pg_parameters(prime) | |
| # Create prime generator parameters for given Pn | |
| puts "using Prime Generator parameters for P#{prime}" | |
| primes = [2, 3, 5, 7, 11, 13, 17, 19, 23] | |
| modpg, res_0 = 1, 0 # compute Pn's pn# modulus and res_0 value | |
| primes.each { |prm| res_0 = prm; break if prm > prime; modpg &*= prm } | |
| residues = [] of Int32 # save upper twinpair residues here | |
| inverses = Array.new(modpg + 2, 0) # save Pn's residues inverses here | |
| rc, inc = 5, 2 # use P3's PGS to generate residue candidates | |
| midmodpg = modpg >> 1 # mid value for modulus | |
| while rc < midmodpg # find PG's 1st half residues | |
| if rc.gcd(modpg) == 1 # if rc a residue | |
| mc = modpg &- rc # create its modular complement | |
| inverses[rc] = modinv(rc, modpg) # save pc inverse | |
| inverses[mc] = modinv(mc, modpg) # save mc inverse | |
| residues << rc << mc # save both residues | |
| end | |
| rc &+= inc; inc ^= 0b110 # create next P3 sequence pc: 5 7 11 13 17 19 ... | |
| end | |
| residues.sort!; residues << (modpg - 1) << (modpg + 1) # last 2 residue | |
| inverses[modpg + 1] = 1; inverses[modpg - 1] = modpg - 1 # last 2 residues are self inverses | |
| {modpg, res_0, residues.size, residues, inverses} | |
| end | |
| def set_sieve_parameters(start_num, end_num) | |
| # Select at runtime best PG and segment size parameters for input values. | |
| # These are good estimates derived from PG data profiling. Can be improved. | |
| nrange = end_num - start_num | |
| bn, pg = 0, 3 | |
| if end_num < 49 | |
| bn = 1; pg = 3 | |
| elsif nrange < 77_000_000 | |
| bn = 16; pg = 5 | |
| elsif nrange < 1_100_000_000 | |
| bn = 32; pg = 7 | |
| elsif nrange < 35_500_000_000 | |
| bn = 64; pg = 11 | |
| elsif nrange < 140_000_000_000_000 | |
| pg = 13 | |
| if nrange > 7_000_000_000_000; bn = 384 | |
| elsif nrange > 2_500_000_000_000; bn = 320 | |
| elsif nrange > 250_000_000_000; bn = 196 | |
| else bn = 128 | |
| end | |
| else | |
| bn = 384; pg = 17 | |
| end | |
| modpg, res_0, rescnt, residues, resinvrs = gen_pg_parameters(pg) | |
| kmin = (start_num-2) // modpg + 1 # number of resgroups to start_num | |
| kmax = (end_num - 2) // modpg + 1 # number of resgroups to end_num | |
| krange = kmax - kmin + 1 # number of resgroups in range, at least 1 | |
| n = krange < 37_500_000_000_000 ? 10 : (krange < 975_000_000_000_000 ? 16 : 20) | |
| b = bn * n * 1024 # set seg size to optimize for selected PG | |
| ks = krange < b ? krange : b # segments resgroups size | |
| puts "segment size = #{ks.format} resgroups; seg array is [1 x #{(((ks-1) >> 6) + 1).format}] 64-bits" | |
| maxprimes = krange * rescnt # maximum number of prime pcs | |
| puts "prime candidates = #{maxprimes.format}; resgroups = #{krange.format}" | |
| {modpg, res_0, ks, kmin, kmax, krange, rescnt, residues, resinvrs} | |
| end | |
| def sozp5(val, res_0, start_num, end_num) | |
| # Return the primes r0..sqrt(end_num) within range (start_num...end_num) | |
| md, rescnt = 30u64, 8 # P5's modulus and residues count | |
| res = [7,11,13,17,19,23,29,31] # P5's residues | |
| range_size = end_num - start_num # integers size of inputs range | |
| kmax = (val &- 2) // md &+ 1 # number of resgroups upto val = sqrt(end_num) | |
| prms = Array(UInt8).new(kmax, 0) # byte array of prime candidates, init '0' | |
| sqrtn = Math.isqrt(val-1|1) # integer sqrt of largest odd value <= val | |
| k = sqrtn//md; resk = sqrtn-md*k; r=0 # sqrtn resgroup|residue values; 1st res posn | |
| while resk >= res[r]; r &+= 1 end # find largest residue <= sqrtn posn in its resgroup | |
| pcs_to_sqrtn = k &* rescnt &+ r # number of pcs <= sqrtn | |
| pcs_to_sqrtn.times do |i| # for r0..sqrtN primes mark their multiples | |
| k, r = i.divmod rescnt # update resgroup parameters | |
| next if prms[k] & (1 << r) != 0 # skip pc if not prime | |
| prm_r = res[r] # if prime save its residue value | |
| prime = md &* k &+ prm_r # numerate its prime value | |
| rem = start_num % prime # prime's modular distance to start_num | |
| next unless (prime &- rem <= range_size) || rem == 0 # skip prime if no multiple in range | |
| res.each do |ri| # mark prime's multiples in prms | |
| kn,rn = (prm_r &* ri &- 2).divmod md # cross-product resgroup|residue | |
| bit_r = 1 << res.index(rn &+ 2).not_nil! # bit mask value for prod's residue | |
| kpm = k &* (prime &+ ri) &+ kn # resgroup for prime's 1st multiple | |
| while kpm < kmax; prms[kpm] |= bit_r; kpm &+= prime end # mark primes's multiples | |
| end end | |
| # prms now contains the prime multiples positions marked for the pcs r0..N | |
| # now along each restrack, identify|numerate the primes in each resgroup | |
| # return only the primes with a multiple within range (start_num..end_num) | |
| primes = [] of UInt64 # create empty dynamic array for primes | |
| res.each_with_index do |r_i, i| # along each restrack|row til end | |
| kmax.times do |k| # for each resgroup along restrack | |
| if prms[k] & (1 << i) == 0 # if bit location a prime | |
| prime = md &* k &+ r_i # numerate its value, store if in range | |
| # check if prime has multiple in range, if so keep it, if not don't | |
| rem = start_num % prime # if rem 0 then start_num is multiple of prime | |
| primes << prime if (res_0 <= prime <= val) && (prime &- rem <= range_size || rem == 0) | |
| end end end | |
| primes # primes stored in restrack|row sequence order | |
| end | |
| def nextp_init(r_i, kmin, modpg, primes, resinvrs) | |
| # Initialize 'nextp' array for twinpair upper residue rhi in 'restwins'. | |
| # Compute 1st prime multiple resgroups for each prime r0..sqrt(N) and | |
| # store consecutively as lo_tp|hi_tp pairs for their restracks. | |
| nextp = Slice(UInt64).new(primes.size) # 1st mults array for r_i | |
| primes.each_with_index do |prime, j| # for each prime r0..sqrt(N) | |
| k = (prime &- 2) // modpg # find the resgroup it's in | |
| r = (prime &- 2) % modpg &+ 2 # and its residue value | |
| r_inv = resinvrs[r] # and residue inverse | |
| ri = (r_inv &* r_i &- 2) % modpg &+ 2 # compute r's ri for r_lo | |
| ki = k &* (prime &+ ri) &+ (r &* ri &- 2) // modpg # ki 1st mult resgroup | |
| ki < kmin ? (ki = (kmin &- ki) % prime; ki = prime &- ki if ki > 0) : (ki &-= kmin) | |
| nextp[j] = ki # prime's 1st mult resgroup val in range for ri | |
| end | |
| nextp | |
| end | |
| def primes_sieve(ri, kmin, kmax, ks, start_num, end_num, modpg, primes, resinvrs) | |
| # Perform in thread the ssoz for given prime residue for kmax resgroups. | |
| # First create|init 'nextp' array of 1st prime mults for given prime residue, | |
| # stored in 'nextp', and init seg array for ks resgroups. | |
| # For sieve, mark resgroup bits to '1' if prime restrack is nonprime | |
| # for primes mults resgroups, and update 'nextp' restrack slices acccordingly. | |
| # Return the last prime|sum for the range for this prime residue. | |
| s = 6 # shift value for 64 bits | |
| bmask = (1 << s) - 1 # bitmask val for 64 bits | |
| sum, ki, kn = 0_u64, kmin-1, ks # init these parameters | |
| hi_p, k_max = 0_u64, kmax # max prime|resgroup | |
| seg = Slice(UInt64).new(((ks &- 1) >> s) &+ 1) # seg array of ks resgroups | |
| ki &+= 1 if ri < (start_num &- 2) % modpg &+ 2 # ensure lo tp in range | |
| k_max &-= 1 if ri > (end_num &- 2) % modpg &+ 2 # ensure hi tp in range | |
| nextp = nextp_init(ri, ki, modpg, primes, resinvrs) # init nextp array | |
| while ki < k_max # for ks size slices upto kmax | |
| kn = k_max &- ki if ks > (k_max &- ki) # adjust kn size for last seg | |
| primes.each_with_index do |prime, j| # for each prime r0..sqrt(N) | |
| # for lower twinpair residue track | |
| k = nextp.to_unsafe[j] # starting from this resgroup in seg | |
| while k < kn # mark primenth resgroup bits prime mults | |
| seg.to_unsafe[k >> s] |= 1_u64 << (k & bmask) | |
| k &+= prime end # set resgroup for prime's next multiple | |
| nextp.to_unsafe[j] = k &- kn # save 1st resgroup in next eligible seg | |
| end # set as nonprime unused bits in last seg[n] | |
| # so fast, do for every seg[i] | |
| seg.to_unsafe[(kn - 1) >> s] |= ~1u64 << ((kn &- 1) & bmask) | |
| cnt = 0 # count the twinprimes in the segment | |
| seg[0..(kn &- 1) >> s].each { |m| cnt &+= (~m).popcount } | |
| if cnt > 0 # if segment has primes | |
| sum &+= cnt # add segment count to total range count | |
| upk = kn &- 1 # from end of seg, count back to largest prime | |
| while seg.to_unsafe[upk >> s] & (1_u64 << (upk & bmask)) != 0; upk &-= 1 end | |
| hi_p = ki &+ upk # set its full range resgroup value | |
| end | |
| ki &+= ks # set 1st resgroup val of next seg slice | |
| seg.fill(0) if ki < k_max # set next seg bits to all primes if in range | |
| end # when sieve done, numerate largest prime in range | |
| # for small ranges w/o primes set largest to 0 | |
| hi_p = (ri > end_num || sum == 0) ? 0u64 : hi_p &* modpg &+ ri | |
| {hi_p, sum} # return largest primes count in range | |
| end | |
| def primes_ssoz() | |
| end_num = {(ARGV[0].to_u64 underscore: true), 1u64}.max | |
| start_num = ARGV.size > 1 ? {(ARGV[1].to_u64 underscore: true), 1u64}.max : 1u64 | |
| start_num, end_num = end_num, start_num if start_num > end_num | |
| start_num = end_num = 4u64 if end_num < 2 && start_num < 2 | |
| start_num = 2u64 if end_num > 1 && start_num == 1 | |
| puts "threads = #{System.cpu_count}" # show number of system threads | |
| ts = Time.instant # start timing sieve setup execution | |
| # select Pn, set sieving params for inputs | |
| modpg, res_0, ks, kmin, kmax, krange, rescnt, residues, resinvrs = set_sieve_parameters(start_num, end_num) | |
| # create sieve primes <= sqrt(end_num), only use those whose multiples within inputs range | |
| primes = end_num < 49 ? [5u64] : sozp5(Math.isqrt(end_num), res_0, start_num, end_num) | |
| puts "each of #{rescnt} threads has nextp[1 x #{primes.size.format}] array" | |
| primescnt = 0u64 # init primes range count | |
| last_prime = 0u64 # init val for largest prime | |
| lo_prime = residues[0] # first residue|prime for selected PG | |
| [2, 3, 5, 7, 11, 13, 17].each { |prm| (primescnt &+= 1; last_prime = prm) if start_num <= prm < lo_prime} | |
| te = (Time.instant - ts).total_seconds.round(6) | |
| puts "setup time = #{te} secs" # display sieve setup time | |
| puts "perform primes ssoz sieve" | |
| t1 = Time.instant # start timing ssoz sieve execution | |
| cnts = Array(UInt64).new(rescnt, 0) # number of primes found per thread | |
| lastprimes = Array(UInt64).new(rescnt, 0) # largest prime val for each thread | |
| done = Channel(Nil).new(rescnt) | |
| threadscnt = Atomic.new(0) # count of finished threads | |
| residues.each_with_index do |ri, i| # sieve twinpair restracks | |
| spawn do | |
| lastprimes[i], cnts[i] = primes_sieve(ri, kmin, kmax, ks, start_num, end_num, modpg, primes, resinvrs) | |
| print "\r#{threadscnt.add(1)} of #{rescnt} residues done" | |
| done.send(nil) | |
| end end | |
| rescnt.times { done.receive } # wait for all threads to finish | |
| print "\r#{rescnt} of #{rescnt} residues done" | |
| lastprimes.each { |prm| last_prime = prm if last_prime < prm} | |
| primescnt += cnts.sum # count number primes in range | |
| if start_num == 2 && end_num == 2; primescnt = 1; last_prime = 2 end # for both inputs = 2 | |
| kn = krange % ks # set number of resgroups in last slice | |
| kn = ks if kn == 0 # if multiple of seg size set to seg size | |
| t2 = (Time.instant - t1).total_seconds # sieve execution time | |
| puts "\nsieve time = #{t2.round(6)} secs" # ssoz sieve time | |
| puts "total time = #{(t2 + te).round(6)} secs" # setup + sieve time | |
| puts "last segment = #{kn.format} resgroups; segment slices = #{((krange - 1)//ks + 1).format}" | |
| puts "total primes = #{primescnt.format}; last prime = #{last_prime.format}" | |
| end | |
| primes_ssoz |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment