Created
August 21, 2020 12:37
-
-
Save asterite/3207fe5e31c0b944fd57c754f9dfa56f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This Crystal source file is a multiple threaded implementation to perform an | |
# extremely fast Segmented Sieve of Zakiya (SSoZ) to find Twin Primes <= N. | |
# Inputs are single values N, or ranges N1 and N2, of 64-bits, 0 -- 2^64 - 1. | |
# Output is the number of twin primes <= N, or in range N1 to N2; the last | |
# twin prime value for the range; and the total time of execution. | |
# This code was developed on a System76 laptop with an Intel I7 6700HQ cpu, | |
# 2.6-3.5 GHz clock, with 8 threads, and 16GB of memory. Parameter tuning | |
# probably needed to optimize for other hardware systems (ARM, PowerPC, etc). | |
# Compile as: $ crystal build twinprimes_ssoz.cr -Dpreview_mt --release | |
# To reduce binary size do: $ strip twinprimes_ssoz | |
# Thread workers default to 4, set to system max for optimum performance. | |
# Single val: $ CRYSTAL_WORKERS=8 ./twinprimes_ssoz val1 | |
# Range vals: $ CRYSTAL_WORKERS=8 ./twinprimes_ssoz val1 val2 | |
# Mathematical and technical basis for implementation are explained here: | |
# https://www.academia.edu/37952623/The_Use_of_Prime_Generators_to_Implement_Fast_ | |
# Twin_Primes_Sieve_of_Zakiya_SoZ_Applications_to_Number_Theory_and_Implications_ | |
# for_the_Riemann_Hypotheses | |
# https://www.academia.edu/7583194/The_Segmented_Sieve_of_Zakiya_SSoZ_ | |
# https://www.academia.edu/19786419/PRIMES-UTILS_HANDBOOK | |
# This source code, and its updates, can be found here: | |
# https://gist.github.com/jzakiya/2b65b609f091dcbb6f792f16c63a8ac4 | |
# This code is provided free and subject to copyright and terms of the | |
# GNU General Public License Version 3, GPLv3, or greater. | |
# License copy/terms are here: http://www.gnu.org/licenses/ | |
# Copyright (c) 2017-2020; Jabari Zakiya -- jzakiya at gmail dot com | |
# Last update: 2020/8/18 | |
module Enumerable(T) | |
module End | |
extend self | |
end | |
def pmap(workers = 4, &block : T -> U) forall U | |
inputs = Array(Channel(T | End)).new(workers) { Channel(T | End).new } | |
outputs = Array(Channel(U | End)).new(workers) { Channel(U | End).new } | |
workers.times do |i| | |
spawn do | |
loop do | |
case input = inputs[i].receive | |
when End | |
break | |
else | |
output = block.call(input) | |
outputs[i].send output | |
end | |
end | |
outputs[i].send End | |
end | |
end | |
spawn do | |
each.with_index do |input, index| | |
inputs[index % workers].send(input) | |
end | |
workers.times do |i| | |
inputs[i].send(End) | |
end | |
end | |
Array(U).new.tap do |result| | |
i = 0 | |
closed = 0 | |
loop do | |
case output = outputs[i % workers].receive | |
when End | |
closed += 1 | |
break if closed == workers | |
else | |
result << output | |
end | |
i += 1 | |
end | |
end | |
end | |
end | |
struct Slice | |
# Ideally this would be fill(n) but I don't want to lost time on this now | |
def zero! | |
to_unsafe.clear(size) | |
end | |
end | |
# Customized gcd for prime generators; n > m; m odd | |
def gcd(m, n) | |
while m | 1 != 1 | |
t = m; m = n.remainder(m); n = t | |
end | |
m | |
end | |
# Compute modular inverse a^-1 to base m, e.g. a*(a^-1) mod m = 1 | |
def modinv(a0, m0) | |
return 1 if m0 == 1 | |
a, m = a0, m0 | |
x0, inv = 0, 1 | |
while a > 1 | |
inv -= (a // m) * x0 | |
a, m = m, a.remainder(m) | |
x0, inv = inv, x0 | |
end | |
inv += m0 if inv < 0 | |
inv | |
end | |
def gen_pg_parameters(prime) | |
# Create prime generator parameters for given Pn | |
puts "using Prime Generator parameters for P#{prime}" | |
primes = [2, 3, 5, 7, 11, 13, 17, 19, 23] | |
modpg, res_0 = 1, 0 # compute Pn's modulus and res_0 value | |
primes.each { |prm| res_0 = prm; break if prm > prime; modpg *= prm } | |
restwins = [] of Int32 # save upper twinpair residues here | |
inverses = Array.new(modpg + 2, 0) # save Pn's residues inverses here | |
pc, inc, res = 5, 2, 0 # use P3's PGS to generate pcs | |
while pc < modpg // 2 # find a residue, then complement|inverse | |
if gcd(pc, modpg) == 1 # if pc a residue | |
pc_mc = modpg - pc # create its modular complement | |
inv_r = modinv(pc, modpg) # compute residues's inverse | |
inverses[pc] = inv_r # save its inverse | |
inverses[inv_r] = pc # save its inverse's inverse | |
inv_r = modinv(pc_mc, modpg) # compute complement's inverse | |
inverses[pc_mc] = inv_r # save its inverse | |
inverses[inv_r] = pc_mc # save its inverse's inverse | |
if res + 2 == pc | |
restwins << pc; restwins << (pc_mc + 2) | |
end # save hi_tp residues | |
res = pc | |
end | |
pc += inc; inc ^= 0b110 | |
end | |
restwins.sort!; restwins << (modpg + 1) # last residue is last hi_tp | |
inverses[modpg + 1] = 1; inverses[modpg - 1] = modpg - 1 # last 2 residues are self inverses | |
{modpg, res_0, restwins.size, restwins, inverses} | |
end | |
def set_sieve_parameters(start_num : UInt64, end_num) | |
# Select at runtime best PG and segment size parameters for input values. | |
# These are good estimates derived from PG data profiling. Can be improved. | |
range = end_num - start_num | |
bn, pg = 0, 3 | |
if end_num < 49 | |
bn = 1; pg = 3 | |
elsif range < 10_000_000 | |
bn = 16; pg = 5 | |
elsif range < 1_100_000_000 | |
bn = 32; pg = 7 | |
elsif range < 35_500_000_000 | |
bn = 64; pg = 11 | |
elsif range < 15_000_000_000_000 | |
pg = 13 | |
if range > 7_000_000_000_000 | |
bn = 384 | |
elsif range > 2_500_000_000_000 | |
bn = 320 | |
elsif range > 250_000_000_000 | |
bn = 192 | |
else | |
bn = 128 | |
end | |
else | |
bn = 384; pg = 17 | |
end | |
modpg, res_0, pairscnt, restwins, resinvrs = gen_pg_parameters(pg) | |
kmin : UInt64 = (start_num - 2) // modpg + 1 # number of resgroups to start_num | |
kmax = (end_num - 2) // modpg + 1 # number of resgroups to end_num | |
krange = kmax - kmin + 1 # number of resgroups in range, at least 1 | |
n = krange < 37_500_000_000_000 ? 4 : (krange < 975_000_000_000_000 ? 6 : 8) | |
b = bn * 1024 * n # set seg size to optimize for selected PG | |
kb = krange < b ? krange : b # segments resgroups size | |
puts "segment size = #{kb} resgroups; seg array is [1 x #{((kb - 1) >> 6) + 1}] 64-bits" | |
maxpairs = krange * pairscnt # maximum number of twinprime pcs | |
puts "twinprime candidates = #{maxpairs}; resgroups = #{krange}" | |
{modpg, res_0, kb, kmin, kmax, krange, pairscnt, restwins, resinvrs} | |
end | |
def sozpg(val, res_0) | |
# Compute the primes r0..sqrt(input_num) and store in 'primes' array. | |
# Any algorithm (fast|small) is usable. Here the SoZ for P5 is used. | |
md, rscnt = 30u64, 8 # P5's modulus and residue count | |
res = [7, 11, 13, 17, 19, 23, 29, 31] # P5's residues | |
posn = [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 3, 0, 4, 0, 0, 0, 5, 0, 0, 0, 0, 0, 6, 0, 7] | |
kmax = (val - 7) // md + 1 # number of resgroups upto input value | |
prms = Array(UInt8).new(kmax, 0) # byte array of prime candidates, init '0' | |
sqrt_n = Math.sqrt(val).to_u32 # compute integer sqrt of val | |
modk, r, k = 0, -1, 0 # initialize residue parameters | |
while true # for r0..sqrtN primes mark their multiples | |
if (r += 1) == rscnt | |
r = 0; modk += md; k += 1 | |
end | |
next if (prms[k] & (1 << r)) != 0 # skip pc if not prime | |
prm_r = res[r] # if prime save its residue value | |
prime = modk + prm_r # numerate the prime value | |
break if prime > sqrt_n # we're finished when it's > sqrtN | |
res.each do |ri| # mark prime's multiples in prms | |
prod = prm_r * ri - 2 # compute cross-product for prm_r|ri pair | |
bit_r = 1 << posn[prod % md] # bit mask for prod's residue | |
kpm = k * (prime + ri) + prod // md # 1st resgroup for prime mult | |
while kpm < kmax | |
prms[kpm] |= bit_r; kpm += prime | |
end | |
end | |
end | |
# prms now contains the nonprime positions for the prime candidates r0..N | |
# extract primes into ssoz var 'primes' | |
primes = [] of UInt64 # create empty dynamic array for primes | |
kmax.times do |k| # for each resgroup | |
rscnt.times do |r| # numerate|store primes from pcs list | |
if (prms[k] & (1 << r)) == 0 # if bit location a prime | |
prime = md * k + res[r] # numerate its value, store if in range | |
primes << prime if (res_0..val).includes? prime | |
end | |
end | |
end | |
primes | |
end | |
def nextp_init(rhi : Int32, kmin : UInt64, modpg : Int32, primes : Array(UInt64), resinvrs : Array(Int32)) | |
# Initialize 'nextp' array for twinpair upper residue rhi in 'restwins'. | |
# Compute 1st prime multiple resgroups for each prime r0..sqrt(N) and | |
# store consecutively as lo_tp|hi_tp pairs for their restracks. | |
nextp = Slice(UInt64).new(primes.size*2, 0) # 1st mults array for twinpair | |
r_hi, r_lo = rhi, rhi - 2 # upper|lower twinpair residue values | |
primes.each_with_index do |prime, j| # for each prime r0..sqrt(N) | |
k = (prime - 2) // modpg # find the resgroup it's in | |
r = (prime - 2) % modpg + 2 # and its residue value | |
r_inv = resinvrs[r] # and residue inverse | |
ri = (r_lo * r_inv - 2) % modpg + 2 # compute r's ri for r_lo | |
ki = k * (prime + ri) + (r * ri - 2) // modpg # and 1st mult | |
ki < kmin ? (ki = (kmin - ki) % prime; ki = prime - ki if ki > 0) : (ki -= kmin) | |
nextp[2 * j] = ki.to_u64 # prime's 1st mult resgroup val in range for lo_tp | |
ri = (r_hi * r_inv - 2) % modpg + 2 # compute r's ri for r_hi | |
ki = k * (prime + ri) + (r * ri - 2) // modpg # and 1st mult resgroup | |
ki < kmin ? (ki = (kmin - ki) % prime; ki = prime - ki if ki > 0) : (ki -= kmin) | |
nextp[2 * j | 1] = ki.to_u64 # prime's 1st mult resgroup val in range for hi_tp | |
end | |
nextp | |
end | |
def twins_sieve(r_hi, kmin : UInt64, kmax, kb, start_num, end_num, modpg, primes : Array(UInt64), resinvrs) | |
# Perform in thread the ssoz for given twinpair residues for Kmax resgroups. | |
# First create|init 'nextp' array of 1st prime mults for given twinpair, | |
# stored consequtively in 'nextp', and init seg array for kb resgroups. | |
# For sieve, mark resgroup bits to '1' if either twinpair restrack is nonprime | |
# for primes mults resgroups, and update 'nextp' restrack slices acccordingly. | |
# Return the last twinprime|sum for the range for this twinpair residues. | |
s = 6 # shift value for 64 bits | |
bmask = (1 << s) - 1 # bitmask val for 64 bits | |
sum, ki, kn = 0_u64, kmin - 1, kb # init these parameters | |
hi_tp, k_max = 0_u64, kmax # max twinprime|resgroup | |
seg = Slice(UInt64).new(((kb - 1) >> s) + 1) # seg arry of kb resgroups | |
ki += 1 if ((ki * modpg) + r_hi - 2) < start_num # ensure lo tp in range | |
k_max -= 1 if ((k_max - 1) * modpg + r_hi) > end_num # ensure hi tp in range | |
nextp = nextp_init(r_hi, ki, modpg, primes, resinvrs) # init nextp array | |
while ki < k_max # for kb size slices upto kmax | |
kn = k_max - ki if kb > (k_max - ki) # adjust kb size for last seg | |
primes.each_with_index do |prime, j| # for each prime r0..sqrt(N) | |
# for lower twinpair residue track | |
k = nextp[j * 2] # starting from this resgroup in seg | |
while k < kn # mark primenth resgroup bits prime mults | |
seg[k >> s] |= 1_u64 << (k & bmask) | |
k += prime | |
end # set resgroup for prime's next multiple | |
nextp[j * 2] = k - kn # save 1st resgroup in next eligible seg | |
# for upper twinpair residue track | |
k = nextp[j * 2 | 1] # starting from this resgroup in seg | |
while k < kn # mark primenth resgroup bits prime mults | |
seg[k >> s] |= 1_u64 << (k & bmask) | |
k += prime | |
end # set resgroup for prime's next multiple | |
nextp[j * 2 | 1] = k - kn # save 1st resgroup in next eligible seg | |
end # set as nonprime unused bits in last | |
# seg[i]; so fast, do for every seg | |
seg[(kn - 1) >> s] |= (~0u64 << (((kn - 1) & bmask))) << 1 | |
cnt = 0 # initialize segment twinprimes count | |
# then count the twinprimes in segment | |
seg[0..(kn - 1) >> s].each { |m| cnt += (1 << s) - m.popcount } | |
if cnt > 0 # if segment has twinprimes | |
sum += cnt # add segment count to total range count | |
upk = kn - 1 # from end of seg, count back to largest tp | |
while seg[upk >> s] & (1_u64 << (upk & bmask)) != 0 | |
upk -= 1 | |
end | |
hi_tp = upk + ki # set its full range resgroup value | |
end | |
ki += kb # set 1st resgroup val of next seg slice | |
seg.zero! if ki < k_max # set next seg to all primes if this one not last | |
end # when sieve done, numerate largest twin in range | |
# for small ranges w/o twins set largest to 1 | |
hi_tp = (r_hi > end_num || sum == 0) ? 1 : hi_tp * modpg + r_hi | |
{hi_tp, sum} # return largest twinprime|twins count in range | |
end | |
def process(r_hi, kmin, kmax, kb, start_num, end_num, modpg, primes, resinvrs, pairscnt, i, done) | |
l, c = twins_sieve(r_hi, kmin, kmax, kb, start_num, end_num, modpg, primes, resinvrs) | |
print "\r#{i + 1} of #{pairscnt} twinpairs done" | |
done.send({i, l.to_u64, c.to_u64}) | |
end | |
def twinprimes_ssoz | |
end_num = ARGV[0].to_u64 | |
start_num = ARGV.size < 2 ? 3_u64 : ARGV[1].to_u64 | |
start_num, end_num = end_num, start_num if start_num > end_num | |
puts "threads = #{System.cpu_count}" | |
ts = Time.monotonic # start timing sieve setup execution | |
start_num |= 1_u64 # if start_num even increase by 1 | |
end_num = (end_num - 1) | 1 # if end_num even decrease by 1 | |
# select Pn, set sieving params for inputs | |
modpg, res_0, kb, kmin, kmax, range, pairscnt, restwins, resinvrs = set_sieve_parameters(start_num, end_num) | |
primes = end_num < 49 ? [5] of UInt64 : sozpg(Math.sqrt(end_num).to_u64, res_0) # sieve primes | |
puts "each of #{pairscnt} threads has nextp[2 x #{primes.size}] array" | |
twinscnt = 0_u64 # init twinprimes range count | |
lo_range = restwins[0] - 3 # lo_range = lo_tp - 1 | |
[3, 5, 11, 17].each do |tp| # excluded low tp values PGs used | |
break if end_num == 3 # if 3 end of range, no twinprimes | |
twinscnt += 1 if (start_num..lo_range).includes? tp # cnt any small tps | |
end | |
te = (Time.monotonic - ts).total_seconds.round(6) | |
puts "setup time = #{te} secs" # display sieve setup time | |
puts "perform twinprimes ssoz sieve" | |
t1 = Time.monotonic # start timing ssoz sieve execution | |
puts "restwins size: #{restwins.size}" | |
counter = Atomic.new(0) | |
results = restwins.pmap(8) do |r_hi| # sieve twinpair restracks | |
result = twins_sieve(r_hi, kmin, kmax, kb, start_num, end_num, modpg, primes, resinvrs) | |
counter.add(1) | |
print "\r#{counter.get} of #{pairscnt} twinpairs done" | |
result | |
end | |
lastwins = results.map &.[0] | |
cnts = results.map &.[1] | |
last_twin = 0_u64 # init val for largest twinprime | |
pairscnt.times do |i| # find largest twinprime|sum in range | |
twinscnt += cnts[i] | |
last_twin = lastwins[i] if last_twin < lastwins[i] | |
end | |
last_twin = 5 if end_num == 5 && twinscnt == 1 | |
kn = range % kb # set number of resgroups in last slice | |
kn = kb if kn == 0 # if multiple of seg size set to seg size | |
t2 = (Time.monotonic - t1).total_seconds # sieve execution time | |
puts "\nsieve time = #{t2.round(6)} secs" # ssoz sieve time | |
puts "total time = #{(t2 + te).round(6)} secs" # setup + sieve time | |
puts "last segment = #{kn} resgroups; segment slices = #{(range - 1)//kb + 1}" | |
puts "total twins = #{twinscnt}; last twin = #{last_twin - 1}+/-1" | |
end | |
twinprimes_ssoz |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment