Last active
January 5, 2026 10:39
-
-
Save terasakisatoshi/03b41d4cc068d3565eaf2e5dae99968b to your computer and use it in GitHub Desktop.
Optimized Binary GCD algorithm
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Binary GCD algorithm (Stein's algorithm) | |
| # Optimized implementation matching Rust performance | |
| # Fast right shift without bounds check | |
| # Masking shift amount lets compiler emit single lsr instruction | |
| # (ARM64/x86-64 lsr only uses low bits of shift amount anyway) | |
| @inline function unsafe_lshr(x::T, n::T) where T<:Unsigned | |
| x >> (n & T(8 * sizeof(T) - 1)) | |
| end | |
| """ | |
| mygcd(a::T, b::T) where T<:Signed -> T | |
| Binary GCD algorithm for signed integers. | |
| Uses unsafe shift to match Rust performance. | |
| """ | |
| @inline function mygcd(ain::T, bin::T)::T where T<:Signed | |
| U = unsigned(T) # Get unsigned counterpart type | |
| ain == 0 && return abs(bin) | |
| bin == 0 && return abs(ain) | |
| zb = trailing_zeros(bin) % U | |
| za = trailing_zeros(ain) % U | |
| a::U = reinterpret(U, abs(ain)) | |
| b::U = reinterpret(U, abs(bin >> zb)) | |
| k = min(zb, za) | |
| while a != zero(U) | |
| shifted_a = unsafe_lshr(a, za) | |
| diff = shifted_a - b | |
| za = trailing_zeros(diff) % U | |
| cmp = shifted_a >= b | |
| a = ifelse(cmp, diff, zero(U) - diff) # |a - b| | |
| b = ifelse(cmp, b, shifted_a) # min(a, b) - update b last | |
| end | |
| reinterpret(T, b << k) | |
| end | |
| """ | |
| mygcd(a::T, b::T) where T<:Unsigned -> T | |
| Binary GCD algorithm for unsigned integers. | |
| Uses unsafe shift to match Rust performance. | |
| """ | |
| @inline function mygcd(a::T, b::T)::T where T<:Unsigned | |
| a == 0 && return b | |
| b == 0 && return a | |
| # Factor out common powers of 2 | |
| za = trailing_zeros(a) % T | |
| zb = trailing_zeros(b) % T | |
| k = min(za, zb) | |
| # Remove factors of 2 from b | |
| b = unsafe_lshr(b, zb) | |
| while a != zero(T) | |
| shifted_a = unsafe_lshr(a, za) | |
| diff = shifted_a - b | |
| za = trailing_zeros(diff) % T | |
| cmp = shifted_a >= b | |
| a = ifelse(cmp, diff, zero(T) - diff) # |a - b| | |
| b = ifelse(cmp, b, shifted_a) # min(a, b) - update b last | |
| end | |
| b << k | |
| end | |
| """ | |
| gcd_euclidean(a::T, b::T) where T -> T | |
| Generic GCD using Euclidean algorithm (fallback). | |
| """ | |
| function gcd_euclidean(a::T, b::T)::T where T | |
| while b != zero(T) | |
| a, b = b, a % b | |
| end | |
| abs(a) | |
| end | |
| # Tests | |
| function run_tests() | |
| println("Running tests...") | |
| # Test Int64 | |
| @assert mygcd(Int64(0), Int64(5)) == 5 | |
| @assert mygcd(Int64(5), Int64(0)) == 5 | |
| @assert mygcd(Int64(12), Int64(8)) == 4 | |
| @assert mygcd(Int64(-12), Int64(8)) == 4 | |
| @assert mygcd(Int64(12), Int64(-8)) == 4 | |
| @assert mygcd(Int64(-12), Int64(-8)) == 4 | |
| @assert mygcd(Int64(48), Int64(18)) == 6 | |
| @assert mygcd(Int64(-48), Int64(-18)) == 6 | |
| @assert mygcd(Int64(1071), Int64(462)) == 21 | |
| # Test Int32 | |
| @assert mygcd(Int32(0), Int32(5)) == 5 | |
| @assert mygcd(Int32(12), Int32(8)) == 4 | |
| @assert mygcd(Int32(-12), Int32(8)) == 4 | |
| @assert mygcd(Int32(1071), Int32(462)) == 21 | |
| # Test Int16 | |
| @assert mygcd(Int16(0), Int16(5)) == 5 | |
| @assert mygcd(Int16(12), Int16(8)) == 4 | |
| @assert mygcd(Int16(-12), Int16(-8)) == 4 | |
| # Test Int8 | |
| @assert mygcd(Int8(0), Int8(5)) == 5 | |
| @assert mygcd(Int8(12), Int8(8)) == 4 | |
| @assert mygcd(Int8(-12), Int8(8)) == 4 | |
| # Test Int128 | |
| @assert mygcd(Int128(0), Int128(5)) == 5 | |
| @assert mygcd(Int128(12), Int128(8)) == 4 | |
| @assert mygcd(Int128(-12), Int128(8)) == 4 | |
| @assert mygcd(Int128(1071), Int128(462)) == 21 | |
| # Test UInt64 | |
| @assert mygcd(UInt64(0), UInt64(5)) == 5 | |
| @assert mygcd(UInt64(12), UInt64(8)) == 4 | |
| @assert mygcd(UInt64(1071), UInt64(462)) == 21 | |
| # Test UInt32 | |
| @assert mygcd(UInt32(0), UInt32(5)) == 5 | |
| @assert mygcd(UInt32(12), UInt32(8)) == 4 | |
| @assert mygcd(UInt32(1071), UInt32(462)) == 21 | |
| # Test UInt16 | |
| @assert mygcd(UInt16(0), UInt16(5)) == 5 | |
| @assert mygcd(UInt16(12), UInt16(8)) == 4 | |
| # Test UInt8 | |
| @assert mygcd(UInt8(0), UInt8(5)) == 5 | |
| @assert mygcd(UInt8(12), UInt8(8)) == 4 | |
| # Test UInt128 | |
| @assert mygcd(UInt128(0), UInt128(5)) == 5 | |
| @assert mygcd(UInt128(12), UInt128(8)) == 4 | |
| @assert mygcd(UInt128(1071), UInt128(462)) == 21 | |
| # Test against Base.gcd | |
| for a in 1:100, b in 1:100 | |
| @assert mygcd(a, b) == gcd(a, b) "Failed for ($a, $b)" | |
| end | |
| println("All tests passed!") | |
| end | |
| """ | |
| calc_pi(n::Int64) -> Float64 | |
| Approximate pi using probability that two numbers are coprime. | |
| The probability that two random integers are coprime is 6/pi^2. | |
| """ | |
| function calc_pi(n)::Float64 | |
| cnt::Int64 = 0 | |
| @inbounds for a in 1:n | |
| for b in 1:n | |
| cnt += ifelse(mygcd(a, b) == 1, 1, 0) | |
| end | |
| end | |
| sqrt(6.0 / (cnt / (n * n))) | |
| end | |
| # Main function | |
| function main() | |
| run_tests() | |
| n = 10000 | |
| # Warmup | |
| calc_pi(UInt64(n)) | |
| # Benchmark | |
| println("\nBenchmark:") | |
| for i in 1:3 | |
| GC.gc() | |
| start = time_ns() | |
| pi_approx = calc_pi(n) | |
| duration = (time_ns() - start) / 1e9 | |
| println("Run $i: $(duration)s pi=$pi_approx") | |
| end | |
| end | |
| if abspath(PROGRAM_FILE) == @__FILE__ | |
| main() | |
| end |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /// Binary GCD algorithm (Stein's algorithm) | |
| /// Ported from Julia's base/intfuncs.jl | |
| /// | |
| /// This is significantly faster than the Euclidean algorithm for machine integers: | |
| /// ~1.7x faster for i64, ~2.1x faster for i128 | |
| use std::time::Instant; | |
| /// Macro to generate optimized GCD implementations for unsigned types | |
| macro_rules! impl_gcd_unsigned { | |
| ($name:ident, $t:ty) => { | |
| #[inline] | |
| pub fn $name(mut a: $t, mut b: $t) -> $t { | |
| if a == 0 { | |
| return b; | |
| } | |
| if b == 0 { | |
| return a; | |
| } | |
| // Factor out common powers of 2 | |
| let mut za = a.trailing_zeros(); | |
| let zb = b.trailing_zeros(); | |
| let k = za.min(zb); | |
| // Remove factors of 2 from b | |
| b >>= zb; | |
| while a != 0 { | |
| // Remove factors of 2 from a | |
| a >>= za; | |
| // Compute absolute difference (original pattern) | |
| let d = a.max(b) - a.min(b); | |
| za = d.trailing_zeros(); | |
| b = a.min(b); | |
| a = d; | |
| } | |
| // Restore common factors of 2 | |
| b << k | |
| } | |
| }; | |
| } | |
| /// Macro to generate optimized GCD implementations for signed types | |
| macro_rules! impl_gcd_signed { | |
| ($name:ident, $t:ty, $ut:ty) => { | |
| #[inline] | |
| pub fn $name(ain: $t, bin: $t) -> $t { | |
| if ain == 0 { | |
| return bin.abs(); | |
| } | |
| if bin == 0 { | |
| return ain.abs(); | |
| } | |
| // Match original Julia port pattern exactly | |
| let zb = bin.trailing_zeros(); | |
| let mut za = ain.trailing_zeros(); | |
| let mut a = ain.unsigned_abs(); | |
| let mut b = (bin >> zb).unsigned_abs(); | |
| let k = za.min(zb); | |
| while a != 0 { | |
| a >>= za; | |
| // Use wrapping_sub pattern from original | |
| let diff = (a as $t).wrapping_sub(b as $t); | |
| let absd = diff.unsigned_abs(); | |
| za = (diff as $ut).trailing_zeros(); | |
| b = a.min(b); | |
| a = absd; | |
| } | |
| (b << k) as $t | |
| } | |
| }; | |
| } | |
| // Generate specialized implementations | |
| impl_gcd_unsigned!(gcd_u8, u8); | |
| impl_gcd_unsigned!(gcd_u16, u16); | |
| impl_gcd_unsigned!(gcd_u32, u32); | |
| impl_gcd_unsigned!(gcd_u64, u64); | |
| impl_gcd_unsigned!(gcd_u128, u128); | |
| impl_gcd_unsigned!(gcd_usize, usize); | |
| impl_gcd_signed!(gcd_i8, i8, u8); | |
| impl_gcd_signed!(gcd_i16, i16, u16); | |
| impl_gcd_signed!(gcd_i32, i32, u32); | |
| impl_gcd_signed!(gcd_i64, i64, u64); | |
| impl_gcd_signed!(gcd_i128, i128, u128); | |
| impl_gcd_signed!(gcd_isize, isize, usize); | |
| /// Generic GCD using Euclidean algorithm (fallback for any type) | |
| pub fn gcd_euclidean<T>(mut a: T, mut b: T) -> T | |
| where | |
| T: Copy + PartialEq + Default + std::ops::Rem<Output = T>, | |
| { | |
| let zero = T::default(); | |
| while b != zero { | |
| let t = b; | |
| b = a % b; | |
| a = t; | |
| } | |
| a | |
| } | |
| #[cfg(test)] | |
| mod tests { | |
| use super::*; | |
| #[test] | |
| fn test_gcd_u64() { | |
| assert_eq!(gcd_u64(0, 5), 5); | |
| assert_eq!(gcd_u64(5, 0), 5); | |
| assert_eq!(gcd_u64(12, 8), 4); | |
| assert_eq!(gcd_u64(48, 18), 6); | |
| assert_eq!(gcd_u64(17, 13), 1); | |
| assert_eq!(gcd_u64(100, 25), 25); | |
| assert_eq!(gcd_u64(1071, 462), 21); | |
| } | |
| #[test] | |
| fn test_gcd_u32() { | |
| assert_eq!(gcd_u32(0, 5), 5); | |
| assert_eq!(gcd_u32(12, 8), 4); | |
| assert_eq!(gcd_u32(48, 18), 6); | |
| } | |
| #[test] | |
| fn test_gcd_i64() { | |
| assert_eq!(gcd_i64(0, 5), 5); | |
| assert_eq!(gcd_i64(5, 0), 5); | |
| assert_eq!(gcd_i64(12, 8), 4); | |
| assert_eq!(gcd_i64(-12, 8), 4); | |
| assert_eq!(gcd_i64(12, -8), 4); | |
| assert_eq!(gcd_i64(-12, -8), 4); | |
| assert_eq!(gcd_i64(48, 18), 6); | |
| assert_eq!(gcd_i64(-48, -18), 6); | |
| } | |
| #[test] | |
| fn test_gcd_i32() { | |
| assert_eq!(gcd_i32(0, 5), 5); | |
| assert_eq!(gcd_i32(-12, 8), 4); | |
| assert_eq!(gcd_i32(12, -8), 4); | |
| } | |
| #[test] | |
| fn test_gcd_u128() { | |
| assert_eq!(gcd_u128(0, 5), 5); | |
| assert_eq!(gcd_u128(5, 0), 5); | |
| assert_eq!(gcd_u128(12, 8), 4); | |
| assert_eq!(gcd_u128(1071, 462), 21); | |
| } | |
| #[test] | |
| fn test_gcd_i128() { | |
| assert_eq!(gcd_i128(0, 5), 5); | |
| assert_eq!(gcd_i128(-12, 8), 4); | |
| assert_eq!(gcd_i128(12, -8), 4); | |
| assert_eq!(gcd_i128(-12, -8), 4); | |
| } | |
| } | |
| // Function to approximate pi using probability that two numbers are coprime | |
| fn calc_pi(n: i64) -> f64 { | |
| let mut cnt = 0i64; // Counter for coprime pairs | |
| // Loop through all pairs (a, b) where 1 <= a, b <= N | |
| for a in 1..=n { | |
| for b in 1..=n { | |
| // Check if a and b are coprime | |
| if gcd_i64(a, b) == 1 { | |
| cnt += 1; // Increment counter if coprime | |
| } | |
| } | |
| } | |
| // Probability that two numbers are coprime | |
| let prob = cnt as f64 / (n * n) as f64; | |
| // Approximate pi using the formula: pi ≈ sqrt(6 / prob) | |
| (6.0 / prob).sqrt() | |
| } | |
| // Main function to run the pi approximation | |
| fn main() { | |
| let n: i64 = 10000; | |
| // Warmup | |
| let _ = calc_pi(100); | |
| // Benchmark | |
| println!("Benchmark:"); | |
| for i in 1..=3 { | |
| let start = Instant::now(); | |
| let pi = calc_pi(n); | |
| let duration = start.elapsed(); | |
| println!("Run {}: {:?} pi={}", i, duration, pi); | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| calc_pi(n::Int64) -> Float64 | |
| Approximate pi using probability that two numbers are coprime. | |
| The probability that two random integers are coprime is 6/pi^2. | |
| """ | |
| function calc_pi(n)::Float64 | |
| cnt::Int64 = 0 | |
| @inbounds for a in 1:n | |
| for b in 1:n | |
| cnt += ifelse(gcd(a, b) == 1, 1, 0) | |
| end | |
| end | |
| sqrt(6.0 / (cnt / (n * n))) | |
| end | |
| # Main function | |
| function main() | |
| n = 10000 | |
| # Warmup | |
| calc_pi(UInt64(n)) | |
| # Benchmark | |
| println("\nBenchmark:") | |
| for i in 1:3 | |
| GC.gc() | |
| start = time_ns() | |
| pi_approx = calc_pi(n) | |
| duration = (time_ns() - start) / 1e9 | |
| println("Run $i: $(duration)s pi=$pi_approx") | |
| end | |
| end | |
| if abspath(PROGRAM_FILE) == @__FILE__ | |
| main() | |
| end |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Rust
Julia
$ julia src/main.jl Running tests... All tests passed! Benchmark: Run 1: 1.072994208s pi=3.141534239016629 Run 2: 1.07328325s pi=3.141534239016629 Run 3: 1.072947583s pi=3.141534239016629