Skip to content

Instantly share code, notes, and snippets.

@MrCroxx
Created April 2, 2024 06:44
Show Gist options
  • Select an option

  • Save MrCroxx/20b3fe56d8fbd9ed57d735be5be48909 to your computer and use it in GitHub Desktop.

Select an option

Save MrCroxx/20b3fe56d8fbd9ed57d735be5be48909 to your computer and use it in GitHub Desktop.
use std::{
hint::black_box,
sync::{
atomic::{AtomicUsize, Ordering},
Arc,
},
time::{Duration, Instant},
};
use itertools::Itertools;
/// Times `loops` back-to-back calls to `coarsetime::Instant::now()`.
///
/// The returned value is the wall-clock time, taken with `std::time::Instant`,
/// that the whole loop needed to run on the calling thread.
fn coarse(loops: usize) -> Duration {
    let started = Instant::now();
    (0..loops).for_each(|_| {
        // Only the cost of the call matters; the timestamp itself is discarded.
        let _ = coarsetime::Instant::now();
    });
    started.elapsed()
}
/// Baseline measurement: times `loops` increments of a plain local counter.
///
/// Every updated value is handed to `black_box`, so the optimizer has to
/// materialize the counter on each iteration. Without an observable use the
/// counter is dead and an optimizing build is free to remove the loop
/// entirely, which would make this baseline measure nothing.
///
/// Returns the wall-clock time spent in the loop on the calling thread.
fn primitive(loops: usize) -> Duration {
    let mut cnt = 0usize;
    let now = Instant::now();
    for _ in 0..loops {
        cnt += 1;
        // Keep the increment observable so the loop cannot be optimized away.
        let _ = black_box(cnt);
    }
    now.elapsed()
}
/// Times `loops` relaxed `fetch_add(1)` operations on the shared counter.
///
/// Every iteration performs one read-modify-write on `atomic`, so after the
/// call the counter has grown by exactly `loops`. Returns the wall-clock time
/// the loop took on the calling thread.
fn atomic(loops: usize, atomic: Arc<AtomicUsize>) -> Duration {
    let started = Instant::now();
    (0..loops).for_each(|_| {
        // The previous value is irrelevant here; only the increment is measured.
        let _previous = atomic.fetch_add(1, Ordering::Relaxed);
    });
    started.elapsed()
}
/// Times a batched counter update: a local counter is bumped on every
/// iteration, and the shared `atomic` is only written once per `skip`
/// iterations (adding `skip` at once). All other iterations perform a relaxed
/// load of the shared counter instead.
///
/// After the loop, any increments that did not fill a complete batch
/// (`loops % skip`) are flushed, so the shared counter always grows by exactly
/// `loops`, even when `loops` is not a multiple of `skip`.
///
/// Returns the wall-clock time spent on the calling thread, including the
/// final flush.
///
/// # Panics
///
/// Panics if `skip` is zero.
fn atomic_skip(loops: usize, atomic: Arc<AtomicUsize>, skip: usize) -> Duration {
    // Fail with a clear message instead of an opaque remainder-by-zero panic.
    assert!(skip > 0, "skip must be greater than zero");
    let mut cnt = 0usize;
    let now = Instant::now();
    for _ in 0..loops {
        cnt += 1;
        if cnt % skip == 0 {
            // A full batch has accumulated locally: publish it in one write.
            let _ = atomic.fetch_add(skip, Ordering::Relaxed);
        } else {
            let _ = atomic.load(Ordering::Relaxed);
        }
    }
    // Publish the trailing partial batch so no increments are lost.
    let remainder = cnt % skip;
    if remainder != 0 {
        let _ = atomic.fetch_add(remainder, Ordering::Relaxed);
    }
    now.elapsed()
}
/// Runs `f` once on each of `threads` freshly spawned threads and prints the
/// average time per iteration.
///
/// * `name` - label printed in the first column of the report line.
/// * `threads` - number of threads to spawn; each one runs its own clone of `f`.
/// * `loops` - iteration count used for the per-iteration average and printed
///   in the report. It is not passed to `f`; the caller is expected to have
///   baked the same count into the closure.
/// * `f` - the workload; it returns the duration it measured for itself.
///
/// The reported value is the sum of all returned durations divided by
/// `threads` and then by `loops`, truncated to whole nanoseconds. When
/// `threads` or `loops` is zero the average is reported as zero rather than
/// panicking on a division by zero.
///
/// # Panics
///
/// Panics if any of the spawned threads panicked.
fn benchmark<F>(name: &str, threads: usize, loops: usize, f: F)
where
    F: Fn() -> Duration + Clone + Send + 'static,
{
    // Spawn every thread before joining any of them so the workloads overlap.
    let handles = (0..threads)
        .map(|_| std::thread::spawn(black_box(f.clone())))
        .collect_vec();
    let mut dur = Duration::from_nanos(0);
    for handle in handles {
        dur += handle.join().expect("benchmark thread panicked");
    }
    // Checked division: a zero thread or loop count yields 0 instead of a panic.
    let per_iter_nanos = dur
        .as_nanos()
        .checked_div(threads as u128)
        .and_then(|per_thread| per_thread.checked_div(loops as u128))
        .unwrap_or(0);
    // Saturate instead of silently truncating if the value exceeds u64.
    let per_iter = Duration::from_nanos(u64::try_from(per_iter_nanos).unwrap_or(u64::MAX));
    println!(
        "{:16} {} threads {} loops: {:?} per iter",
        name, threads, loops, per_iter
    );
}
/// Runs every benchmark variant for a series of thread counts, printing one
/// report line per variant and a blank line before each thread-count group.
fn main() {
    // Iterations executed by every thread in every benchmark.
    const LOOPS: usize = 10_000_000;
    // Batch sizes exercised by the "atomic skip N" variants, in report order.
    const SKIPS: [usize; 4] = [8, 16, 32, 64];

    for threads in [1usize, 4, 8, 16, 32] {
        println!();

        benchmark("primitive", threads, LOOPS, move || primitive(LOOPS));

        let counter = Arc::new(AtomicUsize::new(0));
        benchmark("atomic", threads, LOOPS, move || {
            atomic(LOOPS, Arc::clone(&counter))
        });

        for skip in SKIPS {
            // Each variant gets its own counter starting from zero.
            let counter = Arc::new(AtomicUsize::new(0));
            let label = format!("atomic skip {}", skip);
            benchmark(&label, threads, LOOPS, move || {
                atomic_skip(LOOPS, Arc::clone(&counter), skip)
            });
        }

        benchmark("coarse", threads, LOOPS, move || coarse(LOOPS));
    }
}
/*
Results:
primitive 1 threads 10000000 loops: 0ns per iter
atomic 1 threads 10000000 loops: 1ns per iter
atomic skip 8 1 threads 10000000 loops: 0ns per iter
atomic skip 16 1 threads 10000000 loops: 0ns per iter
atomic skip 32 1 threads 10000000 loops: 0ns per iter
atomic skip 64 1 threads 10000000 loops: 0ns per iter
coarse 1 threads 10000000 loops: 4ns per iter
primitive 4 threads 10000000 loops: 0ns per iter
atomic 4 threads 10000000 loops: 30ns per iter
atomic skip 8 4 threads 10000000 loops: 7ns per iter
atomic skip 16 4 threads 10000000 loops: 1ns per iter
atomic skip 32 4 threads 10000000 loops: 2ns per iter
atomic skip 64 4 threads 10000000 loops: 1ns per iter
coarse 4 threads 10000000 loops: 15ns per iter
primitive 8 threads 10000000 loops: 0ns per iter
atomic 8 threads 10000000 loops: 61ns per iter
atomic skip 8 8 threads 10000000 loops: 14ns per iter
atomic skip 16 8 threads 10000000 loops: 7ns per iter
atomic skip 32 8 threads 10000000 loops: 5ns per iter
atomic skip 64 8 threads 10000000 loops: 3ns per iter
coarse 8 threads 10000000 loops: 30ns per iter
primitive 16 threads 10000000 loops: 0ns per iter
atomic 16 threads 10000000 loops: 109ns per iter
atomic skip 8 16 threads 10000000 loops: 31ns per iter
atomic skip 16 16 threads 10000000 loops: 15ns per iter
atomic skip 32 16 threads 10000000 loops: 12ns per iter
atomic skip 64 16 threads 10000000 loops: 9ns per iter
coarse 16 threads 10000000 loops: 58ns per iter
primitive 32 threads 10000000 loops: 0ns per iter
atomic 32 threads 10000000 loops: 375ns per iter
atomic skip 8 32 threads 10000000 loops: 72ns per iter
atomic skip 16 32 threads 10000000 loops: 42ns per iter
atomic skip 32 32 threads 10000000 loops: 31ns per iter
atomic skip 64 32 threads 10000000 loops: 20ns per iter
coarse 32 threads 10000000 loops: 254ns per iter
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment