Skip to content

Instantly share code, notes, and snippets.

@KirillLykov
Created February 22, 2023 18:03
Show Gist options
  • Select an option

  • Save KirillLykov/506596d1d777e11b3771930fc79dda0d to your computer and use it in GitHub Desktop.

Select an option

Save KirillLykov/506596d1d777e11b3771930fc79dda0d to your computer and use it in GitHub Desktop.
use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId};
use itertools::Itertools;
use rand::Rng;
/// Element type consumed and accumulated by the summing functions in this file.
type T = u64;
/// Sums `data` column-wise, treating it as a flat sequence of `(x, y, z)` triples.
///
/// The triples are produced by `Itertools::tuples`, so trailing elements that do
/// not form a complete triple are not included in any of the sums.
///
/// Returns `(sum of x, sum of y, sum of z)`.
fn sum_1(data: &Vec<T>) -> (T, T, T) {
    let (mut first, mut second, mut third) = (T::default(), T::default(), T::default());
    for (a, b, c) in data.iter().tuples() {
        first += a;
        second += b;
        third += c;
    }
    (first, second, third)
}
/// Sums `data` column-wise, treating it as a flat sequence of `(x, y, z)` triples.
///
/// The input is walked with `chunks(3)`; iteration stops at the first chunk that
/// is not exactly three elements long, so an incomplete trailing chunk is not
/// included in any of the sums.
///
/// Returns `(sum of x, sum of y, sum of z)`.
fn sum_2(data: &Vec<T>) -> (T, T, T) {
    let (mut first, mut second, mut third) = (T::default(), T::default(), T::default());
    for triple in data.chunks(3) {
        match triple {
            [a, b, c] => {
                first += a;
                second += b;
                third += c;
            }
            // Only the final chunk can be shorter than three elements.
            _ => break,
        }
    }
    (first, second, third)
}
/// Benchmarks `sum_1` against `sum_2` on the same randomly generated input.
///
/// Both functions are registered in the "Sum" benchmark group. The size range
/// `(100_000..200_000).step_by(100_000)` yields a single input length, 100_000,
/// and the input values are drawn uniformly from `0..10000`.
fn bench_fibs(c: &mut Criterion) {
    let mut rng = rand::thread_rng();
    let mut group = c.benchmark_group("Sum");
    for len in (100_000..200_000).step_by(100_000) {
        let input: Vec<u64> = (0..len).map(|_| rng.gen_range(0..10000)).collect();

        let tuple_id = BenchmarkId::new(" using tuple", len);
        group.bench_with_input(tuple_id, &input, |bencher, data| {
            bencher.iter(|| sum_1(data))
        });

        let chunk_id = BenchmarkId::new(" using chunk(3)", len);
        group.bench_with_input(chunk_id, &input, |bencher, data| {
            bencher.iter(|| sum_2(data))
        });
    }
    group.finish();
}
// Collect `bench_fibs` into the benchmark group `benches` and hand that group
// to criterion's generated entry point.
criterion_group!(benches, bench_fibs);
criterion_main!(benches);
@KirillLykov
Copy link
Author

	// Compiler output (AArch64) for sum_2. x0 points at the Vec argument; the
	// three sums are written to the 24 bytes of memory addressed by x8.
	.globl	try_bml::sum_2
	.p2align	2
try_bml::sum_2:
Lfunc_begin1:
	.cfi_startproc
	// Prologue: save frame pointer and link register.
	stp x29, x30, [sp, #-16]!
	.cfi_def_cfa_offset 16
	mov x29, sp
	.cfi_def_cfa w29, 16
	.cfi_offset w30, -8
	.cfi_offset w29, -16
	// Zero the three 64-bit result slots at [x8].
	stp xzr, xzr, [x8]
	str xzr, [x8, #16]
	// x9 = word at offset 16 of the Vec; used below as the element count.
	ldr x9, [x0, #16]
	// Fewer than 3 elements: return with the zeroed result.
	subs x10, x9, #3
	b.lo LBB1_8
	// x15 = word at offset 0 of the Vec; used below as the data pointer.
	ldr x15, [x0]
	// Take the vectorized path only when (count - 3) > 8.
	cmp x10, #8
	b.hi LBB1_3
	// Scalar-only setup: x16/x12/x11 are the three running sums, x10 the read cursor.
	mov x16, #0
	mov x12, #0
	mov x11, #0
	mov x10, x15
	b LBB1_6
LBB1_3:
	// x11 = 0xAAAAAAAAAAAAAAAB; the high multiply followed by a shift right by 1
	// computes (count - 3) / 3 without a divide instruction.
	mov x11, #-6148914691236517206
	movk x11, #43691
	umulh x10, x10, x11
	lsr x10, x10, #1
	// x13 = number of complete triples.
	add x13, x10, #1
	// x14 = x13 rounded down to a multiple of 4 (triples consumed by the vector loop).
	and x14, x13, #0x7ffffffffffffffc
	// x10 = data + x14 * 24: where the scalar tail loop starts reading.
	mov w10, #24
	madd x10, x14, x10, x15
	// x9 = count - 3 * x14: elements left over for the scalar tail loop.
	sub x11, x14, x14, lsl #2
	add x9, x9, x11
	// v0..v5 are six zeroed vector accumulators (two per output sum);
	// x11 counts the triples still to be processed by the vector loop.
	movi.2d v0, #0000000000000000
	mov x11, x14
	movi.2d v1, #0000000000000000
	movi.2d v2, #0000000000000000
	movi.2d v3, #0000000000000000
	movi.2d v4, #0000000000000000
	movi.2d v5, #0000000000000000
LBB1_4:
	// Vector loop: each iteration consumes 96 bytes (4 triples). Each ld3
	// de-interleaves two triples, so v16/v19 hold first elements, v17/v20
	// second elements and v18/v21 third elements.
	mov x12, x15
	add x15, x15, #96
	ld3.2d { v16, v17, v18 }, [x12], #48
	add.2d v0, v0, v16
	ld3.2d { v19, v20, v21 }, [x12]
	add.2d v1, v1, v19
	add.2d v2, v2, v17
	add.2d v3, v3, v20
	add.2d v4, v4, v18
	add.2d v5, v5, v21
	subs x11, x11, #4
	b.ne LBB1_4
	// Reduce each accumulator pair to a scalar:
	// x11 = sum of third elements, x12 = sum of second, x16 = sum of first.
	add.2d v4, v5, v4
	addp.2d d4, v4
	fmov x11, d4
	add.2d v2, v3, v2
	addp.2d d2, v2
	fmov x12, d2
	add.2d v0, v1, v0
	addp.2d d0, v0
	fmov x16, d0
	// If every complete triple was handled by the vector loop, skip the scalar tail.
	cmp x13, x14
	b.eq LBB1_7
LBB1_6:
	// Scalar loop: one triple (24 bytes) per iteration while at least 3 elements remain.
	sub x9, x9, #3
	ldp x13, x14, [x10]
	add x16, x16, x13
	add x12, x12, x14
	ldr x13, [x10, #16]
	add x11, x11, x13
	add x10, x10, #24
	cmp x9, #3
	b.hs LBB1_6
LBB1_7:
	// Store the three final sums to the result slots at [x8].
	stp x16, x12, [x8]
	str x11, [x8, #16]
LBB1_8:
	// Epilogue: restore frame pointer and link register, then return.
	.cfi_def_cfa wsp, 16
	ldp x29, x30, [sp], #16
	.cfi_def_cfa_offset 0
	.cfi_restore w30
	.cfi_restore w29
	ret

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment