Skip to content

Instantly share code, notes, and snippets.

@puzpuzpuz
Last active August 20, 2025 10:25
Show Gist options
  • Select an option

  • Save puzpuzpuz/4df9a361a3b1fee9149d83397c1933d7 to your computer and use it in GitHub Desktop.

Select an option

Save puzpuzpuz/4df9a361a3b1fee9149d83397c1933d7 to your computer and use it in GitHub Desktop.
Benchmarks for QuestDB array shape calculation function
use std::hint::black_box;
use std::time::Instant;
const ARRAY_NDIMS_LIMIT: usize = 32;
// Simple linear congruential generator for reproducible random numbers
struct SimpleRng {
state: u64,
}
impl SimpleRng {
fn new(seed: u64) -> Self {
Self { state: seed }
}
fn next(&mut self) -> u32 {
self.state = self.state.wrapping_mul(1103515245).wrapping_add(12345);
(self.state >> 16) as u32
}
fn gen_range(&mut self, min: u32, max: u32) -> u32 {
min + (self.next() % (max - min + 1))
}
}
// Old implementation (Claude-optimized version)
fn calculate_array_shape_old(
shape: &mut [u32; ARRAY_NDIMS_LIMIT],
max_rep_level: u32,
rep_levels: &[u32],
) {
if rep_levels.is_empty() {
return;
}
let max_rep_level = max_rep_level as usize;
let mut counts = [0_u32; ARRAY_NDIMS_LIMIT];
// Common case optimization for small dimensions
if max_rep_level <= 4 {
// Unrolled version for common small dimensions
for &rep_level in rep_levels {
let rep_level = rep_level.max(1) as usize;
// Reset counts - unrolled for small dimensions
match max_rep_level {
2 => {
if rep_level < 2 {
counts[1] = 0;
}
}
3 => {
if rep_level < 3 {
counts[2] = 0;
}
if rep_level < 2 {
counts[1] = 0;
}
}
4 => {
if rep_level < 4 {
counts[3] = 0;
}
if rep_level < 3 {
counts[2] = 0;
}
if rep_level < 2 {
counts[1] = 0;
}
}
_ => {
counts[rep_level] = 0;
}
}
// Always increment the deepest level
counts[max_rep_level - 1] += 1;
// Update shape - unrolled for better performance
for i in 0..max_rep_level {
let count = counts[i];
shape[i] = shape[i].max(count);
}
// Increment intermediate dimensions
if rep_level > 0 {
for dim in (rep_level - 1)..(max_rep_level - 1) {
counts[dim] += 1;
shape[dim] = shape[dim].max(counts[dim]);
}
}
}
} else {
// General case for higher dimensions
const CHUNK_SIZE: usize = 64;
for chunk in rep_levels.chunks(CHUNK_SIZE) {
for &rep_level in chunk {
let rep_level = rep_level.max(1) as usize;
// Reset counts for dimensions deeper than repetition level
for dim in rep_level..max_rep_level {
counts[dim] = 0;
}
// Increment count at the deepest level
counts[max_rep_level - 1] += 1;
// Update shape with branchless max
for dim in 0..max_rep_level {
shape[dim] = shape[dim].max(counts[dim]);
}
// Increment deeper dimension counts
for dim in (rep_level - 1)..(max_rep_level - 1) {
counts[dim] += 1;
shape[dim] = shape[dim].max(counts[dim]);
}
}
}
}
}
// New implementation (current version)
fn calculate_array_shape_new(
shape: &mut [u32; ARRAY_NDIMS_LIMIT],
max_rep_level: u32,
rep_levels: &[u32],
) {
if rep_levels.is_empty() {
return;
}
let max_rep_level = max_rep_level as usize;
let mut counts = [0_u32; ARRAY_NDIMS_LIMIT];
match max_rep_level {
// Common case optimization for small dimensions - unrolled loops
1 => {
shape[0] = rep_levels.len() as u32;
}
2 => {
for &rep_level in rep_levels {
let rep_level = rep_level.max(1) as usize;
if rep_level < 2 {
counts[0] += 1;
counts[1] = 1;
} else {
counts[1] += 1;
}
shape[1] = shape[1].max(counts[1]);
}
shape[0] = shape[0].max(counts[0]);
}
3 => {
for &rep_level in rep_levels {
let rep_level = rep_level.max(1) as usize;
if rep_level < 2 {
counts[0] += 1;
counts[1] = 0;
}
if rep_level < 3 {
counts[1] += 1;
counts[2] = 1;
} else {
counts[2] += 1;
}
shape[1] = shape[1].max(counts[1]);
shape[2] = shape[2].max(counts[2]);
}
shape[0] = shape[0].max(counts[0]);
}
4 => {
for &rep_level in rep_levels {
let rep_level = rep_level.max(1) as usize;
if rep_level < 2 {
counts[0] += 1;
counts[1] = 0;
}
if rep_level < 3 {
counts[1] += 1;
counts[2] = 0;
}
if rep_level < 4 {
counts[2] += 1;
counts[3] = 1;
} else {
counts[3] += 1;
}
shape[1] = shape[1].max(counts[1]);
shape[2] = shape[2].max(counts[2]);
shape[3] = shape[3].max(counts[3]);
}
shape[0] = shape[0].max(counts[0]);
}
_ => {
// General case for higher dimensions
for &rep_level in rep_levels {
let rep_level = rep_level.max(1) as usize;
// Increment count at the deepest level (where actual values are)
counts[rep_level - 1] += 1;
shape[rep_level - 1] = shape[rep_level - 1].max(counts[rep_level - 1]);
for dim in rep_level..max_rep_level {
// Reset counts for dimensions deeper than repetition level
counts[dim] = 1;
// Update shape with maximum counts seen so far
shape[dim] = shape[dim].max(1);
}
}
}
}
}
fn generate_random_rep_levels(rng: &mut SimpleRng, max_size: u32, max_level: u32) -> Vec<u32> {
// Generate random array shape for each dimension (non-jagged)
let mut shape = Vec::new();
for _ in 0..max_level {
let mut dim = rng.gen_range(2, max_size);
if shape.iter().product::<u32>() * dim > max_size {
dim = 1;
}
shape.push(dim);
}
// Generate repetition levels for non-jagged ND array
generate_nd_array_rep_levels(&shape, max_level)
}
fn generate_nd_array_rep_levels(shape: &[u32], max_level: u32) -> Vec<u32> {
let total_elements: usize = shape.iter().product::<u32>() as usize;
let mut rep_levels = Vec::with_capacity(total_elements);
// For non-jagged arrays, repetition level indicates the deepest level
// at which the structure repeats when moving to the next element
let dims = shape.len();
// Generate multi-dimensional indices and convert to repetition levels
let mut indices = vec![0u32; dims];
for _ in 0..total_elements {
// Calculate repetition level based on which dimensions changed
let rep_level = calculate_repetition_level(&indices);
rep_levels.push(rep_level.min(max_level));
// Increment indices (rightmost dimension first)
increment_indices(&mut indices, shape);
}
rep_levels
}
fn calculate_repetition_level(indices: &[u32]) -> u32 {
// For the very first element [0,0,0,...], repetition level is 0
if indices.iter().all(|&x| x == 0) {
return 0;
}
// Find the deepest (rightmost) dimension that changed from 0
// This determines which level is repeating
for (dim, &idx) in indices.iter().enumerate().rev() {
if idx > 0 {
return (dim + 1) as u32;
}
}
1 // Should not reach here for valid indices
}
fn increment_indices(indices: &mut [u32], shape: &[u32]) {
// Increment rightmost dimension first (row-major order)
for i in (0..indices.len()).rev() {
indices[i] += 1;
if indices[i] < shape[i] {
break; // No carry needed
}
indices[i] = 0; // Reset and carry to next dimension
}
}
fn benchmark_function<F>(name: &str, mut f: F, iterations: usize) -> std::time::Duration
where
F: FnMut(),
{
let start = Instant::now();
for _ in 0..iterations {
black_box(f());
}
let duration = start.elapsed();
println!(
"{}: {:.2}ms ({:.2}ns/op)",
name,
duration.as_millis(),
duration.as_nanos() as f64 / iterations as f64
);
duration
}
fn main() {
// println!("Array Shape Calculation Benchmark (Random Shapes)");
// println!("=================================================\n");
let iterations = 1000;
let num_test_cases = 1000;
let max_array_size = 100_u32;
// Generate multiple random test cases for each dimension to prevent branch predictor optimization
let mut rng = SimpleRng::new(42);
// Test case 1: 1D arrays
let mut test_cases_1d = Vec::new();
let mut test_case_lengths_1d = Vec::new();
for _ in 0..num_test_cases {
let test_case = generate_random_rep_levels(&mut rng, max_array_size, 1);
test_case_lengths_1d.push(test_case.len());
test_cases_1d.extend(test_case);
}
println!(
"Benchmark 1: 1D arrays ({} test cases, up to {} elements each, {} iterations)",
num_test_cases, max_array_size, iterations
);
let old_time_1d = benchmark_function(
"Old implementation",
|| {
let mut start_idx = 0;
for &len in &test_case_lengths_1d {
let test_case = &test_cases_1d[start_idx..start_idx + len];
let mut shape = [0u32; ARRAY_NDIMS_LIMIT];
calculate_array_shape_old(
black_box(&mut shape),
black_box(1),
black_box(test_case),
);
black_box(shape);
start_idx += len;
}
},
iterations,
);
let new_time_1d = benchmark_function(
"New implementation",
|| {
let mut start_idx = 0;
for &len in &test_case_lengths_1d {
let test_case = &test_cases_1d[start_idx..start_idx + len];
let mut shape = [0u32; ARRAY_NDIMS_LIMIT];
calculate_array_shape_new(
black_box(&mut shape),
black_box(1),
black_box(test_case),
);
black_box(shape);
start_idx += len;
}
},
iterations,
);
let speedup_1d = old_time_1d.as_nanos() as f64 / new_time_1d.as_nanos() as f64;
println!("Speedup: {:.2}x\n", speedup_1d);
// Test case 2: 2D arrays
let mut test_cases_2d = Vec::new();
let mut test_case_lengths_2d = Vec::new();
for _ in 0..num_test_cases {
let test_case = generate_random_rep_levels(&mut rng, max_array_size, 2);
test_case_lengths_2d.push(test_case.len());
test_cases_2d.extend(test_case);
}
println!(
"Benchmark 2: 2D arrays ({} test cases, up to {} elements each, {} iterations)",
num_test_cases, max_array_size, iterations
);
let old_time_2d = benchmark_function(
"Old implementation",
|| {
let mut start_idx = 0;
for &len in &test_case_lengths_2d {
let test_case = &test_cases_2d[start_idx..start_idx + len];
let mut shape = [0u32; ARRAY_NDIMS_LIMIT];
calculate_array_shape_old(
black_box(&mut shape),
black_box(2),
black_box(test_case),
);
black_box(shape);
start_idx += len;
}
},
iterations,
);
let new_time_2d = benchmark_function(
"New implementation",
|| {
let mut start_idx = 0;
for &len in &test_case_lengths_2d {
let test_case = &test_cases_2d[start_idx..start_idx + len];
let mut shape = [0u32; ARRAY_NDIMS_LIMIT];
calculate_array_shape_new(
black_box(&mut shape),
black_box(2),
black_box(test_case),
);
black_box(shape);
start_idx += len;
}
},
iterations,
);
let speedup_2d = old_time_2d.as_nanos() as f64 / new_time_2d.as_nanos() as f64;
println!("Speedup: {:.2}x\n", speedup_2d);
// Test case 3: 3D arrays
let mut test_cases_3d = Vec::new();
let mut test_case_lengths_3d = Vec::new();
for _ in 0..num_test_cases {
let test_case = generate_random_rep_levels(&mut rng, max_array_size, 3);
test_case_lengths_3d.push(test_case.len());
test_cases_3d.extend(test_case);
}
println!(
"Benchmark 3: 3D arrays ({} test cases, up to {} elements each, {} iterations)",
num_test_cases, max_array_size, iterations
);
let old_time_3d = benchmark_function(
"Old implementation",
|| {
let mut start_idx = 0;
for &len in &test_case_lengths_3d {
let test_case = &test_cases_3d[start_idx..start_idx + len];
let mut shape = [0u32; ARRAY_NDIMS_LIMIT];
calculate_array_shape_old(
black_box(&mut shape),
black_box(3),
black_box(test_case),
);
black_box(shape);
start_idx += len;
}
},
iterations,
);
let new_time_3d = benchmark_function(
"New implementation",
|| {
let mut start_idx = 0;
for &len in &test_case_lengths_3d {
let test_case = &test_cases_3d[start_idx..start_idx + len];
let mut shape = [0u32; ARRAY_NDIMS_LIMIT];
calculate_array_shape_new(
black_box(&mut shape),
black_box(3),
black_box(test_case),
);
black_box(shape);
start_idx += len;
}
},
iterations,
);
let speedup_3d = old_time_3d.as_nanos() as f64 / new_time_3d.as_nanos() as f64;
println!("Speedup: {:.2}x\n", speedup_3d);
// Test case 4: 4D arrays
let mut test_cases_4d = Vec::new();
let mut test_case_lengths_4d = Vec::new();
for _ in 0..num_test_cases {
let test_case = generate_random_rep_levels(&mut rng, max_array_size, 4);
test_case_lengths_4d.push(test_case.len());
test_cases_4d.extend(test_case);
}
println!(
"Benchmark 4: 4D arrays ({} test cases, up to {} elements each, {} iterations)",
num_test_cases, max_array_size, iterations
);
let old_time_4d = benchmark_function(
"Old implementation",
|| {
let mut start_idx = 0;
for &len in &test_case_lengths_4d {
let test_case = &test_cases_4d[start_idx..start_idx + len];
let mut shape = [0u32; ARRAY_NDIMS_LIMIT];
calculate_array_shape_old(
black_box(&mut shape),
black_box(4),
black_box(test_case),
);
black_box(shape);
start_idx += len;
}
},
iterations,
);
let new_time_4d = benchmark_function(
"New implementation",
|| {
let mut start_idx = 0;
for &len in &test_case_lengths_4d {
let test_case = &test_cases_4d[start_idx..start_idx + len];
let mut shape = [0u32; ARRAY_NDIMS_LIMIT];
calculate_array_shape_new(
black_box(&mut shape),
black_box(4),
black_box(test_case),
);
black_box(shape);
start_idx += len;
}
},
iterations,
);
let speedup_4d = old_time_4d.as_nanos() as f64 / new_time_4d.as_nanos() as f64;
println!("Speedup: {:.2}x\n", speedup_4d);
// Test case 5: 5D arrays
let mut test_cases_5d = Vec::new();
let mut test_case_lengths_5d = Vec::new();
for _ in 0..num_test_cases {
let test_case = generate_random_rep_levels(&mut rng, max_array_size, 5);
test_case_lengths_5d.push(test_case.len());
test_cases_5d.extend(test_case);
}
println!(
"Benchmark 5: 5D arrays ({} test cases, up to {} elements each, {} iterations)",
num_test_cases, max_array_size, iterations
);
let old_time_5d = benchmark_function(
"Old implementation",
|| {
let mut start_idx = 0;
for &len in &test_case_lengths_5d {
let test_case = &test_cases_5d[start_idx..start_idx + len];
let mut shape = [0u32; ARRAY_NDIMS_LIMIT];
calculate_array_shape_old(
black_box(&mut shape),
black_box(5),
black_box(test_case),
);
black_box(shape);
start_idx += len;
}
},
iterations,
);
let new_time_5d = benchmark_function(
"New implementation",
|| {
let mut start_idx = 0;
for &len in &test_case_lengths_5d {
let test_case = &test_cases_5d[start_idx..start_idx + len];
let mut shape = [0u32; ARRAY_NDIMS_LIMIT];
calculate_array_shape_new(
black_box(&mut shape),
black_box(5),
black_box(test_case),
);
black_box(shape);
start_idx += len;
}
},
iterations,
);
let speedup_5d = old_time_5d.as_nanos() as f64 / new_time_5d.as_nanos() as f64;
println!("Speedup: {:.2}x\n", speedup_5d);
println!("Summary");
println!("-------");
println!("1D arrays: {:.2}x speedup", speedup_1d);
println!("2D arrays: {:.2}x speedup", speedup_2d);
println!("3D arrays: {:.2}x speedup", speedup_3d);
println!("4D arrays: {:.2}x speedup", speedup_4d);
println!("5D arrays: {:.2}x speedup", speedup_5d);
println!(
"Average: {:.2}x speedup",
(speedup_1d + speedup_2d + speedup_3d + speedup_4d + speedup_5d) / 5.0
);
// Same test case to measure same array shape scenario
println!();
println!("Array Shape Calculation Benchmark (Same Shape)");
println!("=================================================\n");
// Test case 1: 1D arrays
let test_case_1d = generate_nd_array_rep_levels(&[100], 1);
println!(
"Benchmark 1: 1D arrays ({} test cases, {} elements, {} iterations)",
num_test_cases,
test_case_1d.len(),
iterations
);
let old_time_1d = benchmark_function(
"Old implementation",
|| {
for _ in 0..num_test_cases {
let mut shape = [0u32; ARRAY_NDIMS_LIMIT];
calculate_array_shape_old(
black_box(&mut shape),
black_box(1),
black_box(&test_case_1d),
);
black_box(shape);
}
},
iterations,
);
let new_time_1d = benchmark_function(
"New implementation",
|| {
for _ in 0..num_test_cases {
let mut shape = [0u32; ARRAY_NDIMS_LIMIT];
calculate_array_shape_new(
black_box(&mut shape),
black_box(1),
black_box(&test_case_1d),
);
black_box(shape);
}
},
iterations,
);
let speedup_1d = old_time_1d.as_nanos() as f64 / new_time_1d.as_nanos() as f64;
println!("Speedup: {:.2}x\n", speedup_1d);
// Test case 2: 2D arrays
let test_case_2d = generate_nd_array_rep_levels(&[10, 10], 2);
println!(
"Benchmark 2: 2D arrays ({} test cases, {} elements, {} iterations)",
num_test_cases,
test_case_2d.len(),
iterations
);
let old_time_2d = benchmark_function(
"Old implementation",
|| {
for _ in 0..num_test_cases {
let mut shape = [0u32; ARRAY_NDIMS_LIMIT];
calculate_array_shape_old(
black_box(&mut shape),
black_box(2),
black_box(&test_case_2d),
);
black_box(shape);
}
},
iterations,
);
let new_time_2d = benchmark_function(
"New implementation",
|| {
for _ in 0..num_test_cases {
let mut shape = [0u32; ARRAY_NDIMS_LIMIT];
calculate_array_shape_new(
black_box(&mut shape),
black_box(2),
black_box(&test_case_2d),
);
black_box(shape);
}
},
iterations,
);
let speedup_2d = old_time_2d.as_nanos() as f64 / new_time_2d.as_nanos() as f64;
println!("Speedup: {:.2}x\n", speedup_2d);
// Test case 3: 3D arrays
let test_case_3d = generate_nd_array_rep_levels(&[4, 5, 5], 3);
println!(
"Benchmark 3: 3D arrays ({} test cases, {} elements, {} iterations)",
num_test_cases,
test_case_3d.len(),
iterations
);
let old_time_3d = benchmark_function(
"Old implementation",
|| {
for _ in 0..num_test_cases {
let mut shape = [0u32; ARRAY_NDIMS_LIMIT];
calculate_array_shape_old(
black_box(&mut shape),
black_box(3),
black_box(&test_case_3d),
);
black_box(shape);
}
},
iterations,
);
let new_time_3d = benchmark_function(
"New implementation",
|| {
for _ in 0..num_test_cases {
let mut shape = [0u32; ARRAY_NDIMS_LIMIT];
calculate_array_shape_new(
black_box(&mut shape),
black_box(3),
black_box(&test_case_3d),
);
black_box(shape);
}
},
iterations,
);
let speedup_3d = old_time_3d.as_nanos() as f64 / new_time_3d.as_nanos() as f64;
println!("Speedup: {:.2}x\n", speedup_3d);
// Test case 4: 4D arrays
let test_case_4d = generate_nd_array_rep_levels(&[3, 3, 3, 4], 4);
println!(
"Benchmark 4: 4D arrays ({} test cases, {} elements, {} iterations)",
num_test_cases,
test_case_4d.len(),
iterations
);
let old_time_4d = benchmark_function(
"Old implementation",
|| {
for _ in 0..num_test_cases {
let mut shape = [0u32; ARRAY_NDIMS_LIMIT];
calculate_array_shape_old(
black_box(&mut shape),
black_box(4),
black_box(&test_case_4d),
);
black_box(shape);
}
},
iterations,
);
let new_time_4d = benchmark_function(
"New implementation",
|| {
for _ in 0..num_test_cases {
let mut shape = [0u32; ARRAY_NDIMS_LIMIT];
calculate_array_shape_new(
black_box(&mut shape),
black_box(4),
black_box(&test_case_4d),
);
black_box(shape);
}
},
iterations,
);
let speedup_4d = old_time_4d.as_nanos() as f64 / new_time_4d.as_nanos() as f64;
println!("Speedup: {:.2}x\n", speedup_4d);
// Test case 5: 5D arrays
let test_case_5d = generate_nd_array_rep_levels(&[2, 2, 3, 3, 4], 5);
println!(
"Benchmark 5: 5D arrays ({} test cases, {} elements, {} iterations)",
num_test_cases,
test_case_5d.len(),
iterations
);
let old_time_5d = benchmark_function(
"Old implementation",
|| {
for _ in 0..num_test_cases {
let mut shape = [0u32; ARRAY_NDIMS_LIMIT];
calculate_array_shape_old(
black_box(&mut shape),
black_box(5),
black_box(&test_case_5d),
);
black_box(shape);
}
},
iterations,
);
let new_time_5d = benchmark_function(
"New implementation",
|| {
for _ in 0..num_test_cases {
let mut shape = [0u32; ARRAY_NDIMS_LIMIT];
calculate_array_shape_new(
black_box(&mut shape),
black_box(5),
black_box(&test_case_5d),
);
black_box(shape);
}
},
iterations,
);
let speedup_5d = old_time_5d.as_nanos() as f64 / new_time_5d.as_nanos() as f64;
println!("Speedup: {:.2}x\n", speedup_5d);
println!("Summary");
println!("-------");
println!("1D arrays: {:.2}x speedup", speedup_1d);
println!("2D arrays: {:.2}x speedup", speedup_2d);
println!("3D arrays: {:.2}x speedup", speedup_3d);
println!("4D arrays: {:.2}x speedup", speedup_4d);
println!("5D arrays: {:.2}x speedup", speedup_5d);
println!(
"Average: {:.2}x speedup",
(speedup_1d + speedup_2d + speedup_3d + speedup_4d + speedup_5d) / 5.0
);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment