Last active
January 5, 2026 20:39
-
-
Save alekswn/24fb2d1892bb0914b34ca9bcf66145f6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <stdatomic.h> | |
| #include <pthread.h> | |
| #include <time.h> | |
| #include <stdint.h> | |
| #include <unistd.h> | |
| #include <string.h> | |
| #include <sys/time.h> | |
#if defined(__x86_64__)
#include <x86intrin.h>
#endif

/*
 * Returns the current value of a cheap hardware tick counter.
 *
 *   x86-64  : the time-stamp counter via __rdtsc().
 *   aarch64 : the CNTVCT_EL0 system register.
 *             NOTE(review): this is the generic-timer virtual count, which
 *             presumably ticks at the timer frequency rather than the core
 *             clock - confirm before comparing numbers across architectures.
 *   other   : always 0, so every measured delta is 0.
 */
static inline uint64_t GET_CYCLES(void) {
#if defined(__x86_64__)
    return __rdtsc();
#elif defined(__aarch64__)
    uint64_t ticks;
    __asm__ volatile("mrs %0, cntvct_el0" : "=r" (ticks));
    return ticks;
#else
    return 0; // Fallback for unsupported architectures
#endif
}
/*
 * A counter paired with the mutex that protects it. The mutex benchmark
 * locks `mutex`, increments `value`, and unlocks again.
 */
typedef struct mutex_counter {
    size_t          value;  /* number of increments applied so far */
    pthread_mutex_t mutex;  /* must be held while touching `value` */
} mutex_counter_t;
| typedef struct { | |
| union { | |
| mutex_counter_t *mutex_counters; | |
| atomic_size_t *atomic_counters; | |
| size_t *plain_counters; | |
| }; // 8 bytes | |
| uint32_t *cycles; // 4 bytes | |
| uint32_t num_iterations; // 4 bytes | |
| uint16_t num_counters; // 2 bytes | |
| uint16_t thread_id; // 2 bytes | |
| memory_order mem_order; //1 byte? | |
| } __attribute__((aligned(64))) thread_data_t; | |
/*
 * Derives the ordering used for the initial load and for the failure path of
 * the compare-exchange. Neither may carry release semantics, so the release
 * component is dropped: release -> relaxed, acq_rel -> acquire. All other
 * orderings are returned unchanged.
 */
static inline
memory_order cas_failure_order(memory_order mem_order) {
    switch (mem_order) {
    case memory_order_release:
        return memory_order_relaxed;
    case memory_order_acq_rel:
        return memory_order_acquire;
    default:
        return mem_order;
    }
}

/*
 * Atomically adds 1 to *counter using a compare-and-swap retry loop.
 *
 * counter   - the atomic counter to increment.
 * mem_order - ordering applied to a successful exchange; the load and the
 *             failed-exchange path use the ordering derived by
 *             cas_failure_order().
 */
static inline
void increment_cas_counter(atomic_size_t *counter, memory_order mem_order) {
    const memory_order failure_order = cas_failure_order(mem_order);
    size_t expected = atomic_load_explicit(counter, failure_order);

    /* A failed compare-exchange stores the value it observed into `expected`,
     * so no separate reload is needed before retrying. The weak form may also
     * fail spuriously, which the loop handles the same way. */
    while (!atomic_compare_exchange_weak_explicit(counter,
                                                  &expected, expected + 1,
                                                  mem_order, failure_order)) {
        /* retry with the refreshed `expected` */
    }
}
/*
 * Converts a pair of tick readings into the 32-bit sample stored per
 * iteration. Deltas that do not fit in 32 bits are clamped to UINT32_MAX
 * instead of wrapping, so an extremely long stall cannot show up as a tiny
 * latency in the high percentiles.
 */
static inline uint32_t saturate_cycle_delta(uint64_t start_cycles, uint64_t end_cycles) {
    const uint64_t delta = end_cycles - start_cycles;
    return delta > UINT32_MAX ? UINT32_MAX : (uint32_t)delta;
}

/*
 * Defines `void *<name>_counter_thread(void *arg)`, a pthread entry point.
 *
 * arg points to a thread_data_t. The generated function runs
 * data->num_iterations iterations; in each one it executes `impl` between two
 * GET_CYCLES() readings and stores the delta in data->cycles[i].
 *
 * Inside `impl` the names `data` (the thread_data_t pointer) and `j` (the
 * current counter index, advancing round-robin modulo data->num_counters) are
 * available. data->num_counters must be non-zero because it is used as a
 * modulus.
 */
#define DECLARE_COUNTER_THREAD(name, impl) \
void* name##_counter_thread(void* arg) { \
    thread_data_t *data = (thread_data_t*)arg; \
    for (size_t i = 0, j = 0; i < data->num_iterations; i++, j = (j+1) % data->num_counters) { \
        const uint64_t start_cycles = GET_CYCLES(); \
        do { \
            impl \
        } while(0); \
        const uint64_t end_cycles = GET_CYCLES(); \
        data->cycles[i] = saturate_cycle_delta(start_cycles, end_cycles); \
    } \
    return NULL; \
}
| DECLARE_COUNTER_THREAD(mutex, | |
| pthread_mutex_lock(&data->mutex_counters[j].mutex); | |
| data->mutex_counters[j].value++; | |
| pthread_mutex_unlock(&data->mutex_counters[j].mutex); | |
| ) | |
| DECLARE_COUNTER_THREAD(atomic_fetch_add, | |
| atomic_fetch_add_explicit(&data->atomic_counters[j], 1, data->mem_order); | |
| ) | |
| DECLARE_COUNTER_THREAD(atomic_cas, | |
| increment_cas_counter(&data->atomic_counters[j], data->mem_order); | |
| ) | |
| DECLARE_COUNTER_THREAD(plain, | |
| data->plain_counters[(size_t)data->thread_id*(size_t)data->num_counters + j]++; | |
| ) | |
| DECLARE_COUNTER_THREAD(noop, | |
| (void)0; | |
| ) | |
/*
 * Returns the interval from *start to *end in seconds, as a double.
 * The microsecond part may be negative; it is simply added to the
 * whole-second difference.
 */
double get_time_diff(struct timeval *start, struct timeval *end) {
    const double whole_seconds = (double)(end->tv_sec - start->tv_sec);
    const double fraction = (end->tv_usec - start->tv_usec) / 1000000.0;
    return whole_seconds + fraction;
}
/*
 * qsort() comparator ordering uint32_t values ascending.
 * Returns -1, 0 or 1 without relying on subtraction (which could wrap).
 */
int compare_uint32(const void *a, const void *b) {
    const uint32_t ua = *(const uint32_t *)a;
    const uint32_t ub = *(const uint32_t *)b;
    return (ua > ub) - (ua < ub);
}

/*
 * Sorts `array` in place and computes the requested percentiles.
 *
 * array           - samples; reordered (sorted ascending) by this call.
 * sz              - number of samples in `array`.
 * denominator     - scale of the numerators (e.g. 100000 for 0.001% steps).
 * numerators      - percentile i is numerators[i] / denominator.
 * num_percentiles - length of `numerators` and `out_percentiles`.
 * out_percentiles - receives one value per requested percentile.
 *
 * For each percentile the rank sz * numerator / denominator is computed.
 * A whole rank r selects the r-th smallest sample. A fractional rank yields
 * the average of the two samples on either side of it. Ranks below 1 clamp
 * to the smallest sample and ranks at or past sz clamp to the largest.
 * With no samples, or a zero denominator, every output is set to 0.
 */
void calculate_percentiles(uint32_t array[], size_t sz,
                           size_t denominator, const size_t numerators[],
                           size_t num_percentiles, uint32_t out_percentiles[]) {
    if (sz == 0 || denominator == 0) {
        for (size_t i = 0; i < num_percentiles; i++) {
            out_percentiles[i] = 0;
        }
        return;
    }

    qsort(array, sz, sizeof array[0], compare_uint32);

    for (size_t i = 0; i < num_percentiles; i++) {
        const size_t scaled = sz * numerators[i];
        const size_t rank = scaled / denominator;
        const size_t remainder = scaled % denominator;

        if (rank == 0) {
            out_percentiles[i] = array[0];
            continue;
        }
        if (rank >= sz) {
            out_percentiles[i] = array[sz - 1];
            continue;
        }

        /* 1-based rank `rank` is array[rank - 1]; its upper neighbour is
         * array[rank], which is in bounds because rank < sz here. The sum
         * is formed in 64 bits so two large samples cannot overflow. */
        uint64_t value = array[rank - 1];
        if (remainder != 0) {
            value = (value + array[rank]) / 2;
        }
        out_percentiles[i] = (uint32_t)value;
    }
}
/*
 * Defines `double run_<name>_counter_benchmark(num_threads, num_counters,
 * total_iterations, cycles)`, which runs one benchmark flavour and returns
 * the elapsed wall-clock time in seconds.
 *
 * Macro parameters:
 *   name         - suffix used in the generated function name.
 *   counter_decl - declaration of the local counter storage.
 *   counter_name - identifier declared by counter_decl; also the name of the
 *                  thread_data_t union member that receives it.
 *   thread_name  - selects the worker <thread_name>_counter_thread.
 *   init_impl    - statements run once per counter index `i` before timing.
 *   cleanup_impl - statements run once per counter index `i` after joining;
 *                  expected to add to `total_increments`.
 *   m_order      - value stored in each thread's mem_order field.
 *
 * Generated function:
 *   total_iterations is split as evenly as possible across the threads and
 *   each thread gets its own contiguous slice of `cycles`, which must hold at
 *   least total_iterations entries. Thread creation and joining happen inside
 *   the timed region. The process aborts if num_threads or num_counters is 0,
 *   if a thread cannot be created or joined, or if the summed counters do not
 *   equal total_iterations.
 */
#define DECLARE_RUN_BENCHMARK(name, counter_decl, counter_name, thread_name, init_impl, cleanup_impl, m_order) \
double run_##name##_counter_benchmark(uint16_t num_threads, uint16_t num_counters, \
                                      uint32_t total_iterations, uint32_t cycles[]) { \
    /* Zero of either would give zero-length VLAs and a division by zero. */ \
    if (num_threads == 0 || num_counters == 0) abort(); \
    pthread_t threads[num_threads]; \
    thread_data_t thread_data[num_threads]; \
    counter_decl; \
    struct timeval start, end; \
    for (size_t i = 0; i < num_counters; i++) { \
        init_impl \
    } \
    const uint32_t increments_per_iteration = (uint32_t)num_threads; \
    const uint32_t base_iterations = total_iterations / increments_per_iteration; \
    const uint32_t remainder = total_iterations % increments_per_iteration; \
    for (size_t t = 0, i = 0; t < num_threads; t++) { \
        /* The first `remainder` threads take one extra iteration. */ \
        const uint32_t num_iterations = base_iterations + (t < remainder ? 1 : 0); \
        thread_data[t].counter_name = counter_name; \
        thread_data[t].cycles = &cycles[i]; \
        thread_data[t].num_counters = num_counters; \
        thread_data[t].num_iterations = num_iterations; \
        thread_data[t].thread_id = (uint16_t)t; \
        thread_data[t].mem_order = m_order; \
        i += num_iterations; \
    } \
    gettimeofday(&start, NULL); \
    for (size_t t = 0; t < num_threads; t++) { \
        /* A failed create would leave threads[t] uninitialised for the join. */ \
        if (pthread_create(&threads[t], NULL, thread_name##_counter_thread, &thread_data[t]) != 0) \
            abort(); \
    } \
    for (size_t t = 0; t < num_threads; t++) { \
        if (pthread_join(threads[t], NULL) != 0) \
            abort(); \
    } \
    gettimeofday(&end, NULL); \
    uint32_t total_increments = 0; \
    for (size_t i = 0; i < num_counters; i++) { \
        cleanup_impl \
    } \
    if (total_increments != total_iterations) abort(); \
    return get_time_diff(&start, &end); \
}
| DECLARE_RUN_BENCHMARK(mutex, mutex_counter_t mutex_counters[num_counters], mutex_counters, mutex, { | |
| if (pthread_mutex_init(&mutex_counters[i].mutex, NULL)) | |
| abort(); | |
| mutex_counters[i].value = 0; | |
| },{ | |
| total_increments += mutex_counters[i].value; | |
| pthread_mutex_destroy(&mutex_counters[i].mutex); | |
| }, 0) | |
| DECLARE_RUN_BENCHMARK(cas_relaxed, atomic_size_t atomic_counters[num_counters], atomic_counters, atomic_cas, { | |
| atomic_init(&atomic_counters[i], 0); | |
| },{ | |
| total_increments += atomic_load(&atomic_counters[i]); | |
| }, memory_order_relaxed) | |
| DECLARE_RUN_BENCHMARK(fetch_add_seq_cst, atomic_size_t atomic_counters[num_counters], atomic_counters, atomic_fetch_add, { | |
| atomic_init(&atomic_counters[i], 0); | |
| },{ | |
| total_increments += atomic_load(&atomic_counters[i]); | |
| }, memory_order_seq_cst) | |
| DECLARE_RUN_BENCHMARK(fetch_add_acq_rel, atomic_size_t atomic_counters[num_counters], atomic_counters, atomic_fetch_add, { | |
| atomic_init(&atomic_counters[i], 0); | |
| },{ | |
| total_increments += atomic_load(&atomic_counters[i]); | |
| }, memory_order_acq_rel) | |
| DECLARE_RUN_BENCHMARK(fetch_add_relaxed, atomic_size_t atomic_counters[num_counters], atomic_counters, atomic_fetch_add, { | |
| atomic_init(&atomic_counters[i], 0); | |
| },{ | |
| total_increments += atomic_load(&atomic_counters[i]); | |
| }, memory_order_relaxed) | |
| DECLARE_RUN_BENCHMARK(plain, size_t plain_counters[(size_t)num_threads*(size_t)num_counters], plain_counters, plain, { | |
| (void)0; | |
| },{ | |
| total_increments = total_iterations; | |
| }, 0) | |
| DECLARE_RUN_BENCHMARK(noop, size_t *plain_counters = NULL, plain_counters, noop, { | |
| (void)0; | |
| },{ | |
| total_increments = total_iterations; | |
| }, 0) | |
| int main(int argc, char *argv[]) { | |
| // Test configurations | |
| const uint16_t thread_counts[] = {1, 10, 100, 1000}; | |
| const uint16_t counter_counts[] = {1, 2, 8, 32, 128}; | |
| double (*benchmarks[])(uint16_t, uint16_t, uint32_t, uint32_t[]) = { | |
| &run_mutex_counter_benchmark, | |
| &run_cas_relaxed_counter_benchmark, | |
| &run_fetch_add_seq_cst_counter_benchmark, | |
| &run_fetch_add_acq_rel_counter_benchmark, | |
| &run_fetch_add_relaxed_counter_benchmark, | |
| &run_plain_counter_benchmark, | |
| &run_noop_counter_benchmark, | |
| }; | |
| const char* benchmark_names[] = { "Mutex", "CAS-Relaxed", "Atomic-SeqCst", "Atomic-AcqRel", "Atomic-Relaxed", "Plain", "NO-OP" }; | |
| const size_t num_thread_configs = sizeof(thread_counts) / sizeof(thread_counts[0]); | |
| const size_t num_counter_configs = sizeof(counter_counts) / sizeof(counter_counts[0]); | |
| const size_t num_benchmarks = sizeof(benchmarks) / sizeof(benchmarks[0]); | |
| const size_t num_benchmark_names = sizeof(benchmark_names) / sizeof(benchmark_names[0]); | |
| if (num_benchmarks != num_benchmark_names) abort(); | |
| const uint32_t total_iterations = thread_counts[num_thread_configs - 1] * counter_counts[num_counter_configs - 1]; | |
| const size_t percentile_denominator = 100000; | |
| const size_t percentile_numerators[] = {50000, 90000, 99000, 99990, 99999}; | |
| const size_t num_percentiles = sizeof(percentile_numerators) / sizeof(percentile_numerators[0]); | |
| uint32_t percentiles[num_percentiles]; | |
| uint32_t cycles[total_iterations]; | |
| printf("Total iterations: %u\n", total_iterations); | |
| printf("%-20s %4s %4s %8s %8s %12s %8s %8s %8s %8s %8s\n", | |
| "Type", "Thrd", "Cntr", "Total", "Time(s)", "Ops/sec", "P50", "P90", "P99", "P99.99", "P99.999"); | |
| printf("===================================================================================================================\n"); | |
| // Run benchmarks | |
| for (size_t i = 0; i < num_thread_configs; i++) { | |
| for (size_t j = 0; j < num_counter_configs; j++) { | |
| for (size_t k = 0; k < num_benchmarks; k++) { | |
| const double elapsed = benchmarks[k](thread_counts[i], counter_counts[j], total_iterations, cycles); | |
| const double ops_per_sec = total_iterations / elapsed; | |
| calculate_percentiles(cycles, total_iterations, | |
| percentile_denominator, percentile_numerators, | |
| num_percentiles, percentiles); | |
| printf("%-20s %4u %4u %8u %8.3f %12.0f %8u %8u %8u %8u %8u\n", | |
| benchmark_names[k], | |
| thread_counts[i], counter_counts[j], total_iterations, elapsed, ops_per_sec, | |
| percentiles[0], percentiles[1], percentiles[2], percentiles[3], percentiles[4]); | |
| } | |
| printf("\n"); | |
| } | |
| } | |
| return 0; | |
| } |
Author
Author
c8g.48xlarge:
Total iterations: 128000
Type Thrd Cntr Total Time(s) Ops/sec P50 P90 P99 P99.99 P99.999
===================================================================================================================
Mutex 1 1 128000 0.004 35774176 12 13 14 66 11349
CAS-Relaxed 1 1 128000 0.003 39131764 12 13 13 15 1744
Atomic-SeqCst 1 1 128000 0.003 38893953 12 13 14 15 3654
Atomic-AcqRel 1 1 128000 0.003 39167687 13 13 13 15 1262
Atomic-Relaxed 1 1 128000 0.003 38941284 12 13 14 15 4936
Plain 1 1 128000 0.003 37903465 12 13 13 15 8888
NO-OP 1 1 128000 0.003 39396737 13 13 14 15 3587
Mutex 1 2 128000 0.003 38140644 13 13 22 73 7636
CAS-Relaxed 1 2 128000 0.003 38461538 12 13 13 15 20500
Atomic-SeqCst 1 2 128000 0.003 39036292 13 13 13 15 1916
Atomic-AcqRel 1 2 128000 0.003 39167687 13 13 14 14 2524
Atomic-Relaxed 1 2 128000 0.003 39348294 12 13 13 14 1416
Plain 1 2 128000 0.003 39167687 12 13 14 14 5071
NO-OP 1 2 128000 0.003 39143731 13 13 14 14 19
Mutex 1 8 128000 0.003 38729198 13 13 19 68 4557
CAS-Relaxed 1 8 128000 0.003 39251763 12 13 13 15 111
Atomic-SeqCst 1 8 128000 0.003 39275851 13 13 14 14 6110
Atomic-AcqRel 1 8 128000 0.003 39215686 12 13 14 14 1309
Atomic-Relaxed 1 8 128000 0.003 38220364 12 13 14 150 5851
Plain 1 8 128000 0.003 39072039 13 13 14 15 1714
NO-OP 1 8 128000 0.003 39179676 12 13 14 14 1388
Mutex 1 32 128000 0.003 38764385 13 13 19 70 4695
CAS-Relaxed 1 32 128000 0.003 39083969 13 13 14 15 2139
Atomic-SeqCst 1 32 128000 0.003 39360394 13 13 13 15 2364
Atomic-AcqRel 1 32 128000 0.003 38988730 12 13 14 15 4646
Atomic-Relaxed 1 32 128000 0.003 38976857 12 13 13 15 2099
Plain 1 32 128000 0.003 39215686 12 13 14 15 2615
NO-OP 1 32 128000 0.003 39203675 12 13 14 14 3684
Mutex 1 128 128000 0.003 38670695 13 13 20 74 4473
CAS-Relaxed 1 128 128000 0.003 39227705 13 13 14 16 3984
Atomic-SeqCst 1 128 128000 0.003 38208955 12 13 14 128 2842
Atomic-AcqRel 1 128 128000 0.003 38964992 12 13 14 18 2830
Atomic-Relaxed 1 128 128000 0.003 39179676 12 13 14 18 52
Plain 1 128 128000 0.003 39312039 12 13 13 14 4270
NO-OP 1 128 128000 0.003 39119804 12 13 13 14 2462
Mutex 10 1 128000 0.014 9369739 13 558 19657 57959 76119
CAS-Relaxed 10 1 128000 0.007 17169685 13 1599 4330 18163 25744
Atomic-SeqCst 10 1 128000 0.002 58688675 13 13 457 4674 8861
Atomic-AcqRel 10 1 128000 0.002 57476426 13 13 674 7385 11488
Atomic-Relaxed 10 1 128000 0.002 53985660 13 13 468 6105 11873
Plain 10 1 128000 0.001 195121951 12 13 14 936 1153
NO-OP 10 1 128000 0.001 200000000 12 13 13 14 18
Mutex 10 2 128000 0.019 6909209 145 4679 15776 39060 54510
CAS-Relaxed 10 2 128000 0.011 11838698 347 1835 5837 8943 23807
Atomic-SeqCst 10 2 128000 0.002 57683641 13 13 598 6348 11033
Atomic-AcqRel 10 2 128000 0.002 58823529 13 13 538 7667 10579
Atomic-Relaxed 10 2 128000 0.002 52053680 13 13 605 5967 11390
Plain 10 2 128000 0.001 189910979 12 13 14 302 3452
NO-OP 10 2 128000 0.001 208809135 13 13 13 14 18
Mutex 10 8 128000 0.014 8917375 225 2079 15883 47484 60490
CAS-Relaxed 10 8 128000 0.009 14575268 427 1208 4128 16560 21924
Atomic-SeqCst 10 8 128000 0.002 52523595 13 13 744 8077 17443
Atomic-AcqRel 10 8 128000 0.002 53917439 13 13 684 7067 66143
Atomic-Relaxed 10 8 128000 0.002 53556485 13 13 701 4627 8570
Plain 10 8 128000 0.001 204146730 13 13 14 250 417
NO-OP 10 8 128000 0.001 204800000 12 13 13 14 18
Mutex 10 32 128000 0.004 30188679 216 684 1930 17656 31143
CAS-Relaxed 10 32 128000 0.004 32686415 13 701 2591 14372 22113
Atomic-SeqCst 10 32 128000 0.001 105960265 13 13 207 1251 14356
Atomic-AcqRel 10 32 128000 0.001 106400665 13 13 211 5946 11812
Atomic-Relaxed 10 32 128000 0.001 105610561 13 13 213 1260 6752
Plain 10 32 128000 0.001 211221122 12 13 14 184 7914
NO-OP 10 32 128000 0.001 197836167 12 13 14 14 2629
Mutex 10 128 128000 0.004 36519258 216 493 1375 12188 18459
CAS-Relaxed 10 128 128000 0.001 91168091 13 145 1075 7699 12997
Atomic-SeqCst 10 128 128000 0.001 135593220 13 13 153 450 993
Atomic-AcqRel 10 128 128000 0.001 129685917 13 13 155 439 6071
Atomic-Relaxed 10 128 128000 0.001 136898396 13 13 144 920 6887
Plain 10 128 128000 0.001 202531646 13 13 13 21 6830
NO-OP 10 128 128000 0.001 205457464 12 13 13 16 6146
Mutex 100 1 128000 0.023 5538728 13 56119 227998 606686 879215
CAS-Relaxed 100 1 128000 0.007 17657608 343 3373 32624 357968 615749
Atomic-SeqCst 100 1 128000 0.004 35874439 12 13 43 336 7824
Atomic-AcqRel 100 1 128000 0.003 36676218 12 13 27 251 5697
Atomic-Relaxed 100 1 128000 0.003 37198489 12 13 18 274 7595
Plain 100 1 128000 0.003 37047757 12 13 14 20 5392
NO-OP 100 1 128000 0.004 33988317 12 13 13 14 24154
Mutex 100 2 128000 0.020 6403522 13 45399 205357 594489 922517
CAS-Relaxed 100 2 128000 0.011 12030075 654 3804 26359 87472 208899
Atomic-SeqCst 100 2 128000 0.004 34051609 13 13 220 884 10533
Atomic-AcqRel 100 2 128000 0.004 33755274 13 13 489 8156 49609
Atomic-Relaxed 100 2 128000 0.004 34905918 13 13 561 7273 11293
Plain 100 2 128000 0.003 38038633 12 13 13 57 158
NO-OP 100 2 128000 0.003 37780401 12 13 14 14 8449
Mutex 100 8 128000 0.013 9484292 236 7768 163867 491059 733041
CAS-Relaxed 100 8 128000 0.010 13075902 754 4370 29982 109670 163075
Atomic-SeqCst 100 8 128000 0.004 34390113 13 13 673 10238 16761
Atomic-AcqRel 100 8 128000 0.004 34261242 13 13 567 9246 17051
Atomic-Relaxed 100 8 128000 0.004 35106967 13 13 471 7418 17895
Plain 100 8 128000 0.003 36728838 12 13 14 20 5175
NO-OP 100 8 128000 0.003 38243203 12 13 13 14 18
Mutex 100 32 128000 0.005 25356577 233 1098 25414 132821 302516
CAS-Relaxed 100 32 128000 0.005 25271471 13 1285 8982 46973 170471
Atomic-SeqCst 100 32 128000 0.003 36940837 13 13 37 759 9590
Atomic-AcqRel 100 32 128000 0.003 37746977 12 13 17 507 32408
Atomic-Relaxed 100 32 128000 0.004 36373970 13 13 19 384 4012
Plain 100 32 128000 0.004 36025894 12 13 14 21 10259
NO-OP 100 32 128000 0.004 36312057 12 13 13 14 5719
Mutex 100 128 128000 0.004 32512065 217 469 1497 77769 123414
CAS-Relaxed 100 128 128000 0.003 39179676 13 13 324 1920 8616
Atomic-SeqCst 100 128 128000 0.003 37026323 13 13 17 340 61452
Atomic-AcqRel 100 128 128000 0.003 40137974 13 13 40 321 2463
Atomic-Relaxed 100 128 128000 0.003 37791556 13 13 28 356 6091
Plain 100 128 128000 0.004 34417854 12 13 14 3098 7576
NO-OP 100 128 128000 0.003 37813885 12 13 13 15 4679
Mutex 1000 1 128000 0.029 4453723 13 13 64 624 8943
CAS-Relaxed 1000 1 128000 0.029 4360713 12 13 14 264 3619
Atomic-SeqCst 1000 1 128000 0.029 4363983 12 13 17 226 3849
Atomic-AcqRel 1000 1 128000 0.029 4489180 12 13 17 219 288
Atomic-Relaxed 1000 1 128000 0.029 4370838 12 13 17 202 3684
Plain 1000 1 128000 0.030 4265245 12 13 14 19 21586
NO-OP 1000 1 128000 0.029 4453568 12 13 13 14 2946
Mutex 1000 2 128000 0.029 4419127 13 13 106 615 18761
CAS-Relaxed 1000 2 128000 0.029 4468650 12 13 14 254 4211
Atomic-SeqCst 1000 2 128000 0.029 4427687 12 13 14 221 4692
Atomic-AcqRel 1000 2 128000 0.029 4397265 12 13 14 194 3643
Atomic-Relaxed 1000 2 128000 0.029 4373975 12 13 14 236 13434
Plain 1000 2 128000 0.031 4192872 12 13 13 20 5834
NO-OP 1000 2 128000 0.029 4364728 12 13 13 16 10254
Mutex 1000 8 128000 0.030 4306429 13 13 126 676 13810
CAS-Relaxed 1000 8 128000 0.028 4498647 12 13 14 224 569
Atomic-SeqCst 1000 8 128000 0.030 4304547 12 13 14 243 4225
Atomic-AcqRel 1000 8 128000 0.029 4363388 12 13 14 186 251
Atomic-Relaxed 1000 8 128000 0.028 4515469 12 13 14 235 403
Plain 1000 8 128000 0.029 4478030 12 13 14 20 11600
NO-OP 1000 8 128000 0.029 4437511 12 13 14 14 5461
Mutex 1000 32 128000 0.030 4338248 13 57 226 747 7541
CAS-Relaxed 1000 32 128000 0.030 4230426 12 13 15 386 9838
Atomic-SeqCst 1000 32 128000 0.029 4383562 12 13 14 237 3048
Atomic-AcqRel 1000 32 128000 0.030 4278504 12 13 14 260 4622
Atomic-Relaxed 1000 32 128000 0.029 4398474 12 13 14 222 3471
Plain 1000 32 128000 0.030 4255178 12 13 14 3052 12191
NO-OP 1000 32 128000 0.029 4397114 12 13 14 14 3093
Mutex 1000 128 128000 0.031 4190950 56 159 256 6439 12395
CAS-Relaxed 1000 128 128000 0.029 4444907 12 13 14 297 2948
Atomic-SeqCst 1000 128 128000 0.030 4296167 12 13 14 242 4543
Atomic-AcqRel 1000 128 128000 0.029 4349008 12 13 14 227 4033
Atomic-Relaxed 1000 128 128000 0.030 4260418 12 13 14 263 4064
Plain 1000 128 128000 0.030 4333401 12 13 14 3152 11148
NO-OP 1000 128 128000 0.029 4339424 12 13 14 16 6698
ubuntu@ip-172-31-37-184:~/24fb2d1892bb0914b34ca9bcf66145f6$ head /proc/cpuinfo
processor : 0
BogoMIPS : 2000.00
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 flagm2 frint svei8mm svebf16 i8mm bf16 dgh rng bti
CPU implementer : 0x41
CPU architecture: 8
CPU variant : 0x0
CPU part : 0xd4f
CPU revision : 1
processor : 1
Author
c7i.48xlarge
Total iterations: 128000
Type Thrd Cntr Total Time(s) Ops/sec P50 P90 P99 P99.99 P99.999
===================================================================================================================
Mutex 1 1 128000 0.007 19190405 76 88 94 929 27093
CAS-Relaxed 1 1 128000 0.005 25884732 58 60 70 490 28663
Atomic-SeqCst 1 1 128000 0.005 25723473 54 56 64 866 23339
Atomic-AcqRel 1 1 128000 0.005 26688907 54 62 66 387 23506
Atomic-Relaxed 1 1 128000 0.005 26370004 54 56 66 245 18338
Plain 1 1 128000 0.004 33298647 32 38 40 175 20615
NO-OP 1 1 128000 0.003 36613272 30 30 36 114 12999
Mutex 1 2 128000 0.006 21052632 76 78 92 259 20308
CAS-Relaxed 1 2 128000 0.005 24502297 58 66 70 410 36700
Atomic-SeqCst 1 2 128000 0.005 26186579 54 56 66 470 42319
Atomic-AcqRel 1 2 128000 0.005 25281454 54 62 66 244 4619
Atomic-Relaxed 1 2 128000 0.005 26451746 54 56 66 243 10861
Plain 1 2 128000 0.004 33083484 34 38 40 192 33256
NO-OP 1 2 128000 0.004 36127576 30 34 36 192 16970
Mutex 1 8 128000 0.006 20529270 76 90 94 772 41085
CAS-Relaxed 1 8 128000 0.005 24839899 58 66 70 510 35648
Atomic-SeqCst 1 8 128000 0.005 26042726 54 56 66 251 25173
Atomic-AcqRel 1 8 128000 0.005 26337449 54 56 66 229 30345
Atomic-Relaxed 1 8 128000 0.005 26528497 54 56 66 248 16663
Plain 1 8 128000 0.004 33281331 34 38 40 192 16094
NO-OP 1 8 128000 0.004 34867883 30 30 32 91 27887
Mutex 1 32 128000 0.006 21056095 76 78 92 275 45185
CAS-Relaxed 1 32 128000 0.005 24568138 58 68 72 328 31830
Atomic-SeqCst 1 32 128000 0.005 26117119 54 62 66 248 45827
Atomic-AcqRel 1 32 128000 0.005 25661588 54 58 66 257 41060
Atomic-Relaxed 1 32 128000 0.005 25432148 54 56 66 251 39065
Plain 1 32 128000 0.004 34858388 32 34 40 157 29890
NO-OP 1 32 128000 0.004 35884497 30 34 36 192 27079
Mutex 1 128 128000 0.006 20618557 76 78 92 954 32904
CAS-Relaxed 1 128 128000 0.005 24413504 58 66 70 281 118160
Atomic-SeqCst 1 128 128000 0.005 25702811 54 62 66 438 24564
Atomic-AcqRel 1 128 128000 0.005 26229508 54 62 66 278 30978
Atomic-Relaxed 1 128 128000 0.005 26294166 54 58 66 249 29847
Plain 1 128 128000 0.004 32703117 32 38 40 192 16511
NO-OP 1 128 128000 0.004 36281179 30 30 36 112 12147
Mutex 10 1 128000 0.018 6947083 520 8466 46018 128787 161812
CAS-Relaxed 10 1 128000 0.010 12342108 1236 3490 7216 46709 86757
Atomic-SeqCst 10 1 128000 0.009 13500686 1122 3070 6456 38333 73604
Atomic-AcqRel 10 1 128000 0.010 13418597 1106 3042 6556 33399 63412
Atomic-Relaxed 10 1 128000 0.010 13223140 1186 3158 6458 32148 73457
Plain 10 1 128000 0.002 63586687 32 34 42 4275 56476
NO-OP 10 1 128000 0.001 134878820 30 34 36 648 19016
Mutex 10 2 128000 0.029 4373526 90 15420 77274 214273 463800
CAS-Relaxed 10 2 128000 0.011 11958146 1222 3672 7888 36440 57286
Atomic-SeqCst 10 2 128000 0.010 12327844 1238 3432 7308 57211 93246
Atomic-AcqRel 10 2 128000 0.010 12602146 1218 3358 7258 32903 69196
Atomic-Relaxed 10 2 128000 0.010 12643224 1220 3386 7260 40118 92638
Plain 10 2 128000 0.003 42077581 32 34 40 30274 89488
NO-OP 10 2 128000 0.001 146620848 30 34 36 716 39562
Mutex 10 8 128000 0.017 7702955 306 6262 47734 129176 321762
CAS-Relaxed 10 8 128000 0.008 15936255 904 2956 6496 37724 70223
Atomic-SeqCst 10 8 128000 0.008 16978379 690 2518 6896 35699 57451
Atomic-AcqRel 10 8 128000 0.007 17114588 774 2540 6178 32751 115373
Atomic-Relaxed 10 8 128000 0.008 16946909 796 2616 6068 53538 80883
Plain 10 8 128000 0.001 94885100 32 34 40 1191 40594
NO-OP 10 8 128000 0.001 155151515 30 36 36 737 4162
Mutex 10 32 128000 0.008 15892724 270 2336 23324 74672 260960
CAS-Relaxed 10 32 128000 0.004 31651830 318 1400 4168 37482 47125
Atomic-SeqCst 10 32 128000 0.004 36168409 316 1090 3168 37237 74021
Atomic-AcqRel 10 32 128000 0.004 34848897 308 1016 2992 32915 51717
Atomic-Relaxed 10 32 128000 0.004 35995501 320 1036 3152 31359 58442
Plain 10 32 128000 0.001 143982002 32 34 40 614 28550
NO-OP 10 32 128000 0.001 161209068 30 30 36 772 23920
Mutex 10 128 128000 0.005 27485506 110 1118 13900 61637 99586
CAS-Relaxed 10 128 128000 0.002 55291577 220 692 1832 31424 45492
Atomic-SeqCst 10 128 128000 0.002 61805891 224 610 1238 31930 67079
Atomic-AcqRel 10 128 128000 0.002 63713290 226 586 1176 28096 63936
Atomic-Relaxed 10 128 128000 0.002 60663507 230 616 1296 14518 38024
Plain 10 128 128000 0.001 131416838 32 34 40 859 32167
NO-OP 10 128 128000 0.001 166233766 30 34 36 817 35611
Mutex 100 1 128000 0.019 6780738 90 650 807386 4834457 6794460
CAS-Relaxed 100 1 128000 0.011 12109745 5298 21276 51508 155459 289425
Atomic-SeqCst 100 1 128000 0.010 12255841 5066 20448 50732 209100 327963
Atomic-AcqRel 100 1 128000 0.010 12829508 3866 18924 48924 136071 298818
Atomic-Relaxed 100 1 128000 0.011 11759302 4724 20304 52618 159233 261687
Plain 100 1 128000 0.005 26970080 36 38 42 8198 53671
NO-OP 100 1 128000 0.004 29110757 32 34 36 827 20504
Mutex 100 2 128000 0.016 7782101 96 1334 675568 4897769 7736744
CAS-Relaxed 100 2 128000 0.011 12115476 5312 21210 52660 207331 447459
Atomic-SeqCst 100 2 128000 0.010 12479282 3992 18534 49228 117267 156784
Atomic-AcqRel 100 2 128000 0.010 12462272 4760 19710 50536 137459 234919
Atomic-Relaxed 100 2 128000 0.011 12091442 5092 21262 54110 145610 183528
Plain 100 2 128000 0.006 23017443 36 38 40 7307 93053
NO-OP 100 2 128000 0.005 27777778 32 34 36 890 21656
Mutex 100 8 128000 0.014 8889506 192 1414 440190 5450065 7055383
CAS-Relaxed 100 8 128000 0.010 12209081 5036 21056 57280 230848 311123
Atomic-SeqCst 100 8 128000 0.010 12679544 4420 19946 48820 221404 527319
Atomic-AcqRel 100 8 128000 0.010 12685828 5180 20428 48706 191758 258429
Atomic-Relaxed 100 8 128000 0.011 12174244 3386 18630 50778 217544 504424
Plain 100 8 128000 0.004 28783450 36 38 40 957 196347
NO-OP 100 8 128000 0.005 26310380 30 32 34 842 21127
Mutex 100 32 128000 0.007 19101627 366 3932 88480 963953 1295006
CAS-Relaxed 100 32 128000 0.005 27765727 396 1576 4814 48727 67100
Atomic-SeqCst 100 32 128000 0.005 25979298 266 752 1798 33856 73468
Atomic-AcqRel 100 32 128000 0.005 27503223 258 788 2158 30726 56968
Atomic-Relaxed 100 32 128000 0.006 22800143 254 750 1914 29025 72471
Plain 100 32 128000 0.004 28913485 34 36 40 985 28447
NO-OP 100 32 128000 0.005 27084215 30 32 34 811 27828
Mutex 100 128 128000 0.006 20075282 208 904 13166 83822 688412
CAS-Relaxed 100 128 128000 0.006 21680217 64 558 1248 46932 290142
Atomic-SeqCst 100 128 128000 0.004 30865686 56 462 772 5060 56685
Atomic-AcqRel 100 128 128000 0.005 24591739 56 430 740 23280 42370
Atomic-Relaxed 100 128 128000 0.004 29767442 58 474 774 4047 49157
Plain 100 128 128000 0.004 31022782 34 38 40 858 28269
NO-OP 100 128 128000 0.005 27009918 30 34 36 730 188831
Mutex 1000 1 128000 0.041 3088952 80 92 1700 74179 100791
CAS-Relaxed 1000 1 128000 0.043 2985562 62 260 828 4750 30915
Atomic-SeqCst 1000 1 128000 0.040 3236901 62 76 656 2540 192160
Atomic-AcqRel 1000 1 128000 0.043 3003990 60 268 744 2833 43712
Atomic-Relaxed 1000 1 128000 0.043 3003355 62 140 744 2907 40960
Plain 1000 1 128000 0.040 3190906 36 40 102 1032 4140
NO-OP 1000 1 128000 0.042 3069103 32 34 102 944 8606
Mutex 1000 2 128000 0.044 2942326 82 136 1268 53526 76698
CAS-Relaxed 1000 2 128000 0.042 3032457 66 128 738 3537 38346
Atomic-SeqCst 1000 2 128000 0.043 2948969 62 254 752 2936 43995
Atomic-AcqRel 1000 2 128000 0.044 2941852 62 264 752 2977 36149
Atomic-Relaxed 1000 2 128000 0.043 3008155 58 234 828 4830 47923
Plain 1000 2 128000 0.042 3064400 36 40 102 1027 43904
NO-OP 1000 2 128000 0.042 3039658 34 36 102 997 132405
Mutex 1000 8 128000 0.043 2961797 82 292 1254 43203 122786
CAS-Relaxed 1000 8 128000 0.044 2925848 64 136 746 4945 153891
Atomic-SeqCst 1000 8 128000 0.043 2976675 62 96 646 1769 47251
Atomic-AcqRel 1000 8 128000 0.044 2922575 62 102 676 1764 42471
Atomic-Relaxed 1000 8 128000 0.046 2784970 60 202 712 2010 29823
Plain 1000 8 128000 0.043 2950669 38 40 102 1026 24536
NO-OP 1000 8 128000 0.040 3165026 32 36 94 1005 28606
Mutex 1000 32 128000 0.043 2979446 88 172 894 37253 49850
CAS-Relaxed 1000 32 128000 0.042 3061908 66 88 700 2139 47612
Atomic-SeqCst 1000 32 128000 0.045 2846848 60 98 672 1525 40810
Atomic-AcqRel 1000 32 128000 0.046 2792321 60 96 684 1691 29755
Atomic-Relaxed 1000 32 128000 0.043 2990025 62 88 652 1655 23391
Plain 1000 32 128000 0.043 2957486 36 40 96 1056 314588
NO-OP 1000 32 128000 0.043 2983544 32 34 102 961 45061
Mutex 1000 128 128000 0.044 2888021 88 254 916 28456 94522
CAS-Relaxed 1000 128 128000 0.044 2907373 66 84 768 1829 42714
Atomic-SeqCst 1000 128 128000 0.043 2943002 60 74 726 1657 31082
Atomic-AcqRel 1000 128 128000 0.045 2859441 60 86 734 2497 63193
Atomic-Relaxed 1000 128 128000 0.043 3011765 62 74 744 1594 34283
Plain 1000 128 128000 0.042 3022789 38 40 56 1258 43201
NO-OP 1000 128 128000 0.040 3192975 32 36 104 938 1116
ubuntu@ip-172-31-40-71:~/24fb2d1892bb0914b34ca9bcf66145f6$ head /proc/cpuinfo
processor : 0
vendor_id : GenuineIntel
cpu family : 6
model : 143
model name : Intel(R) Xeon(R) Platinum 8488C
stepping : 8
microcode : 0x2b000643
cpu MHz : 3200.948
cache size : 107520 KB
physical id : 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
AMD: