Last active
September 3, 2025 00:23
-
-
Save jweinst1/814939f607e8cfc2619b04da8e47e72a to your computer and use it in GitHub Desktop.
an important benchmark of pop count vs jungle precomp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <array> | |
| #include <cstdint> | |
| #include <iostream> | |
| #include <vector> | |
| #include <random> | |
| #include <chrono> | |
| constexpr size_t NUM_BYTES = 1024; | |
| constexpr int N = 10'000'000; | |
| // -------------------------- | |
| // Popcount | |
| // -------------------------- | |
| inline int popcount8(uint8_t x) { | |
| #if defined(__clang__) || defined(__GNUC__) | |
| return __builtin_popcount(x); | |
| #else | |
| int cnt = 0; | |
| while (x) { x &= (x - 1); ++cnt; } | |
| return cnt; | |
| #endif | |
| } | |
| inline int popcount64(uint64_t x) { | |
| #if defined(__clang__) || defined(__GNUC__) | |
| return __builtin_popcountll(x); | |
| #else | |
| int cnt = 0; | |
| while (x) { x &= (x - 1); ++cnt; } | |
| return cnt; | |
| #endif | |
| } | |
| // -------------------------- | |
| // Hamming distance: baseline | |
| // -------------------------- | |
| inline int hamming_distance_popcount8(const std::array<uint8_t, NUM_BYTES>& q, | |
| const std::array<uint8_t, NUM_BYTES>& c) { | |
| int total = 0; | |
| for (size_t i = 0; i < NUM_BYTES; i++) { | |
| total += popcount8(q[i] ^ c[i]); | |
| } | |
| return total; | |
| } | |
| // -------------------------- | |
| // Hamming distance: 64-bit | |
| // -------------------------- | |
| inline int hamming_distance_popcount64(const std::array<uint8_t, NUM_BYTES>& q, | |
| const std::array<uint8_t, NUM_BYTES>& c) { | |
| int total = 0; | |
| constexpr size_t num_words = NUM_BYTES / 8; | |
| const uint64_t* q64 = reinterpret_cast<const uint64_t*>(q.data()); | |
| const uint64_t* c64 = reinterpret_cast<const uint64_t*>(c.data()); | |
| for (size_t i = 0; i < num_words; i++) { | |
| total += popcount64(q64[i] ^ c64[i]); | |
| } | |
| return total; | |
| } | |
| // -------------------------- | |
| // Benchmark | |
| // -------------------------- | |
| int main() { | |
| std::mt19937 rng(12345); | |
| std::uniform_int_distribution<int> dist(0, 255); | |
| std::array<uint8_t, NUM_BYTES> query{}; | |
| for (auto &b : query) b = static_cast<uint8_t>(dist(rng)); | |
| std::vector<std::array<uint8_t, NUM_BYTES>> collection(N); | |
| for (int i = 0; i < N; i++) | |
| for (auto &b : collection[i]) b = static_cast<uint8_t>(dist(rng)); | |
| // Baseline popcount8 | |
| volatile size_t sink8 = 0; | |
| auto start = std::chrono::high_resolution_clock::now(); | |
| for (int i = 0; i < N; i++) | |
| sink8 += hamming_distance_popcount8(query, collection[i]); | |
| auto end = std::chrono::high_resolution_clock::now(); | |
| auto t8 = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count(); | |
| // 64-bit popcount | |
| volatile size_t sink64 = 0; | |
| start = std::chrono::high_resolution_clock::now(); | |
| for (int i = 0; i < N; i++) | |
| sink64 += hamming_distance_popcount64(query, collection[i]); | |
| end = std::chrono::high_resolution_clock::now(); | |
| auto t64 = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count(); | |
| std::cout << "Baseline 8-bit popcount: " << t8 << " ms, sink=" << sink8 << "\n"; | |
| std::cout << "64-bit popcount: " << t64 << " ms, sink=" << sink64 << "\n"; | |
| std::cout << "Speedup factor: " << static_cast<double>(t8) / t64 << "x\n"; | |
| /** | |
| * Baseline 8-bit popcount: 509 ms, sink=40959816215 | |
| 64-bit popcount: 267 ms, sink=40959816215 | |
| Speedup factor: 1.90637x | |
| * */ | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <array> | |
| #include <cstdint> | |
| #include <iostream> | |
| #include <random> | |
| #include <chrono> | |
| #include <vector> | |
| // -------------------------- | |
| // Constants | |
| // -------------------------- | |
| constexpr size_t NUM_BYTES = 256; | |
| constexpr int N = 10'000'000; // 10M vectors | |
| // -------------------------- | |
| // Simple popcount | |
| // -------------------------- | |
| inline int popcount8(uint8_t x) { | |
| #if defined(__clang__) || defined(__GNUC__) | |
| return __builtin_popcount(x); | |
| #else | |
| int cnt = 0; | |
| while (x) { x &= (x - 1); ++cnt; } | |
| return cnt; | |
| #endif | |
| } | |
| // -------------------------- | |
| // Super Jungle: runtime | |
| // -------------------------- | |
| using ByteRow = std::array<uint8_t, 256>; | |
| using ByteTable = std::array<ByteRow, 256>; | |
| using SuperJungle = std::array<ByteTable, NUM_BYTES>; | |
| SuperJungle superJungle; | |
| void build_super_jungle_runtime() { | |
| for (size_t b = 0; b < NUM_BYTES; ++b) | |
| for (size_t q = 0; q < 256; ++q) | |
| for (size_t c = 0; c < 256; ++c) | |
| superJungle[b][q][c] = static_cast<uint8_t>(popcount8(q ^ c)); | |
| } | |
| // -------------------------- | |
| // Rocket Jungle distance | |
| // -------------------------- | |
| inline int hamming_distance_jungle(const std::array<uint8_t, NUM_BYTES>& q, | |
| const std::array<uint8_t, NUM_BYTES>& c) { | |
| int total = 0; | |
| for (size_t i = 0; i < NUM_BYTES; i++) { | |
| total += superJungle[i][q[i]][c[i]]; | |
| } | |
| return total; | |
| } | |
| // -------------------------- | |
| // Baseline distance (popcount on the fly) | |
| // -------------------------- | |
| inline int hamming_distance_popcount(const std::array<uint8_t, NUM_BYTES>& q, | |
| const std::array<uint8_t, NUM_BYTES>& c) { | |
| int total = 0; | |
| for (size_t i = 0; i < NUM_BYTES; i++) { | |
| total += popcount8(q[i] ^ c[i]); | |
| } | |
| return total; | |
| } | |
| // -------------------------- | |
| // Benchmark | |
| // -------------------------- | |
| int main() { | |
| std::mt19937 rng(12345); | |
| std::uniform_int_distribution<int> dist(0, 255); | |
| // Build super jungle once | |
| build_super_jungle_runtime(); | |
| // Random query vector | |
| std::array<uint8_t, NUM_BYTES> query{}; | |
| for (auto &b : query) b = static_cast<uint8_t>(dist(rng)); | |
| // Random collection vectors | |
| std::vector<std::array<uint8_t, NUM_BYTES>> collection(N); | |
| for (int i = 0; i < N; i++) { | |
| for (auto &b : collection[i]) b = static_cast<uint8_t>(dist(rng)); | |
| } | |
| volatile size_t sink_jungle = 0; | |
| auto start = std::chrono::high_resolution_clock::now(); | |
| for (int i = 0; i < N; i++) | |
| sink_jungle += hamming_distance_jungle(query, collection[i]); | |
| auto end = std::chrono::high_resolution_clock::now(); | |
| auto jungle_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count(); | |
| volatile size_t sink_pop = 0; | |
| start = std::chrono::high_resolution_clock::now(); | |
| for (int i = 0; i < N; i++) | |
| sink_pop += hamming_distance_popcount(query, collection[i]); | |
| end = std::chrono::high_resolution_clock::now(); | |
| auto pop_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count(); | |
| std::cout << "Rocket Jungle: " << jungle_ns << " ns, sink=" << sink_jungle << "\n"; | |
| std::cout << "Baseline Popcount: " << pop_ns << " ns, sink=" << sink_pop << "\n"; | |
| std::cout << "Speedup factor: " << static_cast<double>(pop_ns) / jungle_ns << "x\n";/* | |
| Rocket Jungle: 1297162584 ns, sink=10240071909 | |
| Baseline Popcount: 46626625 ns, sink=10240071909 | |
| Speedup factor: 0.0359451x | |
| */ | |
| } | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment