Skip to content

Instantly share code, notes, and snippets.

@jweinst1
Last active September 3, 2025 00:23
Show Gist options
  • Select an option

  • Save jweinst1/814939f607e8cfc2619b04da8e47e72a to your computer and use it in GitHub Desktop.

Select an option

Save jweinst1/814939f607e8cfc2619b04da8e47e72a to your computer and use it in GitHub Desktop.
an important benchmark of pop count vs jungle precomp
#include <array>
#include <cstdint>
#include <iostream>
#include <vector>
#include <random>
#include <chrono>
constexpr size_t NUM_BYTES = 1024;
constexpr int N = 10'000'000;
// --------------------------
// Popcount
// --------------------------
inline int popcount8(uint8_t x) {
#if defined(__clang__) || defined(__GNUC__)
return __builtin_popcount(x);
#else
int cnt = 0;
while (x) { x &= (x - 1); ++cnt; }
return cnt;
#endif
}
inline int popcount64(uint64_t x) {
#if defined(__clang__) || defined(__GNUC__)
return __builtin_popcountll(x);
#else
int cnt = 0;
while (x) { x &= (x - 1); ++cnt; }
return cnt;
#endif
}
// --------------------------
// Hamming distance: baseline
// --------------------------
inline int hamming_distance_popcount8(const std::array<uint8_t, NUM_BYTES>& q,
const std::array<uint8_t, NUM_BYTES>& c) {
int total = 0;
for (size_t i = 0; i < NUM_BYTES; i++) {
total += popcount8(q[i] ^ c[i]);
}
return total;
}
// --------------------------
// Hamming distance: 64-bit
// --------------------------
inline int hamming_distance_popcount64(const std::array<uint8_t, NUM_BYTES>& q,
const std::array<uint8_t, NUM_BYTES>& c) {
int total = 0;
constexpr size_t num_words = NUM_BYTES / 8;
const uint64_t* q64 = reinterpret_cast<const uint64_t*>(q.data());
const uint64_t* c64 = reinterpret_cast<const uint64_t*>(c.data());
for (size_t i = 0; i < num_words; i++) {
total += popcount64(q64[i] ^ c64[i]);
}
return total;
}
// --------------------------
// Benchmark
// --------------------------
int main() {
std::mt19937 rng(12345);
std::uniform_int_distribution<int> dist(0, 255);
std::array<uint8_t, NUM_BYTES> query{};
for (auto &b : query) b = static_cast<uint8_t>(dist(rng));
std::vector<std::array<uint8_t, NUM_BYTES>> collection(N);
for (int i = 0; i < N; i++)
for (auto &b : collection[i]) b = static_cast<uint8_t>(dist(rng));
// Baseline popcount8
volatile size_t sink8 = 0;
auto start = std::chrono::high_resolution_clock::now();
for (int i = 0; i < N; i++)
sink8 += hamming_distance_popcount8(query, collection[i]);
auto end = std::chrono::high_resolution_clock::now();
auto t8 = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
// 64-bit popcount
volatile size_t sink64 = 0;
start = std::chrono::high_resolution_clock::now();
for (int i = 0; i < N; i++)
sink64 += hamming_distance_popcount64(query, collection[i]);
end = std::chrono::high_resolution_clock::now();
auto t64 = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
std::cout << "Baseline 8-bit popcount: " << t8 << " ms, sink=" << sink8 << "\n";
std::cout << "64-bit popcount: " << t64 << " ms, sink=" << sink64 << "\n";
std::cout << "Speedup factor: " << static_cast<double>(t8) / t64 << "x\n";
/**
* Baseline 8-bit popcount: 509 ms, sink=40959816215
64-bit popcount: 267 ms, sink=40959816215
Speedup factor: 1.90637x
* */
}
#include <array>
#include <cstdint>
#include <iostream>
#include <random>
#include <chrono>
#include <vector>
// --------------------------
// Constants
// --------------------------
constexpr size_t NUM_BYTES = 256;
constexpr int N = 10'000'000; // 10M vectors
// --------------------------
// Simple popcount
// --------------------------
inline int popcount8(uint8_t x) {
#if defined(__clang__) || defined(__GNUC__)
return __builtin_popcount(x);
#else
int cnt = 0;
while (x) { x &= (x - 1); ++cnt; }
return cnt;
#endif
}
// --------------------------
// Super Jungle: runtime
// --------------------------
using ByteRow = std::array<uint8_t, 256>;
using ByteTable = std::array<ByteRow, 256>;
using SuperJungle = std::array<ByteTable, NUM_BYTES>;
SuperJungle superJungle;
void build_super_jungle_runtime() {
for (size_t b = 0; b < NUM_BYTES; ++b)
for (size_t q = 0; q < 256; ++q)
for (size_t c = 0; c < 256; ++c)
superJungle[b][q][c] = static_cast<uint8_t>(popcount8(q ^ c));
}
// --------------------------
// Rocket Jungle distance
// --------------------------
inline int hamming_distance_jungle(const std::array<uint8_t, NUM_BYTES>& q,
const std::array<uint8_t, NUM_BYTES>& c) {
int total = 0;
for (size_t i = 0; i < NUM_BYTES; i++) {
total += superJungle[i][q[i]][c[i]];
}
return total;
}
// --------------------------
// Baseline distance (popcount on the fly)
// --------------------------
inline int hamming_distance_popcount(const std::array<uint8_t, NUM_BYTES>& q,
const std::array<uint8_t, NUM_BYTES>& c) {
int total = 0;
for (size_t i = 0; i < NUM_BYTES; i++) {
total += popcount8(q[i] ^ c[i]);
}
return total;
}
// --------------------------
// Benchmark
// --------------------------
int main() {
std::mt19937 rng(12345);
std::uniform_int_distribution<int> dist(0, 255);
// Build super jungle once
build_super_jungle_runtime();
// Random query vector
std::array<uint8_t, NUM_BYTES> query{};
for (auto &b : query) b = static_cast<uint8_t>(dist(rng));
// Random collection vectors
std::vector<std::array<uint8_t, NUM_BYTES>> collection(N);
for (int i = 0; i < N; i++) {
for (auto &b : collection[i]) b = static_cast<uint8_t>(dist(rng));
}
volatile size_t sink_jungle = 0;
auto start = std::chrono::high_resolution_clock::now();
for (int i = 0; i < N; i++)
sink_jungle += hamming_distance_jungle(query, collection[i]);
auto end = std::chrono::high_resolution_clock::now();
auto jungle_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
volatile size_t sink_pop = 0;
start = std::chrono::high_resolution_clock::now();
for (int i = 0; i < N; i++)
sink_pop += hamming_distance_popcount(query, collection[i]);
end = std::chrono::high_resolution_clock::now();
auto pop_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
std::cout << "Rocket Jungle: " << jungle_ns << " ns, sink=" << sink_jungle << "\n";
std::cout << "Baseline Popcount: " << pop_ns << " ns, sink=" << sink_pop << "\n";
std::cout << "Speedup factor: " << static_cast<double>(pop_ns) / jungle_ns << "x\n";/*
Rocket Jungle: 1297162584 ns, sink=10240071909
Baseline Popcount: 46626625 ns, sink=10240071909
Speedup factor: 0.0359451x
*/
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment