Skip to content

Instantly share code, notes, and snippets.

@jweinst1
Last active August 30, 2025 06:51
Show Gist options
  • Select an option

  • Save jweinst1/c9e2a6ef9dfacdaabc866fd30073d467 to your computer and use it in GitHub Desktop.

Select an option

Save jweinst1/c9e2a6ef9dfacdaabc866fd30073d467 to your computer and use it in GitHub Desktop.
Using hamming ladders as an iterator for numbers in a bitset
#include <array>
#include <chrono>
#include <iostream>
#include <cstdint>
// --------------------------------------
// Byte-based 256-bit bitset
// --------------------------------------
struct ConstexprBitset256 {
std::array<uint8_t, 32> blocks{};
constexpr void set(std::size_t idx) {
blocks[idx / 8] |= (1u << (idx & 7));
}
constexpr bool test(std::size_t idx) const {
return (blocks[idx / 8] >> (idx & 7)) & 1u;
}
};
// --------------------------------------
// Jungle Gym Precomputation
// --------------------------------------
using JungleEntry = std::array<uint8_t, 8>; // up to 8 bits per byte
struct JungleCell {
JungleEntry bits{};
uint8_t count{};
};
constexpr JungleCell make_jungle_cell(int byteIndex, int byteVal) {
JungleCell cell{};
int base = byteIndex * 8;
int c = 0;
for (int b = 0; b < 8; b++) {
if (byteVal & (1 << b)) {
cell.bits[c++] = base + b;
}
}
cell.count = c;
return cell;
}
constexpr auto build_jungle() {
std::array<std::array<JungleCell, 256>, 32> jungle{};
for (int byteIdx = 0; byteIdx < 32; byteIdx++) {
for (int val = 0; val < 256; val++) {
jungle[byteIdx][val] = make_jungle_cell(byteIdx, val);
}
}
return jungle;
}
inline constexpr auto JungleGym = build_jungle();
// --------------------------------------
// Dump all set indices into output[]
// --------------------------------------
size_t getSetIndexesJungle(const ConstexprBitset256& bs, uint8_t* out) {
size_t total = 0;
for (int byteIdx = 0; byteIdx < 32; byteIdx++) {
const JungleCell& cell = JungleGym[byteIdx][bs.blocks[byteIdx]];
for (int j = 0; j < cell.count; j++) {
out[total++] = cell.bits[j];
}
}
return total;
}
// --------------------------------------
// Benchmark
// --------------------------------------
int main() {
ConstexprBitset256 bs;
bs.set(59);
bs.set(113);
bs.set(8);
bs.set(91);
bs.set(200);
constexpr int N = 10000000;
std::array<uint8_t, 256> buffer{};
volatile uint64_t sink = 0;
auto start = std::chrono::high_resolution_clock::now();
for (int i = 0; i < N; ++i) {
size_t count = getSetIndexesJungle(bs, buffer.data());
sink ^= count;
}
auto end = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start);
std::cout << "Processed " << N << " iterations in "
<< duration.count() << " ns\n";
std::cout << "Sink = " << sink << "\n";
}
#include <array>
#include <chrono>
#include <iostream>
#include <cstdint>
// --------------------------------------
// Byte-based 256-bit bitset
// --------------------------------------
struct ConstexprBitset256 {
std::array<uint8_t, 32> blocks{};
constexpr void set(std::size_t idx) {
blocks[idx / 8] |= (1u << (idx & 7));
}
constexpr bool test(std::size_t idx) const {
return (blocks[idx / 8] >> (idx & 7)) & 1u;
}
};
// --------------------------------------
// Jungle Gym Precomputation
// --------------------------------------
using JungleEntry = std::array<uint8_t, 8>; // up to 8 bits per byte
struct JungleCell {
JungleEntry bits{};
uint8_t count{};
};
constexpr JungleCell make_jungle_cell(int byteIndex, int byteVal) {
JungleCell cell{};
int base = byteIndex * 8;
int c = 0;
for (int b = 0; b < 8; b++) {
if (byteVal & (1 << b)) {
cell.bits[c++] = base + b;
}
}
cell.count = c;
return cell;
}
constexpr auto build_jungle() {
std::array<std::array<JungleCell, 256>, 32> jungle{};
for (int byteIdx = 0; byteIdx < 32; byteIdx++) {
for (int val = 0; val < 256; val++) {
jungle[byteIdx][val] = make_jungle_cell(byteIdx, val);
}
}
return jungle;
}
inline constexpr auto JungleGym = build_jungle();
// --------------------------------------
// Dump all set indices into output[]
// --------------------------------------
size_t getSetIndexesJungle(const ConstexprBitset256& bs, const JungleCell** out) {
size_t total = 0;
for (int byteIdx = 0; byteIdx < 32; ++byteIdx) {
out[total++] = &JungleGym[byteIdx][bs.blocks[byteIdx]];
}
return total;
}
// --------------------------------------
// Benchmark
// --------------------------------------
int main(int argc, char* argv[]) {
ConstexprBitset256 bs;
bs.set(81);
bs.set(113);
bs.set(8);
bs.set(91);
bs.set(200);
constexpr int N = 10000000;
std::array<const JungleCell*, 32> buffer{};
volatile uint64_t sink = 0;
auto start = std::chrono::high_resolution_clock::now();
for (int i = 0; i < N; ++i) {
size_t count = getSetIndexesJungle(bs, buffer.data());
sink += count;
}
auto end = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start);
std::cout << "Processed " << N << " iterations in "
<< duration.count() << " ns\n";
std::cout << "Sink = " << sink << "\n";
}
#include <array>
#include <cstdint>
#include <type_traits>
#include <iostream>
#include <bitset>
// ------------------------
// Portable popcount helper
// ------------------------
constexpr int portable_popcount(uint32_t x) {
#if defined(__GNUC__) || defined(__clang__)
return __builtin_popcount(x);
#elif defined(_MSC_VER)
return __popcnt(x);
#else
// fallback: simple constexpr loop
int count = 0;
while (x) {
count += x & 1;
x >>= 1;
}
return count;
#endif
}
// ------------------------
// Generate Hamming ladders
// ------------------------
constexpr auto generate_hamming_ladders() {
std::array<std::array<uint8_t, 9>, 256> ladders{};
for (int byte = 0; byte < 256; ++byte) {
int idx = 0;
int pop = portable_popcount(byte);
int val = byte;
while (pop > 0) {
ladders[byte][idx++] = static_cast<uint8_t>(val);
// move down one rung in the ladder:
// clear the lowest set bit
#if defined(__GNUC__) || defined(__clang__)
val &= (val - 1); // standard trick
#elif defined(_MSC_VER)
_BitScanForward(reinterpret_cast<unsigned long*>(&val), val);
val &= (val - 1);
#else
val &= (val - 1);
#endif
pop = portable_popcount(val);
}
ladders[byte][idx++] = 0; // always end at zero
}
return ladders;
}
// Global constexpr table
inline constexpr auto hamming_ladders = generate_hamming_ladders();
int main(int argc, char const *argv[])
{
const auto& my_ladder = hamming_ladders[0b01101010];
for (const auto& o : my_ladder) {
std::cout << std::bitset<8>(o) << "\n";
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment