Skip to content

Instantly share code, notes, and snippets.

@jweinst1
Last active January 21, 2026 02:29
Show Gist options
  • Select an option

  • Save jweinst1/db3f2f99b17d4816999b1181ad84cf45 to your computer and use it in GitHub Desktop.

Select an option

Save jweinst1/db3f2f99b17d4816999b1181ad84cf45 to your computer and use it in GitHub Desktop.
A tlb aware slot allocator
#include <iostream>
#include <vector>
#include <unordered_map>
#include <random>
#include <chrono>
#include <cstdint>
#include <limits>
#include <cstring>
#include <cstdlib>
#include <cstdio>
struct IdGen {
std::random_device rd;
std::mt19937 gen;
std::uniform_int_distribution<uint64_t> distrib;
IdGen(): rd(), gen(rd()), distrib(std::numeric_limits<uint64_t>::min(),
std::numeric_limits<uint64_t>::max()) {}
uint64_t next() {
return static_cast<uint64_t>(distrib(gen));
}
};
struct BlockRegion {
uint8_t places[4096] = {0};
static constexpr size_t mask = 4095;
size_t curSize = 0;
inline size_t getSlot(size_t startPoint) {
size_t idx = startPoint & mask;
if (curSize == 4096) return (size_t)-1;
while (true)
{
//uint8_t diff = places[idx] ^ 0xff;
if (!places[idx]) {
places[idx] = 0xff;
++curSize;
return idx;
}
idx = (idx + 1) & mask;
}
}
inline void free(size_t idx) {
places[idx] = 0;
--curSize;
}
};
static void perfTest_BlockRegion() {
static constexpr size_t testSize = 10000000;
static constexpr size_t keyMask3 = (size_t{1} << 20) - 1;
IdGen gens;
std::vector<uint32_t> nums;
for (int i = 0; i < testSize; ++i)
{
nums.push_back(gens.next() & keyMask3);
}
BlockRegion br;
for (int i = 0; i < 800; ++i)
{
br.getSlot(nums[i]);
}
auto start = std::chrono::high_resolution_clock::now();
for (int i = 999; i < testSize; ++i)
{
size_t myId = br.getSlot(nums[i]);
br.free(myId);
}
auto end = std::chrono::high_resolution_clock::now();
std::cout << " BlockRegion " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() << "us\n";
}
int main(int argc, char const *argv[])
{
std::printf("%d\n", __builtin_ffsll(0b0011000));
perfTest_BlockRegion();
return 0;
}
#include <iostream>
#include <vector>
#include <unordered_map>
#include <random>
#include <chrono>
#include <cstdint>
#include <limits>
#include <cstring>
#include <cstdlib>
#include <cstdio>
struct IdGen {
std::random_device rd;
std::mt19937 gen;
std::uniform_int_distribution<uint64_t> distrib;
IdGen(): rd(), gen(rd()), distrib(std::numeric_limits<uint64_t>::min(),
std::numeric_limits<uint64_t>::max()) {}
uint64_t next() {
return static_cast<uint64_t>(distrib(gen));
}
};
struct BlockRegion {
static constexpr size_t theSize = 1 << 21;
uint8_t places[theSize] = {0};
static constexpr size_t mask = (theSize) - 1;
size_t curSize = 0;
inline size_t getSlot(size_t startPoint) {
size_t idx = startPoint & mask;
if (curSize == theSize) return (size_t)-1;
while (true)
{
//uint8_t diff = places[idx] ^ 0xff;
if (!places[idx]) {
places[idx] = 0xff;
++curSize;
return idx;
}
idx = (idx + 1) & mask;
}
}
inline void free(size_t idx) {
places[idx] = 0;
--curSize;
}
};
static void perfTest_BlockRegion() {
static constexpr size_t testSize = 10000000;
static constexpr size_t keyMask3 = (size_t{1} << 24) - 1;
IdGen gens;
std::vector<uint32_t> nums;
for (int i = 0; i < testSize; ++i)
{
nums.push_back(gens.next() & keyMask3);
}
BlockRegion br;
for (int i = 0; i < 700000; ++i)
{
br.getSlot(nums[i]);
}
auto start = std::chrono::high_resolution_clock::now();
for (int i = 700002; i < testSize; ++i) // 52 ms
{
size_t myId = br.getSlot(nums[i]);
br.free(myId);
}
auto end = std::chrono::high_resolution_clock::now();
std::cout << " BlockRegion " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() << "us\n";
}
int main(int argc, char const *argv[])
{
std::printf("%d\n", __builtin_ffsll(0b0011000));
perfTest_BlockRegion();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment