Skip to content

Instantly share code, notes, and snippets.

@Brugarolas
Last active May 3, 2024 02:58
Show Gist options
  • Select an option

  • Save Brugarolas/012b3625a912d916abdca8ebd4283470 to your computer and use it in GitHub Desktop.

Select an option

Save Brugarolas/012b3625a912d916abdca8ebd4283470 to your computer and use it in GitHub Desktop.
AllocaStackAllocator.hpp
/**
* @file AllocaStackAllocator.hpp
* @brief AllocaStackAllocator for fast, temporary allocations on a per-thread basis.
* @author Andrés Brugarolas
*
* The StackAllocator is designed to provide efficient, alloca stack memory allocation
* within a pre-allocated memory block. It supports fast allocation and deallocation,
* caching of frequently used block sizes, and management of free space using an AVL tree.
*/
#include <cstddef>
#include <cstdlib>
#include <new>
#include <iostream>
#include <string>
#include <algorithm>
#include <vector>
#include <cassert>
#include <type_traits>
#include <memory>
// Detect platform and compiler to select the appropriate stack allocation function
#if defined(_WIN32) || defined(_WIN64)
#if defined(_MSC_VER)
// Use _malloca for Microsoft compilers on Windows
#include <malloc.h>
static void* platform_stack_alloc(size_t size) { return _malloca(size); }
static void platform_stack_free(void* ptr) { _freea(ptr); }
#else
// Fallback for other compilers on Windows
// MinGW-w64?
#include <alloca.h>
static void* platform_stack_alloc(size_t size) { return alloca(size); }
// No corresponding free function needed for alloca
#endif
#else
// Use alloca for other platforms (GCC, Clang, etc.)
#include <alloca.h>
static void* platform_stack_alloc(size_t size) { return alloca(size); }
// No corresponding free function needed for alloca
#endif
namespace Bruga::CurrentThread::Stack {
namespace internal {
template <class T>
struct AVLTreeNode {
AVLTreeNode *left;
AVLTreeNode *right;
T value;
int count; // how many nodes are there in this subtree
int height;
AVLTreeNode(T value): value(value) {
count = 1;
height = 1;
left = nullptr;
right = nullptr;
}
// AVLTreeNode& operator=(const AVLTreeNode& other) = delete;
AVLTreeNode& operator=(const AVLTreeNode& other) {
if (this == &other) return *this;
value = other.value;
count = other.count;
height = other.height;
left = other.left;
right = other.right;
return *this;
}
void updateValues() {
count = (left != nullptr ? left->count : 0) + (right != nullptr ? right->count : 0) + 1;
height = std::max(left != nullptr ? left->height : 0, right != nullptr ? right->height : 0) + 1;
}
int balanceFactor() {
return (left != nullptr ? left->height : 0) - (right != nullptr ? right->height: 0);
}
AVLTreeNode* left_rotate(){
AVLTreeNode* R = right;
right = right->left;
R->left = this;
this->updateValues(); // the order is important
R->updateValues();
return R;
}
AVLTreeNode* right_rotate(){
AVLTreeNode* L = left;
left = left->right;
L->right = this;
this->updateValues(); // the order is important
L->updateValues();
return L;
}
};
template class AVLTreeNode<int>;
template class AVLTreeNode<short>;
template class AVLTreeNode<long>;
template class AVLTreeNode<long long>;
template class AVLTreeNode<std::string>;
template <class T>
class AVLTree {
int _size;
AVLTreeNode<T> *root;
void balance(std::vector<AVLTreeNode<T>**> path);
void display(AVLTreeNode<T>* cur, int depth=0, int state=0);
public:
AVLTree() {
root = nullptr;
_size = 0;
}
~AVLTree(){
clear();
}
void insert(T value) {
AVLTreeNode<T> **indirect = &root; // to generalize insertion
std::vector<AVLTreeNode<T>**> path; // to update height values
while (*indirect != nullptr){
path.push_back(indirect);
if ((*indirect)->value > value)
indirect = &((*indirect)->left);
else
indirect = &((*indirect)->right);
}
*indirect = new AVLTreeNode<T>(value);
path.push_back(indirect);
balance(path);
_size++;
}
void erase(T value) {
AVLTreeNode<T> **indirect = &root; // to generalize insertion
std::vector<AVLTreeNode<T>**> path; // to update height values
while (*indirect != nullptr and (*indirect)->value != value){
path.push_back(indirect);
if ((*indirect)->value > value)
indirect = &((*indirect)->left);
else
indirect = &((*indirect)->right);
}
if (*indirect == nullptr) // the value does not exist in tree
return; // may be raising an Exception is more elegant
else
path.push_back(indirect);
std::size_t index = path.size();
if ((*indirect)->left == nullptr and (*indirect)->right == nullptr) { // the node is leaf
delete *indirect; // just delete node
*indirect = nullptr;
path.pop_back(); // pop the deleted node from path
}
else if ((*indirect)->right == nullptr) { // only left child exists
AVLTreeNode<T> *toRemove = *indirect;
(*indirect) = (*indirect)->left;
delete toRemove;
path.pop_back();
} else { // right child exists
AVLTreeNode<T> **successor = &((*indirect)->right);
while ((*successor)->left != nullptr){
path.push_back(successor);
successor = &((*successor)->left);
}
if (*successor == (*indirect)->right){
(*successor)->left = (*indirect)->left;
AVLTreeNode<T> *toRemove = *indirect;
*indirect = *successor;
delete toRemove;
}
else {
AVLTreeNode<T> *tmp = *path.back(), *suc = *successor;
tmp->left = (*successor)->right;
suc->left = (*indirect)->left;
suc->right = (*indirect)->right;
delete *indirect;
*indirect = suc;
path[index] = &(suc->right);
}
}
}
void clear() {
std::vector<AVLTreeNode<T>*> stack;
if (root != nullptr)
stack.push_back(root);
while (!stack.empty()){
AVLTreeNode<T> *node = stack.back();
stack.pop_back();
if (node->left != nullptr)
stack.push_back(node->left);
if (node->right != nullptr)
stack.push_back(node->right);
_size--;
delete node;
}
root = nullptr;
}
bool empty() const {
return _size == 0;
}
int size() const{
return _size;
}
int find(T value) const{
AVLTreeNode<T> *direct = root;
int idx = 0;
while (direct != nullptr and direct->value != value){
if (direct->value > value)
direct = direct->left;
else{
idx += (direct->left ? direct->left->count : 0) + 1;
direct = direct->right;
}
}
if (direct == nullptr)
return -1;
else
return idx + (direct->left ? direct->left->count : 0);
}
int lower_bound(T value) const {
AVLTreeNode<T> *direct = root;
int idx = 0;
while (direct != nullptr) {
if (direct->value >= value)
direct = direct->left;
else {
idx += (direct->left ? direct->left->count : 0) + 1;
direct = direct->right;
}
}
return idx;
}
int upper_bound(T value) const {
AVLTreeNode<T> *direct = root;
int idx = 0;
while (direct != nullptr) {
if (direct->value > value)
direct = direct->left;
else {
idx += (direct->left ? direct->left->count : 0) + 1;
direct = direct->right;
}
}
return idx;
}
const T& find_min() const {
AVLTreeNode<T> *cur = root;
while (cur->left != nullptr)
cur = cur->left;
return cur->value;
}
const T& find_max() const {
AVLTreeNode<T> *cur = root;
while (cur->right != nullptr)
cur = cur->right;
return cur->value;
}
const T& operator[](std::size_t idx) const {
AVLTreeNode<T> *cur = root;
int left = cur->left != nullptr ? cur->left->count : 0;
while (left != idx) {
if (left < idx) {
idx -= left + 1;
cur = cur->right;
left = cur->left != nullptr ? cur->left->count : 0;
} else {
cur = cur->left;
left = cur->left != nullptr ? cur->left->count : 0;
}
}
return cur->value;
}
void insert(const T& block) {
root = insertRec(root, block);
}
void updateHeightAndBalance(AVLTreeNode<T>* node) {
node->updateValues(); // Update the height and count values
}
AVLTreeNode<T>* minValueNode(AVLTreeNode<T>* node) {
AVLTreeNode<T>* current = node;
while (current->left != nullptr) {
current = current->left;
}
return current;
}
AVLTreeNode<T>* insertRec(AVLTreeNode<T>* node, const T& block) {
if (!node) return new AVLTreeNode<T>(block);
if (block < node->value) {
node->left = insertRec(node->left, block);
} else if (block > node->value) {
node->right = insertRec(node->right, block);
} else {
return node; // Duplicate blocks not allowed
}
updateHeightAndBalance(node);
return balanceNode(node);
}
void erase(const T& block) {
root = eraseRec(root, block);
}
AVLTreeNode<T>* eraseRec(AVLTreeNode<T>* node, const T& block) {
if (!node) return node;
if (block < node->value) {
node->left = eraseRec(node->left, block);
} else if (block > node->value) {
node->right = eraseRec(node->right, block);
} else {
// Node with only one child or no child
if (!(node->left && node->right)) {
AVLTreeNode<T>* temp = node->left ? node->left : node->right;
if (!temp) {
temp = node;
node = nullptr;
} else {
*node = *temp; // Copy the contents of the non-empty child
}
delete temp;
} else {
// Node with two children: Get the inorder successor
AVLTreeNode<T>* temp = minValueNode(node->right);
node->value = temp->value;
node->right = eraseRec(node->right, temp->value);
}
}
if (!node) return node;
updateHeightAndBalance(node);
return balanceNode(node);
}
AVLTreeNode<T>* balanceNode(AVLTreeNode<T>* node) {
if (node->balanceFactor() > 1) {
if (node->left->balanceFactor() < 0) {
node->left = leftRotate(node->left);
}
return rightRotate(node);
}
if (node->balanceFactor() < -1) {
if (node->right->balanceFactor() > 0) {
node->right = rightRotate(node->right);
}
return leftRotate(node);
}
return node;
}
AVLTreeNode<T>* leftRotate(AVLTreeNode<T>* x) {
AVLTreeNode<T>* y = x->right;
AVLTreeNode<T>* T2 = y->left;
y->left = x;
x->right = T2;
updateHeightAndBalance(x);
updateHeightAndBalance(y);
return y;
}
AVLTreeNode<T>* rightRotate(AVLTreeNode<T>* y) {
AVLTreeNode<T>* x = y->left;
AVLTreeNode<T>* T2 = x->right;
x->right = y;
y->left = T2;
updateHeightAndBalance(y);
updateHeightAndBalance(x);
return x;
}
void display() {
printf("\n");
if (root != nullptr)
displayNode(root, 0, 0);
else
printf("Empty");
printf("\n");
}
private:
void displayNode(AVLTreeNode<T> *cur, int depth, int state) {
if (cur->left)
display(cur->left, depth + 1, 1);
for (int i = 0; i < depth; i++)
printf(" ");
if (state == 1) // left
printf("┌───");
else if (state == 2) // right
printf("└───");
std::cout << "[" << cur->value << "] - (" << cur->count << ", " << cur->height << ")" << std::endl;
if (cur->right)
display(cur->right, depth + 1, 2);
}
};
template class AVLTree<int>;
template class AVLTree<short>;
template class AVLTree<long>;
template class AVLTree<long long>;
template class AVLTree<std::string>;
} // namespace internal
/**
* @class StackAllocator
* @brief Provides stack-like memory allocation within a thread.
*
* This allocator is intended for use cases where temporary, fast memory allocation
* and deallocation is needed. It's optimized for speed and efficiency, using a combination
* of platform-specific stack allocation methods, a cache for common allocation sizes,
* and an AVL tree for managing free memory blocks to minimize fragmentation.
*/
class StackAllocator {
private:
struct MemoryBlock {
size_t size;
char* address;
MemoryBlock(size_t size, char* address) : size(size), address(address) {}
bool operator<(const MemoryBlock& other) const {
return size < other.size || (size == other.size && address < other.address);
}
bool operator==(const MemoryBlock& other) const {
return address == other.address;
}
};
static inline thread_local StackAllocator* currentAllocator = nullptr;
char* basePointer = nullptr;
char* currentPointer = nullptr;
static constexpr size_t MAX_STACK_SIZE = 8 * 1024 * 1024; // 8 MB stack size limit
static constexpr size_t CACHE_SIZE = 10;
static constexpr size_t COMMON_BLOCK_SIZE = 128;
void* cache[CACHE_SIZE] = {nullptr};
size_t cacheCount = 0;
internal::AVLTree<MemoryBlock> freeBlocks;
/**
* @brief Constructs a new StackAllocator instance.
*
* The constructor is private to ensure that StackAllocator instances are
* managed correctly using createAllocator() to maintain thread-local instances.
*/
StackAllocator() {
basePointer = static_cast<char*>(platform_stack_alloc(1)); // To get the current stack position
currentPointer = basePointer;
platform_stack_alloc(MAX_STACK_SIZE + 512); // Reserve space + security gap
basePointer += 512; // Move past the security gap
currentPointer = basePointer;
}
public:
/**
* @brief Creates or retrieves the thread-local StackAllocator instance.
* @return Pointer to the thread-local StackAllocator instance.
*/
static StackAllocator* createAllocator() {
if (!currentAllocator) {
currentAllocator = new StackAllocator();
}
return currentAllocator;
}
/**
* @brief Allocates a block of memory of the specified size.
* @param size The size of the memory block to allocate.
* @param alignment The alignment of the memory block (default = alignof(std::max_align_t)).
* @return Pointer to the allocated memory block.
*/
void* allocate(size_t size, size_t alignment = alignof(std::max_align_t)) {
size_t currentAddress = reinterpret_cast<size_t>(currentPointer);
size_t alignAdjustment = (alignment - currentAddress % alignment) % alignment;
size_t totalSize = size + alignAdjustment;
if (currentPointer + totalSize - basePointer > MAX_STACK_SIZE) {
throw std::bad_alloc(); // Handle stack overflow
}
void* alignedPtr = currentPointer + alignAdjustment;
currentPointer += totalSize;
return alignedPtr;
}
void deallocate(void* ptr, size_t size) {
MemoryBlock freedBlock(size, static_cast<char*>(ptr));
freeBlocks.insert(freedBlock); // Insert the freed block back into the AVL tree
coalesceAdjacent(freedBlock);
}
void coalesceAdjacent(const MemoryBlock& block) {
// This function will attempt to merge the newly freed block with adjacent free blocks
// in the tree, if any, to reduce fragmentation.
auto nextBlock = freeBlocks.upper_bound(block);
auto prevBlock = freeBlocks.lower_bound(block);
if (prevBlock != freeBlocks.begin()) {
--prevBlock; // Move to the actual previous block if not at the beginning
if (prevBlock->address + prevBlock->size == block.address) {
// Merge with previous block
MemoryBlock mergedBlock(prevBlock->size + block.size, prevBlock->address);
freeBlocks.erase(prevBlock);
freeBlocks.erase(block);
freeBlocks.insert(mergedBlock);
// Continue with the merged block as the current block
coalesceAdjacent(mergedBlock);
return;
}
}
if (nextBlock != freeBlocks.end() && block.address + block.size == nextBlock->address) {
// Merge with next block
MemoryBlock mergedBlock(block.size + nextBlock->size, block.address);
freeBlocks.erase(block);
freeBlocks.erase(nextBlock);
freeBlocks.insert(mergedBlock);
// Continue with the merged block as the current block
coalesceAdjacent(mergedBlock);
}
}
void* allocateFromCache(size_t size) {
if (cacheCount > 0 && size <= COMMON_BLOCK_SIZE) {
return cache[--cacheCount]; // Return a cached block
}
return allocate(size); // Allocate normally if not suitable for cache
}
void returnToCache(void* ptr) {
if (cacheCount < CACHE_SIZE) {
cache[cacheCount++] = ptr; // Store the block in the cache
}
// Else, ignore or handle overflow
}
StackAllocator(const StackAllocator&) = delete;
StackAllocator& operator=(const StackAllocator&) = delete;
};
} // namespace Bruga::CurrentThread::Stack
// Example 1: Temporary Object Allocation
/*
#include "StackAllocator.hpp"
class TempObject {
public:
void doWork() {
// Simulate some work here
}
};
void process() {
StackAllocator* allocator = StackAllocator::createAllocator();
// Allocate a temporary object on the stack
void* mem = allocator->allocate(sizeof(TempObject));
TempObject* tempObj = new(mem) TempObject(); // Placement new
// Use the temporary object
tempObj->doWork();
// Manual destruction and return memory back to the allocator
tempObj->~TempObject();
allocator->returnToCache(mem);
}
*/
// Example 2: Fast Array Allocation for Computation
/*
#include "StackAllocator.hpp"
#include <iostream>
void fastComputation() {
StackAllocator* allocator = StackAllocator::createAllocator();
// Example: Allocate an array of 100 integers
int* arr = static_cast<int*>(allocator->allocate(100 * sizeof(int)));
// Use the array for some computations
for (int i = 0; i < 100; ++i) {
arr[i] = i * i;
}
// Assume we're done with the array and it can be returned to the allocator
allocator->returnToCache(arr);
}
*/
# Set the required stack size (in bytes).
set(STACK_SIZE "9437184") # 9 MB in bytes
# Adjust compiler/linker flags based on the compiler
if(MSVC)
# For MSVC, use the linker flag /STACK:
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /STACK:${STACK_SIZE}")
elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
# For GCC and Clang, use the compiler flag -Wl,--stack:
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--stack,${STACK_SIZE}")
endif()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment