Last active
May 3, 2024 02:58
-
-
Save Brugarolas/012b3625a912d916abdca8ebd4283470 to your computer and use it in GitHub Desktop.
AllocaStackAllocator.hpp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /** | |
| * @file AllocaStackAllocator.hpp | |
| * @brief AllocaStackAllocator for fast, temporary allocations on a per-thread basis. | |
| * @author Andrés Brugarolas | |
| * | |
| * The StackAllocator is designed to provide efficient, alloca stack memory allocation | |
| * within a pre-allocated memory block. It supports fast allocation and deallocation, | |
| * caching of frequently used block sizes, and management of free space using an AVL tree. | |
| */ | |
| #include <cstddef> | |
| #include <cstdlib> | |
| #include <new> | |
| #include <iostream> | |
| #include <string> | |
| #include <algorithm> | |
| #include <vector> | |
| #include <cassert> | |
| #include <type_traits> | |
| #include <memory> | |
| // Detect platform and compiler to select the appropriate stack allocation function | |
| #if defined(_WIN32) || defined(_WIN64) | |
| #if defined(_MSC_VER) | |
| // Use _malloca for Microsoft compilers on Windows | |
| #include <malloc.h> | |
| static void* platform_stack_alloc(size_t size) { return _malloca(size); } | |
| static void platform_stack_free(void* ptr) { _freea(ptr); } | |
| #else | |
| // Fallback for other compilers on Windows | |
| // MinGW-w64? | |
| #include <alloca.h> | |
| static void* platform_stack_alloc(size_t size) { return alloca(size); } | |
| // No corresponding free function needed for alloca | |
| #endif | |
| #else | |
| // Use alloca for other platforms (GCC, Clang, etc.) | |
| #include <alloca.h> | |
| static void* platform_stack_alloc(size_t size) { return alloca(size); } | |
| // No corresponding free function needed for alloca | |
| #endif | |
| namespace Bruga::CurrentThread::Stack { | |
| namespace internal { | |
| template <class T> | |
| struct AVLTreeNode { | |
| AVLTreeNode *left; | |
| AVLTreeNode *right; | |
| T value; | |
| int count; // how many nodes are there in this subtree | |
| int height; | |
| AVLTreeNode(T value): value(value) { | |
| count = 1; | |
| height = 1; | |
| left = nullptr; | |
| right = nullptr; | |
| } | |
| // AVLTreeNode& operator=(const AVLTreeNode& other) = delete; | |
| AVLTreeNode& operator=(const AVLTreeNode& other) { | |
| if (this == &other) return *this; | |
| value = other.value; | |
| count = other.count; | |
| height = other.height; | |
| left = other.left; | |
| right = other.right; | |
| return *this; | |
| } | |
| void updateValues() { | |
| count = (left != nullptr ? left->count : 0) + (right != nullptr ? right->count : 0) + 1; | |
| height = std::max(left != nullptr ? left->height : 0, right != nullptr ? right->height : 0) + 1; | |
| } | |
| int balanceFactor() { | |
| return (left != nullptr ? left->height : 0) - (right != nullptr ? right->height: 0); | |
| } | |
| AVLTreeNode* left_rotate(){ | |
| AVLTreeNode* R = right; | |
| right = right->left; | |
| R->left = this; | |
| this->updateValues(); // the order is important | |
| R->updateValues(); | |
| return R; | |
| } | |
| AVLTreeNode* right_rotate(){ | |
| AVLTreeNode* L = left; | |
| left = left->right; | |
| L->right = this; | |
| this->updateValues(); // the order is important | |
| L->updateValues(); | |
| return L; | |
| } | |
| }; | |
| template class AVLTreeNode<int>; | |
| template class AVLTreeNode<short>; | |
| template class AVLTreeNode<long>; | |
| template class AVLTreeNode<long long>; | |
| template class AVLTreeNode<std::string>; | |
| template <class T> | |
| class AVLTree { | |
| int _size; | |
| AVLTreeNode<T> *root; | |
| void balance(std::vector<AVLTreeNode<T>**> path); | |
| void display(AVLTreeNode<T>* cur, int depth=0, int state=0); | |
| public: | |
| AVLTree() { | |
| root = nullptr; | |
| _size = 0; | |
| } | |
| ~AVLTree(){ | |
| clear(); | |
| } | |
| void insert(T value) { | |
| AVLTreeNode<T> **indirect = &root; // to generalize insertion | |
| std::vector<AVLTreeNode<T>**> path; // to update height values | |
| while (*indirect != nullptr){ | |
| path.push_back(indirect); | |
| if ((*indirect)->value > value) | |
| indirect = &((*indirect)->left); | |
| else | |
| indirect = &((*indirect)->right); | |
| } | |
| *indirect = new AVLTreeNode<T>(value); | |
| path.push_back(indirect); | |
| balance(path); | |
| _size++; | |
| } | |
| void erase(T value) { | |
| AVLTreeNode<T> **indirect = &root; // to generalize insertion | |
| std::vector<AVLTreeNode<T>**> path; // to update height values | |
| while (*indirect != nullptr and (*indirect)->value != value){ | |
| path.push_back(indirect); | |
| if ((*indirect)->value > value) | |
| indirect = &((*indirect)->left); | |
| else | |
| indirect = &((*indirect)->right); | |
| } | |
| if (*indirect == nullptr) // the value does not exist in tree | |
| return; // may be raising an Exception is more elegant | |
| else | |
| path.push_back(indirect); | |
| std::size_t index = path.size(); | |
| if ((*indirect)->left == nullptr and (*indirect)->right == nullptr) { // the node is leaf | |
| delete *indirect; // just delete node | |
| *indirect = nullptr; | |
| path.pop_back(); // pop the deleted node from path | |
| } | |
| else if ((*indirect)->right == nullptr) { // only left child exists | |
| AVLTreeNode<T> *toRemove = *indirect; | |
| (*indirect) = (*indirect)->left; | |
| delete toRemove; | |
| path.pop_back(); | |
| } else { // right child exists | |
| AVLTreeNode<T> **successor = &((*indirect)->right); | |
| while ((*successor)->left != nullptr){ | |
| path.push_back(successor); | |
| successor = &((*successor)->left); | |
| } | |
| if (*successor == (*indirect)->right){ | |
| (*successor)->left = (*indirect)->left; | |
| AVLTreeNode<T> *toRemove = *indirect; | |
| *indirect = *successor; | |
| delete toRemove; | |
| } | |
| else { | |
| AVLTreeNode<T> *tmp = *path.back(), *suc = *successor; | |
| tmp->left = (*successor)->right; | |
| suc->left = (*indirect)->left; | |
| suc->right = (*indirect)->right; | |
| delete *indirect; | |
| *indirect = suc; | |
| path[index] = &(suc->right); | |
| } | |
| } | |
| } | |
| void clear() { | |
| std::vector<AVLTreeNode<T>*> stack; | |
| if (root != nullptr) | |
| stack.push_back(root); | |
| while (!stack.empty()){ | |
| AVLTreeNode<T> *node = stack.back(); | |
| stack.pop_back(); | |
| if (node->left != nullptr) | |
| stack.push_back(node->left); | |
| if (node->right != nullptr) | |
| stack.push_back(node->right); | |
| _size--; | |
| delete node; | |
| } | |
| root = nullptr; | |
| } | |
| bool empty() const { | |
| return _size == 0; | |
| } | |
| int size() const{ | |
| return _size; | |
| } | |
| int find(T value) const{ | |
| AVLTreeNode<T> *direct = root; | |
| int idx = 0; | |
| while (direct != nullptr and direct->value != value){ | |
| if (direct->value > value) | |
| direct = direct->left; | |
| else{ | |
| idx += (direct->left ? direct->left->count : 0) + 1; | |
| direct = direct->right; | |
| } | |
| } | |
| if (direct == nullptr) | |
| return -1; | |
| else | |
| return idx + (direct->left ? direct->left->count : 0); | |
| } | |
| int lower_bound(T value) const { | |
| AVLTreeNode<T> *direct = root; | |
| int idx = 0; | |
| while (direct != nullptr) { | |
| if (direct->value >= value) | |
| direct = direct->left; | |
| else { | |
| idx += (direct->left ? direct->left->count : 0) + 1; | |
| direct = direct->right; | |
| } | |
| } | |
| return idx; | |
| } | |
| int upper_bound(T value) const { | |
| AVLTreeNode<T> *direct = root; | |
| int idx = 0; | |
| while (direct != nullptr) { | |
| if (direct->value > value) | |
| direct = direct->left; | |
| else { | |
| idx += (direct->left ? direct->left->count : 0) + 1; | |
| direct = direct->right; | |
| } | |
| } | |
| return idx; | |
| } | |
| const T& find_min() const { | |
| AVLTreeNode<T> *cur = root; | |
| while (cur->left != nullptr) | |
| cur = cur->left; | |
| return cur->value; | |
| } | |
| const T& find_max() const { | |
| AVLTreeNode<T> *cur = root; | |
| while (cur->right != nullptr) | |
| cur = cur->right; | |
| return cur->value; | |
| } | |
| const T& operator[](std::size_t idx) const { | |
| AVLTreeNode<T> *cur = root; | |
| int left = cur->left != nullptr ? cur->left->count : 0; | |
| while (left != idx) { | |
| if (left < idx) { | |
| idx -= left + 1; | |
| cur = cur->right; | |
| left = cur->left != nullptr ? cur->left->count : 0; | |
| } else { | |
| cur = cur->left; | |
| left = cur->left != nullptr ? cur->left->count : 0; | |
| } | |
| } | |
| return cur->value; | |
| } | |
| void insert(const T& block) { | |
| root = insertRec(root, block); | |
| } | |
| void updateHeightAndBalance(AVLTreeNode<T>* node) { | |
| node->updateValues(); // Update the height and count values | |
| } | |
| AVLTreeNode<T>* minValueNode(AVLTreeNode<T>* node) { | |
| AVLTreeNode<T>* current = node; | |
| while (current->left != nullptr) { | |
| current = current->left; | |
| } | |
| return current; | |
| } | |
| AVLTreeNode<T>* insertRec(AVLTreeNode<T>* node, const T& block) { | |
| if (!node) return new AVLTreeNode<T>(block); | |
| if (block < node->value) { | |
| node->left = insertRec(node->left, block); | |
| } else if (block > node->value) { | |
| node->right = insertRec(node->right, block); | |
| } else { | |
| return node; // Duplicate blocks not allowed | |
| } | |
| updateHeightAndBalance(node); | |
| return balanceNode(node); | |
| } | |
| void erase(const T& block) { | |
| root = eraseRec(root, block); | |
| } | |
| AVLTreeNode<T>* eraseRec(AVLTreeNode<T>* node, const T& block) { | |
| if (!node) return node; | |
| if (block < node->value) { | |
| node->left = eraseRec(node->left, block); | |
| } else if (block > node->value) { | |
| node->right = eraseRec(node->right, block); | |
| } else { | |
| // Node with only one child or no child | |
| if (!(node->left && node->right)) { | |
| AVLTreeNode<T>* temp = node->left ? node->left : node->right; | |
| if (!temp) { | |
| temp = node; | |
| node = nullptr; | |
| } else { | |
| *node = *temp; // Copy the contents of the non-empty child | |
| } | |
| delete temp; | |
| } else { | |
| // Node with two children: Get the inorder successor | |
| AVLTreeNode<T>* temp = minValueNode(node->right); | |
| node->value = temp->value; | |
| node->right = eraseRec(node->right, temp->value); | |
| } | |
| } | |
| if (!node) return node; | |
| updateHeightAndBalance(node); | |
| return balanceNode(node); | |
| } | |
| AVLTreeNode<T>* balanceNode(AVLTreeNode<T>* node) { | |
| if (node->balanceFactor() > 1) { | |
| if (node->left->balanceFactor() < 0) { | |
| node->left = leftRotate(node->left); | |
| } | |
| return rightRotate(node); | |
| } | |
| if (node->balanceFactor() < -1) { | |
| if (node->right->balanceFactor() > 0) { | |
| node->right = rightRotate(node->right); | |
| } | |
| return leftRotate(node); | |
| } | |
| return node; | |
| } | |
| AVLTreeNode<T>* leftRotate(AVLTreeNode<T>* x) { | |
| AVLTreeNode<T>* y = x->right; | |
| AVLTreeNode<T>* T2 = y->left; | |
| y->left = x; | |
| x->right = T2; | |
| updateHeightAndBalance(x); | |
| updateHeightAndBalance(y); | |
| return y; | |
| } | |
| AVLTreeNode<T>* rightRotate(AVLTreeNode<T>* y) { | |
| AVLTreeNode<T>* x = y->left; | |
| AVLTreeNode<T>* T2 = x->right; | |
| x->right = y; | |
| y->left = T2; | |
| updateHeightAndBalance(y); | |
| updateHeightAndBalance(x); | |
| return x; | |
| } | |
| void display() { | |
| printf("\n"); | |
| if (root != nullptr) | |
| displayNode(root, 0, 0); | |
| else | |
| printf("Empty"); | |
| printf("\n"); | |
| } | |
| private: | |
| void displayNode(AVLTreeNode<T> *cur, int depth, int state) { | |
| if (cur->left) | |
| display(cur->left, depth + 1, 1); | |
| for (int i = 0; i < depth; i++) | |
| printf(" "); | |
| if (state == 1) // left | |
| printf("┌───"); | |
| else if (state == 2) // right | |
| printf("└───"); | |
| std::cout << "[" << cur->value << "] - (" << cur->count << ", " << cur->height << ")" << std::endl; | |
| if (cur->right) | |
| display(cur->right, depth + 1, 2); | |
| } | |
| }; | |
| template class AVLTree<int>; | |
| template class AVLTree<short>; | |
| template class AVLTree<long>; | |
| template class AVLTree<long long>; | |
| template class AVLTree<std::string>; | |
| } // namespace internal | |
| /** | |
| * @class StackAllocator | |
| * @brief Provides stack-like memory allocation within a thread. | |
| * | |
| * This allocator is intended for use cases where temporary, fast memory allocation | |
| * and deallocation is needed. It's optimized for speed and efficiency, using a combination | |
| * of platform-specific stack allocation methods, a cache for common allocation sizes, | |
| * and an AVL tree for managing free memory blocks to minimize fragmentation. | |
| */ | |
| class StackAllocator { | |
| private: | |
| struct MemoryBlock { | |
| size_t size; | |
| char* address; | |
| MemoryBlock(size_t size, char* address) : size(size), address(address) {} | |
| bool operator<(const MemoryBlock& other) const { | |
| return size < other.size || (size == other.size && address < other.address); | |
| } | |
| bool operator==(const MemoryBlock& other) const { | |
| return address == other.address; | |
| } | |
| }; | |
| static inline thread_local StackAllocator* currentAllocator = nullptr; | |
| char* basePointer = nullptr; | |
| char* currentPointer = nullptr; | |
| static constexpr size_t MAX_STACK_SIZE = 8 * 1024 * 1024; // 8 MB stack size limit | |
| static constexpr size_t CACHE_SIZE = 10; | |
| static constexpr size_t COMMON_BLOCK_SIZE = 128; | |
| void* cache[CACHE_SIZE] = {nullptr}; | |
| size_t cacheCount = 0; | |
| internal::AVLTree<MemoryBlock> freeBlocks; | |
| /** | |
| * @brief Constructs a new StackAllocator instance. | |
| * | |
| * The constructor is private to ensure that StackAllocator instances are | |
| * managed correctly using createAllocator() to maintain thread-local instances. | |
| */ | |
| StackAllocator() { | |
| basePointer = static_cast<char*>(platform_stack_alloc(1)); // To get the current stack position | |
| currentPointer = basePointer; | |
| platform_stack_alloc(MAX_STACK_SIZE + 512); // Reserve space + security gap | |
| basePointer += 512; // Move past the security gap | |
| currentPointer = basePointer; | |
| } | |
| public: | |
| /** | |
| * @brief Creates or retrieves the thread-local StackAllocator instance. | |
| * @return Pointer to the thread-local StackAllocator instance. | |
| */ | |
| static StackAllocator* createAllocator() { | |
| if (!currentAllocator) { | |
| currentAllocator = new StackAllocator(); | |
| } | |
| return currentAllocator; | |
| } | |
| /** | |
| * @brief Allocates a block of memory of the specified size. | |
| * @param size The size of the memory block to allocate. | |
| * @param alignment The alignment of the memory block (default = alignof(std::max_align_t)). | |
| * @return Pointer to the allocated memory block. | |
| */ | |
| void* allocate(size_t size, size_t alignment = alignof(std::max_align_t)) { | |
| size_t currentAddress = reinterpret_cast<size_t>(currentPointer); | |
| size_t alignAdjustment = (alignment - currentAddress % alignment) % alignment; | |
| size_t totalSize = size + alignAdjustment; | |
| if (currentPointer + totalSize - basePointer > MAX_STACK_SIZE) { | |
| throw std::bad_alloc(); // Handle stack overflow | |
| } | |
| void* alignedPtr = currentPointer + alignAdjustment; | |
| currentPointer += totalSize; | |
| return alignedPtr; | |
| } | |
| void deallocate(void* ptr, size_t size) { | |
| MemoryBlock freedBlock(size, static_cast<char*>(ptr)); | |
| freeBlocks.insert(freedBlock); // Insert the freed block back into the AVL tree | |
| coalesceAdjacent(freedBlock); | |
| } | |
| void coalesceAdjacent(const MemoryBlock& block) { | |
| // This function will attempt to merge the newly freed block with adjacent free blocks | |
| // in the tree, if any, to reduce fragmentation. | |
| auto nextBlock = freeBlocks.upper_bound(block); | |
| auto prevBlock = freeBlocks.lower_bound(block); | |
| if (prevBlock != freeBlocks.begin()) { | |
| --prevBlock; // Move to the actual previous block if not at the beginning | |
| if (prevBlock->address + prevBlock->size == block.address) { | |
| // Merge with previous block | |
| MemoryBlock mergedBlock(prevBlock->size + block.size, prevBlock->address); | |
| freeBlocks.erase(prevBlock); | |
| freeBlocks.erase(block); | |
| freeBlocks.insert(mergedBlock); | |
| // Continue with the merged block as the current block | |
| coalesceAdjacent(mergedBlock); | |
| return; | |
| } | |
| } | |
| if (nextBlock != freeBlocks.end() && block.address + block.size == nextBlock->address) { | |
| // Merge with next block | |
| MemoryBlock mergedBlock(block.size + nextBlock->size, block.address); | |
| freeBlocks.erase(block); | |
| freeBlocks.erase(nextBlock); | |
| freeBlocks.insert(mergedBlock); | |
| // Continue with the merged block as the current block | |
| coalesceAdjacent(mergedBlock); | |
| } | |
| } | |
| void* allocateFromCache(size_t size) { | |
| if (cacheCount > 0 && size <= COMMON_BLOCK_SIZE) { | |
| return cache[--cacheCount]; // Return a cached block | |
| } | |
| return allocate(size); // Allocate normally if not suitable for cache | |
| } | |
| void returnToCache(void* ptr) { | |
| if (cacheCount < CACHE_SIZE) { | |
| cache[cacheCount++] = ptr; // Store the block in the cache | |
| } | |
| // Else, ignore or handle overflow | |
| } | |
| StackAllocator(const StackAllocator&) = delete; | |
| StackAllocator& operator=(const StackAllocator&) = delete; | |
| }; | |
| } // namespace Bruga::CurrentThread::Stack | |
| // Example 1: Temporary Object Allocation | |
| /* | |
| #include "StackAllocator.hpp" | |
| class TempObject { | |
| public: | |
| void doWork() { | |
| // Simulate some work here | |
| } | |
| }; | |
| void process() { | |
| StackAllocator* allocator = StackAllocator::createAllocator(); | |
| // Allocate a temporary object on the stack | |
| void* mem = allocator->allocate(sizeof(TempObject)); | |
| TempObject* tempObj = new(mem) TempObject(); // Placement new | |
| // Use the temporary object | |
| tempObj->doWork(); | |
| // Manual destruction and return memory back to the allocator | |
| tempObj->~TempObject(); | |
| allocator->returnToCache(mem); | |
| } | |
| */ | |
| // Example 2: Fast Array Allocation for Computation | |
| /* | |
| #include "StackAllocator.hpp" | |
| #include <iostream> | |
| void fastComputation() { | |
| StackAllocator* allocator = StackAllocator::createAllocator(); | |
| // Example: Allocate an array of 100 integers | |
| int* arr = static_cast<int*>(allocator->allocate(100 * sizeof(int))); | |
| // Use the array for some computations | |
| for (int i = 0; i < 100; ++i) { | |
| arr[i] = i * i; | |
| } | |
| // Assume we're done with the array and it can be returned to the allocator | |
| allocator->returnToCache(arr); | |
| } | |
| */ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Set the required stack size (in bytes). | |
| set(STACK_SIZE "9437184") # 9 MB in bytes | |
| # Adjust compiler/linker flags based on the compiler | |
| if(MSVC) | |
| # For MSVC, use the linker flag /STACK: | |
| set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /STACK:${STACK_SIZE}") | |
| elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") | |
| # For GCC and Clang, use the compiler flag -Wl,--stack: | |
| set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--stack,${STACK_SIZE}") | |
| endif() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment