Last active
November 5, 2025 22:28
-
-
Save mistymntncop/2cbcee30ac3cc47a287f1b64f6dd6bf5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <stdint.h> | |
| #include <stdbool.h> | |
| #include <malloc.h> | |
| //Partially Persistent B+Tree | |
| //Just pedagogical. Based off Per Vognsen's B+Tree code | |
| //https://gist.github.com/pervognsen/e7883b3de183fcd601c1edf7f7e9508b | |
| // cl -Zi -Od /INCREMENTAL:NO persistent_btree.c | |
| //https://web.archive.org/web/20221206034530/https://sqlite.org/src4/artifact/82e2778a3c15381d | |
| //https://www.cs.princeton.edu/courses/archive/spr04/cos423/handouts/planar%20point.pdf | |
| #define assert(expr) if(!(expr)) { *(char*)0 = 0; } | |
| #define ArrayCount(a) (sizeof(a)/sizeof(a[0])) | |
| //Fuck const forever | |
| #pragma warning(disable : 4090) | |
| //Shitty arena implementation | |
| typedef struct Arena Arena; | |
| struct Arena { | |
| uint8_t *base; | |
| uint64_t size; | |
| uint64_t capacity; | |
| uint32_t flags; | |
| }; | |
| typedef enum ArenaFlags ArenaFlags; | |
| enum ArenaFlags { | |
| ARENA_FLAGS_NONE = 0, | |
| ARENA_FLAGS_NO_ZERO = (1 << 0), | |
| }; | |
| uint8_t *arena_push(Arena *arena, uint64_t size, uint32_t flags) { | |
| uint8_t *result = 0; | |
| uint64_t remaining = arena->capacity - arena->size; | |
| if(size <= remaining) { | |
| result = &arena->base[arena->size]; | |
| if(!(flags & ARENA_FLAGS_NO_ZERO)) { | |
| memset(result, 0, size); | |
| } | |
| } | |
| arena->size += size; | |
| return result; | |
| } | |
| #define push_array(a, T, c) (T*)arena_push(a, (c)*sizeof(T), 0); | |
| #define push_array_no_zero(a, T, c) (T*)arena_push(a, (c)*sizeof(T), ARENA_FLAGS_NO_ZERO); | |
| Arena *arena_alloc(uint64_t capacity) { | |
| uint8_t *base = malloc(capacity); | |
| Arena *arena = (Arena*)base; | |
| arena->base = base; | |
| arena->size = sizeof(*arena); | |
| arena->capacity = capacity; | |
| arena->flags = 0; | |
| return arena; | |
| } | |
| static Arena *temp_arena = 0; | |
| enum { | |
| BMAX = 4, | |
| BCUT = BMAX/2, | |
| BPATH_MAX = 32, | |
| }; | |
| //just use a unified representation for both interior and leaf nodes | |
| //for pedagogical simplicity | |
| typedef struct BNode BNode; | |
| struct BNode { | |
| uint8_t count; | |
| uint32_t version; | |
| uint64_t keys[BMAX]; | |
| union { | |
| struct { | |
| BNode *children[BMAX]; | |
| BNode *extra_ptr; | |
| uint32_t extra_index; | |
| uint32_t extra_ver; | |
| }; | |
| uint64_t vals[BMAX]; | |
| BNode *next_free; | |
| }; | |
| }; | |
| typedef struct BRoot BRoot; | |
| struct BRoot { | |
| BNode *root; | |
| uint32_t height; | |
| }; | |
| typedef struct BTree BTree; | |
| struct BTree { | |
| BRoot *roots; | |
| uint32_t roots_count; | |
| uint32_t roots_capacity; | |
| Arena *roots_arena; | |
| Arena *node_arena; | |
| BNode *interior_free_list; | |
| Arena *leaf_arena; | |
| BNode *leaf_free_list; | |
| }; | |
| typedef struct BPathSlot BPathSlot; | |
| struct BPathSlot { | |
| BNode *parent; | |
| uint32_t child_index; | |
| bool mutable; | |
| }; | |
| typedef struct BCursor BCursor; | |
| struct BCursor { | |
| BTree *tree; | |
| uint32_t height; | |
| BNode *focus; | |
| uint32_t child_index; | |
| bool mutable; | |
| int32_t depth; | |
| uint32_t version; | |
| union { | |
| BPathSlot path_stack[BPATH_MAX+1]; | |
| struct { | |
| BPathSlot dummy; | |
| BPathSlot path[BPATH_MAX]; | |
| }; | |
| }; | |
| }; | |
| BNode *b_interior_node_alloc(BTree *tree) { | |
| BNode *node = tree->interior_free_list; | |
| if(!node) { | |
| node = push_array_no_zero(tree->node_arena, BNode, 1); | |
| } else { | |
| tree->interior_free_list = node->next_free; | |
| } | |
| memset(node, 0, sizeof(node[0])); | |
| return node; | |
| } | |
| BNode *b_leaf_node_alloc(BTree *tree) { | |
| BNode *node = tree->leaf_free_list; | |
| if(!node) { | |
| node = push_array_no_zero(tree->leaf_arena, BNode, 1); | |
| } else { | |
| tree->leaf_free_list = node->next_free; | |
| } | |
| memset(node, 0, sizeof(node[0])); | |
| return node; | |
| } | |
| static BTree *btree_alloc(void) { | |
| Arena *node_arena = arena_alloc(0x100000*sizeof(BNode)); | |
| BTree *tree = push_array(node_arena, BTree, 1); | |
| tree->node_arena = node_arena; | |
| tree->leaf_arena = arena_alloc(0x100000*sizeof(BNode)); | |
| tree->roots_arena = arena_alloc(0x2000*sizeof(BRoot)); | |
| tree->roots_capacity = 0x100; | |
| tree->roots = push_array(tree->roots_arena, BRoot, tree->roots_capacity); | |
| BNode *root = b_leaf_node_alloc(tree); | |
| tree->roots[0] = (BRoot){root, 0}; | |
| tree->roots_count = 1; | |
| return tree; | |
| } | |
| void b_copy_interior_node(BNode *dst, BNode *src, uint32_t cut, uint32_t count) { | |
| dst->count = count; | |
| memcpy(dst->keys, src->keys + cut, count * sizeof(src->keys[0])); | |
| memcpy(dst->children, src->children + cut, count * sizeof(src->children[0])); | |
| } | |
| void b_copy_leaf_node(BNode *dst, BNode *src, uint32_t cut, uint32_t count) { | |
| dst->count = count; | |
| memcpy(dst->keys, src->keys + cut, count * sizeof(src->keys[0])); | |
| memcpy(dst->vals, src->vals + cut, count * sizeof(src->vals[0])); | |
| } | |
| BNode *b_get_child(BNode *node, uint32_t version, uint32_t child_index) { | |
| BNode *child = 0; | |
| if(node->extra_index == child_index && node->extra_ptr != 0 && node->extra_ver <= version) { | |
| child = node->extra_ptr; | |
| } else { | |
| child = node->children[child_index]; | |
| } | |
| return child; | |
| } | |
| static bool go_down(BCursor *cursor, uint32_t child_index) { | |
| bool result = false; | |
| BNode *current = cursor->focus; | |
| if(cursor->depth != cursor->height && child_index < current->count) { | |
| assert(child_index < BMAX); | |
| assert(cursor->depth < BPATH_MAX); | |
| cursor->depth += 1; | |
| BPathSlot *path_slot = &cursor->path[cursor->depth-1]; | |
| path_slot->parent = current; | |
| path_slot->child_index = child_index; | |
| path_slot->mutable = cursor->mutable; | |
| BNode *child = b_get_child(current, cursor->version, child_index); | |
| bool mutable = child->version == cursor->version; | |
| cursor->focus = child; | |
| cursor->child_index = child_index; | |
| cursor->mutable = mutable; | |
| result = true; | |
| } | |
| return result; | |
| } | |
| static bool go_up(BCursor *cursor) { | |
| bool result = false; | |
| if(cursor->depth != 0) { | |
| BPathSlot *path_slot = &cursor->path[cursor->depth-1]; | |
| cursor->depth -= 1; | |
| BNode *current = cursor->focus; | |
| BNode *parent = path_slot->parent; | |
| BNode *new_focus = parent; | |
| uint32_t child_index = path_slot->child_index; | |
| bool mutable = path_slot->mutable; | |
| BNode *original_child = b_get_child(parent, cursor->version, child_index); | |
| if(current != original_child) { | |
| if(!mutable && parent->extra_ptr == 0) { | |
| parent->extra_ptr = current; | |
| parent->extra_index = child_index; | |
| parent->extra_ver = cursor->version; | |
| } else { | |
| if(!mutable) { | |
| BNode *new_parent = b_interior_node_alloc(cursor->tree); | |
| b_copy_interior_node(new_parent, parent, 0, parent->count); | |
| new_parent->version = cursor->version; | |
| if(parent->extra_ptr != 0) { | |
| new_parent->children[parent->extra_index] = parent->extra_ptr; | |
| } | |
| new_focus = new_parent; | |
| mutable = true; | |
| } | |
| new_focus->children[child_index] = current; | |
| result = true; | |
| } | |
| } | |
| cursor->focus = new_focus; | |
| cursor->mutable = mutable; | |
| cursor->child_index = cursor->path[cursor->depth-1].child_index; | |
| } | |
| return result; | |
| } | |
| static void b_move_to_depth(BCursor *cursor, int32_t depth) { | |
| if(depth < cursor->depth) { | |
| cursor->depth = depth; | |
| cursor->focus = cursor->path[depth].parent; | |
| cursor->mutable = cursor->path[depth].mutable; | |
| cursor->child_index = cursor->path[depth-1].child_index; | |
| } | |
| } | |
| static BNode *b_mutable_focus(BCursor *cursor) { | |
| BNode *node = cursor->focus; | |
| if(!cursor->mutable) { | |
| BTree *tree = cursor->tree; | |
| BNode *new_node = 0; | |
| if(cursor->depth == cursor->height) { | |
| new_node = b_leaf_node_alloc(tree); | |
| b_copy_leaf_node(new_node, node, 0, node->count); | |
| } else { | |
| new_node = b_interior_node_alloc(tree); | |
| b_copy_interior_node(new_node, node, 0, node->count); | |
| if(node->extra_ptr != 0) { | |
| new_node->children[node->extra_index] = node->extra_ptr; | |
| } | |
| } | |
| new_node->version = cursor->version; | |
| cursor->focus = new_node; | |
| cursor->mutable = true; | |
| node = new_node; | |
| } | |
| return node; | |
| } | |
| static uint32_t b_commit(BCursor *cursor) { | |
| while(cursor->depth != 0) { | |
| go_up(cursor); | |
| } | |
| BNode *root = cursor->focus; | |
| BTree *tree = cursor->tree; | |
| if(tree->roots_count >= tree->roots_capacity) { | |
| uint32_t added_cap = 0x400; | |
| push_array(tree->roots_arena, BRoot, added_cap); | |
| tree->roots_capacity += added_cap; | |
| } | |
| uint32_t version = tree->roots_count; | |
| tree->roots[version] = (BRoot){root, cursor->height}; | |
| tree->roots_count += 1; | |
| cursor->version = tree->roots_count; | |
| cursor->mutable = false; | |
| return version; | |
| } | |
| static BCursor b_init_cursor(BTree *tree) { | |
| BCursor cursor = {0}; | |
| if(tree->roots_count != 0) { | |
| cursor.tree = tree; | |
| cursor.focus = tree->roots[tree->roots_count-1].root; | |
| cursor.height = tree->roots[tree->roots_count-1].height; | |
| cursor.version = tree->roots_count; | |
| cursor.mutable = false; | |
| } | |
| return cursor; | |
| } | |
| static uint64_t b_get_greatest_key(BNode *node) { | |
| assert(node->count > 0); | |
| uint64_t result = node->keys[node->count-1]; | |
| return result; | |
| } | |
| //lower bound | |
| static uint32_t array_search(uint64_t *arr, uint32_t count, uint64_t key) { | |
| uint32_t result = count; | |
| for(uint32_t i = 0; i < count; i += 1) { | |
| if(key <= arr[i]) { | |
| result = i; | |
| break; | |
| } | |
| } | |
| return result; | |
| } | |
| #define array_insert(arr, count, index, val) \ | |
| memmove((arr) + (index) + 1, (arr) + (index), ((count) - (index)) * sizeof(arr[0])); \ | |
| arr[index] = val; | |
| #define array_delete(arr, count, index) \ | |
| memmove((arr) + (index), (arr) + (index) + 1, ((count) - (index) - 1) * sizeof(arr[0])); | |
| static bool b_insert(BCursor *cursor, uint64_t key, uint64_t val) { | |
| BTree *tree = cursor->tree; | |
| assert(cursor->depth == 0); | |
| for(uint32_t depth = 0; depth < cursor->height; depth += 1) { | |
| BNode *node = cursor->focus; | |
| uint32_t child_index = array_search(node->keys, node->count, key); | |
| assert(node->count > 0); | |
| if(child_index == node->count) { | |
| child_index -= 1; | |
| node = b_mutable_focus(cursor); | |
| node->keys[child_index] = key; | |
| } | |
| go_down(cursor, child_index); | |
| } | |
| BNode *leaf = b_mutable_focus(cursor); | |
| uint32_t index = array_search(leaf->keys, leaf->count, key); | |
| bool found = index < leaf->count && leaf->keys[index] == key; | |
| BNode *new_sibling = 0; | |
| if(found) { | |
| leaf->vals[index] = val; | |
| } else { | |
| //split leaf node | |
| if(leaf->count == BMAX) { | |
| new_sibling = b_leaf_node_alloc(tree); | |
| b_copy_leaf_node(new_sibling, leaf, BCUT, BMAX - BCUT); | |
| new_sibling->version = cursor->version; | |
| leaf->count = BMAX - BCUT; | |
| if(index >= BCUT) { | |
| leaf = new_sibling; | |
| index -= BCUT; | |
| } | |
| } | |
| array_insert(leaf->keys, leaf->count, index, key); | |
| array_insert(leaf->vals, leaf->count, index, val); | |
| leaf->count += 1; | |
| } | |
| BNode *new_child = new_sibling; | |
| while(cursor->depth != 0) { | |
| uint32_t child_index = cursor->child_index; | |
| go_up(cursor); | |
| if(new_child != 0) { | |
| BNode *new_sibling = 0; | |
| BNode *node = b_mutable_focus(cursor); | |
| //split interior node | |
| if(node->count == BMAX) { | |
| new_sibling = b_interior_node_alloc(tree); | |
| b_copy_interior_node(new_sibling, node, BCUT, BMAX - BCUT); | |
| new_sibling->version = cursor->version; | |
| node->count = BMAX - BCUT; | |
| if(child_index >= BCUT) { | |
| node = new_sibling; | |
| child_index -= BCUT; | |
| } | |
| } | |
| node->keys[child_index] = b_get_greatest_key(node->children[child_index]); | |
| array_insert(node->keys, node->count, child_index + 1, b_get_greatest_key(new_child)); | |
| array_insert(node->children, node->count, child_index + 1, new_child); | |
| node->count += 1; | |
| new_child = new_sibling; | |
| } | |
| } | |
| //new root | |
| if(new_child != 0) { | |
| BNode *old_root = cursor->focus; | |
| BNode *new_root = b_interior_node_alloc(tree); | |
| new_root->version = cursor->version; | |
| new_root->count = 2; | |
| new_root->keys[0] = b_get_greatest_key(old_root); | |
| new_root->keys[1] = b_get_greatest_key(new_child); | |
| new_root->children[0] = old_root; | |
| new_root->children[1] = new_child; | |
| cursor->focus = new_root; | |
| cursor->height += 1; | |
| } | |
| return found; | |
| } | |
| static bool b_delete(BCursor *cursor, uint64_t key) { | |
| bool found = false; | |
| assert(cursor->depth == 0); | |
| for(uint32_t depth = 0; depth < cursor->height; depth += 1) { | |
| BNode *node = cursor->focus; | |
| uint32_t child_index = array_search(node->keys, node->count, key); | |
| if(child_index == node->count) { | |
| break; | |
| } | |
| go_down(cursor, child_index); | |
| } | |
| bool node_empty = false; | |
| if(cursor->depth == cursor->height) { | |
| BNode *leaf = cursor->focus; | |
| uint32_t index = array_search(leaf->keys, leaf->count, key); | |
| found = index < leaf->count && leaf->keys[index] == key; | |
| if(found) { | |
| leaf = b_mutable_focus(cursor); | |
| array_delete(leaf->keys, leaf->count, index); | |
| array_delete(leaf->vals, leaf->count, index); | |
| leaf->count -= 1; | |
| node_empty = leaf->count == 0; | |
| if(node_empty && cursor->depth != 0) { | |
| //release node... | |
| } | |
| } | |
| } | |
| if(found) { | |
| bool child_empty = node_empty; | |
| while(cursor->depth != 0) { | |
| uint32_t child_index = cursor->child_index; | |
| go_up(cursor); | |
| if(child_empty) { | |
| bool node_empty = false; | |
| BNode *node = cursor->focus; | |
| if(node->count == 1) { | |
| if(cursor->depth != 0) { | |
| //release node... | |
| } | |
| node_empty = true; | |
| } else { | |
| node = b_mutable_focus(cursor); | |
| array_delete(node->keys, node->count, child_index); | |
| array_delete(node->children, node->count, child_index); | |
| node->count -= 1; | |
| } | |
| child_empty = node_empty; | |
| } | |
| } | |
| bool root_empty = child_empty; | |
| if(root_empty) { | |
| BNode *node = b_mutable_focus(cursor); | |
| node->count = 0; | |
| cursor->height = 0; | |
| } | |
| } | |
| b_move_to_depth(cursor, 0); | |
| return found; | |
| } | |
| static bool b_search(BTree *tree, uint32_t version, uint64_t key, uint64_t *val_out) { | |
| bool found = false; | |
| if(version < tree->roots_count) { | |
| BRoot root = tree->roots[version]; | |
| BNode *node = root.root; | |
| for(uint32_t depth = 0; depth <= root.height; depth += 1) { | |
| uint32_t index = array_search(node->keys, node->count, key); | |
| if(index == node->count) { | |
| break; | |
| } | |
| if(depth == root.height) { | |
| if(node->keys[index] == key) { | |
| if(val_out) { | |
| val_out[0] = node->vals[index]; | |
| } | |
| found = true; | |
| } | |
| break; | |
| } | |
| node = b_get_child(node, version, index); | |
| } | |
| } | |
| return found; | |
| } | |
| void print_btree(BNode *node) { | |
| } | |
| uint64_t murmur64(uint64_t h) { | |
| h ^= h >> 33; | |
| h *= 0xff51afd7ed558ccdL; | |
| h ^= h >> 33; | |
| h *= 0xc4ceb9fe1a85ec53L; | |
| h ^= h >> 33; | |
| return h; | |
| } | |
| int main(int argc, char **argv) { | |
| temp_arena = arena_alloc(0x4000); //would use scratch arena in practice | |
| BTree *tree = btree_alloc(); | |
| BCursor cursor = b_init_cursor(tree); | |
| b_insert(&cursor, 1, 1); | |
| b_insert(&cursor, 2, 2); | |
| b_insert(&cursor, 3, 3); | |
| b_insert(&cursor, 4, 4); | |
| b_insert(&cursor, 5, 5); | |
| b_insert(&cursor, 6, 6); | |
| b_insert(&cursor, 7, 7); | |
| b_insert(&cursor, 8, 8); | |
| b_insert(&cursor, 9, 9); | |
| b_insert(&cursor, 10, 10); | |
| b_insert(&cursor, 11, 11); | |
| for(uint32_t i = 12; i <= 20; i += 1) { | |
| b_insert(&cursor, i, i); | |
| } | |
| uint32_t ver1 = b_commit(&cursor); | |
| b_delete(&cursor, 11); | |
| b_delete(&cursor, 10); | |
| b_delete(&cursor, 9); | |
| b_delete(&cursor, 8); | |
| b_insert(&cursor, 12, 12); | |
| b_insert(&cursor, 13, 13); | |
| uint32_t ver2 = b_commit(&cursor); | |
| b_insert(&cursor, 11, 123); | |
| uint32_t ver3 = b_commit(&cursor); | |
| #if 0 | |
| uint64_t *v = malloc(sizeof(uint64_t)*1000000); | |
| for(size_t i = 0; i < 1000000; i++) { | |
| uint64_t h = murmur64(i); | |
| v[i] = i;//h; | |
| } | |
| for(uint32_t i = 0; i < 1000000; i += 1) { | |
| b_insert(&cursor, v[i], v[i]); | |
| } | |
| uint32_t ver3 = b_commit(&cursor); | |
| #endif | |
| for(uint32_t i = 1; i <= 11; i += 1) { | |
| bool found = b_search(tree, ver1, i, 0); | |
| assert(found); | |
| } | |
| for(uint32_t i = 1; i <= 13; i += 1) { | |
| bool found = b_search(tree, ver2, i, 0); | |
| bool should_exist = !(i >= 8 && i <= 11); | |
| assert(found == should_exist); | |
| } | |
| for(uint32_t i = 1; i <= 13; i += 1) { | |
| bool found = b_search(tree, ver3, i, 0); | |
| bool should_exist = !(i >= 8 && i <= 10); | |
| assert(found == should_exist); | |
| } | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment