Last active
February 23, 2026 08:16
-
-
Save z0z0r4/ceb9c7ec98bc8beee4862e09426ea436 to your computer and use it in GitHub Desktop.
Double-Array Tire
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import random | |
| class DAT: | |
| default_size: int = 4096 | |
| data_set_size = 126 | |
| base: list[int] | |
| check: list[int] | |
| def __init__(self): | |
| self.base = [0] * self.default_size | |
| self.check = [0] * self.default_size | |
| self.base[1] = 1 | |
| def extend(self, size: int): | |
| self.base += [0] * (size - len(self.base)) | |
| self.check += [0] * (size - len(self.check)) | |
| def search(self, s: str) -> bool: | |
| p = 1 | |
| for c in s: | |
| t = abs(self.base[p]) + ord(c) | |
| if t >= len(self.check): | |
| return False | |
| if self.check[t] != p: | |
| return False | |
| p = t | |
| else: | |
| # check if it's a valid end of word | |
| return self.base[p] < 0 | |
| def find_empty(self, index: int, children: list[int]) -> int: | |
| while True: | |
| index += 1 | |
| if index >= len(self.check): | |
| self.extend(len(self.base) * 2) | |
| if self.check[index] == 0: | |
| for child in children: | |
| while index + child >= len(self.check): | |
| self.extend(len(self.base) * 2) | |
| if self.check[index + child] != 0: | |
| break | |
| else: | |
| return index | |
| def relocate(self, parent: int, children: list[int]): | |
| old_base = abs(self.base[parent]) | |
| sign = -1 if self.base[parent] < 0 else 1 | |
| new_base = self.find_empty(old_base, children) | |
| for child in children: | |
| old_pos = old_base + child | |
| if old_pos >= len(self.check) or self.check[old_pos] != parent: | |
| continue | |
| new_pos = new_base + child | |
| while new_pos >= len(self.check): | |
| self.extend(len(self.base) * 2) | |
| self.base[new_pos] = self.base[old_pos] | |
| self.check[new_pos] = parent | |
| child_start = abs(self.base[old_pos]) | |
| child_end = min(child_start + self.data_set_size, len(self.check)) | |
| for i in range(child_start, child_end): | |
| if self.check[i] == old_pos: | |
| self.check[i] = new_pos | |
| self.check[old_pos] = 0 | |
| self.base[old_pos] = 0 | |
| self.base[parent] = sign * new_base | |
| return new_base | |
| def add(self, data: str): | |
| p = 1 | |
| for c in data: | |
| code = ord(c) | |
| next_index = abs(self.base[p]) + code | |
| while next_index >= len(self.base): | |
| self.extend(len(self.base) * 2) | |
| if self.check[next_index] == 0: | |
| self.base[next_index] = self.find_empty(next_index, [0]) | |
| self.check[next_index] = p | |
| p = next_index | |
| elif self.check[next_index] != p: | |
| # conflict | |
| children = [] | |
| for i in range(min(self.data_set_size, len(self.check) - abs(self.base[p]))): | |
| if self.check[abs(self.base[p]) + i] == p: | |
| children.append(i) | |
| children.append(code) | |
| self.relocate(p, children=children) | |
| next_index = abs(self.base[p]) + code | |
| while next_index >= len(self.base): | |
| self.extend(len(self.base) * 2) | |
| self.base[next_index] = self.find_empty(next_index, [0]) | |
| self.check[next_index] = p | |
| p = next_index | |
| else: # already exist | |
| p = next_index | |
| self.base[p] *= -1 # mark end of word | |
| def print_base_and_check(self): | |
| # ignore empty value | |
| for i in range(len(self.base)): | |
| if self.base[i] != 0 or self.check[i] != 0: | |
| print(f"Index: {i}, Base: {self.base[i]}, Check: {self.check[i]}") | |
| if __name__ == "__main__": | |
| dat = DAT() | |
| # https://www.mit.edu/~ecprice/wordlist.10000 | |
| with open("wordlist.10000.txt", "r", encoding="utf-8") as f: | |
| words = [line[:-1] for line in f.readlines()] | |
| random.seed(42) | |
| random.shuffle(words) | |
| for word in words: | |
| dat.add(word) | |
| # dat.print_base_and_check() | |
| # print("===" * 10) | |
| for word in words: | |
| # print(f"Searching for '{word}': {dat.search(word)}") | |
| assert dat.search(word) == True, f"Failed to find '{word}'" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import random | |
| class DAT: | |
| default_size: int = 4096 | |
| data_set_size = 126 | |
| base: list[int] | |
| check: list[int] | |
| free_head: int | |
| free_tail: int | |
| def __init__(self): | |
| self.base = [0] * self.default_size | |
| self.check = [0] * self.default_size | |
| self.base[1] = 1 | |
| self.free_head = -1 | |
| self.free_tail = -1 | |
| self._init_free_list(2, self.default_size) | |
| def _init_free_list(self, start: int, end: int): | |
| if start >= end: | |
| return | |
| self.free_head = start | |
| self.free_tail = end - 1 | |
| for i in range(start, end): | |
| prev = i - 1 if i > start else -1 | |
| nxt = i + 1 if i + 1 < end else -1 | |
| self.base[i] = prev | |
| self.check[i] = -(nxt + 1) | |
| def _is_free(self, index: int) -> bool: | |
| return self.check[index] < 0 | |
| def _free_next(self, index: int) -> int: | |
| return -self.check[index] - 1 | |
| def _set_free_links(self, index: int, prev: int, nxt: int): | |
| self.base[index] = prev | |
| self.check[index] = -(nxt + 1) | |
| def _remove_free(self, index: int): | |
| prev = self.base[index] | |
| nxt = self._free_next(index) | |
| if prev != -1: | |
| self.check[prev] = -(nxt + 1) | |
| else: | |
| self.free_head = nxt | |
| if nxt != -1: | |
| self.base[nxt] = prev | |
| else: | |
| self.free_tail = prev | |
| self.base[index] = 0 | |
| self.check[index] = 0 | |
| def _insert_free_head(self, index: int): | |
| old_head = self.free_head | |
| self._set_free_links(index, -1, old_head) | |
| if old_head != -1: | |
| self.base[old_head] = index | |
| else: | |
| self.free_tail = index | |
| self.free_head = index | |
| def _allocate_slot(self, index: int): | |
| if not self._is_free(index): | |
| raise Exception(f"Index {index} is not free") | |
| self._remove_free(index) | |
| def extend(self, size: int): | |
| old_size = len(self.base) | |
| if size <= old_size: | |
| return | |
| self.base += [0] * (size - old_size) | |
| self.check += [0] * (size - old_size) | |
| start = max(2, old_size) | |
| end = size | |
| if start >= end: | |
| return | |
| for i in range(start, end): | |
| prev = i - 1 if i > start else self.free_tail | |
| nxt = i + 1 if i + 1 < end else -1 | |
| self.base[i] = prev | |
| self.check[i] = -(nxt + 1) | |
| if self.free_tail != -1: | |
| self.check[self.free_tail] = -(start + 1) | |
| else: | |
| self.free_head = start | |
| self.free_tail = end - 1 | |
| def search(self, s: str) -> bool: | |
| p = 1 | |
| for c in s: | |
| t = abs(self.base[p]) + ord(c) | |
| if t >= len(self.check): | |
| return False | |
| if self.check[t] != p: | |
| return False | |
| p = t | |
| else: | |
| # check if it's a valid end of word | |
| return self.base[p] < 0 | |
| def find_empty(self, children: list[int]) -> int: | |
| offsets = sorted(set(children)) | |
| max_offset = offsets[-1] if offsets else 0 | |
| while True: | |
| while max_offset >= len(self.check): | |
| self.extend(len(self.base) * 2) | |
| cur = self.free_head | |
| while cur != -1: | |
| for child in offsets: | |
| candidate = cur + child | |
| if candidate >= len(self.check) or not self._is_free(candidate): | |
| break | |
| else: | |
| return cur | |
| cur = self._free_next(cur) | |
| self.extend(len(self.base) * 2) | |
| def relocate(self, parent: int, children: list[int]): | |
| old_base = abs(self.base[parent]) | |
| sign = -1 if self.base[parent] < 0 else 1 | |
| unique_children = sorted(set(children)) | |
| new_base = self.find_empty(unique_children) | |
| for child in unique_children: | |
| old_pos = old_base + child | |
| if old_pos >= len(self.check) or self.check[old_pos] != parent: | |
| continue | |
| new_pos = new_base + child | |
| while new_pos >= len(self.check): | |
| self.extend(len(self.base) * 2) | |
| moved_base = self.base[old_pos] | |
| self._allocate_slot(new_pos) | |
| self.base[new_pos] = moved_base | |
| self.check[new_pos] = parent | |
| child_start = abs(moved_base) | |
| child_end = min(child_start + self.data_set_size, len(self.check)) | |
| for i in range(child_start, child_end): | |
| if self.check[i] == old_pos: | |
| self.check[i] = new_pos | |
| self._insert_free_head(old_pos) | |
| self.base[parent] = sign * new_base | |
| return new_base | |
| def add(self, data: str): | |
| p = 1 | |
| for c in data: | |
| code = ord(c) | |
| next_index = abs(self.base[p]) + code | |
| while next_index >= len(self.base): | |
| self.extend(len(self.base) * 2) | |
| if self._is_free(next_index): | |
| self._allocate_slot(next_index) | |
| self.base[next_index] = self.find_empty([0]) | |
| self.check[next_index] = p | |
| p = next_index | |
| elif self.check[next_index] != p: | |
| # conflict | |
| children = [] | |
| for i in range(min(self.data_set_size, len(self.check) - abs(self.base[p]))): | |
| if self.check[abs(self.base[p]) + i] == p: | |
| children.append(i) | |
| children.append(code) | |
| self.relocate(p, children=children) | |
| next_index = abs(self.base[p]) + code | |
| while next_index >= len(self.base): | |
| self.extend(len(self.base) * 2) | |
| self._allocate_slot(next_index) | |
| self.base[next_index] = self.find_empty([0]) | |
| self.check[next_index] = p | |
| p = next_index | |
| else: # already exist | |
| p = next_index | |
| self.base[p] *= -1 # mark end of word | |
| def print_base_and_check(self): | |
| # ignore empty value | |
| for i in range(len(self.base)): | |
| if not self._is_free(i) and (self.base[i] != 0 or self.check[i] != 0): | |
| print(f"Index: {i}, Base: {self.base[i]}, Check: {self.check[i]}") | |
| if __name__ == "__main__": | |
| dat = DAT() | |
| # https://www.mit.edu/~ecprice/wordlist.10000 | |
| with open("wordlist.10000.txt", "r", encoding="utf-8") as f: | |
| words = [line[:-1] for line in f.readlines()] | |
| random.seed(42) | |
| random.shuffle(words) | |
| for word in words: | |
| dat.add(word) | |
| # dat.print_base_and_check() | |
| # print("===" * 10) | |
| for word in words: | |
| # print(f"Searching for '{word}': {dat.search(word)}") | |
| assert dat.search(word) == True, f"Failed to find '{word}'" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment