Skip to content

Instantly share code, notes, and snippets.

@z0z0r4
Last active February 23, 2026 08:16
Show Gist options
  • Select an option

  • Save z0z0r4/ceb9c7ec98bc8beee4862e09426ea436 to your computer and use it in GitHub Desktop.

Select an option

Save z0z0r4/ceb9c7ec98bc8beee4862e09426ea436 to your computer and use it in GitHub Desktop.
Double-Array Tire
import random
class DAT:
default_size: int = 4096
data_set_size = 126
base: list[int]
check: list[int]
def __init__(self):
self.base = [0] * self.default_size
self.check = [0] * self.default_size
self.base[1] = 1
def extend(self, size: int):
self.base += [0] * (size - len(self.base))
self.check += [0] * (size - len(self.check))
def search(self, s: str) -> bool:
p = 1
for c in s:
t = abs(self.base[p]) + ord(c)
if t >= len(self.check):
return False
if self.check[t] != p:
return False
p = t
else:
# check if it's a valid end of word
return self.base[p] < 0
def find_empty(self, index: int, children: list[int]) -> int:
while True:
index += 1
if index >= len(self.check):
self.extend(len(self.base) * 2)
if self.check[index] == 0:
for child in children:
while index + child >= len(self.check):
self.extend(len(self.base) * 2)
if self.check[index + child] != 0:
break
else:
return index
def relocate(self, parent: int, children: list[int]):
old_base = abs(self.base[parent])
sign = -1 if self.base[parent] < 0 else 1
new_base = self.find_empty(old_base, children)
for child in children:
old_pos = old_base + child
if old_pos >= len(self.check) or self.check[old_pos] != parent:
continue
new_pos = new_base + child
while new_pos >= len(self.check):
self.extend(len(self.base) * 2)
self.base[new_pos] = self.base[old_pos]
self.check[new_pos] = parent
child_start = abs(self.base[old_pos])
child_end = min(child_start + self.data_set_size, len(self.check))
for i in range(child_start, child_end):
if self.check[i] == old_pos:
self.check[i] = new_pos
self.check[old_pos] = 0
self.base[old_pos] = 0
self.base[parent] = sign * new_base
return new_base
def add(self, data: str):
p = 1
for c in data:
code = ord(c)
next_index = abs(self.base[p]) + code
while next_index >= len(self.base):
self.extend(len(self.base) * 2)
if self.check[next_index] == 0:
self.base[next_index] = self.find_empty(next_index, [0])
self.check[next_index] = p
p = next_index
elif self.check[next_index] != p:
# conflict
children = []
for i in range(min(self.data_set_size, len(self.check) - abs(self.base[p]))):
if self.check[abs(self.base[p]) + i] == p:
children.append(i)
children.append(code)
self.relocate(p, children=children)
next_index = abs(self.base[p]) + code
while next_index >= len(self.base):
self.extend(len(self.base) * 2)
self.base[next_index] = self.find_empty(next_index, [0])
self.check[next_index] = p
p = next_index
else: # already exist
p = next_index
self.base[p] *= -1 # mark end of word
def print_base_and_check(self):
# ignore empty value
for i in range(len(self.base)):
if self.base[i] != 0 or self.check[i] != 0:
print(f"Index: {i}, Base: {self.base[i]}, Check: {self.check[i]}")
if __name__ == "__main__":
dat = DAT()
# https://www.mit.edu/~ecprice/wordlist.10000
with open("wordlist.10000.txt", "r", encoding="utf-8") as f:
words = [line[:-1] for line in f.readlines()]
random.seed(42)
random.shuffle(words)
for word in words:
dat.add(word)
# dat.print_base_and_check()
# print("===" * 10)
for word in words:
# print(f"Searching for '{word}': {dat.search(word)}")
assert dat.search(word) == True, f"Failed to find '{word}'"
import random
class DAT:
default_size: int = 4096
data_set_size = 126
base: list[int]
check: list[int]
free_head: int
free_tail: int
def __init__(self):
self.base = [0] * self.default_size
self.check = [0] * self.default_size
self.base[1] = 1
self.free_head = -1
self.free_tail = -1
self._init_free_list(2, self.default_size)
def _init_free_list(self, start: int, end: int):
if start >= end:
return
self.free_head = start
self.free_tail = end - 1
for i in range(start, end):
prev = i - 1 if i > start else -1
nxt = i + 1 if i + 1 < end else -1
self.base[i] = prev
self.check[i] = -(nxt + 1)
def _is_free(self, index: int) -> bool:
return self.check[index] < 0
def _free_next(self, index: int) -> int:
return -self.check[index] - 1
def _set_free_links(self, index: int, prev: int, nxt: int):
self.base[index] = prev
self.check[index] = -(nxt + 1)
def _remove_free(self, index: int):
prev = self.base[index]
nxt = self._free_next(index)
if prev != -1:
self.check[prev] = -(nxt + 1)
else:
self.free_head = nxt
if nxt != -1:
self.base[nxt] = prev
else:
self.free_tail = prev
self.base[index] = 0
self.check[index] = 0
def _insert_free_head(self, index: int):
old_head = self.free_head
self._set_free_links(index, -1, old_head)
if old_head != -1:
self.base[old_head] = index
else:
self.free_tail = index
self.free_head = index
def _allocate_slot(self, index: int):
if not self._is_free(index):
raise Exception(f"Index {index} is not free")
self._remove_free(index)
def extend(self, size: int):
old_size = len(self.base)
if size <= old_size:
return
self.base += [0] * (size - old_size)
self.check += [0] * (size - old_size)
start = max(2, old_size)
end = size
if start >= end:
return
for i in range(start, end):
prev = i - 1 if i > start else self.free_tail
nxt = i + 1 if i + 1 < end else -1
self.base[i] = prev
self.check[i] = -(nxt + 1)
if self.free_tail != -1:
self.check[self.free_tail] = -(start + 1)
else:
self.free_head = start
self.free_tail = end - 1
def search(self, s: str) -> bool:
p = 1
for c in s:
t = abs(self.base[p]) + ord(c)
if t >= len(self.check):
return False
if self.check[t] != p:
return False
p = t
else:
# check if it's a valid end of word
return self.base[p] < 0
def find_empty(self, children: list[int]) -> int:
offsets = sorted(set(children))
max_offset = offsets[-1] if offsets else 0
while True:
while max_offset >= len(self.check):
self.extend(len(self.base) * 2)
cur = self.free_head
while cur != -1:
for child in offsets:
candidate = cur + child
if candidate >= len(self.check) or not self._is_free(candidate):
break
else:
return cur
cur = self._free_next(cur)
self.extend(len(self.base) * 2)
def relocate(self, parent: int, children: list[int]):
old_base = abs(self.base[parent])
sign = -1 if self.base[parent] < 0 else 1
unique_children = sorted(set(children))
new_base = self.find_empty(unique_children)
for child in unique_children:
old_pos = old_base + child
if old_pos >= len(self.check) or self.check[old_pos] != parent:
continue
new_pos = new_base + child
while new_pos >= len(self.check):
self.extend(len(self.base) * 2)
moved_base = self.base[old_pos]
self._allocate_slot(new_pos)
self.base[new_pos] = moved_base
self.check[new_pos] = parent
child_start = abs(moved_base)
child_end = min(child_start + self.data_set_size, len(self.check))
for i in range(child_start, child_end):
if self.check[i] == old_pos:
self.check[i] = new_pos
self._insert_free_head(old_pos)
self.base[parent] = sign * new_base
return new_base
def add(self, data: str):
p = 1
for c in data:
code = ord(c)
next_index = abs(self.base[p]) + code
while next_index >= len(self.base):
self.extend(len(self.base) * 2)
if self._is_free(next_index):
self._allocate_slot(next_index)
self.base[next_index] = self.find_empty([0])
self.check[next_index] = p
p = next_index
elif self.check[next_index] != p:
# conflict
children = []
for i in range(min(self.data_set_size, len(self.check) - abs(self.base[p]))):
if self.check[abs(self.base[p]) + i] == p:
children.append(i)
children.append(code)
self.relocate(p, children=children)
next_index = abs(self.base[p]) + code
while next_index >= len(self.base):
self.extend(len(self.base) * 2)
self._allocate_slot(next_index)
self.base[next_index] = self.find_empty([0])
self.check[next_index] = p
p = next_index
else: # already exist
p = next_index
self.base[p] *= -1 # mark end of word
def print_base_and_check(self):
# ignore empty value
for i in range(len(self.base)):
if not self._is_free(i) and (self.base[i] != 0 or self.check[i] != 0):
print(f"Index: {i}, Base: {self.base[i]}, Check: {self.check[i]}")
if __name__ == "__main__":
dat = DAT()
# https://www.mit.edu/~ecprice/wordlist.10000
with open("wordlist.10000.txt", "r", encoding="utf-8") as f:
words = [line[:-1] for line in f.readlines()]
random.seed(42)
random.shuffle(words)
for word in words:
dat.add(word)
# dat.print_base_and_check()
# print("===" * 10)
for word in words:
# print(f"Searching for '{word}': {dat.search(word)}")
assert dat.search(word) == True, f"Failed to find '{word}'"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment