Skip to content

Instantly share code, notes, and snippets.

@verdy-p
Forked from cloudwu/hashcompare.lua
Last active March 6, 2026 06:48
Show Gist options
  • Select an option

  • Save verdy-p/5e167a3101e26af297719394d44d0c94 to your computer and use it in GitHub Desktop.

Select an option

Save verdy-p/5e167a3101e26af297719394d44d0c94 to your computer and use it in GitHub Desktop.
Compare some hash function for lua table implementation
--[[
370104 words from https://github.com/dwyl/english-words/blob/master/words_alpha.txt
3/4 5/8 7/8 9/16 11/16 13/16 15/32 17/32 19/32 21/32
(h << 5) + (h >> 2) + x: (3)84874 (4)81670 (4)113326 (4)80336 (2)96169 (5)111520 (4)125243 (3)79309 (4)87762 (5)95643
(h << 4) + (h >> 3) + x: (2)84819 *(1)81351 (3)113283 (5)80439 (3)96264 (2)111197 (3)125200 (4)79486 (2)87582 (2)95430
(h << 5) - (h >> 2) + x: *(1)84464 (2)81428 (2)113108 (2)80201 (4)96464 (4)111469 *(1)125052 (2)79222 (3)87666 (3)95466
(h << 4) - (h >> 3) + x: (4)85050 (3)81587 *(1)113084 *(1)80112 *(1)96131 *(1)111134 (2)125185 *(1)79163 *(1)87361 *(1)95239
((h + x) * 0xAAAB) >> 3: (5)85143 (5)81973 (5)113538 (3)80323 (5)96593 (3)111401 (5)125306 (5)79636 (5)88120 (4)95536
]]
local hashes = {
h129 = function(h, x) return (h << 5) + (h >> 2) + x end,
h129_43 = function(h, x) return (h << 4) + (h >> 3) + x end,
h127 = function(h, x) return (h << 4) - (h >> 3) + x end,
h127_52 = function(h, x) return (h << 5) - (h >> 2) + x end,
aaab = function(h, x) return ((h + x) * 0xAAAB) >> 3 end
}
local function genhexs(n)
local random = math.random
local words = {}
for i = 1, n do
local tmp = {}
for j = 1, 8 do
tmp[j] = string.format('%04x', random(0, 65535))
end
words[i] = table.concat(tmp)
end
return words
end
local function readwords(filename)
local words = {}
local n = 1
for line in io.lines(filename) do
words[n] = line
n = n + 1
end
return words
end
local function shuffle(words)
local n = #words
for i = 1, n - 1 do
local j = math.random(i, n)
words[i], words[j] = words[j], words[i]
end
return words
end
local words = shuffle(readwords('words_alpha.txt'))
--local words = genhexs(100000)
print(#words, 'words')
local seed = 0
local function hashword(word, func)
local h = seed ~ #word
str:gsub('.', function(c)
h = h ~ (func(h, c:byte()) & 0xffffffff)
return '' -- this make gsub faster
end)
return h
end
local function test(words, slots, hashfunc)
local size = 1 << slots
local collisions = 0
for i = 1, #words - slots + 1, slots do
local tbl = {}
for j = 0, slots - 1 do
local hashvalue = hashword(words[i + j], hashfunc) % size
if tbl[hashvalue] then
collisions = collisions + 1
else
tbl[hashvalue] = true
end
end
end
return collisions
end
local N = 22
local tmp = {}
local results = {}
for hashname in pairs(hashes) do
results[hashname] = {}
end
for i = 3, N, 2 do
local slots = i
table.insert(tmp, string.format('\t%d/%d', slots, 1 << slots))
local ranks = {}
for hashname, hashfunc in pairs(hashes) do
table.insert(ranks, { hashname = hashname, collisions = test(words, slots, hashfunc) })
end
table.sort(ranks, function(a, b) return a.collisions < b.collisions end)
for i, rank in ipairs(ranks) do
results[rank.hashname][i] = { collisions = rank.collisions, rank = i }
end
end
print(table.concat(tmp))
for hashname in pairs(hashes) do
tmp = { hashname }
for i = 3, N, 2 do
local result = results[hashname][i]
table.insert(tmp, string.format('\t(%d)%d', result.rank, result.collisions))
end
print(table.concat(tmp))
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment