-
-
Save verdy-p/5e167a3101e26af297719394d44d0c94 to your computer and use it in GitHub Desktop.
Compare some hash function for lua table implementation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| --[[ | |
| 370104 words from https://github.com/dwyl/english-words/blob/master/words_alpha.txt | |
| 3/4 5/8 7/8 9/16 11/16 13/16 15/32 17/32 19/32 21/32 | |
| (h << 5) + (h >> 2) + x: (3)84874 (4)81670 (4)113326 (4)80336 (2)96169 (5)111520 (4)125243 (3)79309 (4)87762 (5)95643 | |
| (h << 4) + (h >> 3) + x: (2)84819 *(1)81351 (3)113283 (5)80439 (3)96264 (2)111197 (3)125200 (4)79486 (2)87582 (2)95430 | |
| (h << 5) - (h >> 2) + x: *(1)84464 (2)81428 (2)113108 (2)80201 (4)96464 (4)111469 *(1)125052 (2)79222 (3)87666 (3)95466 | |
| (h << 4) - (h >> 3) + x: (4)85050 (3)81587 *(1)113084 *(1)80112 *(1)96131 *(1)111134 (2)125185 *(1)79163 *(1)87361 *(1)95239 | |
| ((h + x) * 0xAAAB) >> 3: (5)85143 (5)81973 (5)113538 (3)80323 (5)96593 (3)111401 (5)125306 (5)79636 (5)88120 (4)95536 | |
| ]] | |
| local hashes = { | |
| h129 = function(h, x) return (h << 5) + (h >> 2) + x end, | |
| h129_43 = function(h, x) return (h << 4) + (h >> 3) + x end, | |
| h127 = function(h, x) return (h << 4) - (h >> 3) + x end, | |
| h127_52 = function(h, x) return (h << 5) - (h >> 2) + x end, | |
| aaab = function(h, x) return ((h + x) * 0xAAAB) >> 3 end | |
| } | |
| local function genhexs(n) | |
| local random = math.random | |
| local words = {} | |
| for i = 1, n do | |
| local tmp = {} | |
| for j = 1, 8 do | |
| tmp[j] = string.format('%04x', random(0, 65535)) | |
| end | |
| words[i] = table.concat(tmp) | |
| end | |
| return words | |
| end | |
| local function readwords(filename) | |
| local words = {} | |
| local n = 1 | |
| for line in io.lines(filename) do | |
| words[n] = line | |
| n = n + 1 | |
| end | |
| return words | |
| end | |
| local function shuffle(words) | |
| local n = #words | |
| for i = 1, n - 1 do | |
| local j = math.random(i, n) | |
| words[i], words[j] = words[j], words[i] | |
| end | |
| return words | |
| end | |
| local words = shuffle(readwords('words_alpha.txt')) | |
| --local words = genhexs(100000) | |
| print(#words, 'words') | |
| local seed = 0 | |
| local function hashword(word, func) | |
| local h = seed ~ #word | |
| str:gsub('.', function(c) | |
| h = h ~ (func(h, c:byte()) & 0xffffffff) | |
| return '' -- this make gsub faster | |
| end) | |
| return h | |
| end | |
| local function test(words, slots, hashfunc) | |
| local size = 1 << slots | |
| local collisions = 0 | |
| for i = 1, #words - slots + 1, slots do | |
| local tbl = {} | |
| for j = 0, slots - 1 do | |
| local hashvalue = hashword(words[i + j], hashfunc) % size | |
| if tbl[hashvalue] then | |
| collisions = collisions + 1 | |
| else | |
| tbl[hashvalue] = true | |
| end | |
| end | |
| end | |
| return collisions | |
| end | |
| local N = 22 | |
| local tmp = {} | |
| local results = {} | |
| for hashname in pairs(hashes) do | |
| results[hashname] = {} | |
| end | |
| for i = 3, N, 2 do | |
| local slots = i | |
| table.insert(tmp, string.format('\t%d/%d', slots, 1 << slots)) | |
| local ranks = {} | |
| for hashname, hashfunc in pairs(hashes) do | |
| table.insert(ranks, { hashname = hashname, collisions = test(words, slots, hashfunc) }) | |
| end | |
| table.sort(ranks, function(a, b) return a.collisions < b.collisions end) | |
| for i, rank in ipairs(ranks) do | |
| results[rank.hashname][i] = { collisions = rank.collisions, rank = i } | |
| end | |
| end | |
| print(table.concat(tmp)) | |
| for hashname in pairs(hashes) do | |
| tmp = { hashname } | |
| for i = 3, N, 2 do | |
| local result = results[hashname][i] | |
| table.insert(tmp, string.format('\t(%d)%d', result.rank, result.collisions)) | |
| end | |
| print(table.concat(tmp)) | |
| end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment