Last active
October 27, 2025 19:48
-
-
Save fabiolimace/17c5dcdeac9a0eecd69ee8f3abd36d32 to your computer and use it in GitHub Desktop.
Hash Function Uniqueness and Collision
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/bash | |
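| # Calculates hash uniqueness: the average percentage of distinct WINDOW-character substrings found in the first LENGTH characters of PROGRAM's output, taken over a corpus of dictionary words. | |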
| # | |
| # Usage: | |
| # | |
| # hash-uniqueness.sh PROGRAM [LENGTH [WINDOW]]   (LENGTH defaults to 32, WINDOW defaults to 8) | |
| # | |
| # hash-uniqueness.sh md5sum | |
| # | |
| # hash-uniqueness.sh crc32sum 8 4 | |
| # | |
| # hash-uniqueness.sh sha256sum 32 6 | |
| # | |
# Positional arguments: the hash command to evaluate, how many leading
# characters of each output line to examine, and the sliding-window width.
program="$1"
length="$2"
window="$3"
# An omitted or empty LENGTH / WINDOW falls back to 32 / 8 respectively.
: "${length:=32}" "${window:=8}"
#######################################
# Prints an md5sum-like "HASH FILE" line based on the POSIX cksum CRC.
# Arguments: $1 - path of the file to checksum
# Outputs:   "<CRC as 8 lowercase hex digits> <path>" on stdout
# Returns:   non-zero if the file cannot be read or cksum fails
#######################################
crc32sum() {
  local path=$1 sum
  # Feed the file on stdin so cksum prints only "CRC SIZE"; the path is then
  # emitted verbatim instead of being re-split by awk, which dropped everything
  # after the first whitespace character in the file name.
  sum=$(cksum < "$path") || return
  # The shell's printf formats the full 32-bit CRC range as hexadecimal.
  printf '%08x %s\n' "${sum%% *}" "$path"
}
# --- Build the word corpus and the per-program hash cache --------------------
# Every distinct dictionary word becomes a one-line file under $words_dir, and
# "$program" is run once per word file; its output is cached in $hash_cache.
words_dir=/run/shm/words
hash_cache="/run/shm/words.${program}"

# Refuse to continue without a runnable PROGRAM: with an empty value the
# hashing loop below would try to execute the word files themselves.
if [[ -z "$program" ]] || ! command -v -- "$program" > /dev/null; then
  printf 'Usage: %s PROGRAM [LENGTH [WINDOW]]\n' "${0##*/}" >&2
  exit 2
fi

mkdir -p -- "$words_dir" || exit 1

# Only hand dictionaries that actually exist to sort.
dictionaries=()
for dict in american-english british-english brazilian portuguese; do
  [[ -r "/usr/share/dict/${dict}" ]] && dictionaries+=("/usr/share/dict/${dict}")
done

if (( ${#dictionaries[@]} > 0 )); then
  # Read one entry per line; iterating over $(cat ...) would word-split and
  # glob-expand the entries. LC_ALL=C makes the de-duplication byte-exact.
  while IFS= read -r word; do
    # Skip entries that cannot be used as a plain file name.
    [[ -z "$word" || "$word" == */* || "$word" == . || "$word" == .. ]] && continue
    [[ -f "${words_dir}/${word}" ]] || printf '%s\n' "$word" > "${words_dir}/${word}"
  done < <(LC_ALL=C sort -u -- "${dictionaries[@]}")
else
  printf 'warning: no dictionary found in /usr/share/dict; using the words already in %s\n' \
    "$words_dir" >&2
fi

# Drop a cache whose line count no longer matches the number of word files
# (e.g. new words were added since it was generated).
word_count=$(find "$words_dir" -type f | wc -l)
if [[ -f "$hash_cache" ]] && (( $(wc -l < "$hash_cache") != word_count )); then
  rm -f -- "$hash_cache"
fi

if [[ ! -f "$hash_cache" ]]; then
  # Write to a temporary file and rename it at the end so an interrupted run
  # never leaves a truncated cache behind.
  tmp_cache=$(mktemp "${hash_cache}.XXXXXX") || exit 1
  trap 'rm -f -- "$tmp_cache"' EXIT
  # The order of the cache lines is irrelevant (they are sorted again when the
  # windows are counted), so the find output is consumed as-is.
  while IFS= read -r -d '' word_file; do
    # stdin is redirected so the program cannot consume the file list.
    "$program" "$word_file" < /dev/null
  done < <(find "$words_dir" -type f -print0) > "$tmp_cache"
  mv -f -- "$tmp_cache" "$hash_cache" || exit 1
  trap - EXIT
fi
# --- Measure uniqueness -------------------------------------------------------
# Slides a WINDOW-character window over the first LENGTH columns of every
# cached output line, counts the distinct substrings at each position, and
# reports the average as a percentage of the number of words.
# NOTE: cut operates on whole cache lines, so if LENGTH exceeds the width of
# the hash itself the separator and file-name columns are counted as well.
hash_cache="/run/shm/words.${program}"

# LENGTH and WINDOW must be positive integers with WINDOW <= LENGTH; otherwise
# no window fits and the average below would divide by zero.
if ! [[ "$length" =~ ^[1-9][0-9]*$ && "$window" =~ ^[1-9][0-9]*$ ]] \
  || (( window > length )); then
  printf 'error: LENGTH and WINDOW must be positive integers with WINDOW <= LENGTH\n' >&2
  exit 2
fi

maximum=$(find /run/shm/words -type f | wc -l)
if [[ ! -r "$hash_cache" ]] || (( maximum == 0 )); then
  printf 'error: no hashes to analyse in %s\n' "$hash_cache" >&2
  exit 1
fi

counter=0
accumulator=0
# LENGTH columns hold LENGTH - WINDOW + 1 windows of WINDOW characters, i.e.
# start columns 1 .. LENGTH - WINDOW + 1 (the last window ends at LENGTH).
for (( start = 1; start <= length - window + 1; start++ )); do
  end=$(( start + window - 1 ))
  distinct=$(cut -c "${start}-${end}" "$hash_cache" | LC_ALL=C sort -u | wc -l)
  accumulator=$(( accumulator + distinct ))
  counter=$(( counter + 1 ))
done

# bc -l keeps the fractional part that shell integer arithmetic would drop.
printf 'uniqueness: %s %%\n' \
  "$(bc -l <<< "( 100.0 * ( $accumulator / $counter ) ) / $maximum")"
printf 'collisions: %s %%\n' \
  "$(bc -l <<< "100.0 - ( 100.0 * ( $accumulator / $counter ) ) / $maximum")"
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
More Examples
Dictionaries in /usr/share/dict/: american-english, british-english, brazilian, portuguese (~500k words)
Length = 8, Window = 6
Length = 32, Window = 8