Skip to content

Instantly share code, notes, and snippets.

@Xitsa
Created May 25, 2017 06:33
Show Gist options
  • Select an option

  • Save Xitsa/556bb1e2ebcc31c8fe5659baad81e22e to your computer and use it in GitHub Desktop.

Select an option

Save Xitsa/556bb1e2ebcc31c8fe5659baad81e22e to your computer and use it in GitHub Desktop.
Simple model to analyze hash table characteristics
// To compile use this command
// g++ -std=c++11 e.cxx && ./a.out
#include <iostream>
#include <iomanip>
#include <string>
#include <map>
#include <random>
#include <cmath>
#define COUNT_OF(x) ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))
const size_t N = 1024 * 1024;
const size_t HashTableSize = N / 8;
const size_t PacketsCount = 400000;
const size_t KeySize = 11;
struct TKey {
uint8_t Arr[KeySize];
};
template<typename K, typename E, typename D>
static void randomize_key(K& key, E& engine, D& dist)
{
for (auto & v : key.Arr) {
v = dist(engine);
}
}
static void print_key(std::string message, const TKey& key)
{
std::cout << message << ": ";
for (auto v : key.Arr) {
std::cout << static_cast<unsigned int>(v) << " ";
}
std::cout << std::endl;
}
static size_t get_hash(const TKey& key)
{
unsigned int hash = 0x11081979;
for (auto v : key.Arr) {
hash = hash * 33 + v;
}
return hash % HashTableSize;
}
template <typename T>
unsigned int get_max_value(const T& hash_table)
{
unsigned int Max = 0;
for (const auto v : hash_table) {
if (v.second > Max)
Max = v.second;
}
return Max;
}
template <typename T>
void print_distribution(const T& distribution)
{
unsigned int total = 0;
for (auto v : distribution) {
total += v.first * v.second;
const double percentOfAll = static_cast<double>(total) / PacketsCount * 100.0;
std::cout << std::setw(10) << v.first << " == "
<< std::setw(10) << v.second << " "
<< std::setw(10) << total << " "
<< std::setw(10) << percentOfAll << std::endl;
}
}
int main()
{
// Seed with a real random value, if available
std::random_device r;
std::default_random_engine e1(r());
std::uniform_int_distribution<uint8_t> uniform_dist(0, 255);
std::map<size_t, unsigned int> hash_table;
for (size_t f = 0; f < PacketsCount; ++f) {
struct TKey key;
randomize_key(key, e1, uniform_dist);
const size_t hash = get_hash(key);
if (hash_table.find(hash) == hash_table.end()) {
hash_table[hash] = 0;
}
++hash_table[hash];
}
const double Fill = hash_table.size() / static_cast<double>(HashTableSize) * 100.0f;
const unsigned int MaxDeep = get_max_value(hash_table);
std::cout << std::setprecision(5) << "Fill = " << Fill << std::endl;
std::cout << std::setprecision(5) << "MaxDeep = " << MaxDeep << std::endl;
std::map<unsigned int, unsigned int> distribution;
for (auto v : hash_table) {
const unsigned int deepness = v.second;
if (distribution.find(deepness) == distribution.end()) {
distribution[deepness] = 0;
}
++distribution[deepness];
}
print_distribution(distribution);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment