Last active
January 24, 2021 13:47
-
-
Save dwilliamson/96a451a38dfd82e9709caae16ddbbeed to your computer and use it in GitHub Desktop.
Trivial perfect hash generator that uses only a single array for runtime lookup with next to no ALU. Very good for small table sizes (e.g. < 128). Very bad for larger table sizes. This has been quickly "STL-ified" to remove use of my own containers.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
uint32_t NextPow2(uint32_t v) | |
{ | |
v--; | |
v |= (v >> 1); | |
v |= (v >> 2); | |
v |= (v >> 4); | |
v |= (v >> 8); | |
v |= (v >> 16); | |
v++; | |
return v; | |
} | |
uint32_t Hash(uint32_t key, uint32_t shifter) | |
{ | |
#if 1 | |
// This version is negligible ALU but leads to bigger tables and, perhaps, worse cache performance | |
return key << shifter; | |
#else | |
// This hash mix version is more ALU but generates much smaller tables, leading to better cache performance | |
return key ^ ((key << shifter) + 0x9E3779B9 + (key << 6) + (key >> 2)); | |
#endif | |
} | |
class clcpp_attr(reflect) PerfectHash | |
{ | |
public: | |
// max_table_size: Maximum size of the internal array indexed by hashed keys | |
// max_multiplier: Max shifter applied to key before generating an array index. | |
// | |
// Increasing any of these increases the startup search time for an exact hash table. | |
PerfectHash(uint32_t max_table_size, uint32_t max_shifter) | |
: m_maxTableSize(max_table_size) | |
, m_maxShifter(max_shifter) | |
{ | |
} | |
// Pack the key for the build step | |
void Preload(uint32_t key, void* data) | |
{ | |
m_preloadEntries.push_back({data, key}); | |
} | |
void Build() | |
{ | |
assert(m_shifter == -1); | |
assert(m_tableSize == 0); | |
// Check power-of-two table sizes | |
uint32_t nb_keys = m_preloadEntries.size(); | |
for (uint32_t table_size = NextPow2(nb_keys); table_size < m_maxTableSize; table_size *= 2) | |
{ | |
std::vector<bool> taken(table_size); | |
// Check all values of shifter | |
for (uint32_t shifter = 0; shifter < m_maxShifter; shifter++) | |
{ | |
// An array of flags signaling if a key has already hashed to this location | |
std::fill(taken.begin(), taken.end(), false); | |
// Generate array locations for each key while checking for collisions | |
bool collision = false; | |
for (uint32_t key_index = 0; key_index < nb_keys; key_index++) | |
{ | |
uint32_t key = m_preloadEntries[key_index].key; | |
uint32_t index = Hash(key, shifter) & (table_size - 1); | |
if (taken[index]) | |
{ | |
collision = true; | |
break; | |
} | |
taken[index] = true; | |
} | |
// If there were no collisions, this is a valid table size/multiplier pair | |
if (!collision) | |
{ | |
m_shifter = shifter; | |
break; | |
} | |
} | |
// Final early-out check for a valid pair | |
if (m_shifter != -1) | |
{ | |
m_tableSize = table_size; | |
break; | |
} | |
} | |
assert(m_shifter >= 0); | |
assert(m_tableSize > 0); | |
// Build the exact hash tables | |
m_hashedDataArray.resize(m_tableSize); | |
for (const Entry& entry : m_preloadEntries) | |
{ | |
uint32_t index = Hash(entry.key, m_shifter) & (m_tableSize - 1); | |
m_hashedDataArray[index] = entry.data; | |
} | |
} | |
void* Get(uint32_t key) const | |
{ | |
uint32_t index = Hash(key, m_shifter) & (m_tableSize - 1); | |
return m_hashedDataArray[index]; | |
} | |
private: | |
// Key/data pair | |
struct clcpp_attr(reflect) Entry | |
{ | |
void* data; | |
uint32_t key; | |
}; | |
// Construction input | |
const uint32_t m_maxTableSize; | |
const uint32_t m_maxShifter; | |
// Preloaded list of key/data pairs before adding to the hash | |
std::vector<Entry> m_preloadEntries; | |
// Generated key shifter and exact hash table size | |
uint32_t m_shifter = -1; | |
uint32_t m_tableSize = 0; | |
// Runtime lookup array | |
std::vector<void*> m_hashedDataArray; | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment