Created
February 14, 2025 07:10
-
-
Save mason276752/001b5c5eab686ef85b61ef04bcc46551 to your computer and use it in GitHub Desktop.
elastic-funnel-hashtable
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
emcc elastic_hashtable.cpp -o elastic_hashtable_wasm.js \ | |
-O3 -s MODULARIZE=1 -s EXPORT_NAME="elastic_hashtable_wasm" \ | |
-s INITIAL_MEMORY=128MB -s ALLOW_MEMORY_GROWTH=1 \ | |
-s EXPORTED_RUNTIME_METHODS="['cwrap']" -s SINGLE_FILE=1 \ | |
-lembind --no-entry | |
emcc funnel_hashtable.cpp -o funnel_hashtable_wasm.js \ | |
-O3 -s MODULARIZE=1 -s EXPORT_NAME="funnel_hashtable_wasm" \ | |
-s INITIAL_MEMORY=128MB -s ALLOW_MEMORY_GROWTH=1 \ | |
-s EXPORTED_RUNTIME_METHODS="['cwrap']" -s SINGLE_FILE=1 \ | |
-lembind --no-entry |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <emscripten/bind.h> | |
#include <vector> | |
#include <string> | |
#include <cmath> | |
#include <cstdlib> | |
#include <unordered_map> | |
using namespace emscripten; | |
class ElasticHashTable { | |
private: | |
struct Entry { | |
std::string key; | |
int value; | |
bool occupied; | |
}; | |
std::vector<std::vector<Entry>> levels; | |
std::vector<int> salts; | |
std::vector<int> occupancies; | |
int capacity; | |
int max_inserts; | |
int num_inserts; | |
double delta; | |
int c = 4; | |
int hash(const std::string& key, int level) { | |
std::hash<std::string> hash_fn; | |
return (hash_fn(key) ^ salts[level]) & 0x7FFFFFFF; | |
} | |
int quad_probe(const std::string& key, int level, int j, int table_size) { | |
return (hash(key, level) + j * j) % table_size; | |
} | |
public: | |
ElasticHashTable(int cap, double del = 0.1) : capacity(cap), delta(del), num_inserts(0) { | |
if (capacity <= 0 || !(0 < delta && delta < 1)) { | |
throw std::invalid_argument("Invalid capacity or delta"); | |
} | |
max_inserts = capacity - static_cast<int>(delta * capacity); | |
int num_levels = std::max(1, static_cast<int>(std::floor(std::log2(capacity)))); | |
int remaining = capacity; | |
for (int i = 0; i < num_levels - 1; ++i) { | |
int size = std::max(1, remaining / (1 << (num_levels - i))); | |
levels.push_back(std::vector<Entry>(size, {"", 0, false})); | |
salts.push_back(rand()); | |
occupancies.push_back(0); | |
remaining -= size; | |
} | |
levels.push_back(std::vector<Entry>(remaining, {"", 0, false})); | |
salts.push_back(rand()); | |
occupancies.push_back(0); | |
} | |
bool insert(std::string key, int value) { | |
if (num_inserts >= max_inserts) { | |
throw std::runtime_error("Hash table is full (maximum allowed insertions reached)"); | |
} | |
for (size_t i = 0; i < levels.size(); ++i) { | |
auto& level = levels[i]; | |
int size = level.size(); | |
int free_slots = size - occupancies[i]; | |
double load = static_cast<double>(free_slots) / size; | |
int probe_limit = std::max(1, static_cast<int>(c * std::min(std::log2(1 / load), std::log2(1 / delta)))); | |
for (int j = 0; j < probe_limit; ++j) { | |
int idx = quad_probe(key, i, j, size); | |
if (!level[idx].occupied) { | |
level[idx] = {key, value, true}; | |
++occupancies[i]; | |
++num_inserts; | |
return true; | |
} else if (level[idx].key == key) { | |
level[idx].value = value; | |
return true; | |
} | |
} | |
} | |
throw std::runtime_error("Insertion failed in all levels; hash table is full."); | |
} | |
int search(std::string key) { | |
for (size_t i = 0; i < levels.size(); ++i) { | |
auto& level = levels[i]; | |
int size = level.size(); | |
for (int j = 0; j < size; ++j) { | |
int idx = quad_probe(key, i, j, size); | |
if (!level[idx].occupied) break; | |
if (level[idx].key == key) return level[idx].value; | |
} | |
} | |
return -1; // Not found | |
} | |
}; | |
EMSCRIPTEN_BINDINGS(elastic_hash_table) { | |
class_<ElasticHashTable>("ElasticHashTable") | |
.constructor<int, double>() | |
.function("insert", &ElasticHashTable::insert) | |
.function("search", &ElasticHashTable::search); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class ElasticHashTable { | |
constructor(capacity, delta = 0.1) { | |
if (capacity <= 0) { | |
throw new Error("Capacity must be positive."); | |
} | |
if (!(0 < delta && delta < 1)) { | |
throw new Error("delta must be between 0 and 1."); | |
} | |
this.capacity = capacity; | |
this.delta = delta; | |
this.maxInserts = capacity - Math.floor(delta * capacity); | |
this.numInserts = 0; | |
let numLevels = Math.max(1, Math.floor(Math.log2(capacity))); | |
this.levels = []; | |
this.salts = []; | |
this.occupancies = new Array(numLevels).fill(0); | |
this.c = 4; | |
let sizes = []; | |
let remaining = capacity; | |
for (let i = 0; i < numLevels - 1; i++) { | |
let size = Math.max(1, Math.floor(remaining / (2 ** (numLevels - i)))); | |
sizes.push(size); | |
remaining -= size; | |
} | |
sizes.push(remaining); | |
for (let s of sizes) { | |
this.levels.push(new Array(s).fill(null)); | |
this.salts.push(Math.floor(Math.random() * Number.MAX_SAFE_INTEGER)); | |
} | |
} | |
_hash(key, level) { | |
return Math.abs(this._hashFunction(`${key}-${this.salts[level]}`)); | |
} | |
_quadProbe(key, level, j, tableSize) { | |
return (this._hash(key, level) + j * j) % tableSize; | |
} | |
_hashFunction(str) { | |
let hash = 0; | |
for (let i = 0; i < str.length; i++) { | |
hash = (hash << 5) - hash + str.charCodeAt(i); | |
hash |= 0; | |
} | |
return hash; | |
} | |
insert(key, value) { | |
if (this.numInserts >= this.maxInserts) { | |
throw new Error("Hash table is full (maximum allowed insertions reached). "); | |
} | |
for (let i = 0; i < this.levels.length; i++) { | |
let level = this.levels[i]; | |
let size = level.length; | |
let occ = this.occupancies[i]; | |
let free = size - occ; | |
let load = free / size; | |
let probeLimit = Math.max(1, this.c * Math.min( | |
load > 0 ? Math.log2(1 / load) : 0, | |
Math.log2(1 / this.delta) | |
)); | |
if (i < this.levels.length - 1) { | |
let nextLevel = this.levels[i + 1]; | |
let nextFree = nextLevel.length - this.occupancies[i + 1]; | |
let loadNext = nextFree / nextLevel.length; | |
let threshold = 0.25; | |
if (load > (this.delta / 2) && loadNext > threshold) { | |
for (let j = 0; j < probeLimit; j++) { | |
let idx = this._quadProbe(key, i, j, size); | |
if (level[idx] === null) { | |
level[idx] = [key, value]; | |
this.occupancies[i]++; | |
this.numInserts++; | |
return true; | |
} | |
} | |
} else if (load <= (this.delta / 2)) { | |
continue; | |
} else if (loadNext <= threshold) { | |
for (let j = 0; j < size; j++) { | |
let idx = this._quadProbe(key, i, j, size); | |
if (level[idx] === null) { | |
level[idx] = [key, value]; | |
this.occupancies[i]++; | |
this.numInserts++; | |
return true; | |
} | |
} | |
} | |
} else { | |
for (let j = 0; j < size; j++) { | |
let idx = this._quadProbe(key, i, j, size); | |
if (level[idx] === null) { | |
level[idx] = [key, value]; | |
this.occupancies[i]++; | |
this.numInserts++; | |
return true; | |
} | |
} | |
} | |
} | |
throw new Error("Insertion failed in all levels; hash table is full."); | |
} | |
search(key) { | |
for (let i = 0; i < this.levels.length; i++) { | |
let level = this.levels[i]; | |
let size = level.length; | |
for (let j = 0; j < size; j++) { | |
let idx = this._quadProbe(key, i, j, size); | |
let entry = level[idx]; | |
if (entry === null) break; | |
if (entry[0] === key) return entry[1]; | |
} | |
} | |
return null; | |
} | |
has(key) { | |
return this.search(key) !== null; | |
} | |
size() { | |
return this.numInserts; | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <emscripten/bind.h> | |
#include <vector> | |
#include <string> | |
#include <cmath> | |
#include <cstdlib> | |
#include <unordered_map> | |
using namespace emscripten; | |
class FunnelHashTable { | |
private: | |
struct Entry { | |
std::string key; | |
int value; | |
bool occupied; | |
}; | |
std::vector<std::vector<Entry>> levels; | |
std::vector<int> salts; | |
std::vector<int> bucket_counts; | |
int capacity; | |
int max_inserts; | |
int num_inserts; | |
double delta; | |
int beta; | |
int hash(const std::string& key, int level) { | |
std::hash<std::string> hash_fn; | |
return (hash_fn(key) ^ salts[level]) & 0x7FFFFFFF; | |
} | |
public: | |
FunnelHashTable(int cap, double del = 0.1) : capacity(cap), delta(del), num_inserts(0) { | |
if (capacity <= 0 || !(0 < delta && delta < 1)) { | |
throw std::invalid_argument("Invalid capacity or delta"); | |
} | |
max_inserts = capacity - static_cast<int>(delta * capacity); | |
int alpha = static_cast<int>(std::ceil(4 * std::log2(1 / delta) + 10)); | |
beta = static_cast<int>(std::ceil(2 * std::log2(1 / delta))); | |
int special_size = std::max(1, static_cast<int>(std::floor(3 * delta * capacity / 4))); | |
int primary_size = capacity - special_size; | |
int total_buckets = primary_size / beta; | |
double a1 = total_buckets / (4 * (1 - std::pow(0.75, alpha))); | |
int remaining_buckets = total_buckets; | |
for (int i = 0; i < alpha; i++) { | |
int a_i = std::max(1, static_cast<int>(std::round(a1 * std::pow(0.75, i)))); | |
a_i = std::min(a_i, remaining_buckets); | |
bucket_counts.push_back(a_i); | |
levels.push_back(std::vector<Entry>(a_i * beta, {"", 0, false})); | |
salts.push_back(rand()); | |
remaining_buckets -= a_i; | |
} | |
} | |
bool insert(std::string key, int value) { | |
if (num_inserts >= max_inserts) { | |
throw std::runtime_error("Hash table is full"); | |
} | |
for (size_t i = 0; i < levels.size(); ++i) { | |
auto& level = levels[i]; | |
int bucket_index = hash(key, i) % bucket_counts[i]; | |
int start = bucket_index * beta; | |
int end = start + beta; | |
for (int idx = start; idx < end; ++idx) { | |
if (!level[idx].occupied) { | |
level[idx] = {key, value, true}; | |
++num_inserts; | |
return true; | |
} else if (level[idx].key == key) { | |
level[idx].value = value; | |
return true; | |
} | |
} | |
} | |
throw std::runtime_error("Insertion failed; table is full."); | |
} | |
int search(std::string key) { | |
for (size_t i = 0; i < levels.size(); ++i) { | |
auto& level = levels[i]; | |
int bucket_index = hash(key, i) % bucket_counts[i]; | |
int start = bucket_index * beta; | |
int end = start + beta; | |
for (int idx = start; idx < end; ++idx) { | |
if (!level[idx].occupied) break; | |
if (level[idx].key == key) return level[idx].value; | |
} | |
} | |
return -1; | |
} | |
}; | |
EMSCRIPTEN_BINDINGS(funnel_hash_table) { | |
class_<FunnelHashTable>("FunnelHashTable") | |
.constructor<int, double>() | |
.function("insert", &FunnelHashTable::insert) | |
.function("search", &FunnelHashTable::search); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class FunnelHashTable { | |
constructor(capacity, delta = 0.1) { | |
if (capacity <= 0) { | |
throw new Error("Capacity must be positive."); | |
} | |
if (!(0 < delta && delta < 1)) { | |
throw new Error("delta must be between 0 and 1."); | |
} | |
this.capacity = capacity; | |
this.delta = delta; | |
this.maxInserts = capacity - Math.floor(delta * capacity); | |
this.numInserts = 0; | |
this.alpha = Math.ceil(4 * Math.log2(1 / delta) + 10); | |
this.beta = Math.ceil(2 * Math.log2(1 / delta)); | |
this.specialSize = Math.max(1, Math.floor(3 * delta * capacity / 4)); | |
this.primarySize = capacity - this.specialSize; | |
let totalBuckets = Math.floor(this.primarySize / this.beta); | |
let a1 = totalBuckets / (4 * (1 - Math.pow(0.75, this.alpha))); | |
this.levels = []; | |
this.levelBucketCounts = []; | |
this.levelSalts = []; | |
let remainingBuckets = totalBuckets; | |
for (let i = 0; i < this.alpha; i++) { | |
let a_i = Math.max(1, Math.round(a1 * Math.pow(0.75, i))); | |
if (remainingBuckets <= 0 || a_i <= 0) break; | |
a_i = Math.min(a_i, remainingBuckets); | |
this.levelBucketCounts.push(a_i); | |
this.levels.push(new Array(a_i * this.beta).fill(null)); | |
this.levelSalts.push(Math.floor(Math.random() * Number.MAX_SAFE_INTEGER)); | |
remainingBuckets -= a_i; | |
} | |
if (remainingBuckets > 0 && this.levels.length > 0) { | |
let extra = remainingBuckets * this.beta; | |
this.levels[this.levels.length - 1].push(...new Array(extra).fill(null)); | |
this.levelBucketCounts[this.levelBucketCounts.length - 1] += remainingBuckets; | |
} | |
this.specialArray = new Array(this.specialSize).fill(null); | |
this.specialSalt = Math.floor(Math.random() * Number.MAX_SAFE_INTEGER); | |
this.specialOccupancy = 0; | |
} | |
_hashLevel(key, levelIndex) { | |
return Math.abs(this._hashFunction(`${key}-${this.levelSalts[levelIndex]}`)); | |
} | |
_hashSpecial(key) { | |
return Math.abs(this._hashFunction(`${key}-${this.specialSalt}`)); | |
} | |
_hashFunction(str) { | |
let hash = 0; | |
for (let i = 0; i < str.length; i++) { | |
hash = (hash << 5) - hash + str.charCodeAt(i); | |
hash |= 0; | |
} | |
return hash; | |
} | |
insert(key, value) { | |
if (this.numInserts >= this.maxInserts) { | |
throw new Error("Hash table is full (maximum allowed insertions reached). "); | |
} | |
for (let i = 0; i < this.levels.length; i++) { | |
let level = this.levels[i]; | |
let numBuckets = this.levelBucketCounts[i]; | |
let bucketIndex = this._hashLevel(key, i) % numBuckets; | |
let start = bucketIndex * this.beta; | |
let end = start + this.beta; | |
for (let idx = start; idx < end; idx++) { | |
if (level[idx] === null) { | |
level[idx] = [key, value]; | |
this.numInserts++; | |
return true; | |
} else if (level[idx][0] === key) { | |
level[idx] = [key, value]; | |
return true; | |
} | |
} | |
} | |
let special = this.specialArray; | |
let size = special.length; | |
let probeLimit = Math.max(1, Math.ceil(Math.log(Math.log(this.capacity + 1) + 1))); | |
for (let j = 0; j < probeLimit; j++) { | |
let idx = (this._hashSpecial(key) + j) % size; | |
if (special[idx] === null) { | |
special[idx] = [key, value]; | |
this.specialOccupancy++; | |
this.numInserts++; | |
return true; | |
} else if (special[idx][0] === key) { | |
special[idx] = [key, value]; | |
return true; | |
} | |
} | |
let idx1 = this._hashSpecial(key) % size; | |
let idx2 = (this._hashSpecial(key) + 1) % size; | |
if (special[idx1] === null) { | |
special[idx1] = [key, value]; | |
this.specialOccupancy++; | |
this.numInserts++; | |
return true; | |
} else if (special[idx2] === null) { | |
special[idx2] = [key, value]; | |
this.specialOccupancy++; | |
this.numInserts++; | |
return true; | |
} | |
throw new Error("Special array insertion failed; table is full."); | |
} | |
search(key) { | |
for (let i = 0; i < this.levels.length; i++) { | |
let level = this.levels[i]; | |
let numBuckets = this.levelBucketCounts[i]; | |
let bucketIndex = this._hashLevel(key, i) % numBuckets; | |
let start = bucketIndex * this.beta; | |
let end = start + this.beta; | |
for (let idx = start; idx < end; idx++) { | |
if (level[idx] === null) break; | |
if (level[idx][0] === key) return level[idx][1]; | |
} | |
} | |
let special = this.specialArray; | |
let size = special.length; | |
let probeLimit = Math.max(1, Math.ceil(Math.log(Math.log(this.capacity + 1) + 1))); | |
for (let j = 0; j < probeLimit; j++) { | |
let idx = (this._hashSpecial(key) + j) % size; | |
if (special[idx] === null) break; | |
if (special[idx][0] === key) return special[idx][1]; | |
} | |
let idx1 = this._hashSpecial(key) % size; | |
let idx2 = (this._hashSpecial(key) + 1) % size; | |
if (special[idx1] && special[idx1][0] === key) return special[idx1][1]; | |
if (special[idx2] && special[idx2][0] === key) return special[idx2][1]; | |
return null; | |
} | |
has(key) { | |
return this.search(key) !== null; | |
} | |
size() { | |
return this.numInserts; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment