Skip to content

Instantly share code, notes, and snippets.

@mason276752
Created February 14, 2025 07:10
Show Gist options
  • Save mason276752/001b5c5eab686ef85b61ef04bcc46551 to your computer and use it in GitHub Desktop.
Save mason276752/001b5c5eab686ef85b61ef04bcc46551 to your computer and use it in GitHub Desktop.
elastic-funnel-hashtable
emcc elastic_hashtable.cpp -o elastic_hashtable_wasm.js \
-O3 -s MODULARIZE=1 -s EXPORT_NAME="elastic_hashtable_wasm" \
-s INITIAL_MEMORY=128MB -s ALLOW_MEMORY_GROWTH=1 \
-s EXPORTED_RUNTIME_METHODS="['cwrap']" -s SINGLE_FILE=1 \
-lembind --no-entry
emcc funnel_hashtable.cpp -o funnel_hashtable_wasm.js \
-O3 -s MODULARIZE=1 -s EXPORT_NAME="funnel_hashtable_wasm" \
-s INITIAL_MEMORY=128MB -s ALLOW_MEMORY_GROWTH=1 \
-s EXPORTED_RUNTIME_METHODS="['cwrap']" -s SINGLE_FILE=1 \
-lembind --no-entry
#include <emscripten/bind.h>
#include <vector>
#include <string>
#include <cmath>
#include <cstdlib>
#include <unordered_map>
using namespace emscripten;
class ElasticHashTable {
private:
struct Entry {
std::string key;
int value;
bool occupied;
};
std::vector<std::vector<Entry>> levels;
std::vector<int> salts;
std::vector<int> occupancies;
int capacity;
int max_inserts;
int num_inserts;
double delta;
int c = 4;
int hash(const std::string& key, int level) {
std::hash<std::string> hash_fn;
return (hash_fn(key) ^ salts[level]) & 0x7FFFFFFF;
}
int quad_probe(const std::string& key, int level, int j, int table_size) {
return (hash(key, level) + j * j) % table_size;
}
public:
ElasticHashTable(int cap, double del = 0.1) : capacity(cap), delta(del), num_inserts(0) {
if (capacity <= 0 || !(0 < delta && delta < 1)) {
throw std::invalid_argument("Invalid capacity or delta");
}
max_inserts = capacity - static_cast<int>(delta * capacity);
int num_levels = std::max(1, static_cast<int>(std::floor(std::log2(capacity))));
int remaining = capacity;
for (int i = 0; i < num_levels - 1; ++i) {
int size = std::max(1, remaining / (1 << (num_levels - i)));
levels.push_back(std::vector<Entry>(size, {"", 0, false}));
salts.push_back(rand());
occupancies.push_back(0);
remaining -= size;
}
levels.push_back(std::vector<Entry>(remaining, {"", 0, false}));
salts.push_back(rand());
occupancies.push_back(0);
}
bool insert(std::string key, int value) {
if (num_inserts >= max_inserts) {
throw std::runtime_error("Hash table is full (maximum allowed insertions reached)");
}
for (size_t i = 0; i < levels.size(); ++i) {
auto& level = levels[i];
int size = level.size();
int free_slots = size - occupancies[i];
double load = static_cast<double>(free_slots) / size;
int probe_limit = std::max(1, static_cast<int>(c * std::min(std::log2(1 / load), std::log2(1 / delta))));
for (int j = 0; j < probe_limit; ++j) {
int idx = quad_probe(key, i, j, size);
if (!level[idx].occupied) {
level[idx] = {key, value, true};
++occupancies[i];
++num_inserts;
return true;
} else if (level[idx].key == key) {
level[idx].value = value;
return true;
}
}
}
throw std::runtime_error("Insertion failed in all levels; hash table is full.");
}
int search(std::string key) {
for (size_t i = 0; i < levels.size(); ++i) {
auto& level = levels[i];
int size = level.size();
for (int j = 0; j < size; ++j) {
int idx = quad_probe(key, i, j, size);
if (!level[idx].occupied) break;
if (level[idx].key == key) return level[idx].value;
}
}
return -1; // Not found
}
};
EMSCRIPTEN_BINDINGS(elastic_hash_table) {
class_<ElasticHashTable>("ElasticHashTable")
.constructor<int, double>()
.function("insert", &ElasticHashTable::insert)
.function("search", &ElasticHashTable::search);
}
class ElasticHashTable {
constructor(capacity, delta = 0.1) {
if (capacity <= 0) {
throw new Error("Capacity must be positive.");
}
if (!(0 < delta && delta < 1)) {
throw new Error("delta must be between 0 and 1.");
}
this.capacity = capacity;
this.delta = delta;
this.maxInserts = capacity - Math.floor(delta * capacity);
this.numInserts = 0;
let numLevels = Math.max(1, Math.floor(Math.log2(capacity)));
this.levels = [];
this.salts = [];
this.occupancies = new Array(numLevels).fill(0);
this.c = 4;
let sizes = [];
let remaining = capacity;
for (let i = 0; i < numLevels - 1; i++) {
let size = Math.max(1, Math.floor(remaining / (2 ** (numLevels - i))));
sizes.push(size);
remaining -= size;
}
sizes.push(remaining);
for (let s of sizes) {
this.levels.push(new Array(s).fill(null));
this.salts.push(Math.floor(Math.random() * Number.MAX_SAFE_INTEGER));
}
}
_hash(key, level) {
return Math.abs(this._hashFunction(`${key}-${this.salts[level]}`));
}
_quadProbe(key, level, j, tableSize) {
return (this._hash(key, level) + j * j) % tableSize;
}
_hashFunction(str) {
let hash = 0;
for (let i = 0; i < str.length; i++) {
hash = (hash << 5) - hash + str.charCodeAt(i);
hash |= 0;
}
return hash;
}
insert(key, value) {
if (this.numInserts >= this.maxInserts) {
throw new Error("Hash table is full (maximum allowed insertions reached). ");
}
for (let i = 0; i < this.levels.length; i++) {
let level = this.levels[i];
let size = level.length;
let occ = this.occupancies[i];
let free = size - occ;
let load = free / size;
let probeLimit = Math.max(1, this.c * Math.min(
load > 0 ? Math.log2(1 / load) : 0,
Math.log2(1 / this.delta)
));
if (i < this.levels.length - 1) {
let nextLevel = this.levels[i + 1];
let nextFree = nextLevel.length - this.occupancies[i + 1];
let loadNext = nextFree / nextLevel.length;
let threshold = 0.25;
if (load > (this.delta / 2) && loadNext > threshold) {
for (let j = 0; j < probeLimit; j++) {
let idx = this._quadProbe(key, i, j, size);
if (level[idx] === null) {
level[idx] = [key, value];
this.occupancies[i]++;
this.numInserts++;
return true;
}
}
} else if (load <= (this.delta / 2)) {
continue;
} else if (loadNext <= threshold) {
for (let j = 0; j < size; j++) {
let idx = this._quadProbe(key, i, j, size);
if (level[idx] === null) {
level[idx] = [key, value];
this.occupancies[i]++;
this.numInserts++;
return true;
}
}
}
} else {
for (let j = 0; j < size; j++) {
let idx = this._quadProbe(key, i, j, size);
if (level[idx] === null) {
level[idx] = [key, value];
this.occupancies[i]++;
this.numInserts++;
return true;
}
}
}
}
throw new Error("Insertion failed in all levels; hash table is full.");
}
search(key) {
for (let i = 0; i < this.levels.length; i++) {
let level = this.levels[i];
let size = level.length;
for (let j = 0; j < size; j++) {
let idx = this._quadProbe(key, i, j, size);
let entry = level[idx];
if (entry === null) break;
if (entry[0] === key) return entry[1];
}
}
return null;
}
has(key) {
return this.search(key) !== null;
}
size() {
return this.numInserts;
}
}
#include <emscripten/bind.h>
#include <vector>
#include <string>
#include <cmath>
#include <cstdlib>
#include <unordered_map>
using namespace emscripten;
class FunnelHashTable {
private:
struct Entry {
std::string key;
int value;
bool occupied;
};
std::vector<std::vector<Entry>> levels;
std::vector<int> salts;
std::vector<int> bucket_counts;
int capacity;
int max_inserts;
int num_inserts;
double delta;
int beta;
int hash(const std::string& key, int level) {
std::hash<std::string> hash_fn;
return (hash_fn(key) ^ salts[level]) & 0x7FFFFFFF;
}
public:
FunnelHashTable(int cap, double del = 0.1) : capacity(cap), delta(del), num_inserts(0) {
if (capacity <= 0 || !(0 < delta && delta < 1)) {
throw std::invalid_argument("Invalid capacity or delta");
}
max_inserts = capacity - static_cast<int>(delta * capacity);
int alpha = static_cast<int>(std::ceil(4 * std::log2(1 / delta) + 10));
beta = static_cast<int>(std::ceil(2 * std::log2(1 / delta)));
int special_size = std::max(1, static_cast<int>(std::floor(3 * delta * capacity / 4)));
int primary_size = capacity - special_size;
int total_buckets = primary_size / beta;
double a1 = total_buckets / (4 * (1 - std::pow(0.75, alpha)));
int remaining_buckets = total_buckets;
for (int i = 0; i < alpha; i++) {
int a_i = std::max(1, static_cast<int>(std::round(a1 * std::pow(0.75, i))));
a_i = std::min(a_i, remaining_buckets);
bucket_counts.push_back(a_i);
levels.push_back(std::vector<Entry>(a_i * beta, {"", 0, false}));
salts.push_back(rand());
remaining_buckets -= a_i;
}
}
bool insert(std::string key, int value) {
if (num_inserts >= max_inserts) {
throw std::runtime_error("Hash table is full");
}
for (size_t i = 0; i < levels.size(); ++i) {
auto& level = levels[i];
int bucket_index = hash(key, i) % bucket_counts[i];
int start = bucket_index * beta;
int end = start + beta;
for (int idx = start; idx < end; ++idx) {
if (!level[idx].occupied) {
level[idx] = {key, value, true};
++num_inserts;
return true;
} else if (level[idx].key == key) {
level[idx].value = value;
return true;
}
}
}
throw std::runtime_error("Insertion failed; table is full.");
}
int search(std::string key) {
for (size_t i = 0; i < levels.size(); ++i) {
auto& level = levels[i];
int bucket_index = hash(key, i) % bucket_counts[i];
int start = bucket_index * beta;
int end = start + beta;
for (int idx = start; idx < end; ++idx) {
if (!level[idx].occupied) break;
if (level[idx].key == key) return level[idx].value;
}
}
return -1;
}
};
EMSCRIPTEN_BINDINGS(funnel_hash_table) {
class_<FunnelHashTable>("FunnelHashTable")
.constructor<int, double>()
.function("insert", &FunnelHashTable::insert)
.function("search", &FunnelHashTable::search);
}
class FunnelHashTable {
constructor(capacity, delta = 0.1) {
if (capacity <= 0) {
throw new Error("Capacity must be positive.");
}
if (!(0 < delta && delta < 1)) {
throw new Error("delta must be between 0 and 1.");
}
this.capacity = capacity;
this.delta = delta;
this.maxInserts = capacity - Math.floor(delta * capacity);
this.numInserts = 0;
this.alpha = Math.ceil(4 * Math.log2(1 / delta) + 10);
this.beta = Math.ceil(2 * Math.log2(1 / delta));
this.specialSize = Math.max(1, Math.floor(3 * delta * capacity / 4));
this.primarySize = capacity - this.specialSize;
let totalBuckets = Math.floor(this.primarySize / this.beta);
let a1 = totalBuckets / (4 * (1 - Math.pow(0.75, this.alpha)));
this.levels = [];
this.levelBucketCounts = [];
this.levelSalts = [];
let remainingBuckets = totalBuckets;
for (let i = 0; i < this.alpha; i++) {
let a_i = Math.max(1, Math.round(a1 * Math.pow(0.75, i)));
if (remainingBuckets <= 0 || a_i <= 0) break;
a_i = Math.min(a_i, remainingBuckets);
this.levelBucketCounts.push(a_i);
this.levels.push(new Array(a_i * this.beta).fill(null));
this.levelSalts.push(Math.floor(Math.random() * Number.MAX_SAFE_INTEGER));
remainingBuckets -= a_i;
}
if (remainingBuckets > 0 && this.levels.length > 0) {
let extra = remainingBuckets * this.beta;
this.levels[this.levels.length - 1].push(...new Array(extra).fill(null));
this.levelBucketCounts[this.levelBucketCounts.length - 1] += remainingBuckets;
}
this.specialArray = new Array(this.specialSize).fill(null);
this.specialSalt = Math.floor(Math.random() * Number.MAX_SAFE_INTEGER);
this.specialOccupancy = 0;
}
_hashLevel(key, levelIndex) {
return Math.abs(this._hashFunction(`${key}-${this.levelSalts[levelIndex]}`));
}
_hashSpecial(key) {
return Math.abs(this._hashFunction(`${key}-${this.specialSalt}`));
}
_hashFunction(str) {
let hash = 0;
for (let i = 0; i < str.length; i++) {
hash = (hash << 5) - hash + str.charCodeAt(i);
hash |= 0;
}
return hash;
}
insert(key, value) {
if (this.numInserts >= this.maxInserts) {
throw new Error("Hash table is full (maximum allowed insertions reached). ");
}
for (let i = 0; i < this.levels.length; i++) {
let level = this.levels[i];
let numBuckets = this.levelBucketCounts[i];
let bucketIndex = this._hashLevel(key, i) % numBuckets;
let start = bucketIndex * this.beta;
let end = start + this.beta;
for (let idx = start; idx < end; idx++) {
if (level[idx] === null) {
level[idx] = [key, value];
this.numInserts++;
return true;
} else if (level[idx][0] === key) {
level[idx] = [key, value];
return true;
}
}
}
let special = this.specialArray;
let size = special.length;
let probeLimit = Math.max(1, Math.ceil(Math.log(Math.log(this.capacity + 1) + 1)));
for (let j = 0; j < probeLimit; j++) {
let idx = (this._hashSpecial(key) + j) % size;
if (special[idx] === null) {
special[idx] = [key, value];
this.specialOccupancy++;
this.numInserts++;
return true;
} else if (special[idx][0] === key) {
special[idx] = [key, value];
return true;
}
}
let idx1 = this._hashSpecial(key) % size;
let idx2 = (this._hashSpecial(key) + 1) % size;
if (special[idx1] === null) {
special[idx1] = [key, value];
this.specialOccupancy++;
this.numInserts++;
return true;
} else if (special[idx2] === null) {
special[idx2] = [key, value];
this.specialOccupancy++;
this.numInserts++;
return true;
}
throw new Error("Special array insertion failed; table is full.");
}
search(key) {
for (let i = 0; i < this.levels.length; i++) {
let level = this.levels[i];
let numBuckets = this.levelBucketCounts[i];
let bucketIndex = this._hashLevel(key, i) % numBuckets;
let start = bucketIndex * this.beta;
let end = start + this.beta;
for (let idx = start; idx < end; idx++) {
if (level[idx] === null) break;
if (level[idx][0] === key) return level[idx][1];
}
}
let special = this.specialArray;
let size = special.length;
let probeLimit = Math.max(1, Math.ceil(Math.log(Math.log(this.capacity + 1) + 1)));
for (let j = 0; j < probeLimit; j++) {
let idx = (this._hashSpecial(key) + j) % size;
if (special[idx] === null) break;
if (special[idx][0] === key) return special[idx][1];
}
let idx1 = this._hashSpecial(key) % size;
let idx2 = (this._hashSpecial(key) + 1) % size;
if (special[idx1] && special[idx1][0] === key) return special[idx1][1];
if (special[idx2] && special[idx2][0] === key) return special[idx2][1];
return null;
}
has(key) {
return this.search(key) !== null;
}
size() {
return this.numInserts;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment