Skip to content

Instantly share code, notes, and snippets.

@jweinst1
Created February 9, 2026 00:08
Show Gist options
  • Select an option

  • Save jweinst1/a5aa4453ab73b9499d66d3c1c6eedc71 to your computer and use it in GitHub Desktop.

Select an option

Save jweinst1/a5aa4453ab73b9499d66d3c1c6eedc71 to your computer and use it in GitHub Desktop.
Hamming-distance-based LLM with no need for training
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iostream>
#include <random>
#include <string>
#include <unordered_map>
#include <vector>
#if defined(__ARM_NEON)
#include <arm_neon.h> // Specialized for your M2 chip
#endif
// A 1024-bit binary hypervector stored as kWords 64-bit words (128 bytes).
// alignas(64) puts every instance on a 64-byte boundary, so the payload spans
// exactly two aligned 64-byte blocks (two cache lines on hardware with 64-byte
// lines).
// NOTE(review): Apple Silicon is reported to use 128-byte cache lines; confirm
// whether 64-byte alignment is really what is wanted on the M2.
struct alignas(64) HyperVector {
    // Number of 64-bit words in the vector: 16 * 64 = 1024 bits.
    static constexpr std::size_t kWords = 16;

    uint64_t bits[kWords];
};
static_assert(sizeof(HyperVector) == HyperVector::kWords * sizeof(uint64_t),
              "HyperVector must be exactly 1024 bits with no padding");

// Maps string "roots" to pseudo-random hypervectors, bundles them into
// document fingerprints, and compares vectors by Hamming distance.
class SemanticEngine {
private:
    // Cache of vectors already generated, keyed by root string.
    std::unordered_map<std::string, HyperVector> dna_registry;

public:
    // Returns the hypervector for `root`, generating and caching it on first use.
    //
    // The vector is the first kWords outputs of a std::mt19937_64 seeded with
    // std::hash<std::string>{}(root). The engine's output sequence is fixed by
    // the standard, but std::hash is implementation-defined, so the same root
    // is only guaranteed to give the same vector within one program run / one
    // standard-library implementation.
    HyperVector get_dna(const std::string& root) {
        // Single lookup on the hit path (the original did find + operator[]).
        const auto cached = dna_registry.find(root);
        if (cached != dna_registry.end()) return cached->second;

        HyperVector hv;
        std::mt19937_64 gen(std::hash<std::string>{}(root));
        // The engine already yields uniformly distributed 64-bit values, so no
        // distribution object is needed to cover the full uint64_t range.
        for (uint64_t& word : hv.bits) word = gen();

        dna_registry.emplace(root, hv);
        return hv;
    }

    // Bundles the vectors of all `roots` into one document fingerprint using
    // bitwise OR. An empty `roots` yields the all-zero vector.
    //
    // Note: vectors from get_dna have roughly half of their bits set, so every
    // extra root is expected to set about half of the remaining zero bits; the
    // fingerprint drifts towards all-ones as roots are added. Majority vote is
    // the usual alternative for larger bundles.
    HyperVector create_fingerprint(const std::vector<std::string>& roots) {
        HyperVector doc = {0};
        for (const auto& r : roots) {
            const HyperVector root_dna = get_dna(r);
            for (std::size_t i = 0; i < HyperVector::kWords; ++i) {
                doc.bits[i] |= root_dna.bits[i];
            }
        }
        return doc;
    }

    // Returns the Hamming distance between `a` and `b`: the number of bit
    // positions (0..1024) in which they differ. Reads no engine state, hence
    // const.
    int compare(const HyperVector& a, const HyperVector& b) const {
        int distance = 0;
        for (std::size_t i = 0; i < HyperVector::kWords; ++i) {
            // GCC/Clang builtin; expected to lower to the AArch64 CNT
            // instruction on the M2 (not verified here).
            distance += __builtin_popcountll(a.bits[i] ^ b.bits[i]);
        }
        return distance;
    }
};
// Demo driver: fingerprints three two-root "documents", then prints the
// Hamming distance from each fingerprint to the single-root query "cardio".
int main() {
    SemanticEngine engine;

    // Root lists for the three sample documents.
    const std::vector<std::string> heart_inflammation{"cardio", "itis"};  // cardio + itis
    const std::vector<std::string> heart_disease{"cardio", "patho"};      // cardio + patho
    const std::vector<std::string> bone_disease{"osteo", "patho"};        // osteo + patho (no "cardio" root)

    // One fingerprint per document, built in the same order as the lists above.
    const auto fp_heart_inflammation = engine.create_fingerprint(heart_inflammation);
    const auto fp_heart_disease = engine.create_fingerprint(heart_disease);
    const auto fp_bone_disease = engine.create_fingerprint(bone_disease);

    // The query is the raw vector of a single root.
    const auto cardio_query = engine.get_dna("cardio");

    // Work out all three distances up front, then report them.
    const int dist_heart_inflammation = engine.compare(fp_heart_inflammation, cardio_query);
    const int dist_heart_disease = engine.compare(fp_heart_disease, cardio_query);
    const int dist_bone_disease = engine.compare(fp_bone_disease, cardio_query);

    std::cout << "--- 1-Bit Semantic Search Benchmark (M2) ---" << std::endl;
    std::cout << "Target Query: 'cardio'" << std::endl;
    std::cout << "Distance to 'Cardio-Itis': " << dist_heart_inflammation << std::endl;
    std::cout << "Distance to 'Cardio-Patho': " << dist_heart_disease << std::endl;
    std::cout << "Distance to 'Osteo-Patho': " << dist_bone_disease << std::endl;
    std::cout << "(Lower score = higher similarity. Random noise is ~512)" << std::endl;

    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment