Last active
May 7, 2024 10:02
-
-
Save CyberShadow/ec6b9c97151cba43b1bcb01db9a8a947 to your computer and use it in GitHub Desktop.
Multi-word anagram search with llama.cpp filtering
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import std.algorithm.iteration; | |
import std.algorithm.setops; | |
import std.algorithm.sorting; | |
import std.array; | |
import std.exception; | |
import std.file; | |
import std.range; | |
import std.stdio; | |
import std.string; | |
import ae.sys.net; | |
import ae.sys.net.cachedcurl; | |
import ae.utils.array; | |
import ae.utils.funopt; | |
import ae.utils.json; | |
import ae.utils.main; | |
import ae.utils.parallelism; | |
import ae.utils.text; | |
/**
 * Builds an occurrence table for the elements of `t`.
 *
 * Character data is always tallied by UTF-8 code unit, whatever form it
 * arrives in: a `string` iterated with `foreach` yields `char` code units,
 * while a range built from strings (e.g. `chain(a, b)`) yields decoded
 * `dchar` code points. Tallying the latter directly would both disagree with
 * the former for non-ASCII text and index past the end of the 256-entry table
 * for code points >= 256, so `wchar`/`dchar` elements are re-encoded to UTF-8
 * first. Elements of any other type index the table directly.
 *
 * Params:
 *   t = anything `foreach` can iterate whose elements are characters or
 *       integers usable as an index below 256
 *
 * Returns: a table where entry `i` is the number of times code unit `i` was
 *   seen. Counters are `ubyte`, so they wrap around after 255 occurrences.
 *
 * Throws: `std.utf.UTFException` if a `wchar`/`dchar` element is not a valid
 *   code point (for example a lone surrogate).
 */
ubyte[256] lettersOf(T)(T t)
{
    import std.utf : encode;

    ubyte[256] result;
    foreach (c; t)
    {
        static if (is(immutable(typeof(c)) == immutable(dchar))
                || is(immutable(typeof(c)) == immutable(wchar)))
        {
            // Decoded code point: count each of its UTF-8 code units so the
            // result matches what iterating the equivalent `string` produces.
            char[4] buf;
            immutable len = encode(buf, c);
            foreach (unit; buf[0 .. len])
                result[unit]++;
        }
        else
            result[c]++;
    }
    return result;
}
/**
 * Tells whether `a` and `b` are made of exactly the same letters.
 *
 * Each argument is passed to `lettersOf` and the two resulting occurrence
 * tables are compared for equality. No case folding or filtering is applied
 * here, so upper/lower case and any spaces or punctuation count as letters.
 *
 * Params:
 *   a = first sequence of letters
 *   b = second sequence of letters
 *
 * Returns: `true` when both occurrence tables are identical.
 */
bool isAnagramOf(A, B)(A a, B b)
{
    const countsOfA = lettersOf(a);
    const countsOfB = lettersOf(b);
    return countsOfA == countsOfB;
}
/**
 * Asks a completion server how likely `proposition` is to be true.
 *
 * Posts a "true or false?" prompt to the `/completion` endpoint at
 * 10.20.0.1:26255, requesting a single token at temperature 0 with a grammar
 * that only admits "true" or "false", and asking for the top two token
 * probabilities. The probability reported for the token "true" is returned.
 *
 * Params:
 *   proposition = the statement to evaluate; inserted verbatim after "Q: "
 *
 * Returns: the probability the server reports for the token "true", or 0 if
 *   "true" is not among the reported tokens.
 *
 * Throws: `Exception` if the response carries no token probabilities at all;
 *   anything `post` or `jsonParse` throws is propagated.
 */
double getTruthiness(string proposition)
{
    // Request body; field names are what gets serialized to JSON.
    struct CompletionQuery
    {
        int n_predict = 1;      // generate exactly one token
        int n_probs = 2;        // report the two most likely tokens
        float temperature = 0;
        string grammar = q{
            root ::= ("true" | "false")
        };
        string prompt;
    }

    // Only the fields read below are declared; @JSONPartial lets the parser
    // ignore the rest of the response.
    @JSONPartial
    struct CompletionResponse
    {
        @JSONPartial
        struct TokenProbabilities
        {
            struct Prob
            {
                double prob;
                string tok_str;
            }
            Prob[] probs;
        }
        TokenProbabilities[] completion_probabilities;
    }

    CompletionQuery query;
    query.prompt = "true or false?\nQ: " ~ proposition ~ "\nA: ";

    auto result = post("http://10.20.0.1:26255/completion", query.toJson.asBytes);
    auto response = result
        .asText
        .jsonParse!CompletionResponse;

    // Indexing [0] on an empty array would be a range violation; report the
    // malformed response explicitly instead.
    enforce(response.completion_probabilities.length > 0,
        "Completion response contains no token probabilities");

    foreach (candidate; response.completion_probabilities[0].probs)
        if (candidate.tok_str == "true")
            return candidate.prob;

    // "true" did not make it into the reported tokens: treat it as having no
    // measurable probability rather than reading the front of an empty range.
    return 0;
}
/**
 * Finds two-word anagrams of `letters` and prints them, most plausible first.
 *
 * Reads the word list from /usr/share/dict/words, pairs every word of
 * `firstWordLength` with every word of `secondWordLength`, keeps the pairs
 * whose combined letters are an anagram of `letters`, scores each surviving
 * pair with `getTruthiness` on the sentence `"<first> <second>" is a common
 * phrase.`, and writes the pairs to stdout in descending score order as
 * `<first> <second> -> <score>`. Progress messages go to stderr.
 *
 * Params:
 *   letters          = the letters to rearrange (compared as-is, no case
 *                      folding or whitespace stripping)
 *   firstWordLength  = `.length` of the first word of each candidate pair
 *   secondWordLength = `.length` of the second word of each candidate pair
 *
 * Throws: `Exception` if the two word lengths do not add up to
 *   `letters.length`, since no pair could then be an anagram.
 */
void program(
    string letters,
    size_t firstWordLength,
    size_t secondWordLength,
)
{
    // A pair can only be an anagram of `letters` if it uses exactly as many
    // letters. Check this up front instead of building the full cartesian
    // product of the word lists just to filter everything out.
    enforce(firstWordLength + secondWordLength == letters.length,
        format("Word lengths %s + %s do not add up to the %s letters given",
            firstWordLength, secondWordLength, letters.length));

    auto words = readText("/usr/share/dict/words")
        .splitLines;

    stderr.writeln("Finding anagrams...");
    auto options = cartesianProduct(
            words.filter!(w => w.length == firstWordLength).array,
            words.filter!(w => w.length == secondWordLength).array,
        )
        .array
        .parallelCachedFilter!(pair => isAnagramOf(chain(pair.expand), letters));

    stderr.writeln("Finding likelihoods...");
    auto optionLikelihood = options
        .map!(option => `"%s" is a common phrase.`.format([option.expand].join(" ")))
        .map!getTruthiness
        .array;

    // Sort indices rather than the pairs themselves so that `options` and
    // `optionLikelihood` stay aligned.
    auto order = options.length.iota.array;
    order.sort!((a, b) => optionLikelihood[a] > optionLikelihood[b]);

    foreach (i; order)
        writefln("%s -> %s", [options[i].expand].join(" "), optionLikelihood[i]);
}
// Program entry point: mixes in `main` (ae.utils.main) around `program`
// wrapped by `funopt` (ae.utils.funopt). Presumably funopt maps the
// command-line arguments onto program's parameters - confirm against the ae
// library documentation.
mixin main!(funopt!program); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment