Created
September 28, 2025 21:32
-
-
Save Dicklesworthstone/8269a46f454d0ed35cf4d7b3a2f182fe to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // =================================================================== | |
| // Optimal & Safe Codegen/Reasoning Contract — Native HVM Implementation | |
| // Problem: Bushy SQL Join-Order Optimizer with SUP-driven search | |
| // =================================================================== | |
| // | |
| // Encodings | |
| // --------- | |
| // Term ::= TScan rel set | |
| // | TJoin alg left right set | |
| // | TSup lab left right set | |
| // Alg ::= AHash | ANL | |
| // Plan ::= Plan node rows cost set | |
| // Node ::= NScan rel | NJoin alg lnode rnode | |
| // Set ::= Nil | Cons i is // list of relation ids (sorted, unique) | |
| // Pair ::= Pair a b | |
| // Res ::= Res ok cost_sup cost_dp rows_sup node_sup | |
| // | |
| // Labels: each subset (Set) is mapped to a unique label `lab = ToMask(set)`. | |
| // All alternatives for that subset are entangled under that label; collapse | |
| // chooses exactly one branch per label (diagonal semantics). | |
| // | |
| // Numerics | |
| // -------- | |
| // - Integer-only U60 arithmetic. | |
| // - Base table rows stored in *thousands* of rows (ROW_SCALE = 1000). | |
| // - Selectivities stored as per-million integers (SEL_SCALE = 1_000_000). | |
| // - Join rows computed iteratively: rows = rows * sel(i,j) / SEL_SCALE, | |
| // to avoid overflow and preserve precision in fixed-point. | |
| // | |
| // =================================================================== | |
| // Constants | |
| // =================================================================== | |
| (ROW_SCALE) = 1000 // base rows scaled down by 1000 | |
| (SEL_SCALE) = 1000000 // selectivity is per-million | |
| // =================================================================== | |
| // Boolean / Pred helpers (0/1 booleans as in HVM examples) | |
| // =================================================================== | |
| (Not 0) = 1 | |
| (Not 1) = 0 | |
| (And 0 b) = 0 | |
| (And 1 b) = b | |
| (Or 0 b) = b | |
| (Or 1 b) = 1 | |
| // a == b <=> not (a<b) and not (b<a) | |
| (Eq a b) = | |
| let lt1 = (< a b) | |
| let lt2 = (< b a) | |
| let n1 = (Not lt1) | |
| let n2 = (Not lt2) | |
| (And n1 n2) | |
| // if1 c t e with c in {0,1} | |
| (If1 1 t e) = t | |
| (If1 0 t e) = e | |
| (MinNat a b) = (If1 (< a b) a b) | |
| (MaxNat a b) = (If1 (< a b) b a) | |
| // =================================================================== | |
| // Lists | |
| // =================================================================== | |
| (Append Nil ys) = ys | |
| (Append (Cons x xs) ys) = (Cons x (Append xs ys)) | |
| (Concat Nil) = Nil | |
| (Concat (Cons xs xss)) = (Append xs (Concat xss)) | |
| (Length Nil) = 0 | |
| (Length (Cons _ xs)) = (+ 1 (Length xs)) | |
| (Reverse xs) = (RevAcc xs Nil) | |
| (RevAcc Nil acc) = acc | |
| (RevAcc (Cons x xs) acc) = (RevAcc xs (Cons x acc)) | |
| (IsEmpty Nil) = 1 | |
| (IsEmpty (Cons _ _)) = 0 | |
| (Map f Nil) = Nil | |
| (Map f (Cons x xs)) = (Cons (f x) (Map f xs)) | |
| // =================================================================== | |
| // Sets (as sorted lists of unique U60 indices) | |
| // =================================================================== | |
| (Contains Nil v) = 0 | |
| (Contains (Cons x xs) v) = | |
| (If1 (< v x) 0 (If1 (Eq x v) 1 (Contains xs v))) | |
| (Disjoint Nil ys) = 1 | |
| (Disjoint (Cons x xs) ys) = | |
| (If1 (Contains ys x) 0 (Disjoint xs ys)) | |
| // Merge two sorted sets into a sorted union (no dups) | |
| (Union Nil ys) = ys | |
| (Union xs Nil) = xs | |
| (Union (Cons a as) (Cons b bs)) = | |
| (If1 (< a b) | |
| (Cons a (Union as (Cons b bs))) | |
| (If1 (< b a) | |
| (Cons b (Union (Cons a as) bs)) | |
| (Cons a (Union as bs)))) | |
| // Convert Set -> unique label by encoding as bitmask (sum of 2^i) | |
| (ToMask set) = (ToMaskAcc set 0) | |
| (ToMaskAcc Nil acc) = acc | |
| (ToMaskAcc (Cons i is) acc) = (ToMaskAcc is (+ acc (Pow2 i))) | |
| // pow2(i): 2^i by repeated doubling | |
| (Pow2 i) = (Pow2Acc i 1) | |
| (Pow2Acc 0 p) = p | |
| (Pow2Acc i p) = (Pow2Acc (- i 1) (+ p p)) | |
| // Range a..b inclusive (assumes a<=b) | |
| (Range a b) = (If1 (Eq a b) (Cons a Nil) (Cons a (Range (+ a 1) b))) | |
| // =================================================================== | |
| // Catalog for demo (8 relations) | |
| // Effective base rows (in thousands): round(original_rows * filter / 1000) | |
| // =================================================================== | |
| (RowsOf 0) = 750 // 1,500,000 * 0.50 / 1000 | |
| (RowsOf 1) = 800 // 2,000,000 * 0.40 / 1000 | |
| (RowsOf 2) = 750 // 750,000 * 1.00 / 1000 | |
| (RowsOf 3) = 1000 // 1,250,000 * 0.80 / 1000 | |
| (RowsOf 4) = 875 // 3,500,000 * 0.25 / 1000 | |
| (RowsOf 5) = 600 // 600,000 * 1.00 / 1000 | |
| (RowsOf 6) = 1440 // 2,400,000 * 0.60 / 1000 | |
| (RowsOf 7) = 1260 // 1,800,000 * 0.70 / 1000 | |
| // Default (unused): identity | |
| (RowsOf _) = 1 | |
| // Pairwise selectivity per-million, symmetric; default = 1_000_000 | |
| (Sel i j) = | |
| let a = (MinNat i j) | |
| let b = (MaxNat i j) | |
| (Sel' a b) | |
| (Sel' 0 1) = 100 // 0.0001 | |
| (Sel' 0 2) = 10000 // 0.01 | |
| (Sel' 1 3) = 20000 // 0.02 | |
| (Sel' 2 3) = 5000 // 0.005 | |
| (Sel' 2 4) = 500 // 0.0005 | |
| (Sel' 3 5) = 10000 // 0.01 | |
| (Sel' 4 6) = 1000 // 0.001 | |
| (Sel' 6 7) = 20000 // 0.02 | |
| (Sel' 5 7) = 30000 // 0.03 | |
| (Sel' 1 4) = 20000 // 0.02 | |
| (Sel' _ _) = (SEL_SCALE) | |
| // =================================================================== | |
| // Algebraic data (constructors) | |
| // =================================================================== | |
| (AHash) = AHash | |
| (ANL) = ANL | |
| // Term of the search space: | |
| (TScan rel set) = (TScan rel set) | |
| (TJoin alg left right set) = (TJoin alg left right set) | |
| (TSup lab left right set) = (TSup lab left right set) | |
| // Plan and nodes: | |
| (NScan rel) = (NScan rel) | |
| (NJoin alg l r) = (NJoin alg l r) | |
| (Plan node rows cost set) = (Plan node rows cost set) | |
| // Result container: | |
| (Res ok cost_sup cost_dp rows_sup node_sup) = (Res ok cost_sup cost_dp rows_sup node_sup) | |
| // Getters: | |
| (GetRows (Plan _ rows _ _)) = rows | |
| (GetCost (Plan _ _ cost _)) = cost | |
| (GetSet (Plan _ _ _ set)) = set | |
| (GetNode (Plan node _ _ _)) = node | |
| // =================================================================== | |
| // Cost model | |
| // =================================================================== | |
| (ScanCost rows) = rows | |
| (HashJoinCost lrows rrows out) = (+ (+ lrows rrows) out) | |
| (NLJoinCost lrows rrows out) = (+ (* lrows rrows) out) | |
| // Compute join output rows with iterative scaling: | |
| // out = (((lrows * rrows) * sel(i1,j1) / 1e6) * sel(i2,j2)/1e6) ... | |
| (JoinRows lrows rrows lset rset) = | |
| let base = (* lrows rrows) | |
| (RefinePairs base lset rset) | |
| (RefinePairs acc Nil rset) = acc | |
| (RefinePairs acc (Cons i is) rset) = | |
| let a = (RefineWith acc i rset) | |
| (RefinePairs a is rset) | |
| (RefineWith acc i Nil) = acc | |
| (RefineWith acc i (Cons j js)) = | |
| let s = (Sel i j) | |
| let tmp = (/ (* acc s) (SEL_SCALE)) | |
| (RefineWith tmp i js) | |
| // =================================================================== | |
| // Splits enumeration (unique, anchor-based) | |
| // For non-empty set S = [a | xs], we enumerate all partitions (L,R) | |
| // such that a ∈ L, R ≠ ∅. This yields every proper split exactly once. | |
| // =================================================================== | |
| (Splits set) = | |
| let a = (Head set) | |
| let xs = (Tail set) | |
| let pr = (Assign xs) // list of Pair(Lt, Rt) over xs | |
| (Anchored a pr) | |
| (Anchored a Nil) = Nil | |
| (Anchored a (Cons (Pair lt rt) rest)) = | |
| let nonempty = (Not (IsEmpty rt)) | |
| let head = (If1 nonempty (Cons (Pair (Cons a lt) rt) Nil) Nil) | |
| (Append head (Anchored a rest)) | |
| // All 2^(|xs|) assignments of xs into (Lt,Rt) | |
| (Assign Nil) = (Cons (Pair Nil Nil) Nil) | |
| (Assign (Cons x xs)) = | |
| let tail = (Assign xs) | |
| let putL = (Map (PutLeft x) tail) | |
| let putR = (Map (PutRight x) tail) | |
| (Append putL putR) | |
| (PutLeft x (Pair lt rt)) = (Pair (Cons x lt) rt) | |
| (PutRight x (Pair lt rt)) = (Pair lt (Cons x rt)) | |
| (Head (Cons x _)) = x | |
| (Tail (Cons _ xs)) = xs | |
| // =================================================================== | |
| // Builder: enumerate all alternatives as a single TSup group per set | |
| // =================================================================== | |
| (Enum set) = | |
| (If1 (Eq (Length set) 1) | |
| (TScan (Head set) set) | |
| (EnumMulti set)) | |
| (EnumMulti set) = | |
| let lab = (ToMask set) | |
| let splits = (Splits set) // [ Pair(L,R) ... ] | |
| let alts = (MakeAlts splits set) // at least 2 alternatives | |
| (SupOf lab alts set) | |
| (MakeAlts Nil set) = Nil | |
| (MakeAlts (Cons (Pair l r) rest) set) = | |
| let lt = (Enum l) | |
| let rt = (Enum r) | |
| let a1 = (TJoin (AHash) lt rt set) | |
| let a2 = (TJoin (ANL) lt rt set) | |
| (Cons a1 (Cons a2 (MakeAlts rest set))) | |
| // Fold a non-empty list of terms under the same label into a TSup chain. | |
| (SupOf lab (Cons a (Cons b Nil)) set) = (TSup lab a b set) | |
| (SupOf lab (Cons a (Cons b rest)) set) = | |
| let s = (TSup lab a b set) | |
| (SupOf lab (Cons s rest) set) | |
| // =================================================================== | |
| // SUP-group collapse + exact evaluator | |
| // =================================================================== | |
| (Best (TScan rel set)) = | |
| let rows = (RowsOf rel) | |
| let cost = (ScanCost rows) | |
| (Plan (NScan rel) rows cost set) | |
| (Best (TJoin alg left right set)) = | |
| let pl = (Best left) | |
| let pr = (Best right) | |
| let lr = (GetRows pl) | |
| let rr = (GetRows pr) | |
| let out = (JoinRows lr rr (GetSet pl) (GetSet pr)) | |
| let c_op = (If1 (EqAlg alg AHash) | |
| (HashJoinCost lr rr out) | |
| (NLJoinCost lr rr out)) | |
| let cost = (+ (+ (GetCost pl) (GetCost pr)) c_op) | |
| (Plan (NJoin alg (GetNode pl) (GetNode pr)) out cost set) | |
| (Best (TSup lab left right set)) = | |
| let alts = (CollectLeaves lab (TSup lab left right set)) | |
| let plans = (Map Best alts) | |
| (MinPlanList plans) | |
| // Recognize equality of algorithm constructors | |
| (EqAlg AHash AHash) = 1 | |
| (EqAlg ANL ANL) = 1 | |
| (EqAlg _ _) = 0 | |
| // Flatten all leaves under one label | |
| (CollectLeaves lab (TSup l a b set)) = | |
| (If1 (Eq lab l) | |
| (Append (CollectLeaves lab a) (CollectLeaves lab b)) | |
| (Cons (TSup l a b set) Nil)) | |
| (CollectLeaves lab t) = (Cons t Nil) | |
| // Reduce a non-empty list of plans to the one with minimum cost | |
| (MinPlanList (Cons x Nil)) = x | |
| (MinPlanList (Cons x xs)) = | |
| let m = (MinPlanList xs) | |
| (If1 (< (GetCost x) (GetCost m)) x m) | |
| // =================================================================== | |
| // Dynamic-Programming Reference (for self-audit) | |
| // =================================================================== | |
| (DPBest set) = | |
| (If1 (Eq (Length set) 1) | |
| (Best (TScan (Head set) set)) | |
| (DPBestNonTrivial set)) | |
| (DPBestNonTrivial set) = | |
| let splits = (Splits set) | |
| let cands = (DPAlts splits set) | |
| (MinPlanList cands) | |
| (DPAlts Nil set) = Nil | |
| (DPAlts (Cons (Pair l r) rest) set) = | |
| let pl = (DPBest l) | |
| let pr = (DPBest r) | |
| let lr = (GetRows pl) | |
| let rr = (GetRows pr) | |
| let out = (JoinRows lr rr (GetSet pl) (GetSet pr)) | |
| let c_hash = (+ (+ (GetCost pl) (GetCost pr)) (HashJoinCost lr rr out)) | |
| let c_nl = (+ (+ (GetCost pl) (GetCost pr)) (NLJoinCost lr rr out)) | |
| let ph = (Plan (NJoin (AHash) (GetNode pl) (GetNode pr)) out c_hash set) | |
| let pn = (Plan (NJoin (ANL) (GetNode pl) (GetNode pr)) out c_nl set) | |
| (Cons ph (Cons pn (DPAlts rest set))) | |
| // =================================================================== | |
| // Pretty top-level | |
| // =================================================================== | |
| (Solve n) = | |
| let full = (Range 0 (- n 1)) | |
| let term = (Enum full) | |
| let best = (Best term) | |
| let dp = (DPBest full) | |
| let ok = (Eq (GetCost best) (GetCost dp)) | |
| (Res ok (GetCost best) (GetCost dp) (GetRows best) (GetNode best)) | |
| // Demo: 8 relations with the catalog above. | |
| @main = (Solve 8) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment