Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save Dicklesworthstone/8269a46f454d0ed35cf4d7b3a2f182fe to your computer and use it in GitHub Desktop.

Select an option

Save Dicklesworthstone/8269a46f454d0ed35cf4d7b3a2f182fe to your computer and use it in GitHub Desktop.
// ===================================================================
// Optimal & Safe Codegen/Reasoning Contract — Native HVM Implementation
// Problem: Bushy SQL Join-Order Optimizer with SUP-driven search
// ===================================================================
//
// Encodings
// ---------
// Term ::= TScan rel set
// | TJoin alg left right set
// | TSup lab left right set
// Alg ::= AHash | ANL
// Plan ::= Plan node rows cost set
// Node ::= NScan rel | NJoin alg lnode rnode
// Set ::= Nil | Cons i is // list of relation ids (sorted, unique)
// Pair ::= Pair a b
// Res ::= Res ok cost_sup cost_dp rows_sup node_sup
//
// Labels: each subset (Set) is mapped to a unique label `lab = ToMask(set)`.
// All alternatives for that subset are entangled under that label; collapse
// chooses exactly one branch per label (diagonal semantics).
//
// Numerics
// --------
// - Integer-only U60 arithmetic.
// - Base table rows stored in *thousands* of rows (ROW_SCALE = 1000).
// - Selectivities stored as per-million integers (SEL_SCALE = 1_000_000).
// - Join rows computed iteratively: rows = rows * sel(i,j) / SEL_SCALE,
// to avoid overflow and preserve precision in fixed-point.
//
// ===================================================================
// Constants
// ===================================================================
(ROW_SCALE) = 1000 // base rows scaled down by 1000
(SEL_SCALE) = 1000000 // selectivity is per-million
// ===================================================================
// Boolean / Pred helpers (0/1 booleans as in HVM examples)
// ===================================================================
(Not 0) = 1
(Not 1) = 0
(And 0 b) = 0
(And 1 b) = b
(Or 0 b) = b
(Or 1 b) = 1
// a == b <=> not (a<b) and not (b<a)
(Eq a b) =
let lt1 = (< a b)
let lt2 = (< b a)
let n1 = (Not lt1)
let n2 = (Not lt2)
(And n1 n2)
// if1 c t e with c in {0,1}
(If1 1 t e) = t
(If1 0 t e) = e
(MinNat a b) = (If1 (< a b) a b)
(MaxNat a b) = (If1 (< a b) b a)
// ===================================================================
// Lists
// ===================================================================
(Append Nil ys) = ys
(Append (Cons x xs) ys) = (Cons x (Append xs ys))
(Concat Nil) = Nil
(Concat (Cons xs xss)) = (Append xs (Concat xss))
(Length Nil) = 0
(Length (Cons _ xs)) = (+ 1 (Length xs))
(Reverse xs) = (RevAcc xs Nil)
(RevAcc Nil acc) = acc
(RevAcc (Cons x xs) acc) = (RevAcc xs (Cons x acc))
(IsEmpty Nil) = 1
(IsEmpty (Cons _ _)) = 0
(Map f Nil) = Nil
(Map f (Cons x xs)) = (Cons (f x) (Map f xs))
// ===================================================================
// Sets (as sorted lists of unique U60 indices)
// ===================================================================
(Contains Nil v) = 0
(Contains (Cons x xs) v) =
(If1 (< v x) 0 (If1 (Eq x v) 1 (Contains xs v)))
(Disjoint Nil ys) = 1
(Disjoint (Cons x xs) ys) =
(If1 (Contains ys x) 0 (Disjoint xs ys))
// Merge two sorted sets into a sorted union (no dups)
(Union Nil ys) = ys
(Union xs Nil) = xs
(Union (Cons a as) (Cons b bs)) =
(If1 (< a b)
(Cons a (Union as (Cons b bs)))
(If1 (< b a)
(Cons b (Union (Cons a as) bs))
(Cons a (Union as bs))))
// Convert Set -> unique label by encoding as bitmask (sum of 2^i)
(ToMask set) = (ToMaskAcc set 0)
(ToMaskAcc Nil acc) = acc
(ToMaskAcc (Cons i is) acc) = (ToMaskAcc is (+ acc (Pow2 i)))
// pow2(i): 2^i by repeated doubling
(Pow2 i) = (Pow2Acc i 1)
(Pow2Acc 0 p) = p
(Pow2Acc i p) = (Pow2Acc (- i 1) (+ p p))
// Range a..b inclusive (assumes a<=b)
(Range a b) = (If1 (Eq a b) (Cons a Nil) (Cons a (Range (+ a 1) b)))
// ===================================================================
// Catalog for demo (8 relations)
// Effective base rows (in thousands): round(original_rows * filter / 1000)
// ===================================================================
(RowsOf 0) = 750 // 1,500,000 * 0.50 / 1000
(RowsOf 1) = 800 // 2,000,000 * 0.40 / 1000
(RowsOf 2) = 750 // 750,000 * 1.00 / 1000
(RowsOf 3) = 1000 // 1,250,000 * 0.80 / 1000
(RowsOf 4) = 875 // 3,500,000 * 0.25 / 1000
(RowsOf 5) = 600 // 600,000 * 1.00 / 1000
(RowsOf 6) = 1440 // 2,400,000 * 0.60 / 1000
(RowsOf 7) = 1260 // 1,800,000 * 0.70 / 1000
// Default (unused): identity
(RowsOf _) = 1
// Pairwise selectivity per-million, symmetric; default = 1_000_000
(Sel i j) =
let a = (MinNat i j)
let b = (MaxNat i j)
(Sel' a b)
(Sel' 0 1) = 100 // 0.0001
(Sel' 0 2) = 10000 // 0.01
(Sel' 1 3) = 20000 // 0.02
(Sel' 2 3) = 5000 // 0.005
(Sel' 2 4) = 500 // 0.0005
(Sel' 3 5) = 10000 // 0.01
(Sel' 4 6) = 1000 // 0.001
(Sel' 6 7) = 20000 // 0.02
(Sel' 5 7) = 30000 // 0.03
(Sel' 1 4) = 20000 // 0.02
(Sel' _ _) = (SEL_SCALE)
// ===================================================================
// Algebraic data (constructors)
// ===================================================================
(AHash) = AHash
(ANL) = ANL
// Term of the search space:
(TScan rel set) = (TScan rel set)
(TJoin alg left right set) = (TJoin alg left right set)
(TSup lab left right set) = (TSup lab left right set)
// Plan and nodes:
(NScan rel) = (NScan rel)
(NJoin alg l r) = (NJoin alg l r)
(Plan node rows cost set) = (Plan node rows cost set)
// Result container:
(Res ok cost_sup cost_dp rows_sup node_sup) = (Res ok cost_sup cost_dp rows_sup node_sup)
// Getters:
(GetRows (Plan _ rows _ _)) = rows
(GetCost (Plan _ _ cost _)) = cost
(GetSet (Plan _ _ _ set)) = set
(GetNode (Plan node _ _ _)) = node
// ===================================================================
// Cost model
// ===================================================================
(ScanCost rows) = rows
(HashJoinCost lrows rrows out) = (+ (+ lrows rrows) out)
(NLJoinCost lrows rrows out) = (+ (* lrows rrows) out)
// Compute join output rows with iterative scaling:
// out = (((lrows * rrows) * sel(i1,j1) / 1e6) * sel(i2,j2)/1e6) ...
(JoinRows lrows rrows lset rset) =
let base = (* lrows rrows)
(RefinePairs base lset rset)
(RefinePairs acc Nil rset) = acc
(RefinePairs acc (Cons i is) rset) =
let a = (RefineWith acc i rset)
(RefinePairs a is rset)
(RefineWith acc i Nil) = acc
(RefineWith acc i (Cons j js)) =
let s = (Sel i j)
let tmp = (/ (* acc s) (SEL_SCALE))
(RefineWith tmp i js)
// ===================================================================
// Splits enumeration (unique, anchor-based)
// For non-empty set S = [a | xs], we enumerate all partitions (L,R)
// such that a ∈ L, R ≠ ∅. This yields every proper split exactly once.
// ===================================================================
(Splits set) =
let a = (Head set)
let xs = (Tail set)
let pr = (Assign xs) // list of Pair(Lt, Rt) over xs
(Anchored a pr)
(Anchored a Nil) = Nil
(Anchored a (Cons (Pair lt rt) rest)) =
let nonempty = (Not (IsEmpty rt))
let head = (If1 nonempty (Cons (Pair (Cons a lt) rt) Nil) Nil)
(Append head (Anchored a rest))
// All 2^(|xs|) assignments of xs into (Lt,Rt)
(Assign Nil) = (Cons (Pair Nil Nil) Nil)
(Assign (Cons x xs)) =
let tail = (Assign xs)
let putL = (Map (PutLeft x) tail)
let putR = (Map (PutRight x) tail)
(Append putL putR)
(PutLeft x (Pair lt rt)) = (Pair (Cons x lt) rt)
(PutRight x (Pair lt rt)) = (Pair lt (Cons x rt))
(Head (Cons x _)) = x
(Tail (Cons _ xs)) = xs
// ===================================================================
// Builder: enumerate all alternatives as a single TSup group per set
// ===================================================================
(Enum set) =
(If1 (Eq (Length set) 1)
(TScan (Head set) set)
(EnumMulti set))
(EnumMulti set) =
let lab = (ToMask set)
let splits = (Splits set) // [ Pair(L,R) ... ]
let alts = (MakeAlts splits set) // at least 2 alternatives
(SupOf lab alts set)
(MakeAlts Nil set) = Nil
(MakeAlts (Cons (Pair l r) rest) set) =
let lt = (Enum l)
let rt = (Enum r)
let a1 = (TJoin (AHash) lt rt set)
let a2 = (TJoin (ANL) lt rt set)
(Cons a1 (Cons a2 (MakeAlts rest set)))
// Fold a non-empty list of terms under the same label into a TSup chain.
(SupOf lab (Cons a (Cons b Nil)) set) = (TSup lab a b set)
(SupOf lab (Cons a (Cons b rest)) set) =
let s = (TSup lab a b set)
(SupOf lab (Cons s rest) set)
// ===================================================================
// SUP-group collapse + exact evaluator
// ===================================================================
(Best (TScan rel set)) =
let rows = (RowsOf rel)
let cost = (ScanCost rows)
(Plan (NScan rel) rows cost set)
(Best (TJoin alg left right set)) =
let pl = (Best left)
let pr = (Best right)
let lr = (GetRows pl)
let rr = (GetRows pr)
let out = (JoinRows lr rr (GetSet pl) (GetSet pr))
let c_op = (If1 (EqAlg alg AHash)
(HashJoinCost lr rr out)
(NLJoinCost lr rr out))
let cost = (+ (+ (GetCost pl) (GetCost pr)) c_op)
(Plan (NJoin alg (GetNode pl) (GetNode pr)) out cost set)
(Best (TSup lab left right set)) =
let alts = (CollectLeaves lab (TSup lab left right set))
let plans = (Map Best alts)
(MinPlanList plans)
// Recognize equality of algorithm constructors
(EqAlg AHash AHash) = 1
(EqAlg ANL ANL) = 1
(EqAlg _ _) = 0
// Flatten all leaves under one label
(CollectLeaves lab (TSup l a b set)) =
(If1 (Eq lab l)
(Append (CollectLeaves lab a) (CollectLeaves lab b))
(Cons (TSup l a b set) Nil))
(CollectLeaves lab t) = (Cons t Nil)
// Reduce a non-empty list of plans to the one with minimum cost
(MinPlanList (Cons x Nil)) = x
(MinPlanList (Cons x xs)) =
let m = (MinPlanList xs)
(If1 (< (GetCost x) (GetCost m)) x m)
// ===================================================================
// Dynamic-Programming Reference (for self-audit)
// ===================================================================
(DPBest set) =
(If1 (Eq (Length set) 1)
(Best (TScan (Head set) set))
(DPBestNonTrivial set))
(DPBestNonTrivial set) =
let splits = (Splits set)
let cands = (DPAlts splits set)
(MinPlanList cands)
(DPAlts Nil set) = Nil
(DPAlts (Cons (Pair l r) rest) set) =
let pl = (DPBest l)
let pr = (DPBest r)
let lr = (GetRows pl)
let rr = (GetRows pr)
let out = (JoinRows lr rr (GetSet pl) (GetSet pr))
let c_hash = (+ (+ (GetCost pl) (GetCost pr)) (HashJoinCost lr rr out))
let c_nl = (+ (+ (GetCost pl) (GetCost pr)) (NLJoinCost lr rr out))
let ph = (Plan (NJoin (AHash) (GetNode pl) (GetNode pr)) out c_hash set)
let pn = (Plan (NJoin (ANL) (GetNode pl) (GetNode pr)) out c_nl set)
(Cons ph (Cons pn (DPAlts rest set)))
// ===================================================================
// Pretty top-level
// ===================================================================
(Solve n) =
let full = (Range 0 (- n 1))
let term = (Enum full)
let best = (Best term)
let dp = (DPBest full)
let ok = (Eq (GetCost best) (GetCost dp))
(Res ok (GetCost best) (GetCost dp) (GetRows best) (GetNode best))
// Demo: 8 relations with the catalog above.
@main = (Solve 8)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment