Created
September 24, 2025 23:52
-
-
Save dfabulich/57ee6372f62be5a0dfd69b4697e97ce5 to your computer and use it in GitHub Desktop.
Node.js script to generate SQL to query VNDB by starsort
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // based on https://gist.github.com/dfabulich/fc6b13a8bffc5518c4731347de642749 | |
| // but VNDB supports a wider range of discrete values, so I had to write this script to generate the SQL | |
| const MIN_VOTE = 10; | |
| const MAX_VOTE = 100; | |
| function buildBuckets(alias) { | |
| const parts = []; | |
| for (let v = MIN_VOTE; v <= MAX_VOTE; v++) { | |
| parts.push( | |
| `SUM(CASE WHEN ${alias}.vote = ${v} THEN ${alias}.cnt ELSE 0 END) AS rated${v}` | |
| ); | |
| } | |
| parts.push(`SUM(${alias}.cnt) AS count`); | |
| return parts.join(",\n "); | |
| } | |
| function buildSmoothedNumerator(prefix = "") { | |
| const terms = []; | |
| for (let v = MIN_VOTE; v <= MAX_VOTE; v++) { | |
| terms.push(`${v}*(${prefix}rated${v}+1)`); | |
| } | |
| return `(${terms.join(" + ")})`; | |
| } | |
| function buildSmoothedSecondMoment(prefix = "") { | |
| const terms = []; | |
| for (let v = MIN_VOTE; v <= MAX_VOTE; v++) { | |
| terms.push(`${v * v}*(${prefix}rated${v}+1)`); | |
| } | |
| return `(${terms.join(" + ")})`; | |
| } | |
| function buildRawNumerator(prefix = "") { | |
| const terms = []; | |
| for (let v = MIN_VOTE; v <= MAX_VOTE; v++) { | |
| terms.push(`${v}*${prefix}rated${v}`); | |
| } | |
| return `(${terms.join(" + ")})`; | |
| } | |
| const NUM_BUCKETS = MAX_VOTE - MIN_VOTE + 1; | |
| const smoothedDenominator = `${NUM_BUCKETS} + `; // will append total counts sum | |
| function buildSumAllRated(prefix = "") { | |
| const terms = []; | |
| for (let v = MIN_VOTE; v <= MAX_VOTE; v++) { | |
| terms.push(`${prefix}rated${v}`); | |
| } | |
| return `(${terms.join(" + ")})`; | |
| } | |
| function buildSQL() { | |
| const groupedAlias = "g"; | |
| const buckets = buildBuckets("g"); | |
| const sumAllRated = buildSumAllRated(); | |
| const smNum = buildSmoothedNumerator(); | |
| const sm2Num = buildSmoothedSecondMoment(); | |
| const rawNum = buildRawNumerator(); | |
| // Smoothed mean: mu = smNum / (R + total) | |
| const smDen = `(${smoothedDenominator}${sumAllRated})`; | |
| const mu = `(${smNum})/${smDen}`; | |
| // Smoothed second moment: m2 = sm2Num / (R + total) | |
| const m2 = `(${sm2Num})/${smDen}`; | |
| // Variance (smoothed): var = m2 - mu^2 | |
| const variance = `${m2} - POWER(${mu}, 2)`; | |
| // starsort = mu - 1.65 * sqrt( var / (R + total + 1) ) | |
| // Following the original example’s denominator pattern (6 + sum) -> (R + total + 1) | |
| const starsort = `(${mu}) - 1.65*SQRT( ( ${variance} ) / ( ${NUM_BUCKETS + 1} + ${sumAllRated} ) )`; | |
| const average = `(${rawNum})/NULLIF(${sumAllRated}, 0)`; | |
| // SQL layout: | |
| // - inner-most: group votes per (vid, vote) | |
| // - next: pivot/group into rated10..rated100 and total count | |
| // - then: compute average and starsort | |
| // - final: join to vn to select VNs and order | |
| return ` | |
| SELECT | |
| vn.id, | |
| vn.title, | |
| starsorted.starsort | |
| FROM vn | |
| JOIN ( | |
| SELECT | |
| rating_counts.*, | |
| ${average} AS average, | |
| ${starsort} AS starsort | |
| FROM ( | |
| SELECT | |
| g.vid, | |
| ${buckets} | |
| FROM ( | |
| SELECT | |
| u.vid, | |
| u.vote, | |
| COUNT(*) AS cnt | |
| FROM ulist_vns u | |
| WHERE u.vote BETWEEN ${MIN_VOTE} AND ${MAX_VOTE} | |
| GROUP BY u.vid, u.vote | |
| ) AS ${groupedAlias} | |
| GROUP BY g.vid | |
| ) AS rating_counts | |
| ) AS starsorted | |
| ON starsorted.vid = vn.id | |
| ORDER BY starsort DESC | |
| LIMIT 100; | |
| `.trim(); | |
| } | |
| console.log(buildSQL()); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment