Skip to content

Instantly share code, notes, and snippets.

@dfabulich
Created September 24, 2025 23:52
Show Gist options
  • Select an option

  • Save dfabulich/57ee6372f62be5a0dfd69b4697e97ce5 to your computer and use it in GitHub Desktop.

Select an option

Save dfabulich/57ee6372f62be5a0dfd69b4697e97ce5 to your computer and use it in GitHub Desktop.
Node.js script to generate SQL to query VNDB by starsort
// based on https://gist.github.com/dfabulich/fc6b13a8bffc5518c4731347de642749
// but VNDB supports a wider range of discrete values, so I had to write this script to generate the SQL
const MIN_VOTE = 10;
const MAX_VOTE = 100;
function buildBuckets(alias) {
const parts = [];
for (let v = MIN_VOTE; v <= MAX_VOTE; v++) {
parts.push(
`SUM(CASE WHEN ${alias}.vote = ${v} THEN ${alias}.cnt ELSE 0 END) AS rated${v}`
);
}
parts.push(`SUM(${alias}.cnt) AS count`);
return parts.join(",\n ");
}
function buildSmoothedNumerator(prefix = "") {
const terms = [];
for (let v = MIN_VOTE; v <= MAX_VOTE; v++) {
terms.push(`${v}*(${prefix}rated${v}+1)`);
}
return `(${terms.join(" + ")})`;
}
function buildSmoothedSecondMoment(prefix = "") {
const terms = [];
for (let v = MIN_VOTE; v <= MAX_VOTE; v++) {
terms.push(`${v * v}*(${prefix}rated${v}+1)`);
}
return `(${terms.join(" + ")})`;
}
function buildRawNumerator(prefix = "") {
const terms = [];
for (let v = MIN_VOTE; v <= MAX_VOTE; v++) {
terms.push(`${v}*${prefix}rated${v}`);
}
return `(${terms.join(" + ")})`;
}
const NUM_BUCKETS = MAX_VOTE - MIN_VOTE + 1;
const smoothedDenominator = `${NUM_BUCKETS} + `; // will append total counts sum
function buildSumAllRated(prefix = "") {
const terms = [];
for (let v = MIN_VOTE; v <= MAX_VOTE; v++) {
terms.push(`${prefix}rated${v}`);
}
return `(${terms.join(" + ")})`;
}
function buildSQL() {
const groupedAlias = "g";
const buckets = buildBuckets("g");
const sumAllRated = buildSumAllRated();
const smNum = buildSmoothedNumerator();
const sm2Num = buildSmoothedSecondMoment();
const rawNum = buildRawNumerator();
// Smoothed mean: mu = smNum / (R + total)
const smDen = `(${smoothedDenominator}${sumAllRated})`;
const mu = `(${smNum})/${smDen}`;
// Smoothed second moment: m2 = sm2Num / (R + total)
const m2 = `(${sm2Num})/${smDen}`;
// Variance (smoothed): var = m2 - mu^2
const variance = `${m2} - POWER(${mu}, 2)`;
// starsort = mu - 1.65 * sqrt( var / (R + total + 1) )
// Following the original example’s denominator pattern (6 + sum) -> (R + total + 1)
const starsort = `(${mu}) - 1.65*SQRT( ( ${variance} ) / ( ${NUM_BUCKETS + 1} + ${sumAllRated} ) )`;
const average = `(${rawNum})/NULLIF(${sumAllRated}, 0)`;
// SQL layout:
// - inner-most: group votes per (vid, vote)
// - next: pivot/group into rated10..rated100 and total count
// - then: compute average and starsort
// - final: join to vn to select VNs and order
return `
SELECT
vn.id,
vn.title,
starsorted.starsort
FROM vn
JOIN (
SELECT
rating_counts.*,
${average} AS average,
${starsort} AS starsort
FROM (
SELECT
g.vid,
${buckets}
FROM (
SELECT
u.vid,
u.vote,
COUNT(*) AS cnt
FROM ulist_vns u
WHERE u.vote BETWEEN ${MIN_VOTE} AND ${MAX_VOTE}
GROUP BY u.vid, u.vote
) AS ${groupedAlias}
GROUP BY g.vid
) AS rating_counts
) AS starsorted
ON starsorted.vid = vn.id
ORDER BY starsort DESC
LIMIT 100;
`.trim();
}
console.log(buildSQL());
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment