Skip to content

Instantly share code, notes, and snippets.

@Koushikphy
Last active April 21, 2025 21:24
Show Gist options
  • Save Koushikphy/08ffdde286c61959a1b7f5b8e85640b1 to your computer and use it in GitHub Desktop.
Save Koushikphy/08ffdde286c61959a1b7f5b8e85640b1 to your computer and use it in GitHub Desktop.
// Checks for:
// 1. Validity of tags
// 2. Serial number of thoughts
// 3. Validity of LaTeX
// 4. Validity of Backtracking
// 5. Newline after tags
// 6. First person singular/plural in necessary places
// and a few more
// How to use: Go to Colab Notebook > Open dev console > paste the script
// NOTE: `var` is kept on purpose for these script-level bindings: the script is
// meant to be pasted into the dev console, and `var` can be re-declared when the
// script is pasted a second time.

// Tag arrays of every recognised cell, in the order the cells were visited.
var tagList = [];
// Number of the most recently validated THOUGHT cell (0 before the first one).
var thoughtCounter = 0;

// Names allowed as the FIRST tag of a cell.
// The pattern is anchored so that a tag must match an allowed name exactly;
// an unanchored alternation would also accept e.g. "MY_RESPONSE" or "Assistants".
var allowedTags = new RegExp(`^(?:${[
  'PROMPT',
  'Assistant',
  'THOUGHT_\\d+',
  'RESPONSE',
].join('|')})$`);

// Names allowed as reasoning tags that follow a THOUGHT_XX tag.
// Anchored for the same reason as above (e.g. "Mistaken" must be rejected).
var allowedPattern = new RegExp(`^(?:${[
  'Formulating',
  'Planning',
  'Hypothesizing',
  'Deriving',
  'Verifying',
  'Reflecting',
  'Mistake',
  'Backtracking_\\d+',
  'Final_Decision',
  'Final_Summary',
].join('|')})$`);
/**
 * Soft assertion: logs `message` with console.error when `test` is falsy.
 * It never throws, so execution continues after a failed check.
 *
 * @param {*} test - Value expected to be truthy.
 * @param {string} message - Text to log when the check fails.
 */
function assert(test, message) {
  if (test) {
    return;
  }
  console.error(message);
}
/**
 * Tells whether a cell is free of a leading and a trailing newline.
 * Only a newline character at the very start or very end of the text counts;
 * whitespace-only lines are not detected.
 *
 * @param {string} cellText - Raw text of the cell.
 * @returns {boolean} true when the text neither starts nor ends with "\n".
 */
function checkNoBlankLines(cellText) {
  const text = String(cellText);
  const startsWithNewline = text.startsWith('\n');
  const endsWithNewline = text.endsWith('\n');
  return !(startsWithNewline || endsWithNewline);
}
/**
 * Checks that the tag line of a cell is followed by an empty line.
 * Cells whose first tag is "Assistant" are exempt and always pass.
 *
 * @param {string} cellText - Raw text of the cell.
 * @param {string[]} tags - Tags of the cell; only tags[0] is used.
 * @returns {boolean} true when the cell is exempt, or when the line right
 *   after the first line containing tags[0] is exactly the empty string.
 */
function checkNewLine(cellText, tags) {
  const firstTag = tags[0];
  const lines = cellText.split('\n');
  if (firstTag === 'Assistant') {
    return true;
  }
  // When no line contains the tag, findIndex yields -1 and lines[0] is inspected.
  const tagLineIndex = lines.findIndex((line) => line.includes(firstTag));
  const lineAfterTag = lines[tagLineIndex + 1];
  return lineAfterTag === '';
}
/**
 * Reports LaTeX written with delimiters other than the accepted ones.
 *
 * Accepted delimiters are a double backslash followed by a bracket:
 * `\\( ... \\)` and `\\[ ... \\]`. Those spans are removed first; anything that
 * still looks like `\( \)`, `\[ \]`, `$ $` or `$$ $$` is then reported via assert.
 *
 * @param {string} text - Raw text of the cell.
 * @param {string[]} tags - Tags of the cell; tags[0] is used in the message.
 */
function listInvLatex(text, tags) {
  // The `s` flag lets `.` cross newlines, so valid equations that span several
  // lines are stripped too. Without it they survive and are then matched by the
  // (multi-line) detection regex below, producing false reports.
  const cleaned = text.replace(/\\\\\((.*?)\\\\\)|\\\\\[(.*?)\\\\\]/gs, '');
  // Whatever delimiter pairs remain are invalid: \( \), \[ \], $$ $$, $ $
  const matches = Array.from(
    cleaned.matchAll(/\\\((.*?)\\\)|\\\[(.*?)\\\]|\$\$(.*?)\$\$|\$(.*?)\$/gs),
    (match) => match[0], // full matched text
  );
  assert(!matches.length, `Invalid LaTeX found in ${tags[0]} : ${matches}.`);
}
/**
 * Reports numbers that are written outside of valid LaTeX.
 *
 * Valid LaTeX spans (`\\( ... \\)` and `\\[ ... \\]`, i.e. double-backslash
 * delimiters) are removed first; every remaining standalone run of digits is
 * then reported via assert.
 *
 * @param {string} text - Raw text of the cell.
 * @param {string[]} tags - Tags of the cell; tags[0] is used in the message.
 */
function nonLatexNumber(text, tags) {
  // The `s` flag lets `.` cross newlines so that numbers inside equations that
  // span several lines are not reported.
  const cleaned = text.replace(/\\\\\((.*?)\\\\\)|\\\\\[(.*?)\\\\\]/gs, '');
  // Standalone digit runs; digits glued to a word character (e.g. the "01" in
  // "THOUGHT_01") have no word boundary before them and are not matched.
  const matches = cleaned.match(/\b\d+\b/g);
  assert(!matches, `Non LaTeX number found in ${tags[0]}: ${matches}`);
}
/**
 * Detects characters outside the 7-bit ASCII range, such as typographic
 * characters that may sneak in through copy-paste.
 *
 * @param {string} str - Text to inspect.
 * @returns {boolean} true when at least one UTF-16 code unit is above 0x7F.
 */
function containsUnicode(str) {
  const text = String(str);
  for (let index = 0; index < text.length; index += 1) {
    if (text.charCodeAt(index) > 0x7f) {
      return true;
    }
  }
  return false;
}
/**
 * Reports first person plural words ("we", "our", "us", any letter case)
 * found outside of valid LaTeX. The caller runs this on THOUGHT cells.
 *
 * @param {string} text - Raw text of the cell.
 * @param {string[]} tags - Tags of the cell; tags[0] is used in the message.
 */
function checkFirstPersonPlural(text, tags) {
  // Equations (`\\( ... \\)` and `\\[ ... \\]`) are removed so they are not
  // searched. The `s` flag lets `.` cross newlines, so equations that span
  // several lines are removed as well.
  const cleaned = text.replace(/\\\\\((.*?)\\\\\)|\\\\\[(.*?)\\\\\]/gs, '');
  const fpp = cleaned.match(/\b(we|our|us)\b/gi);
  assert(!fpp, `First person plural found in ${tags[0]} : ${fpp}.`);
}
/**
 * Reports first person singular words ("I", "me", "my", "mine", "myself",
 * any letter case) found outside of valid LaTeX. The caller runs this on
 * every cell that is not a THOUGHT cell.
 *
 * @param {string} text - Raw text of the cell.
 * @param {string[]} tags - Tags of the cell; tags[0] is used in the message.
 */
function checkFirstPersonSingular(text, tags) {
  // Equations (`\\( ... \\)` and `\\[ ... \\]`) are removed so they are not
  // searched. The `s` flag lets `.` cross newlines, so equations that span
  // several lines are removed as well (otherwise e.g. an index `i` inside a
  // multi-line equation is reported).
  const cleaned = text.replace(/\\\\\((.*?)\\\\\)|\\\\\[(.*?)\\\\\]/gs, '');
  // The look-arounds skip words that touch a dot or a word character,
  // e.g. the "i" of "i.e.".
  const fps = cleaned.match(/(?<!\w|\.)\b(I|me|my|mine|myself)\b(?!\w|\.)/gi);
  assert(!fps, `First person singular found in ${tags[0]} : ${fps}.`);
}
/**
 * Validates every `Backtracking_NN` tag in the collected tag list.
 *
 * For each cell carrying a Backtracking tag it checks that:
 *  - the cell has at least one more tag besides its first tag and the
 *    Backtracking tag(s);
 *  - the number after "Backtracking_" has exactly two characters;
 *  - the number points at the cell located right before the closest preceding
 *    run of consecutive "Mistake" cells.
 * Problems are reported through assert; nothing is returned.
 *
 * @param {string[][]} arr - One tag array per cell, in cell order; element 0
 *   of each array is the cell's first tag (e.g. "THOUGHT_03").
 */
function checkBacktrack(arr) {
  const isMistake = (entry) => entry.includes('Mistake');
  for (let i = 0; i < arr.length; i++) {
    const entry = arr[i];
    const backtrackTag = entry.find((tag) => tag.startsWith('Backtracking_'));
    if (!backtrackTag) continue;

    // entry[0] is the cell's own tag, so more than one non-backtracking tag
    // means at least one reasoning tag is present.
    assert(entry.filter((tag) => !tag.startsWith('Backtracking_')).length > 1,
      `Backtracking tag should have at least one reasoning tag in ${entry[0]}.`);

    const targetNum = backtrackTag.slice('Backtracking_'.length);
    assert(targetNum.length == 2, `Improper format in backtracking in: ${entry[0]}`);
    const targetThought = `THOUGHT_${targetNum.padStart(2, '0')}`;

    // Walk back to the closest Mistake cell, then past the whole run of
    // consecutive Mistake cells; j ends on the cell just before that run.
    let j = i - 1;
    while (j >= 0 && !isMistake(arr[j])) j--;
    while (j >= 0 && isMistake(arr[j])) j--;

    // j is -1 when there is no preceding Mistake cell or when the mistake run
    // starts at the first cell. Optional chaining turns that case into a
    // reported failure instead of a TypeError that aborts the whole check.
    assert(arr[j]?.[0] == targetThought, `Backtracking is point to wrong thought in ${entry[0]}.`);
  }
}
/**
 * Validates the tags of a THOUGHT cell and its position in the sequence.
 *
 * Checks, in this order: the thought number parses as an integer (otherwise an
 * error is logged and the function stops), the number is written with two
 * characters, every reasoning tag matches `allowedPattern`, the number follows
 * the previously seen thought, at least one reasoning tag exists, and
 * Final_Decision / Final_Summary stand alone.
 * Side effect: updates the script-level `thoughtCounter`.
 *
 * @param {string[]} tags - Tags of the cell; tags[0] is the "THOUGHT_XX" tag.
 */
function checkThoughtTags(tags) {
  const header = tags[0];
  const numberText = header.slice('THOUGHT_'.length);
  const thoughtNum = parseInt(numberText, 10);
  if (Number.isNaN(thoughtNum)) {
    console.error(`Invalid thought number: ${tags[0]}`);
    return;
  }

  // Numbers are expected as "01", "02", ... (two characters).
  assert(numberText.length === 2, `Improper format in thought number: ${tags[0]}`);

  // Everything after the "THOUGHT_XX" tag is a reasoning tag.
  const reasoningTags = tags.slice(1);

  const invalid = [];
  for (const tag of reasoningTags) {
    if (!allowedPattern.test(tag)) {
      invalid.push(tag);
    }
  }
  assert(invalid.length === 0, `THOUGHT ${thoughtNum} has invalid tags: ${invalid}`);

  // Thoughts must be numbered consecutively.
  const expectedNum = thoughtCounter + 1;
  assert(thoughtNum === expectedNum, `Thought ${thoughtNum} appeared after ${thoughtCounter}.`);
  thoughtCounter = thoughtNum;

  // At least one reasoning tag is mandatory.
  assert(reasoningTags.length !== 0, `No tags found for Thought ${thoughtCounter}.`);

  // A "final" tag passes only when it is absent or is the sole reasoning tag.
  const standsAlone = (name) => !reasoningTags.includes(name) || reasoningTags.length <= 1;
  assert(standsAlone("Final_Decision"), "Final_Decision tag should not contain any other tags.");
  assert(standsAlone("Final_Summary"), "Final_Summary tag should not contain any other tags.");
}
/**
 * Extracts the tags of a cell. Tags are written as `**[NAME]**` and are only
 * looked for on the first non-empty line of the text.
 *
 * @param {string} text - Raw text of the cell.
 * @returns {string[]} Tag names in order of appearance; empty when the text
 *   has no non-empty line or that line carries no tag.
 */
function getTags(text) {
  // First line that contains something other than whitespace.
  const firstLineMatch = text.match(/^(?!\s*$).+/m);
  if (!firstLineMatch) {
    return [];
  }
  const names = [];
  for (const tagMatch of firstLineMatch[0].matchAll(/\*\*\[(.*?)\]\*\*/g)) {
    names.push(tagMatch[1]); // text between "**[" and "]**"
  }
  return names;
}
/**
 * Checks the first tag of a cell against `allowedTags` and reports it via
 * assert when it is not recognised.
 *
 * @param {string[]} tags - Tags of the cell; only tags[0] is checked.
 * @returns {boolean} Result of testing tags[0] against `allowedTags`.
 */
function tagSanity(tags) {
  const firstTag = tags[0];
  const isKnown = allowedTags.test(firstTag);
  assert(isKnown, `Unknown tag found:${firstTag}`);
  return isKnown;
}
//-------------------------------------//
// Run every per-cell check on each element matching `.cell`, collecting the
// tags of recognised cells in `tagList` for the cross-cell checks that follow.
// NOTE(review): getText() is not a standard DOM method; presumably it is
// provided by Colab's cell elements - confirm.
for (const cell of document.querySelectorAll('.cell')) {
  const cellText = cell.getText();
  const tags = getTags(cellText);

  // Cells without any tag (blank cells?) and cells with an unknown first tag
  // are skipped entirely.
  if (tags.length === 0) continue;
  if (!tagSanity(tags)) continue;
  tagList.push(tags);

  const firstTag = tags[0];
  if (firstTag.startsWith("THOUGHT")) {
    checkThoughtTags(tags);
    // Only first person singular is allowed in thoughts
    checkFirstPersonPlural(cellText, tags);
  } else {
    checkFirstPersonSingular(cellText, tags);
  }

  assert(!containsUnicode(cellText), `Unicode character found in ${tags[0]}`);
  listInvLatex(cellText, tags);
  nonLatexNumber(cellText, tags);
  // There should be an empty line right after the tag line.
  assert(checkNewLine(cellText, tags), `Put a new line after tags in ${tags[0]}.`);
  assert(checkNoBlankLines(cellText), `Remove blank lines from the beginning/end of cell ${tags[0]}.`);
}
// Cross-cell checks, run once all cells have been collected in `tagList`.
checkBacktrack(tagList);

// The last three recognised cells must be: a thought tagged Final_Decision,
// a thought tagged Final_Summary, then the RESPONSE cell.
// Optional chaining keeps these checks from throwing a TypeError when fewer
// than three tagged cells were found; the failure is reported instead.
// `var` is kept so the script can be pasted into the console more than once.
var n = tagList.length;
assert(tagList[n - 3]?.[1] == 'Final_Decision', "2 Cells before the last should be Decision.");
assert(tagList[n - 2]?.[1] == 'Final_Summary', "Cell before the last should be Summary.");
assert(tagList[n - 1]?.[0] == 'RESPONSE', "Last cell should be Response.");
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment