Last active
April 21, 2025 21:24
-
-
Save Koushikphy/08ffdde286c61959a1b7f5b8e85640b1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Checks for:
// 1. Validity of tags
// 2. Serial numbering of thoughts
// 3. Validity of LaTeX
// 4. Validity of backtracking
// 5. Newline after tags
// 6. First person singular/plural in the necessary places
// and a few more.
// How to use: Go to the Colab notebook > open the dev console > paste the script
// Mutable state shared by the checks below. Kept as `var` so the script can be
// pasted into the dev console more than once; every paste resets both values.
// Tag lists of every accepted cell, in document order (filled by the cell loop).
var tagList = [];
// Number of the most recently processed THOUGHT cell, used to verify numbering.
var thoughtCounter = 0;
// Tags that may appear first in a cell header.
// The alternation is anchored with ^(?:...)$ so a tag has to match one name in
// full; without the anchors `.test()` accepts any string that merely contains
// a valid name (for example "MY_PROMPT" or "RESPONSE_DRAFT").
var allowedTags = new RegExp(`^(?:${[
  'PROMPT',
  'Assistant',
  'THOUGHT_\\d+',
  'RESPONSE',
].join('|')})$`);
// Reasoning tags that may follow the THOUGHT_XX tag in a thought cell.
// Anchored with ^(?:...)$ so each tag must equal one of the names exactly;
// an unanchored pattern would also accept strings such as "Planning2" or
// "NotAMistake" because they contain a valid name.
var allowedPattern = new RegExp(`^(?:${[
  'Formulating',
  'Planning',
  'Hypothesizing',
  'Deriving',
  'Verifying',
  'Reflecting',
  'Mistake',
  'Backtracking_\\d+',
  'Final_Decision',
  'Final_Summary',
].join('|')})$`);
function assert(test, message) {
  // Soft assertion: when `test` is falsy, log `message` with console.error.
  // It never throws, so the remaining checks keep running after a failure.
  if (!test) {
    console.error(message);
  }
}
function checkNoBlankLines(cellText) {
  // Returns true when the cell neither starts nor ends with a blank line.
  // A line made only of whitespace (including the "\r" of a CRLF line ending)
  // counts as blank, so "  \ntext" and "text\r\n" are both rejected.
  // Blank lines in the middle of the cell are not considered.
  const startsBlank = /^\s*\n/.test(cellText);
  const endsBlank = /\n\s*$/.test(cellText);
  return !(startsBlank || endsBlank);
}
function checkNewLine(cellText, tags) {
  // Returns true when the line that follows the tag line is empty.
  // Cells whose first tag is "Assistant" are exempt and always pass.
  const [firstTag] = tags;
  if (firstTag === 'Assistant') return true;

  // Split on LF or CRLF so a Windows line ending does not leave a stray "\r"
  // that would make an otherwise empty line look non-empty.
  const lines = cellText.split(/\r?\n/);
  const tagLineIndex = lines.findIndex((line) => line.includes(firstTag));

  // When the tag line is the last line, the lookup yields undefined -> false.
  return lines[tagLineIndex + 1] === '';
}
function listInvLatex(text, tags) {
  // Reports every LaTeX span in `text` that does not use the accepted
  // double-backslash delimiters. `tags[0]` is only used to label the message.

  // Strip the accepted forms first: \\( ... \\) and \\[ ... \\].
  // The `s` flag lets `.` cross newlines so that display math written over
  // several lines is removed too; without it such a block survives the
  // cleaning step and is then reported as invalid by the pattern below.
  const cleaned = text.replace(/\\\\\((.*?)\\\\\)|\\\\\[(.*?)\\\\\]/gs, '');

  // Whatever still looks like math is invalid: \( \), \[ \], $$ $$ or $ $.
  const invalidPattern = /\\\((.*?)\\\)|\\\[(.*?)\\\]|\$\$(.*?)\$\$|\$(.*?)\$/gs;
  const matches = Array.from(cleaned.matchAll(invalidPattern), (match) => match[0]);

  assert(!matches.length, `Invalid LaTeX found in ${tags[0]} : ${matches}.`);
}
function nonLatexNumber(text, tags) {
  // Reports standalone numbers that are written outside of LaTeX.
  // `tags[0]` is only used to label the message.

  // Remove the accepted LaTeX spans, \\( ... \\) and \\[ ... \\], so numbers
  // inside equations are ignored. The `s` flag makes this work for equations
  // that span several lines as well.
  const cleaned = text.replace(/\\\\\((.*?)\\\\\)|\\\\\[(.*?)\\\\\]/gs, '');

  // \b...\b keeps digits glued to word characters (such as the "01" in
  // "THOUGHT_01", where "_" is a word character) from being reported.
  const matches = cleaned.match(/\b\d+\b/g);

  assert(!matches, `Non LaTeX number found in ${tags[0]}: ${matches}`);
}
function containsUnicode(str) {
  // Returns true when `str` contains any character outside the ASCII range
  // (code units above 0x7F), e.g. smart quotes picked up by copy-paste.
  const nonAscii = /[^\x00-\x7F]/;
  return nonAscii.test(str);
}
function checkFirstPersonPlural(text, tags) {
  // Reports first person plural words (we / our / us, case-insensitive).
  // The caller runs this on THOUGHT cells; `tags[0]` labels the message.

  // Do not search inside equations: drop \\( ... \\) and \\[ ... \\] first.
  // The `s` flag lets the removal cover equations spanning several lines.
  const cleaned = text.replace(/\\\\\((.*?)\\\\\)|\\\\\[(.*?)\\\\\]/gs, '');

  const fpp = cleaned.match(/\b(we|our|us)\b/gi);
  assert(!fpp, `First person plural found in ${tags[0]} : ${fpp}.`);
}
function checkFirstPersonSingular(text, tags) {
  // Reports first person singular words (I / me / my / mine / myself,
  // case-insensitive). The caller runs this on every cell that is not a
  // THOUGHT cell; `tags[0]` labels the message.

  // Do not search inside equations: drop \\( ... \\) and \\[ ... \\] first.
  // The `s` flag lets the removal cover equations spanning several lines.
  const cleaned = text.replace(/\\\\\((.*?)\\\\\)|\\\\\[(.*?)\\\\\]/gs, '');

  // The look-arounds skip words adjacent to a "." (so "i.e." is not flagged).
  const fps = cleaned.match(/(?<!\w|\.)\b(I|me|my|mine|myself)\b(?!\w|\.)/gi);
  assert(!fps, `First person singular found in ${tags[0]} : ${fps}.`);
}
function checkBacktrack(arr) {
  // Validates every Backtracking_XX tag in `arr`, a list of per-cell tag
  // arrays in document order (entry[0] is the cell tag, e.g. "THOUGHT_03").
  // For each cell carrying a Backtracking_XX tag it checks that:
  //   - the cell has at least one other tag besides the cell tag,
  //   - XX is written with exactly two digits,
  //   - THOUGHT_XX is the cell right before the closest preceding run of
  //     cells tagged "Mistake".
  const BACKTRACK_PREFIX = 'Backtracking_';
  const isBacktrackTag = (tag) => tag.startsWith(BACKTRACK_PREFIX);
  const isMistake = (entry) => entry.includes('Mistake');

  arr.forEach((entry, i) => {
    const backtrackTag = entry.find(isBacktrackTag);
    if (!backtrackTag) return;

    assert(entry.filter((tag) => !isBacktrackTag(tag)).length > 1,
      `Backtracking tag should have at least one reasoning tag in ${entry[0]}.`);

    const targetNum = backtrackTag.slice(BACKTRACK_PREFIX.length);
    assert(targetNum.length === 2, `Improper format in backtracking in: ${entry[0]}`);
    const targetThought = `THOUGHT_${targetNum.padStart(2, '0')}`;

    // Walk back to the closest Mistake cell, then past the whole Mistake run.
    let j = i - 1;
    while (j >= 0 && !isMistake(arr[j])) j--;
    while (j >= 0 && isMistake(arr[j])) j--;

    // j is -1 when there is no Mistake cell before this one or when the
    // Mistake run starts at the first cell; `?.` turns that case into a
    // reported failure instead of a TypeError that aborts the whole script.
    assert(arr[j]?.[0] === targetThought, `Backtracking is point to wrong thought in ${entry[0]}.`);
  });
}
function checkThoughtTags(tags) {
  // Validates the header of a THOUGHT cell. `tags[0]` is "THOUGHT_XX" and the
  // remaining entries are its reasoning tags. Reads `allowedPattern` and
  // reads/updates the shared `thoughtCounter`.
  const [thoughtTag, ...reasoningTags] = tags;

  const thoughtNumStr = thoughtTag.slice('THOUGHT_'.length);
  const thoughtNum = Number.parseInt(thoughtNumStr, 10);
  if (Number.isNaN(thoughtNum)) {
    console.error(`Invalid thought number: ${thoughtTag}`);
    return;
  }

  // Thought number should be in format "01, 02..." (assuming max 100 thoughts).
  assert(thoughtNumStr.length === 2, `Improper format in thought number: ${thoughtTag}`);

  // Collect reasoning tags that are not on the allow-list.
  const invalid = reasoningTags.filter((tag) => !allowedPattern.test(tag));
  assert(invalid.length === 0, `THOUGHT ${thoughtNum} has invalid tags: ${invalid}`);

  // Thoughts must be numbered consecutively. The counter is advanced even
  // after a gap so that one wrong number is reported once, not on every
  // following thought.
  assert(thoughtNum === thoughtCounter + 1, `Thought ${thoughtNum} appeared after ${thoughtCounter}.`);
  thoughtCounter = thoughtNum;

  // At least one reasoning tag is mandatory.
  assert(reasoningTags.length !== 0, `No tags found for Thought ${thoughtCounter}.`);

  // The two final tags must stand alone.
  assert(!(reasoningTags.includes('Final_Decision') && reasoningTags.length > 1), "Final_Decision tag should not contain any other tags.");
  assert(!(reasoningTags.includes('Final_Summary') && reasoningTags.length > 1), "Final_Summary tag should not contain any other tags.");
}
function getTags(text) {
  // Returns the names found in **[NAME]** markers on the first non-empty line
  // of `text`, in order of appearance. Markers on later lines are ignored.
  // Returns an empty array when the text has no non-empty line or that line
  // carries no marker.
  const firstLine = text.match(/^(?!\s*$).+/m)?.[0];
  if (!firstLine) return [];
  return Array.from(firstLine.matchAll(/\*\*\[(.*?)\]\*\*/g), (match) => match[1]);
}
function tagSanity(tags) {
  // Checks the first tag of a cell against `allowedTags`, reports an unknown
  // tag through assert, and returns whether the tag was accepted.
  const [firstTag] = tags;
  const valid = allowedTags.test(firstTag);
  assert(valid, `Unknown tag found:${firstTag}`);
  return valid;
}
//-------------------------------------// | |
// Main pass: run the per-cell checks on every `.cell` element of the page.
// NOTE(review): relies on each `.cell` element exposing getText(), presumably
// provided by the Colab notebook page - confirm.
document.querySelectorAll('.cell').forEach((cell) => {
  const cellText = cell.getText();
  const tags = getTags(cellText);
  if (tags.length === 0) return; // no tag header on the first non-empty line (blank cell?)
  if (!tagSanity(tags)) return; // unknown first tag: already reported, skip the cell
  tagList.push(tags);

  if (tags[0].startsWith('THOUGHT')) {
    checkThoughtTags(tags);
    // Only first person singular is allowed in thoughts.
    checkFirstPersonPlural(cellText, tags);
  } else {
    checkFirstPersonSingular(cellText, tags);
  }

  assert(!containsUnicode(cellText), `Unicode character found in ${tags[0]}`);
  listInvLatex(cellText, tags);
  nonLatexNumber(cellText, tags);
  // There should be an empty line right after the tag line.
  assert(checkNewLine(cellText, tags), `Put a new line after tags in ${tags[0]}.`);
  assert(checkNoBlankLines(cellText), `Remove blank lines from the beginning/end of cell ${tags[0]}.`);
});
// Whole-notebook checks, run once after every cell has been collected.
checkBacktrack(tagList);
// Kept as `var` so the script can be pasted into the console more than once.
var n = tagList.length;
// The notebook must end with: Final_Decision thought, Final_Summary thought,
// RESPONSE. `?.` keeps a notebook with fewer than three tagged cells from
// throwing a TypeError; the missing cells are reported as failures instead.
assert(tagList[n - 3]?.[1] === 'Final_Decision', "2 Cells before the last should be Decision.");
assert(tagList[n - 2]?.[1] === 'Final_Summary', "Cell before the last should be Summary.");
assert(tagList[n - 1]?.[0] === 'RESPONSE', "Last cell should be Response.");
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment