Last active
April 23, 2023 20:19
-
-
Save vincelwt/e4604681511fa7930561858e7757b0b3 to your computer and use it in GitHub Desktop.
Remove duplicates & merge KindleClippings.txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Remove duplicates and merge multiple Kindle clipping files.
// Usage: deno run -A remove-duplicates.ts MyKindleClippings1.txt MyKindleClippings2.txt
/**
 * Per-clipping data retained while de-duplicating.
 *
 * The title and starting location are not stored here; `removeDuplicates`
 * tracks them separately when it groups clippings.
 */
interface ClippingData {
  /**
   * Text captured after "Added on" in the clipping's metadata line, or the
   * current date rendered as a string when that marker is missing.
   */
  date: string;
  /** Clipping body: everything after the third line of the entry, trimmed. */
  content: string;
  /** Second number of a "Location A-B" range, or "" for a single location. */
  endLocation: string;
}
/**
 * A parsed clipping plus everything needed to rebuild its header line.
 * Title, kind and location live on the record itself (instead of being packed
 * into a delimited map key) so that titles containing "|" survive a round trip.
 */
type StoredClipping = ClippingData & {
  title: string;
  /** Word following "- Your " in the metadata line, e.g. "Highlight" or "Note". */
  kind: string;
  /** Starting location; together with title and kind it identifies a clipping. */
  location: string;
  /** `date` as epoch milliseconds, or -Infinity when `date` cannot be parsed. */
  time: number;
};

/** Converts a date string to epoch milliseconds; unparseable input maps to -Infinity. */
function clippingTime(date: string): number {
  const time = new Date(date).getTime();
  // NaN would poison both the "is newer" check and the sort comparator.
  return Number.isNaN(time) ? Number.NEGATIVE_INFINITY : time;
}

/**
 * Parses one "=========="-delimited entry.
 *
 * @returns the parsed clipping, or `undefined` when the entry has fewer than
 *   four lines (e.g. it has no body) or its metadata line has no
 *   "Location <n>" part.
 */
function parseClipping(rawClipping: string): StoredClipping | undefined {
  // Accept both LF and CRLF so multi-line bodies do not keep stray "\r".
  const lines = rawClipping.trim().split(/\r?\n/);
  if (lines.length < 4) return undefined;

  const title = lines[0].trim();
  const highlightInfo = lines[1].trim();

  const locationMatch = highlightInfo.match(/Location (\d+)-?(\d+)?/);
  if (!locationMatch) return undefined;

  const kindMatch = highlightInfo.match(/^- Your (\w+)/);
  const dateMatch = highlightInfo.match(/Added on (.+)/);
  // Use the current date if the metadata line carries none.
  const date = dateMatch ? dateMatch[1] : new Date().toString();

  return {
    title,
    // Fall back to "Highlight" when the kind cannot be read from the line.
    kind: kindMatch ? kindMatch[1] : "Highlight",
    location: locationMatch[1],
    endLocation: locationMatch[2] ?? "",
    date,
    time: clippingTime(date),
    // Line index 2 is the blank separator; the body starts at index 3.
    content: lines.slice(3).join("\n").trim(),
  };
}

/** Renders a clipping as title, metadata line, blank line and body. */
function formatClipping(clipping: StoredClipping): string {
  const { title, kind, location, endLocation, date, content } = clipping;
  const locationInfo = endLocation
    ? `Location ${location}-${endLocation}`
    : `Location ${location}`;
  return `${title}\n- Your ${kind} on ${locationInfo} | Added on ${date}\n\n${content}`;
}

/**
 * Merges the given clipping files, drops duplicates, and writes the result to
 * "merged_cleaned_clippings.txt" in the current working directory.
 *
 * Two clippings are duplicates when they share title, kind and starting
 * location; the one with the latest "Added on" date wins (the first one seen
 * wins on a tie). The output is sorted by date, oldest first, with clippings
 * whose date cannot be parsed placed at the start. Only the location is kept in
 * the rebuilt metadata line; any other fields of the original line (such as a
 * page number) are not written back.
 *
 * @param filePaths paths of the clipping files to read, in order.
 * @throws whatever `Deno.readTextFile` / `Deno.writeTextFile` reject with.
 */
async function removeDuplicates(filePaths: string[]): Promise<void> {
  const uniqueClippings = new Map<string, StoredClipping>();

  for (const filePath of filePaths) {
    const data = await Deno.readTextFile(filePath);

    for (const rawClipping of data.split("==========")) {
      const clipping = parseClipping(rawClipping);
      if (!clipping) continue; // Skip empty or unrecognised entries

      // JSON-encode the parts so no character in a title can collide with a separator.
      const key = JSON.stringify([clipping.title, clipping.kind, clipping.location]);

      // Keep the latest version of duplicate clippings.
      const existing = uniqueClippings.get(key);
      if (!existing || clipping.time > existing.time) {
        uniqueClippings.set(key, clipping);
      }
    }
  }

  const outputFile = "merged_cleaned_clippings.txt";
  const outputData = [...uniqueClippings.values()]
    // Explicit three-way compare: subtracting two -Infinity values would give NaN.
    .sort((a, b) => (a.time === b.time ? 0 : a.time < b.time ? -1 : 1))
    .map((clipping) => formatClipping(clipping))
    .join("\n==========\n");

  await Deno.writeTextFile(outputFile, outputData);
  console.log(`Cleaned clippings saved to ${outputFile}`);
}
// Command-line entry point: every argument is treated as a clippings file to merge.
if (Deno.args.length < 1) {
  console.error("Please provide one or more file names as arguments.");
  Deno.exit(1);
}
const filePaths = Deno.args;
// Do not leave the promise floating: report read/write failures (for example a
// missing input file) with a clear message and a non-zero exit status.
removeDuplicates(filePaths).catch((error: unknown) => {
  const reason = error instanceof Error ? error.message : String(error);
  console.error(`Failed to merge clippings: ${reason}`);
  Deno.exit(1);
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment