Skip to content

Instantly share code, notes, and snippets.

@vincelwt
Last active April 23, 2023 20:19
Show Gist options
  • Save vincelwt/e4604681511fa7930561858e7757b0b3 to your computer and use it in GitHub Desktop.
Save vincelwt/e4604681511fa7930561858e7757b0b3 to your computer and use it in GitHub Desktop.
Remove duplicates & merge KindleClippings.txt
// Remove duplicate clippings and merge one or more Kindle clipping files.
// The merged result is written to merged_cleaned_clippings.txt.
// Usage: deno run -A remove-duplicates.ts MyKindleClippings1.txt MyKindleClippings2.txt
/**
 * What is remembered about one clipping while files are being merged.
 * The clipping's title and starting location are not stored here; they form
 * the key under which the record is kept.
 */
interface ClippingData {
  /** Text captured after "Added on" in the clipping's info line, or the current date's string form when that text is absent. */
  date: string;
  /** The clipping body: every line from the fourth onward, joined with "\n" and trimmed. */
  content: string;
  /** Second number of a "Location A-B" range, or "" when only one number was given. */
  endLocation: string;
}
/**
 * Converts a clipping's "Added on" text into epoch milliseconds.
 *
 * Text that cannot be parsed as a date yields 0 instead of NaN, so the
 * "is newer" comparison and the final sort always work on real numbers.
 */
function clippingTimestamp(date: string): number {
  const timestamp = Date.parse(date);
  return Number.isNaN(timestamp) ? 0 : timestamp;
}

/**
 * Reads every file in `filePaths`, drops duplicate clippings and writes the
 * remaining ones, oldest first, to `merged_cleaned_clippings.txt`.
 *
 * Two clippings count as duplicates when they share a title and a starting
 * location. The one with the later "Added on" date is kept; on a tie the one
 * seen first is kept. Entries with fewer than four lines, or whose info line
 * has no `Location <number>`, are skipped.
 *
 * NOTE(review): every entry is written back as "Your Highlight", and the key
 * ignores the entry type, so a note and a highlight starting at the same
 * location are treated as duplicates — confirm this is intended.
 *
 * @param filePaths Paths of the clipping files to merge.
 * @returns A promise that resolves once the merged file has been written.
 */
async function removeDuplicates(filePaths: string[]): Promise<void> {
  const uniqueClippings: Map<string, ClippingData> = new Map();

  for (const filePath of filePaths) {
    const data = await Deno.readTextFile(filePath);

    for (const clipping of data.split("==========")) {
      // Accept LF and CRLF line endings so no stray "\r" leaks into the content.
      const lines = clipping.trim().split(/\r?\n/);
      if (lines.length < 4) continue; // Skip empty clippings

      const title = lines[0].trim();
      const highlightInfo = lines[1].trim();

      const locationMatch = highlightInfo.match(/Location (\d+)-?(\d+)?/);
      if (!locationMatch) continue;
      const location = locationMatch[1]; // Starting location only; it is part of the key
      const endLocation = locationMatch[2] ?? ""; // End of the range, if one was given

      const dateMatch = highlightInfo.match(/Added on (.+)/);
      const date = dateMatch ? dateMatch[1] : new Date().toString(); // Use the current date if not found

      const content = lines.slice(3).join("\n").trim();

      const key = `${title}|${location}`;
      const existing = uniqueClippings.get(key);
      // Keep the latest version of duplicate clippings; the first one seen wins a tie.
      if (!existing || clippingTimestamp(date) > clippingTimestamp(existing.date)) {
        uniqueClippings.set(key, { date, content, endLocation });
      }
    }
  }

  const outputFile = "merged_cleaned_clippings.txt";
  const outputData = Array.from(uniqueClippings.entries())
    .sort(([, a], [, b]) => clippingTimestamp(a.date) - clippingTimestamp(b.date))
    .map(([key, { date, content, endLocation }]) => {
      // A title may itself contain "|", so split on the LAST separator only:
      // whatever follows it is the all-digit starting location.
      const separator = key.lastIndexOf("|");
      const title = key.slice(0, separator);
      const location = key.slice(separator + 1);
      const locationInfo = endLocation
        ? `Location ${location}-${endLocation}`
        : `Location ${location}`;
      return `${title}\n- Your Highlight on ${locationInfo} | Added on ${date}\n\n${content}`;
    })
    .join("\n==========\n");

  await Deno.writeTextFile(outputFile, outputData);
  console.log(`Cleaned clippings saved to ${outputFile}`);
}
// Script entry point: require at least one input file, then merge them all.
if (Deno.args.length < 1) {
  console.error("Please provide one or more file names as arguments.");
  Deno.exit(1);
}

const filePaths = Deno.args;

// Handle a rejected promise explicitly (for example an unreadable input file)
// so the failure is reported as a short message with a non-zero exit status
// instead of surfacing as an unhandled rejection.
removeDuplicates(filePaths).catch((error: unknown) => {
  const reason = error instanceof Error ? error.message : String(error);
  console.error(`Failed to merge clippings: ${reason}`);
  Deno.exit(1);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment