travisbhartwell · June 26, 2025 22:16 · travisbhartwell · Jun 26, 2025
diff --git a/process_messages.py b/process_messages.py
 #!/usr/bin/env python3

 from collections import defaultdict
 import csv
 from dataclasses import dataclass
 from datetime import datetime
 from pathlib import Path
 import sys


 @dataclass(frozen=True, order=True)
 class Person:
    """A message participant: a display name paired with a profile URL."""

    # Display name of the participant.
    name: str
    # Profile URL string stored alongside the name.
    profile_url: str


 @dataclass(frozen=True, order=True)
 class Message:
    """A single message together with its sender, recipient and metadata."""

    conversation_id: str
    # NOTE: the spelling of this field is part of the interface; other code
    # reads it by this exact name.
    converstaion_title: str
    message_from: Person
    message_to: Person
    date: datetime
    subject: str
    content: str
    folder: str

    def other_than_me(self, my_name: str) -> str:
        """Return the name of the participant who is not ``my_name``.

        The recipient is chosen when the sender's name equals ``my_name``;
        in every other case the sender is chosen.
        """
        sent_by_me = self.message_from.name == my_name
        counterpart = self.message_to if sent_by_me else self.message_from
        return counterpart.name

    def other_than_me_profile(self, my_name: str) -> str:
        """Return the profile URL of the participant who is not ``my_name``.

        Uses the same sender/recipient choice as ``other_than_me``.
        """
        sent_by_me = self.message_from.name == my_name
        counterpart = self.message_to if sent_by_me else self.message_from
        return counterpart.profile_url


 @dataclass(frozen=True, order=True)
 class Conversation:
    """Per-conversation record used to build a folder's summary table."""

    # Path of the Markdown file the conversation is written to.
    filename: Path
    # Date of the latest message in the conversation.
    most_recent: datetime
    # Name of the other participant.
    other_than_me: str
    # Profile URL of the other participant.
    other_than_me_profile: str
    # Title shown in the summary table.
    conversation_title: str
    # Number of messages in the conversation.
    message_count: int


 # Column headers of the messages CSV; each is used as a key into the rows
 # produced by csv.DictReader when the file is loaded.
 CONVERSATION_ID_FIELD = "CONVERSATION ID"
 CONVERSATION_TITLE_FIELD = "CONVERSATION TITLE"
 FROM_FIELD = "FROM"
 SENDER_PROFILE_URL_FIELD = "SENDER PROFILE URL"
 TO_FIELD = "TO"
 RECIPIENT_PROFILE_URLS_FIELD = "RECIPIENT PROFILE URLS"
 DATE_FIELD = "DATE"
 SUBJECT_FIELD = "SUBJECT"
 CONTENT_FIELD = "CONTENT"
 FOLDER_FIELD = "FOLDER"


 def load_messages_csv(messages_csv: Path) -> list[Message]:
    """Load the one-to-one messages from a messages CSV file.

    Rows whose TO column contains a comma are skipped.

    Args:
        messages_csv: Path of the CSV file. Its header row must contain the
            columns named by the ``*_FIELD`` constants.

    Returns:
        The parsed messages, sorted by ``Message``'s dataclass ordering.

    Raises:
        KeyError: If a row lacks one of the expected columns.
        ValueError: If a DATE value does not match ``%Y-%m-%d %H:%M:%S %Z``.
    """
    messages = []

    # newline="" leaves line-ending handling to the csv module, so quoted
    # fields that contain line breaks are read back unchanged. The encoding
    # is pinned so parsing does not depend on the locale; "utf-8-sig" also
    # drops a leading byte-order mark that would otherwise become part of
    # the first column name.
    with messages_csv.open(newline="", encoding="utf-8-sig") as f:
        for row in csv.DictReader(f):
            recipient = row[TO_FIELD]

            # Skip messages to multiple people, usually spam
            if "," in recipient:
                continue

            messages.append(
                Message(
                    row[CONVERSATION_ID_FIELD],
                    row[CONVERSATION_TITLE_FIELD],
                    Person(row[FROM_FIELD], row[SENDER_PROFILE_URL_FIELD]),
                    Person(recipient, row[RECIPIENT_PROFILE_URLS_FIELD]),
                    datetime.strptime(row[DATE_FIELD], "%Y-%m-%d %H:%M:%S %Z"),
                    row[SUBJECT_FIELD],
                    row[CONTENT_FIELD],
                    row[FOLDER_FIELD],
                )
            )

    return sorted(messages)


 def group_messages(messages: list[Message]) -> dict[str, dict[str, list[Message]]]:
    """Bucket messages by folder, then by conversation id.

    Args:
        messages: Messages to group; their relative order is preserved
            inside every bucket.

    Returns:
        A dict keyed by folder whose values are ``defaultdict(list)``
        instances keyed by conversation id. Folders and conversation ids
        appear in first-seen order.
    """
    grouped: dict[str, dict[str, list[Message]]] = {}

    for msg in messages:
        per_conversation = grouped.get(msg.folder)
        if per_conversation is None:
            per_conversation = grouped[msg.folder] = defaultdict(list)
        per_conversation[msg.conversation_id].append(msg)

    return grouped


 def render_conversation(
    output_filename: Path,
    conversation_id: str,
    other_than_me: str,
    other_than_me_profile: str,
    messages: list[Message],
 ) -> None:
    """Write one conversation to a Markdown file.

    The file starts with a heading and a short bullet list (link to the
    other participant, conversation id, message count, date of the last
    message), followed by one ``---``-separated section per message.

    Args:
        output_filename: Markdown file to create or overwrite.
        conversation_id: Identifier printed in the header list.
        other_than_me: Name of the other participant.
        other_than_me_profile: Profile URL of the other participant.
        messages: Messages in the order they should appear. The title is
            read from the first one and the "most recent" date from the
            last one.

    Raises:
        IndexError: If ``messages`` is empty.
    """
    conversation_title = messages[0].converstaion_title
    message_count = len(messages)
    most_recent = messages[-1].date

    print(
        f"Writing conversation '{conversation_id}' with {message_count} messages with title '{conversation_title}' to '{output_filename}'."
    )

    lines = [
        f"# Conversation with {other_than_me}: '{conversation_title}'\n",
        f"* [{other_than_me}]({other_than_me_profile})\n",
        f"* **Conversation Id**: {conversation_id}\n",
        f"* **Total Messages**: {message_count}\n",
        f"* **Most Recent Message**: {most_recent}\n\n",
    ]

    for message in messages:
        lines.extend(
            (
                "---\n\n",
                f"* **Date**: {message.date}\n",
                f"* **From**: {message.message_from.name}\n\n",
                message.content,
                "\n\n",
            )
        )

    # Written as UTF-8 regardless of locale: message text is arbitrary
    # Unicode and the Markdown is later fed to pandoc, which reads UTF-8.
    with output_filename.open("w", encoding="utf-8") as f:
        f.writelines(lines)


 def render_summary(
    output_filename: Path, folder_name: str, conversations: list[Conversation]
 ) -> None:
    """Write a Markdown table that indexes a folder's conversations.

    Rows are ordered by most recent message, newest first, with the other
    participant's name as tie-breaker. Each row links to the ``.html``
    counterpart of the conversation's Markdown file.

    Args:
        output_filename: Markdown summary file to create or overwrite.
        folder_name: Folder name shown in the heading; also used as the
            link's directory when the conversation file does not live below
            the summary's own directory.
        conversations: Conversation records to list.
    """
    sorted_conversations = sorted(
        conversations, key=lambda x: (x.most_recent, x.other_than_me), reverse=True
    )
    summary_directory = output_filename.parent

    rows = [
        f"# Linked In Messages in Folder {folder_name}\n\n",
        "| Person | Most Recent Message | Conversation Title | Message Count | Messages |\n",
        "|--------|---------------------|--------------------|---------------|----------|\n",
    ]

    for conversation in sorted_conversations:
        # Swap only the final suffix and derive the link from path
        # components. Splitting the whole path on "." or searching it for
        # the folder name goes wrong as soon as a directory or a person's
        # name contains a dot, or the folder name occurs earlier in the path.
        html_path = conversation.filename.with_suffix(".html")
        try:
            link = html_path.relative_to(summary_directory).as_posix()
        except ValueError:
            link = f"{folder_name}/{html_path.name}"

        rows.append(
            f"| [{conversation.other_than_me}]({conversation.other_than_me_profile}) "
            f"| {conversation.most_recent} "
            f"| {conversation.conversation_title} "
            f"| {conversation.message_count} "
            f"| [messages](./{link}) |\n"
        )

    # UTF-8 regardless of locale: names and titles are arbitrary Unicode.
    with output_filename.open("w", encoding="utf-8") as f:
        f.writelines(rows)


 def render_message_folder(
    output_directory: Path,
    my_name: str,
    folder_name: str,
    conversations: dict[str, list[Message]],
 ) -> None:
    """Render every conversation of one message folder plus its summary.

    Each conversation is written to
    ``<output_directory>/<folder_name>/<name>-<n>.md``, where ``<name>`` is
    the other participant's name with spaces and slashes replaced by ``-``
    and ``<n>`` counts the files already produced for that name. A summary
    table is then written to ``<output_directory>/<folder_name>.md``.

    Args:
        output_directory: Root directory for all rendered output.
        my_name: Name used to tell the other participant apart.
        folder_name: Name of the message folder being rendered.
        conversations: Messages keyed by conversation id; every list is
            expected to be non-empty.
    """
    folder_output_directory = output_directory.joinpath(folder_name)
    folder_output_directory.mkdir(parents=True, exist_ok=True)

    print(
        f"Writing messages from message folder '{folder_name}' to directory '{folder_output_directory}'."
    )

    # Keyed by the sanitised file stem rather than the raw name, so two
    # names that sanitise to the same stem still get distinct files.
    files_per_name = defaultdict(int)
    conversation_files = []

    for conversation_id, messages in conversations.items():
        first_message = messages[0]
        other_than_me = first_message.other_than_me(my_name)
        other_than_me_profile = first_message.other_than_me_profile(my_name)

        conversation_name = other_than_me.replace(" ", "-").replace("/", "-")
        files_per_name[conversation_name] += 1
        conversation_filename = folder_output_directory.joinpath(
            f"{conversation_name}-{files_per_name[conversation_name]}.md"
        )

        sorted_messages = sorted(messages, key=lambda x: x.date)
        # Escape pipes so a title cannot break the summary's table row.
        conversation_title = first_message.converstaion_title.replace("|", "\\|")

        conversation_files.append(
            Conversation(
                conversation_filename,
                sorted_messages[-1].date,
                other_than_me,
                other_than_me_profile,
                conversation_title if conversation_title else "None",
                len(sorted_messages),
            )
        )

        render_conversation(
            conversation_filename,
            conversation_id,
            other_than_me,
            other_than_me_profile,
            sorted_messages,
        )

    # Written once, after all conversations are known, instead of being
    # rewritten from scratch after every single conversation.
    render_summary(
        output_directory.joinpath(f"{folder_name}.md"),
        folder_name,
        conversation_files,
    )


 def main() -> None:
    """Command-line entry point.

    Expects ``<messages.csv> <output-directory> "<My Name>"`` in
    ``sys.argv``. Loads the CSV, groups the messages by folder and
    conversation, and renders every folder below the output directory,
    creating that directory when needed.

    Exits with status 1, after printing a message to stderr, when too few
    arguments are given or the CSV path is not an existing file.
    """
    if len(sys.argv) < 4:
        print(
            f'{sys.argv[0]} <messages.csv> <output-directory> "<My Name>"',
            file=sys.stderr,
        )
        sys.exit(1)

    messages_csv = Path(sys.argv[1])

    # is_file() also rejects a directory, which would otherwise only fail
    # later when it is opened.
    if not messages_csv.is_file():
        print(
            f"Messages file '{messages_csv}' does not exist or is not a file.",
            file=sys.stderr,
        )
        sys.exit(1)

    output_directory = Path(sys.argv[2])
    if not output_directory.exists():
        print(f"Output folder '{output_directory}' doesn't exist, creating.")
    # exist_ok avoids failing if the directory appears between the check
    # above and this call.
    output_directory.mkdir(parents=True, exist_ok=True)

    my_name = sys.argv[3]

    messages = load_messages_csv(messages_csv)
    print(f"Loaded {len(messages)} messages.")

    grouped_messages = group_messages(messages)

    for folder, conversations in grouped_messages.items():
        render_message_folder(output_directory, my_name, folder, conversations)

 # Run the command-line entry point only when executed as a script. main()
 # returns nothing, so its result is not bound to a name.
 if __name__ == "__main__":
    main()
diff --git a/render-messages b/render-messages
 #!/usr/bin/env bash
 # -*- mode: shell-script; sh-shell: bash; sh-basic-offset: 4; sh-indentation: 4; coding: utf-8 -*-
 # shellcheck shell=bash
 #
 # Convert every Markdown file below the "messages" directory next to this
 # script into an HTML page: pandoc renders the Markdown, and the result is
 # substituted for ${CONTENT} in site.html.template. Each .html file is written
 # beside its .md source. A file pandoc cannot process is reported on stderr
 # and skipped.

 set -o nounset -o errexit -o errtrace -o pipefail

 if ! DATA_DIRECTORY=$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd -P); then
    echo >&2 "Error fetching data directory."
    exit 1
 fi
 readonly DATA_DIRECTORY

 readonly MESSAGES_DIRECTORY="${DATA_DIRECTORY}/messages"

 if [[ ! -d "${MESSAGES_DIRECTORY}" ]]; then
    echo >&2 "Messages directory not found!"
    exit 1
 fi

 readonly SITE_TEMPLATE="${DATA_DIRECTORY}/site.html.template"

 # Check once, up front, instead of failing on the input redirect for the
 # first file inside the loop.
 if [[ ! -r "${SITE_TEMPLATE}" ]]; then
    echo >&2 "Site template '${SITE_TEMPLATE}' not found!"
    exit 1
 fi

 cd "${MESSAGES_DIRECTORY}"

 # globstar lets ** match across directory levels; nullglob makes the loop run
 # zero times, rather than once with the literal pattern, when nothing matches.
 shopt -s globstar nullglob

 for markdown_file in **/*.md; do
    echo "Processing file '${markdown_file}'"

    file_base="$(basename "${markdown_file}" .md)"
    file_dir="$(dirname "${markdown_file}")"
    html_file="${file_dir}/${file_base}.html"

    if ! CONTENT=$(pandoc --from markdown --to html5 "${markdown_file}"); then
        echo >&2 "Error processing '${markdown_file}'."
    else
        export CONTENT
        # Limit substitution to ${CONTENT} so any other $NAME text in the
        # template is copied through unchanged.
        # shellcheck disable=SC2016
        envsubst '${CONTENT}' < "${SITE_TEMPLATE}" > "${html_file}"
    fi
 done
diff --git a/site.html.template b/site.html.template
 <!doctype html>
 <!-- Page shell for rendered messages; the ${CONTENT} placeholder in <main> is
      replaced with the generated HTML for each page. -->
 <html lang="en" data-theme="light">

 <head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <meta name="color-scheme" content="light dark">
  <title>LinkedIn Messages</title>
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@picocss/pico@2/css/pico.min.css">
 </head>

 <body>
  <main class="container">
    ${CONTENT}
  </main>
 </body>

 </html>
	#!/usr/bin/env python3

	from collections import defaultdict
	import csv
	from dataclasses import dataclass
	from datetime import datetime
	from pathlib import Path
	import sys


	@dataclass(frozen=True, order=True)
	class Person:
	name: str
	profile_url: str


	@dataclass(frozen=True, order=True)
	class Message:
	conversation_id: str
	converstaion_title: str
	message_from: Person
	message_to: Person
	date: datetime
	subject: str
	content: str
	folder: str

	def other_than_me(self, my_name: str) -> str:
	if self.message_from.name == my_name:
	return self.message_to.name
	else:
	return self.message_from.name

	def other_than_me_profile(self, my_name: str) -> str:
	if self.message_from.name == my_name:
	return self.message_to.profile_url
	else:
	return self.message_from.profile_url


	@dataclass(frozen=True, order=True)
	class Conversation:
	filename: Path
	most_recent: datetime
	other_than_me: str
	other_than_me_profile: str
	conversation_title: str
	message_count: int


	CONVERSATION_ID_FIELD = "CONVERSATION ID"
	CONVERSATION_TITLE_FIELD = "CONVERSATION TITLE"
	FROM_FIELD = "FROM"
	SENDER_PROFILE_URL_FIELD = "SENDER PROFILE URL"
	TO_FIELD = "TO"
	RECIPIENT_PROFILE_URLS_FIELD = "RECIPIENT PROFILE URLS"
	DATE_FIELD = "DATE"
	SUBJECT_FIELD = "SUBJECT"
	CONTENT_FIELD = "CONTENT"
	FOLDER_FIELD = "FOLDER"


	def load_messages_csv(messages_csv: Path) -> list[Message]:
	messages = []

	with messages_csv.open() as f:
	reader = csv.DictReader(f)

	for row in reader:
	# Skip messages to multiple people, usually spam
	if "," in row[TO_FIELD]:
	continue

	message_from = Person(row[FROM_FIELD], row[SENDER_PROFILE_URL_FIELD])
	message_to = Person(row[TO_FIELD], row[RECIPIENT_PROFILE_URLS_FIELD])
	date = datetime.strptime(row[DATE_FIELD], "%Y-%m-%d %H:%M:%S %Z")
	message = Message(
	row[CONVERSATION_ID_FIELD],
	row[CONVERSATION_TITLE_FIELD],
	message_from,
	message_to,
	date,
	row[SUBJECT_FIELD],
	row[CONTENT_FIELD],
	row[FOLDER_FIELD],
	)

	messages.append(message)

	return sorted(messages)


	def group_messages(messages: list[Message]) -> dict[str, dict[str, list[Message]]]:
	grouped_messages = {}

	by_folder = defaultdict(list)

	for message in messages:
	by_folder[message.folder].append(message)

	for folder, folder_messages in by_folder.items():
	grouped_messages[folder] = defaultdict(list)

	for message in folder_messages:
	grouped_messages[folder][message.conversation_id].append(message)

	return grouped_messages


	def render_conversation(
	output_filename: Path,
	conversation_id: str,
	other_than_me: str,
	other_than_me_profile: str,
	messages: list[Message],
	):
	conversation_title = messages[0].converstaion_title
	message_count = len(messages)
	most_recent = messages[-1].date

	print(
	f"Writing conversation '{conversation_id}' with {message_count} messages with title '{conversation_title}' to '{output_filename}'."
	)

	with output_filename.open("w") as f:
	f.write(f"# Converation with {other_than_me}: '{conversation_title}'\n")
	f.write(f"* [{other_than_me}]({other_than_me_profile})\n")
	f.write(f"* Conversation Id: {conversation_id}\n")
	f.write(f"* Total Messages: {message_count}\n")
	f.write(f"* Most Recent Message: {most_recent}\n\n")

	for message in messages:
	f.write(f"---\n\n")
	f.write(f"* Date: {message.date}\n")
	f.write(f"* From: {message.message_from.name}\n\n")
	f.write(message.content)
	f.write("\n\n")


	def render_summary(
	output_filename: Path, folder_name: str, conversations: list[Conversation]
	):
	sorted_conversations = sorted(
	conversations, key=lambda x: (x.most_recent, x.other_than_me), reverse=True
	)
	with output_filename.open("w") as f:
	f.write(f"# Linked In Messages in Folder {folder_name}\n\n")

	f.write(
	f"\| Person \| Most Recent Message \| Conversation Title \| Message Count \| Messages \|\n"
	)
	f.write(
	f"\|--------\|---------------------\|--------------------\|---------------\|----------\|\n"
	)

	for conversation in sorted_conversations:
	f.write(
	f"\| [{conversation.other_than_me}]({conversation.other_than_me_profile}) "
	)
	f.write(f"\| {conversation.most_recent} ")
	f.write(f"\| {conversation.conversation_title} ")
	f.write(f"\| {conversation.message_count} ")

	conversation_link = conversation.filename.as_posix().split(".")[0]
	folder_start = conversation_link.find(folder_name)
	conversation_link = conversation_link[folder_start:]
	f.write(f"\| [messages](./{conversation_link}.html) \|\n")


	def render_message_folder(
	output_directory: Path,
	my_name: str,
	folder_name: str,
	conversations: dict[str, list[Message]],
	) -> None:
	folder_output_directory = output_directory.joinpath(folder_name)
	if not folder_output_directory.exists():
	folder_output_directory.mkdir()

	print(
	f"Writing messages from message folder '{folder_name}' to directory '{folder_output_directory}'."
	)

	conversations_by_other_than_me = defaultdict(list)
	conversation_files = []

	for conversation_id, messages in conversations.items():
	other_than_me = messages[0].other_than_me(my_name)
	other_than_me_profile = messages[0].other_than_me_profile(my_name)
	conversations_by_other_than_me[other_than_me].append(
	messages[0].conversation_id
	)

	conversation_name = other_than_me.replace(" ", "-").replace("/", "-")
	conversation_count = len(conversations_by_other_than_me[other_than_me])
	conversation_filename = folder_output_directory.joinpath(
	f"{conversation_name}-{conversation_count}.md"
	)
	sorted_messages = sorted(messages, key=lambda x: x.date)
	conversation_title = messages[0].converstaion_title.replace("\|", "\\|")

	conversation = Conversation(
	conversation_filename,
	sorted_messages[-1].date,
	other_than_me,
	other_than_me_profile,
	conversation_title if conversation_title else "None",
	len(sorted_messages),
	)
	conversation_files.append(conversation)

	render_conversation(
	conversation_filename,
	conversation_id,
	other_than_me,
	other_than_me_profile,
	sorted_messages,
	)

	render_summary(
	output_directory.joinpath(f"{folder_name}.md"),
	folder_name,
	conversation_files,
	)


	def main():
	if len(sys.argv) < 4:
	print(
	f'{sys.argv[0]} <messages.csv> <output-directory> "<My Name>"',
	file=sys.stderr,
	)
	sys.exit(1)

	messages_csv = Path(sys.argv[1])

	if not messages_csv.exists():
	print(f"Messages file '{messages_csv}' does not exist.")
	sys.exit(1)

	output_directory = Path(sys.argv[2])
	if not output_directory.exists():
	print(f"Output folder '{output_directory}' doesn't exist, creating.")
	output_directory.mkdir(parents=True)

	my_name = sys.argv[3]

	messages = load_messages_csv(messages_csv)
	print(f"Loaded {len(messages)} messages.")

	grouped_messages = group_messages(messages)

	for folder, conversations in grouped_messages.items():
	render_message_folder(output_directory, my_name, folder, conversations)


	if __name__ == "__main__":
	messages = main()
	#!/usr/bin/env bash
	# -- mode: shell-script; sh-shell: bash; sh-basic-offset: 4; sh-indentation: 4; coding: utf-8 --
	# shellcheck shell=bash

	set -o nounset -o errexit -o errtrace -o pipefail

	if ! DATA_DIRECTORY=$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd -P); then
	echo >&2 "Error fetching data directory."
	exit 1
	fi
	readonly DATA_DIRECTORY

	readonly MESSAGES_DIRECTORY="${DATA_DIRECTORY}/messages"

	if [[ ! -d "${MESSAGES_DIRECTORY}" ]]; then
	echo >&2 "Messages directory not found!"
	exit 1
	fi

	cd "${MESSAGES_DIRECTORY}"

	shopt -s globstar

	readonly SITE_TEMPLATE="${DATA_DIRECTORY}/site.html.template"

	for markdown_file in */.md; do
	echo "Processing file '${markdown_file}'"

	file_base="$(basename "${markdown_file}" .md)"
	file_dir="$(dirname "${markdown_file}")"
	html_file="${file_dir}/${file_base}.html"

	if ! CONTENT=$(pandoc --from markdown --to html5 "${markdown_file}"); then
	echo >&2 "Error processing '${markdown_file}'."
	else
	export CONTENT
	envsubst < "${SITE_TEMPLATE}" > "${html_file}"
	fi
	done
	<!doctype html>
	<html lang="en" data-theme="light">

	<head>
	<meta charset="utf-8">
	<meta name="viewport" content="width=device-width, initial-scale=1">
	<meta name="color-scheme" content="light dark" />
	<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@picocss/pico@2/css/pico.min.css">
	</head>

	<body>
	<main class="container">
	${CONTENT}
	</main>
	</body>

	</html>