Skip to content

Instantly share code, notes, and snippets.

@phreakin
Last active April 25, 2025 05:16
Show Gist options
  • Save phreakin/b3751a185240c08ae96c27573e4d67ba to your computer and use it in GitHub Desktop.
Save phreakin/b3751a185240c08ae96c27573e4d67ba to your computer and use it in GitHub Desktop.
A Python script that parses the Facebook Messenger data from your downloaded Facebook data and creates searchable HTML files from it
import os
import json
import re
import shutil
from pathlib import Path
from datetime import datetime
from argparse import ArgumentParser
from html import escape
# Display name of the archive owner; messages whose sender equals this value
# are styled as "from-me" in the generated pages.
MY_NAME = "YOUR NAME AS IT IS ON FB HERE"

# Media sub-folder name -> lower-case file extensions (with leading dot)
# treated as that kind of media. The keys are also used as directory names.
MEDIA_TYPES = {
    "audio": [
        ".mp3", ".m4a", ".aac", ".wav", ".ogg",
        ".opus", ".flac", ".wma", ".aiff",
    ],
    "photos": [
        ".jpg", ".jpeg", ".png", ".webp",
        ".heic", ".heif", ".tiff", ".bmp",
    ],
    "gifs": [".gif"],
    "videos": [
        ".mp4", ".mov", ".mkv", ".avi", ".wmv", ".flv", ".webm",
        ".m4v", ".3gp", ".mpeg", ".mpg", ".rmvb", ".vob", ".ts",
    ],
    "files": [
        ".docx", ".pdf", ".txt", ".zip", ".pptx", ".xlsx",
        ".csv", ".rar", ".7z", ".tar", ".gz", ".bz2",
    ],
}
def readable_timestamp(ts_ms):
    """Format a millisecond epoch timestamp as ``MM/DD/YYYY HH:MM AM/PM``.

    The value is converted with ``datetime.fromtimestamp`` and therefore
    rendered in the local time zone of the machine running the script.
    """
    seconds = ts_ms / 1000
    moment = datetime.fromtimestamp(seconds)
    return moment.strftime("%m/%d/%Y %I:%M %p")
# Matches an http(s) URL up to the next whitespace character. Compiled once at
# import time instead of on every call.
_URL_PATTERN = re.compile(r"(https?://[^\s]+)")


def linkify(text):
    """Wrap every http(s) URL found in *text* in an anchor tag.

    Each match is replaced by ``<a href="URL" ...>URL</a>`` that opens in a new
    tab. Text outside the matches is returned unchanged.

    Args:
        text: The string to scan. The caller in this file passes text that was
            already HTML-escaped with ``quote=False``.

    Returns:
        The text with URLs replaced by anchor elements.
    """

    def replacer(match):
        url = match.group(0)
        # A double quote is not whitespace, so the pattern can capture it.
        # Encode it for the attribute value so the URL cannot terminate the
        # double-quoted href and inject further attributes.
        href = url.replace('"', "&quot;")
        return f'<a href="{href}" target="_blank" rel="noopener noreferrer">{url}</a>'

    return _URL_PATTERN.sub(replacer, text)
def parse_messages(json_files):
    """Load message JSON files and render their text messages as HTML.

    Every file is parsed as JSON. Entries of its ``"messages"`` list that have
    a ``"content"`` key are collected, sorted by ``"timestamp_ms"`` and turned
    into one ``<p class='message ...'>`` element each. Messages without
    ``"content"`` are skipped.

    Args:
        json_files: Iterable of paths to JSON files; processed in sorted order.

    Returns:
        A ``(html, participants)`` tuple: the message paragraphs joined with
        newlines, and the set of participant names found in the files.

    Raises:
        KeyError: If a message with ``"content"`` lacks ``"timestamp_ms"`` or
            ``"sender_name"``.
        json.JSONDecodeError: If a file is not valid JSON.
    """
    all_messages = []
    participants = set()
    for file in sorted(json_files):
        try:
            with open(file, "r", encoding="utf-8") as f:
                data = json.load(f)
        except UnicodeDecodeError:
            # Retry leniently so one bad byte does not abort the conversation.
            with open(file, "r", encoding="utf-8", errors="replace") as f:
                data = json.load(f)
        # Skip entries without a name: a None in the set would later break
        # sorting and escaping of the participant list.
        names = (p.get("name") for p in data.get("participants", []))
        participants.update(name for name in names if name)
        for msg in data.get("messages", []):
            if "content" in msg:
                text = msg["content"]
                all_messages.append(
                    {
                        "timestamp": msg["timestamp_ms"],
                        "sender": msg["sender_name"],
                        # Raw text is kept for the search attribute; the
                        # linkified form is only used as visible markup.
                        "text": text,
                        "content": linkify(escape(text, quote=False)),
                    }
                )
    all_messages.sort(key=lambda m: m["timestamp"])

    output_html = []
    for msg in all_messages:
        ts = readable_timestamp(msg["timestamp"])
        sender = msg["sender"]
        css_class = "from-me" if sender == MY_NAME else "from-them"
        # The data-* attributes are single-quoted, so they must be escaped
        # with quote=True; otherwise an apostrophe in a name or message ends
        # the attribute early and corrupts the element.
        sender_attr = escape(sender.lower(), quote=True)
        content_attr = escape(msg["text"].lower(), quote=True)
        output_html.append(
            f"<p class='message {css_class}' data-sender='{sender_attr}' data-content='{content_attr}'><strong>[{ts}]</strong> <span class='sender'>{escape(sender)}</span>: <span class='content'>{msg['content']}</span></p>"
        )
    return "\n".join(output_html), participants
def generate_media_previews(folder, rel_url):
    """Build inline preview markup for the images, videos and audio in *folder*.

    Entries are visited in sorted order and classified by lower-cased file
    extension using ``MEDIA_TYPES``; entries with any other extension are
    ignored.

    Args:
        folder: ``Path`` of the media directory to scan.
        rel_url: URL prefix (relative to the generated page) under which the
            files of *folder* are reachable.

    Returns:
        Concatenated ``<div class='media-box'>`` elements, or ``""`` when
        *folder* is not an existing directory or holds no previewable files.
    """
    # Imported locally because only this function needs them.
    import mimetypes
    from urllib.parse import quote

    if not folder.is_dir():
        return ""

    image_exts = set(MEDIA_TYPES["photos"]) | set(MEDIA_TYPES["gifs"])
    video_exts = set(MEDIA_TYPES["videos"])
    audio_exts = set(MEDIA_TYPES["audio"])

    previews = []
    for file in sorted(folder.iterdir()):
        ext = file.suffix.lower()
        # Percent-encode the file name (spaces, '#', '?', quotes) and then
        # HTML-escape the result so it is safe inside a single-quoted attribute.
        url = escape(f"{rel_url}/{quote(file.name)}", quote=True)
        if ext in image_exts:
            previews.append(
                f"<div class='media-box'><img src='{url}' class='img-fluid'></div>"
            )
        elif ext in video_exts:
            # Advertise the real container type rather than claiming every
            # video is MP4; omit the hint when no video type is known.
            mime, _ = mimetypes.guess_type(file.name)
            type_attr = f" type='{mime}'" if mime and mime.startswith("video/") else ""
            previews.append(
                f"<div class='media-box'><video controls width='100%'><source src='{url}'{type_attr}></video></div>"
            )
        elif ext in audio_exts:
            previews.append(
                f"<div class='media-box'><audio controls><source src='{url}'></audio></div>"
            )
    return "".join(previews)
def generate_media_table(folder, rel_url):
    """Build an HTML table listing every regular file in *folder*.

    Args:
        folder: ``Path`` of the directory to list.
        rel_url: URL prefix (relative to the generated page) under which the
            files of *folder* are reachable.

    Returns:
        A heading plus a ``<table>`` with one row per file (name, upper-cased
        extension, link), or ``""`` when *folder* is not an existing directory
        or contains no files.
    """
    # Imported locally because only this function needs it.
    from urllib.parse import quote

    if not folder.is_dir():
        return ""

    rows = []
    for file in sorted(folder.iterdir()):
        if not file.is_file():
            continue
        ext = file.suffix.replace(".", "").upper()
        # Percent-encode the name for the URL and HTML-escape it for the
        # single-quoted attribute; raw names with spaces, '#' or quotes would
        # otherwise produce broken links or broken markup.
        href = escape(f"{rel_url}/{quote(file.name)}", quote=True)
        rows.append(
            f"<tr><td>{escape(file.name)}</td><td>{escape(ext)}</td><td><a href='{href}' target='_blank'>View</a></td></tr>"
        )
    if not rows:
        return ""
    heading = escape(folder.name.title())
    return f"<h3>{heading} Files</h3><table class='table table-sm table-dark table-striped'><thead><tr><th>Filename</th><th>Type</th><th>View/Download</th></tr></thead><tbody>{''.join(rows)}</tbody></table>"
def copy_media_dirs(convo_dir, output_convo_dir):
    """Copy each media sub-folder of a conversation into its output folder.

    For every key of ``MEDIA_TYPES``, the directory of that name under
    *convo_dir* (if it exists) is copied to the same name under
    *output_convo_dir*, merging into a destination that already exists.
    """
    for kind in MEDIA_TYPES:
        source = convo_dir / kind
        if not (source.exists() and source.is_dir()):
            continue
        target = output_convo_dir / kind
        shutil.copytree(source, target, dirs_exist_ok=True)
def generate_html_page(
    title, messages_html, media_tables, media_previews, participants
):
    """Assemble the complete HTML document for one conversation.

    Args:
        title: Conversation title; HTML-escaped before insertion.
        messages_html: Pre-rendered message markup, inserted verbatim.
        media_tables: Pre-rendered media table markup, inserted verbatim.
        media_previews: Pre-rendered media preview markup, inserted verbatim.
        participants: Iterable of participant names (may be empty or ``None``);
            empty/``None`` names are ignored and the rest are listed sorted.

    Returns:
        The page as a string, including the inline stylesheet and the
        client-side search script.
    """
    # Drop missing names so sorting and escaping cannot fail on None.
    names = sorted(p for p in (participants or ()) if p)
    participants_html = (
        "<p id='participants' ><strong>Participants:</strong> "
        + ", ".join(escape(p) for p in names)
        + "</p>"
        if names
        else ""
    )
    # NOTE: the #participants rule previously read
    # `font-size="32" text-align: "center";`, which is not valid CSS and was
    # ignored by browsers; it is written as proper declarations here.
    return f"""
<!DOCTYPE html>
<html lang='en'>
<head>
<meta charset='UTF-8'>
<title>{escape(title)}</title>
<link href='https://cdn.jsdelivr.net/npm/bootstrap@latest/dist/css/bootstrap.min.css' rel='stylesheet'>
<link href='https://cdn.jsdelivr.net/npm/@fortawesome/fontawesome-free@latest/css/all.min.css' rel='stylesheet'>
<link href='https://cdn.jsdelivr.net/npm/sweetalert2@latest/dist/sweetalert2.min.css' rel='stylesheet'>
<link href='https://cdn.jsdelivr.net/npm/tippy.js@latest/dist/tippy.css' rel='stylesheet'>
<link href='assets/css/all.css' rel='stylesheet'>
<style>
body {{ background: #222222; color: #F5F5F5; font-family: monospace; padding: 20px; }}
.media-box {{ margin: 10px 0; }}
.chat-log p {{ margin-bottom: 5px; }}
#search {{ width: 100%; margin-bottom: 20px; padding: 10px; border-radius: 5px; border: none; }}
.from-me {{ color: #0fd5db; }}
.from-them {{ color: #9bff7d; }}
.p {{ margin-top: 5px; margin-bottom: 5px; }}
#participants {{ font-size: 32px; text-align: center; margin-bottom: 20px; }}
</style>
</head>
<body>
<h1 class='text-warning text-center'>
{escape(title)}
</h1>
{participants_html}
<input id='search' type='text' placeholder='Search by sender or message...' />
<div class='chat-log'>
{messages_html}
</div>
<hr/>
{media_previews}
{media_tables}
<script>
document.getElementById('search').addEventListener('input', function(e) {{
const value = e.target.value.toLowerCase();
document.querySelectorAll('.message').forEach(msg => {{
const sender = msg.dataset.sender || '';
const content = msg.dataset.content || '';
msg.style.display = (sender.includes(value) || content.includes(value)) ? 'block' : 'none';
}});
}});
</script>
<script src='https://cdn.jsdelivr.net/npm/bootstrap@latest/dist/js/bootstrap.bundle.min.js'></script>
<script src='https://cdn.jsdelivr.net/npm/sweetalert2@latest/dist/sweetalert2.all.min.js'></script>
<script src='https://cdn.jsdelivr.net/npm/tippy.js@latest/dist/tippy-bundle.umd.min.js'></script>
<script src='https://cdn.jsdelivr.net/npm/clipboard@latest/dist/clipboard.min.js'></script>
<script src='https://cdn.jsdelivr.net/npm/clipboard-polyfill@latest/dist/clipboard-polyfill.min.js'></script>
<script src='assets/js/all.js'></script>
</body>
</html>
"""
def build_convo_html(convo_dir, output_dir, rel_output_path):
    """Render one conversation directory to ``index.html`` plus copied media.

    Args:
        convo_dir: ``Path`` of the conversation folder holding ``*.json``
            message files and optional media sub-folders.
        output_dir: ``Path`` of the archive output root.
        rel_output_path: Path of this conversation relative to *output_dir*.

    Returns:
        *rel_output_path* as a string with forward slashes, or ``None`` when
        the folder contains no JSON files (nothing is written in that case).
    """
    # Sorted so that the file used for the title below is deterministic.
    json_files = sorted(convo_dir.glob("*.json"))
    if not json_files:
        return None

    messages_html, participants = parse_messages(json_files)

    previews = []
    tables = []
    for folder_name in MEDIA_TYPES:
        media_path = convo_dir / folder_name
        rel_url = f"./{folder_name}" if media_path.exists() else ""
        previews.append(generate_media_previews(media_path, rel_url))
        tables.append(generate_media_table(media_path, rel_url))
    media_html = "".join(previews)
    tables_html = "".join(tables)

    output_path = output_dir / rel_output_path / "index.html"
    output_path.parent.mkdir(parents=True, exist_ok=True)
    copy_media_dirs(convo_dir, output_path.parent)

    # Best effort: use the "title" stored in the first JSON file and fall back
    # to the folder name when the file is unreadable, malformed, or has no
    # usable title.
    title = convo_dir.name
    try:
        with open(json_files[0], "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        data = None
    if isinstance(data, dict):
        title = data.get("title") or convo_dir.name

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(
            generate_html_page(
                title, messages_html, tables_html, media_html, participants
            )
        )
    return str(rel_output_path).replace("\\", "/")
def build_index(all_convos, output_dir):
    """Write the top-level ``index.html`` linking to every conversation page.

    Args:
        all_convos: Mapping of section name to a list of conversation paths
            (relative to *output_dir*, forward-slash separated).
        output_dir: Directory that receives ``index.html``; created if missing.

    Returns:
        None. The page is written to ``output_dir / "index.html"``.
    """
    # Imported locally because only this function needs it.
    from urllib.parse import quote

    output_dir = Path(output_dir)
    # Without this, writing fails when no conversation created the directory.
    output_dir.mkdir(parents=True, exist_ok=True)

    sections = []
    for section, convos in all_convos.items():
        # Percent-encode the link target (quote() keeps '/') and HTML-escape
        # both target and label so unusual folder names cannot break the page.
        links = "\n".join(
            f"<li class='list-group-item bg-dark'><a class='text-warning' href='{escape(quote(c), quote=True)}/index.html'>{escape(c)}</a></li>"
            for c in sorted(convos)
        )
        sections.append(
            f"<h2 class='text-warning'>{escape(section.title())}</h2><ul class='list-group mb-4'>{links}</ul>"
        )
    sections_html = "".join(sections)

    # The stylesheet URL is pinned to a concrete Bootstrap 5 release; the
    # version segment in the previous URL was not a valid package version.
    html = f"""<!DOCTYPE html>
<html lang='en'>
<head>
<meta charset='UTF-8'>
<title>Messenger Archive</title>
<link href='https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css' rel='stylesheet'>
</head>
<body class='bg-dark text-light'>
<div class='container mt-5'>
<h1 class='text-warning text-center mb-5'>
Facebook Messenger Archive
</h1>
{sections_html}
</div>
</body>
</html>"""
    with open(output_dir / "index.html", "w", encoding="utf-8") as f:
        f.write(html)
def process_all(root_input, output_root):
    """Convert every conversation under *root_input* and write the index page.

    Each known category folder that exists under *root_input* is scanned; every
    sub-directory is passed to ``build_convo_html`` and, when that returns a
    path, recorded for the index built at the end.

    Args:
        root_input: Path (``str`` or ``Path``) of the folder containing the
            category sub-folders.
        output_root: Path (``str`` or ``Path``) of the folder that receives
            the generated archive; created if missing.
    """
    categories = (
        "inbox",
        "archived_threads",
        "e2ee_cutover",
        "stickers_used",
        "support_files",
    )
    root_input = Path(root_input)
    # Accept plain strings too: the helpers join paths with the `/` operator.
    output_root = Path(output_root)
    # Ensure the index can be written even when no conversation is found.
    output_root.mkdir(parents=True, exist_ok=True)

    all_convos = {}
    for category in categories:
        input_path = root_input / category
        if not input_path.is_dir():
            continue
        # Sorted for a reproducible processing order across platforms.
        for convo_dir in sorted(input_path.iterdir()):
            if not convo_dir.is_dir():
                continue
            rel_path = Path(category) / convo_dir.name
            result = build_convo_html(convo_dir, output_root, rel_path)
            if result:
                all_convos.setdefault(category, []).append(result)
    build_index(all_convos, output_root)
if __name__ == "__main__":
    # Command-line entry point: read the input folder and optional output
    # folder, then build the whole archive.
    cli = ArgumentParser(
        description="Parse Facebook Messenger export into HTML archive."
    )
    cli.add_argument(
        "messages_folder",
        help="Path to 'messages' folder (contains inbox/, archived_threads/, e2ee_cutover/)",
    )
    cli.add_argument("--output", default="output_html", help="Output folder")
    options = cli.parse_args()
    destination = Path(options.output).resolve()
    process_all(options.messages_folder, destination)
@phreakin
Copy link
Author

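Get your Facebook data from Facebook. Once you have it, unzip it wherever you like and put this script inside the root folder of the unzipped data. Run the script with the path to the messages folder as its argument (typically facebook_data\your_facebook_activity\messages); the optional --output argument sets where the HTML is written (default: output_html).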

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment