Last active
February 5, 2021 18:10
-
-
Save timmolderez/2a6ffa4efa04bbf15c7a411b43f55a9c to your computer and use it in GitHub Desktop.
Slack - generate a wordcloud per user from a Slack chat export
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Restructures Slack chat export files so that the messages are grouped per user
(normally the export is grouped per channel).

Not actually useful or anything :) Mainly just for fun, so you can generate a
wordcloud per user, then play the "guess whose wordcloud is this?" game.

Usage: fill in INPUT_DIR and OUTPUT_DIR, then run the script.

- See this R script to create wordclouds from a chat export:
  https://github.com/codeandsupply/chat-word-cloud
  (If you want a wordcloud per channel, you can use that R script directly.
  If you want one per user, run this Python script first.
  Use `setwd()` to choose which folder/user to generate a wordcloud for.)
- See this page on how admins can export chat history:
  https://slack.com/intl/en-be/help/articles/201658943-Export-your-workspace-data
""" | |
import json | |
import os | |
from typing import Dict | |
# NOTE: '~' is only a shell convention; os.scandir(), open() and os.makedirs()
# treat it as a literal directory name. Expand it once here so every function
# that uses these constants sees a real path. The trailing slash is kept
# (expanduser preserves it) because paths are built by appending to it.

# Folder containing the (unzipped) chat export
INPUT_DIR = os.path.expanduser('~/Desktop/slack-export/')

# Folder where the restructured data will be stored
OUTPUT_DIR = os.path.expanduser('~/Desktop/slack-export-per-user/')
def restructure_workspace_export_per_user() -> None:
    """Regroup a whole Slack workspace export per user.

    Every sub-directory of INPUT_DIR is handed to
    restructure_channel_export_per_user(); plain files at the top level
    of INPUT_DIR are ignored. Once all of them have been processed, the
    per-user output files are finalised with close_output_files().
    """
    # Use the scandir iterator as a context manager so its directory handle
    # is released deterministically, and collect plain string paths so the
    # callee receives the `str` its signature promises (not a DirEntry).
    with os.scandir(INPUT_DIR) as entries:
        channel_dirs = sorted(entry.path for entry in entries if entry.is_dir())

    for channel_dir in channel_dirs:
        restructure_channel_export_per_user(channel_dir)

    close_output_files()
def restructure_channel_export_per_user(channel_dir: str) -> None:
    """Feed every message in one channel directory to process_message_dict().

    Args:
        channel_dir: Path of a directory containing JSON files, each of
            which is loaded and iterated as a sequence of message dicts.
    """
    # Only regular *.json files are message logs; opening a sub-directory or
    # a stray non-JSON file would crash the whole run. Sorting makes the
    # processing order deterministic (scandir order is arbitrary).
    with os.scandir(channel_dir) as entries:
        json_paths = sorted(
            entry.path
            for entry in entries
            if entry.is_file() and entry.name.endswith('.json')
        )

    for json_path in json_paths:
        with open(json_path, 'r', encoding='utf-8') as f:
            all_messages = json.load(f)
        for msg in all_messages:
            process_message_dict(msg)
def _user_dir_name(message: Dict) -> str:
    """Pick a non-empty, filesystem-safe folder name for a message's author."""
    profile = message['user_profile']
    candidates = (
        profile.get('display_name'),
        profile.get('real_name'),
        profile.get('name'),
        message.get('user'),
    )
    # display_name may be an empty string; fall back to the other
    # identifying fields rather than producing an empty folder name.
    name = next(
        (c.strip() for c in candidates if isinstance(c, str) and c.strip()),
        'unknown-user',
    )
    # A name must never act as a path: neutralise separators and NULs, and
    # refuse the special '.' / '..' entries.
    name = ''.join('_' if ch in '/\\\0' else ch for ch in name)
    if name in ('.', '..'):
        name = '_'
    return name


def process_message_dict(message: Dict) -> None:
    """Append one message to its author's `all_messages.json`.

    Messages without a 'user_profile' key are skipped. The output file is
    built incrementally: it starts with '[' and every message is followed
    by ',\\n'; close_output_files() later turns it into valid JSON.

    Args:
        message: A single message dict from a channel JSON file.
    """
    if 'user_profile' not in message:
        # Skip messages sent by a bot
        return

    # OUTPUT_DIR is expected to end with a path separator.
    out_dir = f'{OUTPUT_DIR}{_user_dir_name(message)}'
    out_file = f'{out_dir}/all_messages.json'
    os.makedirs(out_dir, exist_ok=True)

    contents = json.dumps(message) + ',\n'
    if not os.path.isfile(out_file):
        # First message for this user: open the JSON array.
        contents = '[\n' + contents

    with open(out_file, 'a', encoding='utf-8', newline='\n') as f:
        f.write(contents)
def close_output_files():
    """Finalise every per-user output file so that it is valid JSON.

    For each sub-directory of OUTPUT_DIR, the trailing ',\\n' left behind by
    process_message_dict() in `all_messages.json` is replaced with '\\n]',
    and an empty `dummy.json` is written next to it. Files that do not end
    with ',\\n' (e.g. already finalised) are left untouched, so calling
    this function twice does not corrupt anything.
    """
    if not os.path.isdir(OUTPUT_DIR):
        # Nothing was written (e.g. the export had no user messages).
        return

    with os.scandir(OUTPUT_DIR) as entries:
        user_dirs = [entry.path for entry in entries if entry.is_dir()]

    for user_dir in user_dirs:
        messages_file = os.path.join(user_dir, 'all_messages.json')
        if not os.path.isfile(messages_file):
            continue

        # Work in binary mode: byte offsets are well defined there, whereas
        # arithmetic on a text-mode tell() value is not guaranteed to work.
        with open(messages_file, 'rb+') as f:
            size = f.seek(0, os.SEEK_END)
            if size >= 2:
                f.seek(size - 2)
                if f.read(2) == b',\n':
                    # Remove the last ',\n' ...
                    f.seek(size - 2)
                    f.truncate()
                    # ... and add the closing ']'
                    f.write(b'\n]')

        # Workaround for a bug in the wordcloud R script; it only works
        # when there's more than one .json file, so just tossing in an empty one..
        with open(os.path.join(user_dir, 'dummy.json'), 'w',
                  encoding='utf-8', newline='\n') as f:
            f.write('[]\n')
# Script entry point: only run the conversion when this file is executed
# directly, so that importing it (e.g. to reuse a helper) has no side effects.
if __name__ == '__main__':
    restructure_workspace_export_per_user()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment