timmolderez · February 5, 2021 18:10
diff --git a/slack_export_by_user.py b/slack_export_by_user.py
 """
 Restructures Slack chat export files so that the messages are now grouped per user
 (normally the export is grouped per channel)

 Not actually useful or anything :) Mainly just for fun so you can generate a
 wordcloud per user, then play the "guess whose wordcloud is this?"-game

 Usage: fill in INPUT_DIR and OUTPUT_DIR, then run the script..

 - See this R script to create wordclouds from a chat export:
  https://github.com/codeandsupply/chat-word-cloud
  (If you want a wordcloud per channel, you can directly use that R script.
   If you want one per user, run this Python script first.
   Use `setwd()` to choose which folder/user to generate a wordcloud for..)
 - See this page on how admins can export chat history:
  https://slack.com/intl/en-be/help/articles/201658943-Export-your-workspace-data
 """

 import json
 import os
 from typing import Dict

 # NOTE: open()/os.scandir()/os.makedirs() do not expand '~' themselves, so the
 # paths are expanded here once. The trailing '/' is required: the output paths
 # are built by plain string concatenation onto OUTPUT_DIR.
 INPUT_DIR = os.path.expanduser('~/Desktop/slack-export/')  # Folder containing the (unzipped) chat export
 OUTPUT_DIR = os.path.expanduser('~/Desktop/slack-export-per-user/')  # Folder where the restructured data will be stored

 def restructure_workspace_export_per_user() -> None:
    """Regroup the whole workspace export found in INPUT_DIR by user.

    Every sub-directory of INPUT_DIR is handed to
    restructure_channel_export_per_user(); files located directly inside
    INPUT_DIR are ignored. Once all of them are processed,
    close_output_files() terminates the per-user output files.
    """
    # Collect the listing up front so the scandir handle is released right
    # away; sorting by path makes the processing order reproducible.
    with os.scandir(INPUT_DIR) as entries:
        channel_dirs = sorted(
            entry.path for entry in entries if entry.is_dir())

    for channel_dir in channel_dirs:
        # Pass the path string, matching the callee's `str` annotation
        restructure_channel_export_per_user(channel_dir)

    # If no message was written, OUTPUT_DIR may not exist and scanning it
    # in close_output_files() would raise FileNotFoundError.
    if os.path.isdir(OUTPUT_DIR):
        close_output_files()


 def restructure_channel_export_per_user(channel_dir: str) -> None:
    """Feed every message of one channel folder to process_message_dict().

    Args:
        channel_dir: Path of a channel folder of the export. Each ``*.json``
            file inside it is expected to hold a JSON array of message
            dicts.

    Entries that are not regular ``.json`` files (sub-folders, ``.DS_Store``,
    ...) are skipped instead of being parsed as JSON.
    """
    with os.scandir(channel_dir) as entries:
        # Sort by file name so the processing order is reproducible
        json_paths = sorted(
            entry.path for entry in entries
            if entry.is_file() and entry.name.endswith('.json'))

    for json_path in json_paths:
        with open(json_path, 'r', encoding='utf-8') as f:
            all_messages = json.load(f)
        # The input file is already closed while its messages are processed
        for msg in all_messages:
            process_message_dict(msg)


 def process_message_dict(message: Dict) -> None:
    """Append one message to ``all_messages.json`` in its author's folder.

    Args:
        message: A single message dict from the export. Messages without a
            'user_profile' entry are ignored.

    The output file is started with '[' on first use and every message is
    followed by ',\\n'; the file only becomes valid JSON once
    close_output_files() has replaced the last separator with ']'.
    """
    if 'user_profile' not in message:
        # Skip messages sent by a bot
        return

    profile = message['user_profile']
    # An empty display name would put all_messages.json directly into
    # OUTPUT_DIR, so fall back to other identifying fields when present.
    user_name = (profile.get('display_name')
                 or profile.get('real_name')
                 or profile.get('name')
                 or message.get('user')
                 or 'unknown-user')
    # Keep the folder name a single path component inside OUTPUT_DIR
    folder = str(user_name).strip().replace('/', '_').replace('\\', '_')
    if folder in ('', '.', '..'):
        folder = 'unknown-user'

    out_dir = f'{OUTPUT_DIR}{folder}'
    out_file = f'{out_dir}/all_messages.json'
    os.makedirs(out_dir, exist_ok=True)

    contents = json.dumps(message) + ',\n'
    if not os.path.isfile(out_file):
        # First message of this user: open the JSON array
        contents = '[\n' + contents

    with open(out_file, 'a', encoding='utf-8', newline='\n') as f:
        f.write(contents)


 def close_output_files() -> None:
    """Wrap up all of the output files so they're valid JSON.

    For every user folder in OUTPUT_DIR the trailing ',\\n' of
    ``all_messages.json`` is replaced by '\\n]'. Entries that are not
    folders, and folders without an ``all_messages.json``, are left alone.
    A ``dummy.json`` holding an empty array is written next to each
    processed file.
    """
    with os.scandir(OUTPUT_DIR) as entries:
        user_dirs = [entry.path for entry in entries if entry.is_dir()]

    for user_dir in user_dirs:
        messages_path = f'{user_dir}/all_messages.json'
        if not os.path.isfile(messages_path):
            continue

        # Binary mode: byte offsets are well defined here, whereas the value
        # of tell() on a text-mode file is an opaque number.
        with open(messages_path, 'rb+') as f:
            f.seek(0, os.SEEK_END)
            size = f.tell()
            if size >= 2:
                f.seek(size - 2)
                if f.read(2) == b',\n':
                    # Remove the last ',\n' and add the closing ']'
                    f.seek(size - 2)
                    f.truncate()
                    f.write(b'\n]')

        # Workaround for a bug in the wordcloud R script; it only works
        # when there's more than one .json file, so just tossing in an empty one..
        with open(f'{user_dir}/dummy.json', 'w',
                  encoding='utf-8', newline='\n') as f:
            f.write('[]\n')


 if __name__ == '__main__':
    # Run the export only when executed as a script, not when imported
    restructure_workspace_export_per_user()
	"""
	Restructures Slack chat export files so that the messages are now grouped per user
	(normally the export is grouped per channel)

	Not actually useful or anything :) Mainly just for fun so you can generate a
	wordcloud per user, then play the "guess whose wordcloud is this?"-game

	Usage: fill in INPUT_DIR and OUTPUT_DIR, then run the script..

	- See this R script to create wordclouds from a chat export:
	https://github.com/codeandsupply/chat-word-cloud
	(If you want a wordcloud per channel, you can directly use that R script.
	If you want one per user, run this Python script first.
	Use `setwd()` to choose which folder/user to generate a wordcloud for..)
	- See this page on how admins can export chat history:
	https://slack.com/intl/en-be/help/articles/201658943-Export-your-workspace-data
	"""

	import json
	import os
	from typing import Dict

	# NOTE: open()/os.scandir()/os.makedirs() do not expand '~' themselves, so the
	# paths are expanded here once. The trailing '/' is required: the output paths
	# are built by plain string concatenation onto OUTPUT_DIR.
	INPUT_DIR = os.path.expanduser('~/Desktop/slack-export/')  # Folder containing the (unzipped) chat export
	OUTPUT_DIR = os.path.expanduser('~/Desktop/slack-export-per-user/')  # Folder where the restructured data will be stored

	def restructure_workspace_export_per_user() -> None:
	    """Regroup the whole workspace export found in INPUT_DIR by user.

	    Every sub-directory of INPUT_DIR is handed to
	    restructure_channel_export_per_user(); files located directly inside
	    INPUT_DIR are ignored. Once all of them are processed,
	    close_output_files() terminates the per-user output files.
	    """
	    # Collect the listing up front so the scandir handle is released right
	    # away; sorting by path makes the processing order reproducible.
	    with os.scandir(INPUT_DIR) as entries:
	        channel_dirs = sorted(
	            entry.path for entry in entries if entry.is_dir())

	    for channel_dir in channel_dirs:
	        # Pass the path string, matching the callee's `str` annotation
	        restructure_channel_export_per_user(channel_dir)

	    # If no message was written, OUTPUT_DIR may not exist and scanning it
	    # in close_output_files() would raise FileNotFoundError.
	    if os.path.isdir(OUTPUT_DIR):
	        close_output_files()


	def restructure_channel_export_per_user(channel_dir: str) -> None:
	    """Feed every message of one channel folder to process_message_dict().

	    Args:
	        channel_dir: Path of a channel folder of the export. Each ``*.json``
	            file inside it is expected to hold a JSON array of message
	            dicts.

	    Entries that are not regular ``.json`` files (sub-folders, ``.DS_Store``,
	    ...) are skipped instead of being parsed as JSON.
	    """
	    with os.scandir(channel_dir) as entries:
	        # Sort by file name so the processing order is reproducible
	        json_paths = sorted(
	            entry.path for entry in entries
	            if entry.is_file() and entry.name.endswith('.json'))

	    for json_path in json_paths:
	        with open(json_path, 'r', encoding='utf-8') as f:
	            all_messages = json.load(f)
	        # The input file is already closed while its messages are processed
	        for msg in all_messages:
	            process_message_dict(msg)


	def process_message_dict(message: Dict) -> None:
	    """Append one message to ``all_messages.json`` in its author's folder.

	    Args:
	        message: A single message dict from the export. Messages without a
	            'user_profile' entry are ignored.

	    The output file is started with '[' on first use and every message is
	    followed by ',\\n'; the file only becomes valid JSON once
	    close_output_files() has replaced the last separator with ']'.
	    """
	    if 'user_profile' not in message:
	        # Skip messages sent by a bot
	        return

	    profile = message['user_profile']
	    # An empty display name would put all_messages.json directly into
	    # OUTPUT_DIR, so fall back to other identifying fields when present.
	    user_name = (profile.get('display_name')
	                 or profile.get('real_name')
	                 or profile.get('name')
	                 or message.get('user')
	                 or 'unknown-user')
	    # Keep the folder name a single path component inside OUTPUT_DIR
	    folder = str(user_name).strip().replace('/', '_').replace('\\', '_')
	    if folder in ('', '.', '..'):
	        folder = 'unknown-user'

	    out_dir = f'{OUTPUT_DIR}{folder}'
	    out_file = f'{out_dir}/all_messages.json'
	    os.makedirs(out_dir, exist_ok=True)

	    contents = json.dumps(message) + ',\n'
	    if not os.path.isfile(out_file):
	        # First message of this user: open the JSON array
	        contents = '[\n' + contents

	    with open(out_file, 'a', encoding='utf-8', newline='\n') as f:
	        f.write(contents)


	def close_output_files() -> None:
	    """Wrap up all of the output files so they're valid JSON.

	    For every user folder in OUTPUT_DIR the trailing ',\\n' of
	    ``all_messages.json`` is replaced by '\\n]'. Entries that are not
	    folders, and folders without an ``all_messages.json``, are left alone.
	    A ``dummy.json`` holding an empty array is written next to each
	    processed file.
	    """
	    with os.scandir(OUTPUT_DIR) as entries:
	        user_dirs = [entry.path for entry in entries if entry.is_dir()]

	    for user_dir in user_dirs:
	        messages_path = f'{user_dir}/all_messages.json'
	        if not os.path.isfile(messages_path):
	            continue

	        # Binary mode: byte offsets are well defined here, whereas the value
	        # of tell() on a text-mode file is an opaque number.
	        with open(messages_path, 'rb+') as f:
	            f.seek(0, os.SEEK_END)
	            size = f.tell()
	            if size >= 2:
	                f.seek(size - 2)
	                if f.read(2) == b',\n':
	                    # Remove the last ',\n' and add the closing ']'
	                    f.seek(size - 2)
	                    f.truncate()
	                    f.write(b'\n]')

	        # Workaround for a bug in the wordcloud R script; it only works
	        # when there's more than one .json file, so just tossing in an empty one..
	        with open(f'{user_dir}/dummy.json', 'w',
	                  encoding='utf-8', newline='\n') as f:
	            f.write('[]\n')


	if __name__ == '__main__':
	    # Run the export only when executed as a script, not when imported
	    restructure_workspace_export_per_user()