-
-
Save minniel/1a07d4bc445225fd0fc1cf605274f866 to your computer and use it in GitHub Desktop.
| from slacker import Slacker | |
| import json | |
| import argparse | |
| import os | |
| import shutil | |
| import copy | |
| from datetime import datetime | |
| # This script finds all channels, private channels and direct messages | |
| # that your user participates in, downloads the complete history for | |
| # those conversations and writes each conversation out to separate json files. | |
| # | |
| # This user centric history gathering is nice because the official slack data exporter | |
| # only exports public channels. | |
| # | |
| # PS, this only works if your slack team has a paid account which allows for unlimited history. | |
| # | |
| # PPS, this use of the API is blessed by Slack. | |
| # https://get.slack.help/hc/en-us/articles/204897248 | |
| # " If you want to export the contents of your own private groups and direct messages | |
| # please see our API documentation." | |
| # | |
| # get your slack user token at the bottom of this page | |
| # https://api.slack.com/web | |
| # | |
| # dependencies: | |
| # pip install slacker #https://github.com/os/slacker | |
| # | |
| # usage examples | |
| # python slack_history.py --token='123token' | |
| # python slack_history.py --token='123token' --dryRun | |
| # python slack_history.py --token='123token' --skipDirectMessages | |
| # python slack_history.py --token='123token' --skipDirectMessages --skipPrivateChannels | |
| # fetches the complete message history for a channel/group/im | |
| # | |
| # pageableObject could be: | |
| # slack.channels | |
| # slack.groups | |
| # slack.im | |
| # | |
| # channelId is the id of the channel/group/im you want to download history for. | |
def getHistory(pageableObject, channelId, pageSize = 100):
    """Fetch the complete message history for one channel/group/im.

    pageableObject -- object whose ``history(channel, latest, oldest, count)``
                      call returns something with a ``body`` dict holding
                      'messages' and 'has_more'.
    channelId      -- id of the channel/group/im to download history for.
    pageSize       -- number of messages requested per call.

    Returns the messages of every page, concatenated in the order they
    were received.
    """
    messages = []
    lastTimestamp = None

    while True:
        response = pageableObject.history(
            channel = channelId,
            latest = lastTimestamp,
            oldest = 0,
            count = pageSize
        ).body

        page = response['messages']
        messages.extend(page)

        hasMore = response['has_more'] == True
        # An empty page cannot advance the cursor: stop instead of failing on
        # messages[-1] (empty first page) or requesting the same page forever.
        if not hasMore or not page:
            break

        # the next request continues from the last message received so far
        lastTimestamp = page[-1]['ts']

    return messages
def mkdir(directory):
    """Create ``directory`` (and any missing parents) unless it already is a directory."""
    if os.path.isdir(directory):
        return
    os.makedirs(directory)
| # create datetime object from slack timestamp ('ts') string | |
def parseTimeStamp( timeStamp ):
    """Create a datetime object from a slack timestamp ('ts') string.

    timeStamp -- string of the form 'seconds' or 'seconds.suffix'.

    Only the part before the '.' is used. Returns a naive datetime built
    with datetime.utcfromtimestamp. Raises ValueError when the string
    contains more than one '.' or the seconds part is not a number.
    """
    t_list = timeStamp.split('.')
    # A stamp without a '.' used to fall through and return None, which only
    # surfaced later as a formatting error in the caller.
    if len( t_list ) > 2:
        raise ValueError( 'Invalid time stamp' )
    return datetime.utcfromtimestamp( float(t_list[0]) )
| # move channel files from old directory to one with new channel name | |
def channelRename( oldRoomName, newRoomName ):
    """Move channel files from the old directory to one with the new channel name.

    oldRoomName -- path of the directory used under the previous name.
    newRoomName -- path of the directory for the new name; created if missing.

    Does nothing when oldRoomName is not a directory. Entries whose name is
    already taken in newRoomName are left where they are, so nothing is
    overwritten, and oldRoomName is removed only once it is empty.
    """
    # check if any files need to be moved
    if not os.path.isdir( oldRoomName ):
        return

    if not os.path.isdir( newRoomName ):
        os.makedirs( newRoomName )

    for fileName in os.listdir( oldRoomName ):
        destination = os.path.join( newRoomName, fileName )
        # shutil.move raises shutil.Error when the target name already exists
        # (e.g. both directories hold a file with the same name), which used
        # to abort the whole export.
        if os.path.exists( destination ):
            continue
        shutil.move( os.path.join( oldRoomName, fileName ), destination )

    # os.rmdir refuses non-empty directories, so skip it when entries were kept
    if not os.listdir( oldRoomName ):
        os.rmdir( oldRoomName )
def writeMessageFile( fileName, messages ):
    """Dump ``messages`` as JSON with a 4-space indent into ``fileName``.

    The file is opened in 'w' mode, so existing content is replaced.
    """
    with open(fileName, 'w') as handle:
        json.dump( obj=messages, fp=handle, indent=4 )
| # parse messages by date | |
def parseMessages( parentDir, roomDir, messages, roomType ):
    """Split a conversation's messages into one json file per day.

    parentDir -- directory holding all rooms of this type.
    roomDir   -- name of the room's directory inside parentDir.
    messages  -- list of message dicts, each with a 'ts' entry.
    roomType  -- 'channel', 'group' or 'im'; '<roomType>_name' is the message
                 subtype treated as a rename event.

    Consecutive messages sharing a date are written together to
    '{parentDir}/{roomDir}/{YYYY-MM-DD}.json'. When a rename event is seen
    (never for 'im'), files of the old room directory are moved to the new
    one and later writes go to the new directory.
    """
    nameChangeFlag = roomType + "_name"

    currentFileDate = ''
    currentMessages = []
    for message in messages:
        #first store the date of the next message
        ts = parseTimeStamp( message['ts'] )
        fileDate = '{:%Y-%m-%d}'.format(ts)

        #if it's on a different day, write out the previous day's messages
        if fileDate != currentFileDate:
            # Nothing is collected before the first message; writing then
            # produced a stray '<room>/.json' file containing an empty list.
            if currentMessages:
                outFileName = '{parent}/{room}/{file}.json'.format( parent = parentDir, room = roomDir, file = currentFileDate )
                writeMessageFile( outFileName, currentMessages )
            currentFileDate = fileDate
            currentMessages = []

        # check if current message is a name change
        # dms won't have name change events
        if roomType != "im" and message.get('subtype') == nameChangeFlag:
            roomDir = message['name']
            oldRoomPath = '{parent}/{room}'.format( parent = parentDir, room = message['old_name'] )
            newRoomPath = '{parent}/{room}'.format( parent = parentDir, room = roomDir )
            channelRename( oldRoomPath, newRoomPath )
            # channelRename returns early when the old directory is missing;
            # make sure the directory the next files go to exists regardless.
            mkdir( newRoomPath )

        currentMessages.append( message )

    # write out the last collected day; skipped for an empty history
    if currentMessages:
        outFileName = '{parent}/{room}/{file}.json'.format( parent = parentDir, room = roomDir, file = currentFileDate )
        writeMessageFile( outFileName, currentMessages )
| # fetch and write history for all public channels | |
def getChannels(slack, dryRun):
    """List all public channels and, unless dryRun is set, export their history.

    slack  -- client exposing ``channels.list()`` and ``channels.history``.
    dryRun -- when true, only the channel names are printed.

    History is written below the "channel" directory, one sub-directory
    per channel name.
    """
    publicChannels = slack.channels.list().body['channels']

    print("\nfound channels: ")
    for entry in publicChannels:
        print(entry['name'])

    if dryRun:
        return

    exportRoot = "channel"
    mkdir(exportRoot)
    for entry in publicChannels:
        print("getting history for channel {0}".format(entry['name']))
        channelName = entry['name']
        mkdir( os.path.join( exportRoot, channelName ) )
        history = getHistory(slack.channels, entry['id'])
        parseMessages( exportRoot, channelName, history, 'channel')
| # write channels.json file | |
def dumpChannelFile( slack ):
    """Write channels.json listing public channels followed by private channels.

    slack -- client exposing ``channels.list()`` and ``groups.list()``.

    Each private channel (group) is added as a channel-shaped entry: a copy
    of the first public channel with the group's own values written over it.
    The file is created in the current working directory and overwritten on
    each run.
    """
    print("Making channels file")
    channels = slack.channels.list().body['channels']

    #have to convert private channels to channels to be read in properly
    groups = slack.groups.list().body['groups']

    # Source of any channel fields not set below. Falls back to an empty dict
    # because channels[0] raised IndexError when there were groups but no
    # public channels.
    template = channels[0] if channels else {}

    print( str(len(channels) ) )
    for group in groups:
        print( str(len(channels) ) )
        new_channel = copy.copy(template)
        new_channel.update({
            'id': group['id'],
            'name': group['name'],
            'created': group['created'],
            'creator': group['creator'],
            'is_archived': group['is_archived'],
            'is_channel': True,
            'is_general': False,
            'is_member': True,
            'members': group['members'],
            'num_members': len(group['members']),
            'purpose': group['purpose'],
            'topic': group['topic'],
        })
        channels.append( new_channel )

    #We will be overwriting this file on each run.
    with open('channels.json', 'w') as outFile:
        json.dump( channels , outFile, indent=4)
| # fetch and write history for all direct message conversations | |
| # also known as IMs in the slack API. | |
def getDirectMessages(slack, ownerId, userIdNameMap, dryRun):
    """List all direct message conversations and, unless dryRun is set, export them.

    slack         -- client exposing ``im.list()`` and ``im.history``.
    ownerId       -- accepted for the caller's convenience; not used here.
    userIdNameMap -- dict mapping user id to user name.
    dryRun        -- when true, only the conversation partners are printed.

    History is written below the "direct_message" directory, one
    sub-directory per partner; partners missing from userIdNameMap are
    labelled '<user id> (name unknown)'.
    """
    conversations = slack.im.list().body['ims']

    print("\nfound direct messages (1:1) with the following users:")
    for conversation in conversations:
        partnerId = conversation['user']
        print(userIdNameMap.get(partnerId, partnerId + " (name unknown)"))

    if dryRun:
        return

    exportRoot = "direct_message"
    mkdir(exportRoot)
    for conversation in conversations:
        partnerId = conversation['user']
        #note: double check naming of dm directory
        partnerName = userIdNameMap.get(partnerId, partnerId + " (name unknown)")
        print("getting history for direct messages with {0}".format(partnerName))
        mkdir('{parent}/{dm}'.format( parent = exportRoot, dm = partnerName ))
        history = getHistory(slack.im, conversation['id'])
        parseMessages( exportRoot, partnerName, history, "im" )
| # fetch and write history for all private channels | |
| # also known as groups in the slack API. | |
def getPrivateChannels(slack, dryRun):
    """List all private channels and, unless dryRun is set, export their history.

    slack  -- client exposing ``groups.list()`` and ``groups.history``.
    dryRun -- when true, only names and member counts are printed.

    History is written below the "private_channels" directory, one
    sub-directory per private channel name.
    """
    privateChannels = slack.groups.list().body['groups']

    print("\nfound private channels:")
    for entry in privateChannels:
        print("{0}: ({1} members)".format(entry['name'], len(entry['members'])))

    if dryRun:
        return

    exportRoot = "private_channels"
    mkdir(exportRoot)
    for entry in privateChannels:
        print("getting history for private channel {0} with id {1}".format(entry['name'], entry['id']))
        groupName = entry['name']
        mkdir( '{parent}/{group}'.format( parent = exportRoot, group = groupName ) )
        history = getHistory(slack.groups, entry['id'])
        parseMessages( exportRoot, groupName, history, 'group' )
| # fetch all users for the channel and return a map userId -> userName | |
def getUserMap(slack):
    """Return a dict mapping user id to user name.

    slack -- client exposing ``users.list()``.

    Also prints how many users were found.
    """
    #get all users in the slack organization
    members = slack.users.list().body['members']
    idToName = {member['id']: member['name'] for member in members}
    print("found {0} users ".format(len(members)))
    return idToName
| # stores json of user info | |
def dumpUserFile(slack):
    """Store the user list as JSON in users.json in the current working directory.

    slack -- client exposing ``users.list()``.
    """
    #write to user file, any existing file needs to be overwritten.
    with open( "users.json", 'w') as handle:
        members = slack.users.list().body['members']
        json.dump( obj=members, fp=handle, indent=4 )
| # get basic info about the slack channel to ensure the authentication token works | |
def doTestAuth(slack):
    """Call ``auth.test`` to check that the authentication token works.

    slack -- client exposing ``auth.test()``.

    Prints the team and user reported by the call and returns the whole
    response body.
    """
    authInfo = slack.auth.test().body
    print("Successfully authenticated for team {0} and user {1} ".format(authInfo['team'], authInfo['user']))
    return authInfo
if __name__ == "__main__":
    # Command-line entry point: parse the flags, check the token, then export
    # the requested kinds of conversations.
    parser = argparse.ArgumentParser(description='download slack history')

    # The token is handed straight to the client, so let argparse reject a
    # missing one with a usage message instead of running without it.
    parser.add_argument('--token', required=True, help="an api token for a slack user")

    # All switches are plain flags (store_true): pass them without a value.
    parser.add_argument(
        '--dryRun',
        action='store_true',
        default=False,
        help="if dryRun is true, don't fetch/write history only get channel names")

    parser.add_argument(
        '--skipPrivateChannels',
        action='store_true',
        default=False,
        help="skip fetching history for private channels")

    parser.add_argument(
        '--skipChannels',
        action='store_true',
        default=False,
        help="skip fetching history for channels")

    parser.add_argument(
        '--skipDirectMessages',
        action='store_true',
        default=False,
        help="skip fetching history for directMessages")

    args = parser.parse_args()

    slack = Slacker(args.token)

    testAuth = doTestAuth(slack)

    userIdNameMap = getUserMap(slack)

    dryRun = args.dryRun

    if not dryRun:
        #write channel and user jsons
        dumpUserFile(slack)
        dumpChannelFile(slack)

    if not args.skipChannels:
        getChannels(slack, dryRun)

    if not args.skipPrivateChannels:
        getPrivateChannels(slack, dryRun)

    if not args.skipDirectMessages:
        getDirectMessages(slack, testAuth['user_id'], userIdNameMap, dryRun)
usage examples
python slack_history.py --token='123token'
python slack_history.py --token='123token' --dryRun
python slack_history.py --token='123token' --skipDirectMessages
python slack_history.py --token='123token' --skipDirectMessages --skipPrivateChannels
omg so sorry, I had ' ' wrapped around my token...
I am getting this error for some channels – any idea whats wrong?
Traceback (most recent call last): File "slack_history.py", line 289, in <module> getPrivateChannels(slack, dryRun) File "slack_history.py", line 215, in getPrivateChannels parseMessages( parentDir, groupDir, messages, 'group' ) File "slack_history.py", line 120, in parseMessages channelRename( oldRoomPath, newRoomPath ) File "slack_history.py", line 87, in channelRename shutil.move( os.path.join( oldRoomName, fileName ), newRoomName ) File "/usr/lib/python2.7/shutil.py", line 292, in move raise Error, "Destination path '%s' already exists" % real_dst shutil.Error: Destination path 'private_channels/testchannel-intern/.json' already exists
Thanks
Max
Any chance to archive the images too?
could you re-add the MIT license headers from Chandler's initial version?
what license are you using for your additions (ie channel/channel_name/YYYY-MM-DD.json VS channels/channel_name.json) and nit picking ;P channelS :P
Hmmm not working for me, I'm using my bot token.