@teledyn
Created June 17, 2022 05:25

A simple throw-away script to transform the Shares.csv from a LinkedIn Complete Download into files ready to add to Emacs org-roam
# this is free software, public domain; it's a crude go at an obvious solution,
# so no guarantees are given. It wasn't perfect, but I only needed one run.
#
# this script will transform the LinkedIn Complete Download Shares.csv file
# producing a collection of timestamp-LinkedIn-YYYY-MM.org files containing
# posts for that month. These files are then loaded into org-roam via dired,
# an ID assigned using 'C-c n o' (org-id-get-create) and saved.
#
# I used the script to populate an org-roam/posts directory, then used a macro
# to step through, id-create, and save each file. The post items will be tagged
# based on the original LinkedIn hashtags.
#
# note: your LinkedIn archives are not complete. Regardless of what they promise
# or what the GDPR requires, you only get posts back to 2010, and the posts omit
# attached material, images, etc.; each post contains a lnkd.in link back to
# the original post, so if you delete your account or LinkedIn folds, these
# links will be useless.
#
# usage: python Shares.py <path to Shares.csv> <targetdir>
#
# it works for me with Python 3.10 on Ubuntu 22.04 - ymmv
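#
# for reference, the Shares.csv column layout this script assumes (deduced from
# the row indexing below, not from LinkedIn's documentation, so check it against
# your own export): row[0] date, row[1] share URL, row[2] post text,
# row[3] shared link, row[4] attached media URL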
import csv
import re
import sys
HEADER_TEMPLATE = (
""":PROPERTIES:
:END:
#+filetags: posts linkedin social_media
#+title: %s
""")


def process(txt: str):
    "Extract tags from post body and return sanitized txt + tags"
    tags = ''
    if txt:
        txt = txt.replace('"', '').strip()  # .replace('\n\n','\n').strip()
        hashtags = [
            re.sub(r'-', '_', tag) for tag in re.findall(
                '#([\\w][\\w_-]*)', txt, re.IGNORECASE)
        ]
        if hashtags:
            tags = "\t:" + ':'.join(hashtags) + ':'
    return txt, tags
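
# a quick illustration of process(), not used by the script itself:
#   process('Great read on #org-roam and #note-taking "quoted"')
# returns
#   ('Great read on #org-roam and #note-taking quoted', '\t:org_roam:note_taking:')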


def shares_org(row):
    """Render one Shares.csv row as a single org-mode entry."""
    text, tags = process(row[2])
    return (
        "* %(date)s [[%(link)s][source]]\t%(tags)s\n%(text)s%(share)s%(media)s"
        % {'date': row[0],
           'link': row[1],
           'tags': tags,
           'text': "%s\n" % text if text else '',
           'share': "- link: %s\n" % row[3] if row[3] else '',
           'media': "- media: %s\n" % row[4] if row[4] else ''})
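
# for illustration only, with made-up row values: a row like
#   ['2022-06-17 05:25:00', 'https://lnkd.in/example', 'Hello #emacs', '', '']
# renders as an org entry roughly like
#   * 2022-06-17 05:25:00 [[https://lnkd.in/example][source]]		:emacs:
#   Hello #emacs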


def process_csv(csv_file, target='./'):
    with open(csv_file, 'r') as csvfile:
        shares = csv.reader(csvfile)
        n = 0
        current_plug = ''
        last_page = ''
        skip = True
        target = target + '/' if target[-1] != '/' else target
        for row in shares:  # by definition these are in date order
            if skip:  # ignore the header line
                skip = False
                continue
            # squash ':', '-' and ' ' out of the timestamp for the filename slug
            row_ts = row[0].translate({ord(i): None for i in ':- '})
            row_page = 'LinkedIn-' + row[0][:7]  # one page per calendar month
            row_plug = row_ts + '-' + row_page
            org_page = shares_org(row)
            if row_page != last_page:  # first post of a new month
                last_page = row_page
                current_plug = row_plug
                out_mode = 'w'  # overwrite previous
            else:
                out_mode = 'a'
            with open(target + current_plug + '.org', out_mode) as orgfile:
                if out_mode == 'w':
                    print("New file: " + current_plug)
                    orgfile.write(HEADER_TEMPLATE % row_page)
                orgfile.write(org_page)
            n += 1
    print(n, "items added")
    return 0
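
# naming example: assuming the Date column looks like '2022-06-17 05:25:00'
# (check your own export), the first post of that month creates
# 20220617052500-LinkedIn-2022-06.org and later posts from 2022-06 are appended
# to the same file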


def usage(prog):
    print("Usage: " + prog + " Shares.csv targetdir\n")


if __name__ == '__main__':
    if len(sys.argv) < 3:  # both the CSV path and the target dir are required
        usage(sys.argv[0])
        sys.exit(1)
    sys.exit(process_csv(sys.argv[1], sys.argv[2]))