# This is free software, public domain: a crude go at an obvious solution,
# so no guarantees are given. It wasn't perfect, but I only needed one run.
#
# This script transforms the Shares.csv file from a LinkedIn complete
# download into a collection of timestamp-LinkedIn-YYYY-MM.org files, each
# containing the posts for that month. These files are then loaded into
# org-roam via dired, an ID assigned using 'C-c n o' (org-id-get-create),
# and saved.
#
# I used the script to populate an org-roam/posts directory, then used a
# keyboard macro to step through, id-create and save each file. The post
# items are tagged based on the original LinkedIn hashtags.
#
# Note: your LinkedIn archives are not complete. Regardless of what they
# promise or what the GDPR requires, you only get posts back to 2010, and
# the posts omit attached material, images etc. Each post contains a
# lnkd.in link back to the original post, so if you delete your account or
# LinkedIn folds, those links will be useless.
#
# usage: python Shares.py <path to Shares.csv> <targetdir>
#
# It works for me with Python 3.10 on Ubuntu 22.04 - ymmv.
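#
# For reference, a generated month file looks roughly like this (values
# illustrative, not from a real export; the :ID: is filled in later by
# org-id-get-create):
#
#   :PROPERTIES:
#   :ID:       <added by org-id-get-create>
#   :END:
#   #+filetags: posts linkedin social_media
#   #+title: LinkedIn-2023-01
#   * 2023-01-15 09:30:45 [[https://lnkd.in/xxxx][source]]  :machine_learning:
#   Great post #machine-learning
#   - link: https://example.com/article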
|
import csv
import re
import sys
|
HEADER_TEMPLATE = (
    """:PROPERTIES:
:END:
#+filetags: posts linkedin social_media
#+title: %s
""")
|
def process(txt: str):
    "Extract tags from the post body and return sanitized text + tags."
    tags = ''
    if txt:
        txt = txt.replace('"', '').strip()
        # org tag syntax forbids '-', so map LinkedIn's #foo-bar to foo_bar
        hashtags = [
            re.sub(r'-', '_', tag) for tag in re.findall(
                r'#(\w[\w_-]*)', txt, re.IGNORECASE)
        ]
        if hashtags:
            tags = "\t:" + ':'.join(hashtags) + ':'
    return txt, tags
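
# e.g. (illustrative):
#   process('Great post #machine-learning #AI')
#   -> ('Great post #machine-learning #AI', '\t:machine_learning:AI:')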
|
def shares_org(row):
    "Render one Shares.csv row as an org item."
    # column order assumed from the export: date, share link, commentary,
    # shared URL, media URL
    text, tags = process(row[2])
    return (
        "* %(date)s [[%(link)s][source]]\t%(tags)s\n%(text)s%(share)s%(media)s"
        % {'date': row[0],
           'link': row[1],
           'tags': tags,
           'text': "%s\n" % text if text else '',
           'share': "- link: %s\n" % row[3] if row[3] else '',
           'media': "- media: %s\n" % row[4] if row[4] else ''
           })
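
# e.g. a row of ['2023-01-15 09:30:45', 'https://lnkd.in/xxxx',
#                'Great post #ai', '', ''] renders as (illustrative):
#   '* 2023-01-15 09:30:45 [[https://lnkd.in/xxxx][source]]\t\t:ai:\nGreat post #ai\n'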
|
def process_csv(csv_file, target='./'):
    "Split Shares.csv into one org file per month under target."
    # newline='' lets the csv module handle newlines embedded in quoted
    # post text, per the csv docs
    with open(csv_file, 'r', newline='') as csvfile:
        shares = csv.reader(csvfile)
        n = 0
        current_plug = ''
        last_page = ''
        target = target if target.endswith('/') else target + '/'
        next(shares, None)  # skip the header row
        for row in shares:  # the export lists shares in date order
            # squash ':', '-' and ' ' out of the date for a timestamp slug
            row_ts = row[0].translate({ord(i): None for i in ':- '})
            row_page = 'LinkedIn-' + row[0][:7]  # YYYY-MM
            row_plug = row_ts + '-' + row_page
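            # e.g. row[0] of '2023-01-15 09:30:45' (assuming the export's
            # 'YYYY-MM-DD HH:MM:SS' date format) gives
            #   row_ts   '20230115093045'
            #   row_page 'LinkedIn-2023-01'
            #   row_plug '20230115093045-LinkedIn-2023-01'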
|
            org_page = shares_org(row)
            if row_page != last_page:
                # first share of a new month starts a fresh file; its
                # timestamp fixes the filename for the whole month
                last_page = row_page
                current_plug = row_plug
                out_mode = 'w'  # overwrite output from any previous run
            else:
                out_mode = 'a'

            with open(target + current_plug + '.org', out_mode) as orgfile:
                if out_mode == 'w':
                    print("New file: " + current_plug)
                    orgfile.write(HEADER_TEMPLATE % row_page)
                orgfile.write(org_page)

            n += 1
        print(n, "items added")
    return 0

|
def usage(prog):
    print("Usage: " + prog + " Shares.csv targetdir\n")

|
if __name__ == '__main__':
    if len(sys.argv) < 3:  # need both the csv path and the target dir
        usage(sys.argv[0])
        sys.exit(1)
    sys.exit(process_csv(sys.argv[1], sys.argv[2]))