# This is free software, public domain: a crude go at an obvious solution,
# so no guarantees are given. It wasn't perfect, but I only needed one run.
#
# This script transforms the Shares.csv file from a LinkedIn complete
# download into a collection of timestamp-LinkedIn-YYYY-MM.org files, each
# containing the posts for that month. These files are then loaded into
# org-roam via dired, an ID assigned using 'C-c n o' (org-id-get-create),
# and saved.
#
# I used the script to populate an org-roam/posts directory, then used a
# keyboard macro to step through, id-create and save each file. The post
# items are tagged based on the original LinkedIn hashtags.
#
# Note: your LinkedIn archives are not complete. Regardless of what they
# promise or what the GDPR requires, you only get posts back to 2010, and
# the posts omit attached material, images etc. Each post contains a
# lnkd.in link back to the original post, so if you delete your account or
# LinkedIn folds, those links will be useless.
#
# usage: python Shares.py <path to Shares.csv> <targetdir>
#
# It works for me with Python 3.10 on Ubuntu 22.04 - ymmv.
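#
# For reference, a generated month file looks roughly like this (values
# illustrative, not from a real export; the :ID: is filled in later by
# org-id-get-create):
#
#   :PROPERTIES:
#   :ID:       <added by org-id-get-create>
#   :END:
#   #+filetags: posts linkedin social_media
#   #+title: LinkedIn-2023-01
#   * 2023-01-15 09:30:45 [[https://lnkd.in/xxxx][source]]  :machine_learning:
#   Great post #machine-learning
#   - link: https://example.com/article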
|
import csv
import re
import sys
|
HEADER_TEMPLATE = (
    """:PROPERTIES:
:END:
#+filetags: posts linkedin social_media
#+title: %s
""")
|
def process(txt: str):
    "Extract tags from the post body and return sanitized text + tags."
    tags = ''
    if txt:
        txt = txt.replace('"', '').strip()
        # org tag syntax forbids '-', so map LinkedIn's #foo-bar to foo_bar
        hashtags = [
            re.sub(r'-', '_', tag) for tag in re.findall(
                r'#(\w[\w_-]*)', txt, re.IGNORECASE)
        ]
        if hashtags:
            tags = "\t:" + ':'.join(hashtags) + ':'
    return txt, tags
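
# e.g. (illustrative):
#   process('Great post #machine-learning #AI')
#   -> ('Great post #machine-learning #AI', '\t:machine_learning:AI:')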
|
def shares_org(row):
    "Render one Shares.csv row as an org item."
    # column order assumed from the export: date, share link, commentary,
    # shared URL, media URL
    text, tags = process(row[2])
    return (
        "* %(date)s [[%(link)s][source]]\t%(tags)s\n%(text)s%(share)s%(media)s"
        % {'date': row[0],
           'link': row[1],
           'tags': tags,
           'text': "%s\n" % text if text else '',
           'share': "- link: %s\n" % row[3] if row[3] else '',
           'media': "- media: %s\n" % row[4] if row[4] else ''
           })
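
# e.g. a row of ['2023-01-15 09:30:45', 'https://lnkd.in/xxxx',
#                'Great post #ai', '', ''] renders as (illustrative):
#   '* 2023-01-15 09:30:45 [[https://lnkd.in/xxxx][source]]\t\t:ai:\nGreat post #ai\n'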
|
def process_csv(csv_file, target='./'):
    "Split Shares.csv into one org file per month under target."
    # newline='' lets the csv module handle newlines embedded in quoted
    # post text, per the csv docs
    with open(csv_file, 'r', newline='') as csvfile:
        shares = csv.reader(csvfile)
        n = 0
        current_plug = ''
        last_page = ''
        target = target if target.endswith('/') else target + '/'
        next(shares, None)  # skip the header row
        for row in shares:  # the export lists shares in date order
            # squash ':', '-' and ' ' out of the date for a timestamp slug
            row_ts = row[0].translate({ord(i): None for i in ':- '})
            row_page = 'LinkedIn-' + row[0][:7]  # YYYY-MM
            row_plug = row_ts + '-' + row_page
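            # e.g. row[0] of '2023-01-15 09:30:45' (assuming the export's
            # 'YYYY-MM-DD HH:MM:SS' date format) gives
            #   row_ts   '20230115093045'
            #   row_page 'LinkedIn-2023-01'
            #   row_plug '20230115093045-LinkedIn-2023-01'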
|
            org_page = shares_org(row)
            if row_page != last_page:
                # first share of a new month starts a fresh file; its
                # timestamp fixes the filename for the whole month
                last_page = row_page
                current_plug = row_plug
                out_mode = 'w'  # overwrite output from any previous run
            else:
                out_mode = 'a'

            with open(target + current_plug + '.org', out_mode) as orgfile:
                if out_mode == 'w':
                    print("New file: " + current_plug)
                    orgfile.write(HEADER_TEMPLATE % row_page)
                orgfile.write(org_page)

            n += 1
        print(n, "items added")
    return 0

|
def usage(prog):
    print("Usage: " + prog + " Shares.csv targetdir\n")

|
if __name__ == '__main__':
    if len(sys.argv) < 3:  # need both the csv path and the target dir
        usage(sys.argv[0])
        sys.exit(1)
    sys.exit(process_csv(sys.argv[1], sys.argv[2]))