Created
December 1, 2017 04:51
-
-
Save brandtg/767a816d261eb9ea692ac9528bd12d35 to your computer and use it in GitHub Desktop.
A script to convert Pocket's export format into something Safari can import
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
#
# Input: https://getpocket.com/export
# Output: Importable bookmarks HTML file
#
# Usage:
# ./pocket_to_safari.py "2017-11-30 - Pocket Export" < ril_export.html > pocket.html
#
import sys

from bs4 import BeautifulSoup
from jinja2 import Template
##################################################
# Python 2 only: force the interpreter-wide default encoding to UTF-8 so that
# implicit str/unicode conversions do not fail on non-ASCII text.
# See: https://stackoverflow.com/questions/2276200/changing-default-encoding-of-python
#
# sys.setdefaultencoding() is removed from `sys` at interpreter start-up, so
# the module has to be reloaded to get it back. Neither the `reload` builtin
# nor sys.setdefaultencoding() exists on Python 3 (whose default encoding is
# already UTF-8), hence the version guard: without it the script stops here
# with a NameError on Python 3.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 -- builtin on Python 2 only
    sys.setdefaultencoding('UTF8')
##################################################
# Name of the bookmarks folder to create; the first CLI argument if given.
folder_name = sys.argv[1] if len(sys.argv) > 1 else 'Pocket Export'

# Prefer the raw byte stream (sys.stdin.buffer on Python 3) so BeautifulSoup
# works out the document encoding itself instead of depending on the locale.
# Python 2's sys.stdin has no `buffer` attribute and already yields bytes.
raw_html = getattr(sys.stdin, 'buffer', sys.stdin).read()
soup = BeautifulSoup(raw_html, 'html.parser')

# Locate the <h1> whose text is exactly "Read Archive"; the element that
# follows it as a sibling is taken to be the container holding the links.
archive_heading = next(
    (elt for elt in soup.find_all('h1') if elt.text == 'Read Archive'),
    None)
read_archive_list = (
    archive_heading.find_next_sibling()
    if archive_heading is not None
    else None)
if read_archive_list is None:
    # Fail with an actionable message instead of a bare IndexError /
    # AttributeError when the input is not the expected export file.
    sys.exit('error: no "Read Archive" section found in the input; '
             'expected a Pocket export (https://getpocket.com/export)')

# One {'text', 'href'} dict per link. href=True skips anchors that carry no
# href attribute, which would otherwise raise KeyError on elt['href'].
links = [
    {'text': elt.text, 'href': elt['href']}
    for elt
    in read_archive_list.find_all('a', href=True)]
# Netscape bookmark file containing a single folder with one entry per link.
# autoescape=True makes Jinja2 HTML-escape the folder name, link titles and
# URLs; without it a title or URL containing '&', '<' or '"' yields malformed
# (or injected) markup in the generated file.
template = Template("""\
<!DOCTYPE netscape-bookmark-file-1>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>Bookmarks</title>
</head>
<body>
<h1>Bookmarks</h1>
<dt><h3 folded>{{ folder_name }}</h3>
<dl><p>
{% for link in links %}<dt><a href="{{ link['href'] }}">{{ link['text'] }}</a>
{% endfor %}
</dl><p>
</body>
</html>
""", autoescape=True)
rendered_template = template.render(folder_name=folder_name, links=links)

# The document declares charset=UTF-8, so write UTF-8 bytes explicitly rather
# than whatever encoding the terminal/locale implies. Python 3 exposes the
# byte stream as sys.stdout.buffer; Python 2's sys.stdout accepts bytes
# directly and has no `buffer` attribute.
getattr(sys.stdout, 'buffer', sys.stdout).write(
    rendered_template.encode('utf-8'))
sys.stdout.flush()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@majorgear happy to hear! Surprised it still (kind of) works after all these years