Created
October 30, 2024 18:06
-
-
Save j2kun/d010edc41ad636382211f5df2eb8ee19 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import fire | |
from atproto import Client, models | |
from scripts import utils as utils | |
from scripts import syndication as syndication | |
# A simple text file with two urls per line.
# Handed to syndication.syndicate_to_service as `database_filepath`.
DATABASE_FILE = "scripts/published_bluesky.txt"
# Account identifier used when logging in to Bluesky.
BLUESKY_HANDLE = "jeremykun.com"
# Base URL the atproto Client is constructed with.
BLUESKY_API_URL = "https://bsky.social"
import re | |
from typing import List, Dict | |
def parse_urls(text: str) -> List[Dict]:
    """Locate http(s) URLs in ``text`` and report their UTF-8 byte spans.

    The search runs over the UTF-8 encoding of ``text``, so the reported
    offsets are byte offsets, not character offsets.

    Args:
        text: The text to scan.

    Returns:
        One dict per URL found, in order of appearance, with the keys
        ``"start"`` and ``"end"`` (byte offsets into the UTF-8 encoding of
        ``text``; ``end`` is exclusive) and ``"url"`` (the matched URL as a
        ``str``). Empty when no URL is found.
    """
    # Partial/naive URL regex based on: https://stackoverflow.com/a/3809435
    # tweaked to disallow some trailing punctuation.
    #
    # A URL must sit at the very start of the text or directly after a
    # non-word byte. This is spelled `(?:^|\W)` rather than a character class
    # such as `[$|\W]`: inside brackets `$` and `|` are literal characters and
    # the class always consumes one byte, so a URL that opens the text would
    # never be matched.
    url_regex = rb"(?:^|\W)(https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*[-a-zA-Z0-9@%_\+~#//=])?)"
    text_bytes = text.encode("UTF-8")
    return [
        {
            "start": m.start(1),
            "end": m.end(1),
            "url": m.group(1).decode("UTF-8"),
        }
        for m in re.finditer(url_regex, text_bytes)
    ]
def parse_facets(text: str) -> List[Dict]:
    """Build one link facet for every URL that ``parse_urls`` finds in ``text``.

    Args:
        text: The post text to scan.

    Returns:
        A list of facet dicts, one per URL and in the same order. Each has an
        ``"index"`` entry carrying the URL span's ``byteStart``/``byteEnd``
        and a single ``app.bsky.richtext.facet#link`` feature pointing at the
        URL.
    """
    return [
        {
            "index": {
                "byteStart": span["start"],
                "byteEnd": span["end"],
            },
            "features": [
                {
                    "$type": "app.bsky.richtext.facet#link",
                    # NOTE: the key is "uri" (with an "I"), not "url".
                    "uri": span["url"],
                }
            ],
        }
        for span in parse_urls(text)
    ]
def bluesky_post_publisher(post: str, bluesky_client=None, **kwargs):
    """Send ``post`` as a single Bluesky post and return the new post's URI.

    Args:
        post: Text of the post; URLs in it are turned into link facets.
        bluesky_client: Client object whose ``send_post`` method is used.
        **kwargs: Accepted and ignored.

    Returns:
        The ``uri`` of the strong reference created from the send response.

    Raises:
        ValueError: If ``bluesky_client`` is not provided.
    """
    if not bluesky_client:
        raise ValueError("bluesky_client must be provided")

    link_facets = parse_facets(post)
    response = bluesky_client.send_post(post, facets=link_facets)
    return models.create_strong_ref(response).uri
def bluesky_thread_adjuster(posts, blog_post_permalink=None, **kwargs):
    """Re-split a thread's posts so they respect Bluesky's per-post limit.

    The first post gets an "Archived at" back-reference to the blog post
    appended before splitting, and its split budget is reduced by the length
    of that back-reference. Every other post is split with the full budget.

    Args:
        posts: The texts of the thread's posts, in order.
        blog_post_permalink: Permalink placed in the back-reference.
        **kwargs: Accepted and ignored.

    Returns:
        A flat list of post texts produced by ``utils.split_post``.

    Raises:
        ValueError: If ``blog_post_permalink`` is not provided.
    """
    if not blog_post_permalink:
        raise ValueError("blog_post_permalink must be provided")

    # TODO: shorten links while parsing facets?
    # Bluesky has a 300-character per post limit.
    # This is a hacky method to handle this. Could beef it up later.
    char_limit = 300
    archive_note = f"\n\nArchived at: {blog_post_permalink}"

    adjusted = []
    for position, text in enumerate(posts):
        if position == 0:
            text = text + archive_note
            budget = char_limit - len(archive_note)
        else:
            budget = char_limit
        adjusted.extend(utils.split_post(text, max_char_len=budget))
    return adjusted
def bluesky_thread_publisher(posts, bluesky_client=None, **kwargs):
    """Publish ``posts`` as one Bluesky thread and return the root post's URI.

    The first post becomes the thread root. Each later post is sent as a
    reply whose parent is the previously sent post and whose root is the
    first post. A progress line is printed after every post that is sent.

    Args:
        posts: The texts of the thread's posts, in order.
        bluesky_client: Client object whose ``send_post`` method is used.
        **kwargs: Accepted and ignored.

    Returns:
        The ``uri`` of the strong reference to the root post.

    Raises:
        ValueError: If ``bluesky_client`` is not provided, or if ``posts``
            yields no posts (there would be no root post to report).
    """
    if not bluesky_client:
        raise ValueError("bluesky_client must be provided")

    root_post_ref = None
    last_post_ref = None
    for i, post in enumerate(posts):
        facets = parse_facets(post)
        if i == 0:
            # create the root post
            root_post_ref = models.create_strong_ref(
                bluesky_client.send_post(post, facets=facets)
            )
            last_post_ref = root_post_ref
        else:
            # Both refs were set on the first iteration, so they are
            # available for every reply.
            last_post_ref = models.create_strong_ref(
                bluesky_client.send_post(
                    post,
                    facets=facets,
                    reply_to=models.AppBskyFeedPost.ReplyRef(
                        parent=last_post_ref, root=root_post_ref
                    ),
                )
            )
        print(
            f"Successfully posted post {i} of the thread: "
            f"{last_post_ref.cid} -> {last_post_ref.uri}"
        )

    # Checked after the loop so that any iterable (not just a sized
    # sequence) is handled: nothing sent means there is no root to return.
    if root_post_ref is None:
        raise ValueError("posts must contain at least one post")
    return root_post_ref.uri
def publish_to_bluesky(since_days=1, dry_run=False):
    """Idempotently publish shortform and regular posts to bluesky.

    Unless ``dry_run`` is set, a client is created for ``BLUESKY_API_URL`` and
    logged in as ``BLUESKY_HANDLE`` with the password taken from the
    ``BLUESKY_PASSWORD`` environment variable. On a dry run no client is
    created and ``None`` is passed along instead. The actual work is
    delegated to ``syndication.syndicate_to_service``.

    Args:
        since_days: Forwarded unchanged to ``syndicate_to_service``.
        dry_run: Forwarded to ``syndicate_to_service``; also skips the login.
    """
    client = None
    if not dry_run:
        client = Client(base_url=BLUESKY_API_URL)
        client.login(BLUESKY_HANDLE, os.environ.get("BLUESKY_PASSWORD"))

    publisher_hooks = {
        "thread_publisher": bluesky_thread_publisher,
        "thread_adjuster": bluesky_thread_adjuster,
        "post_publisher": bluesky_post_publisher,
    }
    syndication.syndicate_to_service(
        "bluesky",
        database_filepath=DATABASE_FILE,
        since_days=since_days,
        dry_run=dry_run,
        bluesky_client=client,
        **publisher_hooks,
    )
# Script entry point: expose publish_to_bluesky through python-fire.
if __name__ == "__main__":
    fire.Fire(publish_to_bluesky)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment