Skip to content

Instantly share code, notes, and snippets.

@j2kun
Created October 30, 2024 18:06
Show Gist options
  • Save j2kun/d010edc41ad636382211f5df2eb8ee19 to your computer and use it in GitHub Desktop.
Save j2kun/d010edc41ad636382211f5df2eb8ee19 to your computer and use it in GitHub Desktop.
import os
import fire
from atproto import Client, models
from scripts import utils as utils
from scripts import syndication as syndication
# A simple text file with two urls per line
# (handed to syndication.syndicate_to_service as database_filepath).
DATABASE_FILE = "scripts/published_bluesky.txt"
# Account identifier passed to Client.login together with the password read
# from the BLUESKY_PASSWORD environment variable.
BLUESKY_HANDLE = "jeremykun.com"
# Base URL given to the atproto Client constructor.
BLUESKY_API_URL = "https://bsky.social"
import re
from typing import List, Dict
def parse_urls(text: str) -> List[Dict]:
    """Locate http(s) URLs in ``text`` and report their UTF-8 byte offsets.

    The text is encoded to UTF-8 before matching, so ``start`` and ``end``
    are byte offsets into the encoded text rather than character indices.

    Args:
        text: The post text to scan.

    Returns:
        One dict per URL found, in order of appearance, with the keys
        ``start`` (inclusive byte offset), ``end`` (exclusive byte offset)
        and ``url`` (the matched URL as a ``str``).
    """
    # Partial/naive URL regex based on: https://stackoverflow.com/a/3809435
    # tweaked to disallow some trailing punctuation.
    # Group 1 (the URL) must sit at the start of the text or right after a
    # non-word byte. The prefix used to be the character class [$|\W], which
    # always consumes one byte and therefore could never match a URL that
    # begins the text.
    url_regex = rb"(?:^|\W)(https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*[-a-zA-Z0-9@%_\+~#//=])?)"
    text_bytes = text.encode("UTF-8")
    return [
        {
            "start": m.start(1),
            "end": m.end(1),
            "url": m.group(1).decode("UTF-8"),
        }
        for m in re.finditer(url_regex, text_bytes)
    ]
def parse_facets(text: str) -> List[Dict]:
    """Build one link facet for every URL that ``parse_urls`` finds in ``text``.

    Each facet is a dict with an ``index`` (``byteStart``/``byteEnd`` copied
    from the URL span) and a single ``app.bsky.richtext.facet#link`` feature
    pointing at the URL. Only links are handled here.

    Args:
        text: The post text to scan for URLs.

    Returns:
        A list of facet dicts, one per URL, in the order the URLs appear.
    """
    return [
        {
            "index": {
                "byteStart": span["start"],
                "byteEnd": span["end"],
            },
            "features": [
                {
                    "$type": "app.bsky.richtext.facet#link",
                    # NOTE: URI ("I") not URL ("L")
                    "uri": span["url"],
                }
            ],
        }
        for span in parse_urls(text)
    ]
def bluesky_post_publisher(post: str, bluesky_client=None, **kwargs):
    """Send a single standalone post and return the URI of the new record.

    Args:
        post: Text of the post; its URLs are turned into link facets.
        bluesky_client: Client object used to send the post (required).
        **kwargs: Accepted and ignored.

    Returns:
        The ``uri`` of the strong ref created from the send_post response.

    Raises:
        ValueError: If ``bluesky_client`` is missing or falsy.
    """
    if bluesky_client:
        link_facets = parse_facets(post)
        response = bluesky_client.send_post(post, facets=link_facets)
        return models.create_strong_ref(response).uri
    raise ValueError("bluesky_client must be provided")
def bluesky_thread_adjuster(posts, blog_post_permalink=None, **kwargs):
    """Re-split a thread's posts so they respect Bluesky's length limit.

    The first post gets an "Archived at" back-reference to the blog post
    appended, and its length budget is reduced by the length of that
    back-reference. Every post is then passed through ``utils.split_post``
    and the resulting pieces are concatenated in order.

    Args:
        posts: Iterable of post texts making up the thread.
        blog_post_permalink: URL of the originating blog post (required).
        **kwargs: Accepted and ignored.

    Returns:
        A flat list of the pieces returned by ``utils.split_post``.

    Raises:
        ValueError: If ``blog_post_permalink`` is missing or falsy.
    """
    if not blog_post_permalink:
        raise ValueError("blog_post_permalink must be provided")

    # TODO: shorten links while parsing facets?
    # Bluesky has a 300-character per post limit
    # This is a hacky method to handle this. Could beef it up later.
    backref = f"\n\nArchived at: {blog_post_permalink}"
    adjusted = []
    for index, text in enumerate(posts):
        if index == 0:
            # Only the opening post carries the back-reference.
            pieces = utils.split_post(
                text + backref, max_char_len=300 - len(backref)
            )
        else:
            pieces = utils.split_post(text, max_char_len=300)
        adjusted.extend(pieces)
    return adjusted
def bluesky_thread_publisher(posts, bluesky_client=None, **kwargs):
    """Publish ``posts`` as one reply-chained thread and return the root URI.

    The first post becomes the thread root. Every later post is sent as a
    reply whose parent is the previously sent post and whose root is the
    first post. A confirmation line is printed after each post is sent.

    Args:
        posts: Iterable of post texts, in thread order.
        bluesky_client: Client object used to send the posts (required).
        **kwargs: Accepted and ignored.

    Returns:
        The ``uri`` of the strong ref created for the root post.

    Raises:
        ValueError: If ``bluesky_client`` is missing or falsy, or if
            ``posts`` yields no posts (previously this surfaced as an
            ``AttributeError`` on ``None``).
    """
    if not bluesky_client:
        raise ValueError("bluesky_client must be provided")

    root_post_ref = None
    last_post_ref = None
    for i, post in enumerate(posts):
        facets = parse_facets(post)
        if root_post_ref is None:
            # Nothing sent yet: this post is the root of the thread.
            response = bluesky_client.send_post(post, facets=facets)
        else:
            response = bluesky_client.send_post(
                post,
                facets=facets,
                reply_to=models.AppBskyFeedPost.ReplyRef(
                    parent=last_post_ref, root=root_post_ref
                ),
            )
        last_post_ref = models.create_strong_ref(response)
        if root_post_ref is None:
            root_post_ref = last_post_ref
        print(
            f"Successfully posted post {i} of the thread: "
            f"{last_post_ref.cid} -> {last_post_ref.uri}"
        )

    # Checked after the loop so that any empty iterable is caught, not just
    # empty sequences.
    if root_post_ref is None:
        raise ValueError("posts must contain at least one post")
    return root_post_ref.uri
def publish_to_bluesky(since_days=1, dry_run=False):
    """Idempotently publish shortform and regular posts to bluesky.

    Unless ``dry_run`` is set, logs in as ``BLUESKY_HANDLE`` using the
    password from the ``BLUESKY_PASSWORD`` environment variable, then
    delegates the actual work to ``syndication.syndicate_to_service`` with
    the Bluesky-specific publisher and adjuster callbacks.

    Args:
        since_days: Forwarded unchanged to ``syndicate_to_service``.
        dry_run: When true, no client is created and no login happens; the
            flag is forwarded to ``syndicate_to_service`` with a ``None``
            client.

    Raises:
        ValueError: If ``dry_run`` is false and ``BLUESKY_PASSWORD`` is
            unset or empty.
    """
    bluesky_client = None
    if not dry_run:
        password = os.environ.get("BLUESKY_PASSWORD")
        # Fail fast with a message naming the variable instead of handing
        # None to login and surfacing a less specific error from the client.
        if not password:
            raise ValueError(
                "The BLUESKY_PASSWORD environment variable must be set "
                "to publish to Bluesky"
            )
        bluesky_client = Client(base_url=BLUESKY_API_URL)
        bluesky_client.login(BLUESKY_HANDLE, password)

    syndication.syndicate_to_service(
        "bluesky",
        database_filepath=DATABASE_FILE,
        thread_publisher=bluesky_thread_publisher,
        thread_adjuster=bluesky_thread_adjuster,
        post_publisher=bluesky_post_publisher,
        since_days=since_days,
        dry_run=dry_run,
        bluesky_client=bluesky_client,
    )
# Script entry point: hand publish_to_bluesky to python-fire, which exposes
# the function as the command-line interface when this file is run directly.
if __name__ == "__main__":
    fire.Fire(publish_to_bluesky)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment