Skip to content

Instantly share code, notes, and snippets.

@jongan69
Created December 26, 2024 19:21
Show Gist options
  • Save jongan69/163735bf718212afa5ceeff5b637e264 to your computer and use it in GitHub Desktop.
Save jongan69/163735bf718212afa5ceeff5b637e264 to your computer and use it in GitHub Desktop.
Turn all of your GitHub repos updated in the past year into a PowerPoint presentation
import os
# Set tokenizers parallelism before importing transformers
os.environ["TOKENIZERS_PARALLELISM"] = "false"
import subprocess
import json
import random
import requests
from pptx import Presentation
from pptx.util import Pt, Inches
from pptx.dml.color import RGBColor
from datetime import datetime
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from openai import OpenAI
from dotenv import load_dotenv
# Load environment variables from a local .env file (expects OPENAI_API_KEY).
load_dotenv()
# Module-level OpenAI client shared by generate_image(); reads the API key
# from the environment loaded above.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
# AI Image Generation Function
def generate_image(prompt, image_path):
    """
    Generate an image using OpenAI's DALL·E 3 API and save it to disk.

    Args:
        prompt: Text prompt describing the desired image.
        image_path: Destination file path for the downloaded image.

    Returns:
        image_path on success, or None on any failure — this function is
        deliberately best-effort; callers treat None as "no image for
        this slide".
    """
    print(f"Generating image for prompt: {prompt}")
    try:
        response = client.images.generate(
            model="dall-e-3",
            prompt=prompt,
            size="1024x1024",
            quality="hd",
            n=1,
            style="vivid",
        )
        image_url = response.data[0].url
        # Use a timeout and fail fast on HTTP errors instead of silently
        # writing an error body to disk.
        download = requests.get(image_url, timeout=60)
        download.raise_for_status()
        # dirname() is "" for a bare filename; os.makedirs("") would raise.
        directory = os.path.dirname(image_path)
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(image_path, "wb") as f:
            f.write(download.content)
        print(f"Image saved to {image_path}")
        return image_path
    except Exception as e:
        print(f"Failed to generate image: {e}")
        return None
# Step 1: Load model
def initialize_model():
    """Load the FLAN-T5 base tokenizer and seq2seq model from the HF hub."""
    print("Loading google/flan-t5-base model...")
    checkpoint = "google/flan-t5-base"
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
    )
# Step 2: Fetch repositories with more metadata
def fetch_repositories(owner, limit=30):
    """
    List an owner's repositories via the GitHub CLI (`gh`).

    Args:
        owner: GitHub username or organization to query.
        limit: Maximum number of repositories to request.

    Returns:
        Public repositories (list of dicts) sorted by star count, descending.

    Raises:
        Exception: if the `gh` command exits non-zero.
    """
    fields = (
        "name,description,updatedAt,visibility,stargazerCount,"
        "primaryLanguage,licenseInfo,forkCount,url,homepageUrl,isArchived"
    )
    # Pass argv as a list (shell=False) so a crafted `owner` value cannot
    # inject shell syntax — the original f-string + shell=True was injectable.
    cmd = ["gh", "repo", "list", owner, "--json", fields, "--limit", str(limit)]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise Exception(f"Error fetching repositories: {result.stderr}")
    all_repos = json.loads(result.stdout)
    public_repos = [
        repo for repo in all_repos
        if repo.get("visibility", "").lower() == "public"
    ]
    public_repos.sort(key=lambda x: x.get("stargazerCount", 0), reverse=True)
    print(f"Found {len(public_repos)} public repositories")
    return public_repos
# Step 3: Filter repositories updated since January 1st of current year
def filter_recent_repositories(repositories):
    """
    Keep only repositories updated on/after January 1st of the current year.

    Repos whose "updatedAt" field cannot be parsed are reported and skipped.
    """
    current_year = datetime.now().year
    january_first = datetime(current_year, 1, 1)
    recent_repos = []
    for repo in repositories:
        try:
            updated_at = datetime.strptime(repo["updatedAt"], "%Y-%m-%dT%H:%M:%SZ")
        except Exception as e:
            # Best-effort: a malformed record shouldn't abort the whole run.
            print(f"Error parsing repository date: {e}")
            continue
        if updated_at >= january_first:
            recent_repos.append(repo)
    print(f"Found {len(recent_repos)} repositories updated since January 1st, {current_year}")
    return recent_repos
# Step 4: Enhanced repository summarization
def summarize_repositories(tokenizer, model, repositories):
    """
    Produce an AI-generated summary plus a metadata string per repository.

    Args:
        tokenizer: Hugging Face tokenizer paired with `model`.
        model: Seq2seq model used to generate the summaries.
        repositories: Repo dicts as returned by fetch_repositories().

    Returns:
        List of dicts with "name", "summary", and "metadata" keys.
    """
    summaries = []
    for repo in repositories:
        name = repo["name"]
        description = repo["description"] or "No description provided."
        stars = repo.get("stargazerCount", 0)
        # primaryLanguage may be None (or missing) for repos with no code.
        language = repo.get("primaryLanguage", {})
        language = language.get("name", "Not specified") if language else "Not specified"
        forks = repo.get("forkCount", 0)
        # licenseInfo is None for unlicensed repos, so guard before .get().
        license_info = "No license specified"
        if repo.get("licenseInfo"):
            license_info = repo["licenseInfo"].get("name", "No license specified")
        url = repo.get("url", "")
        homepage = repo.get("homepageUrl", "")
        is_archived = repo.get("isArchived", False)
        # Generate the summary; clean_up_tokenization_spaces is explicit to
        # silence the transformers deprecation warning about its default.
        prompt = f"Based on this project information, provide a clear 2-3 sentence summary explaining what it does and its main purpose without mentioning Github at all:\nProject Name: {name}\nProject Description: {description}\n"
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
        outputs = model.generate(inputs.input_ids, max_length=128, num_return_sequences=1)
        summary = tokenizer.decode(
            outputs[0],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )
        # Metadata labels: repaired from mojibake (UTF-8 emoji that had been
        # decoded as cp1252) back to the intended emoji.
        metadata = [
            f"🌟 Stars: {stars}",
            f"🔀 Forks: {forks}",
            f"💻 Language: {language}",
            f"📜 License: {license_info}",
            f"🔗 URL: {url}",
        ]
        if homepage:
            metadata.append(f"🏠 Homepage: {homepage}")
        if is_archived:
            metadata.append("📦 Status: Archived")
        summaries.append({
            "name": name,
            "summary": summary,
            "metadata": "\n".join(metadata),
        })
    return summaries
# Step 5: Enhanced slide creation
def add_colorful_slide(prs, title, summary, metadata, image_path=None):
    """
    Append one slide to `prs` with a random pastel background, a title,
    a summary paragraph, a metadata block, and (optionally) an image on
    the right-hand side.
    """
    slide = prs.slides.add_slide(prs.slide_layouts[6])  # blank layout

    # Random pastel background.
    pastels = [
        RGBColor(255, 179, 186),
        RGBColor(186, 255, 201),
        RGBColor(186, 225, 255),
        RGBColor(255, 223, 186),
    ]
    fill = slide.background.fill
    fill.solid()
    fill.fore_color.rgb = random.choice(pastels)

    # Title across the top.
    title_frame = slide.shapes.add_textbox(
        Inches(0.5), Inches(0.5), Inches(9), Inches(1)
    ).text_frame
    title_frame.text = title
    heading = title_frame.paragraphs[0]
    heading.font.size = Pt(32)
    heading.font.bold = True

    # Text boxes shrink to the left half when an image will sit on the right.
    text_width = Inches(4.5 if image_path else 9)

    summary_frame = slide.shapes.add_textbox(
        Inches(0.5), Inches(2), text_width, Inches(2)
    ).text_frame
    summary_frame.word_wrap = True
    summary_para = summary_frame.paragraphs[0]
    summary_para.text = summary
    summary_para.font.size = Pt(20)

    metadata_frame = slide.shapes.add_textbox(
        Inches(0.5), Inches(4), text_width, Inches(2)
    ).text_frame
    metadata_frame.word_wrap = True
    metadata_para = metadata_frame.paragraphs[0]
    metadata_para.text = metadata
    metadata_para.font.size = Pt(16)

    if image_path and os.path.exists(image_path):
        slide.shapes.add_picture(image_path, Inches(5.5), Inches(2), Inches(4), Inches(4))
# Step 6: Updated presentation creation
def create_presentation(repositories, output_file):
    """
    Build and save a .pptx with one slide per summarized repository.

    Args:
        repositories: Dicts with "name", "summary", and "metadata" keys,
            as produced by summarize_repositories().
        output_file: Path where the presentation is saved.
    """
    prs = Presentation()
    for idx, repo in enumerate(repositories):
        # Title star repaired from mojibake ("â­" was UTF-8 ⭐ read as cp1252).
        title = f"{repo['name']} ⭐"
        summary = repo["summary"]
        metadata = repo["metadata"]
        prompt = f"Create a stunning visual for the GitHub project '{repo['name']}' based on this summary: {summary}"
        # generate_image returns None on failure; the slide then has no image.
        image_path = generate_image(prompt, f"./images/image_{idx}.png")
        add_colorful_slide(prs, title, summary, metadata, image_path)
    prs.save(output_file)
# Main Function
def main():
    """Prompt for a GitHub owner and build the slide deck end to end."""
    owner = input("Enter the GitHub username or organization: ")
    output_file = "Public_GitHub_Presentation_With_Images.pptx"

    recent_repositories = filter_recent_repositories(fetch_repositories(owner))
    if not recent_repositories:
        print("No public repositories updated in the last year.")
        return

    tokenizer, model = initialize_model()
    summarized = summarize_repositories(tokenizer, model, recent_repositories)
    create_presentation(summarized, output_file)
    print(f"Presentation saved to {output_file}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment