Skip to content

Instantly share code, notes, and snippets.

@jongan69
Created December 26, 2024 19:21
Show Gist options
  • Save jongan69/163735bf718212afa5ceeff5b637e264 to your computer and use it in GitHub Desktop.
Save jongan69/163735bf718212afa5ceeff5b637e264 to your computer and use it in GitHub Desktop.
Turn all of your GitHub repos updated in the past year into a PowerPoint presentation
import os
# Set tokenizers parallelism before importing transformers
os.environ["TOKENIZERS_PARALLELISM"] = "false"
import subprocess
import json
import random
import requests
from pptx import Presentation
from pptx.util import Pt, Inches
from pptx.dml.color import RGBColor
from datetime import datetime
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from openai import OpenAI
from dotenv import load_dotenv
# Load environment variables from a local .env file (expects OPENAI_API_KEY).
load_dotenv()
# Module-level OpenAI client shared by generate_image(); reads the API key
# from the environment loaded above.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
# AI Image Generation Function
def generate_image(prompt, image_path):
    """
    Generate an image using OpenAI's DALL·E 3 API and save it to disk.

    Args:
        prompt: Text prompt describing the desired image.
        image_path: Destination file path for the downloaded image.

    Returns:
        image_path on success, or None on any failure — this function is
        deliberately best-effort; callers treat None as "no image for
        this slide".
    """
    print(f"Generating image for prompt: {prompt}")
    try:
        response = client.images.generate(
            model="dall-e-3",
            prompt=prompt,
            size="1024x1024",
            quality="hd",
            n=1,
            style="vivid",
        )
        image_url = response.data[0].url
        # Use a timeout and fail fast on HTTP errors instead of silently
        # writing an error body to disk.
        download = requests.get(image_url, timeout=60)
        download.raise_for_status()
        # dirname() is "" for a bare filename; os.makedirs("") would raise.
        directory = os.path.dirname(image_path)
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(image_path, "wb") as f:
            f.write(download.content)
        print(f"Image saved to {image_path}")
        return image_path
    except Exception as e:
        print(f"Failed to generate image: {e}")
        return None
# Step 1: Load model
def initialize_model():
    """Load the FLAN-T5 base tokenizer and seq2seq model from the HF hub."""
    print("Loading google/flan-t5-base model...")
    checkpoint = "google/flan-t5-base"
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
    )
# Step 2: Fetch repositories with more metadata
def fetch_repositories(owner, limit=30):
    """
    List an owner's repositories via the GitHub CLI (`gh`).

    Args:
        owner: GitHub username or organization to query.
        limit: Maximum number of repositories to request.

    Returns:
        Public repositories (list of dicts) sorted by star count, descending.

    Raises:
        Exception: if the `gh` command exits non-zero.
    """
    fields = (
        "name,description,updatedAt,visibility,stargazerCount,"
        "primaryLanguage,licenseInfo,forkCount,url,homepageUrl,isArchived"
    )
    # Pass argv as a list (shell=False) so a crafted `owner` value cannot
    # inject shell syntax — the original f-string + shell=True was injectable.
    cmd = ["gh", "repo", "list", owner, "--json", fields, "--limit", str(limit)]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise Exception(f"Error fetching repositories: {result.stderr}")
    all_repos = json.loads(result.stdout)
    public_repos = [
        repo for repo in all_repos
        if repo.get("visibility", "").lower() == "public"
    ]
    public_repos.sort(key=lambda x: x.get("stargazerCount", 0), reverse=True)
    print(f"Found {len(public_repos)} public repositories")
    return public_repos
# Step 3: Filter repositories updated since January 1st of current year
def filter_recent_repositories(repositories):
    """
    Keep only repositories updated on/after January 1st of the current year.

    Repos whose "updatedAt" field cannot be parsed are reported and skipped.
    """
    current_year = datetime.now().year
    january_first = datetime(current_year, 1, 1)
    recent_repos = []
    for repo in repositories:
        try:
            updated_at = datetime.strptime(repo["updatedAt"], "%Y-%m-%dT%H:%M:%SZ")
        except Exception as e:
            # Best-effort: a malformed record shouldn't abort the whole run.
            print(f"Error parsing repository date: {e}")
            continue
        if updated_at >= january_first:
            recent_repos.append(repo)
    print(f"Found {len(recent_repos)} repositories updated since January 1st, {current_year}")
    return recent_repos
# Step 4: Enhanced repository summarization
def summarize_repositories(tokenizer, model, repositories):
    """
    Produce an AI-generated summary plus a metadata string per repository.

    Args:
        tokenizer: Hugging Face tokenizer paired with `model`.
        model: Seq2seq model used to generate the summaries.
        repositories: Repo dicts as returned by fetch_repositories().

    Returns:
        List of dicts with "name", "summary", and "metadata" keys.
    """
    summaries = []
    for repo in repositories:
        name = repo["name"]
        description = repo["description"] or "No description provided."
        stars = repo.get("stargazerCount", 0)
        # primaryLanguage may be None (or missing) for repos with no code.
        language = repo.get("primaryLanguage", {})
        language = language.get("name", "Not specified") if language else "Not specified"
        forks = repo.get("forkCount", 0)
        # licenseInfo is None for unlicensed repos, so guard before .get().
        license_info = "No license specified"
        if repo.get("licenseInfo"):
            license_info = repo["licenseInfo"].get("name", "No license specified")
        url = repo.get("url", "")
        homepage = repo.get("homepageUrl", "")
        is_archived = repo.get("isArchived", False)
        # Generate the summary; clean_up_tokenization_spaces is explicit to
        # silence the transformers deprecation warning about its default.
        prompt = f"Based on this project information, provide a clear 2-3 sentence summary explaining what it does and its main purpose without mentioning Github at all:\nProject Name: {name}\nProject Description: {description}\n"
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
        outputs = model.generate(inputs.input_ids, max_length=128, num_return_sequences=1)
        summary = tokenizer.decode(
            outputs[0],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )
        # Metadata labels: repaired from mojibake (UTF-8 emoji that had been
        # decoded as cp1252) back to the intended emoji.
        metadata = [
            f"🌟 Stars: {stars}",
            f"🔀 Forks: {forks}",
            f"💻 Language: {language}",
            f"📜 License: {license_info}",
            f"🔗 URL: {url}",
        ]
        if homepage:
            metadata.append(f"🏠 Homepage: {homepage}")
        if is_archived:
            metadata.append("📦 Status: Archived")
        summaries.append({
            "name": name,
            "summary": summary,
            "metadata": "\n".join(metadata),
        })
    return summaries
# Step 5: Enhanced slide creation
def add_colorful_slide(prs, title, summary, metadata, image_path=None):
    """
    Append one slide to `prs` with a random pastel background, a title,
    a summary paragraph, a metadata block, and (optionally) an image on
    the right-hand side.
    """
    slide = prs.slides.add_slide(prs.slide_layouts[6])  # blank layout

    # Random pastel background.
    pastels = [
        RGBColor(255, 179, 186),
        RGBColor(186, 255, 201),
        RGBColor(186, 225, 255),
        RGBColor(255, 223, 186),
    ]
    fill = slide.background.fill
    fill.solid()
    fill.fore_color.rgb = random.choice(pastels)

    # Title across the top.
    title_frame = slide.shapes.add_textbox(
        Inches(0.5), Inches(0.5), Inches(9), Inches(1)
    ).text_frame
    title_frame.text = title
    heading = title_frame.paragraphs[0]
    heading.font.size = Pt(32)
    heading.font.bold = True

    # Text boxes shrink to the left half when an image will sit on the right.
    text_width = Inches(4.5 if image_path else 9)

    summary_frame = slide.shapes.add_textbox(
        Inches(0.5), Inches(2), text_width, Inches(2)
    ).text_frame
    summary_frame.word_wrap = True
    summary_para = summary_frame.paragraphs[0]
    summary_para.text = summary
    summary_para.font.size = Pt(20)

    metadata_frame = slide.shapes.add_textbox(
        Inches(0.5), Inches(4), text_width, Inches(2)
    ).text_frame
    metadata_frame.word_wrap = True
    metadata_para = metadata_frame.paragraphs[0]
    metadata_para.text = metadata
    metadata_para.font.size = Pt(16)

    if image_path and os.path.exists(image_path):
        slide.shapes.add_picture(image_path, Inches(5.5), Inches(2), Inches(4), Inches(4))
# Step 6: Updated presentation creation
def create_presentation(repositories, output_file):
    """
    Build and save a .pptx with one slide per summarized repository.

    Args:
        repositories: Dicts with "name", "summary", and "metadata" keys,
            as produced by summarize_repositories().
        output_file: Path where the presentation is saved.
    """
    prs = Presentation()
    for idx, repo in enumerate(repositories):
        # Title star repaired from mojibake ("â­" was UTF-8 ⭐ read as cp1252).
        title = f"{repo['name']} ⭐"
        summary = repo["summary"]
        metadata = repo["metadata"]
        prompt = f"Create a stunning visual for the GitHub project '{repo['name']}' based on this summary: {summary}"
        # generate_image returns None on failure; the slide then has no image.
        image_path = generate_image(prompt, f"./images/image_{idx}.png")
        add_colorful_slide(prs, title, summary, metadata, image_path)
    prs.save(output_file)
# Main Function
def main():
    """Prompt for a GitHub owner and build the slide deck end to end."""
    owner = input("Enter the GitHub username or organization: ")
    output_file = "Public_GitHub_Presentation_With_Images.pptx"

    recent_repositories = filter_recent_repositories(fetch_repositories(owner))
    if not recent_repositories:
        print("No public repositories updated in the last year.")
        return

    tokenizer, model = initialize_model()
    summarized = summarize_repositories(tokenizer, model, recent_repositories)
    create_presentation(summarized, output_file)
    print(f"Presentation saved to {output_file}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment