Last active
January 29, 2024 20:14
-
-
Save Saik0s/943b20822472e2634a7eb033a9f8c5ff to your computer and use it in GitHub Desktop.
create youtube video summary
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import json
import os
import sys

import requests
# Constants
CURRENT_VERSION = "1.1.0"
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"

# Deployment-specific settings. The functions below reference these names, so
# they must exist at module level; they are read from the environment so that
# no endpoint or secret has to be committed with the script.
# NOTE(review): the environment variable names are a choice made here --
# confirm they match how the script is deployed.
SERVER_URL = os.environ.get("SERVER_URL", "").rstrip("/")
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
def get_transcript(url, timeout=30):
    """Fetch the transcript of a video and return it as a single string.

    Sends a GET request to ``{SERVER_URL}/transcript`` with ``url`` as the
    ``url`` query parameter, then joins the ``text`` field of every item in
    the JSON response with single spaces.

    Args:
        url: Video URL to look up.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``text`` values of the response items joined by spaces.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Pass the URL through ``params`` so requests percent-encodes it; when it
    # is interpolated raw, any "&" or "#" inside the video URL is parsed as
    # part of the outer query string and the server sees a truncated value.
    response = requests.get(
        f"{SERVER_URL}/transcript",
        params={"url": url},
        timeout=timeout,
    )
    response.raise_for_status()
    return " ".join(item["text"] for item in response.json())
def get_video_details(url, timeout=30):
    """Fetch the title and channel name of a video.

    Sends a GET request to ``{SERVER_URL}/videoDetails`` with ``url`` as the
    ``url`` query parameter and reads ``title`` and ``channel.name`` from the
    JSON response.

    Args:
        url: Video URL to look up.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``(video_title, channel_title)`` tuple.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # ``params`` percent-encodes the video URL; raw interpolation would let
    # "&" or "#" in it break the outer query string.
    response = requests.get(
        f"{SERVER_URL}/videoDetails",
        params={"url": url},
        timeout=timeout,
    )
    response.raise_for_status()
    details = response.json()
    return details["title"], details["channel"]["name"]
def check_version(timeout=30):
    """Return the newest TubePlus version string reported by the server.

    Sends a GET request to ``{SERVER_URL}/newestVersion`` with
    ``plugin=TubePlus`` and returns the ``version`` field of the JSON
    response.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Without a timeout a stalled server would hang the script indefinitely.
    response = requests.get(
        f"{SERVER_URL}/newestVersion",
        params={"plugin": "TubePlus"},
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()["version"]
def generate_summary(prompt, timeout=120):
    """Ask the OpenRouter-hosted Mixtral model for a chain-of-density summary.

    Builds a raw instruct-style prompt from ``prompt`` (the article or
    transcript text) plus the fixed summarisation instructions, posts it to
    ``OPENROUTER_URL`` and returns the ``text`` of the first choice in the
    JSON response.

    Args:
        prompt: The text to summarise.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        The raw completion text. The instructions ask the model to answer in
        JSON, but the text is returned as-is without parsing.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    inst = '''
You are Transcript Thief, a GPT model customized for analyzing YouTube video transcripts. Here your specific use case and instructions:
You will generate increasingly concise, entity-dense summaries of the above Article.
Repeat the following 2 steps 5 times.
Step 1. Identify 1-3 informative Entities ("; " delimited) from the Article which are missing from the previously generated summary.
Step 2. Write a new, denser summary of identical length which covers every entity and detail from the previous summary plus the Missing Entities.
A Missing Entity is:
- Relevant: to the main story.
- Specific: descriptive yet concise (5 words or fewer).
- Novel: not in the previous summary.
- Faithful: present in the Article.
- Anywhere: located anywhere in the Article.
Guidelines:
- The first summary should be long (4-5 sentences, ~80 words) yet highly non-specific, containing little information beyond the entities marked as missing. Use overly verbose language and fillers (e.g., "this article discusses") to reach ~80 words.
- Make every word count: rewrite the previous summary to improve flow and make space for additional entities.
- Make space with fusion, compression, and removal of uninformative phrases like "the article discusses".
- The summaries should become highly dense and concise yet self-contained, e.g., easily understood without the Article.
- Missing entities can appear anywhere in the new summary.
- Never drop entities from the previous summary. If space cannot be made, add fewer new entities.
Remember, use the exact same number of words for each summary.
Answer in JSON. The JSON should be a dictionary with key "summaries" that contains a list (length 5) of dictionaries whose keys are "Missing_Entities" and "Denser_Summary".
'''
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {OPENROUTER_API_KEY}',
    }
    data = {
        'model': 'mistralai/mixtral-8x7b-instruct',
        # The article has to sit inside the [INST] ... [/INST] span and before
        # the instructions: they refer to "the above Article", and anything
        # placed after [/INST] is read by the model as the beginning of its
        # own answer rather than as input.
        'prompt': f'[INST] Article: {prompt}\n{inst} [/INST]',
        'max_tokens': 1000,
        'temperature': 0,
    }
    response = requests.post(
        OPENROUTER_URL, headers=headers, json=data, timeout=timeout
    )
    # Surface authentication and quota failures as HTTP errors instead of an
    # opaque KeyError on the missing 'choices' field.
    response.raise_for_status()
    return response.json()['choices'][0]['text']
def main(input_url, summary_flag):
    """Print the title and channel of a video and, optionally, a summary.

    Shorts links are rewritten to the regular ``watch?v=`` form first. The
    transcript and video details are then fetched, a warning is printed when
    ``CURRENT_VERSION`` differs from the version the server reports, and the
    title line is printed. A summary is generated and printed only when
    ``summary_flag`` equals ``"--summary"``.

    Args:
        input_url: Video URL as given on the command line.
        summary_flag: ``"--summary"`` to request a summary; any other value
            (including ``None``) skips it.
    """
    if "/shorts/" in input_url:
        # Take only the ID segment after "/shorts/". Splitting on the last
        # "/" alone kept any "?feature=share" suffix and returned an empty
        # ID when the link ended with a trailing slash.
        shorts_id = input_url.split("/shorts/", 1)[1]
        for separator in "/?#&":
            shorts_id = shorts_id.split(separator, 1)[0]
        input_url = f"https://www.youtube.com/watch?v={shorts_id}"

    transcript = get_transcript(input_url)
    video_title, channel_title = get_video_details(input_url)

    latest_version = check_version()
    if CURRENT_VERSION != latest_version:
        print(f"Warning: Your TubePlus version is not up to date. Please update to the latest version ({latest_version}).")
        print("")

    print(f'Title: "{video_title}", by the channel "{channel_title}"')

    if summary_flag == "--summary":
        summary = generate_summary(transcript)
        print("Summary:")
        print(summary)
if __name__ == "__main__":
    # Command-line entry point: the first argument is the video URL, the
    # optional second argument is passed to main() as the summary flag.
    if len(sys.argv) < 2:
        # Report misuse on stderr with a non-zero status so shell callers can
        # tell it apart from a successful run.
        print("Usage: python script.py <YouTube URL> [--summary]", file=sys.stderr)
        sys.exit(2)
    main(sys.argv[1], sys.argv[2] if len(sys.argv) > 2 else None)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment