Created
January 3, 2025 00:57
-
-
Save schappim/ce44ddf20e3929a4cc6f67f29caf5d21 to your computer and use it in GitHub Desktop.
A ruby script to summarize YouTube videos
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# frozen_string_literal: true
# This script downloads the English auto-generated subtitles for a YouTube video,
# converts them to a single text block, and sends them to OpenAI for summarization.
# Requirements:
# - yt-dlp (brew install yt-dlp)
# - OpenAI Ruby gem (gem install 'ruby-openai')
# - An OpenAI API key set as an environment variable (export OPENAI_API_KEY=your-api-key)
# - A YouTube video URL
require 'optparse' | |
require 'open3' | |
require 'openai' # https://github.com/alexrudall/ruby-openai?tab=readme-ov-file#ruby-openai | |
# Converts a WebVTT subtitle file into a single block of plain text.
#
# Processing steps:
# * a leading UTF-8 byte-order mark is discarded while reading;
# * the header block (the WEBVTT line plus any metadata lines that follow it,
#   e.g. "Kind: captions" / "Language: en") is dropped;
# * cue timing lines ("00:00:05.000 --> 00:00:07.000 align:start ...", with or
#   without the hours field) are dropped;
# * inline tags such as <c>, <v Speaker> or <00:00:01.000> are removed;
# * consecutive identical lines are collapsed to one;
# * the remaining lines are joined with single spaces.
#
# @param vtt_file_path [String] path of the .vtt file to read
# @param output_file_path [String, nil] where to write the text; when nil the
#   text is printed to standard output instead
# @return [Integer, nil] the result of File.write when a path is given,
#   otherwise nil (the result of puts)
def convert_vtt_to_text(vtt_file_path, output_file_path = nil)
  # Start of any cue timing line; the hours component is optional in WebVTT
  # and cue settings (position:, align:, ...) may follow the end timestamp.
  cue_timing = /\A(?:\d{2,}:)?\d{2}:\d{2}\.\d{3} --> /

  lines = File.readlines(vtt_file_path, encoding: 'BOM|UTF-8').map(&:strip)

  # The header block runs from the WEBVTT line to the first blank line. Also
  # stop at a timing line so a file without the blank separator loses no cue.
  if lines.first.to_s.upcase.start_with?('WEBVTT')
    lines = lines.drop_while { |line| !line.empty? && !cue_timing.match?(line) }
  end

  cue_lines = lines.reject do |line|
    line.upcase.start_with?('WEBVTT') || cue_timing.match?(line)
  end

  # Strip tags first, then discard blanks, so a line that consisted only of
  # tags cannot sit between two identical lines and hide the duplicate.
  plain_text_lines = cue_lines
                     .map { |line| line.gsub(/<[^>]+>/, '').strip }
                     .reject(&:empty?)

  # Keep the first line of every run of identical neighbours.
  unique_lines = plain_text_lines.chunk_while { |a, b| a == b }.map(&:first)

  subtitle_text = unique_lines.join(' ').gsub(/\s+/, ' ')

  if output_file_path
    File.write(output_file_path, subtitle_text, encoding: 'UTF-8')
  else
    puts subtitle_text
  end
end
# Parse command-line options. `parse!` removes the options it recognises from
# ARGV, so the first argument left afterwards is the video URL.
options = {}
parser = OptionParser.new do |opts|
  opts.banner = "Usage: ruby #{File.basename(__FILE__)} [options] YT_URL"
  opts.on('-o', '--output FILE', 'Output transcript file (optional)') do |file|
    options[:output_file] = file
  end
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  # Unknown flag or missing option argument: show a short message and the
  # usage line instead of an uncaught-exception backtrace.
  warn "Error: #{e.message}"
  warn parser.banner
  exit 1
end

# Grab the YouTube URL from the command line
url = ARGV.shift
if url.nil? || url.strip.empty?
  # Diagnostics go to stderr so they never mix with the summary on stdout.
  warn 'Error: No URL provided.'
  warn parser.banner
  exit 1
end
# 1. Run yt-dlp to download English (original) auto subtitles in .vtt format
#    The subtitles are saved to tmp/<video_title>.vtt
#
# Each argument is passed to `system` as its own string, so yt-dlp is executed
# directly rather than through a shell and characters such as `$`, backticks
# or quotes inside the URL are never interpreted. The `--` marks the end of
# options so a URL beginning with "-" cannot be read as a flag.
yt_dlp_ok = system(
  'yt-dlp',
  '--write-auto-sub',
  '--sub-langs', 'en-orig',
  '--skip-download',
  '--output', 'tmp/%(title)s.%(ext)s',
  '--', url
)

# `system` returns nil when the command could not be executed at all and
# false when it exited with a non-zero status; stop in both cases so a failed
# download is not followed by processing of whatever is already in tmp/.
unless yt_dlp_ok
  if yt_dlp_ok.nil?
    warn 'Error: could not run yt-dlp. Is it installed and on your PATH?'
  else
    warn "Error: yt-dlp failed (#{$?})."
  end
  exit 1
end
# 2. Identify the newly created .vtt file in the tmp directory
#    tmp/ may still hold subtitle files from earlier runs, so take the most
#    recently modified one rather than whichever name the glob lists first.
vtt_file = Dir.glob('tmp/*.vtt').max_by { |path| File.mtime(path) }
if vtt_file.nil?
  warn "No VTT file found. Check whether subtitles (en-orig) are actually available for this video."
  exit 1
end

# Default output is tmp/transcript.txt, or the file supplied via --output
transcript_output_path = options[:output_file] || "tmp/transcript.txt"

# 3. Convert the VTT file to a single text block
convert_vtt_to_text(vtt_file, transcript_output_path)
puts "Transcript saved to: #{transcript_output_path}"
# 4. Load the saved transcript and ask OpenAI to summarise it
transcript = File.read(transcript_output_path)

# The API key comes from the environment; ENV.fetch raises KeyError when
# OPENAI_API_KEY is not set.
client = OpenAI::Client.new(access_token: ENV.fetch('OPENAI_API_KEY'))

# System message describing the assistant's role
system_message = {
  role: "system",
  content: "You help startup founders gain insights, knowledge, and ideas by concisely summarizing in bullet point form YouTube video transcripts."
}

# Multi-line user prompt; the transcript is appended on the last line
prompt = <<~PROMPT
  Please concisely summarise this YouTube video transcript into concise bullet points,
  providing any actionable insights, knowledge or (rules of thumb, calculations or equations)
  that I can use to improve my startup.
  If the video makes claims or statements for example "How to Grow Your Business SO Fast in 2025 It Feels ILLEGAL",
  please provide the actionable steps that the video suggests to achieve the claims or statements.
  Transcript: #{transcript}
PROMPT
user_message = { role: "user", content: prompt }

chat_parameters = {
  model: "gpt-4o", # Required.
  messages: [system_message, user_message],
  temperature: 0.7
}
response = client.chat(parameters: chat_parameters)

# Print the summary with two blank lines before and after it
summary = response.dig("choices", 0, "message", "content")
2.times { puts }
puts summary
2.times { puts }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment