schappim · January 3, 2025 00:57
diff --git a/summarize_youtube.rb b/summarize_youtube.rb
 #!/usr/bin/env ruby
 # frozen_string_literal: true

 # This script downloads the English auto-generated subtitles for a YouTube video,
 # converts them to a single text block, and sends them to OpenAI for summarization.

 # Requirements:
 # - yt-dlp (brew install yt-dlp)
 # - OpenAI Ruby gem (gem install 'ruby-openai')
 # - An OpenAI API key set as an environment variable (export OPENAI_API_KEY=your-api-key)
 # - A YouTube video URL

 require 'optparse'
 require 'open3'
 require 'openai' # https://github.com/alexrudall/ruby-openai?tab=readme-ov-file#ruby-openai

 # Converts a WebVTT subtitle file into one whitespace-normalised paragraph.
 #
 # Removed from the output:
 # - the file header block: the "WEBVTT" line plus any metadata lines that
 #   follow it (e.g. "Kind: captions", "Language: en") up to the first blank
 #   line or cue timing line;
 # - cue timing lines, with or without an hours field and with optional cue
 #   settings (e.g. "00:05.000 --> 00:07.000 align:start position:0%");
 # - inline tags such as <c>, <v Speaker1> or <00:00:01.000>;
 # - consecutive duplicate lines.
 #
 # @param vtt_file_path [String] path of the .vtt file to read (UTF-8, an
 #   optional byte-order mark is skipped)
 # @param output_file_path [String, nil] file to write the text to; when nil
 #   the text is printed to stdout instead
 # @return [Integer, nil] the byte count from File.write when a path is given,
 #   otherwise nil from puts
 def convert_vtt_to_text(vtt_file_path, output_file_path = nil)
   # One pattern covers every cue timing line: the hours field is optional and
   # anything after the arrow (end time, cue settings) is irrelevant here.
   timing_line = /\A(?:\d{2,}:)?\d{2}:\d{2}\.\d{3}\s+-->/

   # "bom|utf-8" drops a leading byte-order mark so the header check below
   # still sees "WEBVTT" at the very start of the first line.
   lines = File.readlines(vtt_file_path, encoding: 'bom|utf-8').map(&:strip)

   # Skip the whole header block rather than only the "WEBVTT" line, otherwise
   # metadata such as "Kind: captions" ends up in the transcript.
   if lines.first&.upcase&.start_with?('WEBVTT')
     header_end = lines.index { |line| line.empty? || timing_line.match?(line) }
     lines = lines.drop(header_end || lines.length)
   end

   cue_lines = lines.reject { |line| line.empty? || timing_line.match?(line) }

   # Strip inline tags, then discard lines that consisted of tags only so they
   # cannot sit between two otherwise identical lines and defeat the de-dup.
   plain_text_lines = cue_lines.map { |line| line.gsub(/<[^>]+>/, '').strip }
   plain_text_lines = plain_text_lines.reject(&:empty?)

   # Collapse runs of identical neighbouring lines down to a single line.
   unique_lines = plain_text_lines.chunk_while { |a, b| a == b }.map(&:first)

   # Join into one paragraph and squeeze any whitespace run to a single space.
   subtitle_text = unique_lines.join(' ').gsub(/\s+/, ' ').strip

   if output_file_path
     File.write(output_file_path, subtitle_text, encoding: 'UTF-8')
   else
     puts subtitle_text
   end
 end

 # Command-line handling: an optional -o/--output path for the transcript,
 # followed by the mandatory YouTube URL.
 options = {}
 OptionParser.new do |opts|
   opts.banner = "Usage: ruby #{File.basename(__FILE__)} [options] YT_URL"

   opts.on('-o', '--output FILE', 'Output transcript file (optional)') do |file|
     options[:output_file] = file
   end
 end.parse!

 # Grab the YouTube URL from the command line
 url = ARGV.shift
 unless url
   puts "Error: No URL provided."
   puts "Usage: ruby #{File.basename(__FILE__)} [options] YT_URL"
   exit 1
 end

 # 1. Run yt-dlp to download English (original) auto subtitles in .vtt format
 #    The subtitles are saved to tmp/<video_title>.vtt

 # Remember which subtitle files already exist so the file produced by this
 # run can be told apart from leftovers of earlier runs.
 existing_vtt_files = Dir.glob("tmp/*.vtt")

 # Argument-list form of system: no shell is involved, so characters such as
 # $, backticks, quotes or ; inside the URL reach yt-dlp verbatim instead of
 # being interpreted. "--" stops a URL/ID starting with "-" from being parsed
 # as an option.
 downloaded = system(
   'yt-dlp',
   '--write-auto-sub',
   '--sub-langs', 'en-orig',
   '--skip-download',
   '--output', 'tmp/%(title)s.%(ext)s',
   '--', url
 )

 # system returns nil when the command could not be started and false when it
 # exited with a non-zero status.
 unless downloaded
   puts "Error: yt-dlp failed or is not installed."
   exit 1
 end

 # 2. Identify the newly created .vtt file in the tmp directory
 vtt_files = Dir.glob("tmp/*.vtt")
 if vtt_files.empty?
   puts "No VTT file found. Check whether subtitles (en-orig) are actually available for this video."
   exit 1
 end

 # Prefer a file that did not exist before this run. If none is new (e.g. the
 # subtitle file was already present and was left in place), fall back to the
 # most recently modified one rather than an arbitrary first match.
 new_vtt_files = vtt_files - existing_vtt_files
 candidates = new_vtt_files.empty? ? vtt_files : new_vtt_files
 vtt_file = candidates.max_by { |path| File.mtime(path) }

 # Default output is tmp/transcript.txt, or the file supplied via --output
 transcript_output_path = options[:output_file] || "tmp/transcript.txt"

 # 3. Convert the VTT file to a single text block
 convert_vtt_to_text(vtt_file, transcript_output_path)

 puts "Transcript saved to: #{transcript_output_path}"

 # 4. Read the transcript file and send it to OpenAI for summarization
 transcript = File.read(transcript_output_path)

 # Initialize the OpenAI client (ENV.fetch raises KeyError if the key is unset)
 client = OpenAI::Client.new(access_token: ENV.fetch('OPENAI_API_KEY'))

 # Multi-line prompt; the transcript is appended at the end
 prompt = <<~PROMPT
   Please concisely summarise this YouTube video transcript into concise bullet points,
   providing any actionable insights, knowledge or (rules of thumb, calculations or equations)
   that I can use to improve my startup.

   If the video makes claims or statements for example "How to Grow Your Business SO Fast in 2025 It Feels ILLEGAL",
   please provide the actionable steps that the video suggests to achieve the claims or statements.

   Transcript: #{transcript}
 PROMPT

 system_message = 'You help startup founders gain insights, knowledge, and ideas by concisely summarizing in bullet point form YouTube video transcripts.'

 response = client.chat(
   parameters: {
     model: 'gpt-4o', # Required.
     messages: [
       { role: 'system', content: system_message },
       { role: 'user', content: prompt }
     ],
     temperature: 0.7
   }
 )

 # Surface a missing summary instead of silently printing blank lines.
 summary = response.dig('choices', 0, 'message', 'content')
 unless summary
   puts "Error: OpenAI returned no summary: #{response.inspect}"
   exit 1
 end

 puts
 puts
 puts summary
 puts
 puts
	#!/usr/bin/env ruby
	# frozen_string_literal: true

	# This script downloads the English auto-generated subtitles for a YouTube video,
	# converts them to a single text block, and sends them to OpenAI for summarization.

	# Requirements:
	# - yt-dlp (brew install yt-dlp)
	# - OpenAI Ruby gem (gem install 'ruby-openai')
	# - An OpenAI API key set as an environment variable (export OPENAI_API_KEY=your-api-key)
	# - A YouTube video URL

	require 'optparse'
	require 'open3'
	require 'openai' # https://github.com/alexrudall/ruby-openai?tab=readme-ov-file#ruby-openai

	# Converts a WebVTT subtitle file into one whitespace-normalised paragraph.
	#
	# Removed from the output:
	# - the file header block: the "WEBVTT" line plus any metadata lines that
	#   follow it (e.g. "Kind: captions", "Language: en") up to the first blank
	#   line or cue timing line;
	# - cue timing lines, with or without an hours field and with optional cue
	#   settings (e.g. "00:05.000 --> 00:07.000 align:start position:0%");
	# - inline tags such as <c>, <v Speaker1> or <00:00:01.000>;
	# - consecutive duplicate lines.
	#
	# @param vtt_file_path [String] path of the .vtt file to read (UTF-8, an
	#   optional byte-order mark is skipped)
	# @param output_file_path [String, nil] file to write the text to; when nil
	#   the text is printed to stdout instead
	# @return [Integer, nil] the byte count from File.write when a path is given,
	#   otherwise nil from puts
	def convert_vtt_to_text(vtt_file_path, output_file_path = nil)
	  # One pattern covers every cue timing line: the hours field is optional and
	  # anything after the arrow (end time, cue settings) is irrelevant here.
	  timing_line = /\A(?:\d{2,}:)?\d{2}:\d{2}\.\d{3}\s+-->/

	  # "bom|utf-8" drops a leading byte-order mark so the header check below
	  # still sees "WEBVTT" at the very start of the first line.
	  lines = File.readlines(vtt_file_path, encoding: 'bom|utf-8').map(&:strip)

	  # Skip the whole header block rather than only the "WEBVTT" line, otherwise
	  # metadata such as "Kind: captions" ends up in the transcript.
	  if lines.first&.upcase&.start_with?('WEBVTT')
	    header_end = lines.index { |line| line.empty? || timing_line.match?(line) }
	    lines = lines.drop(header_end || lines.length)
	  end

	  cue_lines = lines.reject { |line| line.empty? || timing_line.match?(line) }

	  # Strip inline tags, then discard lines that consisted of tags only so they
	  # cannot sit between two otherwise identical lines and defeat the de-dup.
	  plain_text_lines = cue_lines.map { |line| line.gsub(/<[^>]+>/, '').strip }
	  plain_text_lines = plain_text_lines.reject(&:empty?)

	  # Collapse runs of identical neighbouring lines down to a single line.
	  unique_lines = plain_text_lines.chunk_while { |a, b| a == b }.map(&:first)

	  # Join into one paragraph and squeeze any whitespace run to a single space.
	  subtitle_text = unique_lines.join(' ').gsub(/\s+/, ' ').strip

	  if output_file_path
	    File.write(output_file_path, subtitle_text, encoding: 'UTF-8')
	  else
	    puts subtitle_text
	  end
	end

	# Command-line handling: an optional -o/--output path for the transcript,
	# followed by the mandatory YouTube URL.
	options = {}
	OptionParser.new do |opts|
	  opts.banner = "Usage: ruby #{File.basename(__FILE__)} [options] YT_URL"

	  opts.on('-o', '--output FILE', 'Output transcript file (optional)') do |file|
	    options[:output_file] = file
	  end
	end.parse!

	# Grab the YouTube URL from the command line
	url = ARGV.shift
	unless url
	  puts "Error: No URL provided."
	  puts "Usage: ruby #{File.basename(__FILE__)} [options] YT_URL"
	  exit 1
	end

	# 1. Run yt-dlp to download English (original) auto subtitles in .vtt format
	#    The subtitles are saved to tmp/<video_title>.vtt

	# Remember which subtitle files already exist so the file produced by this
	# run can be told apart from leftovers of earlier runs.
	existing_vtt_files = Dir.glob("tmp/*.vtt")

	# Argument-list form of system: no shell is involved, so characters such as
	# $, backticks, quotes or ; inside the URL reach yt-dlp verbatim instead of
	# being interpreted. "--" stops a URL/ID starting with "-" from being parsed
	# as an option.
	downloaded = system(
	  'yt-dlp',
	  '--write-auto-sub',
	  '--sub-langs', 'en-orig',
	  '--skip-download',
	  '--output', 'tmp/%(title)s.%(ext)s',
	  '--', url
	)

	# system returns nil when the command could not be started and false when it
	# exited with a non-zero status.
	unless downloaded
	  puts "Error: yt-dlp failed or is not installed."
	  exit 1
	end

	# 2. Identify the newly created .vtt file in the tmp directory
	vtt_files = Dir.glob("tmp/*.vtt")
	if vtt_files.empty?
	  puts "No VTT file found. Check whether subtitles (en-orig) are actually available for this video."
	  exit 1
	end

	# Prefer a file that did not exist before this run. If none is new (e.g. the
	# subtitle file was already present and was left in place), fall back to the
	# most recently modified one rather than an arbitrary first match.
	new_vtt_files = vtt_files - existing_vtt_files
	candidates = new_vtt_files.empty? ? vtt_files : new_vtt_files
	vtt_file = candidates.max_by { |path| File.mtime(path) }

	# Default output is tmp/transcript.txt, or the file supplied via --output
	transcript_output_path = options[:output_file] || "tmp/transcript.txt"

	# 3. Convert the VTT file to a single text block
	convert_vtt_to_text(vtt_file, transcript_output_path)

	puts "Transcript saved to: #{transcript_output_path}"

	# 4. Read the transcript file and send it to OpenAI for summarization
	transcript = File.read(transcript_output_path)

	# Initialize the OpenAI client (ENV.fetch raises KeyError if the key is unset)
	client = OpenAI::Client.new(access_token: ENV.fetch('OPENAI_API_KEY'))

	# Multi-line prompt; the transcript is appended at the end
	prompt = <<~PROMPT
	  Please concisely summarise this YouTube video transcript into concise bullet points,
	  providing any actionable insights, knowledge or (rules of thumb, calculations or equations)
	  that I can use to improve my startup.

	  If the video makes claims or statements for example "How to Grow Your Business SO Fast in 2025 It Feels ILLEGAL",
	  please provide the actionable steps that the video suggests to achieve the claims or statements.

	  Transcript: #{transcript}
	PROMPT

	system_message = 'You help startup founders gain insights, knowledge, and ideas by concisely summarizing in bullet point form YouTube video transcripts.'

	response = client.chat(
	  parameters: {
	    model: 'gpt-4o', # Required.
	    messages: [
	      { role: 'system', content: system_message },
	      { role: 'user', content: prompt }
	    ],
	    temperature: 0.7
	  }
	)

	# Surface a missing summary instead of silently printing blank lines.
	summary = response.dig('choices', 0, 'message', 'content')
	unless summary
	  puts "Error: OpenAI returned no summary: #{response.inspect}"
	  exit 1
	end

	puts
	puts
	puts summary
	puts
	puts