Skip to content

Instantly share code, notes, and snippets.

@grigio
Created June 12, 2025 06:58
Show Gist options
  • Save grigio/2e236fc0add21fbf0032d9193614d2b9 to your computer and use it in GitHub Desktop.
Save grigio/2e236fc0add21fbf0032d9193614d2b9 to your computer and use it in GitHub Desktop.
It uses yt-dlp to fetch VTT subtitles and converts them to readable text.
#!/bin/bash
# subdwn - print the subtitles of a YouTube video as plain text.
#
# Usage: subdwn <youtube_video_url> [sub_lang]
#   sub_lang  Optional subtitle language code (default: en).
#
# Requires yt-dlp on PATH. The transcript is written to stdout and all
# diagnostics go to stderr, so the output can be piped or redirected safely.
# Exits 1 when yt-dlp is missing, when no URL is given, when the temporary
# directory cannot be created, or when no subtitle file was fetched.

set -u

# Check if yt-dlp is installed
if ! command -v yt-dlp >/dev/null 2>&1; then
  echo "Error: yt-dlp is not installed. Please install it first." >&2
  exit 1
fi

# Check if a URL argument is provided
if [ -z "${1:-}" ]; then
  echo "Usage: subdwn <youtube_video_url> [sub_lang]" >&2
  echo " sub_lang: Optional subtitle language (default: en)" >&2
  exit 1
fi

video_url="$1"

# Use "en" if no language provided.
# Deliberately NOT named LANG: LANG is the locale environment variable, and
# assigning to it when it is already exported would hand a bogus locale
# (e.g. "en") to every child process started below.
sub_lang="${2:-en}"

# Create a temporary directory to store subtitles; it is removed on any exit.
# Deliberately NOT named TMPDIR, which is the standard variable other tools
# consult to locate their temporary directory.
work_dir=$(mktemp -d) || {
  echo "Error: could not create a temporary directory." >&2
  exit 1
}
trap 'rm -rf -- "$work_dir"' EXIT

# Download the subtitles only (no video) as WebVTT:
#   --skip-download        do not download the video itself
#   --write-auto-sub       request the automatically generated subtitles
#   --sub-lang "$sub_lang" subtitle language to fetch
#   --sub-format vtt       ask for the WebVTT format
#   --                     end of options, so a URL/ID starting with "-" is
#                          not mistaken for a flag
# NOTE(review): --write-subs is not passed, so uploader-provided (manual)
# subtitles are presumably not requested - confirm against the yt-dlp docs.
# yt-dlp's own output is silenced on purpose to keep stdout transcript-only;
# failure is detected below by the absence of a subtitle file.
yt-dlp --skip-download --write-auto-sub --sub-lang "$sub_lang" \
  --sub-format "vtt" -o "$work_dir/sub.%(ext)s" -- "$video_url" >/dev/null 2>&1

# Check if subtitle file was downloaded (take the first match if several)
sub_file=$(find "$work_dir" -name "sub.*" | head -n 1)
if [ -z "$sub_file" ]; then
  echo "No subtitles found for this video." >&2
  exit 1
fi

# Convert WebVTT (.vtt) subtitles to plain text:
#   grep  drops every line containing "<" or ">", which removes the cue
#         timing lines ("-->") and any line carrying inline tags
#   awk   keeps only the first occurrence of each remaining line
#   tail  skips the first five surviving lines (presumably the WEBVTT header
#         residue; the count looks tuned to YouTube's auto-caption layout)
grep -v -E '<|>' "$sub_file" | awk '!seen[$0]++' | tail -n +6

exit 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment