Created
June 12, 2025 06:58
-
-
Save grigio/2e236fc0add21fbf0032d9193614d2b9 to your computer and use it in GitHub Desktop.
It uses yt-dlp to fetch the VTT subtitles and converts them to readable text.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# subdwn - print a video's subtitles as plain text.
#
# Usage: subdwn <youtube_video_url> [sub_lang]
#   sub_lang: optional subtitle language (default: en)
#
# Uses yt-dlp to fetch the subtitle track as WebVTT (the video itself is not
# downloaded) and writes the filtered, de-duplicated caption lines to stdout.
# Diagnostics go to stderr so they never mix with the transcript.
#
# Exit status: 0 on success; 1 if yt-dlp is missing, no URL was given, the
# temporary directory could not be created, or no subtitle file was produced.

set -u

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Check if yt-dlp is installed
command -v yt-dlp >/dev/null 2>&1 \
  || die "Error: yt-dlp is not installed. Please install it first."

# Check if a URL argument is provided
if [[ -z "${1:-}" ]]; then
  {
    echo "Usage: subdwn <youtube_video_url> [sub_lang]"
    echo " sub_lang: Optional subtitle language (default: en)"
  } >&2
  exit 1
fi

url=$1
# Deliberately NOT named LANG: that is the locale environment variable, and
# assigning it would change the locale of this shell and of every child
# process (yt-dlp, grep, awk).
sub_lang=${2:-en} # Use "en" if no language provided

# Create a temporary directory to store subtitles. Deliberately NOT named
# TMPDIR, which mktemp and child processes consult to locate temp storage.
work_dir=$(mktemp -d) || die "Error: could not create a temporary directory."
trap 'rm -rf -- "$work_dir"' EXIT

# Download subtitles in the requested language (default: English)
# --write-auto-sub: download auto-generated subtitles
# --sub-lang: fetch subtitles in the specified language
# --skip-download: do not download video
# stdout is discarded; stderr is kept so a failure can be reported below.
ytdlp_errlog="$work_dir/yt-dlp.stderr"
yt-dlp --skip-download --write-auto-sub --sub-lang "$sub_lang" \
  --sub-format "vtt" -o "$work_dir/sub.%(ext)s" -- "$url" \
  >/dev/null 2>"$ytdlp_errlog"
ytdlp_status=$?

# Check if subtitle file was downloaded
sub_file=$(find "$work_dir" -name "sub.*" | head -n 1)
if [[ -z "$sub_file" ]]; then
  echo "No subtitles found for this video." >&2
  # If yt-dlp itself failed, show why instead of hiding the real cause.
  if ((ytdlp_status != 0)) && [[ -s "$ytdlp_errlog" ]]; then
    cat -- "$ytdlp_errlog" >&2
  fi
  exit 1
fi

# Convert WebVTT (.vtt) subtitles to plain text:
#   grep -v  drops every line containing '<' or '>' (this removes the
#            "-->" cue timing lines and lines carrying inline <...> tags)
#   awk      keeps only the first occurrence of each remaining line
#   tail     skips the first five surviving lines
#            NOTE(review): presumably the WEBVTT header block as emitted for
#            auto-generated captions - confirm against other subtitle sources.
#
# Earlier sed-based variant, kept for reference:
# sed -e '1d' \
#   -e '/^[0-9]/d' \
#   -e '/^$/d' \
#   -e 's/<[^>]*>//g' \
#   "$sub_file"
grep -v -E '<|>' "$sub_file" | awk '!seen[$0]++' | tail -n +6

exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment