Skip to content

Instantly share code, notes, and snippets.

@arraytools
Created August 3, 2025 18:01
Show Gist options
  • Save arraytools/421c1ededd090b86dc10bea879ff4f8e to your computer and use it in GitHub Desktop.
Save arraytools/421c1ededd090b86dc10bea879ff4f8e to your computer and use it in GitHub Desktop.
Download a YouTube video and its captions using the yt-dlp utility.
#!/bin/bash
#
# yt-int.sh - interactively download a YouTube video and its auto-generated
# English captions with yt-dlp.
#
# Usage: ./yt-int.sh <youtube_url>
#
# Steps:
#   1. List the available formats with `yt-dlp -F` into a temporary file.
#   2. Offer one video-only mp4/webm format for each of 1080p, 720p and 360p
#      (or the first three video-only mp4/webm formats listed when none match).
#   3. Pick the best m4a audio-only format (any audio-only format as fallback).
#   4. Download auto-generated English subtitles (SRT requested, VTT fallback).
#   5. Download video+audio, remux to MP4, then rename the video and subtitle
#      files so the trailing " [<id>]" is removed from their names.
#
# Requires: bash 4+ (associative arrays, mapfile) and yt-dlp on PATH.
# Exit status: 0 on success; 1 on usage error, invalid choice or failed download.

# Matches yt-dlp -F rows for video-only formats in an mp4 or webm container.
readonly VIDEO_ONLY_RE='^[0-9]+.*(mp4|webm).*video only'
# Output template with title and video ID.
readonly OUTTEMPLATE='%(title)s [%(id)s].%(ext)s'

# Menu state: menu number -> yt-dlp format code.
declare -A video_choices=()
choice_count=0
# Path of the temporary format listing; removed by the EXIT trap.
formats_file=""

#######################################
# Print a message to stderr and exit with status 1.
# Arguments: $* - message text
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Remove the temporary format listing, if one was created.
# Globals: formats_file (read)
#######################################
cleanup() {
  [[ -n "$formats_file" ]] && rm -f -- "$formats_file"
}

#######################################
# Print a format row without its leading ID field, whitespace collapsed.
# Dropping the first field (rather than a fixed number of characters) keeps
# the description intact regardless of how wide the ID column is.
# Arguments: $1 - one row of `yt-dlp -F` output
# Outputs:   the description on stdout
#######################################
describe_format() {
  awk '{ $1 = ""; sub(/^ +/, ""); print }' <<<"$1"
}

#######################################
# Add a format row to the menu and print its numbered entry.
# Globals:   video_choices (written), choice_count (written)
# Arguments: $1 - one row of `yt-dlp -F` output (starts with the format ID)
# Outputs:   "[n] description" on stdout
#######################################
offer_format() {
  local row=$1
  local code=${row%%[[:space:]]*}
  local desc
  desc=$(describe_format "$row")
  choice_count=$((choice_count + 1))
  echo "[$choice_count] $desc"
  video_choices[$choice_count]="$code"
}

#######################################
# Script entry point.
# Arguments: $1 - URL of the video to download
#######################################
main() {
  if [[ -z "${1:-}" ]]; then
    echo "Usage: $0 <youtube_url>" >&2
    exit 1
  fi

  local url=$1
  local res line vnum vcode acode
  local video_file0 video_base base_no_id video_file final_video
  local ext orig_sub clean_sub sub_found=0
  local -a res_order=(1080 720 360) lines=()

  command -v yt-dlp >/dev/null || die "ERROR: yt-dlp not found in PATH."

  # Temporary file to hold the format list; the trap covers every exit path.
  formats_file=$(mktemp) || die "ERROR: could not create a temporary file."
  trap cleanup EXIT

  # Get all formats for the video.
  yt-dlp -F -- "$url" >"$formats_file" \
    || die "ERROR: could not list formats for: $url"

  echo "Searching for common video formats (1080p, 720p, 360p) ..."
  for res in "${res_order[@]}"; do
    # First try a standalone "<res>" / "<res>p" token, then a WxH resolution.
    line=$(grep -E "$VIDEO_ONLY_RE" "$formats_file" \
      | grep -E "[[:space:]]${res}p?[[:space:]]" | head -n 1)
    if [[ -z "$line" ]]; then
      line=$(grep -E "$VIDEO_ONLY_RE" "$formats_file" \
        | grep -E "x$res" | head -n 1)
    fi
    if [[ -n "$line" ]]; then
      offer_format "$line"
    fi
  done

  # If no standard formats were found, fall back to the first three
  # video-only mp4/webm formats in the listing.
  if (( ${#video_choices[@]} == 0 )); then
    echo "No standard 1080p, 720p, or 360p video-only formats detected"
    echo "Showing top 3 video-only mp4/webm formats instead:"
    mapfile -t lines < <(grep -E "$VIDEO_ONLY_RE" "$formats_file" | head -n 3)
    for line in "${lines[@]}"; do
      offer_format "$line"
    done
  fi

  if (( ${#video_choices[@]} == 0 )); then
    die "Invalid choice or no formats available. Exiting."
  fi

  # Prompt user for video format choice. The answer is validated as digits
  # before it is used as an array subscript (an empty subscript is an error).
  read -r -p "Enter the number of the video format you want to download: " vnum
  if [[ ! "$vnum" =~ ^[0-9]+$ || -z "${video_choices[$vnum]:-}" ]]; then
    die "Invalid choice or no formats available. Exiting."
  fi
  vcode="${video_choices[$vnum]}"

  # Automatically pick the best m4a audio format. yt-dlp lists formats from
  # worst to best, so the last matching row is the best one.
  acode=$(grep -E "^[0-9]+.*m4a.*audio only" "$formats_file" \
    | tail -n 1 | awk '{print $1}')
  if [[ -z "$acode" ]]; then
    echo "No m4a audio format found, selecting best audio instead."
    acode=$(grep -E "^[0-9]+.*audio only" "$formats_file" \
      | tail -n 1 | awk '{print $1}')
  fi
  # Without an audio code the selector below would be the malformed "<v>+".
  [[ -n "$acode" ]] || die "ERROR: no audio-only format found."

  echo "Selected video format code: $vcode"
  echo "Selected audio format code: $acode"

  echo "Downloading auto-generated English captions (subtitles)..."
  # Try SRT first, fall back to VTT if the SRT request fails.
  if yt-dlp --write-auto-subs --sub-lang en --sub-format srt --skip-download \
    -o "$OUTTEMPLATE" -- "$url"; then
    echo "Auto-generated English subtitles downloaded in SRT format."
  else
    echo "Failed to download subtitles in SRT format, trying VTT format..."
    yt-dlp --write-auto-subs --sub-lang en --sub-format vtt --skip-download \
      -o "$OUTTEMPLATE" -- "$url"
  fi

  echo "Downloading video and audio streams and merging into MP4..."
  if ! yt-dlp -f "${vcode}+${acode}" --remux-video mp4 \
    -o "$OUTTEMPLATE" -- "$url"; then
    die "ERROR: Video+audio download failed. No video file created."
  fi

  # Download succeeded - work out the file name yt-dlp used so the files can
  # be renamed without the trailing [ID].
  video_file0=$(yt-dlp --get-filename -o "$OUTTEMPLATE" -- "$url")
  [[ -n "$video_file0" ]] \
    || die "ERROR: could not determine the downloaded file name."
  video_base="${video_file0%.*}"
  echo "video_base: $video_base"

  # Remove trailing spaces + [ID].
  base_no_id=$(printf '%s\n' "$video_base" | sed -E 's/ *\[[^]]+\]$//')
  final_video="${base_no_id}.mp4"
  # The download was remuxed to MP4, so the file on disk ends in .mp4
  # whatever extension --get-filename reported.
  video_file="${video_base}.mp4"

  if [[ "$video_file" != "$final_video" ]]; then
    echo "video_file: $video_file"
    if [[ -f "$video_file" ]]; then
      if mv -- "$video_file" "$final_video"; then
        echo "Renamed video file to: $final_video"
      fi
    else
      echo "Video file '$video_file' not found after download."
    fi
  else
    echo "Video filename did not require renaming."
  fi

  # Subtitles are written as 'title [ID].en.<ext>'. Either extension may be
  # present (SRT request or VTT fallback above), so check both.
  for ext in srt vtt; do
    orig_sub="${video_base}.en.${ext}"   # original file with [ID]
    clean_sub="${base_no_id}.en.${ext}"  # final name without [ID]
    if [[ "$orig_sub" != "$clean_sub" && -f "$orig_sub" ]]; then
      if mv -- "$orig_sub" "$clean_sub"; then
        echo "Renamed subtitle file to: $clean_sub"
      fi
      sub_found=1
    elif [[ -f "$clean_sub" ]]; then
      echo "Subtitle filename did not require renaming."
      sub_found=1
    fi
  done
  if (( sub_found == 0 )); then
    echo "Subtitle file not found."
  fi

  return 0
}

main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment