Created
August 3, 2025 18:01
-
-
Save arraytools/421c1ededd090b86dc10bea879ff4f8e to your computer and use it in GitHub Desktop.
Download a YouTube video and its captions using the yt-dlp utility.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# yt-int.sh - interactively download a YouTube video together with its
# auto-generated English captions, using yt-dlp.
#
# Usage: ./yt-int.sh <youtube_url>
#
# Flow:
#   1. Save the output of `yt-dlp -F` (the format table) to a temp file.
#   2. Offer one video-only mp4/webm format per common resolution
#      (1080, 720, 360); if none match, offer the first three video-only
#      mp4/webm rows instead.
#   3. Pick an audio-only format automatically (m4a preferred).
#   4. Download the captions, then download video+audio and remux to MP4.
#   5. Strip the trailing " [ID]" from the video and subtitle file names.
#
# Exit status: 0 on success, 1 on usage error, invalid choice, or when
# yt-dlp fails to list formats or download the video.
#
# Requires: bash 4+ (associative arrays, mapfile), yt-dlp, grep, awk, sed.

if [[ $# -lt 1 || -z "$1" ]]; then
  echo "Usage: $0 <youtube_url>" >&2
  exit 1
fi
URL="$1"

if ! command -v yt-dlp >/dev/null 2>&1; then
  echo "ERROR: yt-dlp is not installed or not in PATH." >&2
  exit 1
fi

# Rows of the format table that describe video-only mp4/webm streams.
readonly VIDEO_ONLY_RE='^[0-9]+.*(mp4|webm).*video only'

# Temporary file to hold the format list; the trap removes it on every
# exit path (normal end, error exit, Ctrl-C).
formats_file=$(mktemp) || {
  echo "ERROR: could not create a temporary file." >&2
  exit 1
}
trap 'rm -f -- "$formats_file"' EXIT

# Get all formats for the video. "--" keeps a URL or bare video ID that
# starts with "-" from being parsed as an option.
if ! yt-dlp -F -- "$URL" > "$formats_file"; then
  echo "ERROR: could not list formats for: $URL" >&2
  exit 1
fi

declare -A video_choices=()
declare -a res_order=(1080 720 360)
idx=1

# Register one format-table row as a numbered menu entry.
# Globals:   video_choices (written), idx (read and incremented)
# Arguments: $1 - a row of the format table
# Outputs:   "[n] <row without its format code>" on stdout
add_choice() {
  local row=$1
  local code desc
  code=$(awk '{print $1}' <<<"$row")
  # Drop the first column whatever its width, then squeeze whitespace.
  desc=$(awk '{sub(/^[^[:space:]]+[[:space:]]+/, ""); $1=$1; print}' <<<"$row")
  echo "[$idx] $desc"
  video_choices[$idx]="$code"
  idx=$((idx + 1))
}

echo "Searching for common video formats (1080p, 720p, 360p) ..."
for res in "${res_order[@]}"; do
  # First try a standalone "<res>" / "<res>p" column ...
  line=$(grep -E "$VIDEO_ONLY_RE" "$formats_file" \
    | grep -E "[[:space:]]${res}p?[[:space:]]" \
    | head -n 1)
  # ... then the height part of a WxH resolution (not followed by a digit,
  # so that e.g. 360 does not match x3600).
  if [[ -z "$line" ]]; then
    line=$(grep -E "$VIDEO_ONLY_RE" "$formats_file" \
      | grep -E "x${res}([^0-9]|\$)" \
      | head -n 1)
  fi
  if [[ -n "$line" ]]; then
    add_choice "$line"
  fi
done

# If no standard formats were found, fall back to the first three
# video-only mp4/webm rows.
if [[ ${#video_choices[@]} -eq 0 ]]; then
  echo "No standard 1080p, 720p, or 360p video-only formats detected"
  echo "Showing top 3 video-only mp4/webm formats instead:"
  mapfile -t lines < <(grep -E "$VIDEO_ONLY_RE" "$formats_file" | head -n 3)
  for line in "${lines[@]}"; do
    add_choice "$line"
  done
fi

# Nothing to choose from: stop before prompting.
if [[ ${#video_choices[@]} -eq 0 ]]; then
  echo "Invalid choice or no formats available. Exiting." >&2
  exit 1
fi

# Prompt user for video format choice.
read -r -p "Enter the number of the video format you want to download: " vnum

# Only look the answer up when it is a plain number; an empty or arbitrary
# string must not be used as an associative-array subscript.
vcode=""
if [[ "$vnum" =~ ^[0-9]+$ ]]; then
  vcode="${video_choices[$vnum]:-}"
fi
if [[ -z "$vcode" ]]; then
  echo "Invalid choice or no formats available. Exiting." >&2
  exit 1
fi

# Automatically pick the best m4a audio format. yt-dlp prints the format
# table ordered from worst to best, so the LAST matching row is the best.
acode=$(grep -E "^[0-9]+.*m4a.*audio only" "$formats_file" \
  | tail -n 1 | awk '{print $1}')
if [[ -z "$acode" ]]; then
  echo "No m4a audio format found, selecting best audio instead."
  acode=$(grep -E "^[0-9]+.*audio only" "$formats_file" \
    | tail -n 1 | awk '{print $1}')
fi
if [[ -z "$acode" ]]; then
  echo "ERROR: No audio-only format found. Exiting." >&2
  exit 1
fi

echo "Selected video format code: $vcode"
echo "Selected audio format code: $acode"

# Output template with title and video ID.
OUTTEMPLATE='%(title)s [%(id)s].%(ext)s'

echo "Downloading auto-generated English captions (subtitles)..."
# Try SRT first, fall back to VTT if the SRT request fails.
# NOTE(review): yt-dlp may itself substitute another subtitle format when
# srt is not offered - confirm; the rename step below checks both anyway.
if yt-dlp --write-auto-subs --sub-lang en --sub-format srt --skip-download \
  -o "$OUTTEMPLATE" -- "$URL"; then
  echo "Auto-generated English subtitles downloaded in SRT format."
else
  echo "Failed to download subtitles in SRT format, trying VTT format..."
  yt-dlp --write-auto-subs --sub-lang en --sub-format vtt --skip-download \
    -o "$OUTTEMPLATE" -- "$URL"
fi

echo "Downloading video and audio streams and merging into MP4..."
if ! yt-dlp -f "${vcode}+${acode}" --remux-video mp4 \
  -o "$OUTTEMPLATE" -- "$URL"; then
  echo "ERROR: Video+audio download failed. No video file created." >&2
  exit 1
fi

# Download succeeded - rename files, removing the trailing " [ID]".
video_file0=$(yt-dlp --get-filename -o "$OUTTEMPLATE" -- "$URL")
if [[ -z "$video_file0" ]]; then
  echo "ERROR: could not determine the output file name; files were not renamed." >&2
  exit 1
fi
video_base="${video_file0%.*}"

# Remove trailing spaces + [ID].
echo "video_base: $video_base"
base_no_id=$(sed -E 's/ *\[[^]]+\]$//' <<<"$video_base")

final_video="${base_no_id}.mp4"
video_file="${video_base}.mp4" # --remux-video mp4 makes the extension .mp4
if [[ "$video_file" != "$final_video" ]]; then
  echo "video_file: $video_file"
  if [[ -f "$video_file" ]]; then
    if mv -- "$video_file" "$final_video"; then
      echo "Renamed video file to: $final_video"
    else
      echo "ERROR: could not rename '$video_file'." >&2
    fi
  else
    echo "Video file '$video_file' not found after download."
  fi
else
  echo "Video filename did not require renaming."
fi

# Subtitles are written as '<title> [ID].en.<ext>'; rename each one that
# exists to '<title>.en.<ext>'. Both srt and vtt are checked because the
# caption step above can produce either.
sub_found=0
for sub_ext in srt vtt; do
  orig_sub="${video_base}.en.${sub_ext}"  # original file with [ID]
  clean_sub="${base_no_id}.en.${sub_ext}" # final name without [ID]
  if [[ "$orig_sub" != "$clean_sub" && -f "$orig_sub" ]]; then
    if mv -- "$orig_sub" "$clean_sub"; then
      echo "Renamed subtitle file to: $clean_sub"
    else
      echo "ERROR: could not rename '$orig_sub'." >&2
    fi
    sub_found=1
  elif [[ -f "$clean_sub" ]]; then
    echo "Subtitle filename did not require renaming."
    sub_found=1
  fi
done
if [[ "$sub_found" -eq 0 ]]; then
  echo "Subtitle file not found."
fi

# Cleanup of "$formats_file" is handled by the EXIT trap.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment