-
-
Save FurloSK/7f52303a10ab7478e3cddfe4bcc50881 to your computer and use it in GitHub Desktop.
#!/usr/bin/env bash
# Extract subtitles from each MKV/MP4/AVI/TS file in the given directory
# [updated 2024-01-09 by FurloSK]
# Permanent gist address: https://gist.github.com/FurloSK/7f52303a10ab7478e3cddfe4bcc50881
#
# ===== Usage =====
# extractSubtitles.sh [-i] [<fileOrDirectory>]
#   -i
#     Supplying this option will skip extraction and only print information about subtitles in file
#   <fileOrDirectory>
#     If a directory is given, will process all MKV/MP4/AVI/TS files in this directory (and subdirectories)
#     If a file is given, will process this single file
#     If the parameter is skipped altogether, will process current directory (and subdirectories)
#
# ===== Requirements =====
#   bash (the script uses [[ ]] and $'\t', so plain POSIX sh is not enough),
#   MKVToolNix (mkvmerge + mkvextract) and Python (python3 or python).
#
# ===== History =====
# Original version by ComputerNerdFromHell (site no longer working):
#   http://www.computernerdfromhell.com/blog/automatically-extract-subtitles-from-mkv
#   Archived here: https://web.archive.org/web/20181119144734/http://www.computernerdfromhell.com/blog/automatically-extract-subtitles-from-mkv/
# Resubmitted by nux:
#   https://askubuntu.com/questions/452268/extract-subtitle-from-mkv-files/452279#452279
# Completely rewritten and tweaked by FurloSK:
#   https://superuser.com/questions/1527829/extracting-subtitles-from-mkv-file/1649627#1649627
#   Permanent gist address: https://gist.github.com/FurloSK/7f52303a10ab7478e3cddfe4bcc50881
#
# =============================================================================
# Config part: this is the only thing you need to tweak

# MKVToolNix path - Leave empty if you have the tools added to $PATH.
# This is needed e.g. on macOS, if you just downloaded MKVToolNix app and dragged it to Applications folder.
# If mkvmerge is not found here but is available on $PATH, the $PATH version is used instead.
toolPath='/Applications/+ Moje/MKVToolNix.app/Contents/MacOS/'

# Optional: codec IDs of subtitle tracks that should NOT be extracted.
# See codec types here (under header Subtitle Codec Mappings):
#   https://datatracker.ietf.org/doc/html/draft-ietf-cellar-codec/#name-subtitle-codec-mappings
# E.g. to skip DVD subtitles use: unwantedCodecIDs=('S_VOBSUB')
unwantedCodecIDs=()

# =============================================================================
# Start of script

# Python interpreter used to parse the JSON printed by mkvmerge.
# Many systems ship only "python3", so prefer it and fall back to "python".
pythonBin=$(command -v python3 || command -v python || printf 'python')

#######################################
# Parse command line arguments.
# Globals:   DIR (written), skipExtraction (written)
# Arguments: [-i] [<fileOrDirectory>]
#######################################
parse_args() {
  # by default, process all files in local dir
  DIR="."
  skipExtraction=false

  # first parameter might be -i switch, which will only print subtitle tracks instead of extracting them
  if [[ "${1-}" == "-i" ]]; then
    skipExtraction=true
    # if also directory or file is given, print info about it instead of default local dir
    if [[ $# -eq 2 ]]; then
      DIR="$2"
    fi
  # otherwise if directory or file is given, extract subtitles from that one
  elif [[ $# -eq 1 ]]; then
    DIR="$1"
  fi
}

#######################################
# Fall back to the tools on $PATH when the configured toolPath has no mkvmerge.
# Globals:   toolPath (read, possibly reset to empty)
# Outputs:   a warning on stderr when the fallback is taken
#######################################
resolve_tool_path() {
  if [[ -n "$toolPath" && ! -x "${toolPath}mkvmerge" ]] \
    && command -v mkvmerge >/dev/null 2>&1; then
    printf 'Warning: mkvmerge not found in "%s", using the one from $PATH instead.\n' \
      "$toolPath" >&2
    toolPath=''
  fi
}

#######################################
# Convert "mkvmerge -J" JSON (stdin) into one line per subtitle track (stdout),
# fields delimited by tabulator:
#   trackID <tab> trackLanguage <tab> trackCodecID <tab> trackCodec
# Prints the single word "unsupported" when the input is not valid JSON or
# contains no 'tracks' key. A missing language is reported as "und", a missing
# codec_id as "undefined".
# Globals:   pythonBin (read)
#######################################
parse_tracks_json() {
  # The Python code uses only double quotes so it can live in a single-quoted shell string.
  "$pythonBin" -c '
import sys, json
try:
    js = json.load(sys.stdin)
except ValueError:
    js = {}
if "tracks" not in js:
    print("unsupported")
    sys.exit()
for track in js["tracks"]:
    if track["type"] == "subtitles":
        props = track.get("properties", {})
        print("\t".join([
            str(track["id"]),
            props.get("language", "und"),
            props.get("codec_id", "undefined"),
            track["codec"],
        ]))
'
}

#######################################
# Print info about all subtitle tracks of a file (format: see parse_tracks_json).
# Globals:   toolPath (read)
# Arguments: $1 - path to the media file
#######################################
list_subtitle_tracks() {
  # stdin is detached so mkvmerge cannot swallow the file list the caller is iterating over
  "${toolPath}mkvmerge" -J "$1" </dev/null | parse_tracks_json
}

#######################################
# Tell whether a codec ID is listed in unwantedCodecIDs.
# Globals:   unwantedCodecIDs (read)
# Arguments: $1 - codec ID
# Returns:   0 if the codec is unwanted, 1 otherwise
#######################################
is_unwanted_codec() {
  local unwanted
  for unwanted in "${unwantedCodecIDs[@]}"; do
    if [[ "$1" == "$unwanted" ]]; then
      return 0
    fi
  done
  return 1
}

#######################################
# Determine proper file extension for a subtitle codec ID.
# Arguments: $1 - codec ID (e.g. S_TEXT/ASS)
# Outputs:   extension without leading dot
#######################################
extension_for_codec() {
  case "$1" in
    S_TEXT/SSA) printf 'ssa\n' ;;
    S_TEXT/ASS) printf 'ass\n' ;;
    S_TEXT/USF) printf 'usf\n' ;;
    S_TEXT/WEBVTT) printf 'vtt\n' ;;
    S_DVBSUB) printf 'dvb\n' ;;
    S_HDMV/PGS) printf 'sup\n' ;; # Blu-ray PGS bitmaps are written by mkvextract as SUP files
    # fallback to standard .srt file (S_VOBSUB files will still get their proper extension)
    *) printf 'srt\n' ;;
  esac
}

#######################################
# Print (and unless -i was given, extract) all subtitle tracks of one file.
# Globals:   toolPath (read), skipExtraction (read)
# Arguments: $1 - path to the media file
#######################################
process_file() {
  local filename=$1
  local fileBasename trackNumber trackLanguage trackCodecID trackCodec
  local extension outFilename result

  printf '\nProcessing file %s:\n' "$filename"

  # Get base file name (without extension)
  fileBasename=${filename%.*}

  list_subtitle_tracks "$filename" \
    | while IFS=$'\t' read -r trackNumber trackLanguage trackCodecID trackCodec; do
      # if JSON tracks extraction failed, continue to next file
      if [[ "$trackNumber" == 'unsupported' ]]; then
        printf ' Unsupported file, skipping...\n'
        continue
      fi

      printf ' Found subtitle track #%s: %s (%s, %s)\n' \
        "$trackNumber" "$trackLanguage" "$trackCodec" "$trackCodecID"

      # address missing ['properties']['codec_id'] in JSON
      if [[ "$trackCodecID" == 'undefined' ]]; then
        # fix DVBSUB codec automatically
        if [[ "$trackCodec" == 'DVBSUB' ]]; then
          trackCodecID='S_DVBSUB'
          printf ' Warning: missing codec_id for %s track => corrected to %s.\n' \
            "$trackCodec" "$trackCodecID"
        else
          printf ' Error: missing codec_id for %s track!\n' "$trackCodec"
        fi
      fi

      # if we are only printing tracks, not extracting them, print track and continue
      if [[ "$skipExtraction" == true ]]; then
        continue
      fi

      # optional: process only some types of subtitle tracks (see unwantedCodecIDs in config part)
      if is_unwanted_codec "$trackCodecID"; then
        printf ' Unwanted codec ID %s, skipping track...\n' "$trackCodecID"
        continue
      fi

      extension=$(extension_for_codec "$trackCodecID")

      # prepare output filename
      # (adding . [dot] between filename and language, so VLC will properly recognize the language)
      outFilename="${fileBasename} [#${trackNumber}].${trackLanguage}.${extension}"

      # extract track with language and track id
      printf ' Extracting track to file %s\n' "$outFilename"
      printf ' Executing command "%smkvextract" tracks "%s" %s:"%s"\n' \
        "$toolPath" "$filename" "$trackNumber" "$outFilename"
      # stdin is detached so mkvextract cannot consume the remaining track list.
      # (To silence mkvextract completely, redirect its output to /dev/null instead.)
      result=$("${toolPath}mkvextract" tracks "$filename" \
        "${trackNumber}:${outFilename}" </dev/null)
      printf ' > %s\n' "$result"

      printf '\n'
    done
}

#==========================================================================
# Lines below are from the original source by ComputerNerdFromHell.
# They are now all obsolete and redundant (kept just for reference)
# Extract the track to a .tmp file
#`"${toolPath}mkvextract" tracks "$filename" $trackNumber:"$subtitlename.srt.tmp" > /dev/null 2>&1`
#`chmod g+rw "$subtitlename.srt.tmp"`
# # Do a super-primitive language guess: ENGLISH
# langtest=`egrep -ic ' you | to | the ' "$subtitlename".srt.tmp`
# trimregex=""
#
# # Check if subtitle passes our language filter (10 or more matches)
# if [ $langtest -ge 10 ]; then
# # Regex to remove credits at the end of subtitles (read my reason why!)
# `sed 's/\r//g' < "$subtitlename.srt.tmp" \
# | sed 's/%/%%/g' \
# | awk '{if (a){printf("\t")};printf $0; a=1; } /^$/{print ""; a=0;}' \
# | grep -iv "$trimregex" \
# | sed 's/\t/\r\n/g' > "$subtitlename.srt"`
# `rm "$subtitlename.srt.tmp"`
# `chmod g+rw "$subtitlename.srt"`
# else
# # Not our desired language: add a number to the filename and keep anyway, just in case
# `mv "$subtitlename.srt.tmp" "$subtitlename.$tracknumber.srt" > /dev/null 2>&1`
# fi
#==========================================================================

#######################################
# Entry point: process the given file, or every supported file below the given directory.
# Arguments: same as the script itself
#######################################
main() {
  local filename

  parse_args "$@"
  resolve_tool_path

  # Get all the MKV/MP4/AVI/TS files in this dir and its subdirs.
  # NUL-delimited so that any file name (spaces, backslashes, newlines) survives intact.
  find "$DIR" -type f \
    \( -iname '*.mkv' -o -iname '*.mp4' -o -iname '*.avi' -o -iname '*.ts' \) -print0 \
    | while IFS= read -r -d '' filename; do
      process_file "$filename"
    done
}

main "$@"
Nice scripting Thnx!
For macOS users there are 3 things to check to make this script work for you:
1 - Make sure the path to MKVToolNix is correct. In my case this worked (line 30):
toolPath='/Applications/MKVToolNix-79.0.app/Contents/MacOS/'
2 - Make sure python can be found. In my case I had to add the version number after python (line 62):
"${toolPath}mkvmerge" -J "$filename" | python3 -c
3 - When downloading the script there was (in my case) a line break that produced a Python 'syntax error' (line 62).
Make sure that the line break comes after ' trackCodec; '.
Then everything worked just fine!
For use with Ubuntu, change first line to #!/bin/bash, comment the toolPath line, and add the -e option to the first echo command.
This needs support for PGS added
On MacOS
toolPath="/Applications/$(ls /Applications | grep MKVToolNix)/Contents/MacOS/"
For Ubuntu 25.10, replace any occurrence of python with python3 in the script.
Nice, thanks.
`echo "\nProcessing file $filename:"` should be `echo -e "\nProcessing file $filename:"`.