Skip to content

Instantly share code, notes, and snippets.

@natyusha
Last active August 1, 2025 03:54
Show Gist options
  • Save natyusha/34c6b9418e3348f37f3bc0650d9282e2 to your computer and use it in GitHub Desktop.
Save natyusha/34c6b9418e3348f37f3bc0650d9282e2 to your computer and use it in GitHub Desktop.
This script takes a raw video file and muxes it together with another video file. It will retain chapters/subtitles/attachments from the old file and sync them if there is a delay. Options are included for simply formatting a new file, adding extra audio tracks, and exporting subtitles/attachments/chapters to go along with raws.
#!/usr/bin/env python3
import os, re, sys, json, time, shutil, argparse, subprocess
from xml.etree import ElementTree
r"""
Description:
- This script takes a raw video file and muxes it together with another video file
- It will retain chapters/subtitles/attachments from the old file and sync them if there is a delay
Author:
- natyusha
Requirements:
- programs : python 3.12+ (the script nests same-type quotes and backslashes inside f-strings), aegisub, aegisub-cli, mkvtoolnix, ffmpeg
- fonts : Lato-Bol.ttf, Lato-BolIta.ttf
- bash : grep
- pip : audio-offset-finder
- mpv : chapter-make-read.lua
Usage:
- to auto merge create a "map.txt" file with the path to all files in a single line separated by a pipe: "vid_new"|"vid_old"|title|subgroup|"vid_aud"
- make sure there are no unicode characters present in the parent folder names of the new video files
- relative paths to the working directory can be used for all files
- any path can have extra info appended to it after a semicolon
- for vid_new: "path";comment
- for vid_old: "path";comment;ext
- for vid_aud: "path";comment;lang;trackname
- the parent folder of the files can also be set with the -in or -io tags if the full path isn't desired
- lines in "map.txt" starting with a "#" are treated as a comment
- entering "FORMAT" (without quotes) as "vid_new" will use the same video for the output as "vid_old" while applying the styling
- this can be used for raws and external subtitles by naming the subtitles the same as the raw with ".en" appended
- place the mapping file in the terminal's working directory and run the script
- the title is optional and will default to the title of the original video
- use the copy as path context menu command (on windows) for both sets of files then organise them accordingly
- the script will output to a "mux" subfolder of the working directory and use the filenames from the second half of the mapping
- chapters will be copied over from the same location as the "vid_old" if they are named "original_filename.xml"
- if there are no external chapters they will be taken from "vid_old" or "vid_new" as a last resort
- subtitles will be copied over from the same location as the "vid_old" if they are named "original_filename.en.ass"
- subtitles will be extracted from the original file if loose ones aren't present and will follow the same naming scheme as above
- copied subtitles will be resampled to the upgraded file's resolution and the correct script properties will be set
- Note: non ".ass" / ".srt" subtitles aren't handled by this script as they require manual editing and aren't that common
Arguments:
- run "pairedmuxing.py -h" for info on how to format the mapping file and for more details on the arguments
- if you want to maintain the original subtitle styling pass the "-o" / "--original" flag
- if you want to change the language tag assigned to the audio pass it with "-ls" / "--lang-set" e.g. "-ls eng"
- both are named options so they can be given in any order
"""
# Re-open stdout as UTF-8 so the box-drawing characters used in the log output can be printed;
# line_buffering=True makes every newline flush the stream.
sys.stdout.reconfigure(encoding='utf-8', line_buffering=True) # allow unicode characters in print and flush after each newline
# Error marker prefixed to failure messages: "⨯" wrapped in the ANSI red (31) and reset (0) escape sequences.
err = '\033[31m⨯\033[0m' # use the red terminal colour for ⨯
# functions to check for a valid language tag argument
def language_tag(tag):
    """Validate a comma separated list of language tags given on the command line.

    Used as an argparse ``type=`` callable. Spaces are removed, the value is split on
    commas, and every item must be two or three lowercase letters, optionally followed
    by a hyphen and two uppercase letters (e.g. "jpn", "en", "en-US").

    Returns the cleaned, comma-joined tag string.
    Raises argparse.ArgumentTypeError when any item does not match.
    """
    tags = tag.replace(' ', '').split(',')
    for t in tags:
        if not re.match(r'^[a-z]{2,3}(-[A-Z]{2})?$', t):
            # the exception has to be raised (not merely constructed) for argparse to reject the value
            raise argparse.ArgumentTypeError(f'{err}Language(s) must be a valid IETF tag(s)')
    return ','.join(tags)
# undo special characters from windows filenames, convert backticks and remove trailing white-space
def undo_reserved(title):
    """Turn filename-safe lookalike characters in *title* back into the real characters.

    Each key is a regular expression and each value is an ``re.sub`` replacement
    template, applied in order. In a replacement template ``\\\\`` produces a single
    backslash, while escapes such as ``\\*`` or ``\\"`` are emitted unchanged
    (backslash included). Backticks become apostrophes, curly double quotes become
    an escaped straight quote, and trailing spaces / tabs are removed last.

    The fullwidth keys are written as ``\\uXXXX`` escapes so they cannot be silently
    folded to their ASCII twins; as bare ASCII, ``*`` and ``?`` are invalid patterns
    and ``|`` matches the empty string at every position.
    """
    reserved = {
        '⧵': r'\\',            # U+29F5 reverse solidus operator
        '⁄': r'\/',            # U+2044 fraction slash
        '꞉': ':',              # U+A789 modifier letter colon
        '\uff0a': r'\*',       # fullwidth asterisk
        '\uff1f': r'\?',       # fullwidth question mark
        '\uff1c': '<',         # fullwidth less-than sign
        '\uff1e': '>',         # fullwidth greater-than sign
        '\uff5c': r'\|',       # fullwidth vertical line
        '`': "'",
        '[“”]': r'\"',
        r'[ \t]+$': '',
    }
    for key, value in reserved.items():
        title = re.sub(key, value, title)
    return title
# format video paths
def vid_paths(arg, mapping_idx):
    """Build the path for one field of the current mapping.

    Reads the module-level ``mapping`` (the map.txt line currently being processed),
    takes field ``mapping_idx``, drops everything after the first semicolon, strips
    surrounding double quotes and joins the result onto the folder ``arg``.
    """
    field = mapping[mapping_idx]
    bare_path = field.split(';')[0].strip('"')
    return os.path.join(arg, bare_path)
# output elapsed since previous time.time()
def elapsed(start):
    """Return the time passed since ``start`` (a ``time.time()`` value) as text such as "1.23s"."""
    seconds = round(time.time() - start, 2)
    return str(seconds) + 's'
# change extension of file mapping
def mod_ext(find, replace, file, dot='.'):
    """Swap the extension at the end of ``file``.

    ``find`` is a regex fragment for the extension (without the dot), matched
    case-insensitively at the end of the string. A match, including its dot, is
    replaced with ``dot`` followed by ``replace``; without a match ``file`` is
    returned unchanged.
    """
    extension = re.compile(fr'\.{find}$', re.I)
    substitute = f'{dot}{replace}'
    return extension.sub(substitute, file)
# parse captured stdout / stderr
def parse_std(capture):
    """Print the captured output of a finished subprocess as tree-formatted log lines.

    ``capture`` is a ``subprocess.run(..., capture_output=True)`` result whose
    ``stdout`` / ``stderr`` are bytes. stdout lines are prefixed with "│├─" and
    stderr lines with "│├" plus the error marker. "No attachment matched the spec"
    warnings are dropped and stdout lines starting with "Error:" get the error
    marker. Nothing is printed when there is no output left. Returns None.
    """
    out = ''  # start from a string so stderr-only output can be appended without a TypeError
    if capture.stdout:
        std_text = capture.stdout.decode('utf-8').strip('\r\n').replace('\r\n', '\r\n│├─')
        out = f'│├─{std_text}'
    if capture.stderr:
        err_text = capture.stderr.decode('utf-8').strip('\r\n').replace('\r\n', f'\r\n│├{err}')
        if out: out += '\n'  # keep stderr on its own line instead of gluing it to the last stdout line
        out += f'│├{err}{err_text}'
    if out: out = re.sub(r'^│├─Warning: No attachment matched the spec.*$\n?', '', out, flags=re.M) # remove messages warning about removing attachments
    # the line break is left out of the match so the error line is not merged with the line after it
    if out: out = re.sub(r'^│├(─Error:.*)$', rf'│{err}\1', out, flags=re.M) # add error symbol to errors from stdout
    if out: print(out)
# grab the file information of a given file in json format and store it in a variable
def identify(file):
    """Run ``mkvmerge.exe -J`` on ``file`` and return its JSON output parsed into Python objects.

    The executable is taken from the module-level ``mkvtoolnix`` folder.
    """
    command = f'{mkvtoolnix}mkvmerge.exe -J "{file}"'
    result = subprocess.run(command, capture_output=True, universal_newlines=True, encoding='utf-8')
    return json.loads(result.stdout)
# check mkvmerge -J output for the chapter count
def has_chapters(ident):
    """Return ``num_entries`` of the first ``chapters`` item in an identify() result.

    Returns None when that value cannot be read for any reason.
    """
    try:
        first_entry = ident['chapters'][0]
        count = first_entry['num_entries']
    except Exception:
        return None
    return count
# search for subtitles which contain (or don't) the specified text [flags: L = exclusion / l = inclusion]
def grep(txt_search, flags='l', path='.'):
    """Run ``grep -r`` over "*.ass" files below ``path`` and return its non-empty output lines.

    ``flags`` is appended to "-r": the callers use "l" to list files that contain
    ``txt_search`` and "L" to list files that do not.
    """
    command = f'grep -r{flags} --include="*.ass" "{txt_search}" {path}'
    listing = subprocess.run(command, capture_output=True).stdout.decode('utf-8')
    return list(filter(None, listing.split('\n')))
# replace text in the subtitles using regex
def replace(file, find, replace, log=True):
    """Rewrite ``file`` in place with every multiline-regex match of ``find`` replaced by ``replace``.

    The file is opened as UTF-8 for reading and writing. When ``log`` is true its
    path is printed as a log line first. The file is truncated after writing so a
    shorter result leaves no stale tail behind.
    """
    with open(file, 'r+', encoding='utf8') as handle:
        if log:
            print(f'│├─{file}')
        pattern = re.compile(find, re.M)
        updated = pattern.sub(replace, handle.read())
        handle.seek(0)
        handle.write(updated)
        handle.truncate()
# check the arguments
# Command line interface. RawTextHelpFormatter is used so the "\n" sequences embedded in the
# description are shown as written in the -h output.
parser = argparse.ArgumentParser(description='Mux + sync subtitles, attachments and chapters from one video container to another.\nThis is achieved using a user populated "map.txt" file located in the script\'s working directory.\n\nmap.txt details:'
'\n format {path to raw}|{path to original}|{mkv title}|{subgroup name}|{path to extra audio}\n *for the above formatting only the new and old video paths are required'
'\n *any path accepts extra info after a semicolon "PATH";lang;title'
'\n *enter "FORMAT" (w/o quotes) as {path to raw} to format the original only\n *the paths can be relative or absolute and support double quotes'
, epilog='NOTE: This script may require editing of the "# path variables" section to function correctly.', formatter_class=argparse.RawTextHelpFormatter)
# mapping source and optional base folders that are joined in front of the paths from the mapping
parser.add_argument('-map', '--mapping' , type=str , default='' , help='replace the map.txt file with the contents of this argument')
parser.add_argument('-in' , '--import-n', type=str , default='' , help='optional import folder location for vid_new to avoid using the full path')
parser.add_argument('-io' , '--import-o', type=str , default='' , help='optional import folder location for vid_old to avoid using the full path')
parser.add_argument('-ia' , '--import-a', type=str , default='' , help='optional import folder location for vid_aud to avoid using the full path')
parser.add_argument('-t' , '--tags' , type=str , default='' , help='optional bracketed tags to append to the filename (comma separated)')
# language options; both values are passed through language_tag()
parser.add_argument('-la' , '--lang-aud', type=language_tag , default='jpn,und', help='comma separated IETF language tags for accepted output audio tracks')
parser.add_argument('-ls' , '--lang-set', type=language_tag , default='jpn' , help='an IETF language tag for the output audio to be set to')
# boolean switches (all default to False)
parser.add_argument('-ss' , '--skip-sub', action='store_true', default=False , help='skips all subtitle exports')
parser.add_argument('-sc' , '--skip-cln', action='store_true', default=False , help='skips all subtitle modifications/cleaning')
parser.add_argument('-se' , '--skip-ext', action='store_true', default=False , help='skips all attachment exports')
parser.add_argument('-sx' , '--skip-xml', action='store_true', default=False , help='skips all chapter xml exports')
parser.add_argument('-an' , '--attach-n', action='store_true', default=False , help='skips adding attachments (generally fonts) from the new file')
parser.add_argument('-ao' , '--attach-o', action='store_true', default=False , help='skips adding attachments (generally fonts) from the old file')
parser.add_argument('-o' , '--original', action='store_true', default=False , help='if you want to maintain the original subtitle styling')
parser.add_argument('-s' , '--slow' , action='store_true', default=False , help='disables the 5min trim for audio offset calc (checks the entire video)')
parser.add_argument('-v' , '--verbose' , action='store_true', default=False , help='show console output for commands from mkvtoolnix')
parser.add_argument('-e' , '--external', action='store_true', default=False , help='export external files next to vid_new (fonts, attachments)')
# parsed once at import time; the rest of the script reads the options from `args`
args = parser.parse_args()
# path variables
# Windows locations of the external tools and folders; every value that is used as a prefix ends with a backslash.
mkvtoolnix = 'C:\\Program Files\\MKVToolNix\\' # folder holding mkvmerge.exe / mkvextract.exe / mkvpropedit.exe
aegisub_cli = 'C:\\Program Files\\Aegisub\\aegisub-cli.exe' # full path of the aegisub-cli executable
# NOTE(review): os.environ['USERPROFILE'] raises KeyError when that variable is not set (e.g. outside Windows)
attach_path = f'{os.environ['USERPROFILE']}\\Documents\\refs\\' # folder holding the Lato fonts and excluded_fonts.txt
output_path = '.\\mux\\' # muxed files are written to this subfolder of the working directory
# fonts that get removed from the output (a single version of lato is attached in their place)
excluded_list = f'{attach_path}excluded_fonts.txt'
excluded_fonts = (
    'Lato-Bol.ttf',
    'Lato-Bold.ttf',
    'LatoWeb-Bold.ttf',
    'Lato-BolIta.ttf',
    'LatoWeb-BoldItalic.ttf',
    'Lato-BoldItalic.ttf',
)
# an optional excluded_fonts.txt next to the fonts extends the built-in tuple: one name per line, blank lines ignored
if os.path.exists(excluded_list):
    with open(excluded_list, 'r') as file:
        excluded_fonts += tuple(name for name in map(str.strip, file) if name)
# command line fragments assembled once from the parsed arguments; each is either empty or starts with a space
# so it can be appended directly to a command string
_track_flags = ' --set flag-default=1 --set flag-forced=0 --set language="{}"'.format(args.lang_set.split(',')[0])
mod_tracks = f' --edit track:v1{_track_flags} --edit track:a1{_track_flags}' # same flags and language for the first video and first audio track
if args.original:
    # keep the original styling: leave the attached fonts alone
    add_fonts = ''
    del_fonts = ''
else:
    add_fonts = f' --add-attachment {attach_path}Lato-Bol.ttf --add-attachment {attach_path}Lato-BolIta.ttf'
    del_fonts = ''.join(f' --delete-attachment name:{font}' for font in excluded_fonts)
add_tags = ''.join(f' [{tag.strip()}]' for tag in args.tags.split(',')) if args.tags else ''
del_images = ' --delete-attachment mime-type:image/jpeg --delete-attachment mime-type:image/png'
del_attach_n = ' -M' if args.attach_n else ''
del_attach_o = ' -M' if args.attach_o else ''
enable_trim = '' if args.slow else ' --trim 300'
quiet = '' if args.verbose else ' -q'
# force common chapter name schemes to english
# maps a regex to its re.sub replacement; applied to chapter names by the chapter renaming step
chapter_formatting = {r'第 (\d+) 章': r'Chapter \1'}
# subtitle regex
vid_formats = '(mkv|avi|mp4|mov|ogm|wmv|mpg|mpeg|mk3d|m4v)' # regex alternation of video extensions, passed to mod_ext() as `find`
sub_formats = ('en.ass', 'en.srt') # suffixes checked when looking for external subtitles
res_x_map = r'^(?=PlayResX:)' # zero-width match just before a "PlayResX:" line; used as the insertion point for script properties
updated_by = 'Script Updated By: natyusha'
update_info = '\nUpdate Details: timing, tweaks, unified font style\n'
style_map = r'(^\[V4\+ Styles\]$\n^Format:.*$\n)' # captures the styles header and its Format line so new styles can be inserted after them
style_chk = r'^Style: (?:Default|Def|Alternate|Thoughts?|Thinking|Top)(?:_dvd)?,(?:Lato|Gandhi Sans),.*$\n' # existing style lines that get removed before the unified ones are inserted
# unified Default / Alternate style pairs; the cleaning stage picks one by the PlayResY (and for 480, PlayResX) value found in the file
style_404p = 'Style: Default,Lato,30,&H00FFFFFF,&H00FFFFFF,&H00000000,&H96000000,0,0,0,0,100,100,0,0,1,1.26,0.84,2,17,17,22,1\nStyle: Alternate,Lato,30,&H00FFFFFF,&H00FFFFFF,&H00333333,&H96000000,0,-1,0,0,100,100,0,0,1,1.26,0.84,2,17,17,22,1\n'
style_432p = 'Style: Default,Lato,32,&H00FFFFFF,&H00FFFFFF,&H00000000,&H96000000,0,0,0,0,100,100,0,0,1,1.35,0.9,2,18,18,24,1\nStyle: Alternate,Lato,32,&H00FFFFFF,&H00FFFFFF,&H00333333,&H96000000,0,-1,0,0,100,100,0,0,1,1.35,0.9,2,18,18,24,1\n'
style_480p = 'Style: Default,Lato,36,&H00FFFFFF,&H00FFFFFF,&H00000000,&H96000000,0,0,0,0,100,100,0,0,1,1.5,1,2,20,20,27,1\nStyle: Alternate,Lato,36,&H00FFFFFF,&H00FFFFFF,&H00333333,&H96000000,0,-1,0,0,100,100,0,0,1,1.5,1,2,20,20,27,1\n'
style_480ps = 'Style: Default,Lato,36,&H00FFFFFF,&H00FFFFFF,&H00000000,&H96000000,0,0,0,0,84.375,100,0,0,1,1.5,1,2,17,17,27,1\nStyle: Alternate,Lato,36,&H00FFFFFF,&H00FFFFFF,&H00333333,&H96000000,0,-1,0,0,84.375,100,0,0,1,1.5,1,2,17,17,27,1\n'
style_576p = 'Style: Default,Lato,43,&H00FFFFFF,&H00FFFFFF,&H00000000,&H96000000,0,0,0,0,100,100,0,0,1,1.8,1.2,2,24,24,32,1\nStyle: Alternate,Lato,43,&H00FFFFFF,&H00FFFFFF,&H00333333,&H96000000,0,-1,0,0,100,100,0,0,1,1.8,1.2,2,24,24,32,1\n'
style_720p = 'Style: Default,Lato,54,&H00FFFFFF,&H00FFFFFF,&H00000000,&H96000000,0,0,0,0,100,100,0,0,1,2.25,1.5,2,30,30,40,1\nStyle: Alternate,Lato,54,&H00FFFFFF,&H00FFFFFF,&H00333333,&H96000000,0,-1,0,0,100,100,0,0,1,2.25,1.5,2,30,30,40,1\n'
style_1080p = 'Style: Default,Lato,81,&H00FFFFFF,&H00FFFFFF,&H00000000,&H96000000,0,0,0,0,100,100,0,0,1,3.375,2.25,2,45,45,60,1\nStyle: Alternate,Lato,81,&H00FFFFFF,&H00FFFFFF,&H00333333,&H96000000,0,-1,0,0,100,100,0,0,1,3.375,2.25,2,45,45,60,1\n'
style_1280u = 'Style: Default,Lato,96,&H00FFFFFF,&H00FFFFFF,&H00000000,&H96000000,0,0,0,0,84.375,100,0,0,1,4,2.7,2,45,45,71,1\nStyle: Alternate,Lato,96,&H00FFFFFF,&H00FFFFFF,&H00333333,&H96000000,0,-1,0,0,84.375,100,0,0,1,4,2.7,2,45,45,71,1\n'
line_chk = r'^(Dialogue: \d+,\d+:\d{2}:\d{2}\.\d{2},\d+:\d{2}:\d{2}\.\d{2},)' # captures a Dialogue line up to (and including) the comma after the end time
# style names that follow line_chk and get renamed by the cleaning stage
line_def = r'(?:Default_dvd|Def|main),' # renamed to "Default,"
line_ita = r'(?:Thoughts_dvd|Thinking|Thoughts?|italics),' # renamed to "Alternate,"
line_top = r'(?:Top(?:_dvd)?),,0,0,0,,' # renamed to "Default" with an {\an8} override prepended to the text
print('\n###############################################'
      '\n## Paired Muxing Script for Quality Upgrades ##'
      '\n###############################################')
# Load the mappings: the -map argument replaces map.txt entirely, so the file is only
# opened (and has to exist) when no -map value was given.
try:
    if args.mapping:
        mapping_list = [args.mapping]
    else:
        with open('map.txt', encoding='utf8') as map_file:  # closed again as soon as it has been read
            map_text = map_file.read()
        mapping_list = map_text.split('\n') if map_text else []
    if not mapping_list: raise ValueError('no mappings supplied')  # an empty map.txt aborts just like a missing one
    mappings = [m.split('|') for m in mapping_list if m and not m.startswith('#')] # ignore empty lines or lines starting with a pound
except Exception:
    print(f'\n{err}Aborting: "map.txt" not found or a malformed mapping is present')
    sys.exit(1)
# Main loop, stage 1: per-mapping setup. Each item of `mappings` is one mapping line already split on "|".
print('\n####### Video Multiplexing & Formatting #######')
for idx, mapping in enumerate(mappings):
# resolve the new / old video paths (fields 0 and 1) and reset the per-file state; offset starts at 0
vid_new, vid_old, chapters, sub_file, sub_ext, offset = vid_paths(args.import_n, 0), vid_paths(args.import_o, 1), None, '', '', 0
# the extra audio source (field 4) is optional
vid_aud = vid_paths(args.import_a, 4) if len(mapping) > 4 and mapping[4] else None
# output name: basename of the old video with its extension changed to .mkv, placed in output_path,
# with any bracketed tags and a " (n)" counter inserted in front of the last four characters
mkv_pre = f'{output_path}{os.path.basename(mod_ext(vid_formats, 'mkv', vid_old))}'
mkv_out = f'{mkv_pre[:-4]}{add_tags} ({idx + 1}){mkv_pre[-4:]}'
sub_out = f'{mod_ext(vid_formats, 'en.ass', mkv_out)}'
# check if there is a new file or the old one is being formatted
# NOTE(review): vid_new was already joined onto args.import_n by vid_paths(), so this comparison
# can only match when -in is left empty — confirm
if vid_new.lower() == 'format':
print(f'\n╭{os.path.basename(vid_old)}\n├─Marking the Old Video for Formatting...')
vid_new = vid_old
else:
print(f'\n╭{os.path.basename(vid_new)}\n├─Source: {os.path.basename(vid_old)}')
# Main loop, stage 2: .wmv conversion and audio offset detection.
# NOTE(review): the indentation of this file was lost, so how far this condition reaches cannot be told from here
if not args.skip_sub or not args.skip_xml:
# convert wmv to mkv for mkvmerge compatibility
if (vid_new and vid_new.lower().endswith('.wmv')) or (vid_aud and vid_aud.lower().endswith('.wmv')):
start = time.time()
print('├┬Converting .wmv to .mkv...')
# ffmpeg stream-copies into an .mkv next to the source and the path variable is pointed at the new file
if vid_new and vid_new.lower().endswith('.wmv'): convert, vid_new = subprocess.run(f'ffmpeg -v quiet -y -i "{vid_new}" -c:v copy -c:a copy "{mod_ext('wmv', 'mkv', vid_new)}"'), mod_ext('wmv', 'mkv', vid_new)
if vid_aud and vid_aud.lower().endswith('.wmv'): convert, vid_aud = subprocess.run(f'ffmpeg -v quiet -y -i "{vid_aud}" -c:v copy -c:a copy "{mod_ext('wmv', 'mkv', vid_aud)}"'), mod_ext('wmv', 'mkv', vid_aud)
print(f'│╰─Completed in {elapsed(start)}')
if vid_new != vid_old:
# determine the audio offset in seconds by using the first 5 minutes of audio (increase trim for slightly higher accuracy and much slower parsing)
start = time.time()
print('├┬Determining Audio Offset...')
try:
# audio-offset-finder prints JSON; its "time_offset" value is used as the offset
offset = json.loads(subprocess.run(f'audio-offset-finder --find-offset-of "{vid_old}" --within "{vid_new}"{enable_trim} --json', capture_output=True).stdout)
offset = offset['time_offset'] if offset else 0
except Exception:
print(f'│{err}─Failed! Unable to Parse Video')
print(f'│╰─Completed in {elapsed(start)} [{round(offset, 3)}s]')
# determine the extra audio offset in seconds by using the first 5 minutes of audio (increase trim for slightly higher accuracy and much slower parsing)
if vid_aud:
start = time.time()
print('├┬Determining Extra Audio Offset...')
try:
extra_offset = json.loads(subprocess.run(f'audio-offset-finder --find-offset-of "{vid_aud}" --within "{vid_new}"{enable_trim} --json', capture_output=True).stdout)
extra_offset = extra_offset['time_offset'] if extra_offset else 0
except Exception:
print(f'│{err}─Failed! Unable to Parse Video')
# NOTE(review): unlike offset, extra_offset has no initial value, so if the try above fails before
# assigning it the next line raises NameError (or reuses the value from an earlier mapping)
print(f'│╰─Completed in {elapsed(start)} [{round(extra_offset, 3)}s]')
# Main loop, stage 3: read track / attachment / chapter information from the old and new files.
# grab the file information of the original file in json format and store it in a variable
print('├┬Parsing Track Info & MultiPlexing...')
start, ident_old, sub_track, attachment_pairs = time.time(), identify(vid_old), None, []
# if available add the title from the mapping otherwise use the one from the original file to ensure titles from the new file aren't used
# NOTE(review): when this lookup raises, `title` is not assigned here, so the mod_title line below
# raises NameError or reuses the title of an earlier mapping
try: title = mapping[2] if len(mapping) > 2 and mapping[2] else ident_old['container']['properties'].get('title')
except Exception: print(f'│{err}─Failed! Unable to Parse Container')
mod_title = f' --edit info --set title="{undo_reserved(title)}"' if title else ''
# parse the identification variable to determine the track id of any .ass or .srt subtitles (if present)
try:
# there is no break in this loop, so the last matching subtitle track is the one that is kept
for track in ident_old['tracks']:
if track['codec'] == 'SubStationAlpha' : sub_ext, sub_track = 'en.ass', str(track['id'])
elif track['codec'] == 'SubRip/SRT' : sub_ext, sub_track = 'en.srt', str(track['id'])
continue
# in external mode attachment_pairs becomes a space separated string of id:"file name" pairs; otherwise it stays an empty list
if args.external: attachment_pairs = ' '.join([f'{attachment['id']}:"{attachment['file_name']}"' for attachment in ident_old['attachments']]) # parse attachment ids and names if exporting them externally
except Exception:
print(f'│{err}─Failed! Unable to Parse Video')
# parse the mkv file identification to determine if there are multiple chapters present
# both chapter options start as " --no-chapters"; at most one of them is changed by the chain below
xml_file, ident_new, sync_chapter, new_chapters = mod_ext(vid_formats, 'xml', vid_old), identify(vid_new), ' --no-chapters', ' --no-chapters'
# priority: external xml next to the old video, then chapters inside the old video, then chapters inside the new video
if os.path.isfile(xml_file): sync_chapter = f' --chapters "{xml_file}" --chapter-sync {int(offset * 1000)}'
elif has_chapters(ident_old) and has_chapters(ident_old) > 1: sync_chapter = f' --chapter-sync {int(offset * 1000)}' # only add chapters from the video if there is more than one and an external chapter file isn't present
elif has_chapters(ident_new) and has_chapters(ident_new) > 1: new_chapters = '' # use chapters from the new file if there is more than one and the old file didn't add chapters already
# Main loop, stage 4: build the mkvmerge command and mux (skipped in external mode).
if not args.external:
# if an extra audio track is being used determine the track id(s)
if vid_aud:
# optional fields after the path in mapping field 4: index 2 is the language, index 3 the track name
ident_aud, aud_opts = identify(vid_aud), mapping[4].split(';')
lang_aud = aud_opts[2].strip(" '\"") if len(aud_opts) > 2 and aud_opts[2] else args.lang_set.split(',')[0]
name_aud = f'"{aud_opts[3].strip(" '\"")}"' if len(aud_opts) > 3 and aud_opts[3] else '""'
# NOTE(review): aud_track is only assigned when an audio track is found; if the file has none the
# command below raises NameError (or reuses the id from an earlier mapping)
for track in ident_aud['tracks']:
if track['type'] == 'audio': aud_track = str(track['id'])
continue
# from here on vid_aud holds the mkvmerge option string for the extra audio file instead of a path
vid_aud = f' -D -S -B -T -M --no-chapters --default-track-flag {aud_track}:0 --language {aud_track}:{lang_aud} --track-name {aud_track}:{name_aud} --sync {aud_track}:{int(extra_offset * 1000)} "{vid_aud}"'
else: vid_aud = ''
# mux the attachments, chapters (with offset) and audio tracks from the original file(s) into the new file
start, merge = time.time(), subprocess.run(f'{mkvtoolnix}mkvmerge.exe{quiet} -o "{mkv_out}" -S{del_attach_n}{new_chapters} -a {args.lang_aud} "{vid_new}" -A -D -S{del_attach_o}{sync_chapter} "{vid_old}"{vid_aud}', capture_output=True)
parse_std(merge)
print(f'│╰─Completed in {elapsed(start)}')
# Main loop, stage 5: rename chapters where needed and edit the properties of the muxed file.
print('├┬Editing Properties...')
# if there are chapters rename them following regex substitution pair values and don't allow empty chapter names
# mod_xml is a temporary file in the working directory; it is removed again at the end of this stage
start, mod_chapters, mod_xml = time.time(), '', 'mod_chapters.xml'
if sync_chapter != ' --no-chapters' or new_chapters != ' --no-chapters':
try:
chapter_xml = subprocess.run(f'{mkvtoolnix}mkvextract chapters "{mkv_out}"', capture_output=True, text=True, encoding="utf-8", errors="replace", check=True).stdout.strip() # capture chapter XML directly from mkvextract stdout
if chapter_xml: # check if chapter_xml is non-empty
# parse the XML string and wrap in ElementTree
# `modified` tracks whether anything changed, so the xml is only written (and applied) when needed
root, tree, modified = ElementTree.fromstring(chapter_xml), ElementTree.ElementTree(), False
tree._setroot(root)
# get all ChapterAtom elements and check for existing ChapterString elements
chapter_atoms, chapters = tree.findall('.//ChapterAtom'), tree.findall('.//ChapterDisplay/ChapterString')
# check if all chapters lack ChapterString or have empty ChapterString
all_empty = not chapters or all(chapter.text is None or chapter.text.strip() == '' for chapter in chapters)
# create or update ChapterString for each ChapterAtom
if all_empty and chapter_atoms:
# generated names are "Chapter 01", "Chapter 02", ... in document order
for index, atom in enumerate(chapter_atoms, 1):
display = atom.find('ChapterDisplay')
if display is None:
display = ElementTree.SubElement(atom, 'ChapterDisplay')
chapter_string = ElementTree.SubElement(display, 'ChapterString')
chapter_string.text, modified = f'Chapter {index:02d}', True
else:
# check if ChapterString exists
chapter_string = display.find('ChapterString')
if chapter_string is None:
chapter_string = ElementTree.SubElement(display, 'ChapterString')
chapter_string.text, modified = f'Chapter {index:02d}', True
elif chapter_string.text is None or chapter_string.text.strip() == '':
chapter_string.text, modified = f'Chapter {index:02d}', True
elif chapters:
# apply regex substitutions to existing ChapterString elements
# re.match only matches at the start of the name, so a pattern applies only to names that begin with it
for key, value in chapter_formatting.items():
for chapter in chapters:
if chapter.text is not None and re.match(key, chapter.text): chapter.text, modified = re.sub(key, value, chapter.text), True
if modified:
tree.write(mod_xml, encoding='utf-8', xml_declaration=True)
mod_chapters = f' --chapters {mod_xml}'
else:
mod_chapters = ''
except Exception as e:
# best effort: a failure here only skips the chapter renaming, the property edit below still runs
mod_chapters = ''
print(f"│{err}─Failed! Unable to Rename Chapters: {e}")
# remove any tags, set the video+audio tracks to japanese (or the language from the mapping), set the title, and remove single entry chapters for the muxed file
propedit = subprocess.run(f'{mkvtoolnix}mkvpropedit.exe{quiet} "{mkv_out}" -t all:{mod_tracks}{del_fonts}{add_fonts}{mod_title}{del_images}{mod_chapters}', capture_output=True)
parse_std(propedit)
# clean up chapter xml files if they exist
if os.path.exists(mod_xml): os.remove(mod_xml)
print(f'│╰─Completed in {elapsed(start)}')
# Main loop, stage 6: external mode — export attachments and offset chapters next to the new video.
if args.external:
# extract the fonts for each video file into a folder with the same name as the old parent file when in external mode (appends _attach)
if not args.skip_ext and attachment_pairs:
print('├┬Extracting Attachments...')
start, cwd, vid_old_opts = time.time(), os.getcwd(), mapping[1].split(';') # save the current directory as you can't globally specify the output directory for mkvmerge
# optional third semicolon field of the vid_old mapping is put in front of "attach" in the folder name
vid_ext = f'{vid_old_opts[2]}_' if len(vid_old_opts) > 2 and vid_old_opts[2] else ''
# NOTE(review): this rebinds the module-level attach_path that was used to build add_fonts
# NOTE(review): chapter_file is only assigned on this line; the chapter export below uses it, so if
# that code is not nested under this condition (indentation was lost) it can hit an unbound name
attach_path, chapter_file = os.path.join(os.path.dirname(vid_new), os.path.basename(mod_ext(vid_formats, f'{vid_ext}attach', vid_old, '_'))), os.path.join(os.path.dirname(vid_new), os.path.basename(mod_ext(vid_formats, 'xml', vid_new)))
os.makedirs(attach_path, exist_ok=True) # create directory for the attachments and move to it (where mkvmerge will output)
os.chdir(attach_path)
extract = subprocess.run(f'{mkvtoolnix}mkvextract.exe{quiet} "{vid_old}" attachments {attachment_pairs}', capture_output=True)
parse_std(extract)
os.chdir(cwd) # move back to the cwd
# print(f'│├─{attachment_pairs}') # results in too much console spam but can be useful
print(f'│╰─Completed in {elapsed(start)}')
# apply the chapter sync and extract to the vid_new directory as external mode skips the chapter section
if not args.skip_xml:
print('├┬Extracting Offset Chapters...')
start, vid_tmp = time.time(), 'temp_chapter_sync.mkv'
if sync_chapter != ' --no-chapters' or new_chapters != ' --no-chapters':
# the synced chapters are written into a temporary mkv first and then extracted from it as xml
sync = subprocess.run(f'{mkvtoolnix}mkvmerge.exe{quiet}{sync_chapter} -o "{vid_tmp}" "{vid_old}"', capture_output=True) # mkvmerge only outputs to mkv
parse_std(sync)
extract = subprocess.run(f'{mkvtoolnix}mkvextract.exe{quiet} "{vid_tmp}" chapters "{chapter_file}"', capture_output=True)
parse_std(extract)
# generate a chp chapter sidecar file (for use with mpv and chapter-make-read.lua) from the chapter xml for quick quality checking of the external subtitles (mpv doesn't support chapters as xml)
tree, output = ElementTree.parse(chapter_file), ''
for chapter in tree.getroot().findall('.//ChapterAtom'):
start_time = chapter.find('ChapterTimeStart').text
# NOTE(review): find() returns None for a chapter without a ChapterString, which makes .text raise AttributeError
title = chapter.find('.//ChapterString').text or ''
output += f"{start_time} {title}\n" # format: HH:MM:SS.sss <title>
# write to final chp file
with open(f'{chapter_file.replace('.xml', '.chp')}', 'w', encoding='utf-8') as f:
f.write(output)
if os.path.isfile(vid_tmp): os.remove(vid_tmp)
print(f'│╰─Completed in {elapsed(start)}')
# Main loop, stage 7: obtain the subtitles, resample them to the output video and apply the audio offset.
# extract the subtitle track from the original file and name it the same as the output file with ".en" appended
if not args.skip_sub:
if sub_track:
start, sub_file = time.time(), mod_ext(vid_formats, sub_ext, vid_old)
print('├┬Extracting Subtitles...')
# NOTE(review): the double quote opened before {sub_track} is never closed in this command string
extract = subprocess.run(f'{mkvtoolnix}mkvextract.exe{quiet} "{vid_old}" tracks "{sub_track}:{sub_file}', capture_output=True)
parse_std(extract)
print(f'│├─{os.path.basename(sub_file)}')
print(f'│╰─Completed in {elapsed(start)}')
else: # check for external subtitles if there are no sub tracks
for s in sub_formats:
if os.path.isfile(mod_ext(vid_formats, s, vid_old)):
print('├┬External Subtitles Found...')
sub_file = mod_ext(vid_formats, s, vid_old)
print(f'│├─{os.path.basename(sub_file)}')
# NOTE(review): `start` is not reset in this branch, so the time printed here is measured from an earlier stage
print(f'│╰─Completed in {elapsed(start)}')
continue
# convert any .srt subtitle to .ass
if sub_file.endswith('en.srt'):
print('├┬Converting SubRip/SRT to SubStationAlpha...')
# NOTE(review): both paths are passed to ffmpeg without quotes, unlike the other ffmpeg calls in this script
start, convert, sub_file = time.time(), subprocess.run(f'ffmpeg -v quiet -y -i {sub_file} {sub_file.replace('.en.srt', '.en.ass')}'), sub_file.replace('.en.srt', '.en.ass')
print(f'│╰─Completed in {elapsed(start)}')
# resample the subtitle file resolution to match the output file
if os.path.isfile(sub_file):
print('├┬Resampling & Offsetting Subtitles...')
if args.external: mkv_out, sub_out = vid_new, f'{mod_ext(vid_formats, 'en.ass', vid_new)}' # if skipping videos target the new video filename and create the mux folder
shutil.copy(sub_file, 'temp_sub_in.ass') # copy original subtitles to a temp file to avoid modifying them
os.rename(mkv_out, 'temp_mkv_out.mkv') # aegisub-cli will fail if the file paths contain any unicode so use temp filenames and rename the video temporarily
# NOTE(review): if this call raises, the rename back on the next line is skipped and the video keeps its temporary name
start, resample = time.time(), subprocess.run(f'{aegisub_cli} --loglevel 2 --video temp_mkv_out.mkv temp_sub_in.ass temp_sub_out.ass tool/resampleres')
os.rename('temp_mkv_out.mkv', mkv_out) # restore the video filename
# apply the audio offset to the resampled file and append the subgroup name if specified
subgroup = f'_{mapping[3]}' if len(mapping) > 3 and mapping[3] else ''
subprocess.run(f'ffmpeg -v quiet -y -itsoffset {offset} -i temp_sub_out.ass "{sub_out.replace('.en.ass', f'{subgroup}.en.ass')}"')
print(f'│╰─Completed in {elapsed(start)}')
print('╰─Muxing Complete!')
# clean up temp subtitle files
if os.path.isfile('temp_sub_in.ass'): os.remove('temp_sub_in.ass')
if os.path.isfile('temp_sub_out.ass'): os.remove('temp_sub_out.ass')
# Final stage: subtitle script properties, unified styles and text clean-up (skipped with -sc).
# Every grep() call below uses the default path ".", i.e. it searches all "*.ass" files under the
# working directory recursively — not only the files written during this run.
if not args.skip_cln:
start = time.time()
print('\n##### Subtitle Script Properties & Styles #####')
print('\n╭SubStation Alpha Operations')
if not args.original:
# insert subtitle properties required for the unified style to remain consistent
print('├┬Inserting Credits / ScaledBorderAndShadow / WrapStyle...')
# flags='L' lists the files that do NOT contain the text, so each property is only inserted where it is missing
for file in grep(updated_by, flags='L'):
if file: replace(file, res_x_map, updated_by + update_info)
for file in grep('ScaledBorderAndShadow:', flags='L'):
if file: replace(file, res_x_map, 'ScaledBorderAndShadow: yes\n')
for file in grep('ScaledBorderAndShadow: no'):
if file: replace(file, r'^ScaledBorderAndShadow: no', 'ScaledBorderAndShadow: yes')
for file in grep('WrapStyle:', flags='L'):
if file: replace(file, res_x_map, 'WrapStyle: 0\n')
for file in grep(r'WrapStyle: [1-9]'):
if file: replace(file, r'^WrapStyle: [1-9]', 'WrapStyle: 0')
print('│╰─Done')
# insert SakuraCircle styled fonts after checking for dupes
print('├┬Inserting Unified Font Styles...')
# the empty search text is used to list every subtitle file
for file in grep(''):
replace(file, style_chk, '', False)
replace(file, fr'{line_chk}{line_def}', r'\1Default,', False)
replace(file, fr'{line_chk}{line_ita}', r'\1Alternate,', False)
replace(file, fr'{line_chk}{line_top}', r'\1Default,,0,0,0,,{\\an8}', False)
replace(file, r'(?<=^\[Aegisub Project Garbage\]\n)[\s\S]*(?=\n^\[V4\+ Styles\])', '', False) # clear aegisub project garbage
# pick the style pair that matches the script resolution found in each file
for file in grep('PlayResY: 404'):
replace(file, style_map, fr'\1{style_404p}')
for file in grep('PlayResY: 432'):
replace(file, style_map, fr'\1{style_432p}')
# 480 high scripts get one of two style pairs depending on their PlayResX (854 or 720); other widths are left without a unified style
for file in grep('PlayResY: 480'):
if grep('PlayResX: 854', path=f'"{file}"'): replace(file, style_map, fr'\1{style_480p}')
elif grep('PlayResX: 720', path=f'"{file}"'): replace(file, style_map, fr'\1{style_480ps}')
for file in grep('PlayResY: 576'):
replace(file, style_map, fr'\1{style_576p}')
for file in grep('PlayResY: 720'):
replace(file, style_map, fr'\1{style_720p}')
for file in grep('PlayResY: 1080'):
replace(file, style_map, fr'\1{style_1080p}')
for file in grep('PlayResY: 1280'):
replace(file, style_map, fr'\1{style_1280u}')
print('│╰─Done')
# regex for fixing common subtitle script errors
print('├┬Cleaning the Script...')
# NOTE(review): grep treats the search text as a pattern, so '[Script Info]' is presumably read as a
# bracket expression (any one of those characters) rather than the literal section header — confirm
for file in grep('[Script Info]'):
if file:
replace(file, r'--' , '—' , False) # Convert double hyphen to single long hyphen
replace(file, r'’' , "'" , False) # Convert curly to straight single quotes
replace(file, r'“|”', '"' , False) # Convert doubly curly quotes to single double quotes
replace(file, r"''" , '"' , False) # Convert double single quotes to single double quote
replace(file, r'…' , '...', False) # Convert ellipses to periods
# NOTE(review): as written the pattern and the replacement are both a single space, so this call changes nothing
replace(file, r' ' , ' ' , False) # Convert double spaces to single spaces
replace(file, r'(?!^Style: .+?,.*,)100\.039(?=,100,)', '100', False) # Fix Aegisub Font Size Conversions
# this is the only call in the list that keeps log=True, so each cleaned file name is printed once here
replace(file, r"""(?<!\d )(?:(?<=[}\.," -])|(?<=\\[nN]))l(?=[\., ]|[fnst] |'[md]|'ll|'ve|t's|t'll|sn't|-l|nside|dea|ntro)""" , 'I') # Replace lowercase l when it should be an uppercase I (caused by old OCR techniques)
print('│╰─Done')
print(f'╰─Completed in {elapsed(start)}')
print('\n############# All Tasks Complete! #############')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment