This script takes a raw video file and muxes it together with another video file. It will retain chapters/subtitles/attachments from the old file and sync them if there is a delay. Options are included for simply reformatting an existing file, adding extra audio tracks, and exporting subtitles/attachments/chapters to go along with raws.
#!/usr/bin/env python3 | |
import os, re, sys, json, time, shutil, argparse, subprocess | |
from xml.etree import ElementTree | |
r""" | |
Description: | |
- This script takes a raw video file and muxes it together with another video file | |
- It will retain chapters/subtitles/attachments from the old file and sync them if there is a delay | |
Author: | |
- natyusha | |
Requirements: | |
- programs : python 3.7+, aegisub, aegisub-cli, mkvtoolnix, ffmpeg | |
- fonts : Lato-Bol.ttf, Lato-BolIta.ttf | |
- bash : grep | |
- pip : audio-offset-finder | |
- mpv : chapter-make-read.lua | |
Usage: | |
- to auto merge create a "map.txt" file with the path to all files in a single line separated by a pipe: "vid_new"|"vid_old"|title|subgroup|"vid_aud" | |
- make sure there are no unicode characters present in the parent folder names of the new video files | |
- relative paths to the working directory can be used for all files | |
- any path can have extra info appended to it after a semicolon | |
- for vid_new: "path";comment | |
- for vid_old: "path";comment;ext | |
- for vid_aud: "path";comment;lang;trackname | |
- the parent folder of the files can also be set with the -in or -io tags if the full path isn't desired | |
- lines in "map.txt" starting with a "#" are treated as a comment | |
- entering "FORMAT" (without quotes) as "vid_new" will use the same video for the output as "vid_old" while applying the styling | |
- this can be used for raws and external subtitles by naming the subtitles the same as the raw with ".en" appended" | |
- place the mapping file in the terminal's working directory and run the script | |
- the title is optional and will default to the title of the original video | |
- use the copy as path context menu command (on windows) for both sets of files then organise them accordingly | |
- the script will output to a "mux" subfolder of the working directory and use the filenames from the second half of the mapping | |
- chapters will be copied over from the same location as the "vid_old" if they are named "original_filename.xml" | |
- if there are no external chapters they will be taken from "vid_old" or "vid_new" as a last resort | |
- subtitles will be copied over from the same location as the "vid_old" if they are named "original_filename.en.ass" | |
- subtitles will be extracted from the original file if loose ones aren't present and will follow the same naming scheme as above | |
- copied subtitles will be resampled to the upgraded file's resolution and the correct script properties will be set | |
- Note: non ".ass" / ".srt" subtitles aren't handled by this script as they require manual editing and aren't that common | |
Arguments: | |
- run "pairedmuxing.py -h" for info on how to format the mapping file and for more details on the arguments | |
- if you want to maintain the original subtitle styling enter "original" as a positional argument | |
- if you want to change the language tag assigned to the audio simply enter a language tag as an argument e.g. "eng" | |
- must be the second argument if "original" is being used | |
""" | |
sys.stdout.reconfigure(encoding='utf-8', line_buffering=True) # allow unicode characters in print and flush after each newline | |
err = '\033[31m⨯\033[0m' # use the red terminal colour for ⨯ | |
# function to check for a valid language tag argument
def language_tag(tag):
    tags = tag.replace(' ', '').split(',')
    for t in tags:
        if not re.match(r'^[a-z]{2,3}(-[A-Z]{2})?$', t): raise argparse.ArgumentTypeError(f'{err}Language(s) must be valid IETF tag(s)')
    return ','.join(tags)
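# for illustration (doctest-style, not executed): the validator accepts simple IETF tags and
# strips the spacing around the separators, e.g.
#   >>> language_tag('jpn, en-US')
#   'jpn,en-US'
#   >>> language_tag('japanese')   # raises argparse.ArgumentTypeError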
# undo special characters from windows filenames, convert backticks and remove trailing white-space
def undo_reserved(title):
    reserved = {'⧵': r'\\', '⁄': r'\/', '꞉': ':', '＊': r'\*', '？': r'\?', '＜': '<', '＞': '>', '｜': r'\|', '`': "'", '[“”]': r'\"', r'[ \t]+$': ''}
    for key, value in reserved.items(): title = re.sub(key, value, title)
    return title
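# for illustration: lookalike characters that keep Windows filenames legal are mapped back (and
# shell-escaped where the author escapes them) before the title is handed to mkvpropedit, e.g.
#   >>> undo_reserved('Who ⁄ What꞉ `89')
#   "Who \\/ What: '89"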
# format video paths | |
def vid_paths(arg, mapping_idx): return f'{os.path.join(arg, mapping[mapping_idx].split(';')[0].strip('"'))}' | |
# output elapsed since previous time.time() | |
def elapsed(start): return f'{round(time.time() - start, 2)}s' | |
# change extension of file mapping | |
def mod_ext(find, replace, file, dot='.'): return re.sub(fr'\.{find}$', f'{dot}{replace}', file, flags=re.I) | |
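# for illustration (doctest-style, using the vid_formats pattern defined further down):
#   >>> mod_ext(vid_formats, 'en.ass', 'Show - 01.MKV')
#   'Show - 01.en.ass'
#   >>> mod_ext(vid_formats, 'attach', 'Show - 01.mkv', '_')
#   'Show - 01_attach'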
# parse captured stdout / stderr
def parse_std(capture):
    out = '' # start empty so stderr can still be appended when stdout is blank
    if capture.stdout: out = f'│├─{capture.stdout.decode("utf-8").strip('\r\n').replace('\r\n', '\r\n│├─')}'
    if capture.stderr: out += f'│├{err}{capture.stderr.decode("utf-8").strip('\r\n').replace('\r\n', f'\r\n│├{err}')}'
    if out: out = re.sub(r'^│├─Warning: No attachment matched the spec.*$\n?', '', out, flags=re.M) # remove messages warning about removing attachments
    if out: out = re.sub(r'^│├(─Error:.*$)\n?', rf'│{err}\1', out, flags=re.M) # add error symbol to errors from stdout
    if out: return print(out)
# grab the file information of a given file in json format and store it in a variable | |
def identify(file): return json.loads(subprocess.run(f'{mkvtoolnix}mkvmerge.exe -J "{file}"', capture_output=True, universal_newlines=True, encoding='utf-8').stdout) | |
# check mkvmerge -J output for the chapter count | |
def has_chapters(ident): | |
try: return ident['chapters'][0]['num_entries'] | |
except Exception: return None | |
# search for subtitles which contain (or don't) the specified text [flags: L = exclusion / l = inclusion] | |
def grep(txt_search, flags='l', path='.'): return [line for line in subprocess.run(f'grep -r{flags} --include="*.ass" "{txt_search}" {path}', capture_output=True).stdout.decode('utf-8').split('\n') if line] | |
# replace text in the subtitles using regex | |
def replace(file, find, replace, log=True): | |
with open(file, 'r+', encoding='utf8') as f: | |
if log: print(f'│├─{file}') | |
result = re.sub(find, replace, f.read(), flags=re.M) | |
f.seek(0) | |
f.write(result) | |
f.truncate() | |
# check the arguments | |
parser = argparse.ArgumentParser(description='Mux + sync subtitles, attachments and chapters from one video container to another.\nThis is achieved using a user populated "map.txt" file located in the script\'s working directory.\n\nmap.txt details:' | |
'\n format {path to raw}|{path to original}|{mkv title}|{subgroup name}|{path to extra audio}\n *for the above formatting only the new and old video paths are required' | |
'\n *any path accepts extra info after a semicolon: "PATH";comment (vid_old also accepts ;ext and vid_aud accepts ;lang;trackname)'
'\n *enter "FORMAT" (w/o quotes) as {path to raw} to format the original only\n *the paths can be relative or absolute and support double quotes' | |
, epilog='NOTE: This script may require editing of the "# path variables" section to function correctly.', formatter_class=argparse.RawTextHelpFormatter) | |
parser.add_argument('-map', '--mapping' , type=str , default='' , help='replace the map.txt file with the contents of this argument') | |
parser.add_argument('-in' , '--import-n', type=str , default='' , help='optional import folder location for vid_new to avoid using the full path') | |
parser.add_argument('-io' , '--import-o', type=str , default='' , help='optional import folder location for vid_old to avoid using the full path') | |
parser.add_argument('-ia' , '--import-a', type=str , default='' , help='optional import folder location for vid_aud to avoid using the full path') | |
parser.add_argument('-t' , '--tags' , type=str , default='' , help='optional bracketed tags to append to the filename (comma separated)') | |
parser.add_argument('-la' , '--lang-aud', type=language_tag , default='jpn,und', help='comma separated IETF language tags for accepted output audio tracks') | |
parser.add_argument('-ls' , '--lang-set', type=language_tag , default='jpn' , help='an IETF language tag for the output audio to be set to') | |
parser.add_argument('-ss' , '--skip-sub', action='store_true', default=False , help='skips all subtitle exports') | |
parser.add_argument('-sc' , '--skip-cln', action='store_true', default=False , help='skips all subtitle modifications/cleaning') | |
parser.add_argument('-se' , '--skip-ext', action='store_true', default=False , help='skips all attachment exports') | |
parser.add_argument('-sx' , '--skip-xml', action='store_true', default=False , help='skips all chapter xml exports') | |
parser.add_argument('-an' , '--attach-n', action='store_true', default=False , help='skips adding attachments (generally fonts) from the new file') | |
parser.add_argument('-ao' , '--attach-o', action='store_true', default=False , help='skips adding attachments (generally fonts) from the old file') | |
parser.add_argument('-o' , '--original', action='store_true', default=False , help='if you want to maintain the original subtitle styling') | |
parser.add_argument('-s' , '--slow' , action='store_true', default=False , help='disables the 5min trim for audio offset calc (checks the entire video)') | |
parser.add_argument('-v' , '--verbose' , action='store_true', default=False , help='show console output for commands from mkvtoolnix') | |
parser.add_argument('-e' , '--external', action='store_true', default=False , help='export external files next to vid_new (fonts, attachments)') | |
args = parser.parse_args() | |
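# for illustration only, a hypothetical invocation using the flags defined above (paths made up):
#   pairedmuxing.py -io "D:\old dvds" -in "D:\new raws" -t "BD,Dual-Audio" -la jpn,eng -ls jpn -v
# this reads map.txt from the working directory, resolves relative mapping paths against the
# -in/-io folders, appends " [BD] [Dual-Audio]" to the output filenames and keeps only the
# jpn/eng audio tracks from the new files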
# path variables | |
mkvtoolnix = 'C:\\Program Files\\MKVToolNix\\' | |
aegisub_cli = 'C:\\Program Files\\Aegisub\\aegisub-cli.exe' | |
attach_path = f'{os.environ['USERPROFILE']}\\Documents\\refs\\' | |
output_path = '.\\mux\\' | |
# unwanted fonts (replaced with a single version of lato) | |
excluded_fonts, excluded_list = ('Lato-Bol.ttf', 'Lato-Bold.ttf', 'LatoWeb-Bold.ttf', 'Lato-BolIta.ttf', 'LatoWeb-BoldItalic.ttf', 'Lato-BoldItalic.ttf'), f'{attach_path}excluded_fonts.txt' | |
if os.path.exists(excluded_list): | |
with open(excluded_list, 'r') as file: | |
excluded_fonts = excluded_fonts + tuple([line.strip() for line in file if line.strip()]) # read each line, strip whitespace, filter out empty lines and add to the existing tuple | |
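# for illustration, a hypothetical "Documents\refs\excluded_fonts.txt" simply lists one extra
# attachment name per line to strip on top of the defaults above, e.g.:
#   LatoWeb-Regular.ttf
#   OpenSans-Bold.ttf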
# command modifications | |
mod_tracks = f' --edit track:v1 --set flag-default=1 --set flag-forced=0 --set language="{args.lang_set.split(',')[0]}" --edit track:a1 --set flag-default=1 --set flag-forced=0 --set language="{args.lang_set.split(',')[0]}"' | |
add_fonts = f' --add-attachment {attach_path}Lato-Bol.ttf --add-attachment {attach_path}Lato-BolIta.ttf' if not args.original else '' | |
del_fonts = ''.join(f' --delete-attachment name:{font}' for font in excluded_fonts) if not args.original else '' | |
add_tags = ''.join(f' [{tag.lstrip().rstrip()}]' for tag in args.tags.split(',')) if args.tags else '' | |
del_images = ' --delete-attachment mime-type:image/jpeg --delete-attachment mime-type:image/png' | |
del_attach_n = '' if not args.attach_n else ' -M' | |
del_attach_o = '' if not args.attach_o else ' -M' | |
enable_trim = ' --trim 300' if not args.slow else '' | |
quiet = ' -q' if not args.verbose else '' | |
# force common chapter name schemes to english | |
chapter_formatting = {r'第 (\d+) 章': r'Chapter \1'} | |
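# for illustration: each key/value pair above is applied as a regex substitution to existing
# chapter names, e.g. a chapter called '第 03 章' becomes 'Chapter 03' with the default pair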
# subtitle regex | |
vid_formats = '(mkv|avi|mp4|mov|ogm|wmv|mpg|mpeg|mk3d|m4v)' | |
sub_formats = ('en.ass', 'en.srt') | |
res_x_map = r'^(?=PlayResX:)' | |
updated_by = 'Script Updated By: natyusha' | |
update_info = '\nUpdate Details: timing, tweaks, unified font style\n' | |
style_map = r'(^\[V4\+ Styles\]$\n^Format:.*$\n)' | |
style_chk = r'^Style: (?:Default|Def|Alternate|Thoughts?|Thinking|Top)(?:_dvd)?,(?:Lato|Gandhi Sans),.*$\n' | |
style_404p = 'Style: Default,Lato,30,&H00FFFFFF,&H00FFFFFF,&H00000000,&H96000000,0,0,0,0,100,100,0,0,1,1.26,0.84,2,17,17,22,1\nStyle: Alternate,Lato,30,&H00FFFFFF,&H00FFFFFF,&H00333333,&H96000000,0,-1,0,0,100,100,0,0,1,1.26,0.84,2,17,17,22,1\n' | |
style_432p = 'Style: Default,Lato,32,&H00FFFFFF,&H00FFFFFF,&H00000000,&H96000000,0,0,0,0,100,100,0,0,1,1.35,0.9,2,18,18,24,1\nStyle: Alternate,Lato,32,&H00FFFFFF,&H00FFFFFF,&H00333333,&H96000000,0,-1,0,0,100,100,0,0,1,1.35,0.9,2,18,18,24,1\n' | |
style_480p = 'Style: Default,Lato,36,&H00FFFFFF,&H00FFFFFF,&H00000000,&H96000000,0,0,0,0,100,100,0,0,1,1.5,1,2,20,20,27,1\nStyle: Alternate,Lato,36,&H00FFFFFF,&H00FFFFFF,&H00333333,&H96000000,0,-1,0,0,100,100,0,0,1,1.5,1,2,20,20,27,1\n' | |
style_480ps = 'Style: Default,Lato,36,&H00FFFFFF,&H00FFFFFF,&H00000000,&H96000000,0,0,0,0,84.375,100,0,0,1,1.5,1,2,17,17,27,1\nStyle: Alternate,Lato,36,&H00FFFFFF,&H00FFFFFF,&H00333333,&H96000000,0,-1,0,0,84.375,100,0,0,1,1.5,1,2,17,17,27,1\n' | |
style_576p = 'Style: Default,Lato,43,&H00FFFFFF,&H00FFFFFF,&H00000000,&H96000000,0,0,0,0,100,100,0,0,1,1.8,1.2,2,24,24,32,1\nStyle: Alternate,Lato,43,&H00FFFFFF,&H00FFFFFF,&H00333333,&H96000000,0,-1,0,0,100,100,0,0,1,1.8,1.2,2,24,24,32,1\n' | |
style_720p = 'Style: Default,Lato,54,&H00FFFFFF,&H00FFFFFF,&H00000000,&H96000000,0,0,0,0,100,100,0,0,1,2.25,1.5,2,30,30,40,1\nStyle: Alternate,Lato,54,&H00FFFFFF,&H00FFFFFF,&H00333333,&H96000000,0,-1,0,0,100,100,0,0,1,2.25,1.5,2,30,30,40,1\n' | |
style_1080p = 'Style: Default,Lato,81,&H00FFFFFF,&H00FFFFFF,&H00000000,&H96000000,0,0,0,0,100,100,0,0,1,3.375,2.25,2,45,45,60,1\nStyle: Alternate,Lato,81,&H00FFFFFF,&H00FFFFFF,&H00333333,&H96000000,0,-1,0,0,100,100,0,0,1,3.375,2.25,2,45,45,60,1\n' | |
style_1280u = 'Style: Default,Lato,96,&H00FFFFFF,&H00FFFFFF,&H00000000,&H96000000,0,0,0,0,84.375,100,0,0,1,4,2.7,2,45,45,71,1\nStyle: Alternate,Lato,96,&H00FFFFFF,&H00FFFFFF,&H00333333,&H96000000,0,-1,0,0,84.375,100,0,0,1,4,2.7,2,45,45,71,1\n' | |
line_chk = r'^(Dialogue: \d+,\d+:\d{2}:\d{2}\.\d{2},\d+:\d{2}:\d{2}\.\d{2},)' | |
line_def = r'(?:Default_dvd|Def|main),' | |
line_ita = r'(?:Thoughts_dvd|Thinking|Thoughts?|italics),' | |
line_top = r'(?:Top(?:_dvd)?),,0,0,0,,' | |
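# for illustration, roughly what the line_chk / line_def / line_top substitutions further down
# do to hypothetical dialogue lines:
#   Dialogue: 0,0:01:02.50,0:01:04.00,Def,,0,0,0,,Hello      ->  ...,Default,,0,0,0,,Hello
#   Dialogue: 0,0:01:02.50,0:01:04.00,Top,,0,0,0,,Sign text  ->  ...,Default,,0,0,0,,{\an8}Sign text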
print('\n###############################################' | |
'\n## Paired Muxing Script for Quality Upgrades ##' | |
'\n###############################################') | |
try: | |
mapping_file = open('map.txt', encoding='utf8').read().split('\n') if os.stat('map.txt').st_size > 0 else '' | |
mapping_list = [args.mapping] if args.mapping else mapping_file | |
if not mapping_list: raise | |
mappings = [m.split('|') for m in mapping_list if not m.startswith('#') and m] # ignore emty lines or lines starting with a pound | |
except Exception: | |
print(f'\n{err}Aborting: "map.txt" not found or a malformed mapping is present') | |
exit(1) | |
print('\n####### Video Multiplexing & Formatting #######') | |
for idx, mapping in enumerate(mappings): | |
vid_new, vid_old, chapters, sub_file, sub_ext, offset = vid_paths(args.import_n, 0), vid_paths(args.import_o, 1), None, '', '', 0 | |
vid_aud = vid_paths(args.import_a, 4) if len(mapping) > 4 and mapping[4] else None | |
mkv_pre = f'{output_path}{os.path.basename(mod_ext(vid_formats, 'mkv', vid_old))}' | |
mkv_out = f'{mkv_pre[:-4]}{add_tags} ({idx + 1}){mkv_pre[-4:]}' | |
sub_out = f'{mod_ext(vid_formats, 'en.ass', mkv_out)}' | |
# check if there is a new file or the old one is being formatted | |
if vid_new.lower() == 'format': | |
print(f'\n╭{os.path.basename(vid_old)}\n├─Marking the Old Video for Formatting...') | |
vid_new = vid_old | |
else: | |
print(f'\n╭{os.path.basename(vid_new)}\n├─Source: {os.path.basename(vid_old)}') | |
if not args.skip_sub or not args.skip_xml: | |
# convert wmv to mkv for mkvmerge compatibility | |
if (vid_new and vid_new.lower().endswith('.wmv')) or (vid_aud and vid_aud.lower().endswith('.wmv')): | |
start = time.time() | |
print('├┬Converting .wmv to .mkv...') | |
if vid_new and vid_new.lower().endswith('.wmv'): convert, vid_new = subprocess.run(f'ffmpeg -v quiet -y -i "{vid_new}" -c:v copy -c:a copy "{mod_ext('wmv', 'mkv', vid_new)}"'), mod_ext('wmv', 'mkv', vid_new) | |
if vid_aud and vid_aud.lower().endswith('.wmv'): convert, vid_aud = subprocess.run(f'ffmpeg -v quiet -y -i "{vid_aud}" -c:v copy -c:a copy "{mod_ext('wmv', 'mkv', vid_aud)}"'), mod_ext('wmv', 'mkv', vid_aud) | |
print(f'│╰─Completed in {elapsed(start)}') | |
if vid_new != vid_old: | |
# determine the audio offset in seconds by using the first 5 minutes of audio (increase trim for slightly higher accuracy and much slower parsing) | |
start = time.time() | |
print('├┬Determining Audio Offset...') | |
try: | |
offset = json.loads(subprocess.run(f'audio-offset-finder --find-offset-of "{vid_old}" --within "{vid_new}"{enable_trim} --json', capture_output=True).stdout) | |
offset = offset['time_offset'] if offset else 0 | |
except Exception: | |
print(f'│{err}─Failed! Unable to Parse Video') | |
print(f'│╰─Completed in {elapsed(start)} [{round(offset, 3)}s]') | |
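# for illustration: audio-offset-finder reports, in seconds, where vid_old's audio begins inside
# vid_new; a hypothetical time_offset of 1.502 means everything taken from vid_old
# (chapters/subtitles) has to be delayed by ~1502 ms, which is what the --chapter-sync and
# -itsoffset values derived from it further down do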
# determine the extra audio offset in seconds by using the first 5 minutes of audio (increase trim for slightly higher accuracy and much slower parsing) | |
if vid_aud: | |
start = time.time() | |
print('├┬Determining Extra Audio Offset...') | |
try: | |
extra_offset = json.loads(subprocess.run(f'audio-offset-finder --find-offset-of "{vid_aud}" --within "{vid_new}"{enable_trim} --json', capture_output=True).stdout) | |
extra_offset = extra_offset['time_offset'] if extra_offset else 0 | |
except Exception: | |
print(f'│{err}─Failed! Unable to Parse Video') | |
print(f'│╰─Completed in {elapsed(start)} [{round(extra_offset, 3)}s]') | |
# grab the file information of the original file in json format and store it in a variable | |
print('├┬Parsing Track Info & MultiPlexing...') | |
start, ident_old, sub_track, attachment_pairs = time.time(), identify(vid_old), None, [] | |
# if available add the title from the mapping otherwise use the one from the original file to ensure titles from the new file aren't used | |
try: title = mapping[2] if len(mapping) > 2 and mapping[2] else ident_old['container']['properties'].get('title') | |
except Exception: print(f'│{err}─Failed! Unable to Parse Container') | |
mod_title = f' --edit info --set title="{undo_reserved(title)}"' if title else '' | |
# parse the identification variable to determine the track id of any .ass or .srt subtitles (if present) | |
try: | |
for track in ident_old['tracks']: | |
if track['codec'] == 'SubStationAlpha' : sub_ext, sub_track = 'en.ass', str(track['id']) | |
elif track['codec'] == 'SubRip/SRT' : sub_ext, sub_track = 'en.srt', str(track['id']) | |
continue | |
if args.external: attachment_pairs = ' '.join([f'{attachment['id']}:"{attachment['file_name']}"' for attachment in ident_old['attachments']]) # parse attachment ids and names if exporting them externally | |
except Exception: | |
print(f'│{err}─Failed! Unable to Parse Video') | |
# parse the mkv file identification to determine if there are multiple chapters present | |
xml_file, ident_new, sync_chapter, new_chapters = mod_ext(vid_formats, 'xml', vid_old), identify(vid_new), ' --no-chapters', ' --no-chapters' | |
if os.path.isfile(xml_file): sync_chapter = f' --chapters "{xml_file}" --chapter-sync {int(offset * 1000)}' | |
elif has_chapters(ident_old) and has_chapters(ident_old) > 1: sync_chapter = f' --chapter-sync {int(offset * 1000)}' # only add chapters from the video if there is more than one and an external chapter file isn't present | |
elif has_chapters(ident_new) and has_chapters(ident_new) > 1: new_chapters = '' # use chapters from the new file if there is more than one and the old file didn't add chapters already | |
if not args.external: | |
# if an extra audio track is being used determine the track id(s) | |
if vid_aud: | |
ident_aud, aud_opts = identify(vid_aud), mapping[4].split(';') | |
lang_aud = aud_opts[2].strip(" '\"") if len(aud_opts) > 2 and aud_opts[2] else args.lang_set.split(',')[0] | |
name_aud = f'"{aud_opts[3].strip(" '\"")}"' if len(aud_opts) > 3 and aud_opts[3] else '""' | |
for track in ident_aud['tracks']: | |
if track['type'] == 'audio': aud_track = str(track['id']) | |
continue | |
vid_aud = f' -D -S -B -T -M --no-chapters --default-track-flag {aud_track}:0 --language {aud_track}:{lang_aud} --track-name {aud_track}:{name_aud} --sync {aud_track}:{int(extra_offset * 1000)} "{vid_aud}"' | |
else: vid_aud = '' | |
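# for illustration, with a hypothetical mapping field of "dub.mka";commentary;eng;"English Dub"
# (and assuming the audio is track id 0 with an extra_offset of -0.25 s) the options assembled
# above come out roughly as:
#   -D -S -B -T -M --no-chapters --default-track-flag 0:0 --language 0:eng --track-name 0:"English Dub" --sync 0:-250 "dub.mka"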
# mux the attachments, chapters (with offset) and audio tracks from the original file(s) into the new file | |
start, merge = time.time(), subprocess.run(f'{mkvtoolnix}mkvmerge.exe{quiet} -o "{mkv_out}" -S{del_attach_n}{new_chapters} -a {args.lang_aud} "{vid_new}" -A -D -S{del_attach_o}{sync_chapter} "{vid_old}"{vid_aud}', capture_output=True) | |
parse_std(merge) | |
print(f'│╰─Completed in {elapsed(start)}') | |
print('├┬Editing Properties...') | |
# if there are chapters rename them following regex substitution pair values and don't allow empty chapter names | |
start, mod_chapters, mod_xml = time.time(), '', 'mod_chapters.xml' | |
if sync_chapter != ' --no-chapters' or new_chapters != ' --no-chapters': | |
try: | |
chapter_xml = subprocess.run(f'{mkvtoolnix}mkvextract chapters "{mkv_out}"', capture_output=True, text=True, encoding="utf-8", errors="replace", check=True).stdout.strip() # capture chapter XML directly from mkvextract stdout | |
if chapter_xml: # check if chapter_xml is non-empty | |
# parse the XML string and wrap in ElementTree | |
root, tree, modified = ElementTree.fromstring(chapter_xml), ElementTree.ElementTree(), False | |
tree._setroot(root) | |
# get all ChapterAtom elements and check for existing ChapterString elements | |
chapter_atoms, chapters = tree.findall('.//ChapterAtom'), tree.findall('.//ChapterDisplay/ChapterString') | |
# check if all chapters lack ChapterString or have empty ChapterString | |
all_empty = not chapters or all(chapter.text is None or chapter.text.strip() == '' for chapter in chapters) | |
# create or update ChapterString for each ChapterAtom | |
if all_empty and chapter_atoms: | |
for index, atom in enumerate(chapter_atoms, 1): | |
display = atom.find('ChapterDisplay') | |
if display is None: | |
display = ElementTree.SubElement(atom, 'ChapterDisplay') | |
chapter_string = ElementTree.SubElement(display, 'ChapterString') | |
chapter_string.text, modified = f'Chapter {index:02d}', True | |
else: | |
# check if ChapterString exists | |
chapter_string = display.find('ChapterString') | |
if chapter_string is None: | |
chapter_string = ElementTree.SubElement(display, 'ChapterString') | |
chapter_string.text, modified = f'Chapter {index:02d}', True | |
elif chapter_string.text is None or chapter_string.text.strip() == '': | |
chapter_string.text, modified = f'Chapter {index:02d}', True | |
elif chapters: | |
# apply regex substitutions to existing ChapterString elements | |
for key, value in chapter_formatting.items(): | |
for chapter in chapters: | |
if chapter.text is not None and re.match(key, chapter.text): chapter.text, modified = re.sub(key, value, chapter.text), True | |
if modified: | |
tree.write(mod_xml, encoding='utf-8', xml_declaration=True) | |
mod_chapters = f' --chapters {mod_xml}' | |
else: | |
mod_chapters = '' | |
except Exception as e: | |
mod_chapters = '' | |
print(f"│{err}─Failed! Unable to Rename Chapters: {e}") | |
# remove any tags, set the video+audio tracks to japanese (or the language from the mapping), set the title, and remove single entry chapters for the muxed file | |
propedit = subprocess.run(f'{mkvtoolnix}mkvpropedit.exe{quiet} "{mkv_out}" -t all:{mod_tracks}{del_fonts}{add_fonts}{mod_title}{del_images}{mod_chapters}', capture_output=True) | |
parse_std(propedit) | |
# clean up chapter xml files if they exist | |
if os.path.exists(mod_xml): os.remove(mod_xml) | |
print(f'│╰─Completed in {elapsed(start)}') | |
if args.external: | |
# extract the fonts for each video file into a folder with the same name as the old parent file when in external mode (appends _attach) | |
if not args.skip_ext and attachment_pairs: | |
print('├┬Extracting Attachments...') | |
start, cwd, vid_old_opts = time.time(), os.getcwd(), mapping[1].split(';') # save the current directory as you can't globaly specify the output directory for mkvmerge | |
vid_ext = f'{vid_old_opts[2]}_' if len(vid_old_opts) > 2 and vid_old_opts[2] else '' | |
attach_path, chapter_file = os.path.join(os.path.dirname(vid_new), os.path.basename(mod_ext(vid_formats, f'{vid_ext}attach', vid_old, '_'))), os.path.join(os.path.dirname(vid_new), os.path.basename(mod_ext(vid_formats, 'xml', vid_new))) | |
os.makedirs(attach_path, exist_ok=True) # create directory for the attachments and move to it (where mkvmerge will output) | |
os.chdir(attach_path) | |
extract = subprocess.run(f'{mkvtoolnix}mkvextract.exe{quiet} "{vid_old}" attachments {attachment_pairs}', capture_output=True) | |
parse_std(extract) | |
os.chdir(cwd) # move back to the cwd | |
# print(f'│├─{attachment_pairs}') # results in too much console spam but can be useful | |
print(f'│╰─Completed in {elapsed(start)}') | |
# apply the chapter sync and extract to the vid_new directory as external mode skips the chapter section | |
if not args.skip_xml: | |
print('├┬Extracting Offset Chapters...') | |
start, vid_tmp = time.time(), 'temp_chapter_sync.mkv' | |
if sync_chapter != ' --no-chapters' or new_chapters != ' --no-chapters': | |
sync = subprocess.run(f'{mkvtoolnix}mkvmerge.exe{quiet}{sync_chapter} -o "{vid_tmp}" "{vid_old}"', capture_output=True) # mkvmerge only outputs to mkv | |
parse_std(sync) | |
extract = subprocess.run(f'{mkvtoolnix}mkvextract.exe{quiet} "{vid_tmp}" chapters "{chapter_file}"', capture_output=True) | |
parse_std(extract) | |
# generate a chp chapter sidecar file (for use with mpv and chapter-make-read.lua) from the chapter xml for quick quality checking of the external subtitles (mpv doesn't support chapters as xml) | |
tree, output = ElementTree.parse(chapter_file), '' | |
for chapter in tree.getroot().findall('.//ChapterAtom'): | |
start_time = chapter.find('ChapterTimeStart').text | |
title = chapter.find('.//ChapterString').text or '' | |
output += f"{start_time} {title}\n" # format: HH:MM:SS.sss <title> | |
# write to final chp file | |
with open(f'{chapter_file.replace('.xml', '.chp')}', 'w', encoding='utf-8') as f: | |
f.write(output) | |
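# for illustration, the resulting .chp sidecar is plain text with one "<timestamp> <title>" pair
# per line (timestamps copied verbatim from the extracted chapter xml), e.g.:
#   00:00:00.000000000 Chapter 01
#   00:01:30.024000000 Part A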
if os.path.isfile(vid_tmp): os.remove(vid_tmp) | |
print(f'│╰─Completed in {elapsed(start)}') | |
# extract the subtitle track from the original file and name it the same as the output file with ".en" appended | |
if not args.skip_sub: | |
if sub_track: | |
start, sub_file = time.time(), mod_ext(vid_formats, sub_ext, vid_old) | |
print('├┬Extracting Subtitles...') | |
extract = subprocess.run(f'{mkvtoolnix}mkvextract.exe{quiet} "{vid_old}" tracks "{sub_track}:{sub_file}', capture_output=True) | |
parse_std(extract) | |
print(f'│├─{os.path.basename(sub_file)}') | |
print(f'│╰─Completed in {elapsed(start)}') | |
else: # check for external subtitles if there are no sub tracks | |
for s in sub_formats: | |
if os.path.isfile(mod_ext(vid_formats, s, vid_old)): | |
print('├┬External Subtitles Found...') | |
sub_file = mod_ext(vid_formats, s, vid_old) | |
print(f'│├─{os.path.basename(sub_file)}') | |
print(f'│╰─Completed in {elapsed(start)}') | |
continue | |
# convert any .srt subtitle to .ass | |
if sub_file.endswith('en.srt'): | |
print('├┬Converting SubRip/SRT to SubStationAlpha...') | |
start, convert, sub_file = time.time(), subprocess.run(f'ffmpeg -v quiet -y -i {sub_file} {sub_file.replace('.en.srt', '.en.ass')}'), sub_file.replace('.en.srt', '.en.ass') | |
print(f'│╰─Completed in {elapsed(start)}') | |
# resample the subtitle file resolution to match the output file | |
if os.path.isfile(sub_file): | |
print('├┬Resampling & Offsetting Subtitles...') | |
if args.external: mkv_out, sub_out = vid_new, f'{mod_ext(vid_formats, 'en.ass', vid_new)}' # if skipping videos target the new video filename and create the mux folder | |
shutil.copy(sub_file, 'temp_sub_in.ass') # copy original subtitles to a temp file to avoid modifying them | |
os.rename(mkv_out, 'temp_mkv_out.mkv') # aegisub-cli will fail if the file paths contain any unicode so use temp filenames and rename the video temporarily | |
start, resample = time.time(), subprocess.run(f'{aegisub_cli} --loglevel 2 --video temp_mkv_out.mkv temp_sub_in.ass temp_sub_out.ass tool/resampleres') | |
os.rename('temp_mkv_out.mkv', mkv_out) # restore the video filename | |
# apply the audio offset to the resampled file and append the subgroup name if specified | |
subgroup = f'_{mapping[3]}' if len(mapping) > 3 and mapping[3] else '' | |
subprocess.run(f'ffmpeg -v quiet -y -itsoffset {offset} -i temp_sub_out.ass "{sub_out.replace('.en.ass', f'{subgroup}.en.ass')}"') | |
print(f'│╰─Completed in {elapsed(start)}') | |
print('╰─Muxing Complete!') | |
# clean up temp subtitle files | |
if os.path.isfile('temp_sub_in.ass'): os.remove('temp_sub_in.ass') | |
if os.path.isfile('temp_sub_out.ass'): os.remove('temp_sub_out.ass') | |
if not args.skip_cln: | |
start = time.time() | |
print('\n##### Subtitle Script Properties & Styles #####') | |
print('\n╭SubStation Alpha Operations') | |
if not args.original: | |
# insert subtitle properties required for the unified style to remain consistent | |
print('├┬Inserting Credits / ScaledBorderAndShadow / WrapStyle...') | |
for file in grep(updated_by, flags='L'): | |
if file: replace(file, res_x_map, updated_by + update_info) | |
for file in grep('ScaledBorderAndShadow:', flags='L'): | |
if file: replace(file, res_x_map, 'ScaledBorderAndShadow: yes\n') | |
for file in grep('ScaledBorderAndShadow: no'): | |
if file: replace(file, r'^ScaledBorderAndShadow: no', 'ScaledBorderAndShadow: yes') | |
for file in grep('WrapStyle:', flags='L'): | |
if file: replace(file, res_x_map, 'WrapStyle: 0\n') | |
for file in grep(r'WrapStyle: [1-9]'): | |
if file: replace(file, r'^WrapStyle: [1-9]', 'WrapStyle: 0') | |
print('│╰─Done') | |
# insert SakuraCircle styled fonts after checking for dupes | |
print('├┬Inserting Unified Font Styles...') | |
for file in grep(''): | |
replace(file, style_chk, '', False) | |
replace(file, fr'{line_chk}{line_def}', r'\1Default,', False) | |
replace(file, fr'{line_chk}{line_ita}', r'\1Alternate,', False) | |
replace(file, fr'{line_chk}{line_top}', r'\1Default,,0,0,0,,{\\an8}', False) | |
replace(file, r'(?<=^\[Aegisub Project Garbage\]\n)[\s\S]*(?=\n^\[V4\+ Styles\])', '', False) # clear aegisub project garbage | |
for file in grep('PlayResY: 404'): | |
replace(file, style_map, fr'\1{style_404p}') | |
for file in grep('PlayResY: 432'): | |
replace(file, style_map, fr'\1{style_432p}') | |
for file in grep('PlayResY: 480'): | |
if grep('PlayResX: 854', path=f'"{file}"'): replace(file, style_map, fr'\1{style_480p}') | |
elif grep('PlayResX: 720', path=f'"{file}"'): replace(file, style_map, fr'\1{style_480ps}') | |
for file in grep('PlayResY: 576'): | |
replace(file, style_map, fr'\1{style_576p}') | |
for file in grep('PlayResY: 720'): | |
replace(file, style_map, fr'\1{style_720p}') | |
for file in grep('PlayResY: 1080'): | |
replace(file, style_map, fr'\1{style_1080p}') | |
for file in grep('PlayResY: 1280'): | |
replace(file, style_map, fr'\1{style_1280u}') | |
print('│╰─Done') | |
# regex for fixing common subtitle script errors | |
print('├┬Cleaning the Script...') | |
for file in grep('[Script Info]'): | |
if file: | |
replace(file, r'--' , '—' , False) # Convert double hyphen to single long hyphen | |
replace(file, r'’' , "'" , False) # Convert curly to straight single quotes | |
replace(file, r'“|”', '"' , False) # Convert doubly curly quotes to single double quotes | |
replace(file, r"''" , '"' , False) # Convert double single quotes to single double quote | |
replace(file, r'…' , '...', False) # Convert ellipses to periods | |
replace(file, r' ' , ' ' , False) # Convert double spaces to single spaces | |
replace(file, r'(?!^Style: .+?,.*,)100\.039(?=,100,)', '100', False) # Fix Aegisub Font Size Conversions | |
replace(file, r"""(?<!\d )(?:(?<=[}\.," -])|(?<=\\[nN]))l(?=[\., ]|[fnst] |'[md]|'ll|'ve|t's|t'll|sn't|-l|nside|dea|ntro)""" , 'I') # Replace lowercase l when it should be an uppercase I (caused by old OCR techniques) | |
print('│╰─Done') | |
print(f'╰─Completed in {elapsed(start)}') | |
print('\n############# All Tasks Complete! #############') |