Created
September 2, 2025 19:58
-
-
Save jazzsequence/1f820c5036ae7887b33d12b5c4591a21 to your computer and use it in GitHub Desktop.
offset vtt timestamps by 18 seconds
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
Offset timestamps in a WebVTT (.vtt) file by a fixed number of seconds.

- Keeps the offset fixed in code (DEFAULT_OFFSET_SECONDS).
- Accepts input and output file paths from the command line to avoid overwriting.

Usage:
    python offset_timestamps.py input.vtt output.vtt
""" | |
import sys | |
import re | |
from datetime import timedelta, datetime | |
# Fixed offset (seconds). Change here if you ever need a different default.
DEFAULT_OFFSET_SECONDS = 18

# A single WebVTT timestamp. The hours field is optional in WebVTT ("MM:SS.mmm")
# and may have more than two digits, so it is matched as an optional prefix.
_TIMESTAMP_PATTERN = r'(?:\d{2,}:)?\d{2}:\d{2}\.\d{3}'

# Regex to match a VTT timing line, preserving any cue settings after the end time.
# Example matched lines:
#   "00:00:05.000 --> 00:00:08.500 line:0 position:50%"
#   "00:05.000 --> 00:08.500"
# The leading look-behind stops a match from starting in the middle of a longer
# run of digits/colons, which would leave a stray prefix in front of the
# rewritten timestamp.
TIMING_RE = re.compile(
    r'(?<![\d:])(?P<start>' + _TIMESTAMP_PATTERN + r')'
    r'\s*-->\s*'
    r'(?P<end>' + _TIMESTAMP_PATTERN + r')'
    r'(?P<rest>[^\n\r]*)'
)


def _parse_ts(ts: str) -> timedelta:
    """Parse a WebVTT timestamp into a timedelta.

    Accepts both the full form 'HH:MM:SS.mmm' (hours may have more than two
    digits) and the short form 'MM:SS.mmm', in which hours are taken as zero.

    Raises:
        ValueError: if ``ts`` has too few or too many ':'-separated fields,
            lacks the '.' before the milliseconds, or contains non-numeric
            fields.
    """
    # Star-unpacking captures the optional hours field; fewer than two fields
    # raises ValueError from the unpack itself.
    *hours, m, s_ms = ts.split(':')
    if len(hours) > 1:
        raise ValueError(f"invalid WebVTT timestamp: {ts!r}")
    h = hours[0] if hours else '0'
    s, ms = s_ms.split('.')
    return timedelta(hours=int(h), minutes=int(m), seconds=int(s), milliseconds=int(ms))
def _format_ts(td: timedelta) -> str:
    """Render a timedelta as 'HH:MM:SS.mmm'; negative durations are clamped to zero.

    The value is rounded to the nearest millisecond. Hours are zero-padded to
    at least two digits and grow wider when needed.
    """
    clamped = max(td, timedelta(0))
    total_ms = int(round(clamped.total_seconds() * 1000))
    # Peel the fields off from the smallest unit upward.
    whole_seconds, millis = divmod(total_ms, 1000)
    whole_minutes, secs = divmod(whole_seconds, 60)
    hrs, mins = divmod(whole_minutes, 60)
    return f"{hrs:02d}:{mins:02d}:{secs:02d}.{millis:03d}"
def offset_vtt_timestamps_text(vtt_text: str, offset_seconds: float = DEFAULT_OFFSET_SECONDS) -> str:
    """Return ``vtt_text`` with every cue timing line shifted by ``offset_seconds``.

    Each span matched by TIMING_RE has its start and end timestamps moved by
    the same amount and is rewritten as ``"<start> --> <end><rest>"``, where
    ``<rest>`` is whatever followed the end time on that line (e.g. cue
    settings). Text that TIMING_RE does not match is returned unchanged.
    """
    shift = timedelta(seconds=offset_seconds)

    def _shifted(raw_ts: str) -> str:
        # Parse, move, and re-render a single timestamp.
        return _format_ts(_parse_ts(raw_ts) + shift)

    def _rewrite_timing(m: re.Match) -> str:
        trailing = m.group('rest') or ''
        return f"{_shifted(m.group('start'))} --> {_shifted(m.group('end'))}{trailing}"

    return TIMING_RE.sub(_rewrite_timing, vtt_text)
def main():
    """Command-line entry point: shift a VTT file by DEFAULT_OFFSET_SECONDS.

    Reads the file named by ``sys.argv[1]``, offsets its cue timings, and
    writes the result to the path in ``sys.argv[2]``. Both files are handled
    as UTF-8 with newline translation disabled, so the original line endings
    are carried over to the output unchanged.

    Exits with status 1 when fewer than two paths are given and status 2 when
    the input file does not exist; both diagnostics go to stderr.
    """
    if len(sys.argv) < 3:
        print("Usage: python offset_timestamps.py <input.vtt> <output.vtt>", file=sys.stderr)
        sys.exit(1)
    input_path = sys.argv[1]
    output_path = sys.argv[2]
    try:
        # newline='' keeps '\r\n' / '\r' exactly as they appear in the file.
        with open(input_path, 'r', encoding='utf-8', newline='') as f:
            vtt_text = f.read()
    except FileNotFoundError:
        print(f"Error: input file not found: {input_path}", file=sys.stderr)
        sys.exit(2)
    new_text = offset_vtt_timestamps_text(vtt_text, DEFAULT_OFFSET_SECONDS)
    # newline='' on write prevents '\n' from being re-translated per platform.
    with open(output_path, 'w', encoding='utf-8', newline='') as f:
        f.write(new_text)
    print(f"Shifted timestamps by {DEFAULT_OFFSET_SECONDS} seconds.")
    print(f"Input : {input_path}")
    print(f"Output: {output_path}")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment