| package main | |
| import ( | |
| "time" | |
| "regexp" | |
| "bufio" | |
| "strconv" | |
| "fmt" | |
| "os" | |
| "errors" | |
| "io" | |
| "strings" | |
| ) | |
// Subtitle holds one parsed subtitle entry: the index read from the file, the
// interval during which it is shown, and its text.
type Subtitle struct {
	// idx is the number found on the entry's first line.
	idx int
	// fromTime and toTime are the start and end of the display interval,
	// parsed from the "HH:MM:SS,mmm --> HH:MM:SS,mmm" timing line.
	fromTime, toTime time.Duration
	// text is the entry's content; multiple lines are joined with "\n".
	text string
}
// timeFramePattern matches a timing line such as
// "00:03:14,000 --> 00:03:14,159". Capture groups 1-4 are the start time's
// hours, minutes, seconds and milliseconds; groups 5-8 are the same fields of
// the end time.
//
// MustCompile is used instead of discarding Compile's error, so a broken
// pattern fails at start-up rather than leaving a nil *regexp.Regexp behind.
var timeFramePattern = regexp.MustCompile(`(\d+):(\d+):(\d+),(\d+) --> (\d+):(\d+):(\d+),(\d+)`)
// getDuration converts the four decimal strings in parts (hours, minutes,
// seconds, milliseconds, in that order) into a single time.Duration.
//
// Conversion errors from strconv.Atoi are ignored, so whatever value Atoi
// returns alongside the error is used for that field. parts must have at least
// four elements.
func getDuration(parts []string) time.Duration {
	units := [...]time.Duration{time.Hour, time.Minute, time.Second, time.Millisecond}

	var total time.Duration
	for i, unit := range units {
		n, _ := strconv.Atoi(parts[i])
		total += unit * time.Duration(n)
	}
	return total
}
// printDuration formats duration as "HH:MM:SS,mmm": hours, minutes and seconds
// zero-padded to at least two digits and milliseconds to at least three.
// Anything below one millisecond is dropped.
func printDuration(duration time.Duration) string {
	hours := duration / time.Hour
	minutes := duration % time.Hour / time.Minute
	seconds := duration % time.Minute / time.Second
	millis := duration % time.Second / time.Millisecond
	return fmt.Sprintf(`%02d:%02d:%02d,%03d`, hours, minutes, seconds, millis)
}
| func readOneSubtitle(scanner *bufio.Scanner) (*Subtitle, error) { | |
| // read idx | |
| if !scanner.Scan() { | |
| return nil, nil | |
| } | |
| idxRaw := scanner.Text() | |
| idx, err := strconv.Atoi(idxRaw) | |
| if err != nil { | |
| return nil, errors.New("invalid subtitle index") | |
| } | |
| // read timing | |
| if !scanner.Scan() { | |
| return nil, errors.New("could not find subtitle timing") | |
| } | |
| timing := timeFramePattern.FindStringSubmatch(scanner.Text()) | |
| if timing == nil { | |
| return nil, errors.New("invalid subtitle timing") | |
| } | |
| fromTime := getDuration(timing[1:5]) | |
| toTime := getDuration(timing[5:9]) | |
| // read content | |
| if !scanner.Scan() { | |
| return nil, errors.New("could not find subtitle text") | |
| } | |
| content := scanner.Text() | |
| for scanner.Scan() && scanner.Text() != "" { | |
| content += "\n" | |
| content += scanner.Text() | |
| } | |
| subtitle := &Subtitle{idx, fromTime, toTime, content} | |
| return subtitle, nil | |
| } | |
| func writeOneSubtitle(file io.Writer, subtitle *Subtitle, idx *int) error { | |
| _, err := fmt.Fprint(file, | |
| *idx, "\n", | |
| printDuration(subtitle.fromTime), " --> ", printDuration(subtitle.toTime), "\n", | |
| subtitle.text, "\n\n") | |
| *idx++ | |
| return err | |
| } | |
| func main() { | |
| if len(os.Args) < 2 { | |
| println("Provide a subtitle file to fix.\ne.g. subtitle-fixer mysubtitle.srt") | |
| return | |
| } | |
| filePath := os.Args[1] | |
| newFilePath := filePath + ".fixed" | |
| file, _ := os.Open(filePath) | |
| newFile, _ := os.Create(newFilePath) | |
| defer file.Close() | |
| defer newFile.Close() | |
| scanner := bufio.NewScanner(file) | |
| var newIdx = 1 | |
| var lastSubtitle *Subtitle = nil | |
| for { | |
| subtitle, err := readOneSubtitle(scanner) | |
| if lastSubtitle != nil { | |
| if subtitle != nil { | |
| subtitle.text = strings.Trim(subtitle.text, "\n ") | |
| if len(subtitle.text) == 0 { // skip over empty subtitles | |
| continue | |
| } | |
| // skip over super-short subtitles that basically contain what their previous subtitle contains, and just prolong previous subtitle | |
| if subtitle.toTime - subtitle.fromTime < time.Millisecond * 150 && | |
| strings.Contains(lastSubtitle.text, subtitle.text) { | |
| lastSubtitle.toTime = subtitle.toTime | |
| continue | |
| } | |
| // if first-line of current subtitle is repeating last-line of previous-subtitle remove it | |
| currentLines := strings.Split(subtitle.text, "\n") | |
| lastLines := strings.Split(lastSubtitle.text, "\n") | |
| if currentLines[0] == lastLines[len(lastLines)-1] { | |
| subtitle.text = strings.Join(currentLines[1:], "\n") | |
| } | |
| // if first-line of current subtitle is repeating last-line of previous-subtitle remove it | |
| if subtitle.fromTime < lastSubtitle.toTime { | |
| lastSubtitle.toTime = subtitle.fromTime - time.Millisecond | |
| } | |
| } | |
| writeOneSubtitle(newFile, lastSubtitle, &newIdx) | |
| } | |
| if subtitle == nil { | |
| break | |
| } | |
| if err != nil { | |
| panic(err) | |
| } | |
| lastSubtitle = subtitle | |
| } | |
| os.Rename(filePath, filePath + ".bak") | |
| os.Rename(newFilePath, filePath) | |
| } |
Actually, I am having the same problem: the output file ends up being 0 KB. Please help.
I found the same problem; this happens when the file has a BOM (byte order mark).
I removed the BOM, but now the result is only a file with the .fixed extension; no .bak file is created, and the content is a single paragraph with one start and end time and all the text of the file.
Greetings.
Thanks for sharing! Already had an .srt from youtube, so I just ran
sudo apt install golang-go
go build subtitle-overlap-fixer.go
./subtitle-overlap-fixer subtitles.srt
Output file worked great.
Hi Nima, this solved a problem for me so well. Thank you so much.
I use this tool as part of a little YouTube + Mac terminal routine to create and burn in captions for accessibility purposes, using youtube-dlc and ffmpeg as well. It's pretty neat.
I'm thinking about creating a bash script to do it all semi-automatically and maybe write a blog post about it so that others can use this routine to produce captioned videos quickly. If I do end up writing that script + blog post, may I include a link to your gist with credit (and praise!) in my blog post?
Thanks again!
@niceindividual That would be my pleasure. 🌹
For anyone else who would find this useful:
I had an issue with my SRT file being rejected by a picky program for having occasional blank entries like
42
00:03:14,000 --> 00:03:14,159
I moved the section at https://gist.github.com/nimatrueway/4589700f49c691e5413c5b2df4d02f4f#file-subtitle-overlap-fixer-go-L111-L113 down to the end of the block to perform this check last. My thinking is that I was running into issues at line https://gist.github.com/nimatrueway/4589700f49c691e5413c5b2df4d02f4f#file-subtitle-overlap-fixer-go-L124 that subverted the previous empty line check. In any case, pushing this section down seemed to resolve my issues!
Cheers to @nimatrueway for this awesome script. It saved me a ton of time!
Great job! It solved the problem I had with the overlapping. Thanks a lot.
AWESOME! Did exactly what I needed. There were some additional fixes I needed to do (sentence capitalization and changing lowercase "i" to uppercase "I" where needed). I wrote a little OS X bash script for these issues if anyone wants to try it. https://github.com/bruno-sardine/mac#Further-correct-YouTube-captions-captfixsh
For anyone else who would find this useful:
I had an issue with my SRT file being rejected by a picky program for having occasional blank entries like
42
00:03:14,000 --> 00:03:14,159
I moved the section at https://gist.github.com/nimatrueway/4589700f49c691e5413c5b2df4d02f4f#file-subtitle-overlap-fixer-go-L111-L113 down to the end of the block to perform this check last. My thinking is that I was running into issues at line https://gist.github.com/nimatrueway/4589700f49c691e5413c5b2df4d02f4f#file-subtitle-overlap-fixer-go-L124 that subverted the previous empty line check. In any case, pushing this section down seemed to resolve my issues!
Cheers to @nimatrueway for this awesome script. It saved me a ton of time!
Thanks fork it
Thank you for sharing this, @nimatrueway.
Sir, I compiled your original code and tested it by fixing a subtitle file, say abc.srt. The output ends up being named abc.srt.fixed, which is a bit inconvenient since the operating system doesn't recognize it as a standard subtitle file due to the altered extension. So I modified the code to generate the output as abc.fixed.srt instead—this way, the .srt extension remains intact and the file is still recognized properly.
Hello,
When I use "ffmpeg -fix_sub_duration -i download.srt new.srt", it fixes the overlapping but removes the last line of the subtitles. How can I avoid this?
Thanks.