Created
August 29, 2018 22:33
-
-
Save nvbn/f1365d2548f48fad449bb66d650ad95f to your computer and use it in GitHub Desktop.
Bob's Burgers to The Simpsons with TensorFlow
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pathlib import Path | |
from typing import NamedTuple | |
from collections import defaultdict | |
from datetime import timedelta | |
from subprocess import call | |
from pycaption.srt import SRTReader | |
import lxml.html | |
import tensorflow as tf | |
import tensorflow_hub as hub | |
import numpy as np | |
# BCP-47 language tag handed to pycaption when reading and selecting captions
lang = 'en-US'
# directory where the generated clip files are written — fill in before running
output_dir = ''
# root directory holding one sub-directory of .srt subtitle files per season — fill in
root = Path('')
class Caption(NamedTuple):
    """A single subtitle cue plus the file it came from."""
    path: str    # source .srt file path
    start: int   # cue start time; microseconds (fed to timedelta(microseconds=...) later)
    length: int  # on-screen duration, same unit as start
    text: str    # plain-text caption content (HTML markup stripped)
def to_text(raw_text):
    """Return the plain-text content of an HTML caption fragment.

    Newlines are flattened to spaces before parsing so multi-line cues
    become a single line; empty or falsy input yields ''.
    """
    if raw_text:
        flattened = ' '.join(raw_text.split('\n'))
        return lxml.html.document_fromstring(flattened).text_content()
    return ''
def _read_subtitles(path, offset=0):
    """Parse the SRT file at *path* and yield Caption tuples.

    A caption's length runs up to the start of the next caption (so the
    generated clips butt up against each other); the final caption falls
    back to its own end time.  *offset* drops that many leading characters
    from the decoded text before parsing (e.g. to skip a BOM).
    """
    with open(path, 'rb') as srt_file:
        data = srt_file.read().decode()[offset:]
        raw_captions = SRTReader().read(data, lang=lang).get_captions(lang)
        total = len(raw_captions)
        for idx, raw in enumerate(raw_captions):
            if idx + 1 < total:
                length = raw_captions[idx + 1].start - raw.start
            else:
                length = raw.end - raw.start
            yield Caption(
                path=path,
                start=raw.start,
                length=length,
                text=to_text(raw.get_text()),
            )
def read_subtitles(path, offset=0):
    """Read all captions from *path*, retrying one character further in on failure.

    Fixes two defects in the original:
    - callers pass ``offset=`` (see the season-scanning loop), but the old
      signature accepted only ``path``, so every call raised TypeError;
    - ``_read_subtitles`` is a generator, so the old ``try`` could never
      catch parse errors — they only surface when the generator is
      consumed, which happened outside the ``try``.  Materializing the
      list here makes the retry actually work.

    Returns a list of Caption tuples (previously a generator; callers
    only iterate/unpack it, so this is backward compatible).
    """
    try:
        return list(_read_subtitles(path, offset))
    except Exception:
        # retry skipping one more leading character (e.g. a stray BOM)
        return list(_read_subtitles(path, offset + 1))
# Group every Simpsons caption by its (cleaned) text.  NOTE: the original
# called read_subtitles(..., offset=1), but read_subtitles takes only a
# path — the resulting TypeError was swallowed by a bare except, so NO
# data was ever collected and every file printed 'pass'.  The invalid
# keyword is dropped and the except narrowed so real parse failures are
# still skipped best-effort but programming errors surface.
data_text2captions = defaultdict(list)
for season in root.glob('*'):
    if not season.is_dir():
        continue
    for subtitles in season.glob('*.srt'):
        print(subtitles)
        try:
            for caption in read_subtitles(subtitles.as_posix()):
                data_text2captions[caption.text].append(caption)
        except Exception:
            # best-effort: skip subtitle files that fail to parse
            print('pass', subtitles)
data_texts = list(data_text2captions)
print('got data texts')
# Tina-rannosaurus Wrecks
# https://www.opensubtitles.org/en/subtitles/5643476/bob-s-burgers-tina-rannosaurus-wrecks-en
# https://www.youtube.com/watch?v=hZ_EKHGgWJQ
# Captions 1..53 of the episode form the "play" we re-enact with Simpsons clips.
play = list(read_subtitles('Bobs.Burgers.S03E07.HDTV.XviD-AFG.srt'))[1:54]
# Group the play captions by text, mirroring data_text2captions above.
play_text2captions = defaultdict(list)
for caption in play:
    play_text2captions[caption.text].append(caption)
play_texts = list(play_text2captions)
print('got play texts')
# Universal Sentence Encoder from TF Hub (TF1-style hub.Module API).
module_url = "https://tfhub.dev/google/universal-sentence-encoder/2"
embed = hub.Module(module_url)
print('got module')
# Graph for batched angular similarity between two sets of embedding vectors;
# shape=None lets the same placeholders take any batch size.
vec_a = tf.placeholder(tf.float32, shape=None)
vec_b = tf.placeholder(tf.float32, shape=None)
# For evaluation we use exactly normalized rather than
# approximately normalized.
normalized_a = tf.nn.l2_normalize(vec_a, axis=1)
normalized_b = tf.nn.l2_normalize(vec_b, axis=1)
# -arccos(cosine similarity): monotone in similarity, so a larger score
# (closer to 0) means a more similar pair.
sim_scores = -tf.acos(tf.reduce_sum(tf.multiply(normalized_a, normalized_b), axis=1))
def get_similarity_score(text_vec_a, text_vec_b):
    """Angular similarity for each aligned pair of vectors in the two batches.

    Feeds both batches through the normalize/arccos graph defined above and
    returns the per-pair scores (larger, i.e. closer to 0, = more similar).
    The original also fetched the normalized intermediates into unused
    locals (``emba``/``embb``); only ``sim_scores`` is fetched now.
    Uses the module-level ``session`` bound by the ``with tf.Session()``
    block below.
    """
    return session.run(
        sim_scores,
        feed_dict={
            vec_a: text_vec_a,
            vec_b: text_vec_b,
        })
def get_most_similar_text(vec_a, data_vectors):
    """Return the data text whose embedding ranks 4th-closest to *vec_a*.

    The query vector is broadcast against every data vector, the indices
    are sorted by descending similarity, and the element at rank 3 is
    picked — NOTE(review): skipping the top three looks deliberate
    (perhaps to avoid near-identical lines) but is undocumented; confirm.
    """
    scores = get_similarity_score([vec_a] * len(data_texts), data_vectors)
    ranked = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
    return data_texts[ranked[3]]
with tf.Session() as session:
    # hub.Module creates variables and lookup tables that must both be
    # initialized before the graph can run (TF1-style API).
    session.run([tf.global_variables_initializer(), tf.tables_initializer()])
    # Embed every unique data (Simpsons) text and play (Bob's Burgers) text.
    data_vecs, play_vecs = session.run([embed(data_texts), embed(play_texts)])
    data_vecs = np.array(data_vecs).tolist()
    play_vecs = np.array(play_vecs).tolist()
    print('got vecs')
    # Map each play line to its chosen match among the data lines.
    similar_texts = {play_text: get_most_similar_text(play_vecs[n], data_vecs)
                     for n, play_text in enumerate(play_texts)}
    print('got similarity')
class Part(NamedTuple):
    """Everything needed for one ffmpeg cut of a matched clip."""
    video: str   # source video file (matched .srt path with .mp4 extension)
    start: str   # cut start timestamp, e.g. "0:00:12.345"
    end: str     # NOTE: actually a DURATION (it is passed to ffmpeg -t), not an end time
    output: str  # path the rendered clip is written to
def generate_parts():
    """Yield one Part per play caption, cut from the matched data caption.

    For each play caption, the text match chosen earlier may occur several
    times in the corpus; pick the occurrence whose on-screen duration is
    CLOSEST to the play caption's, so the cut clip fits the dialogue beat.
    (The original used ``sorted(..., reverse=True)[0]``, which selected the
    occurrence with the MOST different duration — an inverted comparison.)
    """
    for n, caption in enumerate(play):
        similar = similar_texts[caption.text]
        # closest-duration occurrence; min() is stable like sorted()[0]
        similar_caption = min(
            data_text2captions[similar],
            key=lambda candidate: abs(caption.length - candidate.length))
        yield Part(
            video=similar_caption.path.replace('.srt', '.mp4'),
            # caption times are microseconds; [:-3] trims to millisecond precision
            start=str(timedelta(microseconds=similar_caption.start))[:-3],
            end=str(timedelta(microseconds=similar_caption.length))[:-3],
            output=Path(output_dir).joinpath(f'part_{n}.mp4').as_posix())
parts = [*generate_parts()]
# Cut each matched clip out of its source video, re-encoding everything to
# the same codecs and framerate so the concat step below can join them.
for part in parts:
    # part.end holds a duration, so -t (length) rather than -to (end time) is correct
    call(['ffmpeg', '-y', '-i', part.video,
          '-ss', part.start, '-t', part.end,
          '-c:v', 'libx264', '-c:a', 'aac', '-strict', 'experimental',
          '-vf', 'fps=30',
          '-b:a', '128k', part.output])
# ffmpeg concat-demuxer playlist: one "file '...'" line per clip, in order.
concat = '\n'.join(f"file '{part.output}'" for part in parts) + '\n'
with open('concat.txt', 'w') as f:
    f.write(concat)
# -safe 0 allows absolute/non-sanitized paths inside concat.txt
call(['ffmpeg', '-y', '-safe', '0', '-f', 'concat', '-i', 'concat.txt',
      '-c:v', 'libx264', '-c:a', 'aac', '-strict', 'experimental',
      '-vf', 'fps=30', 'output.mp4'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment