Skip to content

Instantly share code, notes, and snippets.

@rahimnathwani
Created March 10, 2025 20:28
Show Gist options
  • Save rahimnathwani/20deaa08a60a06664af3fb923fd95263 to your computer and use it in GitHub Desktop.
Extract slides (as images) from a video file of a live presentation
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.8"
# dependencies = [
# "opencv-python>=4.5.0",
# "numpy>=1.20.0",
# "pillow>=8.0.0",
# ]
# ///
"""
Extract frames from a video that remain static for at least a specified duration.
Useful for extracting slides from presentation videos.
"""
import argparse
import cv2
import numpy as np
import os
from datetime import timedelta
from PIL import Image
def extract_static_frames(video_path, output_dir, min_static_duration=3.0, similarity_threshold=0.98):
    """
    Extract frames from a video that remain static for at least min_static_duration seconds.

    Parameters:
    - video_path: Path to the input video file
    - output_dir: Directory to save extracted frames (created if missing)
    - min_static_duration: Minimum duration (in seconds) a frame should remain static
    - similarity_threshold: Threshold for considering frames as similar (0.0-1.0)

    Raises:
    - ValueError: if the video file cannot be opened.
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Open the video file
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError(f"Could not open video file: {video_path}")

    # Get video properties
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    duration = frame_count / fps
    print(f"Video: {video_path}")
    print(f"FPS: {fps:.2f}")
    print(f"Duration: {timedelta(seconds=duration)}")
    print(f"Extracting frames that remain static for at least {min_static_duration} seconds...")

    # Minimum number of consecutive similar frames that counts as "static"
    min_static_frames = int(min_static_duration * fps)

    prev_frame = None
    static_frame_count = 0  # length of the current run of similar frames
    extracted_count = 0
    frame_number = 0

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Compare frames in grayscale
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            if prev_frame is not None:
                similarity = calculate_similarity(prev_frame, gray)
                if similarity >= similarity_threshold:
                    # Frames are similar: extend the current static run
                    static_frame_count += 1
                else:
                    # Run ended: save it if it was long enough
                    if static_frame_count >= min_static_frames:
                        # Use the middle frame of the run as the representative slide
                        middle_frame_number = frame_number - static_frame_count // 2
                        timestamp = middle_frame_number / fps
                        # save_frame seeks within the capture; remember and restore
                        # the read position so the sequential scan is not disturbed
                        # (bug fix: the loop previously resumed from the saved frame
                        # and re-processed already-seen frames).
                        pos = cap.get(cv2.CAP_PROP_POS_FRAMES)
                        save_frame(cap, middle_frame_number, output_dir, extracted_count, timestamp)
                        cap.set(cv2.CAP_PROP_POS_FRAMES, pos)
                        extracted_count += 1
                    static_frame_count = 0
            prev_frame = gray
            frame_number += 1

        # Flush a static run that lasts until the end of the video
        if static_frame_count >= min_static_frames:
            middle_frame_number = frame_number - static_frame_count // 2
            timestamp = middle_frame_number / fps
            save_frame(cap, middle_frame_number, output_dir, extracted_count, timestamp)
            extracted_count += 1
    finally:
        # Release the capture even if saving/comparison raised
        cap.release()
    print(f"Extraction complete. {extracted_count} static frames extracted to {output_dir}")
def calculate_similarity(img1, img2):
    """Return a similarity score between two grayscale images.

    Uses a normalized 256-bin intensity-histogram correlation (fast) rather
    than a full structural comparison; the result is the HISTCMP_CORREL
    coefficient, where 1.0 means identical histograms.
    """
    histograms = []
    for image in (img1, img2):
        hist = cv2.calcHist([image], [0], None, [256], [0, 256])
        # Normalize in place so images of different brightness ranges compare fairly
        cv2.normalize(hist, hist, 0, 1, cv2.NORM_MINMAX)
        histograms.append(hist)
    return cv2.compareHist(histograms[0], histograms[1], cv2.HISTCMP_CORREL)
def save_frame(cap, frame_number, output_dir, extracted_count, timestamp):
    """Save a specific frame from the video as a JPEG slide image.

    Parameters:
    - cap: an open cv2.VideoCapture
    - frame_number: index of the frame to save
    - output_dir: directory the image is written into
    - extracted_count: running slide counter used in the filename
    - timestamp: frame time in seconds, embedded in the filename as HH-MM-SS

    The capture's read position is restored afterwards (bug fix: previously
    the seek was left in place, corrupting the caller's sequential scan).
    """
    # Remember where the caller was reading from, then seek to the target frame
    original_pos = cap.get(cv2.CAP_PROP_POS_FRAMES)
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
    ret, frame = cap.read()
    # Restore the caller's read position regardless of read success
    cap.set(cv2.CAP_PROP_POS_FRAMES, original_pos)
    if ret:
        # Format timestamp as HH:MM:SS
        timestamp_str = str(timedelta(seconds=int(timestamp)))
        filename = f"slide_{extracted_count:03d}_{timestamp_str.replace(':', '-')}.jpg"
        output_path = os.path.join(output_dir, filename)
        # Convert from OpenCV's BGR to RGB before handing off to Pillow
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(rgb_frame)
        img.save(output_path, quality=95)
        # Bug fix: the message previously printed "(unknown)" instead of the path
        print(f"Saved frame at {timestamp_str} to {output_path}")
def main():
    """Parse command-line arguments and run the slide extraction."""
    parser = argparse.ArgumentParser(description="Extract static frames from a video file.")
    parser.add_argument("video_path", help="Path to the input video file")
    parser.add_argument("--output-dir", "-o", default="extracted_slides",
                        help="Directory to save extracted frames (default: 'extracted_slides')")
    parser.add_argument("--duration", "-d", type=float, default=3.0,
                        help="Minimum static duration in seconds (default: 3.0)")
    parser.add_argument("--threshold", "-t", type=float, default=0.98,
                        help="Similarity threshold (0.0-1.0, default: 0.98)")
    args = parser.parse_args()
    try:
        extract_static_frames(
            args.video_path,
            args.output_dir,
            min_static_duration=args.duration,
            similarity_threshold=args.threshold,
        )
    except Exception as e:
        # Bug fix: previously the error was printed and the script exited 0,
        # hiding failures from shell pipelines. SystemExit with a message
        # prints to stderr and exits with status 1.
        raise SystemExit(f"Error: {e}") from e


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment