Skip to content

Instantly share code, notes, and snippets.

@rahimnathwani
Created March 10, 2025 20:28
Show Gist options
  • Save rahimnathwani/20deaa08a60a06664af3fb923fd95263 to your computer and use it in GitHub Desktop.
Extract slides (as images) from a video file of a live presentation
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.8"
# dependencies = [
# "opencv-python>=4.5.0",
# "numpy>=1.20.0",
# "pillow>=8.0.0",
# ]
# ///
"""
Extract frames from a video that remain static for at least a specified duration.
Useful for extracting slides from presentation videos.
"""
import argparse
import cv2
import numpy as np
import os
from datetime import timedelta
from PIL import Image
def extract_static_frames(video_path, output_dir, min_static_duration=3.0, similarity_threshold=0.98):
    """
    Extract frames from a video that remain static for at least min_static_duration seconds.

    Parameters:
    - video_path: Path to the input video file
    - output_dir: Directory to save extracted frames (created if missing)
    - min_static_duration: Minimum duration (in seconds) a frame should remain static
    - similarity_threshold: Threshold for considering frames as similar (0.0-1.0)

    Raises:
    - ValueError: if the video file cannot be opened.
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Open the video file
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError(f"Could not open video file: {video_path}")

    # Get video properties
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    duration = frame_count / fps
    print(f"Video: {video_path}")
    print(f"FPS: {fps:.2f}")
    print(f"Duration: {timedelta(seconds=duration)}")
    print(f"Extracting frames that remain static for at least {min_static_duration} seconds...")

    # Minimum number of consecutive similar frames that counts as "static"
    min_static_frames = int(min_static_duration * fps)

    prev_frame = None
    static_frame_count = 0  # length of the current run of similar frames
    extracted_count = 0
    frame_number = 0

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Compare frames in grayscale
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            if prev_frame is not None:
                similarity = calculate_similarity(prev_frame, gray)
                if similarity >= similarity_threshold:
                    # Frames are similar: extend the current static run
                    static_frame_count += 1
                else:
                    # Run ended: save it if it was long enough
                    if static_frame_count >= min_static_frames:
                        # Use the middle frame of the run as the representative slide
                        middle_frame_number = frame_number - static_frame_count // 2
                        timestamp = middle_frame_number / fps
                        # save_frame seeks within the capture; remember and restore
                        # the read position so the sequential scan is not disturbed
                        # (bug fix: the loop previously resumed from the saved frame
                        # and re-processed already-seen frames).
                        pos = cap.get(cv2.CAP_PROP_POS_FRAMES)
                        save_frame(cap, middle_frame_number, output_dir, extracted_count, timestamp)
                        cap.set(cv2.CAP_PROP_POS_FRAMES, pos)
                        extracted_count += 1
                    static_frame_count = 0
            prev_frame = gray
            frame_number += 1

        # Flush a static run that lasts until the end of the video
        if static_frame_count >= min_static_frames:
            middle_frame_number = frame_number - static_frame_count // 2
            timestamp = middle_frame_number / fps
            save_frame(cap, middle_frame_number, output_dir, extracted_count, timestamp)
            extracted_count += 1
    finally:
        # Release the capture even if saving/comparison raised
        cap.release()
    print(f"Extraction complete. {extracted_count} static frames extracted to {output_dir}")
def calculate_similarity(img1, img2):
    """Return a similarity score between two grayscale images.

    Uses a normalized 256-bin intensity-histogram correlation (fast) rather
    than a full structural comparison; the result is the HISTCMP_CORREL
    coefficient, where 1.0 means identical histograms.
    """
    histograms = []
    for image in (img1, img2):
        hist = cv2.calcHist([image], [0], None, [256], [0, 256])
        # Normalize in place so images of different brightness ranges compare fairly
        cv2.normalize(hist, hist, 0, 1, cv2.NORM_MINMAX)
        histograms.append(hist)
    return cv2.compareHist(histograms[0], histograms[1], cv2.HISTCMP_CORREL)
def save_frame(cap, frame_number, output_dir, extracted_count, timestamp):
    """Save a specific frame from the video as a JPEG slide image.

    Parameters:
    - cap: an open cv2.VideoCapture
    - frame_number: index of the frame to save
    - output_dir: directory the image is written into
    - extracted_count: running slide counter used in the filename
    - timestamp: frame time in seconds, embedded in the filename as HH-MM-SS

    The capture's read position is restored afterwards (bug fix: previously
    the seek was left in place, corrupting the caller's sequential scan).
    """
    # Remember where the caller was reading from, then seek to the target frame
    original_pos = cap.get(cv2.CAP_PROP_POS_FRAMES)
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
    ret, frame = cap.read()
    # Restore the caller's read position regardless of read success
    cap.set(cv2.CAP_PROP_POS_FRAMES, original_pos)
    if ret:
        # Format timestamp as HH:MM:SS
        timestamp_str = str(timedelta(seconds=int(timestamp)))
        filename = f"slide_{extracted_count:03d}_{timestamp_str.replace(':', '-')}.jpg"
        output_path = os.path.join(output_dir, filename)
        # Convert from OpenCV's BGR to RGB before handing off to Pillow
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(rgb_frame)
        img.save(output_path, quality=95)
        # Bug fix: the message previously printed "(unknown)" instead of the path
        print(f"Saved frame at {timestamp_str} to {output_path}")
def main():
    """Parse command-line arguments and run the slide extraction."""
    parser = argparse.ArgumentParser(description="Extract static frames from a video file.")
    parser.add_argument("video_path", help="Path to the input video file")
    parser.add_argument("--output-dir", "-o", default="extracted_slides",
                        help="Directory to save extracted frames (default: 'extracted_slides')")
    parser.add_argument("--duration", "-d", type=float, default=3.0,
                        help="Minimum static duration in seconds (default: 3.0)")
    parser.add_argument("--threshold", "-t", type=float, default=0.98,
                        help="Similarity threshold (0.0-1.0, default: 0.98)")
    args = parser.parse_args()
    try:
        extract_static_frames(
            args.video_path,
            args.output_dir,
            min_static_duration=args.duration,
            similarity_threshold=args.threshold,
        )
    except Exception as e:
        # Bug fix: previously the error was printed and the script exited 0,
        # hiding failures from shell pipelines. SystemExit with a message
        # prints to stderr and exits with status 1.
        raise SystemExit(f"Error: {e}") from e


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment