# Gist by @solariz, created April 21, 2025 14:30
# Gist id: solariz/b756632a76310af8c20555764be89a3c
# see blog-post at: https://tcpip.wtf/en/audiobook-mp3-merge.htm
import os
import subprocess
import tempfile
from pathlib import Path
import shutil
import re
import json
import math
# Locate a required external tool (ffmpeg / ffprobe) or abort the script.
def check_command(command_name: str) -> str:
    """Return the path of an external command, or exit if it is missing.

    Args:
        command_name: Executable name to look up on PATH (e.g. "ffmpeg").

    Returns:
        The path reported by ``shutil.which`` for ``command_name``.

    Raises:
        SystemExit: With status 1 when the command cannot be found.
    """
    path = shutil.which(command_name)
    if path is None:
        print(f"Error: {command_name} is not installed or not found in PATH.")
        print(f"Please install ffmpeg suite (which includes {command_name}) and ensure it's accessible.")
        # The bare ``exit`` name is a convenience injected by the ``site``
        # module for interactive sessions and is not guaranteed to exist
        # (e.g. ``python -S``), so raise SystemExit directly instead.
        raise SystemExit(1)
    return path
# --- Check for ffmpeg and ffprobe (check_command exits if one is missing) ---
ffmpeg_path = check_command("ffmpeg")
ffprobe_path = check_command("ffprobe")
# Name of the merged file; it is excluded from the inputs collected below.
output_file = "merged_audio.mp3"
# Collect the MP3 files in the current directory.
try:
    mp3_files = [
        f
        for f in os.listdir()
        # Only regular files qualify: a directory that happens to be named
        # "something.mp3" must not be handed to ffmpeg as an input.
        if f.lower().endswith(".mp3") and f != output_file and os.path.isfile(f)
    ]
except FileNotFoundError:
    print("Error: Current directory not found.")
    raise SystemExit(1)
except OSError as e:
    # Covers permission problems and other I/O failures from os.listdir().
    print(f"Error listing directory contents: {e}")
    raise SystemExit(1)
# Sort key for the input files: leading number first, un-numbered files before all others.
def sort_key(filename: str) -> int:
    """Return the integer used to order ``filename`` in the merge list.

    Args:
        filename: Name of an input file, e.g. ``"03 - Chapter.mp3"``.

    Returns:
        The leading run of digits parsed as an int, or ``-1`` when the name
        does not start with a digit.
    """
    match = re.match(r"(\d+)", filename)  # Digits at the very beginning only
    if match:
        return int(match.group(1))
    # A parsed prefix is never negative, so -1 guarantees that files without a
    # numeric prefix sort first. Returning 0 would tie them with files
    # numbered "0" / "00" and leave their relative order undefined.
    return -1
# Order by numeric prefix; the filename itself breaks ties so the merge order
# does not depend on the arbitrary order in which os.listdir() returns names.
mp3_files.sort(key=lambda name: (sort_key(name), name))
# Check if there are at least 2 source files
if len(mp3_files) < 2:
    print(f"Error: Found only {len(mp3_files)} MP3 source file(s) (excluding '{output_file}'). Need at least 2 to merge.")
    raise SystemExit(1)
# --- Prepare for Merge ---
# The first file in merge order supplies the metadata for the output.
first_mp3_file = mp3_files[0]
temp_file_path = None  # Stays None until the concat list exists, so the cleanup code can test it
# --- Extract Specific Metadata from First File using ffprobe ---
def get_metadata(file_path):
    """Read the title, artist and year tags of an audio file via ffprobe.

    Tag names are matched case-insensitively. The year is taken from the
    ``date``, ``originaldate`` or ``year`` tag (first non-empty one) and
    reduced to its first four-digit run.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        A ``(title, artist, year)`` tuple. Each element is the tag value or
        ``None`` when it is missing; all three are ``None`` when ffprobe
        fails or its output cannot be parsed.
    """
    print(f"\nAttempting to read metadata from: {file_path}")
    command = [
        ffprobe_path,
        "-v", "quiet",
        "-print_format", "json",
        "-show_format",
        # Pass the input explicitly via -i so that a filename starting with
        # "-" is not parsed as a command-line option.
        "-i", file_path,
    ]
    result = None  # Kept outside the try so the JSON error handler can show stdout
    try:
        result = subprocess.run(command, check=True, capture_output=True, text=True, encoding='utf-8')
        data = json.loads(result.stdout)
        tags = data.get("format", {}).get("tags", {})
        # Tag-name casing varies between files, so normalise keys first.
        tags_lower = {k.lower(): v for k, v in tags.items()}
        title = tags_lower.get('title')
        year = tags_lower.get('date') or tags_lower.get('originaldate') or tags_lower.get('year')
        artist = tags_lower.get('artist') or tags_lower.get('album_artist')
        # Reduce full dates such as "2021-05-03" to the four-digit year.
        if year and not re.fullmatch(r'\d{4}', year):
            match = re.search(r'(\d{4})', year)
            year = match.group(1) if match else None
        print(f" Found Title: {'Yes' if title else 'No'}")
        print(f" Found Artist: {'Yes' if artist else 'No'}")
        print(f" Found Year: {'Yes (' + year + ')' if year else 'No'}")
        return title, artist, year
    except subprocess.CalledProcessError as e:
        print(f" Error running ffprobe: {e}")
        if e.stderr:
            print(f" ffprobe stderr: {e.stderr.strip()}")
        return None, None, None
    except json.JSONDecodeError as e:
        print(f" Error parsing ffprobe JSON output: {e}")
        if result and result.stdout:
            print(f" ffprobe stdout: {result.stdout.strip()}")
        return None, None, None
    except Exception as e:
        # Best effort: missing metadata must not prevent the merge itself.
        print(f" An unexpected error occurred while reading metadata: {e}")
        return None, None, None
# --- Get Statistics for Output File ---
def get_output_stats(file_path):
    """Print size, duration and basic tags of a media file using ffprobe.

    Everything is written to stdout; failures of ffprobe or of JSON parsing
    are reported there as well instead of being raised.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        None.
    """
    print(f"\n--- Output File Statistics ({file_path}) ---")
    command = [
        ffprobe_path,
        "-v", "quiet",
        "-print_format", "json",
        "-show_format",
        "-show_streams",  # Needed for the per-stream duration fallback
        # Pass the input explicitly via -i so that a filename starting with
        # "-" is not parsed as a command-line option.
        "-i", file_path,
    ]
    result = None  # Kept outside the try so the JSON error handler can show stdout
    try:
        result = subprocess.run(command, check=True, capture_output=True, text=True, encoding='utf-8')
        data = json.loads(result.stdout)
        # --- Size ---
        size_bytes = data.get("format", {}).get("size")
        if size_bytes:
            size_mb = int(size_bytes) / (1024 * 1024)
            print(f"Size: {size_mb:.2f} MB")
        else:
            print("Size: Unknown")
        # --- Duration ---
        # Prefer the container-level duration, fall back to the first stream.
        duration_sec = data.get("format", {}).get("duration")
        if not duration_sec and data.get("streams"):
            duration_sec = data["streams"][0].get("duration")
        if duration_sec:
            try:
                # Round the total BEFORE splitting it: rounding only the
                # remainder turns e.g. 59.6 s into "00:60" instead of "01:00".
                total_seconds = round(float(duration_sec))
                minutes, seconds = divmod(total_seconds, 60)
                print(f"Duration: {minutes:02d}:{seconds:02d}")
            except (ValueError, TypeError, OverflowError):
                # Non-numeric, NaN or infinite duration strings end up here.
                print(f"Duration: Invalid value ({duration_sec})")
        else:
            print("Duration: Unknown")
        # --- Metadata Tags ---
        print("Metadata:")
        tags = data.get("format", {}).get("tags", {})
        # Tag-name casing varies, so normalise keys before the lookups.
        tags_lower = {k.lower(): v for k, v in tags.items()}
        title = tags_lower.get('title')
        artist = tags_lower.get('artist')
        year = tags_lower.get('date') or tags_lower.get('year')
        print(f" Title: {title if title else '-'}")
        print(f" Artist: {artist if artist else '-'}")
        print(f" Year: {year if year else '-'}")
        print("-" * 20)
    except subprocess.CalledProcessError as e:
        print(f"Error running ffprobe for stats: {e}")
        if e.stderr:
            print(f"ffprobe stderr: {e.stderr.strip()}")
    except json.JSONDecodeError as e:
        print(f"Error parsing ffprobe JSON output for stats: {e}")
        if result and result.stdout:
            print(f"ffprobe stdout: {result.stdout.strip()}")
    except Exception as e:
        # Statistics are informational only; never fail the run because of them.
        print(f"An unexpected error occurred while getting stats: {e}")
# --- Main Execution --- #
# Flow: read tags from the first input, write an ffmpeg concat list, show the
# command, ask for confirmation, run it, then always remove the list file.
extracted_title, extracted_artist, extracted_year = get_metadata(first_mp3_file)
merge_succeeded = False
try:
    # Create a temporary file with the list of input files
    with tempfile.NamedTemporaryFile(mode='w', delete=False, encoding='utf-8', suffix='.txt') as temp_file:
        # Remember the path before writing so the cleanup below still removes
        # the file if one of the writes fails.
        temp_file_path = temp_file.name
        for mp3_file in mp3_files:
            # The list lives in the temp directory, and ffmpeg's concat
            # demuxer resolves relative entries against the list file's
            # location, so write absolute paths to keep every entry valid.
            absolute_path = os.path.abspath(mp3_file)
            # Inside single quotes only the quote itself needs escaping.
            safe_mp3_file = absolute_path.replace("'", "'\\''")
            temp_file.write(f"file '{safe_mp3_file}'\n")
    # Display the files to be merged
    print("\nFiles to be merged (in order)>>>")
    for i, f in enumerate(mp3_files):
        print(f" {i+1}. {f}")
    print("-" * 20)
    # Refuse to overwrite an existing output file
    if Path(output_file).exists():
        print(f"\nError: Output file '{output_file}' already exists.")
        raise SystemExit(1)
    # Build the ffmpeg command
    ffmpeg_command = [
        ffmpeg_path,
        "-f", "concat",
        "-safe", "0",  # Required because the list contains absolute paths
        "-i", temp_file_path,
        "-i", first_mp3_file,  # Second input: source of the optional cover art
        "-map", "0:a",
        "-c:a", "copy",
        "-map", "1:v?",  # "?" makes the cover stream optional
        "-c:v", "copy",
        "-disposition:v", "attached_pic",
        "-map_metadata", "-1",  # Drop inherited tags; selected ones are re-added below
    ]
    if extracted_title:
        ffmpeg_command.extend(["-metadata", f"title={extracted_title}"])
    if extracted_artist:
        ffmpeg_command.extend(["-metadata", f"artist={extracted_artist}"])
    if extracted_year:
        ffmpeg_command.extend(["-metadata", f"date={extracted_year}"])
        ffmpeg_command.extend(["-metadata", f"year={extracted_year}"])
    ffmpeg_command.append(output_file)
    # Display the ffmpeg command (for information only; it is run without a shell)
    print("\nThe following ffmpeg command will be executed:")
    print(" ".join(map(lambda x: f'"{x}"' if " " in x else x, ffmpeg_command)))
    # Ask for confirmation before running the command
    run_command = input("\nDo you want to run this command? (y/n) ").strip().lower()
    if run_command == "y":
        print("\nRunning ffmpeg...")
        result = subprocess.run(ffmpeg_command, check=False, capture_output=True, text=True, encoding='utf-8')
        if result.returncode == 0:
            merge_succeeded = True
            print(f"\nSuccessfully merged audio files into '{output_file}'")
            get_output_stats(output_file)  # Get and display stats for the output file
        else:
            print("\n--- ffmpeg Error ---")
            print(f"ffmpeg command failed with exit code {result.returncode}")
            if result.stdout:
                print("ffmpeg stdout:")
                print(result.stdout.strip())
            if result.stderr:
                print("ffmpeg stderr:")
                print(result.stderr.strip())
            print("--------------------")
            # The existence check above guarantees this file was created by
            # the failed run, so it is safe to delete.
            if Path(output_file).exists():
                try:
                    os.remove(output_file)
                    print(f"Deleted potentially incomplete output file: '{output_file}'")
                except OSError as e:
                    print(f"Warning: Could not delete incomplete output file '{output_file}': {e}")
            # Report the failure through the exit status instead of ending
            # with a success-looking summary; the finally block still runs.
            raise SystemExit(1)
    else:
        print("Command cancelled.")
finally:
    # Ensure the temporary file is always removed
    if temp_file_path and Path(temp_file_path).exists():
        try:
            os.remove(temp_file_path)
        except OSError as e:
            print(f"Warning: Could not remove temporary file '{temp_file_path}': {e}")
# Print a summary (the file count only when a merge actually happened)
if merge_succeeded:
    print(f"\nProcessed {len(mp3_files)} input files.")
print("Done.")
# End of gist (the comment thread is on the GitHub gist page).