solariz · April 21, 2025 14:30
diff --git a/audiobook_mp3_merge.py b/audiobook_mp3_merge.py
 # see blog-post at: https://tcpip.wtf/en/audiobook-mp3-merge.htm
 import os
 import subprocess
 import tempfile
 from pathlib import Path
 import shutil
 import re
 import json
 import math 

 # Function to check if ffmpeg/ffprobe is installed
 def check_command(command_name):
     """Return the full path of an executable found on PATH, or abort.

     Args:
         command_name: Name of the program to look up (e.g. "ffmpeg").

     Returns:
         The path reported by ``shutil.which``.

     Raises:
         SystemExit: With exit status 1 when the program cannot be found.
     """
     path = shutil.which(command_name)
     if path is None:
         print(f"Error: {command_name} is not installed or not found in PATH.")
         print(f"Please install ffmpeg suite (which includes {command_name}) and ensure it's accessible.")
         # Raised directly: exit() is a helper the site module adds for
         # interactive sessions and is not meant to be used in programs.
         raise SystemExit(1)
     return path

 # --- Check for ffmpeg and ffprobe ---
 # Both tools are resolved once at start-up; check_command() ends the script
 # when either one is missing.
 ffmpeg_path = check_command("ffmpeg")
 ffprobe_path = check_command("ffprobe")

 # Set the output file name
 output_file = "merged_audio.mp3"

 # Get a list of all MP3 files in the current directory.  The output file is
 # excluded so a previous result is never merged into itself, and only regular
 # files are kept so a directory that happens to be named "*.mp3" is skipped.
 try:
     mp3_files = [
         f for f in os.listdir()
         if f.lower().endswith(".mp3") and f != output_file and os.path.isfile(f)
     ]
 except FileNotFoundError:
     print("Error: Current directory not found.")
     raise SystemExit(1)
 except OSError as e:
     print(f"Error listing directory contents: {e}")
     raise SystemExit(1)


 # Sort the files by their numeric prefix, handling non-numeric prefixes robustly
 def sort_key(filename):
     """Return the sort position encoded in a file name's leading digits.

     Args:
         filename: File name to inspect.

     Returns:
         The leading run of digits as an int, or -1 when the name does not
         start with a digit.  -1 is lower than every possible numeric prefix,
         so unprefixed files always sort first, even next to a "0" prefix.
     """
     match = re.match(r"(\d+)", filename)  # digits at the very beginning only
     if match is None:
         return -1
     return int(match.group(1))

 # Order by numeric prefix.  The file name itself breaks ties, so the result
 # does not depend on the arbitrary order in which os.listdir() returned the
 # entries.
 mp3_files.sort(key=lambda name: (sort_key(name), name))

 # Check if there are at least 2 source files
 if len(mp3_files) < 2:
     print(f"Error: Found only {len(mp3_files)} MP3 source file(s) (excluding '{output_file}'). Need at least 2 to merge.")
     raise SystemExit(1)

 # --- Prepare for Merge ---
 # The first file in merge order supplies the tags and cover art of the result.
 first_mp3_file = mp3_files[0]
 temp_file_path = None # Initialize to ensure cleanup block works

 # --- Extract Specific Metadata from First File using ffprobe ---
 def get_metadata(file_path):
     """Read the title, artist and year tags of an audio file with ffprobe.

     Args:
         file_path: Path of the file to inspect.

     Returns:
         A ``(title, artist, year)`` tuple.  Each element is the tag value, or
         ``None`` when the tag is absent.  ``year`` is reduced to its first
         four-digit run.  ``(None, None, None)`` is returned when ffprobe
         fails or its output cannot be parsed.
     """
     print(f"\nAttempting to read metadata from: {file_path}")
     command = [
         ffprobe_path,
         # "error" rather than "quiet", so a failing ffprobe leaves a message
         # on stderr for the handler below to report.
         "-v", "error",
         "-print_format", "json",
         "-show_format",
         # Explicit -i keeps a file name that starts with "-" from being
         # parsed as an option.
         "-i", file_path,
     ]
     result = None # Stays None when ffprobe itself fails
     try:
         result = subprocess.run(command, check=True, capture_output=True, text=True, encoding='utf-8')
         data = json.loads(result.stdout)
         tags = data.get("format", {}).get("tags", {})
         # Tag names are looked up case-insensitively.
         tags_lower = {k.lower(): v for k, v in tags.items()}

         title = tags_lower.get('title')
         year = tags_lower.get('date') or tags_lower.get('originaldate') or tags_lower.get('year')
         artist = tags_lower.get('artist') or tags_lower.get('album_artist')

         # Reduce a full date such as "2021-05-01" to its four-digit year
         if year and not re.fullmatch(r'\d{4}', year):
             match = re.search(r'(\d{4})', year)
             year = match.group(1) if match else None

         print(f"  Found Title: {'Yes' if title else 'No'}")
         print(f"  Found Artist: {'Yes' if artist else 'No'}")
         print(f"  Found Year: {'Yes (' + year + ')' if year else 'No'}")
         return title, artist, year

     except subprocess.CalledProcessError as e:
         print(f"  Error running ffprobe: {e}")
         if e.stderr:
             print(f"  ffprobe stderr: {e.stderr.strip()}")
         return None, None, None
     except json.JSONDecodeError as e:
         print(f"  Error parsing ffprobe JSON output: {e}")
         if result and result.stdout:
             print(f"  ffprobe stdout: {result.stdout.strip()}")
         return None, None, None
     except Exception as e:
         # Best effort: missing metadata must not stop the merge itself.
         print(f"  An unexpected error occurred while reading metadata: {e}")
         return None, None, None

 # --- Get Statistics for Output File ---
 def get_output_stats(file_path):
     """Print size, duration and basic tags of a media file as reported by ffprobe.

     Args:
         file_path: Path of the file to inspect.

     Returns:
         None.  All information is printed; ffprobe or parsing failures are
         reported with a message and otherwise ignored.
     """
     print(f"\n--- Output File Statistics ({file_path}) ---")
     command = [
         ffprobe_path,
         # "error" rather than "quiet", so a failing ffprobe leaves a message
         # on stderr for the handler below to report.
         "-v", "error",
         "-print_format", "json",
         "-show_format",
         "-show_streams", # Fallback source for the duration
         # Explicit -i keeps a file name that starts with "-" from being
         # parsed as an option.
         "-i", file_path,
     ]
     result = None # Stays None when ffprobe itself fails
     try:
         result = subprocess.run(command, check=True, capture_output=True, text=True, encoding='utf-8')
         data = json.loads(result.stdout)

         # --- Size ---
         size_bytes = data.get("format", {}).get("size")
         if size_bytes:
             size_mb = int(size_bytes) / (1024 * 1024)
             print(f"Size: {size_mb:.2f} MB")
         else:
             print("Size: Unknown")

         # --- Duration ---
         # Prefer duration from format, fallback to stream[0] if needed
         duration_sec = data.get("format", {}).get("duration")
         if not duration_sec and data.get("streams"):
             duration_sec = data["streams"][0].get("duration")

         if duration_sec:
             try:
                 # Round the total before splitting it; rounding the seconds
                 # part on its own could produce "MM:60".
                 minutes, seconds = divmod(round(float(duration_sec)), 60)
                 print(f"Duration: {minutes:02d}:{seconds:02d}")
             except (ValueError, TypeError, OverflowError):
                 # OverflowError covers an infinite duration value.
                 print(f"Duration: Invalid value ({duration_sec})")
         else:
             print("Duration: Unknown")

         # --- Metadata Tags ---
         print("Metadata:")
         tags = data.get("format", {}).get("tags", {})
         # Tag names are looked up case-insensitively.
         tags_lower = {k.lower(): v for k, v in tags.items()}

         title = tags_lower.get('title')
         artist = tags_lower.get('artist')
         year = tags_lower.get('date') or tags_lower.get('year')

         print(f"  Title: {title if title else '-'}")
         print(f"  Artist: {artist if artist else '-'}")
         print(f"  Year: {year if year else '-'}")
         print("-" * 20)

     except subprocess.CalledProcessError as e:
         print(f"Error running ffprobe for stats: {e}")
         if e.stderr:
             print(f"ffprobe stderr: {e.stderr.strip()}")
     except json.JSONDecodeError as e:
         print(f"Error parsing ffprobe JSON output for stats: {e}")
         if result and result.stdout:
             print(f"ffprobe stdout: {result.stdout.strip()}")
     except Exception as e:
         # Best effort: the statistics are informational only.
         print(f"An unexpected error occurred while getting stats: {e}")

 # --- Main Execution --- #

 # Tags for the merged file are taken from the first input file.
 extracted_title, extracted_artist, extracted_year = get_metadata(first_mp3_file)

 try:
     # Create a temporary file with the list of input files.  Its path is
     # recorded as soon as the file exists, so the finally-block below removes
     # it even when one of the writes fails.
     with tempfile.NamedTemporaryFile(mode='w', delete=False, encoding='utf-8', suffix='.txt') as temp_file:
         temp_file_path = temp_file.name
         for mp3_file in mp3_files:
             # Absolute paths: the list file lives in the temp directory, and
             # relative entries would be resolved against that directory
             # instead of the current working directory.
             absolute_mp3_file = os.path.abspath(mp3_file)
             # A single quote is written as '\'' (close, escaped quote, reopen).
             safe_mp3_file = absolute_mp3_file.replace("'", "'\\''")
             temp_file.write(f"file '{safe_mp3_file}'\n")

     # Display the files to be merged
     print("\nFiles to be merged (in order)>>>")
     for i, f in enumerate(mp3_files):
         print(f"  {i+1}. {f}")
     print("-" * 20)

     # Check if the output file already exists
     if Path(output_file).exists():
         print(f"\nError: Output file '{output_file}' already exists.")
         raise SystemExit(1)

     # Build the ffmpeg command:
     #   input 0 = concatenated audio, input 1 = first file (cover art source);
     #   audio and an optional picture stream are copied without re-encoding;
     #   all input metadata is dropped and the extracted tags are set instead.
     ffmpeg_command = [
         ffmpeg_path,
         "-f", "concat",
         "-safe", "0",
         "-i", temp_file_path,
         "-i", first_mp3_file,
         "-map", "0:a",
         "-c:a", "copy",
         "-map", "1:v?",
         "-c:v", "copy",
         "-disposition:v", "attached_pic",
         "-map_metadata", "-1",
     ]
     if extracted_title:
         ffmpeg_command.extend(["-metadata", f"title={extracted_title}"])
     if extracted_artist:
         ffmpeg_command.extend(["-metadata", f"artist={extracted_artist}"])
     if extracted_year:
         ffmpeg_command.extend(["-metadata", f"date={extracted_year}"])
         ffmpeg_command.extend(["-metadata", f"year={extracted_year}"])

     ffmpeg_command.append(output_file)

     # Display the ffmpeg command
     print("\nThe following ffmpeg command will be executed:")
     print(" ".join(map(lambda x: f'"{x}"' if " " in x else x, ffmpeg_command)))

     # Ask for confirmation before running the command; surrounding whitespace
     # in the answer is ignored.
     run_command = input("\nDo you want to run this command? (y/n) ").strip().lower()
     if run_command == "y":
         print("\nRunning ffmpeg...")
         result = subprocess.run(ffmpeg_command, check=False, capture_output=True, text=True, encoding='utf-8')

         if result.returncode == 0:
             print(f"\nSuccessfully merged audio files into '{output_file}'")
             get_output_stats(output_file) # Get and display stats for the output file
         else:
             print("\n--- ffmpeg Error ---")
             print(f"ffmpeg command failed with exit code {result.returncode}")
             if result.stdout:
                 print("ffmpeg stdout:")
                 print(result.stdout.strip())
             if result.stderr:
                 print("ffmpeg stderr:")
                 print(result.stderr.strip())
             print("--------------------")
             if Path(output_file).exists():
                 try:
                     os.remove(output_file)
                     print(f"Deleted potentially incomplete output file: '{output_file}'")
                 except OSError as e:
                     print(f"Warning: Could not delete incomplete output file '{output_file}': {e}")
             # A failed merge must not end with exit status 0.
             raise SystemExit(1)

     else:
         print("Command cancelled.")

 finally:
     # Ensure the temporary file is always removed
     if temp_file_path and Path(temp_file_path).exists():
         try:
             os.remove(temp_file_path)
         except OSError as e:
             print(f"Warning: Could not remove temporary file '{temp_file_path}': {e}")

 # Print a summary
 print(f"\nProcessed {len(mp3_files)} input files.")
 print("Done.")
	# see blog-post at: https://tcpip.wtf/en/audiobook-mp3-merge.htm
	import os
	import subprocess
	import tempfile
	from pathlib import Path
	import shutil
	import re
	import json
	import math

	# Function to check if ffmpeg/ffprobe is installed
	def check_command(command_name):
	    """Return the full path of an executable found on PATH, or abort.

	    Args:
	        command_name: Name of the program to look up (e.g. "ffmpeg").

	    Returns:
	        The path reported by ``shutil.which``.

	    Raises:
	        SystemExit: With exit status 1 when the program cannot be found.
	    """
	    path = shutil.which(command_name)
	    if path is None:
	        print(f"Error: {command_name} is not installed or not found in PATH.")
	        print(f"Please install ffmpeg suite (which includes {command_name}) and ensure it's accessible.")
	        # Raised directly: exit() is a helper the site module adds for
	        # interactive sessions and is not meant to be used in programs.
	        raise SystemExit(1)
	    return path

	# --- Check for ffmpeg and ffprobe ---
	# Both tools are resolved once at start-up; check_command() ends the script
	# when either one is missing.
	ffmpeg_path = check_command("ffmpeg")
	ffprobe_path = check_command("ffprobe")

	# Set the output file name
	output_file = "merged_audio.mp3"

	# Get a list of all MP3 files in the current directory.  The output file is
	# excluded so a previous result is never merged into itself, and only regular
	# files are kept so a directory that happens to be named "*.mp3" is skipped.
	try:
	    mp3_files = [
	        f for f in os.listdir()
	        if f.lower().endswith(".mp3") and f != output_file and os.path.isfile(f)
	    ]
	except FileNotFoundError:
	    print("Error: Current directory not found.")
	    raise SystemExit(1)
	except OSError as e:
	    print(f"Error listing directory contents: {e}")
	    raise SystemExit(1)


	# Sort the files by their numeric prefix, handling non-numeric prefixes robustly
	def sort_key(filename):
	    """Return the sort position encoded in a file name's leading digits.

	    Args:
	        filename: File name to inspect.

	    Returns:
	        The leading run of digits as an int, or -1 when the name does not
	        start with a digit.  -1 is lower than every possible numeric prefix,
	        so unprefixed files always sort first, even next to a "0" prefix.
	    """
	    match = re.match(r"(\d+)", filename)  # digits at the very beginning only
	    if match is None:
	        return -1
	    return int(match.group(1))

	# Order by numeric prefix.  The file name itself breaks ties, so the result
	# does not depend on the arbitrary order in which os.listdir() returned the
	# entries.
	mp3_files.sort(key=lambda name: (sort_key(name), name))

	# Check if there are at least 2 source files
	if len(mp3_files) < 2:
	    print(f"Error: Found only {len(mp3_files)} MP3 source file(s) (excluding '{output_file}'). Need at least 2 to merge.")
	    raise SystemExit(1)

	# --- Prepare for Merge ---
	# The first file in merge order supplies the tags and cover art of the result.
	first_mp3_file = mp3_files[0]
	temp_file_path = None # Initialize to ensure cleanup block works

	# --- Extract Specific Metadata from First File using ffprobe ---
	def get_metadata(file_path):
	    """Read the title, artist and year tags of an audio file with ffprobe.

	    Args:
	        file_path: Path of the file to inspect.

	    Returns:
	        A ``(title, artist, year)`` tuple.  Each element is the tag value, or
	        ``None`` when the tag is absent.  ``year`` is reduced to its first
	        four-digit run.  ``(None, None, None)`` is returned when ffprobe
	        fails or its output cannot be parsed.
	    """
	    print(f"\nAttempting to read metadata from: {file_path}")
	    command = [
	        ffprobe_path,
	        # "error" rather than "quiet", so a failing ffprobe leaves a message
	        # on stderr for the handler below to report.
	        "-v", "error",
	        "-print_format", "json",
	        "-show_format",
	        # Explicit -i keeps a file name that starts with "-" from being
	        # parsed as an option.
	        "-i", file_path,
	    ]
	    result = None # Stays None when ffprobe itself fails
	    try:
	        result = subprocess.run(command, check=True, capture_output=True, text=True, encoding='utf-8')
	        data = json.loads(result.stdout)
	        tags = data.get("format", {}).get("tags", {})
	        # Tag names are looked up case-insensitively.
	        tags_lower = {k.lower(): v for k, v in tags.items()}

	        title = tags_lower.get('title')
	        year = tags_lower.get('date') or tags_lower.get('originaldate') or tags_lower.get('year')
	        artist = tags_lower.get('artist') or tags_lower.get('album_artist')

	        # Reduce a full date such as "2021-05-01" to its four-digit year
	        if year and not re.fullmatch(r'\d{4}', year):
	            match = re.search(r'(\d{4})', year)
	            year = match.group(1) if match else None

	        print(f" Found Title: {'Yes' if title else 'No'}")
	        print(f" Found Artist: {'Yes' if artist else 'No'}")
	        print(f" Found Year: {'Yes (' + year + ')' if year else 'No'}")
	        return title, artist, year

	    except subprocess.CalledProcessError as e:
	        print(f" Error running ffprobe: {e}")
	        if e.stderr:
	            print(f" ffprobe stderr: {e.stderr.strip()}")
	        return None, None, None
	    except json.JSONDecodeError as e:
	        print(f" Error parsing ffprobe JSON output: {e}")
	        if result and result.stdout:
	            print(f" ffprobe stdout: {result.stdout.strip()}")
	        return None, None, None
	    except Exception as e:
	        # Best effort: missing metadata must not stop the merge itself.
	        print(f" An unexpected error occurred while reading metadata: {e}")
	        return None, None, None

	# --- Get Statistics for Output File ---
	def get_output_stats(file_path):
	    """Print size, duration and basic tags of a media file as reported by ffprobe.

	    Args:
	        file_path: Path of the file to inspect.

	    Returns:
	        None.  All information is printed; ffprobe or parsing failures are
	        reported with a message and otherwise ignored.
	    """
	    print(f"\n--- Output File Statistics ({file_path}) ---")
	    command = [
	        ffprobe_path,
	        # "error" rather than "quiet", so a failing ffprobe leaves a message
	        # on stderr for the handler below to report.
	        "-v", "error",
	        "-print_format", "json",
	        "-show_format",
	        "-show_streams", # Fallback source for the duration
	        # Explicit -i keeps a file name that starts with "-" from being
	        # parsed as an option.
	        "-i", file_path,
	    ]
	    result = None # Stays None when ffprobe itself fails
	    try:
	        result = subprocess.run(command, check=True, capture_output=True, text=True, encoding='utf-8')
	        data = json.loads(result.stdout)

	        # --- Size ---
	        size_bytes = data.get("format", {}).get("size")
	        if size_bytes:
	            size_mb = int(size_bytes) / (1024 * 1024)
	            print(f"Size: {size_mb:.2f} MB")
	        else:
	            print("Size: Unknown")

	        # --- Duration ---
	        # Prefer duration from format, fallback to stream[0] if needed
	        duration_sec = data.get("format", {}).get("duration")
	        if not duration_sec and data.get("streams"):
	            duration_sec = data["streams"][0].get("duration")

	        if duration_sec:
	            try:
	                # Round the total before splitting it; rounding the seconds
	                # part on its own could produce "MM:60".
	                minutes, seconds = divmod(round(float(duration_sec)), 60)
	                print(f"Duration: {minutes:02d}:{seconds:02d}")
	            except (ValueError, TypeError, OverflowError):
	                # OverflowError covers an infinite duration value.
	                print(f"Duration: Invalid value ({duration_sec})")
	        else:
	            print("Duration: Unknown")

	        # --- Metadata Tags ---
	        print("Metadata:")
	        tags = data.get("format", {}).get("tags", {})
	        # Tag names are looked up case-insensitively.
	        tags_lower = {k.lower(): v for k, v in tags.items()}

	        title = tags_lower.get('title')
	        artist = tags_lower.get('artist')
	        year = tags_lower.get('date') or tags_lower.get('year')

	        print(f" Title: {title if title else '-'}")
	        print(f" Artist: {artist if artist else '-'}")
	        print(f" Year: {year if year else '-'}")
	        print("-" * 20)

	    except subprocess.CalledProcessError as e:
	        print(f"Error running ffprobe for stats: {e}")
	        if e.stderr:
	            print(f"ffprobe stderr: {e.stderr.strip()}")
	    except json.JSONDecodeError as e:
	        print(f"Error parsing ffprobe JSON output for stats: {e}")
	        if result and result.stdout:
	            print(f"ffprobe stdout: {result.stdout.strip()}")
	    except Exception as e:
	        # Best effort: the statistics are informational only.
	        print(f"An unexpected error occurred while getting stats: {e}")

	# --- Main Execution --- #

	# Tags for the merged file are taken from the first input file.
	extracted_title, extracted_artist, extracted_year = get_metadata(first_mp3_file)

	try:
	    # Create a temporary file with the list of input files.  Its path is
	    # recorded as soon as the file exists, so the finally-block below removes
	    # it even when one of the writes fails.
	    with tempfile.NamedTemporaryFile(mode='w', delete=False, encoding='utf-8', suffix='.txt') as temp_file:
	        temp_file_path = temp_file.name
	        for mp3_file in mp3_files:
	            # Absolute paths: the list file lives in the temp directory, and
	            # relative entries would be resolved against that directory
	            # instead of the current working directory.
	            absolute_mp3_file = os.path.abspath(mp3_file)
	            # A single quote is written as '\'' (close, escaped quote, reopen).
	            safe_mp3_file = absolute_mp3_file.replace("'", "'\\''")
	            temp_file.write(f"file '{safe_mp3_file}'\n")

	    # Display the files to be merged
	    print("\nFiles to be merged (in order)>>>")
	    for i, f in enumerate(mp3_files):
	        print(f" {i+1}. {f}")
	    print("-" * 20)

	    # Check if the output file already exists
	    if Path(output_file).exists():
	        print(f"\nError: Output file '{output_file}' already exists.")
	        raise SystemExit(1)

	    # Build the ffmpeg command:
	    #   input 0 = concatenated audio, input 1 = first file (cover art source);
	    #   audio and an optional picture stream are copied without re-encoding;
	    #   all input metadata is dropped and the extracted tags are set instead.
	    ffmpeg_command = [
	        ffmpeg_path,
	        "-f", "concat",
	        "-safe", "0",
	        "-i", temp_file_path,
	        "-i", first_mp3_file,
	        "-map", "0:a",
	        "-c:a", "copy",
	        "-map", "1:v?",
	        "-c:v", "copy",
	        "-disposition:v", "attached_pic",
	        "-map_metadata", "-1",
	    ]
	    if extracted_title:
	        ffmpeg_command.extend(["-metadata", f"title={extracted_title}"])
	    if extracted_artist:
	        ffmpeg_command.extend(["-metadata", f"artist={extracted_artist}"])
	    if extracted_year:
	        ffmpeg_command.extend(["-metadata", f"date={extracted_year}"])
	        ffmpeg_command.extend(["-metadata", f"year={extracted_year}"])

	    ffmpeg_command.append(output_file)

	    # Display the ffmpeg command
	    print("\nThe following ffmpeg command will be executed:")
	    print(" ".join(map(lambda x: f'"{x}"' if " " in x else x, ffmpeg_command)))

	    # Ask for confirmation before running the command; surrounding whitespace
	    # in the answer is ignored.
	    run_command = input("\nDo you want to run this command? (y/n) ").strip().lower()
	    if run_command == "y":
	        print("\nRunning ffmpeg...")
	        result = subprocess.run(ffmpeg_command, check=False, capture_output=True, text=True, encoding='utf-8')

	        if result.returncode == 0:
	            print(f"\nSuccessfully merged audio files into '{output_file}'")
	            get_output_stats(output_file) # Get and display stats for the output file
	        else:
	            print("\n--- ffmpeg Error ---")
	            print(f"ffmpeg command failed with exit code {result.returncode}")
	            if result.stdout:
	                print("ffmpeg stdout:")
	                print(result.stdout.strip())
	            if result.stderr:
	                print("ffmpeg stderr:")
	                print(result.stderr.strip())
	            print("--------------------")
	            if Path(output_file).exists():
	                try:
	                    os.remove(output_file)
	                    print(f"Deleted potentially incomplete output file: '{output_file}'")
	                except OSError as e:
	                    print(f"Warning: Could not delete incomplete output file '{output_file}': {e}")
	            # A failed merge must not end with exit status 0.
	            raise SystemExit(1)

	    else:
	        print("Command cancelled.")

	finally:
	    # Ensure the temporary file is always removed
	    if temp_file_path and Path(temp_file_path).exists():
	        try:
	            os.remove(temp_file_path)
	        except OSError as e:
	            print(f"Warning: Could not remove temporary file '{temp_file_path}': {e}")

	# Print a summary
	print(f"\nProcessed {len(mp3_files)} input files.")
	print("Done.")