Created
January 8, 2025 06:46
-
-
Save slavanap/ebcef228cc2c8d4f5ff923e58f61bec4 to your computer and use it in GitHub Desktop.
Transcode audio files to MP3 with PSNR check
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from pydub import AudioSegment | |
# Example transcode command that produces the MP3 this script compares against its source:
#   ffmpeg -fflags +genpts -i 008.wma -sample_fmt s16p -b:a 320k 008.mp3
def load_audio(file_path):
    """
    Decode an audio file with pydub and return ``(samples, frame_rate)``.

    ``samples`` is a float32 NumPy array built from
    ``AudioSegment.get_array_of_samples()``; ``frame_rate`` is the segment's
    ``frame_rate`` attribute.
    """
    segment = AudioSegment.from_file(file_path)
    raw_samples = segment.get_array_of_samples()
    as_float = np.array(raw_samples, dtype=np.float32)
    return as_float, segment.frame_rate
def calculate_psnr(original, compressed):
    """
    Calculate the Peak Signal-to-Noise Ratio (PSNR), in dB, between two signals.

    Args:
        original: reference signal (array-like of numbers).
        compressed: signal to compare; must broadcast against ``original``.

    Returns:
        ``20 * log10(peak / sqrt(mse))`` as a Python float, where ``peak`` is
        the largest absolute sample of ``original``. Returns ``inf`` when the
        signals are identical and ``-inf`` when ``original`` is all zeros but
        the signals differ.

    Raises:
        ValueError: if ``original`` is empty.
    """
    # Work in float64: integer inputs would overflow in the squared
    # difference, and float32 loses precision when averaging large squares.
    reference = np.asarray(original, dtype=np.float64)
    degraded = np.asarray(compressed, dtype=np.float64)
    if reference.size == 0:
        raise ValueError("cannot compute PSNR of an empty signal")

    mse = np.mean((reference - degraded) ** 2)
    if mse == 0:
        return float('inf')  # Perfect match

    # Audio samples are signed, so the peak is the largest magnitude; a plain
    # max() can be zero or negative and would turn the logarithm into NaN.
    peak = np.max(np.abs(reference))
    if peak == 0:
        return float('-inf')  # Silent reference, non-silent difference
    return float(20 * np.log10(peak / np.sqrt(mse)))
def main():
    """
    Command-line entry point: compare two audio files by PSNR.

    Positional arguments ``src`` and ``dst`` are the two files to load and
    compare. With ``--threshold``, a PSNR below that value is an error.

    Returns:
        0 on success; 1 if the sample counts differ, the sampling rates
        differ, or the PSNR is below the requested threshold.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('src', type=str)
    parser.add_argument('dst', type=str)
    parser.add_argument('--threshold', default=None, type=float, help='threshold to return an error code for if unreached')
    args = parser.parse_args()

    sdata, srate = load_audio(args.src)
    ddata, drate = load_audio(args.dst)
    if len(sdata) != len(ddata):
        print("length mismatch")
        return 1
    if srate != drate:
        print("Sampling rates do not match. Resample the files before comparison.")
        return 1

    # pydub bug workaround: when exactly one of the files has samples at or
    # above 65536, scale that file down by 65536 so both use the same range.
    # NOTE(review): presumably one file is decoded at a wider sample width
    # than the other -- confirm against pydub's behaviour.
    m_sdata, m_ddata = np.max(sdata), np.max(ddata)
    if m_sdata >= 65536 and m_ddata < 65536:
        sdata = sdata / 65536
    elif m_ddata >= 65536 and m_sdata < 65536:
        ddata = ddata / 65536

    # Calculate PSNR
    psnr_value = calculate_psnr(sdata, ddata)
    print(f"PSNR between MP3 and WMA: {psnr_value:.2f} dB")

    # Compare against None explicitly so that `--threshold 0` is honoured
    # instead of being treated as "no threshold".
    if args.threshold is not None and psnr_value < args.threshold:
        print("PSNR is below requrested threshold")
        return 1
    return 0
if __name__ == "__main__":
    # Run the CLI and hand main()'s return value to the interpreter as the
    # process exit status.
    raise SystemExit(main())
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Transcode one audio file to a 320 kbit/s MP3 placed next to it, copy the
# source's timestamp onto the MP3, verify the result with check_psnr.py
# (threshold 40 dB), and delete the source only if every step succeeded.
#
# Usage: <this script> INPUT_FILE
set -e

if [ "$#" -lt 1 ] || [ ! -f "$1" ]; then
    echo "usage: $0 INPUT_FILE" >&2
    exit 2
fi

# Directory containing this script; check_psnr.py is looked up there.
# `&&` makes a failed cd abort (via set -e) instead of silently yielding the
# caller's working directory.
bash_dir="$(cd -- "$(dirname -- "$0")" && pwd)"

src="$1"
dst="${src%.*}.mp3"

# An input that already ends in .mp3 would map onto itself and then be removed.
if [ "$src" = "$dst" ]; then
    echo "error: input already has the .mp3 extension: $src" >&2
    exit 2
fi

ffmpeg -loglevel warning -stats -fflags +genpts -i "$src" -sample_fmt s16p -b:a 320k "$dst"
# Preserve the source file's modification time on the transcoded copy.
touch -r "$src" -- "$dst"
# A non-zero exit from the check stops the script here (set -e), so the
# source is kept whenever the PSNR check fails.
python "$bash_dir"/check_psnr.py --threshold 40 -- "$src" "$dst"
rm -- "$src"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment