-
-
Save rishikksh20/d5491e2635d2ff079d61def802918162 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"import time\n", | |
"from shutil import rmtree\n", | |
"import numpy as np\n", | |
"import matplotlib # Remove this line if you don't need them\n", | |
"matplotlib.use('Agg') # Remove this line if you don't need them\n", | |
"import matplotlib.pyplot as plt\n", | |
"import soundfile as sf\n", | |
"# import librosa\n", | |
"import pyworld as pw\n", | |
"\n", | |
"speed = 1.0\n", | |
"frame_period = 5.0\n", | |
"\n", | |
"EPSILON = 1e-8\n", | |
"\n", | |
"def savefig(filename, figlist, log=True):\n", | |
"    \"\"\"Plot every array in `figlist` as one row of a figure and save it to `filename`.\n", | |
"\n", | |
"    The rank of the first array picks the layout for the whole figure:\n", | |
"    1-D arrays are drawn as line plots, 2-D arrays as images (transposed,\n", | |
"    origin at the bottom).\n", | |
"\n", | |
"    Args:\n", | |
"        filename: Output path handed to the figure's savefig.\n", | |
"        figlist: Non-empty sequence of arrays (anything with `.shape`).\n", | |
"        log: For 2-D input, show log(f + EPSILON) instead of f + EPSILON.\n", | |
"            Has no effect on 1-D input.\n", | |
"\n", | |
"    Raises:\n", | |
"        ValueError: If `figlist` is empty or its first array is not 1-D or 2-D.\n", | |
"    \"\"\"\n", | |
"    n = len(figlist)\n", | |
"    if n == 0:\n", | |
"        raise ValueError('figlist must contain at least one array.')\n", | |
"    # Peek at the first array: its rank selects the plot type for every row.\n", | |
"    ndim = len(figlist[0].shape)\n", | |
"    if ndim not in (1, 2):\n", | |
"        raise ValueError('Input dimension must < 3.')\n", | |
"    fig = plt.figure()\n", | |
"    try:\n", | |
"        for i, f in enumerate(figlist):\n", | |
"            ax = fig.add_subplot(n, 1, i + 1)\n", | |
"            if ndim == 2:\n", | |
"                # EPSILON keeps log() finite where the input is exactly zero.\n", | |
"                x = np.log(f + EPSILON) if log else f + EPSILON\n", | |
"                ax.imshow(x.T, origin='lower', interpolation='none', aspect='auto',\n", | |
"                          extent=(0, x.shape[0], 0, x.shape[1]))\n", | |
"            elif len(f.shape) == 1:\n", | |
"                # In a 1-D figure, rows that are not 1-D are left as empty axes.\n", | |
"                ax.plot(f)\n", | |
"                ax.set_xlim([0, len(f)])\n", | |
"        fig.savefig(filename)\n", | |
"    finally:\n", | |
"        # Close the figure so repeated calls do not accumulate open figures.\n", | |
"        plt.close(fig)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import IPython.display\n", | |
"import pyreaper\n", | |
"\n", | |
"FILE_NAME = '/data/shared/KeithIto/LJSpeech-1.0/wavs/LJ007-0141.wav'\n", | |
"# FILE_NAME = '/data/shared/CommonVoice/cv_corpus/cv-other-test/sample-000645.wav'\n", | |
"# FILE_NAME = '/data/shared/OpenSLR/LibriSpeech/train-other-500/1006/135212/1006-135212-0040.flac'\n", | |
"\n", | |
"\n", | |
"def timed_synthesize(label, f0, sp, ap, fs, frame_period):\n", | |
"    \"\"\"Run pw.synthesize and print how long the synthesis call alone took.\n", | |
"\n", | |
"    Args:\n", | |
"        label: Name printed in front of the elapsed time.\n", | |
"        f0, sp, ap: F0 contour, spectral envelope and aperiodicity passed to pw.synthesize.\n", | |
"        fs: Sampling rate passed to pw.synthesize.\n", | |
"        frame_period: Frame period passed to pw.synthesize.\n", | |
"\n", | |
"    Returns:\n", | |
"        The waveform returned by pw.synthesize.\n", | |
"    \"\"\"\n", | |
"    start = time.time()\n", | |
"    wav = pw.synthesize(f0, sp, ap, fs, frame_period)\n", | |
"    print(' > {} - {}'.format(label, time.time() - start))\n", | |
"    return wav\n", | |
"\n", | |
"\n", | |
"def play_audio(audio, sr):\n", | |
"    \"\"\"Display an inline audio player for `audio` at sampling rate `sr`.\"\"\"\n", | |
"    IPython.display.display(IPython.display.Audio(audio, rate=sr))\n", | |
"\n", | |
"\n", | |
"x, fs = sf.read(FILE_NAME)\n", | |
"\n", | |
"# 1. A convenient way: a single call with pyworld's default options.\n", | |
"f0_def, sp_def, ap_def = pw.wav2world(x, fs)\n", | |
"y = timed_synthesize('Default', f0_def, sp_def, ap_def, fs, pw.default_frame_period)\n", | |
"\n", | |
"# 2. Step by step\n", | |
"# 2-1 DIO without F0 refinement\n", | |
"_f0, t = pw.dio(x, fs, f0_floor=50.0, f0_ceil=600.0,\n", | |
"                channels_in_octave=2,\n", | |
"                frame_period=frame_period,\n", | |
"                speed=speed)\n", | |
"_sp = pw.cheaptrick(x, _f0, t, fs)\n", | |
"_ap = pw.d4c(x, _f0, t, fs)\n", | |
"_y = timed_synthesize('No refinement', _f0, _sp, _ap, fs, frame_period)\n", | |
"# sf.write('y_without_f0_refinement.wav', _y, fs)\n", | |
"\n", | |
"# 2-2 DIO with F0 refinement (using Stonemask)\n", | |
"f0 = pw.stonemask(x, _f0, t, fs)\n", | |
"sp = pw.cheaptrick(x, f0, t, fs)\n", | |
"ap = pw.d4c(x, f0, t, fs)\n", | |
"y_dio = timed_synthesize('DIO', f0, sp, ap, fs, frame_period)\n", | |
"# sf.write('y_with_f0_refinement.wav', y_dio, fs)\n", | |
"\n", | |
"# 2-3 Harvest with F0 refinement (using Stonemask)\n", | |
"_f0_h, t_h = pw.harvest(x, fs)\n", | |
"f0_h = pw.stonemask(x, _f0_h, t_h, fs)\n", | |
"sp_h = pw.cheaptrick(x, f0_h, t_h, fs)\n", | |
"ap_h = pw.d4c(x, f0_h, t_h, fs)\n", | |
"y_h_harvest = timed_synthesize('Harvest', f0_h, sp_h, ap_h, fs, pw.default_frame_period)\n", | |
"# sf.write('y_harvest_with_f0_refinement.wav', y_h_harvest, fs)\n", | |
"\n", | |
"# 2-4 REAPER F0. The file is re-read as int16 for REAPER; separate names keep\n", | |
"# the float signal `x` and the Harvest features above intact.\n", | |
"x_int16, fs = sf.read(FILE_NAME, dtype='int16')\n", | |
"pm_times, pm, t_r, f0_r, corr = pyreaper.reaper(x_int16, fs, do_high_pass=True, do_hilbert_transform=False)\n", | |
"x_r = x_int16.astype('double')\n", | |
"t_r = t_r.astype('double')\n", | |
"# REAPER reports unvoiced frames as f0 = -1, while WORLD marks them with f0 = 0;\n", | |
"# clip the negatives so they are not treated as voiced frames.\n", | |
"f0_r = f0_r.astype('double').clip(min=0.0)\n", | |
"sp_r = pw.cheaptrick(x_r, f0_r, t_r, fs)\n", | |
"ap_r = pw.d4c(x_r, f0_r, t_r, fs)\n", | |
"# NOTE(review): assumes REAPER's default frame period equals pw.default_frame_period - confirm.\n", | |
"y_h_reaper = timed_synthesize('Reaper', f0_r, sp_r, ap_r, fs, pw.default_frame_period)\n", | |
"# sf.write('y_reaper_with_f0_refinement.wav', y_h_reaper, fs)\n", | |
"\n", | |
"# Comparison\n", | |
"# savefig('test/wavform.png', [x, _y, y])\n", | |
"# savefig('test/sp.png', [_sp, sp])\n", | |
"# savefig('test/ap.png', [_ap, ap], log=False)\n", | |
"# savefig('test/f0.png', [_f0, f0])\n", | |
"\n", | |
"for label, wav in [\n", | |
"    ('Default', y),\n", | |
"    ('No refinement', _y),\n", | |
"    ('DIO f0 refinement', y_dio),\n", | |
"    ('Harverst f0 refinement', y_h_harvest),\n", | |
"    ('Reaper f0 refinement', y_h_reaper),\n", | |
"]:\n", | |
"    print('-- {}'.format(label))\n", | |
"    play_audio(wav, fs)\n", | |
"\n", | |
"print(f0.shape)\n", | |
"print(_sp.shape)\n", | |
"print(_ap.shape)\n", | |
"print(fs)\n", | |
"\n", | |
"# Original recording for reference; a file on disk carries its own sampling rate.\n", | |
"IPython.display.Audio(filename=FILE_NAME)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment