Created
February 1, 2025 12:19
-
-
Save whateverforever/f8ac3dd0552ce9bd7485f34e099b79a0 to your computer and use it in GitHub Desktop.
convert some text to audio using kokoro
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os.path as osp | |
import shutil | |
import tempfile | |
import subprocess | |
import shlex | |
import argparse | |
# Output file extensions accepted by this script.
SUPPORTED_EXTS = [".wav", ".mp3", ".m4b"]

# Fail fast if the ffmpeg executable cannot be run, before any expensive
# work (model loading, synthesis) is started.
print("Checking for ffmpeg...")
try:
    subprocess.check_call(
        ["ffmpeg", "--help"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
    )
except (OSError, subprocess.CalledProcessError):
    # OSError covers a missing binary (FileNotFoundError) as well as one that
    # exists but cannot be executed; CalledProcessError covers an ffmpeg that
    # starts but exits with a non-zero status.
    print("ffmpeg not available. please install. exiting...")
    # SystemExit instead of the site-provided exit(), which is meant for
    # interactive use and is absent when the site module is not loaded.
    raise SystemExit(1)
print(" ffmpeg available!")

# The third-party modules are imported late so a missing dependency produces
# a readable message instead of a traceback.
print("Checking for kokoro and soundfile...")
try:
    from kokoro import KPipeline
    import soundfile as sf
except ImportError:
    print("kokoro or soundfile missing. please install (python modules). exiting...")
    raise SystemExit(1)
print(" kokoro and soundfile available!")
def main():
    """Convert a ``.txt`` file to speech with kokoro and save it as audio.

    Parses ``input``, ``output`` and ``--voice`` from the command line, runs
    the text through a ``KPipeline``, writes every generated segment as a
    temporary WAV file, concatenates the segments with ffmpeg and finally
    moves (``.wav``) or converts (any other supported extension) the merged
    file to the requested output path.

    Raises:
        SystemExit: On invalid arguments (via ``parser.error``) or when the
            pipeline yields no audio segments.
        subprocess.CalledProcessError: If an ffmpeg invocation fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input", help="Path to a .txt file containing the target text.")
    parser.add_argument(
        "output",
        help=f"Output file path (has to end in one of: {', '.join(SUPPORTED_EXTS)})",
    )
    parser.add_argument(
        "--voice", choices=["af_heart", "af_bella", "af_nicole"], default="af_heart"
    )
    args = parser.parse_args()

    if not args.input.lower().endswith(".txt"):
        parser.error("Input file has to end on .txt")

    _, outext = osp.splitext(args.output)
    outext = outext.lower()
    if outext not in SUPPORTED_EXTS:
        parser.error(f"Output file path has invalid suffix. Allowed: {SUPPORTED_EXTS}")

    print("Reading input text...")
    # Explicit encoding so the result does not depend on the platform locale.
    with open(args.input, encoding="utf-8") as fh:
        text = fh.read()

    print("Constructing pipeline...")
    pipeline = KPipeline(lang_code="a")
    # split_pattern makes the pipeline split the text on runs of newlines.
    generator = pipeline(text, voice=args.voice, speed=1.25, split_pattern=r"\n+")

    print("Generating individual sound files...")
    with tempfile.TemporaryDirectory() as tmpdir:
        paths = []
        for i, (_, _, audio) in enumerate(generator):
            print(i, end=" ", flush=True)
            path = osp.join(tmpdir, f"part_{i}.wav")
            paths.append(path)
            # 24000 is passed to soundfile as the sample rate
            # (presumably kokoro's output rate -- TODO confirm).
            sf.write(path, audio, 24000)
        print()

        if not paths:
            # Without any segment ffmpeg would fail on an empty concat list.
            raise SystemExit("No audio was generated from the input text. exiting...")

        # ffmpeg's concat demuxer reads one "file '<path>'" directive per
        # line; a literal single quote inside the path is written as '\''.
        filelist = [
            "file '{}'\n".format(fn.replace("'", "'\\''")) for fn in paths
        ]
        filelist_path = osp.join(tmpdir, "filelist.txt")
        with open(filelist_path, "w") as fh:
            fh.writelines(filelist)

        print("Merging soundfiles...")
        merged_output = osp.join(tmpdir, "merged.wav")
        # Arguments are passed as a list (no shell-style re-splitting) so
        # paths containing spaces or backslashes reach ffmpeg unchanged.
        subprocess.check_call(
            [
                "ffmpeg",
                "-f", "concat",
                "-safe", "0",
                "-i", filelist_path,
                "-c", "copy",
                merged_output,
            ]
        )

        if outext == ".wav":
            # The merged file already is a WAV; no re-encoding required.
            shutil.move(merged_output, args.output)
        else:
            print(f"Converting to {outext}...")
            subprocess.check_call(
                ["ffmpeg", "-i", merged_output, "-q:a", "2", args.output]
            )

    print("Done. :)")
# Script entry point: run the conversion only when executed directly,
# not when this module is imported.
if "__main__" == __name__:
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment