Skip to content

Instantly share code, notes, and snippets.

@whateverforever
Created February 1, 2025 12:19
Show Gist options
  • Save whateverforever/f8ac3dd0552ce9bd7485f34e099b79a0 to your computer and use it in GitHub Desktop.
Save whateverforever/f8ac3dd0552ce9bd7485f34e099b79a0 to your computer and use it in GitHub Desktop.
Convert a text file to audio (.wav, .mp3 or .m4b) using Kokoro and ffmpeg.
import os.path as osp
import shutil
import tempfile
import subprocess
import shlex
import argparse
# Output suffixes accepted by main(); compared against the lower-cased
# extension of the requested output path.
SUPPORTED_EXTS = [".wav", ".mp3", ".m4b"]

# Fail fast when the ffmpeg executable cannot be launched, before any
# expensive model loading happens.
print("Checking for ffmpeg...")
try:
    subprocess.check_call(
        ["ffmpeg", "--help"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
    )
except FileNotFoundError:
    print("ffmpeg not available. please install. exiting...")
    # Use SystemExit directly: the `exit()` builtin is injected by the `site`
    # module for interactive sessions and is not guaranteed to exist
    # (e.g. under `python -S` or in frozen executables).
    raise SystemExit(1)
print(" ffmpeg available!")

# The third-party imports are deferred to this point so that a missing
# package yields a readable message instead of a traceback.
print("Checking for kokoro and soundfile...")
try:
    from kokoro import KPipeline
    import soundfile as sf
except ImportError:
    print("kokoro or soundfile missing. please install (python modules). exiting...")
    raise SystemExit(1)
print(" kokoro and soundfile available!")
def main():
    """Convert a .txt file to speech and save it as .wav, .mp3 or .m4b.

    Command-line arguments:
        input:   path to a ``.txt`` file holding the text to speak.
        output:  destination path; its suffix must be in ``SUPPORTED_EXTS``.
        --voice: one of the offered kokoro voices (default ``af_heart``).

    The text is split on runs of newlines, each chunk yielded by the kokoro
    pipeline is written as its own WAV file into a temporary directory, the
    parts are concatenated with ffmpeg's concat demuxer, and the merged WAV
    is either moved to the output path (``.wav``) or re-encoded by ffmpeg
    (any other supported suffix).

    Raises:
        SystemExit: on invalid arguments (via ``parser.error``) or when the
            input produced no audio at all.
        subprocess.CalledProcessError: if an ffmpeg invocation fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input", help="Path to a .txt file containing the target text.")
    parser.add_argument(
        "output",
        help=f"Output file path (has to end in one of: {', '.join(SUPPORTED_EXTS)})",
    )
    parser.add_argument(
        "--voice", choices=["af_heart", "af_bella", "af_nicole"], default="af_heart"
    )
    args = parser.parse_args()

    if not args.input.lower().endswith(".txt"):
        parser.error("Input file has to end on .txt")

    # Normalise the suffix once so every later comparison is case-insensitive.
    outext = osp.splitext(args.output)[1].lower()
    if outext not in SUPPORTED_EXTS:
        parser.error(f"Output file path has invalid suffix. Allowed: {SUPPORTED_EXTS}")

    print("Reading input text...")
    # Explicit encoding: the platform default (e.g. cp1252 on Windows) would
    # garble UTF-8 text before it reaches the speech pipeline.
    with open(args.input, encoding="utf-8") as fh:
        text = fh.read()

    print("Constructing pipeline...")
    pipeline = KPipeline(lang_code="a")
    generator = pipeline(text, voice=args.voice, speed=1.25, split_pattern=r"\n+")

    print("Generating individual sound files...")
    with tempfile.TemporaryDirectory() as tmpdir:
        paths = []
        for i, (_, _, audio) in enumerate(generator):
            print(i, end=" ", flush=True)
            path = osp.join(tmpdir, f"part_{i}.wav")
            paths.append(path)
            sf.write(path, audio, 24000)
        print()

        if not paths:
            # Without any parts ffmpeg would fail on an empty concat list
            # with an unhelpful message; report the real cause instead.
            raise SystemExit("No audio was generated from the input text. exiting...")

        # The concat demuxer reads one "file '<path>'" directive per line.
        # A literal single quote inside the path has to be written as '\''
        # (close the quote, escaped quote, reopen the quote).
        filelist_path = osp.join(tmpdir, "filelist.txt")
        with open(filelist_path, "w") as fh:
            for fn in paths:
                escaped = fn.replace("'", "'\\''")
                fh.write(f"file '{escaped}'\n")

        print("Merging soundfiles...")
        merged_output = osp.join(tmpdir, "merged.wav")
        # Arguments are passed as a list rather than a split command string so
        # that paths containing spaces or backslashes reach ffmpeg unchanged.
        subprocess.check_call(
            [
                "ffmpeg",
                "-f", "concat",
                "-safe", "0",
                "-i", filelist_path,
                "-c", "copy",
                merged_output,
            ]
        )

        if outext == ".wav":
            # The merged file already is a WAV; just put it in place.
            shutil.move(merged_output, args.output)
        else:
            print(f"Converting to {outext}...")
            subprocess.check_call(
                ["ffmpeg", "-i", merged_output, "-q:a", "2", args.output]
            )

    print("Done. :)")
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment