AI :: aisuite_ollama_fastapi_moonshine_gemma_kokoto.sh
#!/bin/bash
##
## This script creates a FastRTC server which is well suited for audio conversations.
##
## Credits: https://www.youtube.com/watch?v=NthuElhWDk0
##
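## The script assumes a Debian/Ubuntu host (packages are installed with apt),
## Python 3.13 available as python3, and roughly 16 GB of RAM. It writes its
## files under ~/workspace/fastRTC and creates a Python virtualenv in
## ~/.virtualenvs/fastRTC.
##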
function aisuite_check_python3() {
    python3 -V | grep -q 3.13 || { echo "FATAL: Python 3.13 is required" >&2; return 1; }
}
function aisuite_install_ollama() {
    which ollama > /dev/null || curl -fsSL https://ollama.ai/install.sh | sudo sh
}
function aisuite_install_ollama_gemma3() {
    ollama list | grep gemma3:4b > /dev/null || ollama pull gemma3:4b
}
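## Note: gemma3:4b is only one possible choice; any chat-capable Ollama model should
## work, as long as the `model=` argument in code.py below is changed to match.
## Untested sketch, using another publicly available model tag as an example:
##   ollama pull llama3.2:3b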
function aisuite_fastRTC_prepare() {
    mkdir -p ~/workspace/fastRTC
    cat << EOD > ~/workspace/fastRTC/pyproject.toml
[project]
name = "simple-fastrtc-voice-chat"
version = "0.1.0"
description = "A simple voice chat that can run on the cpu "
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
"fastrtc[stt]>=0.0.19",
"kokoro-onnx>=0.4.7",
"loguru>=0.7.3",
"ollama>=0.4.7",
]
EOD
    cat << EOD > ~/workspace/fastRTC/code.py
from fastrtc import ReplyOnPause, Stream, get_stt_model, get_tts_model
from ollama import chat
stt_model = get_stt_model(model="moonshine/base")
tts_model = get_tts_model(model="kokoro")
SYSTEM_PROMPT = "You are a Voice AI Assistant. Engage in a conversation with the user and answer their questions. Do not use emojis or formatting, like asterisks or faces."
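# Voice pipeline: Moonshine transcribes the caller's audio, the transcript is
# sent to the local gemma3:4b model through Ollama's chat() API, and the reply
# is streamed back as audio by the Kokoro TTS model.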
def echo(audio):
    transcript = stt_model.stt(audio)
    response = chat(
        model="gemma3:4b",
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": transcript}
        ]
    )
    response_text = response['message']['content']
    for audio_chunk in tts_model.stream_tts_sync(response_text):
        yield audio_chunk
server_name = "0.0.0.0"
server_port = 7860
share_public_link = True
ssl_verify = False
opts = { 'server_name': server_name, 'server_port': server_port, 'share': share_public_link }
if not ssl_verify:
    opts = opts | { 'ssl_verify': False, 'ssl_keyfile': "key.pem", 'ssl_certfile': "cert.pem" }
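# ReplyOnPause invokes echo() once the caller stops speaking; the "send-receive"
# audio mode makes the launched UI a two-way voice conversation.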
stream = Stream(ReplyOnPause(echo), modality="audio", mode="send-receive")
stream.ui.launch(**opts)
EOD
    ## code.py looks for key.pem and cert.pem in its working directory, so the
    ## self-signed certificate is generated directly in ~/workspace/fastRTC.
    local HOSTNAME=$(hostname)
    openssl req -x509 -newkey rsa:4096 -keyout ~/workspace/fastRTC/key.pem -out ~/workspace/fastRTC/cert.pem -sha256 -days 365 -nodes -subj "/C=US/ST=New York/L=New York/O=My Organization/OU=My Department/CN=${HOSTNAME}"
}
function aisuite_fastRTC_build() {
    source ~/.virtualenvs/fastRTC/bin/activate
    pushd ~/workspace/fastRTC
    ## editable install, so that simple_fastrtc_voice_chat.egg-info/ is left in the
    ## source tree and can serve as an "already built" marker for the functions below
    pip install -e .
    deactivate > /dev/null
    popd
}
function aisuite_fastRTC_prepare_and_build() {
    python3 -m ensurepip --version > /dev/null 2>&1 || sudo apt install -y python3-venv python3-pip
    [[ -d ~/.virtualenvs ]] || mkdir -p ~/.virtualenvs
    [[ -d ~/.virtualenvs/fastRTC ]] || python3 -m venv ~/.virtualenvs/fastRTC
    [[ -f ~/workspace/fastRTC/code.py ]] || aisuite_fastRTC_prepare
    [[ -d ~/workspace/fastRTC/simple_fastrtc_voice_chat.egg-info ]] || aisuite_fastRTC_build
}
function aisuite_fastRTC_run() {
    ## MemTotal is reported in kB, so 16384000 corresponds to roughly 16 GB
    if [[ $(awk '/MemTotal/ {print $2}' /proc/meminfo) -gt 16384000 ]]; then
        [[ -d ~/workspace/fastRTC/simple_fastrtc_voice_chat.egg-info ]] || aisuite_fastRTC_prepare_and_build
        source ~/.virtualenvs/fastRTC/bin/activate
        pushd ~/workspace/fastRTC
        python3 code.py  ## ideally this would run the installed package rather than the source file
        deactivate > /dev/null
        popd
    else
        echo "FATAL: At least 16 GB of memory is required" >&2; return 1
    fi
}
aisuite_check_python3 && aisuite_install_ollama && aisuite_install_ollama_gemma3 && aisuite_fastRTC_run
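## Once running, the UI launched by code.py should be reachable at https://<hostname>:7860
## (self-signed certificate, so the browser will warn); with share=True, a temporary
## public link may also be printed to the console.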