Skip to content

Instantly share code, notes, and snippets.

@bee-san
Created October 9, 2025 12:57
Show Gist options
  • Save bee-san/0064fbfa575e03cc15c7fd1a006a362f to your computer and use it in GitHub Desktop.
Save bee-san/0064fbfa575e03cc15c7fd1a006a362f to your computer and use it in GitHub Desktop.
  • Run Terraform to deploy Azure TTS
  • Deploy the rest of the code as an Azure Function
  • Replace the placeholder secrets (XXXX) with your own

"this is not secure, u hard code ur api secrets!?!?!?"

I don't care; this all runs on a free Azure trial for a non-production workload used only by me.

import hmac
import logging

import azure.cognitiveservices.speech as speechsdk
import azure.functions as func
# Credentials for the Azure Speech resource: subscription key and region.
# Both values are placeholders; substitute your own before deploying.
azure_key = "XXXX"
azure_region = "XXXX"

# Function app whose HTTP routes are registered at the anonymous auth level.
app = func.FunctionApp(
    http_auth_level=func.AuthLevel.ANONYMOUS,
)
def _extract_text(req: func.HttpRequest) -> str:
    """Return the text to synthesize, or "" when the request carries none.

    The ``text`` query parameter takes precedence; otherwise the ``text``
    field of a JSON object body is used.
    """
    text = req.params.get('text')
    if text:
        return text
    try:
        req_body = req.get_json()
    except ValueError:
        # Body is absent or not valid JSON: treat it as "no text supplied".
        return ""
    # Valid JSON can also be a list or scalar, which has no .get().
    if not isinstance(req_body, dict):
        return ""
    text = req_body.get('text')
    # Only a string can be handed to the synthesizer.
    return text if isinstance(text, str) else ""


@app.route(route="tts")
def text_to_speech(req: func.HttpRequest) -> func.HttpResponse:
    """Synthesize the requested text to Ogg/Opus audio.

    Args:
        req: HTTP request. ``key`` must be given in the query string; ``text``
            may be given in the query string or as the ``text`` field of a
            JSON object body.

    Returns:
        200 with ``audio/ogg`` bytes on success; 401 when ``key`` is missing
        or wrong; 400 when no text is supplied; 500 when synthesis is
        canceled, fails, or ends in an unexpected state.
    """
    logging.info('Python HTTP trigger function processed a request.')

    # Shared secret expected in the query string (placeholder value).
    expected_key = "XXXXX"
    key = req.params.get('key')
    if not key:
        return func.HttpResponse(
            "Please pass key in the query string",
            status_code=401
        )
    # Constant-time comparison; bytes are used so non-ASCII input is accepted.
    if not hmac.compare_digest(key.encode("utf-8"), expected_key.encode("utf-8")):
        return func.HttpResponse("Invalid key", status_code=401)

    text = _extract_text(req)
    if not text:
        return func.HttpResponse(
            "Please pass text in the query string or request body",
            status_code=400
        )

    try:
        # Configure speech synthesis
        speech_config = speechsdk.SpeechConfig(subscription=azure_key, region=azure_region)
        speech_config.speech_synthesis_voice_name = 'ja-JP-NanamiNeural'
        speech_config.set_speech_synthesis_output_format(
            speechsdk.SpeechSynthesisOutputFormat.Ogg24Khz16BitMonoOpus
        )

        # Use pull stream instead of file output for Azure Functions
        pull_stream = speechsdk.audio.PullAudioOutputStream()
        audio_config = speechsdk.audio.AudioOutputConfig(stream=pull_stream)
        speech_synthesizer = speechsdk.SpeechSynthesizer(
            speech_config=speech_config, audio_config=audio_config
        )

        # Synthesize speech (blocks until the async operation finishes)
        result = speech_synthesizer.speak_text_async(text).get()

        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            logging.info("Speech synthesized for text: %s", text)
            # The synthesized bytes are taken from the result object itself.
            return func.HttpResponse(
                bytes(result.audio_data),
                mimetype="audio/ogg",
                status_code=200
            )

        if result.reason == speechsdk.ResultReason.Canceled:
            cancellation_details = result.cancellation_details
            logging.error("Speech synthesis canceled: %s", cancellation_details.reason)
            if cancellation_details.reason == speechsdk.CancellationReason.Error:
                logging.error("Error details: %s", cancellation_details.error_details)
            return func.HttpResponse(
                f"Speech synthesis failed: {cancellation_details.reason}",
                status_code=500
            )

        # Neither completed nor canceled: a server-side problem, not a bad request.
        logging.error("Unexpected speech synthesis result: %s", result.reason)
        return func.HttpResponse(
            f"Speech synthesis ended unexpectedly: {result.reason}",
            status_code=500
        )
    except Exception as e:
        # Top-level boundary for the handler: log with traceback, answer 500.
        logging.exception("Error during speech synthesis: %s", e)
        return func.HttpResponse(
            f"An error occurred: {str(e)}",
            status_code=500
        )
# Example request URL: https://XXXX.azurewebsites.net/api/tts?key=XXXX&text=$s
{
"version": "2.0",
"logging": {
"applicationInsights": {
"samplingSettings": {
"isEnabled": true,
"excludedTypes": "Request"
}
}
},
"extensionBundle": {
"id": "Microsoft.Azure.Functions.ExtensionBundle",
"version": "[4.*, 5.0.0)"
}
}
# Configure the azurerm provider for this deployment.
provider "azurerm" {
# Empty features block: no provider feature overrides are set.
features {}
# Azure subscription to deploy into (placeholder; replace with your own).
subscription_id = "XXXX"
}
# Resource group named "speech-service-rg", located in East US.
resource "azurerm_resource_group" "speech_service_rg" {
name = "speech-service-rg"
location = "East US"
}
# Cognitive Services account of kind "SpeechServices".
resource "azurerm_cognitive_account" "speech_service" {
name = "speechservice"
# Location and resource group are taken from azurerm_resource_group.speech_service_rg.
location = azurerm_resource_group.speech_service_rg.location
resource_group_name = azurerm_resource_group.speech_service_rg.name
kind = "SpeechServices"
# NOTE(review): S0 is presumably the paid standard tier; confirm it is intended rather than the free F0 tier.
sku_name = "S0"
}
# Uncomment to enable Azure Monitor OpenTelemetry
# Ref: aka.ms/functions-azure-monitor-python
# azure-monitor-opentelemetry
azure-functions
azure-cognitiveservices-speech
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment