|
import hmac
import logging
import os

import azure.cognitiveservices.speech as speechsdk
import azure.functions as func
|
|
|
# Azure Speech resource credentials.
# Read from the SPEECH_REGION / SPEECH_KEY environment variables (Function App
# application settings are exposed to the worker as environment variables) so
# real secrets do not have to live in source control. The literals are only
# fallbacks that keep the previous behavior when the variables are not set.
azure_region = os.environ.get("SPEECH_REGION", "XXXX")
azure_key = os.environ.get("SPEECH_KEY", "XXXX")

# Function app whose HTTP routes need no platform-level function key; the
# route handler performs its own check of the `key` query parameter.
app = func.FunctionApp(http_auth_level=func.AuthLevel.ANONYMOUS)
|
|
|
@app.route(route="tts")
def text_to_speech(req: func.HttpRequest) -> func.HttpResponse:
    """Synthesize speech for the supplied text and return it as Ogg/Opus audio.

    The caller must pass the shared secret in the ``key`` query parameter.
    The text is taken from the ``text`` query parameter or, if that is absent,
    from the ``text`` member of a JSON object in the request body.

    Args:
        req: Incoming HTTP request.

    Returns:
        200 with ``audio/ogg`` bytes when synthesis completes;
        401 when the key is missing or wrong;
        400 when no usable text was supplied;
        500 when synthesis is canceled, ends in an unexpected state, or raises.
    """
    logging.info('Python HTTP trigger function processed a request.')

    # --- Authentication -----------------------------------------------------
    # Respond with a proper status instead of raising: an uncaught exception
    # would reach the client as an opaque HTTP 500.
    key = req.params.get('key')
    if not key:
        return func.HttpResponse(
            "Please pass key in the query string",
            status_code=401
        )
    # Constant-time comparison; operands are encoded to bytes because
    # compare_digest rejects non-ASCII str arguments.
    if not hmac.compare_digest(key.encode("utf-8"), "XXXXX".encode("utf-8")):
        return func.HttpResponse("Invalid key", status_code=401)

    # --- Input --------------------------------------------------------------
    # Get text from query parameters first, then fall back to the JSON body.
    text = req.params.get('text')
    if not text:
        try:
            req_body = req.get_json()
        except ValueError:
            # Body is absent or not valid JSON; treated as "no text supplied".
            req_body = None
        # Valid JSON may still be a list/number/string, which has no .get().
        if isinstance(req_body, dict):
            text = req_body.get('text')

    # Reject missing, empty, or non-string text (JSON may carry any type).
    if not text or not isinstance(text, str):
        return func.HttpResponse(
            "Please pass text in the query string or request body",
            status_code=400
        )

    # --- Synthesis ----------------------------------------------------------
    try:
        # Configure speech synthesis
        speech_config = speechsdk.SpeechConfig(subscription=azure_key, region=azure_region)
        speech_config.speech_synthesis_voice_name = 'ja-JP-NanamiNeural'
        speech_config.set_speech_synthesis_output_format(
            speechsdk.SpeechSynthesisOutputFormat.Ogg24Khz16BitMonoOpus
        )

        # Direct output to a pull stream rather than a file or audio device.
        pull_stream = speechsdk.audio.PullAudioOutputStream()
        audio_config = speechsdk.audio.AudioOutputConfig(stream=pull_stream)
        speech_synthesizer = speechsdk.SpeechSynthesizer(
            speech_config=speech_config,
            audio_config=audio_config
        )

        # Synthesize speech; .get() blocks until the operation finishes.
        speech_synthesis_result = speech_synthesizer.speak_text_async(text).get()
        reason = speech_synthesis_result.reason

        if reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            logging.info("Speech synthesized for text: %s", text)

            # The complete audio payload is available on the result object.
            audio_data = bytes(speech_synthesis_result.audio_data)
            return func.HttpResponse(
                audio_data,
                mimetype="audio/ogg",
                status_code=200
            )

        if reason == speechsdk.ResultReason.Canceled:
            cancellation_details = speech_synthesis_result.cancellation_details
            logging.error("Speech synthesis canceled: %s", cancellation_details.reason)
            if cancellation_details.reason == speechsdk.CancellationReason.Error:
                logging.error("Error details: %s", cancellation_details.error_details)
            return func.HttpResponse(
                f"Speech synthesis failed: {cancellation_details.reason}",
                status_code=500
            )
    except Exception as e:
        # Top-level boundary of the handler: log with traceback and report.
        logging.exception("Error during speech synthesis: %s", e)
        return func.HttpResponse(
            f"An error occurred: {str(e)}",
            status_code=500
        )

    # Neither completed nor canceled: a server-side failure, not a client error.
    logging.error("Unexpected speech synthesis result: %s", reason)
    return func.HttpResponse(
        f"Speech synthesis failed: {reason}",
        status_code=500
    )
|
|
|
# Example request: https://XXXX.azurewebsites.net/api/tts?key=XXXX&text=$s