Skip to content

Instantly share code, notes, and snippets.

@knbknb
Created December 15, 2024 20:29
Show Gist options
  • Save knbknb/3019d4b71e272612b47a46fe4672f075 to your computer and use it in GitHub Desktop.
Save knbknb/3019d4b71e272612b47a46fe4672f075 to your computer and use it in GitHub Desktop.
Whisper Speech to Text Shellscript
#!/bin/bash
# Speech to text with the Whisper model using the OpenAI API.
#
# Usage: ./speech2text-whisper.sh input_audio_file
#
# Environment:
#   OPENAI_API_KEY  API key sent as the Bearer token (required).
#
# Output:
#   Writes the transcription next to the input file as
#   <input-without-extension>_transcription.txt
#
# Exit status:
#   0 on success, 1 on any validation, transport or API error.

set -uo pipefail

# Print an error message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Check if an input argument is provided and validate the file extension
if [[ -z "${1:-}" ]]; then
  die "Error: No input file provided. Please specify an audio file (MP3, WAV, etc.) as the first argument."
fi
infile_path=$1

# Match the extension case-insensitively so that e.g. "TALK.MP3" is accepted.
shopt -s nocasematch
if [[ ! "$infile_path" =~ \.(mp3|wav|ogg|flac)$ ]]; then
  die "Error: The input file must be an audio file (MP3, WAV, OGG, FLAC)."
fi
shopt -u nocasematch

if [[ ! -f "$infile_path" || ! -r "$infile_path" ]]; then
  die "Error: The input file '$infile_path' does not exist or is not readable."
fi

# The API key must come from the environment; never hardcode it here.
if [[ -z "${OPENAI_API_KEY:-}" ]]; then
  die "Error: The OPENAI_API_KEY environment variable is not set."
fi

# Check if the file size exceeds 25MB.
# 'wc -c' is used instead of 'du -b' because the latter is GNU-only.
readonly max_size_bytes=$((25 * 1024 * 1024))
file_size_bytes=$(wc -c < "$infile_path") \
  || die "Error: Could not determine the size of '$infile_path'."
# Some wc implementations pad the count with blanks; strip them.
file_size_bytes=${file_size_bytes//[[:space:]]/}
if (( file_size_bytes > max_size_bytes )); then
  die "Error: The input file size exceeds 25MB. Please use a file that is 25MB or smaller."
fi

# Set the Whisper model to use
readonly WHISPER_MODEL="whisper-1"
readonly RESPONSE_FORMAT="text"

# Set the output file name
outfile_path="${infile_path%.*}_transcription.txt"

# Collect the response in a private temporary file first, so that an API
# error body never ends up in (or overwrites) the transcription file.
temp_response_file=$(mktemp) || die "Error: Could not create a temporary file."
trap 'rm -f -- "$temp_response_file"' EXIT

# Submit the request to the OpenAI API.
# -sS keeps curl quiet but still prints transport errors; -w emits the HTTP
# status code on stdout while -o sends the response body to the temp file.
# No explicit Content-Type header: -F makes curl generate the multipart
# header including the required boundary.
http_status=$(
  curl -sS https://api.openai.com/v1/audio/transcriptions \
    -H "Authorization: Bearer $OPENAI_API_KEY" \
    -F "model=$WHISPER_MODEL" \
    -F "response_format=$RESPONSE_FORMAT" \
    -F "temperature=0" \
    -F "language=en" \
    -F "file=@$infile_path" \
    -o "$temp_response_file" \
    -w '%{http_code}'
) || die "Error: The request to the OpenAI API failed."

# Anything other than a 2xx status means the body is an error document,
# not a transcription.
if [[ "$http_status" != 2* ]]; then
  printf '%s\n' "Error: Could not extract transcription text from API response" >&2
  printf 'HTTP status: %s\n' "$http_status" >&2
  cat -- "$temp_response_file" >&2
  exit 1
fi

# Copy via redirection (rather than mv) so the output file gets the usual
# umask-based permissions instead of mktemp's restrictive mode.
cat -- "$temp_response_file" > "$outfile_path" \
  || die "Error: Could not write the transcription to '$outfile_path'."

echo "Transcription successful. Saved to $outfile_path..."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment