Created
December 15, 2024 20:29
-
-
Save knbknb/3019d4b71e272612b47a46fe4672f075 to your computer and use it in GitHub Desktop.
Whisper Speech-to-Text Shell Script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Speech to text with the Whisper model using the OpenAI API.
#
# Usage: ./speech2text-whisper.sh input_audio_file
#
# Environment:
#   OPENAI_API_KEY  API key sent as the Bearer token (required).
#
# Output:
#   The transcription is written next to the input file, with the extension
#   replaced by "_transcription.txt" (e.g. talk.mp3 -> talk_transcription.txt).
#
# Exit status:
#   0 on success, 1 on any validation, transport, or API failure.

# Fail on unset variables and on failures inside pipelines; command failures
# that matter are checked explicitly below.
set -uo pipefail

# Print an error message to stderr and terminate with status 1.
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

# --- Validate the command-line argument -------------------------------------

infile_path=${1:-}

if [[ -z "$infile_path" ]]; then
  die "No input file provided. Please specify an audio file (MP3, WAV, etc.) as the first argument."
fi

if [[ ! "$infile_path" =~ \.(mp3|wav|ogg|flac)$ ]]; then
  die "The input file must be an audio file (MP3, WAV, OGG, FLAC)."
fi

if [[ ! -f "$infile_path" || ! -r "$infile_path" ]]; then
  die "The input file '$infile_path' does not exist or is not readable."
fi

# The key is only read from the environment; never hardcode it in this file.
if [[ -z "${OPENAI_API_KEY:-}" ]]; then
  die "The OPENAI_API_KEY environment variable is not set."
fi

# --- Check that the file size does not exceed 25MB --------------------------

readonly max_size_bytes=$((25 * 1024 * 1024))

# `wc -c` is used instead of `du -b` because the latter is GNU-only. The
# arithmetic expansion strips the padding some `wc` implementations emit.
file_size_bytes=$(($(wc -c <"$infile_path"))) \
  || die "Could not determine the size of '$infile_path'."

if ((file_size_bytes > max_size_bytes)); then
  die "The input file size exceeds 25MB. Please use a file that is 25MB or smaller."
fi

# --- Request parameters ------------------------------------------------------

readonly WHISPER_MODEL="whisper-1"
readonly RESPONSE_FORMAT="text"

# Output file name: input path with its extension replaced.
outfile_path="${infile_path%.*}_transcription.txt"

# The raw response is staged in a private temporary file so that an error body
# from the API never ends up in the transcription file. It is removed on every
# exit path.
temp_response_file=$(mktemp "${TMPDIR:-/tmp}/whisper_response.XXXXXX") \
  || die "Could not create a temporary file for the API response."
trap 'rm -f -- "$temp_response_file"' EXIT

# --- Submit the request to the OpenAI API -----------------------------------

# -o stores the response body in the temporary file, while -w prints only the
# HTTP status code on stdout so it can be captured and checked. No explicit
# Content-Type header is passed: -F makes curl send multipart/form-data with
# the correct boundary on its own.
http_status=$(
  curl -sS https://api.openai.com/v1/audio/transcriptions \
    -o "$temp_response_file" \
    -w '%{http_code}' \
    -H "Authorization: Bearer $OPENAI_API_KEY" \
    -F "model=$WHISPER_MODEL" \
    -F "response_format=$RESPONSE_FORMAT" \
    -F "temperature=0" \
    -F "language=en" \
    -F "file=@$infile_path"
) || die "The request to the OpenAI API failed (curl exit status $?)."

if [[ "$http_status" != "200" ]]; then
  printf 'Error: The OpenAI API returned HTTP status %s. Response:\n' "$http_status" >&2
  cat "$temp_response_file" >&2
  exit 1
fi

# An empty body means there is no transcription text to save.
if [[ ! -s "$temp_response_file" ]]; then
  die "Could not extract transcription text from API response"
fi

# Copy via redirection so the output file gets the usual umask-based
# permissions rather than the restrictive mode of the mktemp file.
cat "$temp_response_file" >"$outfile_path" \
  || die "Could not write the transcription to '$outfile_path'."

echo "Transcription successful. Saved to $outfile_path..."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment