knbknb · December 15, 2024 20:29
diff --git a/speech2text-whisper.sh b/speech2text-whisper.sh
 #!/bin/bash
 # Speech to text with the Whisper model using the OpenAI API.
 #
 # Usage: ./speech2text-whisper.sh input_audio_file
 #
 # Environment:
 #   OPENAI_API_KEY  OpenAI API key (required). Export it before running,
 #                   e.g.  export OPENAI_API_KEY="sk-..."
 #
 # Output: the transcription is written next to the input file, as
 #         <input path without extension>_transcription.txt
 # Exit status: 0 on success, 1 on any validation, network or API error.

 # Treat references to unset variables as errors.
 set -u

 # Check that an input argument is provided
 infile_path=${1:-}
 if [[ -z "$infile_path" ]]; then
   echo "Error: No input file provided. Please specify an audio file (MP3, WAV, etc.) as the first argument." >&2
   exit 1
 fi

 # Validate the file extension. The match is case-insensitive so that
 # "talk.MP3" is accepted as well as "talk.mp3".
 audio_ext_regex='\.(mp3|wav|ogg|flac)$'
 shopt -s nocasematch
 if [[ ! "$infile_path" =~ $audio_ext_regex ]]; then
   echo "Error: The input file must be an audio file (MP3, WAV, OGG, FLAC)." >&2
   exit 1
 fi
 shopt -u nocasematch

 # The file must exist and be readable before we try to measure or upload it
 if [[ ! -f "$infile_path" || ! -r "$infile_path" ]]; then
   echo "Error: Input file '$infile_path' does not exist or is not readable." >&2
   exit 1
 fi

 # The API key must come from the environment
 if [[ -z "${OPENAI_API_KEY:-}" ]]; then
   echo "Error: The OPENAI_API_KEY environment variable is not set." >&2
   exit 1
 fi

 # Check if the file size exceeds 25MB.
 # wc -c is used instead of GNU-only "du -b" so this also works on BSD/macOS.
 max_size_bytes=$((25 * 1024 * 1024))
 file_size_bytes=$(wc -c <"$infile_path") || {
   echo "Error: Could not determine the size of '$infile_path'." >&2
   exit 1
 }
 if (( file_size_bytes > max_size_bytes )); then
   echo "Error: The input file size exceeds 25MB. Please use a file that is 25MB or smaller." >&2
   exit 1
 fi

 # Set the Whisper model and the response format to request
 WHISPER_MODEL="whisper-1"
 RESPONSE_FORMAT="text"

 # Set the output file name
 outfile_path="${infile_path%.*}_transcription.txt"

 # Collect the API response in a private temporary file first, so that an
 # error response never ends up in the transcription file.
 temp_response_file=$(mktemp) || {
   echo "Error: Could not create a temporary file." >&2
   exit 1
 }
 trap 'rm -f -- "$temp_response_file"' EXIT

 # Submit the request to the OpenAI API. The response body goes to the
 # temporary file; the HTTP status code is captured from curl's stdout.
 # NOTE: the API key is passed on curl's command line, so other local users
 # may be able to see it in the process list.
 http_status=$(curl -sS https://api.openai.com/v1/audio/transcriptions \
   -o "$temp_response_file" \
   -w '%{http_code}' \
   -H "Authorization: Bearer $OPENAI_API_KEY" \
   -H "Content-Type: multipart/form-data" \
   -F "model=$WHISPER_MODEL" \
   -F "response_format=$RESPONSE_FORMAT" \
   -F "temperature=0" \
   -F "language=en" \
   -F "file=@$infile_path")
 curl_status=$?

 # Transport-level failure (DNS, TLS, connection reset, ...)
 if (( curl_status != 0 )); then
   echo "Error: curl failed with exit code $curl_status while contacting the OpenAI API." >&2
   exit 1
 fi

 # API-level failure: show the server's error body to the user
 if [[ "$http_status" != "200" ]]; then
   echo "Error: OpenAI API request failed (HTTP $http_status):" >&2
   cat -- "$temp_response_file" >&2
   exit 1
 fi

 # An empty body means there is no transcription to save
 if [[ ! -s "$temp_response_file" ]]; then
   echo "Error: Could not extract transcription text from API response" >&2
   exit 1
 fi

 # Copy (rather than move) so the output file gets the usual umask-based
 # permissions instead of mktemp's restrictive ones.
 if ! cat -- "$temp_response_file" >"$outfile_path"; then
   echo "Error: Could not write transcription to '$outfile_path'." >&2
   exit 1
 fi

 echo "Transcription successful. Saved to $outfile_path..."
	#!/bin/bash
	# Speech to text with the Whisper model using the OpenAI API.
	#
	# Usage: ./speech2text-whisper.sh input_audio_file
	#
	# Environment:
	#   OPENAI_API_KEY  OpenAI API key (required). Export it before running,
	#                   e.g.  export OPENAI_API_KEY="sk-..."
	#
	# Output: the transcription is written next to the input file, as
	#         <input path without extension>_transcription.txt
	# Exit status: 0 on success, 1 on any validation, network or API error.

	# Treat references to unset variables as errors.
	set -u

	# Check that an input argument is provided
	infile_path=${1:-}
	if [[ -z "$infile_path" ]]; then
	  echo "Error: No input file provided. Please specify an audio file (MP3, WAV, etc.) as the first argument." >&2
	  exit 1
	fi

	# Validate the file extension. The alternation uses plain "|" (an escaped
	# "\|" would only match a literal pipe character). The match is
	# case-insensitive so that "talk.MP3" is accepted as well as "talk.mp3".
	audio_ext_regex='\.(mp3|wav|ogg|flac)$'
	shopt -s nocasematch
	if [[ ! "$infile_path" =~ $audio_ext_regex ]]; then
	  echo "Error: The input file must be an audio file (MP3, WAV, OGG, FLAC)." >&2
	  exit 1
	fi
	shopt -u nocasematch

	# The file must exist and be readable before we try to measure or upload it
	if [[ ! -f "$infile_path" || ! -r "$infile_path" ]]; then
	  echo "Error: Input file '$infile_path' does not exist or is not readable." >&2
	  exit 1
	fi

	# The API key must come from the environment
	if [[ -z "${OPENAI_API_KEY:-}" ]]; then
	  echo "Error: The OPENAI_API_KEY environment variable is not set." >&2
	  exit 1
	fi

	# Check if the file size exceeds 25MB.
	# wc -c is used instead of GNU-only "du -b" so this also works on BSD/macOS.
	max_size_bytes=$((25 * 1024 * 1024))
	file_size_bytes=$(wc -c <"$infile_path") || {
	  echo "Error: Could not determine the size of '$infile_path'." >&2
	  exit 1
	}
	if (( file_size_bytes > max_size_bytes )); then
	  echo "Error: The input file size exceeds 25MB. Please use a file that is 25MB or smaller." >&2
	  exit 1
	fi

	# Set the Whisper model and the response format to request
	WHISPER_MODEL="whisper-1"
	RESPONSE_FORMAT="text"

	# Set the output file name
	outfile_path="${infile_path%.*}_transcription.txt"

	# Collect the API response in a private temporary file first, so that an
	# error response never ends up in the transcription file.
	temp_response_file=$(mktemp) || {
	  echo "Error: Could not create a temporary file." >&2
	  exit 1
	}
	trap 'rm -f -- "$temp_response_file"' EXIT

	# Submit the request to the OpenAI API. The response body goes to the
	# temporary file; the HTTP status code is captured from curl's stdout.
	# NOTE: the API key is passed on curl's command line, so other local users
	# may be able to see it in the process list.
	http_status=$(curl -sS https://api.openai.com/v1/audio/transcriptions \
	  -o "$temp_response_file" \
	  -w '%{http_code}' \
	  -H "Authorization: Bearer $OPENAI_API_KEY" \
	  -H "Content-Type: multipart/form-data" \
	  -F "model=$WHISPER_MODEL" \
	  -F "response_format=$RESPONSE_FORMAT" \
	  -F "temperature=0" \
	  -F "language=en" \
	  -F "file=@$infile_path")
	curl_status=$?

	# Transport-level failure (DNS, TLS, connection reset, ...)
	if (( curl_status != 0 )); then
	  echo "Error: curl failed with exit code $curl_status while contacting the OpenAI API." >&2
	  exit 1
	fi

	# API-level failure: show the server's error body to the user
	if [[ "$http_status" != "200" ]]; then
	  echo "Error: OpenAI API request failed (HTTP $http_status):" >&2
	  cat -- "$temp_response_file" >&2
	  exit 1
	fi

	# An empty body means there is no transcription to save
	if [[ ! -s "$temp_response_file" ]]; then
	  echo "Error: Could not extract transcription text from API response" >&2
	  exit 1
	fi

	# Copy (rather than move) so the output file gets the usual umask-based
	# permissions instead of mktemp's restrictive ones.
	if ! cat -- "$temp_response_file" >"$outfile_path"; then
	  echo "Error: Could not write transcription to '$outfile_path'." >&2
	  exit 1
	fi

	echo "Transcription successful. Saved to $outfile_path..."