Skip to content

Instantly share code, notes, and snippets.

@sebington
Created December 26, 2024 22:28
Show Gist options
  • Save sebington/c2e6c6ef7bb32fb8bcb1f2cd062b4bdc to your computer and use it in GitHub Desktop.
Save sebington/c2e6c6ef7bb32fb8bcb1f2cd062b4bdc to your computer and use it in GitHub Desktop.
Batch transcribe audio/video files using Groq Whisper
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "8c39022e-4551-4167-ad56-9a39da484e38",
"metadata": {},
"outputs": [],
"source": [
"from groq import Groq\n",
"client = Groq()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "3d548229-26b0-464f-98da-77ddc3b8d5f8",
"metadata": {},
"outputs": [],
"source": [
"filename = \"ElevenLabs_Eastend_Steve_2.mp3\""
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "35d8827f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" When suddenly, there's this strange sucking noise. It's Armstrong! He's only brought his own food along in a liquid form in a belt pouch and started sucking it noisily from a straw. I should say, Oi Armstrong, no! You're not on your lunar module now. Pick up your knife and fork and eat your finders chicken Italians in a terrestrial fashion.\n"
]
}
],
"source": [
"# Load the audio into memory first so the file handle is closed before the API call.\n",
"with open(filename, \"rb\") as file:\n",
"    audio_bytes = file.read()\n",
"\n",
"transcription = client.audio.transcriptions.create(\n",
"    file=(filename, audio_bytes),\n",
"    model=\"whisper-large-v3-turbo\", # distil-whisper-large-v3-en or whisper-large-v3\n",
"    response_format=\"verbose_json\",\n",
")\n",
"# Show only the plain transcript text.\n",
"print(transcription.text)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c8b407d7-35f8-42b3-bc2d-96c5b0e8400f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Transcription(text=\" When suddenly, there's this strange sucking noise. It's Armstrong! He's only brought his own food along in a liquid form in a belt pouch and started sucking it noisily from a straw. I should say, Oi Armstrong, no! You're not on your lunar module now. Pick up your knife and fork and eat your finders chicken Italians in a terrestrial fashion.\", task='transcribe', language='English', duration=23.3, segments=[{'id': 0, 'seek': 0, 'start': 0, 'end': 4.38, 'text': \" When suddenly, there's this strange sucking noise. It's Armstrong!\", 'tokens': [50365, 1133, 5800, 11, 456, 311, 341, 5861, 38669, 5658, 13, 467, 311, 36100, 0, 50584], 'temperature': 0, 'avg_logprob': -0.2622168, 'compression_ratio': 1.5336323, 'no_speech_prob': 1.9729613e-12}, {'id': 1, 'seek': 0, 'start': 4.38, 'end': 9.08, 'text': \" He's only brought his own food along in a liquid form in a belt pouch\", 'tokens': [50584, 634, 311, 787, 3038, 702, 1065, 1755, 2051, 294, 257, 6553, 1254, 294, 257, 10750, 27781, 50819], 'temperature': 0, 'avg_logprob': -0.2622168, 'compression_ratio': 1.5336323, 'no_speech_prob': 1.9729613e-12}, {'id': 2, 'seek': 0, 'start': 9.08, 'end': 11.96, 'text': ' and started sucking it noisily from a straw.', 'tokens': [50819, 293, 1409, 38669, 309, 572, 271, 953, 490, 257, 10099, 13, 50963], 'temperature': 0, 'avg_logprob': -0.2622168, 'compression_ratio': 1.5336323, 'no_speech_prob': 1.9729613e-12}, {'id': 3, 'seek': 0, 'start': 11.96, 'end': 14.38, 'text': ' I should say, Oi Armstrong, no!', 'tokens': [50963, 286, 820, 584, 11, 31610, 36100, 11, 572, 0, 51084], 'temperature': 0, 'avg_logprob': -0.2622168, 'compression_ratio': 1.5336323, 'no_speech_prob': 1.9729613e-12}, {'id': 4, 'seek': 0, 'start': 14.38, 'end': 16.7, 'text': \" You're not on your lunar module now.\", 'tokens': [51084, 509, 434, 406, 322, 428, 32581, 10088, 586, 13, 51200], 'temperature': 0, 'avg_logprob': -0.2622168, 'compression_ratio': 1.5336323, 'no_speech_prob': 1.9729613e-12}, 
{'id': 5, 'seek': 0, 'start': 16.7, 'end': 23.12, 'text': ' Pick up your knife and fork and eat your finders chicken Italians in a terrestrial fashion.', 'tokens': [51200, 14129, 493, 428, 7976, 293, 17716, 293, 1862, 428, 915, 433, 4662, 43620, 294, 257, 1796, 34539, 6700, 13, 51521], 'temperature': 0, 'avg_logprob': -0.2622168, 'compression_ratio': 1.5336323, 'no_speech_prob': 1.9729613e-12}], x_groq={'id': 'req_01jg2f5xf6e8esvszdn6qrxkm3'})\n"
]
}
],
"source": [
"# Same request as above, but print the whole response object (text, language,\n",
"# duration and per-segment details) instead of only the transcript text.\n",
"with open(filename, \"rb\") as file:\n",
"    audio_bytes = file.read()\n",
"\n",
"transcription = client.audio.transcriptions.create(\n",
"    file=(filename, audio_bytes),\n",
"    model=\"whisper-large-v3-turbo\", # distil-whisper-large-v3-en or whisper-large-v3\n",
"    response_format=\"verbose_json\",\n",
"    #prompt=\"Specify context or spelling\", # Optional\n",
"    #language=\"es\", # Optional\n",
"    #temperature=0.0 # Optional\n",
")\n",
"#print(transcription.text)\n",
"print(transcription)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "d5f07a6a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'id': 0,\n",
" 'seek': 0,\n",
" 'start': 0,\n",
" 'end': 4.38,\n",
" 'text': \" When suddenly, there's this strange sucking noise. It's Armstrong!\",\n",
" 'tokens': [50365,\n",
" 1133,\n",
" 5800,\n",
" 11,\n",
" 456,\n",
" 311,\n",
" 341,\n",
" 5861,\n",
" 38669,\n",
" 5658,\n",
" 13,\n",
" 467,\n",
" 311,\n",
" 36100,\n",
" 0,\n",
" 50584],\n",
" 'temperature': 0,\n",
" 'avg_logprob': -0.2622168,\n",
" 'compression_ratio': 1.5336323,\n",
" 'no_speech_prob': 1.9729613e-12},\n",
" {'id': 1,\n",
" 'seek': 0,\n",
" 'start': 4.38,\n",
" 'end': 9.08,\n",
" 'text': \" He's only brought his own food along in a liquid form in a belt pouch\",\n",
" 'tokens': [50584,\n",
" 634,\n",
" 311,\n",
" 787,\n",
" 3038,\n",
" 702,\n",
" 1065,\n",
" 1755,\n",
" 2051,\n",
" 294,\n",
" 257,\n",
" 6553,\n",
" 1254,\n",
" 294,\n",
" 257,\n",
" 10750,\n",
" 27781,\n",
" 50819],\n",
" 'temperature': 0,\n",
" 'avg_logprob': -0.2622168,\n",
" 'compression_ratio': 1.5336323,\n",
" 'no_speech_prob': 1.9729613e-12},\n",
" {'id': 2,\n",
" 'seek': 0,\n",
" 'start': 9.08,\n",
" 'end': 11.96,\n",
" 'text': ' and started sucking it noisily from a straw.',\n",
" 'tokens': [50819,\n",
" 293,\n",
" 1409,\n",
" 38669,\n",
" 309,\n",
" 572,\n",
" 271,\n",
" 953,\n",
" 490,\n",
" 257,\n",
" 10099,\n",
" 13,\n",
" 50963],\n",
" 'temperature': 0,\n",
" 'avg_logprob': -0.2622168,\n",
" 'compression_ratio': 1.5336323,\n",
" 'no_speech_prob': 1.9729613e-12},\n",
" {'id': 3,\n",
" 'seek': 0,\n",
" 'start': 11.96,\n",
" 'end': 14.38,\n",
" 'text': ' I should say, Oi Armstrong, no!',\n",
" 'tokens': [50963, 286, 820, 584, 11, 31610, 36100, 11, 572, 0, 51084],\n",
" 'temperature': 0,\n",
" 'avg_logprob': -0.2622168,\n",
" 'compression_ratio': 1.5336323,\n",
" 'no_speech_prob': 1.9729613e-12},\n",
" {'id': 4,\n",
" 'seek': 0,\n",
" 'start': 14.38,\n",
" 'end': 16.7,\n",
" 'text': \" You're not on your lunar module now.\",\n",
" 'tokens': [51084, 509, 434, 406, 322, 428, 32581, 10088, 586, 13, 51200],\n",
" 'temperature': 0,\n",
" 'avg_logprob': -0.2622168,\n",
" 'compression_ratio': 1.5336323,\n",
" 'no_speech_prob': 1.9729613e-12},\n",
" {'id': 5,\n",
" 'seek': 0,\n",
" 'start': 16.7,\n",
" 'end': 23.12,\n",
" 'text': ' Pick up your knife and fork and eat your finders chicken Italians in a terrestrial fashion.',\n",
" 'tokens': [51200,\n",
" 14129,\n",
" 493,\n",
" 428,\n",
" 7976,\n",
" 293,\n",
" 17716,\n",
" 293,\n",
" 1862,\n",
" 428,\n",
" 915,\n",
" 433,\n",
" 4662,\n",
" 43620,\n",
" 294,\n",
" 257,\n",
" 1796,\n",
" 34539,\n",
" 6700,\n",
" 13,\n",
" 51521],\n",
" 'temperature': 0,\n",
" 'avg_logprob': -0.2622168,\n",
" 'compression_ratio': 1.5336323,\n",
" 'no_speech_prob': 1.9729613e-12}]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep the per-segment entries (start/end times and text) for the subtitle steps below.\n",
"segments = transcription.segments\n",
"segments"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "b381c88c",
"metadata": {},
"outputs": [],
"source": [
"import math\n",
"\n",
"def convert_seconds_to_hms(seconds):\n",
"    \"\"\"Format a duration in seconds as an SRT timecode ``HH:MM:SS,mmm``.\n",
"\n",
"    The value is rounded to whole milliseconds first and then split with\n",
"    integer arithmetic. Flooring the fractional part of the raw float\n",
"    truncated values such as 4.38 to 379 ms (4.38 is not exactly\n",
"    representable in binary); rounding up front also lets 59.9996\n",
"    carry cleanly into the next minute.\n",
"\n",
"    Args:\n",
"        seconds: Non-negative offset in seconds (int or float).\n",
"\n",
"    Returns:\n",
"        The timecode string, e.g. ``00:00:04,380`` for ``4.38``.\n",
"    \"\"\"\n",
"    total_ms = round(seconds * 1000)\n",
"    hours, remainder_ms = divmod(total_ms, 3_600_000)\n",
"    minutes, remainder_ms = divmod(remainder_ms, 60_000)\n",
"    whole_seconds, milliseconds = divmod(remainder_ms, 1000)\n",
"    return f\"{hours:02}:{minutes:02}:{whole_seconds:02},{milliseconds:03}\""
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "c8ff6a57",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[00:00:00,000 -> 00:00:04,379] When suddenly, there's this strange sucking noise. It's Armstrong!\n",
"[00:00:04,379 -> 00:00:09,080] He's only brought his own food along in a liquid form in a belt pouch\n",
"[00:00:09,080 -> 00:00:11,960] and started sucking it noisily from a straw.\n",
"[00:00:11,960 -> 00:00:14,380] I should say, Oi Armstrong, no!\n",
"[00:00:14,380 -> 00:00:16,699] You're not on your lunar module now.\n",
"[00:00:16,699 -> 00:00:23,120] Pick up your knife and fork and eat your finders chicken Italians in a terrestrial fashion.\n"
]
}
],
"source": [
"def format_segments_with_timecode(segments):\n",
"    \"\"\"Return one ``[start -> end] text`` line per transcription segment.\n",
"\n",
"    Each segment is read through its ``start``, ``end`` and ``text`` keys;\n",
"    the two times are rendered with ``convert_seconds_to_hms``.\n",
"    \"\"\"\n",
"    return [\n",
"        f\"[{convert_seconds_to_hms(seg['start'])} -> {convert_seconds_to_hms(seg['end'])}] {seg['text']}\"\n",
"        for seg in segments\n",
"    ]\n",
"\n",
"# Print all segments\n",
"for formatted_line in format_segments_with_timecode(segments):\n",
"    print(formatted_line)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "77a20d69",
"metadata": {},
"outputs": [],
"source": [
"# Save the transcription as an .srt subtitle file.\n",
"# The encoding is set explicitly so non-ASCII transcripts are written the same\n",
"# way on every platform instead of depending on the locale default.\n",
"with open(\"subs.srt\", \"w\", encoding=\"utf-8\") as f: # define subtitle file name here\n",
"    # SRT cues are numbered from 1.\n",
"    for count, segment in enumerate(segments, start=1):\n",
"        duration = f\"{convert_seconds_to_hms(segment['start'])} --> {convert_seconds_to_hms(segment['end'])}\\n\"\n",
"        # Strip both ends: a trailing newline in the text would insert a blank\n",
"        # line, which terminates the cue early.\n",
"        text = f\"{segment['text'].strip()}\\n\\n\"\n",
"        f.write(f\"{count}\\n{duration}{text}\") # cue index, timecode line, text, blank separator"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment