Created
December 26, 2024 22:28
-
-
Save sebington/c2e6c6ef7bb32fb8bcb1f2cd062b4bdc to your computer and use it in GitHub Desktop.
Batch transcribe audio/video files using Groq Whisper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "8c39022e-4551-4167-ad56-9a39da484e38", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from groq import Groq\n", | |
"client = Groq()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "3d548229-26b0-464f-98da-77ddc3b8d5f8", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"filename = \"ElevenLabs_Eastend_Steve_2.mp3\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "35d8827f", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" When suddenly, there's this strange sucking noise. It's Armstrong! He's only brought his own food along in a liquid form in a belt pouch and started sucking it noisily from a straw. I should say, Oi Armstrong, no! You're not on your lunar module now. Pick up your knife and fork and eat your finders chicken Italians in a terrestrial fashion.\n" | |
] | |
} | |
], | |
"source": [ | |
"with open(filename, \"rb\") as file:\n", | |
" transcription = client.audio.transcriptions.create(\n", | |
" file=(filename, file.read()),\n", | |
" model=\"whisper-large-v3-turbo\", # distil-whisper-large-v3-en or whisper-large-v3\n", | |
" response_format=\"verbose_json\",\n", | |
" )\n", | |
" print(transcription.text)\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "c8b407d7-35f8-42b3-bc2d-96c5b0e8400f", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Transcription(text=\" When suddenly, there's this strange sucking noise. It's Armstrong! He's only brought his own food along in a liquid form in a belt pouch and started sucking it noisily from a straw. I should say, Oi Armstrong, no! You're not on your lunar module now. Pick up your knife and fork and eat your finders chicken Italians in a terrestrial fashion.\", task='transcribe', language='English', duration=23.3, segments=[{'id': 0, 'seek': 0, 'start': 0, 'end': 4.38, 'text': \" When suddenly, there's this strange sucking noise. It's Armstrong!\", 'tokens': [50365, 1133, 5800, 11, 456, 311, 341, 5861, 38669, 5658, 13, 467, 311, 36100, 0, 50584], 'temperature': 0, 'avg_logprob': -0.2622168, 'compression_ratio': 1.5336323, 'no_speech_prob': 1.9729613e-12}, {'id': 1, 'seek': 0, 'start': 4.38, 'end': 9.08, 'text': \" He's only brought his own food along in a liquid form in a belt pouch\", 'tokens': [50584, 634, 311, 787, 3038, 702, 1065, 1755, 2051, 294, 257, 6553, 1254, 294, 257, 10750, 27781, 50819], 'temperature': 0, 'avg_logprob': -0.2622168, 'compression_ratio': 1.5336323, 'no_speech_prob': 1.9729613e-12}, {'id': 2, 'seek': 0, 'start': 9.08, 'end': 11.96, 'text': ' and started sucking it noisily from a straw.', 'tokens': [50819, 293, 1409, 38669, 309, 572, 271, 953, 490, 257, 10099, 13, 50963], 'temperature': 0, 'avg_logprob': -0.2622168, 'compression_ratio': 1.5336323, 'no_speech_prob': 1.9729613e-12}, {'id': 3, 'seek': 0, 'start': 11.96, 'end': 14.38, 'text': ' I should say, Oi Armstrong, no!', 'tokens': [50963, 286, 820, 584, 11, 31610, 36100, 11, 572, 0, 51084], 'temperature': 0, 'avg_logprob': -0.2622168, 'compression_ratio': 1.5336323, 'no_speech_prob': 1.9729613e-12}, {'id': 4, 'seek': 0, 'start': 14.38, 'end': 16.7, 'text': \" You're not on your lunar module now.\", 'tokens': [51084, 509, 434, 406, 322, 428, 32581, 10088, 586, 13, 51200], 'temperature': 0, 'avg_logprob': -0.2622168, 'compression_ratio': 1.5336323, 'no_speech_prob': 1.9729613e-12}, {'id': 5, 'seek': 0, 'start': 16.7, 'end': 23.12, 'text': ' Pick up your knife and fork and eat your finders chicken Italians in a terrestrial fashion.', 'tokens': [51200, 14129, 493, 428, 7976, 293, 17716, 293, 1862, 428, 915, 433, 4662, 43620, 294, 257, 1796, 34539, 6700, 13, 51521], 'temperature': 0, 'avg_logprob': -0.2622168, 'compression_ratio': 1.5336323, 'no_speech_prob': 1.9729613e-12}], x_groq={'id': 'req_01jg2f5xf6e8esvszdn6qrxkm3'})\n" | |
] | |
} | |
], | |
"source": [ | |
"with open(filename, \"rb\") as file:\n", | |
" transcription = client.audio.transcriptions.create(\n", | |
" file=(filename, file.read()),\n", | |
" model=\"whisper-large-v3-turbo\", # distil-whisper-large-v3-en or whisper-large-v3\n", | |
" response_format=\"verbose_json\",\n", | |
" #prompt=\"Specify context or spelling\", # Optional\n", | |
" #language=\"es\", # Optional\n", | |
" #temperature=0.0 # Optional\n", | |
" )\n", | |
" #print(transcription.text)\n", | |
" print(transcription)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "d5f07a6a", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[{'id': 0,\n", | |
" 'seek': 0,\n", | |
" 'start': 0,\n", | |
" 'end': 4.38,\n", | |
" 'text': \" When suddenly, there's this strange sucking noise. It's Armstrong!\",\n", | |
" 'tokens': [50365,\n", | |
" 1133,\n", | |
" 5800,\n", | |
" 11,\n", | |
" 456,\n", | |
" 311,\n", | |
" 341,\n", | |
" 5861,\n", | |
" 38669,\n", | |
" 5658,\n", | |
" 13,\n", | |
" 467,\n", | |
" 311,\n", | |
" 36100,\n", | |
" 0,\n", | |
" 50584],\n", | |
" 'temperature': 0,\n", | |
" 'avg_logprob': -0.2622168,\n", | |
" 'compression_ratio': 1.5336323,\n", | |
" 'no_speech_prob': 1.9729613e-12},\n", | |
" {'id': 1,\n", | |
" 'seek': 0,\n", | |
" 'start': 4.38,\n", | |
" 'end': 9.08,\n", | |
" 'text': \" He's only brought his own food along in a liquid form in a belt pouch\",\n", | |
" 'tokens': [50584,\n", | |
" 634,\n", | |
" 311,\n", | |
" 787,\n", | |
" 3038,\n", | |
" 702,\n", | |
" 1065,\n", | |
" 1755,\n", | |
" 2051,\n", | |
" 294,\n", | |
" 257,\n", | |
" 6553,\n", | |
" 1254,\n", | |
" 294,\n", | |
" 257,\n", | |
" 10750,\n", | |
" 27781,\n", | |
" 50819],\n", | |
" 'temperature': 0,\n", | |
" 'avg_logprob': -0.2622168,\n", | |
" 'compression_ratio': 1.5336323,\n", | |
" 'no_speech_prob': 1.9729613e-12},\n", | |
" {'id': 2,\n", | |
" 'seek': 0,\n", | |
" 'start': 9.08,\n", | |
" 'end': 11.96,\n", | |
" 'text': ' and started sucking it noisily from a straw.',\n", | |
" 'tokens': [50819,\n", | |
" 293,\n", | |
" 1409,\n", | |
" 38669,\n", | |
" 309,\n", | |
" 572,\n", | |
" 271,\n", | |
" 953,\n", | |
" 490,\n", | |
" 257,\n", | |
" 10099,\n", | |
" 13,\n", | |
" 50963],\n", | |
" 'temperature': 0,\n", | |
" 'avg_logprob': -0.2622168,\n", | |
" 'compression_ratio': 1.5336323,\n", | |
" 'no_speech_prob': 1.9729613e-12},\n", | |
" {'id': 3,\n", | |
" 'seek': 0,\n", | |
" 'start': 11.96,\n", | |
" 'end': 14.38,\n", | |
" 'text': ' I should say, Oi Armstrong, no!',\n", | |
" 'tokens': [50963, 286, 820, 584, 11, 31610, 36100, 11, 572, 0, 51084],\n", | |
" 'temperature': 0,\n", | |
" 'avg_logprob': -0.2622168,\n", | |
" 'compression_ratio': 1.5336323,\n", | |
" 'no_speech_prob': 1.9729613e-12},\n", | |
" {'id': 4,\n", | |
" 'seek': 0,\n", | |
" 'start': 14.38,\n", | |
" 'end': 16.7,\n", | |
" 'text': \" You're not on your lunar module now.\",\n", | |
" 'tokens': [51084, 509, 434, 406, 322, 428, 32581, 10088, 586, 13, 51200],\n", | |
" 'temperature': 0,\n", | |
" 'avg_logprob': -0.2622168,\n", | |
" 'compression_ratio': 1.5336323,\n", | |
" 'no_speech_prob': 1.9729613e-12},\n", | |
" {'id': 5,\n", | |
" 'seek': 0,\n", | |
" 'start': 16.7,\n", | |
" 'end': 23.12,\n", | |
" 'text': ' Pick up your knife and fork and eat your finders chicken Italians in a terrestrial fashion.',\n", | |
" 'tokens': [51200,\n", | |
" 14129,\n", | |
" 493,\n", | |
" 428,\n", | |
" 7976,\n", | |
" 293,\n", | |
" 17716,\n", | |
" 293,\n", | |
" 1862,\n", | |
" 428,\n", | |
" 915,\n", | |
" 433,\n", | |
" 4662,\n", | |
" 43620,\n", | |
" 294,\n", | |
" 257,\n", | |
" 1796,\n", | |
" 34539,\n", | |
" 6700,\n", | |
" 13,\n", | |
" 51521],\n", | |
" 'temperature': 0,\n", | |
" 'avg_logprob': -0.2622168,\n", | |
" 'compression_ratio': 1.5336323,\n", | |
" 'no_speech_prob': 1.9729613e-12}]" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"segments = (transcription.segments)\n", | |
"segments" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "b381c88c", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import math\n", | |
"\n", | |
"def convert_seconds_to_hms(seconds):\n", | |
" hours, remainder = divmod(seconds, 3600)\n", | |
" minutes, seconds = divmod(remainder, 60)\n", | |
" milliseconds = math.floor((seconds % 1) * 1000)\n", | |
" output = f\"{int(hours):02}:{int(minutes):02}:{int(seconds):02},{milliseconds:03}\"\n", | |
" return output" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "c8ff6a57", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[00:00:00,000 -> 00:00:04,379] When suddenly, there's this strange sucking noise. It's Armstrong!\n", | |
"[00:00:04,379 -> 00:00:09,080] He's only brought his own food along in a liquid form in a belt pouch\n", | |
"[00:00:09,080 -> 00:00:11,960] and started sucking it noisily from a straw.\n", | |
"[00:00:11,960 -> 00:00:14,380] I should say, Oi Armstrong, no!\n", | |
"[00:00:14,380 -> 00:00:16,699] You're not on your lunar module now.\n", | |
"[00:00:16,699 -> 00:00:23,120] Pick up your knife and fork and eat your finders chicken Italians in a terrestrial fashion.\n" | |
] | |
} | |
], | |
"source": [ | |
"def format_segments_with_timecode(segments):\n", | |
" formatted_segments = []\n", | |
" for segment in segments:\n", | |
" start_tc = convert_seconds_to_hms(segment['start'])\n", | |
" end_tc = convert_seconds_to_hms(segment['end'])\n", | |
" formatted_line = f\"[{start_tc} -> {end_tc}] {segment['text']}\"\n", | |
" formatted_segments.append(formatted_line)\n", | |
" return formatted_segments\n", | |
"\n", | |
"# Print all segments\n", | |
"for formatted_line in format_segments_with_timecode(segments):\n", | |
" print(formatted_line)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "77a20d69", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# save transcription as .srt file\n", | |
"count = 0\n", | |
"with open(\"subs.srt\", 'w') as f: # define subtitle file name here\n", | |
" for segment in segments:\n", | |
" count +=1\n", | |
" duration = f\"{convert_seconds_to_hms(segment['start'])} --> {convert_seconds_to_hms(segment['end'])}\\n\"\n", | |
" text = f\"{segment['text'].lstrip()}\\n\\n\"\n", | |
" f.write(f\"{count}\\n{duration}{text}\") # Write formatted string to the file" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.13.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment