Created
December 26, 2024 22:20
-
-
Save sebington/7cda2fec1302aab6f30048f207a5efcb to your computer and use it in GitHub Desktop.
Batch transcribe audio/video files using Faster-Whisper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Batch transcriptions with Faster-Whisper" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"id": "xRe-wO2gWNN4" | |
}, | |
"outputs": [], | |
"source": [ | |
"# pip install faster-whisper -q\n", | |
"from faster_whisper import WhisperModel" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"id": "i8yDvreNvETR" | |
}, | |
"outputs": [], | |
"source": [ | |
"# model initialization (run on GPU with FP16 or on CPU with int8)\n", | |
"model = WhisperModel(\"small.en\", device=\"cpu\", compute_type=\"int8\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"id": "UCZ_UwAiH5sG" | |
}, | |
"outputs": [], | |
"source": [ | |
"# path of the audio/video file to transcribe (the file itself is read later, by model.transcribe)\n", | |
"audio = \"en_bbc_eggs.mp4\"" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "7ZiMZZ1oRRq8" | |
}, | |
"source": [ | |
"### Transcribe a single file at segment level" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "GjFICWaGu94E" | |
}, | |
"outputs": [], | |
"source": [ | |
"# transcribe the file (segment level)\n", | |
"segments, _ = model.transcribe(audio, language=\"en\")\n", | |
"segments = list(segments) # This is where the transcription takes place\n", | |
"\n", | |
"# display transcription results\n", | |
"for segment in segments:\n", | |
" print(\"[%.2fs -> %.2fs] %s\" % (segment.start, segment.end, segment.text))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# create SRT subs and write to file (segment level)\n", | |
"import math\n", | |
"\n", | |
"def convert_seconds_to_hms(seconds):\n", | |
" hours, remainder = divmod(seconds, 3600)\n", | |
" minutes, seconds = divmod(remainder, 60)\n", | |
" milliseconds = math.floor((seconds % 1) * 1000)\n", | |
" output = f\"{int(hours):02}:{int(minutes):02}:{int(seconds):02},{milliseconds:03}\"\n", | |
" return output\n", | |
"\n", | |
"count = 0\n", | |
"with open(\"subs.srt\", 'w') as f: # enter subtitle file name\n", | |
" for segment in segments:\n", | |
" count +=1\n", | |
" duration = f\"{convert_seconds_to_hms(segment.start)} --> {convert_seconds_to_hms(segment.end)}\\n\"\n", | |
" text = f\"{segment.text.lstrip()}\\n\\n\"\n", | |
" f.write(f\"{count}\\n{duration}{text}\") # Write formatted string to the file" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "h_DC0hO7_uFO" | |
}, | |
"source": [ | |
"### Transcribe several files with different languages" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 407 | |
}, | |
"id": "oKTsHgCewWJ6", | |
"outputId": "05383ee7-14fb-4c19-db22-520ea7feed4d" | |
}, | |
"outputs": [], | |
"source": [ | |
"# cells 1-3 MUST be run first\n", | |
"# files MUST be named en_*.wav or fr_*.mp3 etc.\n", | |
"# date: 13-12-2023 (22:30)\n", | |
"\n", | |
"import math\n", | |
"import os\n", | |
"\n", | |
"def convert_seconds_to_hms(seconds):\n", | |
" hours, remainder = divmod(seconds, 3600)\n", | |
" minutes, seconds = divmod(remainder, 60)\n", | |
" milliseconds = math.floor((seconds % 1) * 1000)\n", | |
" output = f\"{int(hours):02}:{int(minutes):02}:{int(seconds):02},{milliseconds:03}\"\n", | |
" return output\n", | |
"\n", | |
"# Get a list of all files in the current directory\n", | |
"files_in_directory = os.listdir()\n", | |
"\n", | |
"# Filter the list to include only files with a specific extension (e.g., mp3)\n", | |
"audio_files = [file for file in files_in_directory if file.endswith(\".wav\")]\n", | |
"\n", | |
"# Iterate through each audio file and transcribe\n", | |
"for audio_file in audio_files:\n", | |
" print(f'Processing {audio_file} ...')\n", | |
" language = audio_file[:2] # retrieves language code from filename\n", | |
" segments, _ = model.transcribe(audio_file, language=language, beam_size=5)\n", | |
" segments = list(segments)\n", | |
" count = 0\n", | |
" output_file = f\"{os.path.splitext(audio_file)[0]}.srt\" # creates output SRT file based on audio file name\n", | |
" with open(output_file, 'w') as f: # Open file for writing\n", | |
" for segment in segments:\n", | |
" count +=1\n", | |
" duration = f\"{convert_seconds_to_hms(segment.start)} --> {convert_seconds_to_hms(segment.end)}\\n\"\n", | |
" text = f\"{segment.text.lstrip()}\\n\\n\"\n", | |
" f.write(f\"{count}\\n{duration}{text}\") # Write formatted string to the file\n", | |
" #print(f\"{duration}{text}\",end='')\n", | |
"\n", | |
"# Indicates end of process\n", | |
"print(\"Transcription process completed.\")" | |
] | |
} | |
], | |
"metadata": { | |
"accelerator": "GPU", | |
"colab": { | |
"provenance": [] | |
}, | |
"kernelspec": { | |
"display_name": "base", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.12.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment