Skip to content

Instantly share code, notes, and snippets.

@sebington
Created December 26, 2024 22:20
Show Gist options
  • Save sebington/7cda2fec1302aab6f30048f207a5efcb to your computer and use it in GitHub Desktop.
Save sebington/7cda2fec1302aab6f30048f207a5efcb to your computer and use it in GitHub Desktop.
Batch transcribe audio/video files using Faster-Whisper
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Batch transcriptions with Faster-Whisper"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "xRe-wO2gWNN4"
},
"outputs": [],
"source": [
"# pip install faster-whisper -q\n",
"from faster_whisper import WhisperModel"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "i8yDvreNvETR"
},
"outputs": [],
"source": [
"# Model initialization. Use device=\"cuda\" with compute_type=\"float16\" on a GPU,\n",
"# or device=\"cpu\" with compute_type=\"int8\" (the setting used here).\n",
"# NOTE: model names ending in \".en\" are English-only; switch MODEL_SIZE to a\n",
"# multilingual model such as \"small\" before transcribing other languages.\n",
"MODEL_SIZE = \"small.en\"\n",
"DEVICE = \"cpu\"\n",
"COMPUTE_TYPE = \"int8\"\n",
"\n",
"model = WhisperModel(MODEL_SIZE, device=DEVICE, compute_type=COMPUTE_TYPE)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"id": "UCZ_UwAiH5sG"
},
"outputs": [],
"source": [
"# Name of the audio/video file to transcribe. This only stores the file name;\n",
"# the file itself is handed to model.transcribe() in the cells below.\n",
"audio = \"en_bbc_eggs.mp4\""
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7ZiMZZ1oRRq8"
},
"source": [
"### Transcribe a single file at segment level"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "GjFICWaGu94E"
},
"outputs": [],
"source": [
"# Transcribe the file at segment level. The first value returned by\n",
"# transcribe() is consumed with list(), which is where the transcription\n",
"# actually takes place.\n",
"segment_iter, _ = model.transcribe(audio, language=\"en\")\n",
"segments = list(segment_iter)\n",
"\n",
"# Show every segment with its start and end time in seconds\n",
"for seg in segments:\n",
"    print(f\"[{seg.start:.2f}s -> {seg.end:.2f}s] {seg.text}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# create SRT subs and write to file (segment level)\n",
"import math\n",
"\n",
"def convert_seconds_to_hms(seconds):\n",
"    \"\"\"Format a time offset in seconds as an SRT timestamp (HH:MM:SS,mmm).\n",
"\n",
"    The value is rounded to whole milliseconds first and then split with\n",
"    integer arithmetic. Flooring the float fraction instead loses a\n",
"    millisecond on values such as 2.3 (which came out as 00:00:02,299).\n",
"    Negative inputs are clamped to zero because SRT has no negative times.\n",
"\n",
"    Args:\n",
"        seconds: time offset in seconds (int or float).\n",
"\n",
"    Returns:\n",
"        The timestamp as a string, e.g. \"01:01:01,500\".\n",
"    \"\"\"\n",
"    total_ms = max(0, int(round(seconds * 1000)))\n",
"    hours, remainder_ms = divmod(total_ms, 3_600_000)\n",
"    minutes, remainder_ms = divmod(remainder_ms, 60_000)\n",
"    whole_seconds, milliseconds = divmod(remainder_ms, 1000)\n",
"    return f\"{hours:02}:{minutes:02}:{whole_seconds:02},{milliseconds:03}\"\n",
"\n",
"# SRT files are conventionally UTF-8; setting the encoding explicitly avoids\n",
"# depending on the platform default, which can fail on non-ASCII text.\n",
"with open(\"subs.srt\", \"w\", encoding=\"utf-8\") as f:  # enter subtitle file name\n",
"    for count, segment in enumerate(segments, start=1):  # SRT cues are numbered from 1\n",
"        start = convert_seconds_to_hms(segment.start)\n",
"        end = convert_seconds_to_hms(segment.end)\n",
"        # One cue: index, time range, text, then a blank separator line\n",
"        f.write(f\"{count}\\n{start} --> {end}\\n{segment.text.lstrip()}\\n\\n\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "h_DC0hO7_uFO"
},
"source": [
"### Transcribe several files with different languages"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 407
},
"id": "oKTsHgCewWJ6",
"outputId": "05383ee7-14fb-4c19-db22-520ea7feed4d"
},
"outputs": [],
"source": [
"# cells 1-3 MUST be run first\n",
"# files MUST be named en_*.wav or fr_*.mp3 etc.\n",
"# date: 13-12-2023 (22:30)\n",
"\n",
"import math\n",
"import os\n",
"\n",
"def convert_seconds_to_hms(seconds):\n",
"    \"\"\"Format a time offset in seconds as an SRT timestamp (HH:MM:SS,mmm).\n",
"\n",
"    The value is rounded to whole milliseconds first and then split with\n",
"    integer arithmetic. Flooring the float fraction instead loses a\n",
"    millisecond on values such as 2.3 (which came out as 00:00:02,299).\n",
"    Negative inputs are clamped to zero because SRT has no negative times.\n",
"\n",
"    Args:\n",
"        seconds: time offset in seconds (int or float).\n",
"\n",
"    Returns:\n",
"        The timestamp as a string, e.g. \"01:01:01,500\".\n",
"    \"\"\"\n",
"    total_ms = max(0, int(round(seconds * 1000)))\n",
"    hours, remainder_ms = divmod(total_ms, 3_600_000)\n",
"    minutes, remainder_ms = divmod(remainder_ms, 60_000)\n",
"    whole_seconds, milliseconds = divmod(remainder_ms, 1000)\n",
"    return f\"{hours:02}:{minutes:02}:{whole_seconds:02},{milliseconds:03}\"\n",
"\n",
"# Extensions to pick up (lower case); add \".mp3\", \".mp4\", ... as needed\n",
"AUDIO_EXTENSIONS = (\".wav\",)\n",
"\n",
"# Collect the matching regular files in the current directory, in sorted order\n",
"# so that repeated runs process them in the same sequence\n",
"audio_files = sorted(\n",
"    name for name in os.listdir()\n",
"    if os.path.isfile(name) and name.lower().endswith(AUDIO_EXTENSIONS)\n",
")\n",
"\n",
"# Transcribe each file and write an SRT file with the same base name.\n",
"# NOTE: languages other than English need a multilingual model (e.g. \"small\");\n",
"# model names ending in \".en\" are English-only.\n",
"for audio_file in audio_files:\n",
"    print(f'Processing {audio_file} ...')\n",
"    language = audio_file[:2].lower()  # language code = first two characters of the file name\n",
"    segments, _ = model.transcribe(audio_file, language=language, beam_size=5)\n",
"    segments = list(segments)  # consume the result before the output file is created\n",
"    output_file = f\"{os.path.splitext(audio_file)[0]}.srt\"  # same base name, .srt extension\n",
"    # Explicit UTF-8 so accented characters are written correctly on every platform\n",
"    with open(output_file, 'w', encoding=\"utf-8\") as f:\n",
"        for count, segment in enumerate(segments, start=1):  # SRT cues are numbered from 1\n",
"            start = convert_seconds_to_hms(segment.start)\n",
"            end = convert_seconds_to_hms(segment.end)\n",
"            # One cue: index, time range, text, then a blank separator line\n",
"            f.write(f\"{count}\\n{start} --> {end}\\n{segment.text.lstrip()}\\n\\n\")\n",
"\n",
"# Indicates end of process\n",
"print(\"Transcription process completed.\")"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment