sebington · December 26, 2024 22:20
diff --git a/batch_faster_whisper.ipynb b/batch_faster_whisper.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Batch transcriptions with Faster-Whisper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "id": "xRe-wO2gWNN4"
   },
   "outputs": [],
   "source": [
    "# pip install faster-whisper -q\n",
    "from faster_whisper import WhisperModel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "id": "i8yDvreNvETR"
   },
   "outputs": [],
   "source": [
    "# model initialization (run on GPU with FP16 or on CPU with int8)\n",
    "# The multilingual \"small\" checkpoint is used because the batch cell further down passes a\n",
    "# per-file language code; the \".en\" checkpoints are English-only.\n",
    "model = WhisperModel(\"small\", device=\"cpu\", compute_type=\"int8\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "id": "UCZ_UwAiH5sG"
   },
   "outputs": [],
   "source": [
    "# name of the media file to transcribe; only the path is stored here,\n",
    "# it is handed to model.transcribe() in the cell below\n",
    "audio = \"en_bbc_eggs.mp4\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "7ZiMZZ1oRRq8"
   },
   "source": [
    "### Transcribe a single file at segment level"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "GjFICWaGu94E"
   },
   "outputs": [],
   "source": [
    "# segment-level transcription; the second value returned by transcribe() is discarded\n",
    "segments, _ = model.transcribe(audio, language=\"en\")\n",
    "# turning the result into a list is the step where the transcription takes place\n",
    "segments = list(segments)\n",
    "\n",
    "# print every segment as \"[start -> end] text\"\n",
    "for seg in segments:\n",
    "    stamped_line = \"[%.2fs -> %.2fs] %s\" % (seg.start, seg.end, seg.text)\n",
    "    print(stamped_line)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# create SRT subs and write to file (segment level)\n",
    "import math\n",
    "\n",
    "def convert_seconds_to_hms(seconds):\n",
    "    \"\"\"Format a time offset as an SRT timestamp ``HH:MM:SS,mmm``.\n",
    "\n",
    "    Args:\n",
    "        seconds: Non-negative offset in seconds (int or float).\n",
    "\n",
    "    Returns:\n",
    "        The offset as a zero-padded ``HH:MM:SS,mmm`` string.\n",
    "    \"\"\"\n",
    "    # Round to whole milliseconds first, then split with integer arithmetic.\n",
    "    # Flooring (seconds % 1) * 1000 loses a millisecond to float error\n",
    "    # (2.34 s came out as ,339 instead of ,340).\n",
    "    total_ms = math.floor(seconds * 1000 + 0.5)\n",
    "    hours, remainder_ms = divmod(total_ms, 3_600_000)\n",
    "    minutes, remainder_ms = divmod(remainder_ms, 60_000)\n",
    "    whole_seconds, milliseconds = divmod(remainder_ms, 1000)\n",
    "    return f\"{hours:02}:{minutes:02}:{whole_seconds:02},{milliseconds:03}\"\n",
    "\n",
    "# UTF-8 is requested explicitly so the file does not depend on the platform's default encoding\n",
    "with open(\"subs.srt\", \"w\", encoding=\"utf-8\") as f:  # enter subtitle file name\n",
    "    # SRT cues are numbered from 1\n",
    "    for count, segment in enumerate(segments, start=1):\n",
    "        duration = f\"{convert_seconds_to_hms(segment.start)} --> {convert_seconds_to_hms(segment.end)}\\n\"\n",
    "        text = f\"{segment.text.lstrip()}\\n\\n\"\n",
    "        f.write(f\"{count}\\n{duration}{text}\")  # Write formatted string to the file"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "h_DC0hO7_uFO"
   },
   "source": [
    "### Transcribe several files with different languages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 407
    },
    "id": "oKTsHgCewWJ6",
    "outputId": "05383ee7-14fb-4c19-db22-520ea7feed4d"
   },
   "outputs": [],
   "source": [
    "# cells 1-3 MUST be run first\n",
    "# files MUST be named en_*.wav or fr_*.mp3 etc.\n",
    "# date: 13-12-2023 (22:30)\n",
    "\n",
    "import math\n",
    "import os\n",
    "\n",
    "def convert_seconds_to_hms(seconds):\n",
    "    \"\"\"Format a time offset as an SRT timestamp ``HH:MM:SS,mmm``.\n",
    "\n",
    "    Args:\n",
    "        seconds: Non-negative offset in seconds (int or float).\n",
    "\n",
    "    Returns:\n",
    "        The offset as a zero-padded ``HH:MM:SS,mmm`` string.\n",
    "    \"\"\"\n",
    "    # Round to whole milliseconds first, then split with integer arithmetic.\n",
    "    # Flooring (seconds % 1) * 1000 loses a millisecond to float error\n",
    "    # (2.34 s came out as ,339 instead of ,340).\n",
    "    total_ms = math.floor(seconds * 1000 + 0.5)\n",
    "    hours, remainder_ms = divmod(total_ms, 3_600_000)\n",
    "    minutes, remainder_ms = divmod(remainder_ms, 60_000)\n",
    "    whole_seconds, milliseconds = divmod(remainder_ms, 1000)\n",
    "    return f\"{hours:02}:{minutes:02}:{whole_seconds:02},{milliseconds:03}\"\n",
    "\n",
    "# Extension of the files to process; change it (e.g. \".mp3\") or use a tuple of extensions\n",
    "audio_extension = \".wav\"\n",
    "\n",
    "# Get a list of all files in the current directory\n",
    "files_in_directory = os.listdir()\n",
    "\n",
    "# Keep only the files with the chosen extension, sorted for a stable processing order\n",
    "audio_files = sorted(file for file in files_in_directory if file.endswith(audio_extension))\n",
    "\n",
    "# Iterate through each audio file and transcribe\n",
    "for audio_file in audio_files:\n",
    "    print(f'Processing {audio_file} ...')\n",
    "    language = audio_file[:2] # retrieves language code from filename\n",
    "    # NOTE: a language other than \"en\" needs a multilingual model (the \".en\" checkpoints are English-only)\n",
    "    segments, _ = model.transcribe(audio_file, language=language, beam_size=5)\n",
    "    segments = list(segments)\n",
    "    output_file = f\"{os.path.splitext(audio_file)[0]}.srt\"  # creates output SRT file based on audio file name\n",
    "    # UTF-8 is requested explicitly: non-English transcripts contain non-ASCII characters\n",
    "    with open(output_file, \"w\", encoding=\"utf-8\") as f:  # Open file for writing\n",
    "        for count, segment in enumerate(segments, start=1):  # SRT cues are numbered from 1\n",
    "            duration = f\"{convert_seconds_to_hms(segment.start)} --> {convert_seconds_to_hms(segment.end)}\\n\"\n",
    "            text = f\"{segment.text.lstrip()}\\n\\n\"\n",
    "            f.write(f\"{count}\\n{duration}{text}\")  # Write formatted string to the file\n",
    "\n",
    "# Indicates end of process\n",
    "print(\"Transcription process completed.\")"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
 }
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Batch transcriptions with Faster-Whisper"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {
	"id": "xRe-wO2gWNN4"
	},
	"outputs": [],
	"source": [
	"# pip install faster-whisper -q\n",
	"from faster_whisper import WhisperModel"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {
	"id": "i8yDvreNvETR"
	},
	"outputs": [],
	"source": [
	"# model initialization (run on GPU with FP16 or on CPU with int8)\n",
	"# The multilingual \"small\" checkpoint is used because the batch cell further down passes a\n",
	"# per-file language code; the \".en\" checkpoints are English-only.\n",
	"model = WhisperModel(\"small\", device=\"cpu\", compute_type=\"int8\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"id": "UCZ_UwAiH5sG"
	},
	"outputs": [],
	"source": [
	"# name of the media file to transcribe; only the path is stored here,\n",
	"# it is handed to model.transcribe() in the cell below\n",
	"audio = \"en_bbc_eggs.mp4\""
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "7ZiMZZ1oRRq8"
	},
	"source": [
	"### Transcribe a single file at segment level"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"id": "GjFICWaGu94E"
	},
	"outputs": [],
	"source": [
	"# segment-level transcription; the second value returned by transcribe() is discarded\n",
	"segments, _ = model.transcribe(audio, language=\"en\")\n",
	"# turning the result into a list is the step where the transcription takes place\n",
	"segments = list(segments)\n",
	"\n",
	"# print every segment as \"[start -> end] text\"\n",
	"for seg in segments:\n",
	"    stamped_line = \"[%.2fs -> %.2fs] %s\" % (seg.start, seg.end, seg.text)\n",
	"    print(stamped_line)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# create SRT subs and write to file (segment level)\n",
	"import math\n",
	"\n",
	"def convert_seconds_to_hms(seconds):\n",
	"    \"\"\"Format a time offset as an SRT timestamp ``HH:MM:SS,mmm``.\n",
	"\n",
	"    Args:\n",
	"        seconds: Non-negative offset in seconds (int or float).\n",
	"\n",
	"    Returns:\n",
	"        The offset as a zero-padded ``HH:MM:SS,mmm`` string.\n",
	"    \"\"\"\n",
	"    # Round to whole milliseconds first, then split with integer arithmetic.\n",
	"    # Flooring (seconds % 1) * 1000 loses a millisecond to float error\n",
	"    # (2.34 s came out as ,339 instead of ,340).\n",
	"    total_ms = math.floor(seconds * 1000 + 0.5)\n",
	"    hours, remainder_ms = divmod(total_ms, 3_600_000)\n",
	"    minutes, remainder_ms = divmod(remainder_ms, 60_000)\n",
	"    whole_seconds, milliseconds = divmod(remainder_ms, 1000)\n",
	"    return f\"{hours:02}:{minutes:02}:{whole_seconds:02},{milliseconds:03}\"\n",
	"\n",
	"# UTF-8 is requested explicitly so the file does not depend on the platform's default encoding\n",
	"with open(\"subs.srt\", \"w\", encoding=\"utf-8\") as f:  # enter subtitle file name\n",
	"    # SRT cues are numbered from 1\n",
	"    for count, segment in enumerate(segments, start=1):\n",
	"        duration = f\"{convert_seconds_to_hms(segment.start)} --> {convert_seconds_to_hms(segment.end)}\\n\"\n",
	"        text = f\"{segment.text.lstrip()}\\n\\n\"\n",
	"        f.write(f\"{count}\\n{duration}{text}\")  # Write formatted string to the file"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "h_DC0hO7_uFO"
	},
	"source": [
	"### Transcribe several files with different languages"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 407
	},
	"id": "oKTsHgCewWJ6",
	"outputId": "05383ee7-14fb-4c19-db22-520ea7feed4d"
	},
	"outputs": [],
	"source": [
	"# cells 1-3 MUST be run first\n",
	"# files MUST be named en_*.wav or fr_*.mp3 etc.\n",
	"# date: 13-12-2023 (22:30)\n",
	"\n",
	"import math\n",
	"import os\n",
	"\n",
	"def convert_seconds_to_hms(seconds):\n",
	"    \"\"\"Format a time offset as an SRT timestamp ``HH:MM:SS,mmm``.\n",
	"\n",
	"    Args:\n",
	"        seconds: Non-negative offset in seconds (int or float).\n",
	"\n",
	"    Returns:\n",
	"        The offset as a zero-padded ``HH:MM:SS,mmm`` string.\n",
	"    \"\"\"\n",
	"    # Round to whole milliseconds first, then split with integer arithmetic.\n",
	"    # Flooring (seconds % 1) * 1000 loses a millisecond to float error\n",
	"    # (2.34 s came out as ,339 instead of ,340).\n",
	"    total_ms = math.floor(seconds * 1000 + 0.5)\n",
	"    hours, remainder_ms = divmod(total_ms, 3_600_000)\n",
	"    minutes, remainder_ms = divmod(remainder_ms, 60_000)\n",
	"    whole_seconds, milliseconds = divmod(remainder_ms, 1000)\n",
	"    return f\"{hours:02}:{minutes:02}:{whole_seconds:02},{milliseconds:03}\"\n",
	"\n",
	"# Extension of the files to process; change it (e.g. \".mp3\") or use a tuple of extensions\n",
	"audio_extension = \".wav\"\n",
	"\n",
	"# Get a list of all files in the current directory\n",
	"files_in_directory = os.listdir()\n",
	"\n",
	"# Keep only the files with the chosen extension, sorted for a stable processing order\n",
	"audio_files = sorted(file for file in files_in_directory if file.endswith(audio_extension))\n",
	"\n",
	"# Iterate through each audio file and transcribe\n",
	"for audio_file in audio_files:\n",
	"    print(f'Processing {audio_file} ...')\n",
	"    language = audio_file[:2] # retrieves language code from filename\n",
	"    # NOTE: a language other than \"en\" needs a multilingual model (the \".en\" checkpoints are English-only)\n",
	"    segments, _ = model.transcribe(audio_file, language=language, beam_size=5)\n",
	"    segments = list(segments)\n",
	"    output_file = f\"{os.path.splitext(audio_file)[0]}.srt\" # creates output SRT file based on audio file name\n",
	"    # UTF-8 is requested explicitly: non-English transcripts contain non-ASCII characters\n",
	"    with open(output_file, \"w\", encoding=\"utf-8\") as f: # Open file for writing\n",
	"        for count, segment in enumerate(segments, start=1):  # SRT cues are numbered from 1\n",
	"            duration = f\"{convert_seconds_to_hms(segment.start)} --> {convert_seconds_to_hms(segment.end)}\\n\"\n",
	"            text = f\"{segment.text.lstrip()}\\n\\n\"\n",
	"            f.write(f\"{count}\\n{duration}{text}\") # Write formatted string to the file\n",
	"\n",
	"# Indicates end of process\n",
	"print(\"Transcription process completed.\")"
	]
	}
	],
	"metadata": {
	"accelerator": "GPU",
	"colab": {
	"provenance": []
	},
	"kernelspec": {
	"display_name": "base",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.12.2"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}