Understand how to use Prefetch and how you can optimize your input and training pipelines
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Prefetch.ipynb",
"provenance": [],
"authorship_tag": "ABX9TyPQa1ZxdbinJT/QujZkexhz",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/Rishit-dagli/27aa9fe80d467920d2d0faaabb8bbdc3/prefetch.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "YbnFcovifzTU"
},
"source": [
"💡#TensorFlowTip\r\n",
"Use .prefetch to reduce your step time of training and extracting data\r\n",
"\r\n",
"- overlap preprocessing and model execution\r\n",
"- while the model executes training step n input pipeline is reading the data for n+1 step\r\n",
"- reduce idle time for the GPU and CPU"
]
},
{
"cell_type": "code",
"metadata": {
"id": "SqTOouW-Re91"
},
"source": [
"import tensorflow as tf\r\n",
"import time"
],
"execution_count": 1,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Yvb3YpvkSRk4"
},
"source": [
"class ArtificialDataset(tf.data.Dataset):\r\n",
" def _generator(num_samples):\r\n",
" # Opening the file\r\n",
" time.sleep(0.03)\r\n",
"\r\n",
" for sample_idx in range(num_samples):\r\n",
" # Reading data (line, record) from the file\r\n",
" time.sleep(0.015)\r\n",
"\r\n",
" yield (sample_idx,)\r\n",
"\r\n",
" def __new__(cls, num_samples=3):\r\n",
" return tf.data.Dataset.from_generator(\r\n",
" cls._generator,\r\n",
" output_types=tf.dtypes.int64,\r\n",
" output_shapes=(1,),\r\n",
" args=(num_samples,)\r\n",
" )"
],
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "owgpUHXNS2_V"
},
"source": [
"def benchmark(dataset, num_epochs=2):\r\n",
" start_time = time.perf_counter()\r\n",
" for epoch_num in range(num_epochs):\r\n",
" for sample in dataset:\r\n",
" # Performing a training step\r\n",
" time.sleep(0.01)\r\n",
" tf.print(\"Execution time:\", time.perf_counter() - start_time)"
],
"execution_count": 3,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "9VrYgd1vTN-P",
"outputId": "24c220b0-cb47-411a-e500-24a927922318"
},
"source": [
"benchmark(ArtificialDataset())"
],
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"text": [
"Execution time: 0.38392184499997484\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Am87LIjEZ2Bp",
"outputId": "89563cc2-26aa-408c-ac43-7fc84f45b6eb"
},
"source": [
"benchmark(\r\n",
" ArtificialDataset()\r\n",
" .prefetch(tf.data.experimental.AUTOTUNE)\r\n",
")"
],
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"text": [
"Execution time: 0.19688551699999834\n"
],
"name": "stdout"
}
]
}
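,
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Prefetching roughly halves the execution time here (about 0.38 s without `prefetch` versus about 0.20 s with it), because the input pipeline produces elements in the background while the simulated training step runs.\r\n",
"\r\n",
"The cell below is a minimal sketch of where `prefetch` usually sits in a fuller pipeline: after `map` preprocessing and `batch`, so ready-to-consume batches are prepared ahead of time. The `preprocess` function and the batch size of 2 are illustrative assumptions, not part of the benchmark above."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Minimal sketch (assumed helper `preprocess`, assumed batch size 2) of prefetch\r\n",
"# at the end of a map -> batch input pipeline.\r\n",
"def preprocess(sample):\r\n",
"    # Placeholder per-element preprocessing; swap in real transformations.\r\n",
"    return tf.cast(sample, tf.float32)\r\n",
"\r\n",
"pipeline = (\r\n",
"    ArtificialDataset()\r\n",
"    .map(preprocess, num_parallel_calls=tf.data.experimental.AUTOTUNE)\r\n",
"    .batch(2)\r\n",
"    .prefetch(tf.data.experimental.AUTOTUNE)  # overlap producing and consuming\r\n",
")\r\n",
"\r\n",
"benchmark(pipeline)"
],
"execution_count": null,
"outputs": []
}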
]
}