Created
July 16, 2019 05:37
-
-
Save kungfoomanchu/4e952268569e7780314e9e9a414061af to your computer and use it in GitHub Desktop.
Cuisine For Google Colab
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "Cusine For Google Colab", | |
"version": "0.3.2", | |
"provenance": [], | |
"include_colab_link": true | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.1" | |
}, | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/kungfoomanchu/4e952268569e7780314e9e9a414061af/cusine-for-google-colab.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "yDrQBMtlPqSF", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "74ba2214-bc24-48b5-d55d-b2b933ae07f6" | |
}, | |
"source": [ | |
"import pandas as pd\n", | |
"from sklearn.preprocessing import LabelEncoder\n", | |
"from keras.utils import to_categorical\n", | |
"import numpy as np\n", | |
"import tensorflow as tf\n", | |
"from sklearn.model_selection import train_test_split\n", | |
"from sklearn.datasets import make_classification\n", | |
"from tensorflow.keras.models import Sequential\n", | |
"from tensorflow.keras.layers import Dense\n", | |
"from keras.models import model_from_json\n", | |
"from tensorflow.keras.models import load_model\n", | |
"\n", | |
"from google.colab import files" | |
], | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Using TensorFlow backend.\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "SyaoVEjpRM5w", | |
"colab_type": "code", | |
"colab": { | |
"resources": { | |
"http://localhost:8080/nbextensions/google.colab/files.js": { | |
"data": "Ly8gQ29weXJpZ2h0IDIwMTcgR29vZ2xlIExMQwovLwovLyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKLy8geW91IG1heSBub3QgdXNlIHRoaXMgZmlsZSBleGNlcHQgaW4gY29tcGxpYW5jZSB3aXRoIHRoZSBMaWNlbnNlLgovLyBZb3UgbWF5IG9idGFpbiBhIGNvcHkgb2YgdGhlIExpY2Vuc2UgYXQKLy8KLy8gICAgICBodHRwOi8vd3d3LmFwYWNoZS5vcmcvbGljZW5zZXMvTElDRU5TRS0yLjAKLy8KLy8gVW5sZXNzIHJlcXVpcmVkIGJ5IGFwcGxpY2FibGUgbGF3IG9yIGFncmVlZCB0byBpbiB3cml0aW5nLCBzb2Z0d2FyZQovLyBkaXN0cmlidXRlZCB1bmRlciB0aGUgTGljZW5zZSBpcyBkaXN0cmlidXRlZCBvbiBhbiAiQVMgSVMiIEJBU0lTLAovLyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KLy8gU2VlIHRoZSBMaWNlbnNlIGZvciB0aGUgc3BlY2lmaWMgbGFuZ3VhZ2UgZ292ZXJuaW5nIHBlcm1pc3Npb25zIGFuZAovLyBsaW1pdGF0aW9ucyB1bmRlciB0aGUgTGljZW5zZS4KCi8qKgogKiBAZmlsZW92ZXJ2aWV3IEhlbHBlcnMgZm9yIGdvb2dsZS5jb2xhYiBQeXRob24gbW9kdWxlLgogKi8KKGZ1bmN0aW9uKHNjb3BlKSB7CmZ1bmN0aW9uIHNwYW4odGV4dCwgc3R5bGVBdHRyaWJ1dGVzID0ge30pIHsKICBjb25zdCBlbGVtZW50ID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnc3BhbicpOwogIGVsZW1lbnQudGV4dENvbnRlbnQgPSB0ZXh0OwogIGZvciAoY29uc3Qga2V5IG9mIE9iamVjdC5rZXlzKHN0eWxlQXR0cmlidXRlcykpIHsKICAgIGVsZW1lbnQuc3R5bGVba2V5XSA9IHN0eWxlQXR0cmlidXRlc1trZXldOwogIH0KICByZXR1cm4gZWxlbWVudDsKfQoKLy8gTWF4IG51bWJlciBvZiBieXRlcyB3aGljaCB3aWxsIGJlIHVwbG9hZGVkIGF0IGEgdGltZS4KY29uc3QgTUFYX1BBWUxPQURfU0laRSA9IDEwMCAqIDEwMjQ7Ci8vIE1heCBhbW91bnQgb2YgdGltZSB0byBibG9jayB3YWl0aW5nIGZvciB0aGUgdXNlci4KY29uc3QgRklMRV9DSEFOR0VfVElNRU9VVF9NUyA9IDMwICogMTAwMDsKCmZ1bmN0aW9uIF91cGxvYWRGaWxlcyhpbnB1dElkLCBvdXRwdXRJZCkgewogIGNvbnN0IHN0ZXBzID0gdXBsb2FkRmlsZXNTdGVwKGlucHV0SWQsIG91dHB1dElkKTsKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIC8vIENhY2hlIHN0ZXBzIG9uIHRoZSBvdXRwdXRFbGVtZW50IHRvIG1ha2UgaXQgYXZhaWxhYmxlIGZvciB0aGUgbmV4dCBjYWxsCiAgLy8gdG8gdXBsb2FkRmlsZXNDb250aW51ZSBmcm9tIFB5dGhvbi4KICBvdXRwdXRFbGVtZW50LnN0ZXBzID0gc3RlcHM7CgogIHJldHVybiBfdXBsb2FkRmlsZXNDb250aW51ZShvdXRwdXRJZCk7Cn0KCi8vIFRoaXMgaXMgcm91Z2hseSBhbiBhc3luYyBnZW5lcmF
0b3IgKG5vdCBzdXBwb3J0ZWQgaW4gdGhlIGJyb3dzZXIgeWV0KSwKLy8gd2hlcmUgdGhlcmUgYXJlIG11bHRpcGxlIGFzeW5jaHJvbm91cyBzdGVwcyBhbmQgdGhlIFB5dGhvbiBzaWRlIGlzIGdvaW5nCi8vIHRvIHBvbGwgZm9yIGNvbXBsZXRpb24gb2YgZWFjaCBzdGVwLgovLyBUaGlzIHVzZXMgYSBQcm9taXNlIHRvIGJsb2NrIHRoZSBweXRob24gc2lkZSBvbiBjb21wbGV0aW9uIG9mIGVhY2ggc3RlcCwKLy8gdGhlbiBwYXNzZXMgdGhlIHJlc3VsdCBvZiB0aGUgcHJldmlvdXMgc3RlcCBhcyB0aGUgaW5wdXQgdG8gdGhlIG5leHQgc3RlcC4KZnVuY3Rpb24gX3VwbG9hZEZpbGVzQ29udGludWUob3V0cHV0SWQpIHsKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIGNvbnN0IHN0ZXBzID0gb3V0cHV0RWxlbWVudC5zdGVwczsKCiAgY29uc3QgbmV4dCA9IHN0ZXBzLm5leHQob3V0cHV0RWxlbWVudC5sYXN0UHJvbWlzZVZhbHVlKTsKICByZXR1cm4gUHJvbWlzZS5yZXNvbHZlKG5leHQudmFsdWUucHJvbWlzZSkudGhlbigodmFsdWUpID0+IHsKICAgIC8vIENhY2hlIHRoZSBsYXN0IHByb21pc2UgdmFsdWUgdG8gbWFrZSBpdCBhdmFpbGFibGUgdG8gdGhlIG5leHQKICAgIC8vIHN0ZXAgb2YgdGhlIGdlbmVyYXRvci4KICAgIG91dHB1dEVsZW1lbnQubGFzdFByb21pc2VWYWx1ZSA9IHZhbHVlOwogICAgcmV0dXJuIG5leHQudmFsdWUucmVzcG9uc2U7CiAgfSk7Cn0KCi8qKgogKiBHZW5lcmF0b3IgZnVuY3Rpb24gd2hpY2ggaXMgY2FsbGVkIGJldHdlZW4gZWFjaCBhc3luYyBzdGVwIG9mIHRoZSB1cGxvYWQKICogcHJvY2Vzcy4KICogQHBhcmFtIHtzdHJpbmd9IGlucHV0SWQgRWxlbWVudCBJRCBvZiB0aGUgaW5wdXQgZmlsZSBwaWNrZXIgZWxlbWVudC4KICogQHBhcmFtIHtzdHJpbmd9IG91dHB1dElkIEVsZW1lbnQgSUQgb2YgdGhlIG91dHB1dCBkaXNwbGF5LgogKiBAcmV0dXJuIHshSXRlcmFibGU8IU9iamVjdD59IEl0ZXJhYmxlIG9mIG5leHQgc3RlcHMuCiAqLwpmdW5jdGlvbiogdXBsb2FkRmlsZXNTdGVwKGlucHV0SWQsIG91dHB1dElkKSB7CiAgY29uc3QgaW5wdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQoaW5wdXRJZCk7CiAgaW5wdXRFbGVtZW50LmRpc2FibGVkID0gZmFsc2U7CgogIGNvbnN0IG91dHB1dEVsZW1lbnQgPSBkb2N1bWVudC5nZXRFbGVtZW50QnlJZChvdXRwdXRJZCk7CiAgb3V0cHV0RWxlbWVudC5pbm5lckhUTUwgPSAnJzsKCiAgY29uc3QgcGlja2VkUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICBpbnB1dEVsZW1lbnQuYWRkRXZlbnRMaXN0ZW5lcignY2hhbmdlJywgKGUpID0+IHsKICAgICAgcmVzb2x2ZShlLnRhcmdldC5maWxlcyk7CiAgICB9KTsKICB9KTsKCiAgY29uc3QgY2FuY2VsID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnYnV0dG9uJyk7CiAgaW5wdXRFbGVtZW50LnBhcmVudEVsZW1lbnQ
uYXBwZW5kQ2hpbGQoY2FuY2VsKTsKICBjYW5jZWwudGV4dENvbnRlbnQgPSAnQ2FuY2VsIHVwbG9hZCc7CiAgY29uc3QgY2FuY2VsUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICBjYW5jZWwub25jbGljayA9ICgpID0+IHsKICAgICAgcmVzb2x2ZShudWxsKTsKICAgIH07CiAgfSk7CgogIC8vIENhbmNlbCB1cGxvYWQgaWYgdXNlciBoYXNuJ3QgcGlja2VkIGFueXRoaW5nIGluIHRpbWVvdXQuCiAgY29uc3QgdGltZW91dFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgc2V0VGltZW91dCgoKSA9PiB7CiAgICAgIHJlc29sdmUobnVsbCk7CiAgICB9LCBGSUxFX0NIQU5HRV9USU1FT1VUX01TKTsKICB9KTsKCiAgLy8gV2FpdCBmb3IgdGhlIHVzZXIgdG8gcGljayB0aGUgZmlsZXMuCiAgY29uc3QgZmlsZXMgPSB5aWVsZCB7CiAgICBwcm9taXNlOiBQcm9taXNlLnJhY2UoW3BpY2tlZFByb21pc2UsIHRpbWVvdXRQcm9taXNlLCBjYW5jZWxQcm9taXNlXSksCiAgICByZXNwb25zZTogewogICAgICBhY3Rpb246ICdzdGFydGluZycsCiAgICB9CiAgfTsKCiAgaWYgKCFmaWxlcykgewogICAgcmV0dXJuIHsKICAgICAgcmVzcG9uc2U6IHsKICAgICAgICBhY3Rpb246ICdjb21wbGV0ZScsCiAgICAgIH0KICAgIH07CiAgfQoKICBjYW5jZWwucmVtb3ZlKCk7CgogIC8vIERpc2FibGUgdGhlIGlucHV0IGVsZW1lbnQgc2luY2UgZnVydGhlciBwaWNrcyBhcmUgbm90IGFsbG93ZWQuCiAgaW5wdXRFbGVtZW50LmRpc2FibGVkID0gdHJ1ZTsKCiAgZm9yIChjb25zdCBmaWxlIG9mIGZpbGVzKSB7CiAgICBjb25zdCBsaSA9IGRvY3VtZW50LmNyZWF0ZUVsZW1lbnQoJ2xpJyk7CiAgICBsaS5hcHBlbmQoc3BhbihmaWxlLm5hbWUsIHtmb250V2VpZ2h0OiAnYm9sZCd9KSk7CiAgICBsaS5hcHBlbmQoc3BhbigKICAgICAgICBgKCR7ZmlsZS50eXBlIHx8ICduL2EnfSkgLSAke2ZpbGUuc2l6ZX0gYnl0ZXMsIGAgKwogICAgICAgIGBsYXN0IG1vZGlmaWVkOiAkewogICAgICAgICAgICBmaWxlLmxhc3RNb2RpZmllZERhdGUgPyBmaWxlLmxhc3RNb2RpZmllZERhdGUudG9Mb2NhbGVEYXRlU3RyaW5nKCkgOgogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAnbi9hJ30gLSBgKSk7CiAgICBjb25zdCBwZXJjZW50ID0gc3BhbignMCUgZG9uZScpOwogICAgbGkuYXBwZW5kQ2hpbGQocGVyY2VudCk7CgogICAgb3V0cHV0RWxlbWVudC5hcHBlbmRDaGlsZChsaSk7CgogICAgY29uc3QgZmlsZURhdGFQcm9taXNlID0gbmV3IFByb21pc2UoKHJlc29sdmUpID0+IHsKICAgICAgY29uc3QgcmVhZGVyID0gbmV3IEZpbGVSZWFkZXIoKTsKICAgICAgcmVhZGVyLm9ubG9hZCA9IChlKSA9PiB7CiAgICAgICAgcmVzb2x2ZShlLnRhcmdldC5yZXN1bHQpOwogICAgICB9OwogICAgICByZWFkZXIucmVhZEFzQXJyYXlCdWZmZXIoZmlsZSk7CiAgICB9KTsKICAgIC8vIFdhaXQgZm9yIHRoZSBkYXRhIHRvIGJlIHJ
lYWR5LgogICAgbGV0IGZpbGVEYXRhID0geWllbGQgewogICAgICBwcm9taXNlOiBmaWxlRGF0YVByb21pc2UsCiAgICAgIHJlc3BvbnNlOiB7CiAgICAgICAgYWN0aW9uOiAnY29udGludWUnLAogICAgICB9CiAgICB9OwoKICAgIC8vIFVzZSBhIGNodW5rZWQgc2VuZGluZyB0byBhdm9pZCBtZXNzYWdlIHNpemUgbGltaXRzLiBTZWUgYi82MjExNTY2MC4KICAgIGxldCBwb3NpdGlvbiA9IDA7CiAgICB3aGlsZSAocG9zaXRpb24gPCBmaWxlRGF0YS5ieXRlTGVuZ3RoKSB7CiAgICAgIGNvbnN0IGxlbmd0aCA9IE1hdGgubWluKGZpbGVEYXRhLmJ5dGVMZW5ndGggLSBwb3NpdGlvbiwgTUFYX1BBWUxPQURfU0laRSk7CiAgICAgIGNvbnN0IGNodW5rID0gbmV3IFVpbnQ4QXJyYXkoZmlsZURhdGEsIHBvc2l0aW9uLCBsZW5ndGgpOwogICAgICBwb3NpdGlvbiArPSBsZW5ndGg7CgogICAgICBjb25zdCBiYXNlNjQgPSBidG9hKFN0cmluZy5mcm9tQ2hhckNvZGUuYXBwbHkobnVsbCwgY2h1bmspKTsKICAgICAgeWllbGQgewogICAgICAgIHJlc3BvbnNlOiB7CiAgICAgICAgICBhY3Rpb246ICdhcHBlbmQnLAogICAgICAgICAgZmlsZTogZmlsZS5uYW1lLAogICAgICAgICAgZGF0YTogYmFzZTY0LAogICAgICAgIH0sCiAgICAgIH07CiAgICAgIHBlcmNlbnQudGV4dENvbnRlbnQgPQogICAgICAgICAgYCR7TWF0aC5yb3VuZCgocG9zaXRpb24gLyBmaWxlRGF0YS5ieXRlTGVuZ3RoKSAqIDEwMCl9JSBkb25lYDsKICAgIH0KICB9CgogIC8vIEFsbCBkb25lLgogIHlpZWxkIHsKICAgIHJlc3BvbnNlOiB7CiAgICAgIGFjdGlvbjogJ2NvbXBsZXRlJywKICAgIH0KICB9Owp9CgpzY29wZS5nb29nbGUgPSBzY29wZS5nb29nbGUgfHwge307CnNjb3BlLmdvb2dsZS5jb2xhYiA9IHNjb3BlLmdvb2dsZS5jb2xhYiB8fCB7fTsKc2NvcGUuZ29vZ2xlLmNvbGFiLl9maWxlcyA9IHsKICBfdXBsb2FkRmlsZXMsCiAgX3VwbG9hZEZpbGVzQ29udGludWUsCn07Cn0pKHNlbGYpOwo=", | |
"ok": true, | |
"headers": [ | |
[ | |
"content-type", | |
"application/javascript" | |
] | |
], | |
"status": 200, | |
"status_text": "" | |
} | |
}, | |
"base_uri": "https://localhost:8080/", | |
"height": 74 | |
}, | |
"outputId": "12e88b74-5d56-494e-bae3-642110acbd52" | |
}, | |
"source": [ | |
"uploaded = files.upload()" | |
], | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/html": [ | |
"\n", | |
" <input type=\"file\" id=\"files-5781108a-e573-46d1-a25c-87d581b622d1\" name=\"files[]\" multiple disabled />\n", | |
" <output id=\"result-5781108a-e573-46d1-a25c-87d581b622d1\">\n", | |
" Upload widget is only available when the cell has been executed in the\n", | |
" current browser session. Please rerun this cell to enable.\n", | |
" </output>\n", | |
" <script src=\"/nbextensions/google.colab/files.js\"></script> " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Saving train.json to train.json\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "lWIOsFvhRNCQ", | |
"colab_type": "code", | |
"colab": { | |
"resources": { | |
"http://localhost:8080/nbextensions/google.colab/files.js": { | |
"data": "Ly8gQ29weXJpZ2h0IDIwMTcgR29vZ2xlIExMQwovLwovLyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKLy8geW91IG1heSBub3QgdXNlIHRoaXMgZmlsZSBleGNlcHQgaW4gY29tcGxpYW5jZSB3aXRoIHRoZSBMaWNlbnNlLgovLyBZb3UgbWF5IG9idGFpbiBhIGNvcHkgb2YgdGhlIExpY2Vuc2UgYXQKLy8KLy8gICAgICBodHRwOi8vd3d3LmFwYWNoZS5vcmcvbGljZW5zZXMvTElDRU5TRS0yLjAKLy8KLy8gVW5sZXNzIHJlcXVpcmVkIGJ5IGFwcGxpY2FibGUgbGF3IG9yIGFncmVlZCB0byBpbiB3cml0aW5nLCBzb2Z0d2FyZQovLyBkaXN0cmlidXRlZCB1bmRlciB0aGUgTGljZW5zZSBpcyBkaXN0cmlidXRlZCBvbiBhbiAiQVMgSVMiIEJBU0lTLAovLyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KLy8gU2VlIHRoZSBMaWNlbnNlIGZvciB0aGUgc3BlY2lmaWMgbGFuZ3VhZ2UgZ292ZXJuaW5nIHBlcm1pc3Npb25zIGFuZAovLyBsaW1pdGF0aW9ucyB1bmRlciB0aGUgTGljZW5zZS4KCi8qKgogKiBAZmlsZW92ZXJ2aWV3IEhlbHBlcnMgZm9yIGdvb2dsZS5jb2xhYiBQeXRob24gbW9kdWxlLgogKi8KKGZ1bmN0aW9uKHNjb3BlKSB7CmZ1bmN0aW9uIHNwYW4odGV4dCwgc3R5bGVBdHRyaWJ1dGVzID0ge30pIHsKICBjb25zdCBlbGVtZW50ID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnc3BhbicpOwogIGVsZW1lbnQudGV4dENvbnRlbnQgPSB0ZXh0OwogIGZvciAoY29uc3Qga2V5IG9mIE9iamVjdC5rZXlzKHN0eWxlQXR0cmlidXRlcykpIHsKICAgIGVsZW1lbnQuc3R5bGVba2V5XSA9IHN0eWxlQXR0cmlidXRlc1trZXldOwogIH0KICByZXR1cm4gZWxlbWVudDsKfQoKLy8gTWF4IG51bWJlciBvZiBieXRlcyB3aGljaCB3aWxsIGJlIHVwbG9hZGVkIGF0IGEgdGltZS4KY29uc3QgTUFYX1BBWUxPQURfU0laRSA9IDEwMCAqIDEwMjQ7Ci8vIE1heCBhbW91bnQgb2YgdGltZSB0byBibG9jayB3YWl0aW5nIGZvciB0aGUgdXNlci4KY29uc3QgRklMRV9DSEFOR0VfVElNRU9VVF9NUyA9IDMwICogMTAwMDsKCmZ1bmN0aW9uIF91cGxvYWRGaWxlcyhpbnB1dElkLCBvdXRwdXRJZCkgewogIGNvbnN0IHN0ZXBzID0gdXBsb2FkRmlsZXNTdGVwKGlucHV0SWQsIG91dHB1dElkKTsKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIC8vIENhY2hlIHN0ZXBzIG9uIHRoZSBvdXRwdXRFbGVtZW50IHRvIG1ha2UgaXQgYXZhaWxhYmxlIGZvciB0aGUgbmV4dCBjYWxsCiAgLy8gdG8gdXBsb2FkRmlsZXNDb250aW51ZSBmcm9tIFB5dGhvbi4KICBvdXRwdXRFbGVtZW50LnN0ZXBzID0gc3RlcHM7CgogIHJldHVybiBfdXBsb2FkRmlsZXNDb250aW51ZShvdXRwdXRJZCk7Cn0KCi8vIFRoaXMgaXMgcm91Z2hseSBhbiBhc3luYyBnZW5lcmF
0b3IgKG5vdCBzdXBwb3J0ZWQgaW4gdGhlIGJyb3dzZXIgeWV0KSwKLy8gd2hlcmUgdGhlcmUgYXJlIG11bHRpcGxlIGFzeW5jaHJvbm91cyBzdGVwcyBhbmQgdGhlIFB5dGhvbiBzaWRlIGlzIGdvaW5nCi8vIHRvIHBvbGwgZm9yIGNvbXBsZXRpb24gb2YgZWFjaCBzdGVwLgovLyBUaGlzIHVzZXMgYSBQcm9taXNlIHRvIGJsb2NrIHRoZSBweXRob24gc2lkZSBvbiBjb21wbGV0aW9uIG9mIGVhY2ggc3RlcCwKLy8gdGhlbiBwYXNzZXMgdGhlIHJlc3VsdCBvZiB0aGUgcHJldmlvdXMgc3RlcCBhcyB0aGUgaW5wdXQgdG8gdGhlIG5leHQgc3RlcC4KZnVuY3Rpb24gX3VwbG9hZEZpbGVzQ29udGludWUob3V0cHV0SWQpIHsKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIGNvbnN0IHN0ZXBzID0gb3V0cHV0RWxlbWVudC5zdGVwczsKCiAgY29uc3QgbmV4dCA9IHN0ZXBzLm5leHQob3V0cHV0RWxlbWVudC5sYXN0UHJvbWlzZVZhbHVlKTsKICByZXR1cm4gUHJvbWlzZS5yZXNvbHZlKG5leHQudmFsdWUucHJvbWlzZSkudGhlbigodmFsdWUpID0+IHsKICAgIC8vIENhY2hlIHRoZSBsYXN0IHByb21pc2UgdmFsdWUgdG8gbWFrZSBpdCBhdmFpbGFibGUgdG8gdGhlIG5leHQKICAgIC8vIHN0ZXAgb2YgdGhlIGdlbmVyYXRvci4KICAgIG91dHB1dEVsZW1lbnQubGFzdFByb21pc2VWYWx1ZSA9IHZhbHVlOwogICAgcmV0dXJuIG5leHQudmFsdWUucmVzcG9uc2U7CiAgfSk7Cn0KCi8qKgogKiBHZW5lcmF0b3IgZnVuY3Rpb24gd2hpY2ggaXMgY2FsbGVkIGJldHdlZW4gZWFjaCBhc3luYyBzdGVwIG9mIHRoZSB1cGxvYWQKICogcHJvY2Vzcy4KICogQHBhcmFtIHtzdHJpbmd9IGlucHV0SWQgRWxlbWVudCBJRCBvZiB0aGUgaW5wdXQgZmlsZSBwaWNrZXIgZWxlbWVudC4KICogQHBhcmFtIHtzdHJpbmd9IG91dHB1dElkIEVsZW1lbnQgSUQgb2YgdGhlIG91dHB1dCBkaXNwbGF5LgogKiBAcmV0dXJuIHshSXRlcmFibGU8IU9iamVjdD59IEl0ZXJhYmxlIG9mIG5leHQgc3RlcHMuCiAqLwpmdW5jdGlvbiogdXBsb2FkRmlsZXNTdGVwKGlucHV0SWQsIG91dHB1dElkKSB7CiAgY29uc3QgaW5wdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQoaW5wdXRJZCk7CiAgaW5wdXRFbGVtZW50LmRpc2FibGVkID0gZmFsc2U7CgogIGNvbnN0IG91dHB1dEVsZW1lbnQgPSBkb2N1bWVudC5nZXRFbGVtZW50QnlJZChvdXRwdXRJZCk7CiAgb3V0cHV0RWxlbWVudC5pbm5lckhUTUwgPSAnJzsKCiAgY29uc3QgcGlja2VkUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICBpbnB1dEVsZW1lbnQuYWRkRXZlbnRMaXN0ZW5lcignY2hhbmdlJywgKGUpID0+IHsKICAgICAgcmVzb2x2ZShlLnRhcmdldC5maWxlcyk7CiAgICB9KTsKICB9KTsKCiAgY29uc3QgY2FuY2VsID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnYnV0dG9uJyk7CiAgaW5wdXRFbGVtZW50LnBhcmVudEVsZW1lbnQ
uYXBwZW5kQ2hpbGQoY2FuY2VsKTsKICBjYW5jZWwudGV4dENvbnRlbnQgPSAnQ2FuY2VsIHVwbG9hZCc7CiAgY29uc3QgY2FuY2VsUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICBjYW5jZWwub25jbGljayA9ICgpID0+IHsKICAgICAgcmVzb2x2ZShudWxsKTsKICAgIH07CiAgfSk7CgogIC8vIENhbmNlbCB1cGxvYWQgaWYgdXNlciBoYXNuJ3QgcGlja2VkIGFueXRoaW5nIGluIHRpbWVvdXQuCiAgY29uc3QgdGltZW91dFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgc2V0VGltZW91dCgoKSA9PiB7CiAgICAgIHJlc29sdmUobnVsbCk7CiAgICB9LCBGSUxFX0NIQU5HRV9USU1FT1VUX01TKTsKICB9KTsKCiAgLy8gV2FpdCBmb3IgdGhlIHVzZXIgdG8gcGljayB0aGUgZmlsZXMuCiAgY29uc3QgZmlsZXMgPSB5aWVsZCB7CiAgICBwcm9taXNlOiBQcm9taXNlLnJhY2UoW3BpY2tlZFByb21pc2UsIHRpbWVvdXRQcm9taXNlLCBjYW5jZWxQcm9taXNlXSksCiAgICByZXNwb25zZTogewogICAgICBhY3Rpb246ICdzdGFydGluZycsCiAgICB9CiAgfTsKCiAgaWYgKCFmaWxlcykgewogICAgcmV0dXJuIHsKICAgICAgcmVzcG9uc2U6IHsKICAgICAgICBhY3Rpb246ICdjb21wbGV0ZScsCiAgICAgIH0KICAgIH07CiAgfQoKICBjYW5jZWwucmVtb3ZlKCk7CgogIC8vIERpc2FibGUgdGhlIGlucHV0IGVsZW1lbnQgc2luY2UgZnVydGhlciBwaWNrcyBhcmUgbm90IGFsbG93ZWQuCiAgaW5wdXRFbGVtZW50LmRpc2FibGVkID0gdHJ1ZTsKCiAgZm9yIChjb25zdCBmaWxlIG9mIGZpbGVzKSB7CiAgICBjb25zdCBsaSA9IGRvY3VtZW50LmNyZWF0ZUVsZW1lbnQoJ2xpJyk7CiAgICBsaS5hcHBlbmQoc3BhbihmaWxlLm5hbWUsIHtmb250V2VpZ2h0OiAnYm9sZCd9KSk7CiAgICBsaS5hcHBlbmQoc3BhbigKICAgICAgICBgKCR7ZmlsZS50eXBlIHx8ICduL2EnfSkgLSAke2ZpbGUuc2l6ZX0gYnl0ZXMsIGAgKwogICAgICAgIGBsYXN0IG1vZGlmaWVkOiAkewogICAgICAgICAgICBmaWxlLmxhc3RNb2RpZmllZERhdGUgPyBmaWxlLmxhc3RNb2RpZmllZERhdGUudG9Mb2NhbGVEYXRlU3RyaW5nKCkgOgogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAnbi9hJ30gLSBgKSk7CiAgICBjb25zdCBwZXJjZW50ID0gc3BhbignMCUgZG9uZScpOwogICAgbGkuYXBwZW5kQ2hpbGQocGVyY2VudCk7CgogICAgb3V0cHV0RWxlbWVudC5hcHBlbmRDaGlsZChsaSk7CgogICAgY29uc3QgZmlsZURhdGFQcm9taXNlID0gbmV3IFByb21pc2UoKHJlc29sdmUpID0+IHsKICAgICAgY29uc3QgcmVhZGVyID0gbmV3IEZpbGVSZWFkZXIoKTsKICAgICAgcmVhZGVyLm9ubG9hZCA9IChlKSA9PiB7CiAgICAgICAgcmVzb2x2ZShlLnRhcmdldC5yZXN1bHQpOwogICAgICB9OwogICAgICByZWFkZXIucmVhZEFzQXJyYXlCdWZmZXIoZmlsZSk7CiAgICB9KTsKICAgIC8vIFdhaXQgZm9yIHRoZSBkYXRhIHRvIGJlIHJ
lYWR5LgogICAgbGV0IGZpbGVEYXRhID0geWllbGQgewogICAgICBwcm9taXNlOiBmaWxlRGF0YVByb21pc2UsCiAgICAgIHJlc3BvbnNlOiB7CiAgICAgICAgYWN0aW9uOiAnY29udGludWUnLAogICAgICB9CiAgICB9OwoKICAgIC8vIFVzZSBhIGNodW5rZWQgc2VuZGluZyB0byBhdm9pZCBtZXNzYWdlIHNpemUgbGltaXRzLiBTZWUgYi82MjExNTY2MC4KICAgIGxldCBwb3NpdGlvbiA9IDA7CiAgICB3aGlsZSAocG9zaXRpb24gPCBmaWxlRGF0YS5ieXRlTGVuZ3RoKSB7CiAgICAgIGNvbnN0IGxlbmd0aCA9IE1hdGgubWluKGZpbGVEYXRhLmJ5dGVMZW5ndGggLSBwb3NpdGlvbiwgTUFYX1BBWUxPQURfU0laRSk7CiAgICAgIGNvbnN0IGNodW5rID0gbmV3IFVpbnQ4QXJyYXkoZmlsZURhdGEsIHBvc2l0aW9uLCBsZW5ndGgpOwogICAgICBwb3NpdGlvbiArPSBsZW5ndGg7CgogICAgICBjb25zdCBiYXNlNjQgPSBidG9hKFN0cmluZy5mcm9tQ2hhckNvZGUuYXBwbHkobnVsbCwgY2h1bmspKTsKICAgICAgeWllbGQgewogICAgICAgIHJlc3BvbnNlOiB7CiAgICAgICAgICBhY3Rpb246ICdhcHBlbmQnLAogICAgICAgICAgZmlsZTogZmlsZS5uYW1lLAogICAgICAgICAgZGF0YTogYmFzZTY0LAogICAgICAgIH0sCiAgICAgIH07CiAgICAgIHBlcmNlbnQudGV4dENvbnRlbnQgPQogICAgICAgICAgYCR7TWF0aC5yb3VuZCgocG9zaXRpb24gLyBmaWxlRGF0YS5ieXRlTGVuZ3RoKSAqIDEwMCl9JSBkb25lYDsKICAgIH0KICB9CgogIC8vIEFsbCBkb25lLgogIHlpZWxkIHsKICAgIHJlc3BvbnNlOiB7CiAgICAgIGFjdGlvbjogJ2NvbXBsZXRlJywKICAgIH0KICB9Owp9CgpzY29wZS5nb29nbGUgPSBzY29wZS5nb29nbGUgfHwge307CnNjb3BlLmdvb2dsZS5jb2xhYiA9IHNjb3BlLmdvb2dsZS5jb2xhYiB8fCB7fTsKc2NvcGUuZ29vZ2xlLmNvbGFiLl9maWxlcyA9IHsKICBfdXBsb2FkRmlsZXMsCiAgX3VwbG9hZEZpbGVzQ29udGludWUsCn07Cn0pKHNlbGYpOwo=", | |
"ok": true, | |
"headers": [ | |
[ | |
"content-type", | |
"application/javascript" | |
] | |
], | |
"status": 200, | |
"status_text": "" | |
} | |
}, | |
"base_uri": "https://localhost:8080/", | |
"height": 74 | |
}, | |
"outputId": "cd4cd35b-98f4-47b5-dffb-4cbf34e2a214" | |
}, | |
"source": [ | |
"uploaded = files.upload()" | |
], | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/html": [ | |
"\n", | |
" <input type=\"file\" id=\"files-2369b3eb-eabf-40bd-b9c5-c2ddb6dccee7\" name=\"files[]\" multiple disabled />\n", | |
" <output id=\"result-2369b3eb-eabf-40bd-b9c5-c2ddb6dccee7\">\n", | |
" Upload widget is only available when the cell has been executed in the\n", | |
" current browser session. Please rerun this cell to enable.\n", | |
" </output>\n", | |
" <script src=\"/nbextensions/google.colab/files.js\"></script> " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Saving test.json to test.json\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "WRmVLaIWPqSL", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 204 | |
}, | |
"outputId": "ab904300-556c-46fc-c7b8-c650dd27b71d" | |
}, | |
"source": [ | |
"data = pd.read_json(\"train.json\")\n", | |
"data.head()" | |
], | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>cuisine</th>\n", | |
" <th>id</th>\n", | |
" <th>ingredients</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>greek</td>\n", | |
" <td>10259</td>\n", | |
" <td>[romaine lettuce, black olives, grape tomatoes...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>southern_us</td>\n", | |
" <td>25693</td>\n", | |
" <td>[plain flour, ground pepper, salt, tomatoes, g...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>filipino</td>\n", | |
" <td>20130</td>\n", | |
" <td>[eggs, pepper, salt, mayonaise, cooking oil, g...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>indian</td>\n", | |
" <td>22213</td>\n", | |
" <td>[water, vegetable oil, wheat, salt]</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>indian</td>\n", | |
" <td>13162</td>\n", | |
" <td>[black pepper, shallots, cornflour, cayenne pe...</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" cuisine id ingredients\n", | |
"0 greek 10259 [romaine lettuce, black olives, grape tomatoes...\n", | |
"1 southern_us 25693 [plain flour, ground pepper, salt, tomatoes, g...\n", | |
"2 filipino 20130 [eggs, pepper, salt, mayonaise, cooking oil, g...\n", | |
"3 indian 22213 [water, vegetable oil, wheat, salt]\n", | |
"4 indian 13162 [black pepper, shallots, cornflour, cayenne pe..." | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 4 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "-O_AYsCyPqSO", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 357 | |
}, | |
"outputId": "8674b73c-f942-4e75-becf-a3d95f73522d" | |
}, | |
"source": [ | |
"# Create list of unique cuisine types\n", | |
"cuisine_list = data['cuisine']\n", | |
"cuisine_compilation = []\n", | |
"for cuisine in cuisine_list:\n", | |
" cuisine_compilation.append(cuisine)\n", | |
" \n", | |
"cuis_unique = list(set(cuisine_compilation))\n", | |
"cuis_unique" | |
], | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"['italian',\n", | |
" 'russian',\n", | |
" 'thai',\n", | |
" 'filipino',\n", | |
" 'mexican',\n", | |
" 'greek',\n", | |
" 'spanish',\n", | |
" 'british',\n", | |
" 'cajun_creole',\n", | |
" 'french',\n", | |
" 'irish',\n", | |
" 'jamaican',\n", | |
" 'vietnamese',\n", | |
" 'korean',\n", | |
" 'chinese',\n", | |
" 'indian',\n", | |
" 'moroccan',\n", | |
" 'southern_us',\n", | |
" 'japanese',\n", | |
" 'brazilian']" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 5 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "C26VncKVPqSQ", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Onehotencoding for cuisine types\n", | |
"label_encoder_cuis = LabelEncoder()\n", | |
"label_encoder_cuis.fit(cuis_unique)\n", | |
"encoded_cuis = label_encoder_cuis.transform(data['cuisine'])\n", | |
"one_hot_cuis = to_categorical(encoded_cuis)" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "F0nrC-egPqSS", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 51 | |
}, | |
"outputId": "1a05e367-b61f-4045-c8af-41651db20ae0" | |
}, | |
"source": [ | |
"# Check first encoded item\n", | |
"one_hot_cuis[0]" | |
], | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"array([0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", | |
" 0., 0., 0.], dtype=float32)" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 7 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "huUtx052PqSV", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Add one hot cuisine to dataframe\n", | |
"data[\"one_hot_cuisine\"] = list(one_hot_cuis)" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "bFpBKxoyPqSX", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 204 | |
}, | |
"outputId": "d4b22761-a910-4758-886d-d17cc1ea3307" | |
}, | |
"source": [ | |
"data.head()" | |
], | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>cuisine</th>\n", | |
" <th>id</th>\n", | |
" <th>ingredients</th>\n", | |
" <th>one_hot_cuisine</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>greek</td>\n", | |
" <td>10259</td>\n", | |
" <td>[romaine lettuce, black olives, grape tomatoes...</td>\n", | |
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>southern_us</td>\n", | |
" <td>25693</td>\n", | |
" <td>[plain flour, ground pepper, salt, tomatoes, g...</td>\n", | |
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>filipino</td>\n", | |
" <td>20130</td>\n", | |
" <td>[eggs, pepper, salt, mayonaise, cooking oil, g...</td>\n", | |
" <td>[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>indian</td>\n", | |
" <td>22213</td>\n", | |
" <td>[water, vegetable oil, wheat, salt]</td>\n", | |
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>indian</td>\n", | |
" <td>13162</td>\n", | |
" <td>[black pepper, shallots, cornflour, cayenne pe...</td>\n", | |
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" cuisine ... one_hot_cuisine\n", | |
"0 greek ... [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...\n", | |
"1 southern_us ... [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...\n", | |
"2 filipino ... [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...\n", | |
"3 indian ... [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...\n", | |
"4 indian ... [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...\n", | |
"\n", | |
"[5 rows x 4 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 9 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "xgqQCZ7FPqSZ", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 204 | |
}, | |
"outputId": "5377d002-1fcb-447b-cafe-977966c8fa45" | |
}, | |
"source": [ | |
"# Organize data frame\n", | |
"data = data[[\"cuisine\", \"one_hot_cuisine\", \"ingredients\"]]\n", | |
"data.head()" | |
], | |
"execution_count": 10, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>cuisine</th>\n", | |
" <th>one_hot_cuisine</th>\n", | |
" <th>ingredients</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>greek</td>\n", | |
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...</td>\n", | |
" <td>[romaine lettuce, black olives, grape tomatoes...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>southern_us</td>\n", | |
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n", | |
" <td>[plain flour, ground pepper, salt, tomatoes, g...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>filipino</td>\n", | |
" <td>[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n", | |
" <td>[eggs, pepper, salt, mayonaise, cooking oil, g...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>indian</td>\n", | |
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n", | |
" <td>[water, vegetable oil, wheat, salt]</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>indian</td>\n", | |
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n", | |
" <td>[black pepper, shallots, cornflour, cayenne pe...</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" cuisine ... ingredients\n", | |
"0 greek ... [romaine lettuce, black olives, grape tomatoes...\n", | |
"1 southern_us ... [plain flour, ground pepper, salt, tomatoes, g...\n", | |
"2 filipino ... [eggs, pepper, salt, mayonaise, cooking oil, g...\n", | |
"3 indian ... [water, vegetable oil, wheat, salt]\n", | |
"4 indian ... [black pepper, shallots, cornflour, cayenne pe...\n", | |
"\n", | |
"[5 rows x 3 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 10 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "n61Xvfu6PqSc", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Create one hot encoding for ingredients that are in lists!\n", | |
"# -Create dictionary\n", | |
"ingredients = data.loc[:,'ingredients']\n", | |
"\n", | |
"i_map = {}\n", | |
"i_list = []\n", | |
"counter = 0\n", | |
"for lists in ingredients:\n", | |
" for items in lists:\n", | |
" if items not in i_map:\n", | |
" i_list.append(items)\n", | |
" i_map[items] = counter\n", | |
" counter = counter + 1" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "2tYwfaBpPqSf", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 102 | |
}, | |
"outputId": "7f2984d8-1454-41c0-85ef-182d3ea08197" | |
}, | |
"source": [ | |
"dict(list(i_map.items())[0:5])" | |
], | |
"execution_count": 12, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"{'black olives': 1,\n", | |
" 'garlic': 3,\n", | |
" 'grape tomatoes': 2,\n", | |
" 'pepper': 4,\n", | |
" 'romaine lettuce': 0}" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 12 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "aSlEak3BPqSi", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Encode every recipe as a fixed-length 0/1 vector with one slot per entry\n", | |
"# in i_map; a slot is set to 1 when the recipe lists that ingredient.\n", | |
"vocab_size = len(i_map)\n", | |
"ingredients_encodings = []\n", | |
"for recipe in ingredients:\n", | |
"    row = [0] * vocab_size\n", | |
"    for ingredient in recipe:\n", | |
"        row[i_map[ingredient]] = 1\n", | |
"    ingredients_encodings.append(row)" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "s0kuDddyPqSl", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 1000 | |
}, | |
"outputId": "5e99ff7b-2808-4487-f44e-8437e1d3cc4b" | |
}, | |
"source": [ | |
"# Inspect the encoded vector of the first recipe.\n", | |
"# NOTE(review): this echoes the entire vector (one entry per known ingredient);\n", | |
"# consider displaying a slice or sum(...) instead to keep the output short.\n", | |
"ingredients_encodings[0]" | |
], | |
"execution_count": 14, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"[1,\n", | |
" 1,\n", | |
" 1,\n", | |
" 1,\n", | |
" 1,\n", | |
" 1,\n", | |
" 1,\n", | |
" 1,\n", | |
" 1,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" 0,\n", | |
" ...]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 14 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "jIKfh8BbPqSo", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Attach the encoded vectors to the data frame as a new column\n", | |
"# (each row holds that recipe's 0/1 list)\n", | |
"data[\"one_hot_ingredients\"] = ingredients_encodings" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "WI5Xty1-PqSr", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 204 | |
}, | |
"outputId": "c759c5ef-6eda-4585-a13c-2c858a64b5e7" | |
}, | |
"source": [ | |
"# Preview the first rows to confirm the one_hot_ingredients column was added\n", | |
"data.head()" | |
], | |
"execution_count": 16, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>cuisine</th>\n", | |
" <th>one_hot_cuisine</th>\n", | |
" <th>ingredients</th>\n", | |
" <th>one_hot_ingredients</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>greek</td>\n", | |
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...</td>\n", | |
" <td>[romaine lettuce, black olives, grape tomatoes...</td>\n", | |
" <td>[1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, ...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>southern_us</td>\n", | |
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n", | |
" <td>[plain flour, ground pepper, salt, tomatoes, g...</td>\n", | |
" <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, ...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>filipino</td>\n", | |
" <td>[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n", | |
" <td>[eggs, pepper, salt, mayonaise, cooking oil, g...</td>\n", | |
" <td>[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>indian</td>\n", | |
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n", | |
" <td>[water, vegetable oil, wheat, salt]</td>\n", | |
" <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>indian</td>\n", | |
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n", | |
" <td>[black pepper, shallots, cornflour, cayenne pe...</td>\n", | |
" <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" cuisine ... one_hot_ingredients\n", | |
"0 greek ... [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, ...\n", | |
"1 southern_us ... [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, ...\n", | |
"2 filipino ... [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...\n", | |
"3 indian ... [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...\n", | |
"4 indian ... [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...\n", | |
"\n", | |
"[5 rows x 4 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 16 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "uR1Q_O8APqSu", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Pull the feature column (ingredients) and the label column (cuisine)\n", | |
"# out of the data frame ahead of the train/test split\n", | |
"one_hot_ingredients = data['one_hot_ingredients']\n", | |
"one_hot_cuisine = data['one_hot_cuisine']" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "_MLWC7B1PqSw", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Split features and labels into train and test sets;\n", | |
"# random_state is fixed so the split is the same on every run\n", | |
"ing_train, ing_test, cuis_train, cuis_test = train_test_split(\n", | |
"    one_hot_ingredients,\n", | |
"    one_hot_cuisine,\n", | |
"    random_state=1,\n", | |
")" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "4NzPDfSUPqSx", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 119 | |
}, | |
"outputId": "f1c503ea-c43e-4067-870f-1521e8c821c5" | |
}, | |
"source": [ | |
"# Preview the training features (still a pandas Series at this point)\n", | |
"ing_train.head()" | |
], | |
"execution_count": 19, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"15470 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...\n", | |
"24599 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...\n", | |
"4712 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...\n", | |
"8761 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...\n", | |
"22503 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, ...\n", | |
"Name: one_hot_ingredients, dtype: object" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 19 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "m4L9LTZUPqSz", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Each split is a Series whose entries are per-row vectors; convert every\n", | |
"# split to a plain list of rows and then to a NumPy array for Keras.\n", | |
"ing_train, ing_test, cuis_train, cuis_test = (\n", | |
"    np.array(split.tolist())\n", | |
"    for split in (ing_train, ing_test, cuis_train, cuis_test)\n", | |
")" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "3YzeFYakPqS0", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 136 | |
}, | |
"outputId": "7c49e5ec-d98b-46c6-d77b-57983e05bcde" | |
}, | |
"source": [ | |
"# Display the training features after the NumPy conversion\n", | |
"ing_train" | |
], | |
"execution_count": 21, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"array([[0, 0, 0, ..., 0, 0, 0],\n", | |
" [0, 0, 0, ..., 0, 0, 0],\n", | |
" [0, 0, 0, ..., 0, 0, 0],\n", | |
" ...,\n", | |
" [0, 0, 0, ..., 0, 0, 0],\n", | |
" [0, 0, 0, ..., 0, 0, 0],\n", | |
" [0, 0, 0, ..., 0, 0, 0]])" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 21 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "COq5jmxWPqS2", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 85 | |
}, | |
"outputId": "80ad3ec2-4f82-4cc8-eb8d-9066a454f5be" | |
}, | |
"source": [ | |
"# Create model structure\n", | |
"# The widths of the first and last layers are read from the prepared arrays\n", | |
"# rather than hard-coded, so the network keeps matching the data if the\n", | |
"# ingredient vocabulary or the number of cuisine classes changes.\n", | |
"n_features = ing_train.shape[1]  # columns of the ingredient matrix\n", | |
"n_classes = cuis_train.shape[1]  # columns of the one-hot cuisine matrix\n", | |
"\n", | |
"deep_model = Sequential()\n", | |
"deep_model.add(Dense(units=20, activation='relu', input_dim=n_features))\n", | |
"deep_model.add(Dense(units=15, activation='relu'))\n", | |
"deep_model.add(Dense(units=10, activation='relu'))\n", | |
"# Softmax output: one probability per cuisine class\n", | |
"deep_model.add(Dense(units=n_classes, activation='softmax'))" | |
], | |
"execution_count": 22, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"WARNING: Logging before flag parsing goes to stderr.\n", | |
"W0716 05:36:04.674324 140608603793280 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.\n", | |
"Instructions for updating:\n", | |
"Call initializer instance with the dtype argument instead of passing it to the constructor\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "KNc1lbefPqS5", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 289 | |
}, | |
"outputId": "4eb1ca14-2ff4-43c3-b896-6538c5246fa4" | |
}, | |
"source": [ | |
"# Print each layer's output shape and parameter count\n", | |
"deep_model.summary()" | |
], | |
"execution_count": 23, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Model: \"sequential\"\n", | |
"_________________________________________________________________\n", | |
"Layer (type) Output Shape Param # \n", | |
"=================================================================\n", | |
"dense (Dense) (None, 20) 134300 \n", | |
"_________________________________________________________________\n", | |
"dense_1 (Dense) (None, 15) 315 \n", | |
"_________________________________________________________________\n", | |
"dense_2 (Dense) (None, 10) 160 \n", | |
"_________________________________________________________________\n", | |
"dense_3 (Dense) (None, 20) 220 \n", | |
"=================================================================\n", | |
"Total params: 134,995\n", | |
"Trainable params: 134,995\n", | |
"Non-trainable params: 0\n", | |
"_________________________________________________________________\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "5P5SVUIAPqS9", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 374 | |
}, | |
"outputId": "c4e0db74-3c7f-4d01-f936-da1729088610" | |
}, | |
"source": [ | |
"# Configure training: Adam optimizer, categorical cross-entropy loss,\n", | |
"# accuracy reported as the metric\n", | |
"deep_model.compile(\n", | |
"    loss='categorical_crossentropy',\n", | |
"    optimizer='adam',\n", | |
"    metrics=['accuracy'],\n", | |
")\n", | |
"\n", | |
"# Train for 10 epochs with shuffling; verbose=2 logs one line per epoch.\n", | |
"# fit() is left as the cell's final expression, so its return value is\n", | |
"# echoed as the cell output.\n", | |
"deep_model.fit(ing_train, cuis_train, epochs=10, shuffle=True, verbose=2)" | |
], | |
"execution_count": 24, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Epoch 1/10\n", | |
"29830/29830 - 4s - loss: 1.6802 - acc: 0.5114\n", | |
"Epoch 2/10\n", | |
"29830/29830 - 3s - loss: 0.9572 - acc: 0.7250\n", | |
"Epoch 3/10\n", | |
"29830/29830 - 3s - loss: 0.7762 - acc: 0.7728\n", | |
"Epoch 4/10\n", | |
"29830/29830 - 3s - loss: 0.6661 - acc: 0.8040\n", | |
"Epoch 5/10\n", | |
"29830/29830 - 3s - loss: 0.5822 - acc: 0.8275\n", | |
"Epoch 6/10\n", | |
"29830/29830 - 3s - loss: 0.5137 - acc: 0.8491\n", | |
"Epoch 7/10\n", | |
"29830/29830 - 3s - loss: 0.4582 - acc: 0.8665\n", | |
"Epoch 8/10\n", | |
"29830/29830 - 3s - loss: 0.4123 - acc: 0.8784\n", | |
"Epoch 9/10\n", | |
"29830/29830 - 3s - loss: 0.3715 - acc: 0.8912\n", | |
"Epoch 10/10\n", | |
"29830/29830 - 3s - loss: 0.3372 - acc: 0.9018\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"<tensorflow.python.keras.callbacks.History at 0x7fe1dabc6cf8>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 24 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "3QBZAwZAPqTA", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 51 | |
}, | |
"outputId": "59302994-1c54-4aab-999d-32aad3df7a0e" | |
}, | |
"source": [ | |
"# Score the trained network on the held-out test split and report\n", | |
"# the resulting loss and accuracy\n", | |
"deep_model_loss, deep_model_accuracy = deep_model.evaluate(ing_test, cuis_test, verbose=2)\n", | |
"print(f\"Deep Neural Network - Loss: {deep_model_loss}, Accuracy: {deep_model_accuracy}\")" | |
], | |
"execution_count": 25, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"9944/9944 - 1s - loss: 1.2339 - acc: 0.7273\n", | |
"Deep Neural Network - Loss: 1.2339258739186791, Accuracy: 0.7272727489471436\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "e151jDlVPqTD", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Save the complete trained model to a single HDF5 file. Model.save() stores\n", | |
"# the architecture, the weights and the compile/optimizer state, not just the\n", | |
"# weights.\n", | |
"deep_model.save(\"cuisine_deep_model_trained.h5\")" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "KAeVInQkPqTG", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Write the model structure, serialized by to_json(), to its own file\n", | |
"with open('deep_model_architecture.json', 'w') as json_file:\n", | |
"    json_file.write(deep_model.to_json())" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "GUo8U3BtPqTI", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "dyyRbG82PqTN", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"" | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment