Created
July 30, 2017 17:58
-
-
Save vaibhav-jain/ed0d10082a6ab3be136958be94104d7e to your computer and use it in GitHub Desktop.
Using TFLearn and TensorFlow to estimate the survival chances of Titanic passengers.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import os | |
import numpy as np | |
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' | |
import tflearn | |
from tflearn.data_utils import load_csv | |
from tflearn.datasets import titanic | |
""" | |
VARIABLE DESCRIPTIONS:
survived  Survived
          (0 = No; 1 = Yes)
pclass    Passenger Class
          (1 = 1st; 2 = 2nd; 3 = 3rd)
name      Name
sex       Sex
age       Age
sibsp     Number of Siblings/Spouses Aboard
parch     Number of Parents/Children Aboard
ticket    Ticket Number
fare      Passenger Fare
""" | |
# Fetch the Titanic passenger CSV under the name 'titanic_dataset.csv'
titanic.download_dataset('titanic_dataset.csv')

# Parse the CSV. Column 0 carries the survival label, so it is split off
# into `labels` and requested as a 2-class categorical encoding; the
# remaining columns of each row end up in `data`.
data, labels = load_csv('titanic_dataset.csv', target_column=0,
                        categorical_labels=True, n_classes=2)
# Preprocessing function | |
def preprocess(data, columns_to_ignore):
    """Turn raw passenger rows into a float32 feature matrix.

    Args:
        data: Sequence of rows, each a sequence of field values (labels
            column already removed). Rows are copied before being edited,
            so the caller's data is left untouched.
        columns_to_ignore: Positions of the columns to drop from every row.

    Returns:
        A ``np.float32`` array with one row per input row. The field at
        position 1 after the drop (the 'sex' column for the Titanic layout)
        is encoded as 1.0 for 'female' and 0.0 for anything else.

    Raises:
        IndexError: If a position in ``columns_to_ignore`` is out of range
            for a row, or a row has fewer than two fields left.
    """
    # Delete from the highest position down so earlier deletions do not
    # shift the positions that are still waiting to be removed.
    drop_order = sorted(columns_to_ignore, reverse=True)

    cleaned = []
    for row in data:
        fields = list(row)  # work on a copy; never mutate the caller's row
        for column in drop_order:
            del fields[column]
        # Converting 'sex' field to float (index 1 after removing 'name')
        fields[1] = 1. if fields[1] == 'female' else 0.
        cleaned.append(fields)
    return np.array(cleaned, dtype=np.float32)
# Columns dropped before training: 'name' (index 1) and 'ticket' (index 6)
# of each row in the data array
to_ignore = [1, 6]

# Run the raw rows through preprocess() and rebind `data` to its result
data = preprocess(data, to_ignore)
# Network definition: 6 input features -> two fully connected layers of
# 32 units each -> 2-unit softmax output, finished with a regression layer
net = tflearn.input_data(shape=[None, 6])
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net)

# Wrap the graph in a DNN model object
model = tflearn.DNN(net)

# Train for 10 epochs in batches of 16, reporting the metric as it goes
model.fit(data, labels, n_epoch=10, batch_size=16, show_metric=True)
# Two hand-written passenger rows (same column layout as the loaded data)
# for the characters played by DiCaprio and Winslet
dicaprio = [3, 'Jack Dawson', 'male', 19, 0, 0, 'N/A', 5.0000]
winslet = [1, 'Rose DeWitt Bukater', 'female', 17, 1, 2, 'N/A', 100.0000]

# Apply the same preprocessing that was used on the training rows
dicaprio, winslet = preprocess([dicaprio, winslet], to_ignore)

# Ask the model for class probabilities; index 1 is the "survived" class
pred = model.predict([dicaprio, winslet])
print("DiCaprio Surviving Rate:", pred[0][1])
print("Winslet Surviving Rate:", pred[1][1])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment