Last active
December 10, 2021 00:40
-
-
Save addy1997/325aa1dc560ae3e4217ccbfbb6d7d3fb to your computer and use it in GitHub Desktop.
CSI assignment 2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
""" | |
Created on Sat Dec 5 00:24:39 2021 | |
@author: adwaitnaik | |
""" | |
import pandas as pd | |
import numpy as np | |
# Disable pandas' chained-assignment warning for the whole process
# (the pandas default for this option is 'warn').
pd.options.mode.chained_assignment = None
def train_test(X1_train, X2_train, X_test, prior_smile=0.5):
    """Classify test samples with a two-class Gaussian naive Bayes rule.

    Per-feature means and standard deviations are estimated separately from
    the 'smile' and 'frown' training frames.  Each test row is then scored
    with the sum of per-feature Gaussian log-densities plus the log prior of
    the class, and the class with the larger score wins.

    Parameters
    ----------
    X1_train : pandas.DataFrame
        Training rows of the 'smile' class.  Non-numeric columns (for
        example a label column) are ignored.
    X2_train : pandas.DataFrame
        Training rows of the 'frown' class.  Non-numeric columns are ignored.
    X_test : pandas.DataFrame
        Feature columns of the samples to classify.  Features are matched to
        the numeric training columns by position.
    prior_smile : float, optional
        Prior probability of the 'smile' class; 'frown' gets
        ``1 - prior_smile``.  The default 0.5 gives equal priors.

    Returns
    -------
    numpy.ndarray
        One integer per test row: 1 for 'smile', 0 for 'frown'.  Ties go to
        0 because ``argmax`` returns the first maximum.

    Raises
    ------
    ValueError
        If ``prior_smile`` is not strictly between 0 and 1, or a training
        frame has fewer numeric columns than ``X_test`` has features.
    """
    if not 0.0 < prior_smile < 1.0:
        raise ValueError("prior_smile must be strictly between 0 and 1")

    # Floor for zero standard deviations: a constant training feature would
    # otherwise produce log(0) and a division by zero (NaN scores).
    std_floor = 1e-9

    n_features = X_test.shape[1]
    samples = X_test.to_numpy(dtype=float)

    def _log_likelihood(train):
        """Return log P(x | class) for every test row, given class rows."""
        # Only numeric columns carry features; a string label column would
        # make DataFrame.mean()/std() raise on pandas >= 2.0.
        numeric = train.select_dtypes(include="number")
        if numeric.shape[1] < n_features:
            raise ValueError(
                "training data has fewer numeric columns than X_test has features"
            )
        mean = numeric.mean(axis=0).to_numpy(dtype=float)[:n_features]
        std = numeric.std(axis=0).to_numpy(dtype=float)[:n_features]
        std = np.where(std == 0, std_floor, std)

        # Negative log of the Gaussian density, feature by feature:
        #   log(sqrt(2*pi) * sigma) + (x - mu)^2 / (2 * sigma^2)
        log_norm = np.log(np.sqrt(2 * np.pi) * std)
        scaled_sq_dev = np.square(samples - mean) / (2 * np.square(std))
        return -np.sum(log_norm + scaled_sq_dev, axis=1)

    # Discriminant functions: log-likelihood plus log prior for each class.
    gamma_prime_1 = _log_likelihood(X1_train) + np.log(prior_smile)
    gamma_prime_0 = _log_likelihood(X2_train) + np.log(1.0 - prior_smile)

    # Column 0 is 'frown', column 1 is 'smile', so argmax yields the label.
    G = np.column_stack((gamma_prime_0, gamma_prime_1))
    return np.argmax(G, axis=1)
if __name__ == "__main__":
    # Load data
    test_path = r'/Users/adwaitnaik/test-part-2.csv'  # insert path to the test file
    training_path = r'/Users/adwaitnaik/training-part-2.csv'  # insert path to the train file
    test_data = pd.read_csv(test_path)
    train_data = pd.read_csv(training_path)

    # Separate training data by class.  The label column is dropped so that
    # only feature columns reach the mean/std computation in train_test.
    smile_train = train_data.loc[train_data["Class"] == 'smile'].drop(columns="Class")
    frown_train = train_data.loc[train_data["Class"] == 'frown'].drop(columns="Class")

    # Separate features (first 17 columns) from the class label (column 17).
    X_test = test_data.iloc[:, 0:17]
    # Encode labels as 1 = smile, 0 = frown.  map() builds a new Series
    # instead of writing into a slice of test_data; any other label becomes
    # NaN and is therefore counted as misclassified.
    y_test = test_data.iloc[:, 17].map({'smile': 1, 'frown': 0})

    # Equal class prior.
    Ck = 0.5

    # Training and testing
    result = train_test(smile_train, frown_train, X_test)

    # Positional, vectorized comparison of true and predicted labels.
    classified_total = len(y_test)
    classified_correct = int(np.sum(y_test.to_numpy() == result))
    classified_incorrect = classified_total - classified_correct

    accuracy = classified_correct / classified_total
    error_rate = 1 - accuracy
    print("The accuracy is:", accuracy)
    print("The error rate is:", error_rate)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment