Skip to content

Instantly share code, notes, and snippets.

@sidd607
Last active March 30, 2016 15:45
Show Gist options
  • Save sidd607/f7c23ef26ec86dc692420ce7dffe2721 to your computer and use it in GitHub Desktop.
Save sidd607/f7c23ef26ec86dc692420ce7dffe2721 to your computer and use it in GitHub Desktop.
#-------------------Retrieving and Cleaning Data--------------
# Load the records; empty fields are read as NA. Text columns are read as
# factors explicitly: the model code further down feeds Sex and Embarked into
# numeric matrices through their integer factor codes, and since R 4.0
# read.csv() would otherwise return them as character.
data <- read.csv("filter.csv", header = TRUE, na.strings = c(""),
                 stringsAsFactors = TRUE)
# Missing-value count per column (auto-printed at top level).
colSums(is.na(data))
# Keep only the columns used below, selected by position.
# NOTE(review): presumably Survived, Pclass, Sex, Age, SibSp, Parch, Fare and
# Embarked (the names referenced later) -- confirm against the CSV header.
data <- subset(data, select = c(2, 3, 5, 6, 7, 8, 10, 12))
# Impute missing ages with the mean of the observed ages.
data$Age[is.na(data$Age)] <- mean(data$Age, na.rm = TRUE)
#is.factor(data$Sex)
#is.factor(data$Embarked)
# Drop rows with no Embarked value and renumber the remaining rows.
data <- data[!is.na(data$Embarked), ]
rownames(data) <- NULL
# First 800 rows train the model; every remaining row is held out for testing.
# Deriving the split from nrow(data) avoids all-NA rows when the file does not
# contain exactly 889 usable records.
n_train <- min(800L, nrow(data))
trainData <- data[seq_len(n_train), ]
testData <- data[seq_len(nrow(data)) > n_train, ]
#-----------------It starts----------------------------------
# Sigmoid (logistic) function, applied element-wise.
#   z: numeric scalar, vector or matrix.
# Returns 1 / (1 + exp(-z)) with the same shape as z.
g <- function(z) {
  denominator <- 1 + exp(-z)
  1 / denominator
}
# Hypothesis: sigmoid of the linear predictor.
#   x:  matrix (or vector) conformable with th under %*%.
#   th: coefficient column.
# Returns g(x %*% th).
h <- function(x, th) {
  linear_predictor <- x %*% th
  g(linear_predictor)
}
# Cost: logistic-regression cross-entropy, summed and scaled by 1/m.
#   x:  design matrix.
#   y:  0/1 responses, conformable with h(x, th).
#   th: coefficient column.
#   m:  scaling count (the caller passes the number of training rows).
# Returns a single number.
J <- function(x, y, th, m) {
  p <- h(x, th)
  per_row <- -y * log(p) - (1 - y) * log(1 - p)
  1 / m * sum(per_row)
}
# Gradient of J with respect to th.
#   x:  design matrix.
#   y:  0/1 responses as a column.
#   th: coefficient column.
#   m:  scaling count (the caller passes the number of training rows).
# Returns a column with one entry per column of x.
grad <- function(x, y, th, m) {
  residual <- h(x, th) - y
  1 / m * (t(x) %*% residual)
} # grad(x,y,th,m)
# Hessian of J with respect to th.
#   x:  design matrix, one row per observation.
#   y:  responses; not used by the Hessian, kept so the signature matches
#       J() and grad().
#   th: coefficient column.
#   m:  scaling count (the caller passes the number of training rows).
# Returns the square matrix 1/m * t(x) %*% W %*% x, where W is diagonal with
# entries h_i * (1 - h_i).
H <- function(x, y, th, m) {
  p <- h(x, th)
  # One weight per observation. The previous code used diag() on the m x 1
  # matrix p, which yields only p[1, 1], so the whole Hessian was scaled by
  # the first observation's weight.
  w <- as.vector(p * (1 - p))
  # x * w recycles w down each column, i.e. scales row i of x by w[i]; this
  # equals diag(w) %*% x without building an m x m matrix.
  1 / m * (t(x) %*% (x * w))
} # H(x,y,th,m)
#setup
# Fit the logistic model on trainData with a fixed number of Newton steps.
n_iter <- 10
predictors <- c("Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked")
m <- nrow(trainData)
# Design matrix: a column of ones (intercept) followed by the predictors.
# data.matrix() turns factor columns into their integer codes (character
# columns are converted to factors first), so the result is always numeric;
# concatenating the columns with c() silently produced a character matrix
# whenever Sex/Embarked were not factors.
x <- cbind(1, unname(data.matrix(trainData[, predictors])))
y <- matrix(trainData$Survived, ncol = 1)
# Start from all-zero coefficients, one per column of x.
th <- matrix(0, nrow = ncol(x), ncol = 1)
# Cost recorded before each update, for the convergence plot.
j <- array(0, c(n_iter, 1))
for (i in seq_len(n_iter)) {
  j[i] <- J(x, y, th, m)
  # Newton step: solve H %*% step = grad directly rather than forming the
  # explicit inverse of H.
  th <- th - solve(H(x, y, th, m), grad(x, y, th, m))
}
print(th)
plot(j, xlab = "iterations", ylab = "cost J")
# Spot-check the fitted model on one held-out row and on one hand-written
# input vector.
test <- testData[3, ]
# Flatten the one-row data frame into a plain vector of its column values.
test3 <- rapply(test, c)
# The training matrix starts with a column of ones, so the first element must
# be the intercept term 1 rather than the row's first data column; without
# this the prediction multiplied th[1] by that column's value.
test3[1] <- 1
ans <- g(test3 %*% th)[1, 1]
# Hand-written input: intercept followed by seven predictor values.
ans2 <- g(c(1, 1, 1, 11, 1, 2, 120, 0) %*% th)
print(ans)
print(ans2)
# Score every held-out row and tally the outcomes. The previous loop ran over
# a hard-coded 1:80, which skipped the rows of testData beyond the 80th and
# would index missing rows if testData were shorter.
threshold <- 0.4
# All columns as numbers (factor columns become their integer codes); the
# first column is then overwritten with the intercept term 1 so that each row
# lines up with th.
# NOTE(review): if Sex/Embarked are character rather than factor here, the
# codes are derived from this subset alone -- confirm they match training.
design <- unname(data.matrix(testData))
design[, 1] <- 1
prob <- as.vector(g(design %*% th))
# Predict 1 when the fitted probability reaches the threshold, else 0.
predicted <- as.numeric(prob >= threshold)
actual <- testData$Survived
hits <- as.numeric(predicted == actual)
# Per-row outcome: 1 when the prediction matches Survived, 0 otherwise.
for (final in hits) {
  print(final)
}
#tmp = c(ans, testData[i,]$Survived)
correct <- sum(hits)
# Counter names read <predicted><actual>: e.g. trueFalse counts rows that
# were predicted 1 but have Survived == 0.
trueTrue <- sum(predicted == 1 & actual == 1)
trueFalse <- sum(predicted == 1 & actual == 0)
falseTrue <- sum(predicted == 0 & actual == 1)
falseFalse <- sum(predicted == 0 & actual == 0)
#---------------------------Summary--------------------------------
# Number of matching predictions, then each tally preceded by its label.
print(correct)
tallies <- list(
  "True True" = trueTrue,
  "True False" = trueFalse,
  "False True" = falseTrue,
  "False False" = falseFalse
)
for (label in names(tallies)) {
  print(label)
  print(tallies[[label]])
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment