Last active
May 28, 2016 19:27
Revisions
-
psamim revised this gist
May 28, 2016. 1 changed file with 2 additions and 0 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters.
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@ library("openxlsx") library("rpart") library("rpart.plot") ## setwd("/home/samim/workspace/danial") -
psamim revised this gist
May 28, 2016. 1 changed file with 1 addition and 1 deletion. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters.
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ formula <- Influence.in.Buying.Behaviour.Newsletter ~ Age + Monthly.Income model <- rpart(formula, method = "anova", data = wb, cp = 10^(-6)) # Prune the tree cp9 = which(model$cptable[, 2] == 9) tree9 = prune(model, model$cptable[cp9, 1]) # create attractive pdf plot of tree -
psamim created this gist
May 28, 2016. There are no files selected for viewing
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
# Original file line number | Diff line number | Diff line change
# @@ -0,0 +1,31 @@

# Fits a regression tree of Influence.in.Buying.Behaviour.Newsletter on
# Age and Monthly.Income, prunes it to a fixed number of splits, and writes
# a plot of the pruned tree to "spactree9.pdf".

library("openxlsx")
library("rpart")
library("rpart.plot") # provides prp(), which is used for the plot below

## setwd("/home/samim/workspace/danial")

# Load the data ("data.xlsx" is resolved against the working directory)
wb <- read.xlsx("data.xlsx")

# get average over range Monthly.Income (disabled)
## temp <- wb$Monthly.Income
## temp[temp == "1200-180000-800"] <- NA
## temp[temp == "9"] <- 15000
## wb$Monthly.Income <- sapply(strsplit(temp , "-") , function(i) mean(as.numeric(i)))

# get average over range Age (disabled)
## temp <- wb$Age
## temp <- gsub( "over" , "" , temp)
## wb$Age <- sapply(strsplit(temp , "to") , function(i) mean(as.numeric(i)))

# Model it using rpart. The very small cp lets the tree grow large so that it
# can be pruned back to the desired size afterwards.
formula <- Influence.in.Buying.Behaviour.Newsletter ~ Age + Monthly.Income
model <- rpart(formula, method = "anova", data = wb, cp = 10^(-6))

# Prune the tree: find the cptable row with exactly `n_splits` splits and
# prune at that row's complexity parameter.
# NOTE(review): the names cp9/tree9 and the output file name suggest that
# 9 splits may have been intended rather than 4 -- confirm.
n_splits <- 4
cp9 <- which(model$cptable[, "nsplit"] == n_splits)
if (length(cp9) == 0L) {
  # Without this check prune() would receive an empty cp and fail obscurely.
  stop(
    "model$cptable has no row with exactly ", n_splits, " splits",
    call. = FALSE
  )
}
tree9 <- prune(model, cp = model$cptable[cp9, "CP"])

# create attractive pdf plot of tree
pdf("spactree9.pdf")
prp(tree9, extra = 100)
dev.off()