Explain with a case study how to implement logistic regression using R.
We will first read the dataset, which contains bank data.
# Load the bank data set interactively (choose the bank-full file); the file
# is semicolon-separated. stringsAsFactors = TRUE is set explicitly because
# R >= 4.0 reads text columns as character by default, and the binomial glm()
# fitted later expects the response y to be a factor (or 0/1), not character.
bank <- read.csv(file.choose(), sep = ";", stringsAsFactors = TRUE)
# Count the missing values, then drop any incomplete rows.
sum(is.na(bank))
bank <- na.omit(bank)
# List the available columns.
colnames(bank)
Next, we will create dummy variables for the categorical columns.
# Create dummy (indicator) variables.
# Install the 'dummies' package only when it is missing, instead of
# re-downloading it on every run of the script.
if (!requireNamespace("dummies", quietly = TRUE)) {
  install.packages("dummies")
}
library("dummies")
# Expand the data frame with dummy columns; sep = "." is the separator used
# in the generated column names.
bank.new <- dummy.data.frame(bank, sep = ".")
# Show the dummy columns generated for the job column on its own.
dummy(bank$job, sep = ".")
Now we will split the data into training and test sets.
# Split the data into training and test partitions.
library(caTools)
set.seed(123)  # fixed seed so the same split is drawn on every run
# Logical flag per row, drawn from y with a 0.75 split ratio.
split <- sample.split(bank$y, SplitRatio = 0.75)
training_set <- subset(bank, split)   # rows flagged TRUE
test_set <- subset(bank, !split)      # rows flagged FALSE
Now we will fit the logistic regression model.
# Fit a logistic regression (binomial GLM) of y on all other columns of the
# training set.
classifier <- glm(
  formula = y ~ .,
  family = binomial,
  data = training_set
)
# Print the fitted model. R names are case-sensitive, so the object must be
# referenced exactly as it was assigned: `classifier`.
classifier
Now we will predict the outcome for the test data.
# Predict y for the test set.
# type = "response" returns predicted probabilities; the default
# (type = "link") returns log-odds, for which a 0.5 cut-off is not the
# intended 50% probability threshold.
pred1 <- predict(classifier, newdata = test_set, type = "response")
pred1
# Convert the probabilities to 0/1 class labels using a 0.5 threshold.
y_pred <- ifelse(pred1 > 0.5, 1, 0)
# Coefficients and fit statistics for the model.
summary(classifier)
Now we will evaluate the model
# Confusion matrix: actual classes (rows) vs predicted classes (columns).
# The response is referenced by name (y), matching how it is used when
# splitting and fitting, instead of by a hard-coded column position. The
# predictions are given both levels explicitly so the table keeps both
# prediction columns even if only one class is ever predicted; otherwise
# diag() would not pick out the correctly classified cells.
cm <- table(
  actual = test_set$y,
  predicted = factor(y_pred, levels = c(0, 1))
)
cm
# Accuracy: correctly classified cases (the diagonal) over all cases.
Accuracy <- sum(diag(cm)) / sum(cm)
Accuracy
Now we can plot the ROC curve to see the trade-off between the true positive rate and the false positive rate across classification thresholds.
# ROC curve and AUC for the fitted model on the test set.
library(ROCR)
# Score the test set with the fitted model (predicted probabilities) so the
# curve describes this classifier rather than ROCR's bundled ROCR.simple
# example data.
test_scores <- predict(classifier, newdata = test_set, type = "response")
pred <- prediction(test_scores, test_set$y)
# True positive rate against false positive rate.
perf <- performance(pred, "tpr", "fpr")
plot(perf)
# AUC: extract the single value from the performance object and display it.
auc.tmp <- performance(pred, "auc")
auc <- as.numeric(auc.tmp@y.values)
auc