Explain Naïve Bayes along with a case study in Python
First, we import the required libraries and load the training and test datasets.
# Third-party libraries: pandas/numpy for data handling, scikit-learn for the
# Naive Bayes classifiers, data splitting and evaluation metrics.
import pandas as pd
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

# Load the training and test sets; both CSV files are resolved relative to
# the current working directory.
salary_train = pd.read_csv("SalaryData_Train.csv")
salary_test = pd.read_csv("SalaryData_Test.csv")

# Columns that are label-encoded into integer codes before modelling
# (presumably the string-valued categorical columns of the dataset).
string_columns = [
    "workclass",
    "education",
    "maritalstatus",
    "occupation",
    "relationship",
    "race",
    "sex",
    "native",
]
Now we preprocess the data by label-encoding the categorical columns, and then split it into feature and target variables.
from sklearn import preprocessing

# Encode every categorical column as integer codes. The encoder is fitted on
# the train and test values together so that a given category receives the
# same code in both frames; fitting it separately on each frame can assign
# different codes to the same category whenever the two frames do not contain
# exactly the same set of categories.
number = preprocessing.LabelEncoder()
for column in string_columns:
    number.fit(pd.concat([salary_train[column], salary_test[column]]))
    salary_train[column] = number.transform(salary_train[column])
    salary_test[column] = number.transform(salary_test[column])

# The first 13 columns are used as features and the column at index 13 as
# the target; the same column names are applied to both frames.
colnames = salary_train.columns
# Notebook-style check of the feature count; has no visible effect when the
# file is run as a plain script.
len(colnames[0:13])
trainX = salary_train[colnames[0:13]]
trainY = salary_train[colnames[13]]
testX = salary_test[colnames[0:13]]
testY = salary_test[colnames[13]]
Now we fit the Gaussian Naïve Bayes model on the training data and predict on the test data.
# Instantiate both classifiers; the multinomial model is fitted further below.
sgnb = GaussianNB()
smnb = MultinomialNB()

# Fit on the training split and predict labels for the test split.
spred_gnb = sgnb.fit(trainX, trainY).predict(testX)

# Confusion matrix: rows are the true classes, columns the predicted classes.
cm_gnb = confusion_matrix(testY, spred_gnb)
print(cm_gnb)

# Accuracy = correct predictions (matrix diagonal) / all predictions. It is
# derived from the matrix itself so it always matches the current run rather
# than counts copied from an earlier one.
print("Accuracy", cm_gnb.trace() / cm_gnb.sum())
Next, we fit the Multinomial Naïve Bayes model and predict on the test data in the same way.
# Fit the multinomial model on the training split and predict labels for the
# test split.
spred_mnb = smnb.fit(trainX, trainY).predict(testX)

# Confusion matrix: rows are the true classes, columns the predicted classes.
cm_mnb = confusion_matrix(testY, spred_mnb)
print(cm_mnb)

# Accuracy = correct predictions (matrix diagonal) / all predictions. Taking
# both terms from the matrix avoids hand-copied counts, which previously
# counted one cell twice in the denominator.
print("Accuracy", cm_mnb.trace() / cm_mnb.sum())