# Required packages: klaR, openxlsx, caret and ggplot2 must be installed (e.g. from CRAN); tcltk is bundled with R.
library(klaR)
library(openxlsx)
library(tcltk)

# Read the training and test sets from the first sheet of two Excel workbooks
# chosen through a file dialog. Cells equal to one of `na_strings` are read as
# NA (add more NA strings like "N/N" to the vector if you use them).
na_strings <- c("NN", "")

# Ask the user for exactly one workbook and return its path. Stops with a
# clear message when the dialog is cancelled, instead of handing an empty
# path vector to read.xlsx().
choose_workbook <- function(caption) {
  path <- tk_choose.files(caption = caption, multi = FALSE)
  if (length(path) != 1L || !nzchar(path)) {
    stop("No file selected for: ", caption, call. = FALSE)
  }
  path
}

training <- read.xlsx(choose_workbook("Choose training set file"),
                      sheet = 1, na.strings = na_strings)
test <- read.xlsx(choose_workbook("Choose test set file"),
                  sheet = 1, na.strings = na_strings)
# If you prefer fixed file paths, do not load tcltk and replace the two
# read.xlsx() calls above with:
# training <- read.xlsx("training_set.xlsx", sheet = 1, na.strings = na_strings)
# test <- read.xlsx("test_set.xlsx", sheet = 1, na.strings = na_strings)

# Fit a naive Bayes classifier for Population on the training set, predict the
# test set, and summarise the result as a confusion matrix.

# Every training column becomes a factor; its levels define the categories the
# model is trained on.
training[] <- lapply(training, factor)

# Encode the test set with the *training* levels. Converting the test columns
# on their own gives them different levels (and therefore different integer
# codes) whenever the two files do not contain exactly the same values, which
# misaligns the categories between model and new data and makes the confusion
# table non-square. Values never seen in training become NA. Columns that
# exist only in the test set are simply converted to factors.
observed_population <- test$Population
for (col in names(test)) {
  test[[col]] <- if (col %in% names(training)) {
    factor(test[[col]], levels = levels(training[[col]]))
  } else {
    factor(test[[col]])
  }
}

# Rows whose true Population is unknown to the model cannot be scored; report
# them rather than dropping them silently from the confusion matrix.
n_unseen <- sum(!is.na(observed_population) & is.na(test$Population))
if (n_unseen > 0) {
  warning(n_unseen, " test row(s) have a Population that is absent from the ",
          "training set; they are left out of the confusion matrix.",
          call. = FALSE)
}

nb_mod <- NaiveBayes(Population ~ ., data = training)
# Warnings raised during prediction are deliberately silenced.
pred <- suppressWarnings(predict(nb_mod, test))
nb_mod
pred

# Rows are predicted classes, columns are the true classes; both now share the
# training levels in the same order, as caret::confusionMatrix() requires.
tab <- table(pred$class, test$Population)
caret::confusionMatrix(tab)
# Jittered scatter of predicted class against true class for the test set:
# points on the diagonal are correct predictions, everything else is a
# misclassification. Colour follows the true population.
library(ggplot2)

# Store the predicted class next to the observed one so both can be mapped.
test$pred <- pred$class

ggplot(
  data = test,
  mapping = aes(x = Population, y = pred, color = Population)
) +
  geom_jitter(width = 0.2, height = 0.1, size = 2) +
  labs(
    title = "Confusion Matrix",
    subtitle = "Predicted vs. Observed from test set",
    x = "Truth",
    y = "Predicted"
  )
