#Author: Dr.R.Maheswari,Professor, School of Computing Science & Engineering, VIT Chennai
#Ex4- Traffic pattern recognition using Decision tree, correlation study and Naïve Bayes classification and forecasting
#R version 3.6.1
#RStudio version 1.2.1335

#Import required packages after installing
library("e1071")
library("caTools")
library("caret")
library("party")
library("dplyr")
library("magrittr")
library("TTR")
library("data.table") 

# Load the data set.
# The bare `data` expressions below echo the data frame to the console so it
# can be inspected before and after pre-processing.
data <- read.csv(file = "traffic.csv")
data

# Pre-process the data: truncate every DateTime entry to a display width of 15
# (strtrim() works on the character form of the column).
data$DateTime <- strtrim(x = data$DateTime, width = 15)
data

# Per-column summary statistics of the pre-processed data set.
print("Summary of Dataset")
summary(object = data)

# Correlation study: Pearson correlation between the Vehicles and Junction
# columns of `data`.
print("Correlation between Traffic and Junction")
# Correlation coefficient only.
cor(x = data$Vehicles, y = data$Junction, method = "pearson")
# Same coefficient together with its significance test.
cor.test(x = data$Vehicles, y = data$Junction, method = "pearson")

# Simple Moving Average (SMA) of the vehicle counts, visualised as a line plot.
# Only the "Vehicles" column is read from the CSV.
t_col1 <- fread("traffic.csv", select = "Vehicles")
# Declare the series with 12 observations per period, starting at period 1 of 2015.
t_col1series <- ts(data = t_col1, start = c(2015, 1), frequency = 12)
# Replace missing observations with the mean of the observed ones.
t_col1series[is.na(t_col1series)] <- mean(t_col1series, na.rm = TRUE)
# Moving average over a 12-observation window
# (the "SMA3" suffix is only a name; the window used is n = 12).
t_col1seriesSMA3 <- SMA(x = t_col1series, n = 12)
plot.ts(x = t_col1seriesSMA3)

# Exponential smoothing: HoltWinters() with the trend (beta) and seasonal
# (gamma) components switched off.
# NOTE(review): this section smooths the "Junction" column, whereas the
# moving-average section uses "Vehicles" -- confirm Junction is the intended series.
t_col1 <- fread("traffic.csv", select = "Junction")
t_col1series <- ts(data = t_col1, start = c(2015, 1), frequency = 12)
# Replace missing observations with the mean of the observed ones.
t_col1series[is.na(t_col1series)] <- mean(t_col1series, na.rm = TRUE)

# Fit the model, then echo the fitted object and its sum of squared errors.
t_col1seriesforecasts <- HoltWinters(x = t_col1series, beta = FALSE, gamma = FALSE)
t_col1seriesforecasts
t_col1seriesforecasts$SSE
# Same model refitted with an explicit starting level; the result is only printed.
HoltWinters(x = t_col1series, beta = FALSE, gamma = FALSE, l.start = 23.56)

# ARIMA forecasting models: fit three candidate orders and print each fit.
library("TTR")
# The series is the fourth column of `data`, selected by position.
# NOTE(review): confirm that column 4 is the variable meant to be modelled.
v1 <- data[[4]]
datats <- ts(data = v1)
# Partition: the first 40 observations are used for fitting; observations
# 41 to 50 are stored in test_series (not used by the models below).
train_series <- datats[seq_len(40)]
test_series <- datats[41:50]
# Candidate models, each given as order = c(p, d, q) with d = 1.
arimaModel_1 <- arima(x = train_series, order = c(0, 1, 2))
arimaModel_2 <- arima(x = train_series, order = c(1, 1, 0))
arimaModel_3 <- arima(x = train_series, order = c(1, 1, 2))
# Show the estimated parameters of every model.
print(arimaModel_1)
print(arimaModel_2)
print(arimaModel_3)

# Split the data set into training and testing in the ratio of 70:30.
# Seed the random number generator first so the same rows are drawn on every run.
set.seed(120)
# sample.split() expects a vector holding one label per observation. Passing
# the whole data frame yields a mask with one entry per *column*, which is then
# recycled over the rows instead of giving a random 70:30 row split. Splitting
# on Junction keeps each junction's share the same in both subsets.
split <- sample.split(data$Junction, SplitRatio = 0.7)
# `split` is a logical mask with one entry per row: TRUE -> training row.
train_cl <- data[split, , drop = FALSE]
test_cl <- data[!split, , drop = FALSE]

# Decision tree: fit a conditional inference tree for Vehicles with Junction
# as the only predictor on the training subset, then draw it in tree format.
model <- ctree(formula = Vehicles ~ Junction, data = train_cl)
plot(x = model)

# Perform Naive Bayes classification of Junction and view the results in a
# confusion matrix.
set.seed(120) # Setting Seed

# naiveBayes() models a categorical class, so the response must be a factor.
# Work on copies so train_cl / test_cl stay untouched, and give both copies the
# same level set so the confusion table is always square.
junction_levels <- sort(unique(data$Junction))
nb_train <- train_cl
nb_test <- test_cl
nb_train$Junction <- factor(nb_train$Junction, levels = junction_levels)
nb_test$Junction <- factor(nb_test$Junction, levels = junction_levels)

# Every remaining column is used as a predictor.
# NOTE(review): confirm that all of these columns are meant to be predictors
# of Junction.
classifier_cl <- naiveBayes(Junction ~ ., data = nb_train)

# Predicting on test data
y_pred <- predict(classifier_cl, newdata = nb_test)

# Confusion Matrix: caret's confusionMatrix() reads a table as
# predictions in rows and reference (true) classes in columns.
cm <- table(Prediction = y_pred, Reference = nb_test$Junction)
cm
confusionMatrix(cm)
plot(cm)

#Conclusion
# Traffic pattern recognition with Decision trees, correlation study, Naïve Bayes classification and Time series forecasting was successfully implemented and visualised using R
