The term “traffic patterns recognition” refers to the process of recognising a user’s current traffic pattern, which can be applicable to transportation planning, location-based services, social networks, and a range of other applications. This work will be performing the correlation study, Naïve Bayes, Decision tree and forecasting on the data using various libraries and methods.
Procedure
Import required packages after installing
Load and read the data set
Pre-process the data appropriately
Use summary method to see the characteristics of the data set
Use the Simple Moving Average forecasting model and visualize the output
Use the Exponential smoothing forecasting model and visualize the output
Use the Arima forecasting model and view the output
Get the correlation between the columns
Split the data set into training and testing in the ratio of 70:30
Perform Decision tree classification and view the results in tree format
Perform Naïve Bayes and view the results in confusion matrix
Import required packages after installing
library("e1071")
library("caTools")
library("caret")
## Loading required package: ggplot2
## Loading required package: lattice
library("party")
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("magrittr")
library("TTR")
library("data.table")
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
data <- read.csv("traffic.csv")
data
data$DateTime = strtrim(data$DateTime,15)
data
print("Summary of Dataset")
## [1] "Summary of Dataset"
summary(data)
## DateTime Junction Vehicles ID
## Length:48120 Min. :1.000 Min. : 1.00 Min. :2.015e+10
## Class :character 1st Qu.:1.000 1st Qu.: 9.00 1st Qu.:2.016e+10
## Mode :character Median :2.000 Median : 15.00 Median :2.016e+10
## Mean :2.181 Mean : 22.79 Mean :2.016e+10
## 3rd Qu.:3.000 3rd Qu.: 29.00 3rd Qu.:2.017e+10
## Max. :4.000 Max. :180.00 Max. :2.017e+10
t_col1 <- fread("traffic.csv",select = c("Vehicles"))
t_col1series <- ts(t_col1,frequency=12, start=c(2015,1))
t_col1series[is.na(t_col1series)]<-mean(t_col1series,na.rm=TRUE) #Replace NA with mean
t_col1seriesSMA3 <- SMA(t_col1series,n=12)
plot.ts(t_col1seriesSMA3)
#Forecast using Exponential smoothing
t_col1 <- fread("traffic.csv",select = c("Junction"))
t_col1series <- ts(t_col1,frequency=12, start=c(2015,1))
t_col1series[is.na(t_col1series)]<-mean(t_col1series,na.rm=TRUE) #Replace NA with mean
t_col1seriesforecasts <- HoltWinters(t_col1series, beta=FALSE, gamma=FALSE)
t_col1seriesforecasts
## Holt-Winters exponential smoothing without trend and without seasonal component.
##
## Call:
## HoltWinters(x = t_col1series, beta = FALSE, gamma = FALSE)
##
## Smoothing parameters:
## alpha: 0.9999188
## beta : FALSE
## gamma: FALSE
##
## Coefficients:
## [,1]
## a 4
t_col1seriesforecasts$SSE
## [1] 3
HoltWinters(t_col1series, beta=FALSE, gamma=FALSE, l.start=23.56)
## Holt-Winters exponential smoothing without trend and without seasonal component.
##
## Call:
## HoltWinters(x = t_col1series, beta = FALSE, gamma = FALSE, l.start = 23.56)
##
## Smoothing parameters:
## alpha: 0.9999188
## beta : FALSE
## gamma: FALSE
##
## Coefficients:
## [,1]
## a 4
7.Use the Arima forecasting model and view the output
library("TTR")
v1 <- data[[4]]
datats <- ts(v1)
## partition into train and test
train_series=datats[1:40]
test_series=datats[41:50]
## make arima models
arimaModel_1=arima(train_series, order=c(0,1,2))
arimaModel_2=arima(train_series, order=c(1,1,0))
arimaModel_3=arima(train_series, order=c(1,1,2))
## look at the parameters
print(arimaModel_1);print(arimaModel_2);print(arimaModel_3)
##
## Call:
## arima(x = train_series, order = c(0, 1, 2))
##
## Coefficients:
## ma1 ma2
## 0.0005 0.0005
## s.e. 0.0061 0.0061
##
## sigma^2 estimated as 15294: log likelihood = -243.23, aic = 492.45
##
## Call:
## arima(x = train_series, order = c(1, 1, 0))
##
## Coefficients:
## ar1
## 0.0005
## s.e. 0.0061
##
## sigma^2 estimated as 15297: log likelihood = -243.23, aic = 490.46
##
## Call:
## arima(x = train_series, order = c(1, 1, 2))
##
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
## ar1 ma1 ma2
## 4e-04 2e-04 0.0005
## s.e. NaN NaN 0.0061
##
## sigma^2 estimated as 15294: log likelihood = -243.23, aic = 494.45
forecast1=predict(arimaModel_1, 10)
forecast2=predict(arimaModel_2, 10)
forecast3=predict(arimaModel_3, 10)
forecast1
## $pred
## Time Series:
## Start = 41
## End = 50
## Frequency = 1
## [1] 20151102151 20151102151 20151102151 20151102151 20151102151 20151102151
## [7] 20151102151 20151102151 20151102151 20151102151
##
## $se
## Time Series:
## Start = 41
## End = 50
## Frequency = 1
## [1] 123.6684 174.9410 214.3161 247.5044 276.7408 303.1707 327.4745 350.0951
## [9] 371.3403 391.4341
forecast2
## $pred
## Time Series:
## Start = 41
## End = 50
## Frequency = 1
## [1] 20151102151 20151102151 20151102151 20151102151 20151102151 20151102151
## [7] 20151102151 20151102151 20151102151 20151102151
##
## $se
## Time Series:
## Start = 41
## End = 50
## Frequency = 1
## [1] 123.6808 174.9585 214.2989 247.4622 276.6787 303.0918 327.3808 349.9882
## [9] 371.2213 391.3040
forecast3
## $pred
## Time Series:
## Start = 41
## End = 50
## Frequency = 1
## [1] 20151102151 20151102151 20151102151 20151102151 20151102151 20151102151
## [7] 20151102151 20151102151 20151102151 20151102151
##
## $se
## Time Series:
## Start = 41
## End = 50
## Frequency = 1
## [1] 123.6684 174.9409 214.3159 247.5042 276.7405 303.1705 327.4742 350.0948
## [9] 371.3399 391.4337
print("Correlation between Traffic and Junction")
## [1] "Correlation between Traffic and Junction"
cor(data$Vehicles,data$Junction,method = "pearson")
## [1] -0.6137872
cor.test(data$Vehicles,data$Junction,method = "pearson")
##
## Pearson's product-moment correlation
##
## data: data$Vehicles and data$Junction
## t = -170.54, df = 48118, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.6193256 -0.6081877
## sample estimates:
## cor
## -0.6137872
split <- sample.split(data, SplitRatio = 0.7)
train_cl <- subset(data, split == "TRUE")
test_cl <- subset(data, split == "FALSE")
10.Perform Decision tree classification and view the results in tree format
model<- ctree(Vehicles ~ Junction, train_cl)
plot(model)
11.Perform Naïve Bayes and view the results in confusion matrix
set.seed(120) # Setting Seed
classifier_cl <- naiveBayes(Junction ~ ., data = train_cl)
# Predicting on test data
y_pred <- predict(classifier_cl, newdata = test_cl)
# Confusion Matrix
cm <- table(test_cl$Junction, y_pred)
cm
## y_pred
## 1 2 3 4
## 1 5146 9 821 1320
## 2 408 686 361 5841
## 3 397 510 466 5923
## 4 1 36 2 2133
confusionMatrix(cm)
## Confusion Matrix and Statistics
##
## y_pred
## 1 2 3 4
## 1 5146 9 821 1320
## 2 408 686 361 5841
## 3 397 510 466 5923
## 4 1 36 2 2133
##
## Overall Statistics
##
## Accuracy : 0.3504
## 95% CI : (0.3444, 0.3565)
## No Information Rate : 0.6325
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.2187
##
## Mcnemar's Test P-Value : <2e-16
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3 Class: 4
## Sensitivity 0.8646 0.55278 0.28242 0.14017
## Specificity 0.8813 0.71033 0.69523 0.99559
## Pos Pred Value 0.7053 0.09402 0.06387 0.98204
## Neg Pred Value 0.9519 0.96689 0.92937 0.40223
## Prevalence 0.2474 0.05158 0.06858 0.63246
## Detection Rate 0.2139 0.02851 0.01937 0.08865
## Detection Prevalence 0.3032 0.30324 0.30324 0.09027
## Balanced Accuracy 0.8729 0.63155 0.48882 0.56788
plot(cm)