#Author: Dr.R.Maheswari,Professor, School of Computing Science & Engineering, VIT Chennai
# Ex5 - Power data analysis and visualisation for a home power project on Raspberry Pi using R
#R version 3.6.1
#RStudio version 1.2.1335

# Import Packages
library(data.table)
library(dplyr)
library(lubridate)
library(plotly)
library(forecast)

# Import Data
# Read the raw readings from the working directory; fread() auto-detects the
# field separator.
# NOTE(review): the public UCI release of this file marks missing readings
# with "?". Declaring that through na.strings lets the measurement columns be
# parsed as numeric (rather than character) and turns the gaps into real NAs
# that complete.cases() can drop afterwards. Confirm for your copy of the file.
data <- fread("household_power_consumption.txt", na.strings = "?")

# Quick structural overview of what was loaded.
head(data)
glimpse(data)
summary(data)

# Data Preprocessing
# Keep only rows without missing values, then print the remaining NA count
# as a sanity check.
data <- data[complete.cases(data)]
sum(is.na(data))

# Combine the Date and Time text columns into one POSIXct timestamp.
# The time zone is supplied at parse time so the clock values in the file are
# kept as written. Parsing in the session's zone and only afterwards setting
# the "tzone" attribute re-labels the same instants, which shifts every
# derived hour/day by the offset between the session zone and Europe/Paris.
# NOTE(review): clock times that fall into a daylight-saving gap of
# Europe/Paris may not parse cleanly -- check for NA in data$datetime.
data$datetime <- as.POSIXct(
  paste(data$Date, data$Time),
  format = "%d/%m/%Y %H:%M:%S",
  tz = "Europe/Paris"
)
str(data)

# Calendar components used by the filters further down.
# week() is lubridate's "seven-day periods since 1 January" week number.
data$year <- year(data$datetime)
data$week <- week(data$datetime)
data$day <- day(data$datetime)
data$month <- month(data$datetime)
data$minute <- minute(data$datetime)

# Data Visualization
# Base-graphics overview: sub-meter 1 over the whole data set, then each
# sub-meter restricted to the readings from 2006.
plot(data$Sub_metering_1)
ann <- filter(data, year == 2006)
plot(ann$Sub_metering_1)
plot(ann$Sub_metering_2)
plot(ann$Sub_metering_3)

# One day, one sub-meter: 10 January 2008.
# Formulas name the columns directly (~datetime) so plotly resolves them in
# the data frame passed as first argument and labels the axes with the column
# names instead of "houseDay$datetime".
houseDay <- filter(data, year == 2008 & month == 1 & day == 10)
plot_ly(houseDay, x = ~datetime, y = ~Sub_metering_1,
        type = "scatter", mode = "lines")

# One day, all three sub-meters: 2 February 2009.
dtDay <- filter(data, year == 2009 & month == 2 & day == 2)
plot_ly(dtDay, x = ~datetime, y = ~Sub_metering_1, name = "Kitchen",
        type = "scatter", mode = "lines") %>%
  add_trace(y = ~Sub_metering_2, name = "Laundry Room", mode = "lines") %>%
  add_trace(y = ~Sub_metering_3, name = "Water Heater & AC",
            mode = "lines") %>%
  layout(title = "Power Consumption Feb 2nd, 2009",
         xaxis = list(title = "Time"),
         yaxis = list(title = "Power (watt-hours)"))
# Draw the three sub-meter series of `readings` against `datetime` as lines
# on one interactive chart.
#   readings: data frame with columns datetime, Sub_metering_1..3
#   title:    chart title
# Returns the plotly object (printed automatically when called at top level).
plot_sub_meters <- function(readings, title) {
  plot_ly(readings, x = ~datetime, y = ~Sub_metering_1, name = "Kitchen",
          type = "scatter", mode = "lines") %>%
    add_trace(y = ~Sub_metering_2, name = "Laundry Room", mode = "lines") %>%
    add_trace(y = ~Sub_metering_3, name = "Water Heater & AC",
              mode = "lines") %>%
    layout(title = title,
           xaxis = list(title = "Time"),
           yaxis = list(title = "Power (watt-hours)"))
}

# Thin the per-minute readings to one every 10 minutes (minute 0, 10, ..., 50)
# so the daily curves are easier to read.
# 2 February 2009:
houseDay10 <- filter(
  data,
  year == 2009 & month == 2 & day == 2 & minute %% 10 == 0
)
plot_sub_meters(houseDay10, "Power Consumption Feb 2nd, 2009")

# 10 May 2008 (data$minute is already available from preprocessing, so it is
# not recomputed here):
houseDay10 <- filter(
  data,
  year == 2008 & month == 5 & day == 10 & minute %% 10 == 0
)
plot_sub_meters(houseDay10, "Power Consumption May 10th, 2008")

# Time Series Analysis
data$hour <- hour(data$datetime)

# Sample one reading per week: every Monday (wday == 2, weeks starting on
# Sunday) at 20:01, from 2007 onwards so the series lines up with
# start = c(2007, 1). Filtering on week == 2 instead would pick every day of
# the second week of each year, which is not a weekly series and does not
# match frequency = 52.
# NOTE(review): rows dropped as incomplete during preprocessing leave gaps,
# so the series can be slightly shorter than the calendar span.
houseweekly <- filter(
  data,
  year >= 2007 & wday(datetime) == 2 & hour == 20 & minute == 1
)

# One weekly ts object per sub-meter, each named after the meter it holds.
tsSM3_weekly <- ts(houseweekly$Sub_metering_3,
                   frequency = 52, start = c(2007, 1))
plot(tsSM3_weekly, xlab = "Time", ylab = "Watt Hours", main = "Sub-meter 3")

tsSM1_weekly <- ts(houseweekly$Sub_metering_1,
                   frequency = 52, start = c(2007, 1))
plot(tsSM1_weekly, xlab = "Time", ylab = "Watt Hours", main = "Sub-meter 1")

tsSM2_weekly <- ts(houseweekly$Sub_metering_2,
                   frequency = 52, start = c(2007, 1))
plot(tsSM2_weekly, xlab = "Time", ylab = "Watt Hours", main = "Sub-meter 2")
# Weekly sample for 2007-2009: one reading per Monday (wday == 2) at 20:01.
# The series are declared with frequency = 52, so they need one observation
# per week over several years; daily readings from a single year would give
# tslm() meaningless "seasons". 1 January 2007 is a Monday, hence
# start = c(2007, 1).
house070809weekly <- filter(
  data,
  year %in% 2007:2009 & wday(datetime) == 2 & hour == 20 & minute == 1
)
tsSM3_070809weekly <- ts(house070809weekly$Sub_metering_3,
                         frequency = 52, start = c(2007, 1))
tsSM2_070809weekly <- ts(house070809weekly$Sub_metering_2,
                         frequency = 52, start = c(2007, 1))
tsSM1_070809weekly <- ts(house070809weekly$Sub_metering_1,
                         frequency = 52, start = c(2007, 1))

# Linear model with a trend term and weekly seasonal dummies per sub-meter.
fit3 <- tslm(tsSM3_070809weekly ~ trend + season)
fit2 <- tslm(tsSM2_070809weekly ~ trend + season)
fit1 <- tslm(tsSM1_070809weekly ~ trend + season)
summary(fit3)

# Forecast 20 periods ahead with 80% and 90% prediction intervals.
forecastfitSM3c <- forecast(fit3, h = 20, level = c(80, 90))
forecastfitSM2c <- forecast(fit2, h = 20, level = c(80, 90))
forecastfitSM1c <- forecast(fit1, h = 20, level = c(80, 90))

# The y-axis is clamped to 0-20 on every plot so the three are comparable.
plot(forecastfitSM3c, ylim = c(0, 20), ylab = "Watt-Hours", xlab = "Time")
plot(forecastfitSM2c, ylim = c(0, 20), ylab = "Watt-Hours", xlab = "Time")
plot(forecastfitSM1c, ylim = c(0, 20), ylab = "Watt-Hours", xlab = "Time")

# Conclusion
# Time series forecasting, exploratory data analysis and data visualization of the Household Electric Power Consumption dataset were successfully implemented and visualised using R.