Drinking water analysis, monitoring and forecasting
Using the dataset https://raw.githubusercontent.com/jbrownlee/Datasets/master/yearly-water-usage.csv, which contains annual water consumption in Baltimore from 1885 to 1963 (in litres per capita per day), we apply a simple moving average (SMA), Holt-Winters filtering, the Mann-Kendall trend test and ts2df.
Procedure:
Import Dataset
Plot data as timeseries
Plot logarithmic timeseries
Plot SMA (Simple Moving Average)
Use Holt-Winters filtering
Forecast based on Holt-Winters
Test for a trend using Mann-Kendall
Perform decomposition of additive time series
Convert time series to data frame using ts2df
#1. Importing dataset and plotting values as a timeseries
# Load the yearly water-usage data. The local copy is used when it exists;
# otherwise the public CSV named in the introduction is read, so the analysis
# does not depend on one machine's directory layout.
local_csv <- "C:\\Users\\Lenovo\\waterdata.csv"
remote_csv <- paste0(
  "https://raw.githubusercontent.com/jbrownlee/Datasets/",
  "master/yearly-water-usage.csv"
)
df1 <- read.csv(if (file.exists(local_csv)) local_csv else remote_csv)
# Fail early with a clear message if the expected column is missing.
stopifnot("Water" %in% names(df1))
# One observation per year, beginning in 1885.
time_series <- ts(df1$Water, frequency = 1, start = 1885)
time_series
## Time Series:
## Start = 1885
## End = 1963
## Frequency = 1
## [1] 356 386 397 397 413 458 485 344 390 360 420 435 439 454 462 454 469 500 492
## [20] 473 458 469 481 488 466 462 473 530 662 651 587 515 526 503 503 503 515 522
## [39] 492 503 503 450 432 432 458 462 503 488 466 492 503 515 500 522 575 583 587
## [58] 628 640 609 606 632 617 613 598 575 564 549 538 568 575 579 587 602 594 587
## [77] 587 625 613
# Draw the raw annual series.
plot.ts(x = time_series)
#2 Plotting Logarithmic timeseries
# Natural logarithm of every observation; the result is still a ts object.
log_series <- log(time_series)
print(log_series)
## Time Series:
## Start = 1885
## End = 1963
## Frequency = 1
## [1] 5.874931 5.955837 5.983936 5.983936 6.023448 6.126869 6.184149 5.840642
## [9] 5.966147 5.886104 6.040255 6.075346 6.084499 6.118097 6.135565 6.118097
## [17] 6.150603 6.214608 6.198479 6.159095 6.126869 6.150603 6.175867 6.190315
## [25] 6.144186 6.135565 6.159095 6.272877 6.495266 6.478510 6.375025 6.244167
## [33] 6.265301 6.220590 6.220590 6.220590 6.244167 6.257668 6.198479 6.220590
## [41] 6.220590 6.109248 6.068426 6.068426 6.126869 6.135565 6.220590 6.190315
## [49] 6.144186 6.198479 6.220590 6.244167 6.214608 6.257668 6.354370 6.368187
## [57] 6.375025 6.442540 6.461468 6.411818 6.406880 6.448889 6.424869 6.418365
## [65] 6.393591 6.354370 6.335054 6.308098 6.287859 6.342121 6.354370 6.361302
## [73] 6.375025 6.400257 6.386879 6.375025 6.375025 6.437752 6.418365
# Draw the log-transformed series.
plot.ts(x = log_series)
#3 Simple Moving Average(SMA)
library(TTR)
# Smooth the original series with a moving average over 3 observations.
SMA_series <- SMA(x = time_series, n = 3)
plot.ts(x = SMA_series)
#4 Holt-Winters filtering
# beta = FALSE and gamma = FALSE disable the trend and seasonal terms, leaving
# simple exponential smoothing; alpha is estimated from the data.
time_series_forecasts <- HoltWinters(
  x = time_series,
  beta = FALSE,
  gamma = FALSE
)
print(time_series_forecasts)
## Holt-Winters exponential smoothing without trend and without seasonal component.
##
## Call:
## HoltWinters(x = time_series, beta = FALSE, gamma = FALSE)
##
## Smoothing parameters:
## alpha: 0.9999294
## beta : FALSE
## gamma: FALSE
##
## Coefficients:
## [,1]
## a 613.0008
# Fitted values (xhat) and smoothed level for each year from 1886 onward.
time_series_forecasts[["fitted"]]
## Time Series:
## Start = 1886
## End = 1963
## Frequency = 1
## xhat level
## 1886 356.0000 356.0000
## 1887 385.9979 385.9979
## 1888 396.9992 396.9992
## 1889 397.0000 397.0000
## 1890 412.9989 412.9989
## 1891 457.9968 457.9968
## 1892 484.9981 484.9981
## 1893 344.0099 344.0099
## 1894 389.9968 389.9968
## 1895 360.0021 360.0021
## 1896 419.9958 419.9958
## 1897 434.9989 434.9989
## 1898 438.9997 438.9997
## 1899 453.9989 453.9989
## 1900 461.9994 461.9994
## 1901 454.0006 454.0006
## 1902 468.9989 468.9989
## 1903 499.9978 499.9978
## 1904 492.0006 492.0006
## 1905 473.0013 473.0013
## 1906 458.0011 458.0011
## 1907 468.9992 468.9992
## 1908 480.9992 480.9992
## 1909 487.9995 487.9995
## 1910 466.0016 466.0016
## 1911 462.0003 462.0003
## 1912 472.9992 472.9992
## 1913 529.9960 529.9960
## 1914 661.9907 661.9907
## 1915 651.0008 651.0008
## 1916 587.0045 587.0045
## 1917 515.0051 515.0051
## 1918 525.9992 525.9992
## 1919 503.0016 503.0016
## 1920 503.0000 503.0000
## 1921 503.0000 503.0000
## 1922 514.9992 514.9992
## 1923 521.9995 521.9995
## 1924 492.0021 492.0021
## 1925 502.9992 502.9992
## 1926 503.0000 503.0000
## 1927 450.0037 450.0037
## 1928 432.0013 432.0013
## 1929 432.0000 432.0000
## 1930 457.9982 457.9982
## 1931 461.9997 461.9997
## 1932 502.9971 502.9971
## 1933 488.0011 488.0011
## 1934 466.0016 466.0016
## 1935 491.9982 491.9982
## 1936 502.9992 502.9992
## 1937 514.9992 514.9992
## 1938 500.0011 500.0011
## 1939 521.9984 521.9984
## 1940 574.9963 574.9963
## 1941 582.9994 582.9994
## 1942 586.9997 586.9997
## 1943 627.9971 627.9971
## 1944 639.9992 639.9992
## 1945 609.0022 609.0022
## 1946 606.0002 606.0002
## 1947 631.9982 631.9982
## 1948 617.0011 617.0011
## 1949 613.0003 613.0003
## 1950 598.0011 598.0011
## 1951 575.0016 575.0016
## 1952 564.0008 564.0008
## 1953 549.0011 549.0011
## 1954 538.0008 538.0008
## 1955 567.9979 567.9979
## 1956 574.9995 574.9995
## 1957 578.9997 578.9997
## 1958 586.9994 586.9994
## 1959 601.9989 601.9989
## 1960 594.0006 594.0006
## 1961 587.0005 587.0005
## 1962 587.0000 587.0000
## 1963 624.9973 624.9973
# Plot the Holt-Winters fit.
plot(x = time_series_forecasts)
#5 Forecasting
# Sum of squared errors of the fit.
time_series_forecasts[["SSE"]]
## [1] 84473.46
# Refit with an explicit initial level. The level starts at the first
# observation of this series (356) instead of the hard-coded 23.56, which lies
# far below every observed value (the series minimum is 344).
HoltWinters(time_series, beta = FALSE, gamma = FALSE, l.start = time_series[1])
## Holt-Winters exponential smoothing without trend and without seasonal component.
##
## Call:
## HoltWinters(x = time_series, beta = FALSE, gamma = FALSE, l.start = time_series[1])
##
## Smoothing parameters:
## alpha: 0.9999294
## beta : FALSE
## gamma: FALSE
##
## Coefficients:
## [,1]
## a 613.0008
#6 MannKendall
library("Kendall")
## Warning: package 'Kendall' was built under R version 4.1.3
# Mann-Kendall trend test on the annual series.
MannKendall(x = time_series)
## tau = 0.599, 2-sided pvalue =< 2.22e-16
# Re-plot the series and overlay a lowess smoother in blue.
plot(x = time_series)
lines(
  x = lowess(x = time(time_series), y = time_series),
  col = "blue"
)
#7 Decomposition of Additive timeseries
# NOTE(review): the data are annual, but frequency = 12 makes ts() label the
# values as monthly observations starting in January 1885, so the "seasonal"
# component printed below is computed over 12-value cycles of yearly data.
# Confirm that this is intended.
# This assignment also replaces the annual `time_series` object created in
# section #1.
time_series <- ts(data = df1$Water, frequency = 12, start = 1885)
time_series_components <- decompose(x = time_series)
time_series_components[["seasonal"]]
## Jan Feb Mar Apr May Jun
## 1885 -7.551042 1.490625 -4.451042 6.182292 26.557292 17.265625
## 1886 -7.551042 1.490625 -4.451042 6.182292 26.557292 17.265625
## 1887 -7.551042 1.490625 -4.451042 6.182292 26.557292 17.265625
## 1888 -7.551042 1.490625 -4.451042 6.182292 26.557292 17.265625
## 1889 -7.551042 1.490625 -4.451042 6.182292 26.557292 17.265625
## 1890 -7.551042 1.490625 -4.451042 6.182292 26.557292 17.265625
## 1891 -7.551042 1.490625 -4.451042 6.182292 26.557292 17.265625
## Jul Aug Sep Oct Nov Dec
## 1885 17.386458 -25.551042 -18.252431 -15.439931 4.032292 -1.669097
## 1886 17.386458 -25.551042 -18.252431 -15.439931 4.032292 -1.669097
## 1887 17.386458 -25.551042 -18.252431 -15.439931 4.032292 -1.669097
## 1888 17.386458 -25.551042 -18.252431 -15.439931 4.032292 -1.669097
## 1889 17.386458 -25.551042 -18.252431 -15.439931 4.032292 -1.669097
## 1890 17.386458 -25.551042 -18.252431 -15.439931 4.032292 -1.669097
## 1891 17.386458
# Plot the decomposition.
plot(x = time_series_components)
#8 ts2df
library("wql")
## Warning: package 'wql' was built under R version 4.1.3
# Reshape the series into a table with one row per year and one column per
# month; incomplete years are kept and padded with NA.
ts2df(x = time_series, mon1 = 1, addYr = FALSE, omit = FALSE)
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1885 356 386 397 397 413 458 485 344 390 360 420 435
## 1886 439 454 462 454 469 500 492 473 458 469 481 488
## 1887 466 462 473 530 662 651 587 515 526 503 503 503
## 1888 515 522 492 503 503 450 432 432 458 462 503 488
## 1889 466 492 503 515 500 522 575 583 587 628 640 609
## 1890 606 632 617 613 598 575 564 549 538 568 575 579
## 1891 587 602 594 587 587 625 613 NA NA NA NA NA
```
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.