#Author: Dr.R.Maheswari, Professor, School of Computing Science & Engineering, VIT Chennai
#Ex2- IOT based healthcare application using Generalized Linear model, Random forest and Decision trees in R
#R version 3.3.2 (2016-10-31)
#RStudio version 1.2.1335

# Loading all the necessary Libraries
library(rpart) #used for building classification and regression trees.
library(rpart.plot)
library(RColorBrewer) # helps you choose sensible colour schemes for figures
# library(rattle) # provides a collection of utility functions for a data scientist (not loaded here)
library(randomForest) #Used to create and analyse random forests.

# Load the heart-health dataset (CSV expected in the working directory).
data <- read.csv("heart_health.csv")

# Explore the dataset.
# Every result is wrapped in print(): bare top-level expressions are only
# auto-printed in an interactive session or under Rscript, and are silently
# discarded when this file is run through source() with its default settings.
print("Minimum resting blood pressure")
print(min(data$trestbps))
print("Maximum resting blood pressure")
print(max(data$trestbps))
print("Summary of Dataset")
print(summary(data))
print("Range of resting blood pressure")
# diff(range(x)) is max(x) - min(x)
print(diff(range(data$trestbps)))
# Quartiles (25%, 50%, 75%) of resting blood pressure
print(quantile(data$trestbps, probs = c(0.25, 0.5, 0.75)))
print("Column names of the Data")
print(names(data))
print("Attributes of the Data")
# str() writes its own output and returns NULL invisibly, so no print() here
str(data)
print("Number of Rows and Columns:")
print(dim(data))

# Analyze the correlation between resting blood pressure and age.
# Results are printed explicitly so they also appear when the script is run
# via source(), which does not auto-print top-level values by default.
print("Correlation between the resting blood pressure and the age")
# Pearson correlation coefficient only
print(cor(data$trestbps, data$age, method = "pearson"))
# Full significance test. The data$... form is kept on purpose: cor.test()
# echoes the argument expressions in the "data:" line of its report.
print(cor.test(data$trestbps, data$age, method = "pearson"))

# Constructing the GLMs (logistic regression: binomial family, logit link).
print("Constructing the Logistic regression Model")

# First model: target explained by resting BP, resting ECG and fasting blood
# sugar. It is not stored, so it is printed explicitly; otherwise the fit
# would be computed and silently thrown away when run via source().
print(
  glm(target ~ trestbps + restecg + fbs, data = data, family = binomial())
)

# Second model: target explained by resting BP, cholesterol and thalach.
# Kept in `model` for the diagnostic plots below.
model <- glm(
  target ~ trestbps + chol + thalach,
  data = data,
  family = binomial()
)

# Standard regression diagnostic plots for the stored model.
plot(model)

# Make the dependent variable a factor (categorical). randomForest() only
# fits a classification forest when the response is a factor.
data$target <- as.factor(data$target)

# Splitting the dataset into train and validation sets.
print("Train Test Split") # 70/30 Split

# Fix the RNG seed so the same rows are selected on every run.
set.seed(123)
n_rows <- nrow(data)

# Sorted row indices of the 70% training sample. floor() makes the
# truncation of the fractional sample size explicit.
dt <- sort(sample.int(n_rows, size = floor(n_rows * 0.7)))
train <- data[dt, ]

# Use setdiff() rather than data[-dt, ]: negative indexing with an empty
# index vector selects zero rows instead of all rows.
val <- data[setdiff(seq_len(n_rows), dt), ]

# No. of rows in the train and validation sets (printed explicitly so the
# counts are visible under source() as well).
print(nrow(train))
print(nrow(val))

# Constructing the Decision Tree Model: a classification tree for `target`
# fitted on the training split only.
print("Construction of the Decision Tree Model")
mtree <- rpart(
  target ~ trestbps + chol + thalach,
  data = train,
  method = "class",
  control = rpart.control(
    minsplit = 20,     # min. observations in a node before a split is tried
    minbucket = 7,     # min. observations allowed in any terminal node
    maxdepth = 10,     # max. depth of any node (root counts as depth 0)
    usesurrogate = 2,  # use surrogates; if all missing, follow the majority
    xval = 10          # number of cross-validations
  )
)

# Print the fitted tree explicitly so it is also shown under source().
print(mtree)

# Plotting the Decision Tree for the dataset
print("Plotting the Decision Tree")

# Plain tree skeleton with default labels, drawn with the current settings.
plot(mtree)
text(mtree)

# Allow labels to be drawn outside the plot region (xpd = NA) and shrink the
# margins for the next figures. par() returns the previous values, which are
# saved so they can be restored; otherwise the 0.7-line margins would leak
# into every later plot in the session and clip its axes and labels.
old_par <- par(xpd = NA, mar = rep(0.7, 4))

# Compressed layout, labelling all nodes with per-class observation counts.
plot(mtree, compress = TRUE)
text(mtree, cex = 0.7, use.n = TRUE, fancy = FALSE, all = TRUE)

# rpart.plot rendering: full factor level names (faclen = 0), red palette,
# extra = 1 adds the number of observations in each node.
prp(mtree, faclen = 0, box.palette = "Reds", cex = 0.8, extra = 1)

# Restore the graphics parameters that were in effect before the tree plots.
par(old_par)

# Constructing the Random Forest model.
# The forest is fitted on the training split, like the decision tree, so
# that the held-out `val` rows can be used for an honest evaluation. Fitting
# on the full `data` would leave the train/validation split unused.
# The seed makes the bootstrap samples, and hence the forest, reproducible.
set.seed(123)
rf <- randomForest(target ~ trestbps + oldpeak + cp, data = train)

# View the forest results (includes the out-of-bag error estimate)
print("Random Forest Results:")
print(rf)

# Importance of each predictor; type = 2 is the mean decrease in node impurity
print("Importance of each predictor:")
print(importance(rf, type = 2))

# Evaluate on the validation split: confusion matrix and overall accuracy
rf_pred <- predict(rf, newdata = val)
print("Random Forest validation confusion matrix:")
print(table(predicted = rf_pred, actual = val$target))
print("Random Forest validation accuracy:")
print(mean(rf_pred == val$target))

# Plot the Random Forest (error rates against the number of trees)
plot(rf)

# Conclusion
# Models such as decision trees, random forest and GLM were trained on the given dataset and the fitted models were visualised.
