#Title of Experiment: Statistical Analysis Using R. 
#OS :Windows 10 Pro
#R Version 3.6.3
#R Studio Version 1.3.1093
# Package setup ----
# Install only the packages that are not available yet, then attach them.
# The package that provides library(readr) is "readr"; "reader" is a
# different CRAN package, so installing it would leave library(readr) failing.
# local() keeps the helper variables out of the global workspace.
local({
  required_pkgs <- c("readr", "modeest", "ggpubr", "ggplot2")
  is_installed <- vapply(
    required_pkgs,
    requireNamespace,
    logical(1),
    quietly = TRUE
  )
  missing_pkgs <- required_pkgs[!is_installed]
  if (length(missing_pkgs) > 0) {
    install.packages(missing_pkgs)
  }
})

library(readr) # to read csv file
library(modeest) # to calculate mode of the data
library(ggpubr) # to create ggplot2 based ready graphs
library(ggplot2) # to represent both univariate and multivariate numerical and categorical data

# The workspace is deliberately not cleared with rm(list = ls()): doing so
# would delete objects belonging to whoever sources this script, and every
# object used below is created by the script itself.


# Q1) Calculate mean and mode of StudyHours from given table.
# Load the table from "studmark.csv" in the current working directory.
studmark <- read.csv(file = "studmark.csv")

# Arithmetic mean of the StudyHours column.
cal1.mean <- mean(x = studmark$StudyHours)
print(cal1.mean)

# Mode of the StudyHours column, computed with mfv() from modeest.
cal2.mode <- mfv(studmark$StudyHours)
print(cal2.mode)


# Q2) Calculate covariance between StudyHours and Marks.
# Both vectors are kept as top-level objects under their original names.
StudyHours <- studmark$StudyHours
Marks <- studmark$Marks

# Compute the covariance once and print it explicitly, so the value is shown
# even when the script is run through source(), which does not auto-print.
hours_marks_cov <- cov(StudyHours, Marks)
print(hours_marks_cov)

# Draw the covariance value as a single red vertical spike (type = "h").
# ylab is set explicitly so the axis label still names the plotted expression.
plot(
  hours_marks_cov,
  type = "h",
  col = "red",
  ylab = "cov(StudyHours, Marks)",
  main = "Testing Covariance "
)


# Q3) Calculate Standard Deviation of marks obtained by students.
# Store the Marks column from the studmark data frame.
s_marks <- studmark$Marks

# Print explicitly so the result is shown under source() as well as when the
# script is run interactively.
print(sd(s_marks))


# Q4) Calculate variance of StudyHours from above data.
# Store the StudyHours column from the studmark data frame.
StudyHours <- studmark$StudyHours

# Print explicitly so the result is shown under source() as well as when the
# script is run interactively.
print(var(StudyHours))


# Q5) Determine the relation between StudyHours and marks obtained by students
# using linear regression.

# Fit the regression of Marks on StudyHours once; the same fitted model is
# reused for the printed coefficients and for the line on the base plot.
marks_fit <- lm(studmark$Marks ~ studmark$StudyHours)
print(marks_fit)

# ggplot2 scatter plot of StudyHours vs Marks with the fitted straight line.
# abline() cannot draw on a ggplot (grid graphics), so the line is added with
# geom_smooth(). print() makes the plot render under source() as well.
print(
  ggplot(studmark, aes(x = StudyHours, y = Marks)) +
    geom_point() +
    geom_smooth(method = "lm", formula = y ~ x, se = FALSE)
)

# Base-graphics version: the scatter plot must exist before abline() can add
# the regression line to it.
plot(
  studmark$StudyHours,
  studmark$Marks,
  xlab = "StudyHours",
  ylab = "Marks"
)
abline(marks_fit)

# Rank-based (Spearman) correlation test between StudyHours and Marks.
cal3.correlation <- cor.test(
  studmark$StudyHours,
  studmark$Marks,
  method = "spearman"
)
print(cal3.correlation)

# Correlation coefficient using cor()'s default method (Pearson).
a <- cor(studmark$StudyHours, studmark$Marks)
print(a)



# Q6) Predict the marks of students who study for 14 hours in a week.

# Fit a linear regression of Marks on StudyHours using the studmark columns.
model <- lm(Marks ~ StudyHours, data = studmark)
# Print explicitly so the coefficients are shown under source() too.
print(model)

# New data for prediction; the column name must match the predictor name used
# in the model formula.
new.StudyHours <- data.frame(StudyHours = 14)

# Point prediction of Marks for 14 study hours.
print(predict(model, newdata = new.StudyHours))

# Same prediction with a prediction interval (fit, lwr, upr), which gives the
# uncertainty around a single predicted value.
print(predict(model, newdata = new.StudyHours, interval = "prediction"))

