# Statistics for Management and Economics by Gerald Keller
# Chapter 14: ANALYSIS OF VARIANCE
# Example 14.4 on Pg 564
# Comparing the Lifetime Number of Jobs by Educational Level 


# Read the example data set; file.choose() lets the user pick the file
# interactively (choose Xm14-04.csv).
data1 <- read.csv(file = file.choose())
# Inspect the column names and the first rows of the data.
colnames(data1)
head(data1)


# One-way ANOVA based on factor 'grp' (each level of grp is one treatment).
# The resulting table is printed when this line is run at the console.
anova(lm(noofjobs ~ grp, data = data1))
# Answer: One-way ANOVA: Male E1, Male E2, Male E3, Male E4, Female E1, Female E2, ...

# Analysis of Variance Table

# Response: noofjobs
#           Df Sum Sq Mean Sq  F value Pr(>F)
# grp        7 153.35  21.907   2.172 0.04674 *
# Residuals 72 726.20  10.086

# Textbook conclusion for the table above. Every piece ends in "\n" so the
# second line is not printed with stray source indentation and whatever is
# printed next starts on a fresh line.
cat(
  "The value of the test statistic is F = 2.17 with a p-value = 0.0467.\n",
  "We conclude that there are differences in the number of jobs between the eight treatments.\n",
  sep = ""
)




# Two-way ANOVA with interaction based on factors (education, gender).
# `gender * education` expands to both main effects plus the
# gender:education interaction term.
anova(lm(noofjobs ~ gender * education, data = data1))

# Answer: Two-way ANOVA: Jobs versus Gender, Education

# Analysis of Variance Table

# Response: noofjobs
#                  Df Sum Sq Mean Sq F value   Pr(>F)
# gender            1  11.25  11.250  1.1154 0.294443
# education         3 135.85  45.283  4.4897 0.006043 **
# gender:education  3   6.25   2.083  0.2066 0.891546
# Residuals        72 726.20  10.086

# Textbook conclusions for the three tests in the table above. Each message
# is terminated with "\n"; without it the three conclusions are printed
# back-to-back on one line.

# Main effect of gender.
cat(
  "There is not evidence at the 5% significance level to infer that differences in\n",
  "the number of jobs exist between men and women; as can be seen with F = 1.12 and p-value = 0.2944.\n",
  sep = ""
)

# Main effect of education.
cat(
  "There is sufficient evidence at the 5% significance level to infer that differences in\n",
  "the number of jobs exist between educational levels; as can be seen with F = 4.49 and p-value = 0.006.\n",
  sep = ""
)

# Gender-by-education interaction.
cat(
  "There is not enough evidence to conclude that there is an interaction between gender and education;\n",
  "as can be seen with F = 0.21 and p-value = 0.8915.\n",
  sep = ""
)


# Line plot
# Install ggplot2 only when it is missing, instead of re-installing it on
# every run of the script.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
# The ggplot() call below gives Figure 14.5 on Pg 572: the mean of noofjobs
# at each education level, with one line (and colour) per gender.
# `fun = mean` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0).
ggplot(
  data = data1,
  aes(x = education, y = noofjobs, group = gender, color = gender)
) +
  stat_summary(fun = mean, geom = "line") +
  stat_summary(fun = mean, geom = "point")

#End
