#########################################Mixed Models################################################

###################################one way random effects model################################################
##install package lme4
library(nlme)
#onewayre <- read.table(file="W:/win1/luyan/teaching/stat579/web sampling/mixedmodel1/sas for mixed model/influent.txt",
header=T)
onewayre<-read.table(file="E:/influent.txt",header=T)
#onewayre <- read.table(file="C:/jenn/teachingrelated/stat579/data/influent.txt",header=T)
summary(onewayre)
#declare grouping variable as a factor
onewayre$influent.f<-as.factor(onewayre$influent)
boxplot(nitrogen~influent.f,onewayre,main="Boxplot",xlab = "Influent",
        ylab = "Nitrogen")

# fit a linear model, assuming one random error
nitrogen<-onewayre$nitrogen
influent.f<-onewayre$influent.f
inf.aov <- aov(nitrogen~influent.f)
summary(inf.aov)
inf.aov$fitted
inf.aov$resid
plot(inf.aov$resid~inf.aov$fitted)


# fit a linear mixed effects model, assuming two nested errors, defaut is REML
inf.lme <- lme(nitrogen~1,random=~1|influent.f,data=onewayre)
summary(inf.lme)
intervals(inf.lme)
fixef(inf.lme)
ranef(inf.lme)
fit.lme<-predict(inf.lme)
resid.lme<-resid(inf.lme)
plot(resid.lme~fit.lme)


#refitting model using ML
inf.lme2 <- lme(nitrogen~1,random=~1|influent.f,data=onewayre,method="ML")


###################################Hierarchical Data################################################
#Data MathAchieve and MathAchSchool ware from 1982 "High school and Beyond" survey containing 7185 high-school
#students from 160 schools. MathAchieve contains information about students and MathAchSchool contains information
#about schools. Data is available in nlme library. Code is based on John Fox's edition
library(nlme)
library(lattice) # for Trellis graphics
data(MathAchieve)
MathAchieve[1:10,]
nrow(MathAchieve)
data(MathAchSchool)
MathAchSchool[1:10,]
nrow(MathAchSchool)

##the following is to generate a new data "Bryk" containing the variables 
##"school, SES, MathAch, Sector, MEANSES, CSES (Bryk$cses <- Bryk$ses - Bryk$meanses)"
attach(MathAchieve)
mses <- tapply(SES, School, mean) # school means
mses[as.character(MathAchSchool$School[1:10])]
detach(MathAchieve)
Bryk <- as.data.frame(MathAchieve[, c("School", "SES", "MathAch")])
names(Bryk) <- c("school", "ses", "mathach")
sample20 <- sort(sample(7185, 20)) # 20 randomly sampled students
Bryk[sample20, ]
Bryk$meanses <- mses[as.character(Bryk$school)]
Bryk$cses <- Bryk$ses - Bryk$meanses
sector <- MathAchSchool$Sector
names(sector) <- row.names(MathAchSchool)
Bryk$sector <- sector[as.character(Bryk$school)]
Bryk[sample20,]

#Goal of the study: 
#(1) whether math achievement is related to socioeconomic status; 
#(2) whether this relationship varies systematically by sector; 
#(3) whether the relationship varies randomly across schools within the same sector


##Examine Data, randomly select 20 schools from Catholic and 20 from public, see the relationship between 
##mathachievements and the socioeconomic status of the student's family
attach(Bryk)
set.seed(100)
newdatacat<-Bryk[which(Bryk$sector=='Catholic'),]
nrow(newdatacat)
newdatapub<-Bryk[which(Bryk$sector=='Public'),]
nrow(newdatapub)
nrow(newdatacat)+nrow(newdatapub)
set.seed(100)
cat <- sample(unique(newdatacat$school[sector=='Catholic']), 20)
#cat <- sample(unique(school[sector=='Catholic']), 20)
Cat.20 <- groupedData(mathach ~ ses | school,data=Bryk[is.element(school, cat),])
set.seed(100)
pub <- sample(unique(newdatapub$school[sector=='Public']), 20)
Pub.20 <- groupedData(mathach ~ ses | school,data=Bryk[is.element(school, pub),])

trellis.device(color=F)
xyplot(mathach ~ ses | school, data=Cat.20, main="Catholic",
panel=function(x, y){
 panel.xyplot(x, y)
panel.loess(x, y, span=1)
panel.lmline(x, y, lty=2)
}
 )
xyplot(mathach ~ ses | school, data=Pub.20, main="Public",
panel=function(x, y){
 panel.xyplot(x, y)
 panel.loess(x, y, span=1)
panel.lmline(x, y, lty=2)
}
 )

#observe a weak positive relationship between math achivement and SES in most Catholic schools
#in some schools the slope of the regression line is near zero or even negative
#positive relationship between the two variables for most of the public schools, and 
#the average slope is larger


##Fit the regression of math-achievement scores on socioeconomic status for each school
##creating separate lmList objects for Catholic and public schools
cat.list <- lmList(mathach ~ ses | school, subset = sector=='Catholic',data=Bryk)#lmList to fit a linear 
#model to the observations in each group
pub.list <- lmList(mathach ~ ses | school, subset = sector=='Public',data=Bryk)
plot(intervals(cat.list), main='Catholic')
plot(intervals(pub.list), main='Public')

cat.fitted<-fitted(cat.list)
cat.fitted[1:10]
pub.fitted<-fitted(pub.list)
pub.fitted[1:10]
boxplot(cat.fitted, pub.fitted, main='fitted value',names=c('Catholic', 'Public'))


cat.coef<-coef(cat.list)
cat.coef[1:10,]
pub.coef<-coef(pub.list)
pub.coef[1:10,]
par(mfrow=c(1,2))
boxplot(cat.coef[,1], pub.coef[,1], main='Intercepts',names=c('Catholic', 'Public'))
boxplot(cat.coef[,2], pub.coef[,2], main='Slopes',names=c('Catholic', 'Public'))
#observe school to school variation


##Following Bryk and Raudenbush (1992) and Singer (1998), fit a hierarchical linear model to the data. 
##This model consists of two equations: First, within schools, we have the regression of
##math achievement on the individual-level covariate SES; 
##center SES at the school average, then the intercept for each school estimates the average level of math
##achievement in the school.

Bryk$sector <- factor(Bryk$sector, levels=c('Public', 'Catholic'))
contrasts(Bryk$sector)
bryk.lme.1 <- lme(mathach ~ meanses*cses + sector*cses,random = ~ cses | school,data=Bryk)
summary(bryk.lme.1)
bryk.lme.2 <- update(bryk.lme.1, random = ~ 1 | school) # omitting random effect of cses
summary(bryk.lme.2)
bryk.lme.3 <- update(bryk.lme.1,random = ~ cses - 1 | school) # omitting random intercept
summary(bryk.lme.3)
anova(bryk.lme.1, bryk.lme.2)
anova(bryk.lme.1, bryk.lme.3)
anova(bryk.lme.1, bryk.lme.2, bryk.lme.3)

##################################Repeated Measure################################################
library(lme4)
data(sleepstudy)
nrow(sleepstudy)
sleepstudy[1:20,]
#fit regression line
myfit1<-lm(Reaction~Days,data=sleepstudy)
myfit1
#fit regression line for each driver
myfit2<-lmList(Reaction ~ Days | Subject, data=sleepstudy)
myfit2
trellis.device(color=F)
xyplot(Reaction ~ Days | Subject, data=sleepstudy, main="xyplot",
panel=function(x, y){
 panel.xyplot(x, y)
panel.loess(x, y, span=1)
panel.lmline(x, y, lty=2)
}
 )


#fit mixed model
fm1 <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
fm1
##or use fm1 <- lme(Reaction ~ Days, random = ~ Days | Subject, data=sleepstudy)
fm2<-lmer(Reaction ~ Days + (1 | Subject) + (0 + Days | Subject), sleepstudy)
fm2
anova(fm2,fm1)
fm3 <- lmer(Reaction ~ Days + (1 | Subject), sleepstudy)
fm3
anova(fm3, fm2)
#choose model 2
ranef(fm2)