#########################################Mixed Models################################################ ###################################one way random effects model################################################ ##install package lme4 library(nlme) #onewayre <- read.table(file="W:/win1/luyan/teaching/stat579/web sampling/mixedmodel1/sas for mixed model/influent.txt", header=T) onewayre<-read.table(file="E:/influent.txt",header=T) #onewayre <- read.table(file="C:/jenn/teachingrelated/stat579/data/influent.txt",header=T) summary(onewayre) #declare grouping variable as a factor onewayre$influent.f<-as.factor(onewayre$influent) boxplot(nitrogen~influent.f,onewayre,main="Boxplot",xlab = "Influent", ylab = "Nitrogen") # fit a linear model, assuming one random error nitrogen<-onewayre$nitrogen influent.f<-onewayre$influent.f inf.aov <- aov(nitrogen~influent.f) summary(inf.aov) inf.aov$fitted inf.aov$resid plot(inf.aov$resid~inf.aov$fitted) # fit a linear mixed effects model, assuming two nested errors, defaut is REML inf.lme <- lme(nitrogen~1,random=~1|influent.f,data=onewayre) summary(inf.lme) intervals(inf.lme) fixef(inf.lme) ranef(inf.lme) fit.lme<-predict(inf.lme) resid.lme<-resid(inf.lme) plot(resid.lme~fit.lme) #refitting model using ML inf.lme2 <- lme(nitrogen~1,random=~1|influent.f,data=onewayre,method="ML") ###################################Hierarchical Data################################################ #Data MathAchieve and MathAchSchool ware from 1982 "High school and Beyond" survey containing 7185 high-school #students from 160 schools. MathAchieve contains information about students and MathAchSchool contains information #about schools. Data is available in nlme library. Code is based on John Fox's edition library(nlme) library(lattice) # for Trellis graphics data(MathAchieve) MathAchieve[1:10,] nrow(MathAchieve) data(MathAchSchool) MathAchSchool[1:10,] nrow(MathAchSchool) ##the following is to generate a new data "Bryk" containing the variables ##"school, SES, MathAch, Sector, MEANSES, CSES (Bryk$cses <- Bryk$ses - Bryk$meanses)" attach(MathAchieve) mses <- tapply(SES, School, mean) # school means mses[as.character(MathAchSchool$School[1:10])] detach(MathAchieve) Bryk <- as.data.frame(MathAchieve[, c("School", "SES", "MathAch")]) names(Bryk) <- c("school", "ses", "mathach") sample20 <- sort(sample(7185, 20)) # 20 randomly sampled students Bryk[sample20, ] Bryk$meanses <- mses[as.character(Bryk$school)] Bryk$cses <- Bryk$ses - Bryk$meanses sector <- MathAchSchool$Sector names(sector) <- row.names(MathAchSchool) Bryk$sector <- sector[as.character(Bryk$school)] Bryk[sample20,] #Goal of the study: #(1) whether math achievement is related to socioeconomic status; #(2) whether this relationship varies systematically by sector; #(3) whether the relationship varies randomly across schools within the same sector ##Examine Data, randomly select 20 schools from Catholic and 20 from public, see the relationship between ##mathachievements and the socioeconomic status of the student's family attach(Bryk) set.seed(100) newdatacat<-Bryk[which(Bryk$sector=='Catholic'),] nrow(newdatacat) newdatapub<-Bryk[which(Bryk$sector=='Public'),] nrow(newdatapub) nrow(newdatacat)+nrow(newdatapub) set.seed(100) cat <- sample(unique(newdatacat$school[sector=='Catholic']), 20) #cat <- sample(unique(school[sector=='Catholic']), 20) Cat.20 <- groupedData(mathach ~ ses | school,data=Bryk[is.element(school, cat),]) set.seed(100) pub <- sample(unique(newdatapub$school[sector=='Public']), 20) Pub.20 <- groupedData(mathach ~ ses | school,data=Bryk[is.element(school, pub),]) trellis.device(color=F) xyplot(mathach ~ ses | school, data=Cat.20, main="Catholic", panel=function(x, y){ panel.xyplot(x, y) panel.loess(x, y, span=1) panel.lmline(x, y, lty=2) } ) xyplot(mathach ~ ses | school, data=Pub.20, main="Public", panel=function(x, y){ panel.xyplot(x, y) panel.loess(x, y, span=1) panel.lmline(x, y, lty=2) } ) #observe a weak positive relationship between math achivement and SES in most Catholic schools #in some schools the slope of the regression line is near zero or even negative #positive relationship between the two variables for most of the public schools, and #the average slope is larger ##Fit the regression of math-achievement scores on socioeconomic status for each school ##creating separate lmList objects for Catholic and public schools cat.list <- lmList(mathach ~ ses | school, subset = sector=='Catholic',data=Bryk)#lmList to fit a linear #model to the observations in each group pub.list <- lmList(mathach ~ ses | school, subset = sector=='Public',data=Bryk) plot(intervals(cat.list), main='Catholic') plot(intervals(pub.list), main='Public') cat.fitted<-fitted(cat.list) cat.fitted[1:10] pub.fitted<-fitted(pub.list) pub.fitted[1:10] boxplot(cat.fitted, pub.fitted, main='fitted value',names=c('Catholic', 'Public')) cat.coef<-coef(cat.list) cat.coef[1:10,] pub.coef<-coef(pub.list) pub.coef[1:10,] par(mfrow=c(1,2)) boxplot(cat.coef[,1], pub.coef[,1], main='Intercepts',names=c('Catholic', 'Public')) boxplot(cat.coef[,2], pub.coef[,2], main='Slopes',names=c('Catholic', 'Public')) #observe school to school variation ##Following Bryk and Raudenbush (1992) and Singer (1998), fit a hierarchical linear model to the data. ##This model consists of two equations: First, within schools, we have the regression of ##math achievement on the individual-level covariate SES; ##center SES at the school average, then the intercept for each school estimates the average level of math ##achievement in the school. Bryk$sector <- factor(Bryk$sector, levels=c('Public', 'Catholic')) contrasts(Bryk$sector) bryk.lme.1 <- lme(mathach ~ meanses*cses + sector*cses,random = ~ cses | school,data=Bryk) summary(bryk.lme.1) bryk.lme.2 <- update(bryk.lme.1, random = ~ 1 | school) # omitting random effect of cses summary(bryk.lme.2) bryk.lme.3 <- update(bryk.lme.1,random = ~ cses - 1 | school) # omitting random intercept summary(bryk.lme.3) anova(bryk.lme.1, bryk.lme.2) anova(bryk.lme.1, bryk.lme.3) anova(bryk.lme.1, bryk.lme.2, bryk.lme.3) ##################################Repeated Measure################################################ library(lme4) data(sleepstudy) nrow(sleepstudy) sleepstudy[1:20,] #fit regression line myfit1<-lm(Reaction~Days,data=sleepstudy) myfit1 #fit regression line for each driver myfit2<-lmList(Reaction ~ Days | Subject, data=sleepstudy) myfit2 trellis.device(color=F) xyplot(Reaction ~ Days | Subject, data=sleepstudy, main="xyplot", panel=function(x, y){ panel.xyplot(x, y) panel.loess(x, y, span=1) panel.lmline(x, y, lty=2) } ) #fit mixed model fm1 <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy) fm1 ##or use fm1 <- lme(Reaction ~ Days, random = ~ Days | Subject, data=sleepstudy) fm2<-lmer(Reaction ~ Days + (1 | Subject) + (0 + Days | Subject), sleepstudy) fm2 anova(fm2,fm1) fm3 <- lmer(Reaction ~ Days + (1 | Subject), sleepstudy) fm3 anova(fm3, fm2) #choose model 2 ranef(fm2)