######################Germination of Orobanche######################
######################Modeling Dispersion######################
#Orobanche is a genus of parasitic plants without chlorophyll that grows on the
#roots of flowering plants. An experiment was made where a bach of seeds of the
#species Orobanche aegyptiaca (Variety) was brushed onto a plate containing an extract
#prepared from the roots (roots) of either a bean or a cucumber plant. The number of
#seeds that germinated (y) was then recorded. Two varieties of Orobanche aegyptiaca
#namely O.a. 75 and O.a. 73 were used in the experiment.
#y: number of seeds germinated
#n: total number of seeds
#variety: 1, O.a. 75; 2: O.a. 73
#root: 1, bean; 2, cubumber

dat<-read.table(file="C:/jenn/teaching/stat579/data/seeds.txt",header=TRUE)
nrow(dat)  #21 obsns
head(dat)
dat$variety<-as.factor(dat$variety)
dat$root<-as.factor(dat$root)
dat$resp<-cbind(dat$y,(dat$n-dat$y)) #dat$y #of success, dat$n-dat$y #of failure
head(dat)

#Descriptive Statistics
ftable(xtabs(~variety + root, data = dat))
tapply(dat$y,dat$variety,mean)
tapply(dat$y,dat$root,mean)

##checking interactions
interaction.plot(dat$variety,dat$root,dat$y)

#fit logit model
#y_i ~Bin(n_i ; p_i ) 
#logit(pi) ~ variety+root+variety*root
fit1<-glm(resp~variety*root,family=binomial(link=logit),data=dat)
fit1

pval<-1-pchisq(33.28,17)
pval #0.01 reject sufficiency of the model

#residual plots
rp<- residuals(fit1,type="pearson") #pearson resids
resDev<-residuals(fit1,type='deviance') # Deviance residuals
plot(resDev, ylab="Deviance residuals")
plot(predict(fit1),resDev,xlab=(expression(hat(eta))),ylab="Deviance residuals")

par(mfrow=c(1,2))
plot(jitter(as.numeric(dat$variety),amount=0.1), resDev, xlab='Variety',
ylab="Deviance residuals", cex=0.6, axes=FALSE)
box()
axis(1,label=c('O.a. 75','O.a. 73'),at=c(1,2))
axis(2)
plot(jitter(as.numeric(dat$root),amount=0.1), resDev, xlab='Root',
ylab="Deviance residuals", cex=0.6, axes=FALSE)
box()
axis(1,label=c('Bean','Cucumber'),at=c(1,2))
axis(2)

#Nothing in the plots is shows an indication that the model is not
#reasonable. We doubt that the big residual deviance is because of
#overdispersion.
#Fit of model with overdispersion, quasi likelihood
fit2<-glm(resp~variety*root,family=quasibinomial,data=dat)
summary(fit2)
#compare to fit without taking care of dispersion
summary(fit1)
#Note that the standard errors shown in the summary output are bigger
#than without the overdispersion - multiplied with the overdispersion sqrt(1.8618)

#calculate dispersion parameter phi
phihat<-sum(rp^2)/fit1$df.res  
phihat


#Model reduction
fit2<-glm(resp~variety*root,family=quasibinomial,data=dat)
drop1(fit2, test="F")

fit3<-glm(resp~variety+root,family=quasibinomial,data=dat)
drop1(fit3, test="F")

fit4<-glm(resp~root,family=quasibinomial,data=dat)
drop1(fit4, test="F")

#confidence interval
par<-coef(fit4)
par
std<-sqrt(diag(vcov(fit4)))
std
par+std%o%c(lower=-1,upper=1)*qt(0.975,19)
confint.default(fit4) # same as above but with quantile qnorm(0.975)