######################Germination of Orobanche######################
######################Modeling Dispersion######################
#Orobanche is a genus of parasitic plants without chlorophyll that grows on the
#roots of flowering plants. An experiment was made where a bach of seeds of the
#species Orobanche aegyptiaca (Variety) was brushed onto a plate containing an extract
#prepared from the roots (roots) of either a bean or a cucumber plant. The number of
#seeds that germinated (y) was then recorded. Two varieties of Orobanche aegyptiaca
#namely O.a. 75 and O.a. 73 were used in the experiment.
#y: number of seeds germinated
#n: total number of seeds
#variety: 1, O.a. 75; 2: O.a. 73
#root: 1, bean; 2, cubumber

dat<-read.table(file="C:/jenn/teaching/stat579/data/seeds.txt",header=TRUE)
nrow(dat)  #21 obsns
## [1] 21
head(dat)
##   variety root  y  n
## 1       1    1 10 39
## 2       1    1 23 62
## 3       1    1 23 81
## 4       1    1 26 51
## 5       1    1 17 39
## 6       1    2  5  6
dat$variety<-as.factor(dat$variety)
dat$root<-as.factor(dat$root)
dat$resp<-cbind(dat$y,(dat$n-dat$y)) #dat$y #of success, dat$n-dat$y #of failure
head(dat)
##   variety root  y  n resp.1 resp.2
## 1       1    1 10 39     10     29
## 2       1    1 23 62     23     39
## 3       1    1 23 81     23     58
## 4       1    1 26 51     26     25
## 5       1    1 17 39     17     22
## 6       1    2  5  6      5      1
#Descriptive Statistics
ftable(xtabs(~variety + root, data = dat))
##         root 1 2
## variety         
## 1            5 6
## 2            5 5
tapply(dat$y,dat$variety,mean)
##        1        2 
## 27.27273 12.40000
tapply(dat$y,dat$root,mean)
##        1        2 
## 14.80000 25.09091
##checking interactions
interaction.plot(dat$variety,dat$root,dat$y)

#fit logit model
#y_i ~Bin(n_i ; p_i ) 
#logit(pi) ~ variety+root+variety*root
fit1<-glm(resp~variety*root,family=binomial(link=logit),data=dat)
fit1
## 
## Call:  glm(formula = resp ~ variety * root, family = binomial(link = logit), 
##     data = dat)
## 
## Coefficients:
##    (Intercept)        variety2           root2  variety2:root2  
##        -0.5582          0.1459          1.3182         -0.7781  
## 
## Degrees of Freedom: 20 Total (i.e. Null);  17 Residual
## Null Deviance:       98.72 
## Residual Deviance: 33.28     AIC: 117.9
pval<-1-pchisq(33.28,17)
pval #0.01 reject sufficiency of the model
## [1] 0.01038509
#residual plots
rp<- residuals(fit1,type="pearson") #pearson resids
resDev<-residuals(fit1,type='deviance') # Deviance residuals
plot(resDev, ylab="Deviance residuals")

plot(predict(fit1),resDev,xlab=(expression(hat(eta))),ylab="Deviance residuals")

par(mfrow=c(1,2))
plot(jitter(as.numeric(dat$variety),amount=0.1), resDev, xlab='Variety',
ylab="Deviance residuals", cex=0.6, axes=FALSE)
box()
axis(1,label=c('O.a. 75','O.a. 73'),at=c(1,2))
axis(2)
plot(jitter(as.numeric(dat$root),amount=0.1), resDev, xlab='Root',
ylab="Deviance residuals", cex=0.6, axes=FALSE)
box()
axis(1,label=c('Bean','Cucumber'),at=c(1,2))
axis(2)

#Nothing in the plots is shows an indication that the model is not
#reasonable. We doubt that the big residual deviance is because of
#overdispersion.
#Fit of model with overdispersion, quasi likelihood
fit2<-glm(resp~variety*root,family=quasibinomial,data=dat)
summary(fit2)
## 
## Call:
## glm(formula = resp ~ variety * root, family = quasibinomial, 
##     data = dat)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.01617  -1.24398   0.05995   0.84695   2.12123  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -0.5582     0.1720  -3.246  0.00475 ** 
## variety2         0.1459     0.3045   0.479  0.63789    
## root2            1.3182     0.2422   5.444 4.38e-05 ***
## variety2:root2  -0.7781     0.4181  -1.861  0.08014 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 1.861832)
## 
##     Null deviance: 98.719  on 20  degrees of freedom
## Residual deviance: 33.278  on 17  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 4
#compare to fit without taking care of dispersion
summary(fit1)
## 
## Call:
## glm(formula = resp ~ variety * root, family = binomial(link = logit), 
##     data = dat)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.01617  -1.24398   0.05995   0.84695   2.12123  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -0.5582     0.1260  -4.429 9.46e-06 ***
## variety2         0.1459     0.2232   0.654   0.5132    
## root2            1.3182     0.1775   7.428 1.10e-13 ***
## variety2:root2  -0.7781     0.3064  -2.539   0.0111 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 98.719  on 20  degrees of freedom
## Residual deviance: 33.278  on 17  degrees of freedom
## AIC: 117.87
## 
## Number of Fisher Scoring iterations: 4
#Note that the standard errors shown in the summary output are bigger
#than without the overdispersion - multiplied with the overdispersion sqrt(1.8618)

#calculate dispersion parameter phi
phihat<-sum(rp^2)/fit1$df.res  
phihat
## [1] 1.861832
#Model reduction
fit2<-glm(resp~variety*root,family=quasibinomial,data=dat)
drop1(fit2, test="F")
## Single term deletions
## 
## Model:
## resp ~ variety * root
##              Df Deviance F value  Pr(>F)  
## <none>            33.278                  
## variety:root  1   39.686  3.2736 0.08812 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit3<-glm(resp~variety+root,family=quasibinomial,data=dat)
drop1(fit3, test="F")
## Single term deletions
## 
## Model:
## resp ~ variety + root
##         Df Deviance F value    Pr(>F)    
## <none>       39.686                      
## variety  1   42.751  1.3902    0.2537    
## root     1   96.175 25.6214 8.124e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit4<-glm(resp~root,family=quasibinomial,data=dat)
drop1(fit4, test="F")
## Single term deletions
## 
## Model:
## resp ~ root
##        Df Deviance F value    Pr(>F)    
## <none>      42.751                      
## root    1   98.719  24.874 8.176e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#confidence interval
par<-coef(fit4)
par
## (Intercept)       root2 
##  -0.5121761   1.0574031
std<-sqrt(diag(vcov(fit4)))
std
## (Intercept)       root2 
##   0.1531186   0.2118211
par+std%o%c(lower=-1,upper=1)*qt(0.975,19)
##                  lower      upper
## (Intercept) -0.8326570 -0.1916952
## root2        0.6140564  1.5007498
confint.default(fit4) # same as above but with quantile qnorm(0.975)
##                  2.5 %     97.5 %
## (Intercept) -0.8122830 -0.2120691
## root2        0.6422414  1.4725649