#################################################################################### ##############Ratio and Regression Estimation####################################### #################################################################################### #####################Example 4.2#################################################################################### library(survey) library(SDaA) data(agsrs) agsrs[1:10,] agsrs10<-agsrs[1:10,] fix(agsrs10) n<-nrow(agsrs) n agsrsdesign <- svydesign(ids = ~1, fpc=rep(3078,300),data = agsrs) agsrsdesign ##Ratio estimation using acres87 to be auxiliary variable sratio<-svyratio(numerator = ~acres92, denominator = ~acres87,design = agsrsdesign) sratio confint(sratio, df=n-1) cor(agsrs$acres87,agsrs$acres92) #correlation between acres 87 and acres 92 is as high as 0.995806 ##calculate se of \hat B by direct formula e<-agsrs$acres92-.986565*agsrs$acres87 #residuals sesquare<-sum(e^2)/(n-1) sesquare xbar<-mean(agsrs$acres87) xbar vhatb<-(1-300/3078)*sesquare/(300*xbar^2) sevhatb<-sqrt(vhatb) sevhatb ##estimate ratio population total data(agpop) tx<-sum(agpop$acres87) tx #extract ratio estimator and se of the ratio estimator bhat<-as.data.frame(sratio[1])[1] bhat varbhat<-as.data.frame(sratio[2])[1] varbhat sebhat<-sqrt(varbhat) sebhat hatty<-tx*bhat hatty sehatty<-tx*sebhat sehatty hatty_CI <- c(hatty - qt(.975, n-1)*sehatty, hatty + qt(.975, n-1)*sehatty) names(hatty_CI) <- c("lower", "upper") hatty_CI ##compared to SRS estimator stotal<-svytotal(~acres92,agsrsdesign) stotal confint(stotal, level=.95,df=n-1) #####################Regression example of trees###################################################################### pf <- data.frame(photo = c(10, 12, 7, 13, 13, 6, 17, 16, 15, 10, 14, 12, 10, 5, 12, 10, 10, 9, 6, 11, 7, 9, 11, 10, 10), field = c(15, 14, 9, 14, 8, 5, 18, 15, 13, 15, 11, 15, 12, 8, 13, 9, 11, 12, 9, 12, 13, 11, 10, 9, 8)) pf photo<-pf$photo field<-pf$field plot(pf, main="Scatter Plot") ##Fit ordinary regression myfit<-lm(field~photo) ## Fit the model 'field' = a + b*'photo' ## Fit with survey regression dtree<- svydesign(id = ~1, fpc=rep(100,25), data = pf) myfit1 <- svyglm(field~photo, design=dtree) #compare different fits summary(myfit) summary(myfit1) confint(myfit, df=23) confint(myfit1,df=23) ## Assign the estimates to b_0 and b_1 b_0 <- myfit1$coef[1] b_1 <- myfit1$coef[2] ## Plot the data with the fitted LS line plot(pf, main="Fitted Line Plot") abline(b_0, b_1) points(x=11.3,y=11.99,col="purple",cex=2.0) ##estimate population mean newdata <- data.frame(photo=11.3) pmean<-predict(myfit1, newdata) pmean CI<-confint(pmean,df=23) CI #calculate hatybar using direct formula hatybar<-b_0+11.3*b_1 hatybar #calculate se(hatybar) using direct formula resid<-myfit1$residuals sesquare<-sum(resid^2)/24 se<-sqrt((1-25/100)*sesquare/25) se # Now make 95% CI for ybar EY_CI <- c(hatybar - qt(.975, 25-2)*se, hatybar + qt(.975, 25-2)*se) names(EY_CI) <- c("lower", "upper") EY_CI ##estimate population total hatty<-100*(b_0+11.3*b_1) hatty se1<-100*se se1 total_CI <- c(hatty - qt(.975, 25-2)*se1, hatty + qt(.975, 25-2)*se1) names(total_CI) <- c("lower", "upper") total_CI #####################Domain estimation Example 4.7###################################################################### data(agsrs) n<-nrow(agsrs) agsrsnew<-as.matrix(agsrs) #as.matrix attempts to turn its argument into a matrix. agsrsnew[1:10,] ##define the western states for(i in 1:n){if (agsrsnew[i,2]=="AK"|agsrsnew[i,2]=="AZ"|agsrsnew[i,2]=="CA"| agsrsnew[i,2]=="CO"|agsrsnew[i,2]=="HI"|agsrsnew[i,2]=="ID"|agsrsnew[i,2]=="MT"| agsrsnew[i,2]=="NV"|agsrsnew[i,2]=="NM"|agsrsnew[i,2]=="OR"|agsrsnew[i,2]=="UT"| agsrsnew[i,2]=="WA"|agsrsnew[i,2]=="WY") {agsrsnew[i,2]<-"w"}} agsrsnew[1:30,] agsrsd<-as.data.frame(agsrsnew) #check if an object is a data frame, or coerce it if possible. agsrsd[1:30,] agsrsd$acres92d<-agsrs$acres92 ##create a numerical column of acres92 for data agsrsd agsrsd[1:30,] dsrs <- svydesign(id = ~1, fpc=rep(3078,300), data = agsrsd) ##recall SRS design dsub<-subset(dsrs,state=='w') ##define domain, western states smean<-svymean(~acres92d,design=dsub) ##domain estimate for acres92 from western states smean confint(smean, level=.95,df=299) stotal<-svytotal(~acres92d,design=dsub) stotal confint(stotal, level=.95,df=299) ###direct calculation agsrsw<-agsrsd[which(agsrsd[,2]=='w'),] #define a data set agsrsw with only western states info nrow(agsrsw) mean(agsrsw$acres92d) sqrt(var(agsrsw$acres92d)) ##follow steps in the textbook, you will get the same results by using subset, svymean etc.