library(survey)
library(SDaA)

################################################################################
# Two-stage cluster sampling, API data ----
################################################################################
# The Academic Performance Index (API) is computed for all California
# schools based on standardized testing of students.
# The data sets contain information for all schools with at least 100 students
# and for various probability samples of the data.
#
# apiclus2 contains a two-stage cluster sample of schools within districts:
#   Stage 1: n = 40 districts were selected by SRS from N = 757 districts;
#            fpc1 is a vector holding N = 757.
#   Stage 2: within selected district i with Mi schools, one or more schools
#            were selected by SRS; fpc2 is the total number of schools in each
#            sampled district (i.e., fpc2 = Mi).
#
# Variables used below:
#   snum: school number, dnum: district number
#   api00: 2000 API, predicted by the proportions of students learning English
#          (ell), receiving subsidized meals (meals), and having moved to the
#          school within the past year (mobility).

# Loads several data frames that are samples from the data frame apipop.
data(api)
# help(api)

# Quick look at the sample: first five rows and the column names.
head(apiclus2, 5)
colnames(apiclus2)

# Two-stage design: districts (dnum) are the primary sampling units and
# schools (snum) the secondary units; fpc1/fpc2 supply the population sizes
# used for the finite population correction at each stage.
dclus2 <- svydesign(id = ~dnum + snum, fpc = ~fpc1 + fpc2, data = apiclus2)
summary(dclus2)

# Use ell, meals and mobility to predict api00.
summary(svyglm(api00 ~ ell + meals + mobility, design = dclus2))

# A useful property of regression models is that they provide another way
# to get domain estimates. Suppose we want the mean of api00 for each school
# type.
# Without an intercept (`- 1`), each coefficient is the estimated mean of
# api00 for one level of stype.
summary(svyglm(api00 ~ stype - 1, design = dclus2))
# With an intercept, the stype coefficients are contrasts against the
# reference level instead of the domain means themselves.
summary(svyglm(api00 ~ stype, design = dclus2))

# Compare with the domain means computed directly by svyby().
svyby(~api00, ~stype, dclus2, svymean, keep.var = TRUE)