library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(SDaA)
################################################################################
#####two stage cluster sampling, API data#######################################
################################################################################
# The Academic Performance Index (API) is computed for all California
# schools based on standardized testing of students.
# The data sets contain information for all schools with at least 100 students
# and for various probability samples of the data.
# apiclus2 contains a two-stage cluster sample of schools within districts.
#apiclus2 is a sample obtained using a two-stage cluster sampling design,
# 1st stage, n = 40 SRS districts were selected from N=757 districts,
# 1st stage, fpc1 is a vector of N = 757
# 2nd stage, within selected district i with Mi schools, one or more schools were selected using SRS
# 2nd stage, fpc2 is a vector of total number of schools in each sampled district (i.e., fpc2 = Mi).
# snum: School number, dnum: District number
# "api00: 2000 API, predicted by the proportions of students learning English (ell), receiving subsidized meals (meals)
# and having moved to the school within the past year (mobility).
data(api) #loads several data frames that are samples from the data frame apipop.
#help(api)
apiclus2[1:5,]
## cds stype name sname snum
## 1 31667796031017 E Alta-Dutch Flat Alta-Dutch Flat Elementary 3269
## 2 55751846054837 E Tenaya Elementa Tenaya Elementary 5979
## 3 41688746043517 E Panorama Elemen Panorama Elementary 4958
## 4 41688746043509 M Lipman Middle Lipman Middle 4957
## 5 41688746043491 E Brisbane Elemen Brisbane Elementary 4956
## dname dnum cname cnum flag pcttest api00 api99
## 1 Alta-Dutch Flat Elem 15 Placer 30 NA 100 821 785
## 2 Big Oak Flat-Grvlnd Unif 63 Tuolumne 54 NA 100 773 718
## 3 Brisbane Elementary 83 San Mateo 40 NA 98 600 632
## 4 Brisbane Elementary 83 San Mateo 40 NA 100 740 740
## 5 Brisbane Elementary 83 San Mateo 40 NA 98 716 711
## target growth sch.wide comp.imp both awards meals ell yr.rnd mobility
## 1 1 36 Yes Yes Yes Yes 27 0 No 14
## 2 4 55 Yes Yes Yes Yes 43 0 No 12
## 3 8 -32 No No No No 33 5 No 9
## 4 3 0 No No No No 11 4 No 8
## 5 4 5 Yes Yes Yes Yes 5 2 No 6
## acs.k3 acs.46 acs.core pct.resp not.hsg hsg some.col col.grad grad.sch
## 1 17 20 NA 89 4 16 53 21 6
## 2 18 34 NA 98 8 33 37 15 7
## 3 19 29 NA 79 8 28 30 32 1
## 4 NA 30 24 96 5 27 35 27 6
## 5 18 28 NA 98 3 14 22 58 3
## avg.ed full emer enroll api.stu pw fpc1 fpc2
## 1 3.07 100 0 152 120 18.925 757 1
## 2 2.79 100 0 312 270 18.925 757 1
## 3 2.90 100 0 173 151 18.925 757 3
## 4 3.03 82 18 201 179 18.925 757 3
## 5 3.44 100 8 147 136 18.925 757 3
dimnames(apiclus2)[2]
## [[1]]
## [1] "cds" "stype" "name" "sname" "snum" "dname"
## [7] "dnum" "cname" "cnum" "flag" "pcttest" "api00"
## [13] "api99" "target" "growth" "sch.wide" "comp.imp" "both"
## [19] "awards" "meals" "ell" "yr.rnd" "mobility" "acs.k3"
## [25] "acs.46" "acs.core" "pct.resp" "not.hsg" "hsg" "some.col"
## [31] "col.grad" "grad.sch" "avg.ed" "full" "emer" "enroll"
## [37] "api.stu" "pw" "fpc1" "fpc2"
dclus2<-svydesign(id=~dnum+snum,fpc=~fpc1+fpc2,data=apiclus2)
summary(dclus2)
## 2 - level Cluster Sampling design
## With (40, 126) clusters.
## svydesign(id = ~dnum + snum, fpc = ~fpc1 + fpc2, data = apiclus2)
## Probabilities:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.003669 0.037740 0.052840 0.042390 0.052840 0.052840
## Population size (PSUs): 757
## Data variables:
## [1] "cds" "stype" "name" "sname" "snum" "dname"
## [7] "dnum" "cname" "cnum" "flag" "pcttest" "api00"
## [13] "api99" "target" "growth" "sch.wide" "comp.imp" "both"
## [19] "awards" "meals" "ell" "yr.rnd" "mobility" "acs.k3"
## [25] "acs.46" "acs.core" "pct.resp" "not.hsg" "hsg" "some.col"
## [31] "col.grad" "grad.sch" "avg.ed" "full" "emer" "enroll"
## [37] "api.stu" "pw" "fpc1" "fpc2"
#use ell, meals and mobility to predict api00
summary(svyglm(api00 ~ ell + meals + mobility, design = dclus2))
##
## Call:
## svyglm(formula = api00 ~ ell + meals + mobility, design = dclus2)
##
## Survey design:
## svydesign(id = ~dnum + snum, fpc = ~fpc1 + fpc2, data = apiclus2)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 811.4907 30.2338 26.840 <2e-16 ***
## ell -2.0592 1.3798 -1.492 0.144
## meals -1.7772 1.0830 -1.641 0.110
## mobility 0.3253 0.6103 0.533 0.597
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 8363.101)
##
## Number of Fisher Scoring iterations: 2
#A useful property of regression models is that they provide another way
#to get domain estimates. Suppose we want the mean of api00 for each school type:
summary(svyglm(api00~stype-1, dclus2))
##
## Call:
## svyglm(formula = api00 ~ stype - 1, dclus2)
##
## Survey design:
## svydesign(id = ~dnum + snum, fpc = ~fpc1 + fpc2, data = apiclus2)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## stypeE 692.81 29.93 23.15 <2e-16 ***
## stypeH 598.34 17.69 33.82 <2e-16 ***
## stypeM 642.35 45.09 14.25 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 17528.44)
##
## Number of Fisher Scoring iterations: 2
summary(svyglm(api00~stype, dclus2))
##
## Call:
## svyglm(formula = api00 ~ stype, dclus2)
##
## Survey design:
## svydesign(id = ~dnum + snum, fpc = ~fpc1 + fpc2, data = apiclus2)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 692.81 29.93 23.150 < 2e-16 ***
## stypeH -94.47 28.21 -3.348 0.00188 **
## stypeM -50.46 25.30 -1.994 0.05354 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 17528.44)
##
## Number of Fisher Scoring iterations: 2
#compared to using svyby
svyby(~api00,~stype,dclus2,svymean, keep.var=TRUE)
## stype api00 se
## E E 692.8104 29.92660
## H H 598.3407 17.69417
## M M 642.3520 45.09132