################################################################################
####################Chapter 5 Cluster Sampling with Equal Probability###########
################################################################################
################################################################################
#####one stage cluster sampling, GPA data#######################################
################################################################################
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(SDaA)
ex.data <- data.frame(suite = c(1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5), gpa = c(3.08,
2.60,3.44,3.04,2.36,3.04,3.28,2.68,2.00,2.56,2.52,1.88,3.00,2.88,3.44,3.64,2.68,1.92,3.28,3.20),
wt=c(20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20))
ex.data
## suite gpa wt
## 1 1 3.08 20
## 2 1 2.60 20
## 3 1 3.44 20
## 4 1 3.04 20
## 5 2 2.36 20
## 6 2 3.04 20
## 7 2 3.28 20
## 8 2 2.68 20
## 9 3 2.00 20
## 10 3 2.56 20
## 11 3 2.52 20
## 12 3 1.88 20
## 13 4 3.00 20
## 14 4 2.88 20
## 15 4 3.44 20
## 16 4 3.64 20
## 17 5 2.68 20
## 18 5 1.92 20
## 19 5 3.28 20
## 20 5 3.20 20
#descriptive statistics
aa<-tapply(ex.data$gpa,ex.data$suite,sum) #sum gpa for each suite
aa
## 1 2 3 4 5
## 12.16 11.36 8.96 12.96 11.08
mean(aa)
## [1] 11.304
var(aa) #gives s_t^2
## [1] 2.25568
#define one-stage cluster design
design1<-svydesign(id=~suite,fpc=~rep(100,20),nest=TRUE,data=ex.data)
design1
## 1 - level Cluster Sampling design
## With (5) clusters.
## svydesign(id = ~suite, fpc = ~rep(100, 20), nest = TRUE, data = ex.data)
##recall srs: dsrs <- svydesign(id = ~1, fpc=rep(3078,300), data = agsrs)
summary(design1)
## 1 - level Cluster Sampling design
## With (5) clusters.
## svydesign(id = ~suite, fpc = ~rep(100, 20), nest = TRUE, data = ex.data)
## Probabilities:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.05 0.05 0.05 0.05 0.05 0.05
## Population size (PSUs): 100
## Data variables:
## [1] "suite" "gpa" "wt"
total1<-svytotal(~gpa,design1)
total1
## total SE
## gpa 1130.4 65.466
mean1<-svymean(~gpa,design1)
mean1
## mean SE
## gpa 2.826 0.1637
ci1<-confint(mean1,level=.95)
ci1
## 2.5 % 97.5 %
## gpa 2.505223 3.146777
ci2<-confint(mean1,level=.95,df=4) ##use t-approximation
ci2
## 2.5 % 97.5 %
## gpa 2.371593 3.280407
#anova table, consider suites as factor with 5 levels, one-way ANOVA can be used to derive the anova talbe
myfit<-lm(gpa~factor(suite),ex.data)
anova(myfit)
## Analysis of Variance Table
##
## Response: gpa
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(suite) 4 2.2557 0.56392 3.0476 0.05039 .
## Residuals 15 2.7756 0.18504
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
################################################################################
#####two stage cluster sampling, Coots data#####################################
################################################################################
data(coots)
attach(coots)
#coots.dat come from Arnold???s (1991) work on egg size
#and volume of American coot eggs in Minnedosa, Manitoba
#In this data set, we look at volumes of a subsample of eggs
#in clutches (nests of eggs) with at least two eggs available for
#measurement
#Randomly select 2 eggs in each clutch and measure their volume
#Want to estimate the mean egg volume
nrow(coots)
## [1] 368
coots[1:10,]
## clutch csize length breadth volume tmt
## 1 1 13 44.30 31.10 3.7957569 yes
## 2 1 13 45.90 32.70 3.9328497 yes
## 3 2 13 49.20 34.40 4.2156036 yes
## 4 2 13 48.70 32.70 4.1727621 yes
## 5 3 6 51.05 34.25 0.9317646 no
## 6 3 6 49.35 34.40 0.9007362 no
## 7 4 11 49.20 31.55 3.0182724 yes
## 8 4 11 48.55 33.10 2.9783969 yes
## 9 5 10 49.40 34.55 2.5045800 yes
## 10 5 10 49.05 34.95 2.4868350 yes
#clutch is psu
#csize: number of eggs in the clutch, M_i
#m_i=2
# n= 184 clutches selected
##plot data
plot(volume ~ clutch, xlim = c(0, 200), pch = 19,
data = coots, xlab = "Clutch Number", ylab = "Egg Volume")
#Note the wide variation in the means from clutch to clutch.
#eggs within the same clutch tend to be more similar
#than two randomly selected eggs from different clutches, and that clustering does
#not provide as much information per egg as would an SRS of eggs.
##calculate using direct formulas
#split data using clutch, response interested is volume
coots2<-split(volume,clutch)
coots2
## $`1`
## [1] 3.795757 3.932850
##
## $`2`
## [1] 4.215604 4.172762
##
## $`3`
## [1] 0.9317646 0.9007362
##
## $`4`
## [1] 3.018272 2.978397
##
## $`5`
## [1] 2.504580 2.486835
##
## $`6`
## [1] 3.971407 3.997112
##
## $`7`
## [1] 1.977376 1.876762
##
## $`8`
## [1] 2.910915 3.012138
##
## $`9`
## [1] 3.467880 3.453278
##
## $`10`
## [1] 3.067350 2.855703
##
## $`11`
## [1] 3.540888 3.456929
##
## $`12`
## [1] 2.990666 3.009070
##
## $`13`
## [1] 3.646750 3.486132
##
## $`14`
## [1] 3.024407 2.947723
##
## $`15`
## [1] 3.006003 2.959993
##
## $`16`
## [1] 2.438670 2.375295
##
## $`17`
## [1] 1.993803 2.026657
##
## $`18`
## [1] 2.441205 2.433600
##
## $`19`
## [1] 2.932387 2.920117
##
## $`20`
## [1] 3.091889 2.803558
##
## $`21`
## [1] 2.57049 2.50458
##
## $`22`
## [1] 4.361265 4.177046
##
## $`23`
## [1] 3.807367 3.723408
##
## $`24`
## [1] 2.535000 2.593305
##
## $`25`
## [1] 1.948629 1.975323
##
## $`26`
## [1] 3.599294 3.365669
##
## $`27`
## [1] 2.562885 2.585700
##
## $`28`
## [1] 1.911669 1.999963
##
## $`29`
## [1] 3.471530 3.343766
##
## $`30`
## [1] 2.966127 2.944656
##
## $`31`
## [1] 2.502045 2.507115
##
## $`32`
## [1] 1.546147 1.573728
##
## $`33`
## [1] 2.52486 2.45895
##
## $`34`
## [1] 2.436135 2.537535
##
## $`35`
## [1] 1.971216 2.002016
##
## $`36`
## [1] 1.967109 1.882922
##
## $`37`
## [1] 1.463405 1.531546
##
## $`38`
## [1] 1.932202 1.936309
##
## $`39`
## [1] 1.544525 1.585085
##
## $`40`
## [1] 2.39304 2.43360
##
## $`41`
## [1] 2.920117 2.717672
##
## $`42`
## [1] 1.244634 1.223518
##
## $`43`
## [1] 1.276930 1.244634
##
## $`44`
## [1] 3.018272 3.042811
##
## $`45`
## [1] 2.496975 2.446275
##
## $`46`
## [1] 2.098524 2.053350
##
## $`47`
## [1] 3.159371 3.159371
##
## $`48`
## [1] 1.969163 1.899349
##
## $`49`
## [1] 1.911669 1.917829
##
## $`50`
## [1] 1.601309 1.583462
##
## $`51`
## [1] 2.481765 2.458950
##
## $`52`
## [1] 3.113360 2.972262
##
## $`53`
## [1] 2.059510 2.063617
##
## $`54`
## [1] 2.425995 2.425995
##
## $`55`
## [1] 1.585085 1.568861
##
## $`56`
## [1] 1.913722 1.889082
##
## $`57`
## [1] 2.64147 2.63133
##
## $`58`
## [1] 0.8861346 0.8697078
##
## $`59`
## [1] 0.885222 0.871533
##
## $`60`
## [1] 0.9144252 0.8906976
##
## $`61`
## [1] 1.521811 1.466650
##
## $`62`
## [1] 1.596442 1.598064
##
## $`63`
## [1] 1.515322 1.479629
##
## $`64`
## [1] 2.018443 2.065670
##
## $`65`
## [1] 0.8094762 0.8176896
##
## $`66`
## [1] 1.183769 1.224760
##
## $`67`
## [1] 0.6565650 0.6635363
##
## $`68`
## [1] 2.096470 2.032817
##
## $`69`
## [1] 1.521811 1.573728
##
## $`70`
## [1] 3.500734 3.526286
##
## $`71`
## [1] 2.474160 2.573025
##
## $`72`
## [1] 2.030763 1.926042
##
## $`73`
## [1] 2.073883 2.121111
##
## $`74`
## [1] 1.528301 1.490986
##
## $`75`
## [1] 3.919997 3.913999
##
## $`76`
## [1] 2.110844 2.067724
##
## $`77`
## [1] 2.49951 2.45895
##
## $`78`
## [1] 1.588330 1.583462
##
## $`79`
## [1] 3.033609 3.037903
##
## $`80`
## [1] 1.62240 1.53479
##
## $`81`
## [1] 1.189980 1.244634
##
## $`82`
## [1] 1.186253 1.191222
##
## $`83`
## [1] 1.151473 1.156442
##
## $`84`
## [1] 0.5819093 0.5730368
##
## $`85`
## [1] 0.6413550 0.6571987
##
## $`86`
## [1] 2.137127 2.096470
##
## $`87`
## [1] 2.519790 2.436135
##
## $`88`
## [1] 1.854996 2.841593
##
## $`89`
## [1] 3.027474 3.002936
##
## $`90`
## [1] 1.918650 2.050065
##
## $`91`
## [1] 2.086614 1.981483
##
## $`92`
## [1] 2.385435 2.442726
##
## $`93`
## [1] 1.191222 1.175819
##
## $`94`
## [1] 2.572011 2.519283
##
## $`95`
## [1] 1.223021 1.230225
##
## $`96`
## [1] 0.8983634 0.8624070
##
## $`97`
## [1] 0.5925563 0.5957250
##
## $`98`
## [1] 1.974912 1.982304
##
## $`99`
## [1] 2.987599 2.988212
##
## $`100`
## [1] 2.418390 2.315469
##
## $`101`
## [1] 3.415314 3.445247
##
## $`102`
## [1] 0.6295672 0.6039638
##
## $`103`
## [1] 1.193209 1.076447
##
## $`104`
## [1] 1.148243 1.091850
##
## $`105`
## [1] 1.976144 1.998731
##
## $`106`
## [1] 2.966741 3.056921
##
## $`107`
## [1] 2.133841 2.051707
##
## $`108`
## [1] 1.935488 1.983947
##
## $`109`
## [1] 2.365155 2.456922
##
## $`110`
## [1] 1.889903 1.974091
##
## $`111`
## [1] 2.606994 2.593812
##
## $`112`
## [1] 3.600755 3.656241
##
## $`113`
## [1] 1.652252 1.645114
##
## $`114`
## [1] 1.626943 1.668476
##
## $`115`
## [1] 2.872266 2.867972
##
## $`116`
## [1] 2.130556 2.105916
##
## $`117`
## [1] 2.603952 2.559843
##
## $`118`
## [1] 1.600335 1.611043
##
## $`119`
## [1] 3.985973 3.934563
##
## $`120`
## [1] 3.164278 3.145874
##
## $`121`
## [1] 2.081686 2.046779
##
## $`122`
## [1] 3.616086 3.689094
##
## $`123`
## [1] 2.572518 2.627274
##
## $`124`
## [1] 2.893124 2.966741
##
## $`125`
## [1] 3.045879 3.068577
##
## $`126`
## [1] 2.496468 2.546154
##
## $`127`
## [1] 3.502924 3.491243
##
## $`128`
## [1] 3.056307 2.913982
##
## $`129`
## [1] 1.909205 1.955610
##
## $`130`
## [1] 2.167516 2.139591
##
## $`131`
## [1] 3.529207 3.554029
##
## $`132`
## [1] 2.673411 2.632344
##
## $`133`
## [1] 2.966127 2.855089
##
## $`134`
## [1] 1.595144 1.577946
##
## $`135`
## [1] 2.864291 2.886376
##
## $`136`
## [1] 2.098113 2.097292
##
## $`137`
## [1] 2.359578 2.363634
##
## $`138`
## [1] 2.662764 2.584686
##
## $`139`
## [1] 1.587356 1.554908
##
## $`140`
## [1] 2.069777 2.023782
##
## $`141`
## [1] 2.836685 2.908461
##
## $`142`
## [1] 2.866132 2.902940
##
## $`143`
## [1] 2.921958 2.955085
##
## $`144`
## [1] 3.710267 3.753341
##
## $`145`
## [1] 2.577081 2.581644
##
## $`146`
## [1] 2.529930 2.690649
##
## $`147`
## [1] 2.029531 2.046779
##
## $`148`
## [1] 2.592291 2.623725
##
## $`149`
## [1] 3.137286 3.072258
##
## $`150`
## [1] 3.002322 2.849568
##
## $`151`
## [1] 1.169857 1.177807
##
## $`152`
## [1] 1.556206 1.587356
##
## $`153`
## [1] 1.963003 1.979840
##
## $`154`
## [1] 2.368704 2.483793
##
## $`155`
## [1] 1.683747 1.965877
##
## $`156`
## [1] 2.484807 2.641977
##
## $`157`
## [1] 2.231814 2.448303
##
## $`158`
## [1] 2.062795 1.935488
##
## $`159`
## [1] 1.701817 1.803663
##
## $`160`
## [1] 2.893738 3.086981
##
## $`161`
## [1] 1.917008 1.967931
##
## $`162`
## [1] 1.504289 1.611368
##
## $`163`
## [1] 1.637975 1.556206
##
## $`164`
## [1] 1.180043 1.182775
##
## $`165`
## [1] 2.282514 2.590770
##
## $`166`
## [1] 1.642518 1.661338
##
## $`167`
## [1] 1.608123 1.601958
##
## $`168`
## [1] 0.8848570 0.8801114
##
## $`169`
## [1] 2.395575 2.375295
##
## $`170`
## [1] 2.425995 2.540070
##
## $`171`
## [1] 2.58063 2.44881
##
## $`172`
## [1] 2.990666 2.972262
##
## $`173`
## [1] 2.344875 2.347410
##
## $`174`
## [1] 3.559140 3.679603
##
## $`175`
## [1] 2.45388 2.41332
##
## $`176`
## [1] 3.789115 3.716107
##
## $`177`
## [1] 2.51472 2.54007
##
## $`178`
## [1] 3.180842 3.021340
##
## $`179`
## [1] 1.967109 1.913722
##
## $`180`
## [1] 1.940416 1.952736
##
## $`181`
## [1] 3.482482 3.424075
##
## $`182`
## [1] 4.224172 4.215604
##
## $`183`
## [1] 4.481221 4.348412
##
## $`184`
## [1] 3.486132 3.482482
coots2[[1]]
## [1] 3.795757 3.932850
nclus<-length(coots2) # number of clusters selected
nclus #184
## [1] 184
clusmean<-sapply(coots2,mean) #this is yibar
clusmean
## 1 2 3 4 5 6 7
## 3.8643033 4.1941828 0.9162504 2.9983346 2.4957075 3.9842595 1.9270690
## 8 9 10 11 12 13 14
## 2.9615265 3.4605792 2.9615265 3.4989084 2.9998683 3.5664408 2.9860653
## 15 16 17 18 19 20 21
## 2.9829979 2.4069825 2.0102297 2.4374025 2.9262519 2.9477233 2.5375350
## 22 23 24 25 26 27 28
## 4.2691555 3.7653876 2.5641525 1.9619759 3.4824816 2.5742925 1.9558159
## 29 30 31 32 33 34 35
## 3.4076484 2.9553917 2.5045800 1.5599376 2.4919050 2.4868350 1.9866162
## 36 37 38 39 40 41 42
## 1.9250156 1.4974752 1.9342556 1.5648048 2.4133200 2.8188946 1.2340760
## 43 44 45 46 47 48 49
## 1.2607823 3.0305418 2.4716250 2.0759368 3.1593705 1.9342556 1.9147489
## 50 51 52 53 54 55 56
## 1.5923856 2.4703575 3.0428112 2.0615634 2.4259950 1.5769728 1.9014021
## 57 58 59 60 61 62 63
## 2.6364000 0.8779212 0.8783775 0.9025614 1.4942304 1.5972528 1.4974752
## 64 65 66 67 68 69 70
## 2.0420566 0.8135829 1.2042644 0.6600506 2.0646435 1.5477696 3.5135100
## 71 72 73 74 75 76 77
## 2.5235925 1.9784027 2.0974971 1.5096432 3.9169984 2.0892837 2.4792300
## 78 79 80 81 82 83 84
## 1.5858960 3.0357563 1.5785952 1.2173070 1.1887375 1.1539574 0.5774731
## 85 86 87 88 89 90 91
## 0.6492769 2.1167985 2.4779625 2.3482947 3.0152051 1.9843574 2.0340485
## 92 93 94 95 96 97 98
## 2.4140805 1.1835205 2.5456470 1.2266231 0.8803852 0.5941406 1.9786081
## 99 100 101 102 103 104 105
## 2.9879057 2.3669295 3.4302808 0.6167655 1.1348282 1.1200466 1.9874374
## 106 107 108 109 110 111 112
## 3.0118310 2.0927743 1.9597172 2.4110385 1.9319970 2.6004030 3.6284976
## 113 114 115 116 117 118 119
## 1.6486829 1.6477095 2.8701193 2.1182359 2.5818975 1.6056893 3.9602683
## 120 121 122 123 124 125 126
## 3.1550763 2.0642327 3.6525902 2.5998960 2.9299327 3.0572277 2.5213110
## 127 128 129 130 131 132 133
## 3.4970832 2.9851450 1.9324077 2.1535535 3.5416181 2.6528775 2.9106084
## 134 135 136 137 138 139 140
## 1.5865449 2.8753339 2.0977024 2.3616060 2.6237250 1.5711322 2.0467793
## 141 142 143 144 145 146 147
## 2.8725733 2.8845359 2.9385213 3.7318040 2.5793625 2.6102895 2.0381552
## 148 149 150 151 152 153 154
## 2.6080080 3.1047717 2.9259452 1.1738318 1.5717812 1.9714213 2.4262485
## 155 156 157 158 159 160 161
## 1.8248122 2.5633920 2.3400585 1.9991416 1.7527396 2.9903595 1.9424691
## 162 163 164 165 166 167 168
## 1.5578285 1.5970905 1.1814088 2.4366420 1.6519277 1.6050403 0.8824842
## 169 170 171 172 173 174 175
## 2.3854350 2.4830325 2.5147200 2.9814642 2.3461425 3.6193716 2.4336000
## 176 177 178 179 180 181 182
## 3.7526112 2.5273950 3.1010909 1.9404158 1.9465758 3.4532784 4.2198877
## 183 184
## 4.4148166 3.4843068
clusvar<-sapply(coots2,var) #this is si
clusvar
## 1 2 3 4 5
## 9.397218e-03 9.176971e-04 4.813808e-04 7.950278e-04 1.574425e-04
## 6 7 8 9 10
## 3.303709e-04 5.061599e-03 5.122997e-03 1.066034e-04 2.239725e-02
## 11 12 13 14 15
## 3.524574e-03 1.693573e-04 1.289901e-02 2.940195e-03 1.058469e-03
## 16 17 18 19 20
## 2.008195e-03 5.396828e-04 2.891801e-05 7.526909e-05 4.156735e-02
## 21 22 23 24 25
## 2.172064e-03 1.696823e-02 3.524574e-03 1.699737e-03 3.562741e-04
## 26 27 28 29 30
## 2.729046e-02 2.602621e-04 3.897915e-03 8.161820e-03 2.305127e-04
## 31 32 33 34 35
## 1.285245e-05 3.803503e-04 2.172064e-03 5.140980e-03 4.743292e-04
## 36 37 38 39 40
## 3.543759e-03 2.321584e-03 8.432492e-06 8.225568e-04 8.225568e-04
## 41 42 43 44 45
## 2.049201e-02 2.229554e-04 5.215126e-04 3.010764e-04 1.285245e-03
## 46 47 48 49 50
## 1.020332e-03 0.000000e+00 2.436990e-03 1.897280e-05 1.592470e-04
## 51 52 53 54 55
## 2.602621e-04 9.954337e-03 8.432492e-06 0.000000e+00 1.316091e-04
## 56 57 58 59 60
## 3.035697e-04 5.140980e-05 1.349199e-04 9.369436e-05 2.814995e-04
## 61 62 63 64 65
## 1.521401e-03 1.316091e-06 6.369880e-04 1.115195e-03 3.372997e-05
## 66 67 68 69 70
## 8.401310e-04 2.429951e-05 2.025909e-03 1.347677e-03 3.264728e-04
## 71 72 73 74 75
## 4.887144e-03 5.483223e-03 1.115199e-03 6.962121e-04 1.798740e-05
## 76 77 78 79 80
## 9.296801e-04 8.225568e-04 1.184482e-05 9.220077e-06 3.837721e-03
## 81 82 83 84 85
## 1.493563e-03 1.234349e-05 1.234349e-05 3.936063e-05 1.255114e-04
## 86 87 88 89 90
## 8.264674e-04 3.499080e-03 4.866864e-01 3.010764e-04 8.634872e-03
## 91 92 93 94 95
## 5.526327e-03 1.641129e-03 1.186200e-04 1.390121e-03 2.595241e-05
## 96 97 98 99 100
## 6.464314e-04 5.020330e-06 2.732157e-05 1.881911e-07 5.296366e-03
## 101 102 103 104 105
## 4.480012e-04 3.277670e-04 6.816694e-03 1.590125e-03 2.550840e-04
## 106 107 108 109 110
## 4.066225e-03 3.372997e-03 1.174142e-03 4.210591e-03 3.543759e-03
## 111 112 113 114 115
## 8.688256e-05 1.539348e-03 2.547980e-05 8.625158e-04 9.220506e-06
## 116 117 118 119 120
## 3.035697e-04 9.728019e-04 5.732849e-05 1.321484e-03 1.693554e-04
## 121 122 123 124 125
## 6.092458e-04 2.665084e-03 1.499110e-03 2.709687e-03 2.576087e-04
## 126 127 128 129 130
## 1.234349e-03 6.822522e-05 1.012820e-02 1.076744e-03 3.899196e-04
## 131 132 133 134 135
## 3.080832e-04 8.432492e-04 6.164730e-03 1.478770e-04 2.438736e-04
## 136 137 138 139 140
## 3.372668e-07 8.225568e-06 3.048087e-03 5.264364e-04 1.057770e-03
## 141 142 143 144 145
## 2.575897e-03 6.774218e-04 5.487123e-04 9.277149e-04 1.041048e-05
## 146 147 148 149 150
## 1.291530e-02 1.487502e-04 4.940482e-04 2.114307e-03 1.166689e-02
## 151 152 153 154 155
## 3.159887e-05 4.851644e-04 1.417507e-04 6.622739e-03 3.979875e-02
## 156 157 158 159 160
## 1.235120e-02 2.343374e-02 8.103625e-03 5.186314e-03 1.867143e-02
## 161 162 163 164 165
## 1.296576e-03 5.732892e-03 3.343077e-03 3.733825e-06 4.751088e-02
## 166 167 168 169 170
## 1.770924e-04 1.900423e-05 1.126036e-05 2.056392e-04 6.506553e-03
## 171 172 173 174 175
## 8.688256e-03 1.693536e-04 3.213112e-06 7.255691e-03 8.225568e-04
## 176 177 178 179 180
## 2.665084e-03 3.213112e-04 1.272048e-02 1.425091e-03 7.589120e-05
## 181 182 183 184
## 1.705654e-03 3.670788e-05 8.819075e-03 6.662710e-06
#split data using clutch, response interested is csize
coots3<-split(csize,clutch)
#coots3
mi<-sapply(coots3,length)
mi
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 181 182 183 184
## 2 2 2 2
Mi<-sapply(coots3,mean)
Mi
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## 13 13 6 11 10 13 9 11 12 11 12 11 12 11 11 10 9 10
## 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## 11 11 10 13 12 10 9 12 10 9 12 11 10 8 10 10 9 9
## 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## 8 9 8 10 11 7 7 11 10 9 11 9 9 8 10 11 9 10
## 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## 8 9 10 6 6 6 8 8 8 9 6 7 5 9 8 12 10 9
## 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## 9 8 13 9 10 8 11 8 7 7 7 5 5 9 10 10 11 9
## 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## 9 10 7 10 7 6 5 9 11 10 12 5 7 7 9 11 9 9
## 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## 10 9 10 12 8 8 11 9 10 8 13 11 9 12 10 11 11 10
## 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## 12 11 9 9 12 10 11 8 11 9 10 10 8 9 11 11 11 12
## 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## 10 10 9 10 11 11 7 8 9 10 9 10 10 9 9 11 9 8
## 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## 8 7 10 8 8 6 10 10 10 11 10 12 10 12 10 11 9 9
## 181 182 183 184
## 12 13 13 12
##unbiased estimator, suppose N is known as 10000, 1-n/N goes to 1
##suppose M0=95000
N<-10000
tunb<-N*sum(Mi*clusmean)/nclus
tunb
## [1] 237950.8
yunb<-tunb/95000
yunb
## [1] 2.504745
#variances
ssufpc<-1-mi/Mi
st2<-var(Mi*clusmean)
varterm2<-sum(ssufpc*(Mi^2)*clusvar/mi)
vartunb<-N^2*(1-nclus/N)*st2/nclus + (N/nclus)*varterm2
vartunb
## [1] 79771832
varyunb<-vartunb/(95000)^2
varyunb
## [1] 0.008838984
seyunb<-sqrt(varyunb)
seyunb
## [1] 0.09401587
##ratio estimation
ybarratio<-sum(Mi*clusmean)/sum(Mi)
ybarratio
## [1] 2.490498
sr2<-var(Mi*(clusmean - ybarratio))
varybarr<-( (1 - nclus/N)*sr2/nclus + varterm2/(nclus*N) )/(mean(Mi))^2
varybarr
## [1] 0.003653726
seybarr<-sqrt(varybarr)
seybarr
## [1] 0.06044606
#compare first term and second term in variance formula
v1<-((1 - nclus/N)*sr2/nclus)/(mean(Mi))^2
v2<- (varterm2/(nclus*N))/(mean(Mi))^2
v1
## [1] 0.00365345
v2
## [1] 2.757191e-07
v1+v2
## [1] 0.003653726
#textbook use ssufpc<-1, and ignore second term
varybarrtext<-(sr2/nclus)/(mean(Mi))^2
varybarrtext
## [1] 0.003721934
seybarrtext<-sqrt(varybarrtext)
seybarrtext
## [1] 0.06100765
list(yunb=yunb,seyunb<-seyunb, yratio=ybarratio,seybarr<-seybarr,seybarrtext=seybarrtext)
## $yunb
## [1] 2.504745
##
## [[2]]
## [1] 0.09401587
##
## $yratio
## [1] 2.490498
##
## [[4]]
## [1] 0.06044606
##
## $seybarrtext
## [1] 0.06100765
##use svymean and svytotal to calculate the estimates
cootsfpc1<-rep(10000,368) #Note: 10000 is an arbitrary number, we don't have information of N
cootsfpc2<-coots$csize #number of ssus in each psu
ssu<-rep(1:2,184) #index of ssu
design3<-svydesign(id=~clutch+ssu,fpc=~cootsfpc1+cootsfpc2,data=coots)
## Warning in as.fpc(fpc, strata, ids, pps = pps): `fpc' varies within strata:
## stratum 1.88 at stage 2
summary(design3)
## 2 - level Cluster Sampling design
## With (184, 368) clusters.
## svydesign(id = ~clutch + ssu, fpc = ~cootsfpc1 + cootsfpc2, data = coots)
## Probabilities:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.002831 0.003345 0.003680 0.004021 0.004217 0.007360
## Population size (PSUs): 10000
## Data variables:
## [1] "clutch" "csize" "length" "breadth" "volume" "tmt"
svymean(~volume,design3) #ratio estimator
## mean SE
## volume 2.4908 0.0604
svytotal(~volume,design3) #unbiased estimator of the total
## total SE
## volume 237978 8931.6
################################################################################
#####two stage cluster sampling, API data#######################################
################################################################################
# The Academic Performance Index (API) is computed for all California
# schools based on standardized testing of students.
# The data sets contain information for all schools with at least 100 students
# and for various probability samples of the data.
# apipop contains the entire population.
# apistrat contains a sample stratified by stype (Elementary/Middle/High School).
# apiclus1 contains a cluster sample of school districts.
# apiclus2 contains a two-stage cluster sample of schools within districts. (this is the data we used here)
#apiclus2 is a sample obtained using a two-stage cluster sampling design,
# 1st stage, n = 40 SRS districts were selected from N=757 districts,
# 1st stage, fpc1 is a vector of N = 757
# 2nd stage, within selected district i with Mi schools, one or more schools were selected using SRS
# 2nd stage, fpc2 is a vector of total number of schools in each sampled district (i.e., fpc2 = Mi).
# snum: School number, dnum: District number
# enroll: number of students enrolled (response variable)
# there are missing data of enroll, use na.rm=TRUE, consider that these are
# missing at random so as not to bias inferences.
data(api) #loads several data frames that are samples from the data frame apipop.
#help(api)
apiclus2[1:5,]
## cds stype name sname snum
## 1 31667796031017 E Alta-Dutch Flat Alta-Dutch Flat Elementary 3269
## 2 55751846054837 E Tenaya Elementa Tenaya Elementary 5979
## 3 41688746043517 E Panorama Elemen Panorama Elementary 4958
## 4 41688746043509 M Lipman Middle Lipman Middle 4957
## 5 41688746043491 E Brisbane Elemen Brisbane Elementary 4956
## dname dnum cname cnum flag pcttest api00 api99
## 1 Alta-Dutch Flat Elem 15 Placer 30 NA 100 821 785
## 2 Big Oak Flat-Grvlnd Unif 63 Tuolumne 54 NA 100 773 718
## 3 Brisbane Elementary 83 San Mateo 40 NA 98 600 632
## 4 Brisbane Elementary 83 San Mateo 40 NA 100 740 740
## 5 Brisbane Elementary 83 San Mateo 40 NA 98 716 711
## target growth sch.wide comp.imp both awards meals ell yr.rnd mobility
## 1 1 36 Yes Yes Yes Yes 27 0 No 14
## 2 4 55 Yes Yes Yes Yes 43 0 No 12
## 3 8 -32 No No No No 33 5 No 9
## 4 3 0 No No No No 11 4 No 8
## 5 4 5 Yes Yes Yes Yes 5 2 No 6
## acs.k3 acs.46 acs.core pct.resp not.hsg hsg some.col col.grad grad.sch
## 1 17 20 NA 89 4 16 53 21 6
## 2 18 34 NA 98 8 33 37 15 7
## 3 19 29 NA 79 8 28 30 32 1
## 4 NA 30 24 96 5 27 35 27 6
## 5 18 28 NA 98 3 14 22 58 3
## avg.ed full emer enroll api.stu pw fpc1 fpc2
## 1 3.07 100 0 152 120 18.925 757 1
## 2 2.79 100 0 312 270 18.925 757 1
## 3 2.90 100 0 173 151 18.925 757 3
## 4 3.03 82 18 201 179 18.925 757 3
## 5 3.44 100 8 147 136 18.925 757 3
dimnames(apiclus2)[2]
## [[1]]
## [1] "cds" "stype" "name" "sname" "snum" "dname"
## [7] "dnum" "cname" "cnum" "flag" "pcttest" "api00"
## [13] "api99" "target" "growth" "sch.wide" "comp.imp" "both"
## [19] "awards" "meals" "ell" "yr.rnd" "mobility" "acs.k3"
## [25] "acs.46" "acs.core" "pct.resp" "not.hsg" "hsg" "some.col"
## [31] "col.grad" "grad.sch" "avg.ed" "full" "emer" "enroll"
## [37] "api.stu" "pw" "fpc1" "fpc2"
clu2design<-svydesign(id=~dnum+snum,fpc=~fpc1+fpc2,data=apiclus2)
summary(clu2design)
## 2 - level Cluster Sampling design
## With (40, 126) clusters.
## svydesign(id = ~dnum + snum, fpc = ~fpc1 + fpc2, data = apiclus2)
## Probabilities:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.003669 0.037740 0.052840 0.042390 0.052840 0.052840
## Population size (PSUs): 757
## Data variables:
## [1] "cds" "stype" "name" "sname" "snum" "dname"
## [7] "dnum" "cname" "cnum" "flag" "pcttest" "api00"
## [13] "api99" "target" "growth" "sch.wide" "comp.imp" "both"
## [19] "awards" "meals" "ell" "yr.rnd" "mobility" "acs.k3"
## [25] "acs.46" "acs.core" "pct.resp" "not.hsg" "hsg" "some.col"
## [31] "col.grad" "grad.sch" "avg.ed" "full" "emer" "enroll"
## [37] "api.stu" "pw" "fpc1" "fpc2"
smean<-svymean(~enroll,clu2design,na.rm=TRUE) #ratio estimation
smean
## mean SE
## enroll 526.26 80.341
#identify the schools with missing values
with(apiclus2, sname[is.na(enroll)])
## [1] "Virginia Avenue Elementary" "Fairfax Elementary"
## [3] "California City Middle" "Mojave Elementary"
## [5] "Joshua Middle" "Ulrich (Robert P.) Elementary"
svytotal(~enroll, clu2design, na.rm = TRUE) #unbiased estimator
## total SE
## enroll 2639273 799638
#you may also use ratio estimator when M0 is known