################################################################################
####################Chapter 5 Cluster Sampling with Equal Probability###########
################################################################################

################################################################################
#####one stage cluster sampling, GPA data#######################################
################################################################################
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(SDaA)
ex.data <- data.frame(suite = c(1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5), gpa = c(3.08,
2.60,3.44,3.04,2.36,3.04,3.28,2.68,2.00,2.56,2.52,1.88,3.00,2.88,3.44,3.64,2.68,1.92,3.28,3.20),
wt=c(20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20))
ex.data
##    suite  gpa wt
## 1      1 3.08 20
## 2      1 2.60 20
## 3      1 3.44 20
## 4      1 3.04 20
## 5      2 2.36 20
## 6      2 3.04 20
## 7      2 3.28 20
## 8      2 2.68 20
## 9      3 2.00 20
## 10     3 2.56 20
## 11     3 2.52 20
## 12     3 1.88 20
## 13     4 3.00 20
## 14     4 2.88 20
## 15     4 3.44 20
## 16     4 3.64 20
## 17     5 2.68 20
## 18     5 1.92 20
## 19     5 3.28 20
## 20     5 3.20 20
#descriptive statistics
aa<-tapply(ex.data$gpa,ex.data$suite,sum)  #sum gpa for each suite
aa
##     1     2     3     4     5 
## 12.16 11.36  8.96 12.96 11.08
mean(aa)
## [1] 11.304
var(aa) #gives s_t^2
## [1] 2.25568
#define one-stage cluster design
design1<-svydesign(id=~suite,fpc=~rep(100,20),nest=TRUE,data=ex.data) 
design1
## 1 - level Cluster Sampling design
## With (5) clusters.
## svydesign(id = ~suite, fpc = ~rep(100, 20), nest = TRUE, data = ex.data)
##recall srs: dsrs <- svydesign(id = ~1, fpc=rep(3078,300), data = agsrs)
summary(design1)
## 1 - level Cluster Sampling design
## With (5) clusters.
## svydesign(id = ~suite, fpc = ~rep(100, 20), nest = TRUE, data = ex.data)
## Probabilities:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.05    0.05    0.05    0.05    0.05    0.05 
## Population size (PSUs): 100 
## Data variables:
## [1] "suite" "gpa"   "wt"
total1<-svytotal(~gpa,design1)
total1
##      total     SE
## gpa 1130.4 65.466
mean1<-svymean(~gpa,design1)
mean1
##      mean     SE
## gpa 2.826 0.1637
ci1<-confint(mean1,level=.95)
ci1
##        2.5 %   97.5 %
## gpa 2.505223 3.146777
ci2<-confint(mean1,level=.95,df=4) ##use t-approximation
ci2
##        2.5 %   97.5 %
## gpa 2.371593 3.280407
#anova table, consider suites as factor with 5 levels, one-way ANOVA can be used to derive the anova talbe
myfit<-lm(gpa~factor(suite),ex.data)
anova(myfit)
## Analysis of Variance Table
## 
## Response: gpa
##               Df Sum Sq Mean Sq F value  Pr(>F)  
## factor(suite)  4 2.2557 0.56392  3.0476 0.05039 .
## Residuals     15 2.7756 0.18504                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
################################################################################
#####two stage cluster sampling, Coots data#####################################
################################################################################
data(coots)
attach(coots)

#coots.dat come from Arnold???s (1991) work on egg size
#and volume of American coot eggs in Minnedosa, Manitoba
#In this data set, we look at volumes of a subsample of eggs
#in clutches (nests of eggs) with at least two eggs available for
#measurement
#Randomly select 2 eggs in each clutch and measure their volume
#Want to estimate the mean egg volume

nrow(coots)
## [1] 368
coots[1:10,]
##    clutch csize length breadth    volume tmt
## 1       1    13  44.30   31.10 3.7957569 yes
## 2       1    13  45.90   32.70 3.9328497 yes
## 3       2    13  49.20   34.40 4.2156036 yes
## 4       2    13  48.70   32.70 4.1727621 yes
## 5       3     6  51.05   34.25 0.9317646  no
## 6       3     6  49.35   34.40 0.9007362  no
## 7       4    11  49.20   31.55 3.0182724 yes
## 8       4    11  48.55   33.10 2.9783969 yes
## 9       5    10  49.40   34.55 2.5045800 yes
## 10      5    10  49.05   34.95 2.4868350 yes
#clutch is psu
#csize: number of eggs in the clutch, M_i
#m_i=2
# n= 184 clutches selected
##plot data
plot(volume ~ clutch, xlim = c(0, 200), pch = 19,
data = coots, xlab = "Clutch Number", ylab = "Egg Volume")

#Note the wide variation in the means from clutch to clutch. 
#eggs within the same clutch tend to be more similar 
#than two randomly selected eggs from different clutches, and that clustering does 
#not provide as much information per egg as would an SRS of eggs.

##calculate using direct formulas

#split data using clutch, response interested is volume
coots2<-split(volume,clutch)
coots2
## $`1`
## [1] 3.795757 3.932850
## 
## $`2`
## [1] 4.215604 4.172762
## 
## $`3`
## [1] 0.9317646 0.9007362
## 
## $`4`
## [1] 3.018272 2.978397
## 
## $`5`
## [1] 2.504580 2.486835
## 
## $`6`
## [1] 3.971407 3.997112
## 
## $`7`
## [1] 1.977376 1.876762
## 
## $`8`
## [1] 2.910915 3.012138
## 
## $`9`
## [1] 3.467880 3.453278
## 
## $`10`
## [1] 3.067350 2.855703
## 
## $`11`
## [1] 3.540888 3.456929
## 
## $`12`
## [1] 2.990666 3.009070
## 
## $`13`
## [1] 3.646750 3.486132
## 
## $`14`
## [1] 3.024407 2.947723
## 
## $`15`
## [1] 3.006003 2.959993
## 
## $`16`
## [1] 2.438670 2.375295
## 
## $`17`
## [1] 1.993803 2.026657
## 
## $`18`
## [1] 2.441205 2.433600
## 
## $`19`
## [1] 2.932387 2.920117
## 
## $`20`
## [1] 3.091889 2.803558
## 
## $`21`
## [1] 2.57049 2.50458
## 
## $`22`
## [1] 4.361265 4.177046
## 
## $`23`
## [1] 3.807367 3.723408
## 
## $`24`
## [1] 2.535000 2.593305
## 
## $`25`
## [1] 1.948629 1.975323
## 
## $`26`
## [1] 3.599294 3.365669
## 
## $`27`
## [1] 2.562885 2.585700
## 
## $`28`
## [1] 1.911669 1.999963
## 
## $`29`
## [1] 3.471530 3.343766
## 
## $`30`
## [1] 2.966127 2.944656
## 
## $`31`
## [1] 2.502045 2.507115
## 
## $`32`
## [1] 1.546147 1.573728
## 
## $`33`
## [1] 2.52486 2.45895
## 
## $`34`
## [1] 2.436135 2.537535
## 
## $`35`
## [1] 1.971216 2.002016
## 
## $`36`
## [1] 1.967109 1.882922
## 
## $`37`
## [1] 1.463405 1.531546
## 
## $`38`
## [1] 1.932202 1.936309
## 
## $`39`
## [1] 1.544525 1.585085
## 
## $`40`
## [1] 2.39304 2.43360
## 
## $`41`
## [1] 2.920117 2.717672
## 
## $`42`
## [1] 1.244634 1.223518
## 
## $`43`
## [1] 1.276930 1.244634
## 
## $`44`
## [1] 3.018272 3.042811
## 
## $`45`
## [1] 2.496975 2.446275
## 
## $`46`
## [1] 2.098524 2.053350
## 
## $`47`
## [1] 3.159371 3.159371
## 
## $`48`
## [1] 1.969163 1.899349
## 
## $`49`
## [1] 1.911669 1.917829
## 
## $`50`
## [1] 1.601309 1.583462
## 
## $`51`
## [1] 2.481765 2.458950
## 
## $`52`
## [1] 3.113360 2.972262
## 
## $`53`
## [1] 2.059510 2.063617
## 
## $`54`
## [1] 2.425995 2.425995
## 
## $`55`
## [1] 1.585085 1.568861
## 
## $`56`
## [1] 1.913722 1.889082
## 
## $`57`
## [1] 2.64147 2.63133
## 
## $`58`
## [1] 0.8861346 0.8697078
## 
## $`59`
## [1] 0.885222 0.871533
## 
## $`60`
## [1] 0.9144252 0.8906976
## 
## $`61`
## [1] 1.521811 1.466650
## 
## $`62`
## [1] 1.596442 1.598064
## 
## $`63`
## [1] 1.515322 1.479629
## 
## $`64`
## [1] 2.018443 2.065670
## 
## $`65`
## [1] 0.8094762 0.8176896
## 
## $`66`
## [1] 1.183769 1.224760
## 
## $`67`
## [1] 0.6565650 0.6635363
## 
## $`68`
## [1] 2.096470 2.032817
## 
## $`69`
## [1] 1.521811 1.573728
## 
## $`70`
## [1] 3.500734 3.526286
## 
## $`71`
## [1] 2.474160 2.573025
## 
## $`72`
## [1] 2.030763 1.926042
## 
## $`73`
## [1] 2.073883 2.121111
## 
## $`74`
## [1] 1.528301 1.490986
## 
## $`75`
## [1] 3.919997 3.913999
## 
## $`76`
## [1] 2.110844 2.067724
## 
## $`77`
## [1] 2.49951 2.45895
## 
## $`78`
## [1] 1.588330 1.583462
## 
## $`79`
## [1] 3.033609 3.037903
## 
## $`80`
## [1] 1.62240 1.53479
## 
## $`81`
## [1] 1.189980 1.244634
## 
## $`82`
## [1] 1.186253 1.191222
## 
## $`83`
## [1] 1.151473 1.156442
## 
## $`84`
## [1] 0.5819093 0.5730368
## 
## $`85`
## [1] 0.6413550 0.6571987
## 
## $`86`
## [1] 2.137127 2.096470
## 
## $`87`
## [1] 2.519790 2.436135
## 
## $`88`
## [1] 1.854996 2.841593
## 
## $`89`
## [1] 3.027474 3.002936
## 
## $`90`
## [1] 1.918650 2.050065
## 
## $`91`
## [1] 2.086614 1.981483
## 
## $`92`
## [1] 2.385435 2.442726
## 
## $`93`
## [1] 1.191222 1.175819
## 
## $`94`
## [1] 2.572011 2.519283
## 
## $`95`
## [1] 1.223021 1.230225
## 
## $`96`
## [1] 0.8983634 0.8624070
## 
## $`97`
## [1] 0.5925563 0.5957250
## 
## $`98`
## [1] 1.974912 1.982304
## 
## $`99`
## [1] 2.987599 2.988212
## 
## $`100`
## [1] 2.418390 2.315469
## 
## $`101`
## [1] 3.415314 3.445247
## 
## $`102`
## [1] 0.6295672 0.6039638
## 
## $`103`
## [1] 1.193209 1.076447
## 
## $`104`
## [1] 1.148243 1.091850
## 
## $`105`
## [1] 1.976144 1.998731
## 
## $`106`
## [1] 2.966741 3.056921
## 
## $`107`
## [1] 2.133841 2.051707
## 
## $`108`
## [1] 1.935488 1.983947
## 
## $`109`
## [1] 2.365155 2.456922
## 
## $`110`
## [1] 1.889903 1.974091
## 
## $`111`
## [1] 2.606994 2.593812
## 
## $`112`
## [1] 3.600755 3.656241
## 
## $`113`
## [1] 1.652252 1.645114
## 
## $`114`
## [1] 1.626943 1.668476
## 
## $`115`
## [1] 2.872266 2.867972
## 
## $`116`
## [1] 2.130556 2.105916
## 
## $`117`
## [1] 2.603952 2.559843
## 
## $`118`
## [1] 1.600335 1.611043
## 
## $`119`
## [1] 3.985973 3.934563
## 
## $`120`
## [1] 3.164278 3.145874
## 
## $`121`
## [1] 2.081686 2.046779
## 
## $`122`
## [1] 3.616086 3.689094
## 
## $`123`
## [1] 2.572518 2.627274
## 
## $`124`
## [1] 2.893124 2.966741
## 
## $`125`
## [1] 3.045879 3.068577
## 
## $`126`
## [1] 2.496468 2.546154
## 
## $`127`
## [1] 3.502924 3.491243
## 
## $`128`
## [1] 3.056307 2.913982
## 
## $`129`
## [1] 1.909205 1.955610
## 
## $`130`
## [1] 2.167516 2.139591
## 
## $`131`
## [1] 3.529207 3.554029
## 
## $`132`
## [1] 2.673411 2.632344
## 
## $`133`
## [1] 2.966127 2.855089
## 
## $`134`
## [1] 1.595144 1.577946
## 
## $`135`
## [1] 2.864291 2.886376
## 
## $`136`
## [1] 2.098113 2.097292
## 
## $`137`
## [1] 2.359578 2.363634
## 
## $`138`
## [1] 2.662764 2.584686
## 
## $`139`
## [1] 1.587356 1.554908
## 
## $`140`
## [1] 2.069777 2.023782
## 
## $`141`
## [1] 2.836685 2.908461
## 
## $`142`
## [1] 2.866132 2.902940
## 
## $`143`
## [1] 2.921958 2.955085
## 
## $`144`
## [1] 3.710267 3.753341
## 
## $`145`
## [1] 2.577081 2.581644
## 
## $`146`
## [1] 2.529930 2.690649
## 
## $`147`
## [1] 2.029531 2.046779
## 
## $`148`
## [1] 2.592291 2.623725
## 
## $`149`
## [1] 3.137286 3.072258
## 
## $`150`
## [1] 3.002322 2.849568
## 
## $`151`
## [1] 1.169857 1.177807
## 
## $`152`
## [1] 1.556206 1.587356
## 
## $`153`
## [1] 1.963003 1.979840
## 
## $`154`
## [1] 2.368704 2.483793
## 
## $`155`
## [1] 1.683747 1.965877
## 
## $`156`
## [1] 2.484807 2.641977
## 
## $`157`
## [1] 2.231814 2.448303
## 
## $`158`
## [1] 2.062795 1.935488
## 
## $`159`
## [1] 1.701817 1.803663
## 
## $`160`
## [1] 2.893738 3.086981
## 
## $`161`
## [1] 1.917008 1.967931
## 
## $`162`
## [1] 1.504289 1.611368
## 
## $`163`
## [1] 1.637975 1.556206
## 
## $`164`
## [1] 1.180043 1.182775
## 
## $`165`
## [1] 2.282514 2.590770
## 
## $`166`
## [1] 1.642518 1.661338
## 
## $`167`
## [1] 1.608123 1.601958
## 
## $`168`
## [1] 0.8848570 0.8801114
## 
## $`169`
## [1] 2.395575 2.375295
## 
## $`170`
## [1] 2.425995 2.540070
## 
## $`171`
## [1] 2.58063 2.44881
## 
## $`172`
## [1] 2.990666 2.972262
## 
## $`173`
## [1] 2.344875 2.347410
## 
## $`174`
## [1] 3.559140 3.679603
## 
## $`175`
## [1] 2.45388 2.41332
## 
## $`176`
## [1] 3.789115 3.716107
## 
## $`177`
## [1] 2.51472 2.54007
## 
## $`178`
## [1] 3.180842 3.021340
## 
## $`179`
## [1] 1.967109 1.913722
## 
## $`180`
## [1] 1.940416 1.952736
## 
## $`181`
## [1] 3.482482 3.424075
## 
## $`182`
## [1] 4.224172 4.215604
## 
## $`183`
## [1] 4.481221 4.348412
## 
## $`184`
## [1] 3.486132 3.482482
coots2[[1]]
## [1] 3.795757 3.932850
nclus<-length(coots2) # number of clusters selected
nclus  #184
## [1] 184
clusmean<-sapply(coots2,mean) #this is yibar
clusmean
##         1         2         3         4         5         6         7 
## 3.8643033 4.1941828 0.9162504 2.9983346 2.4957075 3.9842595 1.9270690 
##         8         9        10        11        12        13        14 
## 2.9615265 3.4605792 2.9615265 3.4989084 2.9998683 3.5664408 2.9860653 
##        15        16        17        18        19        20        21 
## 2.9829979 2.4069825 2.0102297 2.4374025 2.9262519 2.9477233 2.5375350 
##        22        23        24        25        26        27        28 
## 4.2691555 3.7653876 2.5641525 1.9619759 3.4824816 2.5742925 1.9558159 
##        29        30        31        32        33        34        35 
## 3.4076484 2.9553917 2.5045800 1.5599376 2.4919050 2.4868350 1.9866162 
##        36        37        38        39        40        41        42 
## 1.9250156 1.4974752 1.9342556 1.5648048 2.4133200 2.8188946 1.2340760 
##        43        44        45        46        47        48        49 
## 1.2607823 3.0305418 2.4716250 2.0759368 3.1593705 1.9342556 1.9147489 
##        50        51        52        53        54        55        56 
## 1.5923856 2.4703575 3.0428112 2.0615634 2.4259950 1.5769728 1.9014021 
##        57        58        59        60        61        62        63 
## 2.6364000 0.8779212 0.8783775 0.9025614 1.4942304 1.5972528 1.4974752 
##        64        65        66        67        68        69        70 
## 2.0420566 0.8135829 1.2042644 0.6600506 2.0646435 1.5477696 3.5135100 
##        71        72        73        74        75        76        77 
## 2.5235925 1.9784027 2.0974971 1.5096432 3.9169984 2.0892837 2.4792300 
##        78        79        80        81        82        83        84 
## 1.5858960 3.0357563 1.5785952 1.2173070 1.1887375 1.1539574 0.5774731 
##        85        86        87        88        89        90        91 
## 0.6492769 2.1167985 2.4779625 2.3482947 3.0152051 1.9843574 2.0340485 
##        92        93        94        95        96        97        98 
## 2.4140805 1.1835205 2.5456470 1.2266231 0.8803852 0.5941406 1.9786081 
##        99       100       101       102       103       104       105 
## 2.9879057 2.3669295 3.4302808 0.6167655 1.1348282 1.1200466 1.9874374 
##       106       107       108       109       110       111       112 
## 3.0118310 2.0927743 1.9597172 2.4110385 1.9319970 2.6004030 3.6284976 
##       113       114       115       116       117       118       119 
## 1.6486829 1.6477095 2.8701193 2.1182359 2.5818975 1.6056893 3.9602683 
##       120       121       122       123       124       125       126 
## 3.1550763 2.0642327 3.6525902 2.5998960 2.9299327 3.0572277 2.5213110 
##       127       128       129       130       131       132       133 
## 3.4970832 2.9851450 1.9324077 2.1535535 3.5416181 2.6528775 2.9106084 
##       134       135       136       137       138       139       140 
## 1.5865449 2.8753339 2.0977024 2.3616060 2.6237250 1.5711322 2.0467793 
##       141       142       143       144       145       146       147 
## 2.8725733 2.8845359 2.9385213 3.7318040 2.5793625 2.6102895 2.0381552 
##       148       149       150       151       152       153       154 
## 2.6080080 3.1047717 2.9259452 1.1738318 1.5717812 1.9714213 2.4262485 
##       155       156       157       158       159       160       161 
## 1.8248122 2.5633920 2.3400585 1.9991416 1.7527396 2.9903595 1.9424691 
##       162       163       164       165       166       167       168 
## 1.5578285 1.5970905 1.1814088 2.4366420 1.6519277 1.6050403 0.8824842 
##       169       170       171       172       173       174       175 
## 2.3854350 2.4830325 2.5147200 2.9814642 2.3461425 3.6193716 2.4336000 
##       176       177       178       179       180       181       182 
## 3.7526112 2.5273950 3.1010909 1.9404158 1.9465758 3.4532784 4.2198877 
##       183       184 
## 4.4148166 3.4843068
clusvar<-sapply(coots2,var) #this is si
clusvar
##            1            2            3            4            5 
## 9.397218e-03 9.176971e-04 4.813808e-04 7.950278e-04 1.574425e-04 
##            6            7            8            9           10 
## 3.303709e-04 5.061599e-03 5.122997e-03 1.066034e-04 2.239725e-02 
##           11           12           13           14           15 
## 3.524574e-03 1.693573e-04 1.289901e-02 2.940195e-03 1.058469e-03 
##           16           17           18           19           20 
## 2.008195e-03 5.396828e-04 2.891801e-05 7.526909e-05 4.156735e-02 
##           21           22           23           24           25 
## 2.172064e-03 1.696823e-02 3.524574e-03 1.699737e-03 3.562741e-04 
##           26           27           28           29           30 
## 2.729046e-02 2.602621e-04 3.897915e-03 8.161820e-03 2.305127e-04 
##           31           32           33           34           35 
## 1.285245e-05 3.803503e-04 2.172064e-03 5.140980e-03 4.743292e-04 
##           36           37           38           39           40 
## 3.543759e-03 2.321584e-03 8.432492e-06 8.225568e-04 8.225568e-04 
##           41           42           43           44           45 
## 2.049201e-02 2.229554e-04 5.215126e-04 3.010764e-04 1.285245e-03 
##           46           47           48           49           50 
## 1.020332e-03 0.000000e+00 2.436990e-03 1.897280e-05 1.592470e-04 
##           51           52           53           54           55 
## 2.602621e-04 9.954337e-03 8.432492e-06 0.000000e+00 1.316091e-04 
##           56           57           58           59           60 
## 3.035697e-04 5.140980e-05 1.349199e-04 9.369436e-05 2.814995e-04 
##           61           62           63           64           65 
## 1.521401e-03 1.316091e-06 6.369880e-04 1.115195e-03 3.372997e-05 
##           66           67           68           69           70 
## 8.401310e-04 2.429951e-05 2.025909e-03 1.347677e-03 3.264728e-04 
##           71           72           73           74           75 
## 4.887144e-03 5.483223e-03 1.115199e-03 6.962121e-04 1.798740e-05 
##           76           77           78           79           80 
## 9.296801e-04 8.225568e-04 1.184482e-05 9.220077e-06 3.837721e-03 
##           81           82           83           84           85 
## 1.493563e-03 1.234349e-05 1.234349e-05 3.936063e-05 1.255114e-04 
##           86           87           88           89           90 
## 8.264674e-04 3.499080e-03 4.866864e-01 3.010764e-04 8.634872e-03 
##           91           92           93           94           95 
## 5.526327e-03 1.641129e-03 1.186200e-04 1.390121e-03 2.595241e-05 
##           96           97           98           99          100 
## 6.464314e-04 5.020330e-06 2.732157e-05 1.881911e-07 5.296366e-03 
##          101          102          103          104          105 
## 4.480012e-04 3.277670e-04 6.816694e-03 1.590125e-03 2.550840e-04 
##          106          107          108          109          110 
## 4.066225e-03 3.372997e-03 1.174142e-03 4.210591e-03 3.543759e-03 
##          111          112          113          114          115 
## 8.688256e-05 1.539348e-03 2.547980e-05 8.625158e-04 9.220506e-06 
##          116          117          118          119          120 
## 3.035697e-04 9.728019e-04 5.732849e-05 1.321484e-03 1.693554e-04 
##          121          122          123          124          125 
## 6.092458e-04 2.665084e-03 1.499110e-03 2.709687e-03 2.576087e-04 
##          126          127          128          129          130 
## 1.234349e-03 6.822522e-05 1.012820e-02 1.076744e-03 3.899196e-04 
##          131          132          133          134          135 
## 3.080832e-04 8.432492e-04 6.164730e-03 1.478770e-04 2.438736e-04 
##          136          137          138          139          140 
## 3.372668e-07 8.225568e-06 3.048087e-03 5.264364e-04 1.057770e-03 
##          141          142          143          144          145 
## 2.575897e-03 6.774218e-04 5.487123e-04 9.277149e-04 1.041048e-05 
##          146          147          148          149          150 
## 1.291530e-02 1.487502e-04 4.940482e-04 2.114307e-03 1.166689e-02 
##          151          152          153          154          155 
## 3.159887e-05 4.851644e-04 1.417507e-04 6.622739e-03 3.979875e-02 
##          156          157          158          159          160 
## 1.235120e-02 2.343374e-02 8.103625e-03 5.186314e-03 1.867143e-02 
##          161          162          163          164          165 
## 1.296576e-03 5.732892e-03 3.343077e-03 3.733825e-06 4.751088e-02 
##          166          167          168          169          170 
## 1.770924e-04 1.900423e-05 1.126036e-05 2.056392e-04 6.506553e-03 
##          171          172          173          174          175 
## 8.688256e-03 1.693536e-04 3.213112e-06 7.255691e-03 8.225568e-04 
##          176          177          178          179          180 
## 2.665084e-03 3.213112e-04 1.272048e-02 1.425091e-03 7.589120e-05 
##          181          182          183          184 
## 1.705654e-03 3.670788e-05 8.819075e-03 6.662710e-06
#split data using clutch, response interested is csize
coots3<-split(csize,clutch)
#coots3
mi<-sapply(coots3,length)
mi
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
##  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
##  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
##  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
##  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
##  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 181 182 183 184 
##   2   2   2   2
Mi<-sapply(coots3,mean)
Mi
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18 
##  13  13   6  11  10  13   9  11  12  11  12  11  12  11  11  10   9  10 
##  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36 
##  11  11  10  13  12  10   9  12  10   9  12  11  10   8  10  10   9   9 
##  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54 
##   8   9   8  10  11   7   7  11  10   9  11   9   9   8  10  11   9  10 
##  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72 
##   8   9  10   6   6   6   8   8   8   9   6   7   5   9   8  12  10   9 
##  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90 
##   9   8  13   9  10   8  11   8   7   7   7   5   5   9  10  10  11   9 
##  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 
##   9  10   7  10   7   6   5   9  11  10  12   5   7   7   9  11   9   9 
## 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 
##  10   9  10  12   8   8  11   9  10   8  13  11   9  12  10  11  11  10 
## 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 
##  12  11   9   9  12  10  11   8  11   9  10  10   8   9  11  11  11  12 
## 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 
##  10  10   9  10  11  11   7   8   9  10   9  10  10   9   9  11   9   8 
## 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 
##   8   7  10   8   8   6  10  10  10  11  10  12  10  12  10  11   9   9 
## 181 182 183 184 
##  12  13  13  12
##unbiased estimator, suppose N is known as 10000, 1-n/N goes to 1
##suppose M0=95000
N<-10000
tunb<-N*sum(Mi*clusmean)/nclus
tunb
## [1] 237950.8
yunb<-tunb/95000
yunb
## [1] 2.504745
#variances
ssufpc<-1-mi/Mi
st2<-var(Mi*clusmean)
varterm2<-sum(ssufpc*(Mi^2)*clusvar/mi)
vartunb<-N^2*(1-nclus/N)*st2/nclus + (N/nclus)*varterm2
vartunb
## [1] 79771832
varyunb<-vartunb/(95000)^2
varyunb
## [1] 0.008838984
seyunb<-sqrt(varyunb)
seyunb
## [1] 0.09401587
##ratio estimation
ybarratio<-sum(Mi*clusmean)/sum(Mi)
ybarratio
## [1] 2.490498
sr2<-var(Mi*(clusmean - ybarratio))
varybarr<-( (1 - nclus/N)*sr2/nclus + varterm2/(nclus*N) )/(mean(Mi))^2
varybarr
## [1] 0.003653726
seybarr<-sqrt(varybarr)
seybarr
## [1] 0.06044606
#compare first term and second term in variance formula
v1<-((1 - nclus/N)*sr2/nclus)/(mean(Mi))^2
v2<- (varterm2/(nclus*N))/(mean(Mi))^2
v1
## [1] 0.00365345
v2
## [1] 2.757191e-07
v1+v2
## [1] 0.003653726
#textbook use ssufpc<-1, and ignore second term
varybarrtext<-(sr2/nclus)/(mean(Mi))^2
varybarrtext
## [1] 0.003721934
seybarrtext<-sqrt(varybarrtext)
seybarrtext
## [1] 0.06100765
list(yunb=yunb,seyunb<-seyunb, yratio=ybarratio,seybarr<-seybarr,seybarrtext=seybarrtext)
## $yunb
## [1] 2.504745
## 
## [[2]]
## [1] 0.09401587
## 
## $yratio
## [1] 2.490498
## 
## [[4]]
## [1] 0.06044606
## 
## $seybarrtext
## [1] 0.06100765
##use svymean and svytotal to calculate the estimates 
cootsfpc1<-rep(10000,368) #Note: 10000 is an arbitrary number, we don't have information of N
cootsfpc2<-coots$csize #number of ssus in each psu
ssu<-rep(1:2,184) #index of ssu
design3<-svydesign(id=~clutch+ssu,fpc=~cootsfpc1+cootsfpc2,data=coots)
## Warning in as.fpc(fpc, strata, ids, pps = pps): `fpc' varies within strata:
## stratum 1.88 at stage 2
summary(design3)
## 2 - level Cluster Sampling design
## With (184, 368) clusters.
## svydesign(id = ~clutch + ssu, fpc = ~cootsfpc1 + cootsfpc2, data = coots)
## Probabilities:
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.002831 0.003345 0.003680 0.004021 0.004217 0.007360 
## Population size (PSUs): 10000 
## Data variables:
## [1] "clutch"  "csize"   "length"  "breadth" "volume"  "tmt"
svymean(~volume,design3)  #ratio estimator
##          mean     SE
## volume 2.4908 0.0604
svytotal(~volume,design3)  #unbiased estimator of the total
##         total     SE
## volume 237978 8931.6
################################################################################
#####two stage cluster sampling, API data#######################################
################################################################################

# The Academic Performance Index (API) is computed for all California
# schools based on standardized testing of students.
# The data sets contain information for all schools with at least 100 students
# and for various probability samples of the data.
# apipop contains the entire population.
# apistrat contains a sample stratified by stype (Elementary/Middle/High School).
# apiclus1 contains a cluster sample of school districts.
# apiclus2 contains a two-stage cluster sample of schools within districts. (this is the data we used here)

#apiclus2 is a sample obtained using a two-stage cluster sampling design, 
# 1st stage,  n = 40 SRS districts were selected from N=757 districts,
# 1st stage, fpc1 is a vector of N = 757
# 2nd stage, within selected district i with Mi schools, one or more schools were selected using SRS
# 2nd stage, fpc2 is a vector of total number of schools in each sampled district (i.e., fpc2 = Mi).
# snum: School number, dnum: District number 
# enroll: number of students enrolled (response variable)
# there are missing data of enroll, use na.rm=TRUE, consider that these are 
# missing at random so as not to bias inferences.

data(api) #loads several data frames that are samples from the data frame apipop.
#help(api)
apiclus2[1:5,]
##              cds stype            name                      sname snum
## 1 31667796031017     E Alta-Dutch Flat Alta-Dutch Flat Elementary 3269
## 2 55751846054837     E Tenaya Elementa          Tenaya Elementary 5979
## 3 41688746043517     E Panorama Elemen        Panorama Elementary 4958
## 4 41688746043509     M   Lipman Middle              Lipman Middle 4957
## 5 41688746043491     E Brisbane Elemen        Brisbane Elementary 4956
##                      dname dnum     cname cnum flag pcttest api00 api99
## 1     Alta-Dutch Flat Elem   15    Placer   30   NA     100   821   785
## 2 Big Oak Flat-Grvlnd Unif   63  Tuolumne   54   NA     100   773   718
## 3      Brisbane Elementary   83 San Mateo   40   NA      98   600   632
## 4      Brisbane Elementary   83 San Mateo   40   NA     100   740   740
## 5      Brisbane Elementary   83 San Mateo   40   NA      98   716   711
##   target growth sch.wide comp.imp both awards meals ell yr.rnd mobility
## 1      1     36      Yes      Yes  Yes    Yes    27   0     No       14
## 2      4     55      Yes      Yes  Yes    Yes    43   0     No       12
## 3      8    -32       No       No   No     No    33   5     No        9
## 4      3      0       No       No   No     No    11   4     No        8
## 5      4      5      Yes      Yes  Yes    Yes     5   2     No        6
##   acs.k3 acs.46 acs.core pct.resp not.hsg hsg some.col col.grad grad.sch
## 1     17     20       NA       89       4  16       53       21        6
## 2     18     34       NA       98       8  33       37       15        7
## 3     19     29       NA       79       8  28       30       32        1
## 4     NA     30       24       96       5  27       35       27        6
## 5     18     28       NA       98       3  14       22       58        3
##   avg.ed full emer enroll api.stu     pw fpc1 fpc2
## 1   3.07  100    0    152     120 18.925  757    1
## 2   2.79  100    0    312     270 18.925  757    1
## 3   2.90  100    0    173     151 18.925  757    3
## 4   3.03   82   18    201     179 18.925  757    3
## 5   3.44  100    8    147     136 18.925  757    3
dimnames(apiclus2)[2]
## [[1]]
##  [1] "cds"      "stype"    "name"     "sname"    "snum"     "dname"   
##  [7] "dnum"     "cname"    "cnum"     "flag"     "pcttest"  "api00"   
## [13] "api99"    "target"   "growth"   "sch.wide" "comp.imp" "both"    
## [19] "awards"   "meals"    "ell"      "yr.rnd"   "mobility" "acs.k3"  
## [25] "acs.46"   "acs.core" "pct.resp" "not.hsg"  "hsg"      "some.col"
## [31] "col.grad" "grad.sch" "avg.ed"   "full"     "emer"     "enroll"  
## [37] "api.stu"  "pw"       "fpc1"     "fpc2"
clu2design<-svydesign(id=~dnum+snum,fpc=~fpc1+fpc2,data=apiclus2)
summary(clu2design)
## 2 - level Cluster Sampling design
## With (40, 126) clusters.
## svydesign(id = ~dnum + snum, fpc = ~fpc1 + fpc2, data = apiclus2)
## Probabilities:
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.003669 0.037740 0.052840 0.042390 0.052840 0.052840 
## Population size (PSUs): 757 
## Data variables:
##  [1] "cds"      "stype"    "name"     "sname"    "snum"     "dname"   
##  [7] "dnum"     "cname"    "cnum"     "flag"     "pcttest"  "api00"   
## [13] "api99"    "target"   "growth"   "sch.wide" "comp.imp" "both"    
## [19] "awards"   "meals"    "ell"      "yr.rnd"   "mobility" "acs.k3"  
## [25] "acs.46"   "acs.core" "pct.resp" "not.hsg"  "hsg"      "some.col"
## [31] "col.grad" "grad.sch" "avg.ed"   "full"     "emer"     "enroll"  
## [37] "api.stu"  "pw"       "fpc1"     "fpc2"
smean<-svymean(~enroll,clu2design,na.rm=TRUE) #ratio estimation
smean
##          mean     SE
## enroll 526.26 80.341
#identify the schools with missing values 
with(apiclus2, sname[is.na(enroll)])
## [1] "Virginia Avenue Elementary"    "Fairfax Elementary"           
## [3] "California City Middle"        "Mojave Elementary"            
## [5] "Joshua Middle"                 "Ulrich (Robert P.) Elementary"
svytotal(~enroll, clu2design, na.rm = TRUE)  #unbiased estimator
##          total     SE
## enroll 2639273 799638
#you may also use ratio estimator when M0 is known