################################################################################ ####################Chapter 6 Unequal Probability Sampling###################### ################################################################################ ##########################example of physicians in United States################# library(SDaA) library(survey) data(statepop) nrow(statepop) statepop[1:10,] statepop$psi <- statepop$popn/255077536 #M_0 = 255077536 #define psi_i plot(phys ~ psi, data = statepop, xlab = expression(paste(Psi[i]," for County")), ylab = "Physicians in County (in thousands)") ###one stage sampling with replacement dppswr<- svydesign(id=~1, probs=~psi, data=statepop) est1<-svytotal(~phys,dppswr) #total of the n estimates est2<-as.data.frame(est1) tpsihat<-est2[1]/100 #avergae the n estimates to get the \hat t_phi setpsihat<-est2[2]/100 tpsihat setpsihat #direct formula tpsihat2<-sum(statepop$phys/statepop$psi)/100 tpsihat2 vtpsihat2<-sum((statepop$phys/statepop$psi-tpsihat2)^2)/(100*99) setpsihat2<-sqrt(vtpsihat2) setpsihat2 list(tpsihat=tpsihat,setpsihat=setpsihat,tpsihat2=tpsihat2,setpsihat2= setpsihat2) ###Variances: without replacement ## Horvitz-Thompson type #use brewer methods dppsbr<-svydesign(id=~1, fpc=~psi, data=statepop, pps="brewer",variance="HT") #default is "HT" dppsbr svytotal(~phys,dppsbr) #use Hartley-Rao approximation dppsht<- svydesign(id=~1, fpc=~psi, data=statepop, pps=HR(sum(statepop$psi^2)/100)) ##use Hartley-Rao approximation dppsht svytotal(~phys,dppsht) ## Yates-Grundy type dppsyg<- svydesign(id=~1, fpc=~psi, data=statepop, pps=HR(sum(statepop$psi^2)/100),variance="YG") #default is "HT" dppsyg svytotal(~phys,dppsyg) ##########################PPS package################# install.packages("pps") library(pps) data(calif) data(califcty) #The data set calif is based #on data from the 2000 U.S. census, available at the U.S. Census Bureau¡¯s web #site, http://www.census.gov. The file includes one record for each place in #California. A place can be anything from a tiny town to Los Angeles. The file #includes a county code, and a stratum code has been added. The four strata #were derived according to county size: counties with fewer than 50,000 people #are in stratum 1 (as are the places in those counties), with 50,000 to 300,000 #people in stratum 2, with 300,000 to 1,000,000 people in stratum 3, and the #remaining ones are in stratum 4. The other variables in the file are total #population (variable name population), and the number of whites (white), #American Indians (amerind) and Hispanics (hispanic). A second data set, #califcty, is derived from calif. It is the county-level summary of the bigger #file, i.e., each record in califcty corresponds to a county. ####stratified sampling table(calif$stratum) # take a look at the size of each stratum nh <- c(40,50,40,60) # these are the sample sizes we want calif<-calif[order(calif$stratum),] # sort by stratum califsample <- calif[stratsrs(calif$stratum,nh),] califsample[1:10,] by(califsample[,2:5],califsample$stratum,summary) by(calif[,2:5],calif$stratum,summary) ####ppswr, pps with replacement #The function call #is ppswr(sizes,n), where sizes is a vector of the sizes of the population #units and n is the sample size. county1 <- calif[calif$county==1,] # get the units in county 1, there are 20 #places in county 1 with different population size county1 county1sample<-county1[ppswr(county1$population,5),] # select 5 units county1sample ####ppswor, pps without replacement, sampford and sampfordpi #The function sampford select a PPS sample without replacement using Sampford¡¯s #method. The function sampfordpi computes the corresponding inclusion #pi_i and joint inclusion probabilities pi_{ij}. #the Sampford ones incorporate a check for units that are #too big and aborts if such units are found sampford(county1$popul,5) # what if n is too big? worsample<-county1[sampford(county1$popul,3),] worsample piij<-sampfordpi(county1$popul,3) diag(piij) # these are the inclusion probabilities wt<-1/diag(piij) # these are the weights to use for estimation