#simulaiton practice #m is the sample size, want to generate m random numbers with N(0.5, 1) ##for each sample, perform the z test, #repeat the procedure for tt times, find percentage of times that z < 1.96, expect to be close to 0.975 #find percentage that |z|<1.96, expect to be close to 0.95 simulation<-function(m,tt){ #set.seed(1234123) times1<-0 times2<-0 for(i in 1:tt){ mu<-rep(.5,m) #generate epsilon from normal distribution epsilon<-rnorm(m,0,1) y<-mu+epsilon #print (y) #estimators ybar<-mean(y) sigmasquare<-sum((y-ybar)^2)/(m-1) seybar<-sqrt(sigmasquare/m) #print (ybar) # print (sigmasquare) #print(seybar) # teststatistics z<-(ybar-.5)/seybar #qz(.95)=1.96 if(z<1.96){times1<-times1+1} if(abs(z)<1.96){times2<-times2+1} #print (z) } result1<-times1/tt result2<-times2/tt list(result1=result1, result2=result2) } simulation(100,1000) ###stratified cluster sample##### library(survey) data(election) ##creat data sorted, with decreasing votes in each county### sorted<-election[order(election$votes, decreasing = TRUE),] ##samples the largest five counties (based in or around ##Los Angeles, Chicago, Houston, Phoenix, and Deroit) for cetainty, then takes ##5 of the next 95, 15 of the next 900, and 15 of the remaining 3600. ##funciton one.sim() does one simulation one.sim<-function(){ insample<-c(1:5, sample(6:100,5), sample(101:1000,15), sample(1001:nrow(sorted),15)) strsample<-sorted[insample,] strsample$strat<-rep(1:4,c(5,5,15,15)) strsample$fpc<-rep(c(5,95,900,nrow(sorted)-1000),c(5,5,15,15)) strdesign<-svydesign(id=~1,strat=~strat,fpc=~fpc,data=strsample) totals<-svytotal(~Bush+Kerry+Nader, strdesign) c(total=coef(totals),se=SE(totals))} one.sim() many.sims<-replicate(100,one.sim()) ##repeats one.sim() R times apply(many.sims[1:3,],1,mean) ##computes the mean of each of the first three rows: the estimated total apply(many.sims[1:3,],1,sd) ## computes the standard deviation of the estimated totals apply(many.sims[4:6,],1,mean) ##computes the mean of the estimated standard error