#simulaiton practice
#m is the sample size, want to generate m random numbers with N(0.5, 1)
##for each sample, perform the z test, 
#repeat the procedure for tt times, find percentage of times that z < 1.96, expect to be close to 0.975
#find percentage that |z|<1.96, expect to be close to 0.95

  simulation<-function(m,tt){
  #set.seed(1234123)
  times1<-0
  times2<-0
    for(i in 1:tt){
    mu<-rep(.5,m)
    
#generate epsilon from normal distribution

    epsilon<-rnorm(m,0,1)      
    y<-mu+epsilon
    
    #print (y)
#estimators
    
    ybar<-mean(y)
    sigmasquare<-sum((y-ybar)^2)/(m-1)
    seybar<-sqrt(sigmasquare/m)
    #print (ybar)
   #    print (sigmasquare)
#print(seybar)
   
 
# teststatistics
    z<-(ybar-.5)/seybar
    #qz(.95)=1.96
    if(z<1.96){times1<-times1+1}
    if(abs(z)<1.96){times2<-times2+1} 
    
  #print (z)
    }
    
   result1<-times1/tt
result2<-times2/tt
list(result1=result1, result2=result2)
}

simulation(100,1000)


###stratified cluster sample#####
library(survey)
data(election) 
##creat data sorted, with decreasing votes in each county###
sorted<-election[order(election$votes, decreasing = TRUE),]
##samples the largest five counties (based in or around 
##Los Angeles, Chicago, Houston, Phoenix, and Deroit) for cetainty, then takes
##5 of the next 95, 15 of the next 900, and 15 of the remaining 3600.
##funciton one.sim() does one simulation  
one.sim<-function(){
insample<-c(1:5,
sample(6:100,5),
sample(101:1000,15),
sample(1001:nrow(sorted),15))
strsample<-sorted[insample,]
strsample$strat<-rep(1:4,c(5,5,15,15))
strsample$fpc<-rep(c(5,95,900,nrow(sorted)-1000),c(5,5,15,15))
strdesign<-svydesign(id=~1,strat=~strat,fpc=~fpc,data=strsample)
totals<-svytotal(~Bush+Kerry+Nader, strdesign)
c(total=coef(totals),se=SE(totals))}
one.sim()


many.sims<-replicate(100,one.sim()) ##repeats one.sim() R times
apply(many.sims[1:3,],1,mean) ##computes the mean of each of the first three rows: the estimated total
apply(many.sims[1:3,],1,sd) ## computes the standard deviation of the estimated totals
apply(many.sims[4:6,],1,mean) ##computes the mean of the estimated standard error