#########################################Resampling################################################
###################################jackknife################################################

ex.data<-read.table(file="C:/jenn/teaching/stat472572/data/tuition.txt",header=T)

ex.data
##    college enroll restuition nonrestuition
## 1        1   3482       1365          3747
## 2        2   5354       1677          4983
## 3        3   4500       1500          1500
## 4        4   1392       1080          2160
## 5        5   9195       1875          2475
## 6        6   3308       3071          5135
## 7        7  10802       1542          3950
## 8        8  18684        930          4050
## 9        9   7841       1340          4140
## 10      10   4784       1210          4166
x<-ex.data$restuition
y<-ex.data$nonrestuition
bhat<-mean(y)/mean(x)
bhat
## [1] 2.328801
ll<-nrow(ex.data)
xbarj<-rep(0,ll)
ybarj<-rep(0,ll)
bhatj<-rep(0,ll)
for(i in 1:ll){ 
xbarj[i]<-mean(x[-i])
ybarj[i]<-mean(y[-i])
bhatj[i]<-ybarj[i]/xbarj[i]
vhat<-(ll-1)/ll*sum((bhatj-bhat)^2)
}
list(xbarj=xbarj,ybarj=ybarj,bhatj=bhatj,vhat=vhat)
## $xbarj
##  [1] 1580.556 1545.889 1565.556 1612.222 1523.889 1391.000 1560.889
##  [8] 1628.889 1583.333 1597.778
## 
## $ybarj
##  [1] 3617.667 3480.333 3867.333 3794.000 3759.000 3463.444 3595.111
##  [8] 3584.000 3574.000 3571.111
## 
## $bhatj
##  [1] 2.288858 2.251348 2.470263 2.353274 2.466715 2.489895 2.303246
##  [8] 2.200273 2.257263 2.235049
## 
## $vhat
## [1] 0.09383063
################easier way################################
###install package bootstrap##########################
#install.packages("bootstrap")
library(bootstrap)
theta1<-function(x){mean(x)}
result1<-jackknife(x,theta1)  ##compare to xbarj##
result1
## $jack.se
## [1] 189.6246
## 
## $jack.bias
## [1] 0
## 
## $jack.values
##  [1] 1580.556 1545.889 1565.556 1612.222 1523.889 1391.000 1560.889
##  [8] 1628.889 1583.333 1597.778
## 
## $call
## jackknife(x = x, theta = theta1)
result1$jack.values-xbarj #should be all 0s
##  [1] 0 0 0 0 0 0 0 0 0 0
result2<-jackknife(y,theta1)  ##compare to ybarj##
result2$jack.values-ybarj 
##  [1] 0 0 0 0 0 0 0 0 0 0
result3<-result2$jack.value/result1$jack.value ##compare to bhatj##
##calculate jackknife variance
varjk<-sum((result3-bhat)^2)*9/10
varjk
## [1] 0.09383063
################ more easier way################################
##consider the ratio estimation mean(y)/mean(x) as a function
##consider data x, y as column of a matrix
xdata<-matrix(c(x,y),ncol=2)
theta2<-function(x,xdata){mean(xdata[x,2])/mean(xdata[x,1])}
result4<-jackknife(1:ll,theta2,xdata)
result4
## $jack.se
## [1] 0.3062012
## 
## $jack.bias
## [1] 0.02535994
## 
## $jack.values
##  [1] 2.288858 2.251348 2.470263 2.353274 2.466715 2.489895 2.303246
##  [8] 2.200273 2.257263 2.235049
## 
## $call
## jackknife(x = 1:ll, theta = theta2, xdata)
###################################bootstrap################################################

##simulated example##
#calculating the standard error of the median
#creating the data set by taking 100 observations 
#from a normal distribution with mean 5 and stdev 3
#we have rounded each observation to nearest integer
ex.data <- round(rnorm(100,5,3))
ex.data[1:10]
##  [1] 4 7 2 2 8 6 5 2 4 4
#obtaining 20 bootstrap samples 
resamples <- lapply(1:20, function(i) sample(ex.data, replace = T))
#display the first resample#
resamples[1]
## [[1]]
##   [1]  8 12  7  4  3 10  6  3  1  3  6  7  4  6  4  2  8  4  1  3  5  2  8
##  [24]  9  7  2  6  2  2 12  4  3  2  6  4  5  2  2  7  5  9  5  2 10  7  3
##  [47]  8 13  4  6  2  6  2  1  6  8 12  1  6 11  8  1  5  6  5  5  2  6  7
##  [70]  6  5  6  2  2  2  2  6  0  6  4  4  1  4  7  3  3  0  4  4  2  4  4
##  [93]  2  5  4  6  2  4  3  5
#calculating the median for each bootstrap sample 
r.median <- sapply(resamples, median)
r.median
##  [1] 4.0 5.0 4.5 4.0 4.0 5.0 5.0 5.0 5.0 5.0 4.0 4.0 5.0 4.0 4.0 4.0 4.0
## [18] 5.0 5.0 5.0
#calculating the standard deviation of the distribution of medians
sqrt(var(r.median))
## [1] 0.4993417
##easier way##
#using the boot function

library(boot)
samplemedian <- function(x, c) {
  return(median(x[c]))
}

boot(ex.data, samplemedian, R=20) #use package boot
## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = ex.data, statistic = samplemedian, R = 20)
## 
## 
## Bootstrap Statistics :
##     original  bias    std. error
## t1*        4   0.325    0.466651
##example 9.8 from Lohr's book##
library(SDaA) ##library(SDaA contain all the datasets in Lohr's book)
data(htpop)
htpop[1:10,]
##    height gender
## 1     173      F
## 2     163      F
## 3     160      F
## 4     148      F
## 5     160      F
## 6     165      F
## 7     144      F
## 8     163      F
## 9     167      F
## 10    170      F
median(htpop$height)  ##population median###
## [1] 168
data(htsrs)  ##htsrs is a simple random sample taken from htpop###
htsrs[1:10,]  
##      rn height gender
## 1   257    159      F
## 2  1016    174      M
## 3  1264    186      M
## 4   817    158      F
## 5   374    178      F
## 6  1063    177      M
## 7   692    168      F
## 8   528    159      F
## 9  1104    170      M
## 10 1933    179      M
n<-nrow(htsrs)   ##number of observations in htsrs is 200
n
## [1] 200
median(htsrs$height)
## [1] 169
par(mfrow=c(2,1))
hist(htpop$height)
hist(htsrs$height)

##find bootstrap variance, ci###
samplemedian <- function(x, c) {
  return(median(x[c]))
}
b<-boot(htsrs$height, samplemedian, R=2000)
b
## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = htsrs$height, statistic = samplemedian, R = 2000)
## 
## 
## Bootstrap Statistics :
##     original  bias    std. error
## t1*      169 0.26275   0.9500204
boot.ci(b, type="basic") 
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 2000 bootstrap replicates
## 
## CALL : 
## boot.ci(boot.out = b, type = "basic")
## 
## Intervals : 
## Level      Basic         
## 95%   (167.0, 170.5 )  
## Calculations and Intervals on Original Scale