#########################################Resampling################################################
###################################jackknife################################################
ex.data<-read.table(file="C:/jenn/teaching/stat472572/data/tuition.txt",header=T)
ex.data
## college enroll restuition nonrestuition
## 1 1 3482 1365 3747
## 2 2 5354 1677 4983
## 3 3 4500 1500 1500
## 4 4 1392 1080 2160
## 5 5 9195 1875 2475
## 6 6 3308 3071 5135
## 7 7 10802 1542 3950
## 8 8 18684 930 4050
## 9 9 7841 1340 4140
## 10 10 4784 1210 4166
x<-ex.data$restuition
y<-ex.data$nonrestuition
bhat<-mean(y)/mean(x)
bhat
## [1] 2.328801
ll<-nrow(ex.data)
xbarj<-rep(0,ll)
ybarj<-rep(0,ll)
bhatj<-rep(0,ll)
for(i in 1:ll){
xbarj[i]<-mean(x[-i])
ybarj[i]<-mean(y[-i])
bhatj[i]<-ybarj[i]/xbarj[i]
vhat<-(ll-1)/ll*sum((bhatj-bhat)^2)
}
list(xbarj=xbarj,ybarj=ybarj,bhatj=bhatj,vhat=vhat)
## $xbarj
## [1] 1580.556 1545.889 1565.556 1612.222 1523.889 1391.000 1560.889
## [8] 1628.889 1583.333 1597.778
##
## $ybarj
## [1] 3617.667 3480.333 3867.333 3794.000 3759.000 3463.444 3595.111
## [8] 3584.000 3574.000 3571.111
##
## $bhatj
## [1] 2.288858 2.251348 2.470263 2.353274 2.466715 2.489895 2.303246
## [8] 2.200273 2.257263 2.235049
##
## $vhat
## [1] 0.09383063
################easier way################################
###install package bootstrap##########################
#install.packages("bootstrap")
library(bootstrap)
theta1<-function(x){mean(x)}
result1<-jackknife(x,theta1) ##compare to xbarj##
result1
## $jack.se
## [1] 189.6246
##
## $jack.bias
## [1] 0
##
## $jack.values
## [1] 1580.556 1545.889 1565.556 1612.222 1523.889 1391.000 1560.889
## [8] 1628.889 1583.333 1597.778
##
## $call
## jackknife(x = x, theta = theta1)
result1$jack.values-xbarj #should be all 0s
## [1] 0 0 0 0 0 0 0 0 0 0
result2<-jackknife(y,theta1) ##compare to ybarj##
result2$jack.values-ybarj
## [1] 0 0 0 0 0 0 0 0 0 0
result3<-result2$jack.value/result1$jack.value ##compare to bhatj##
##calculate jackknife variance
varjk<-sum((result3-bhat)^2)*9/10
varjk
## [1] 0.09383063
################ more easier way################################
##consider the ratio estimation mean(y)/mean(x) as a function
##consider data x, y as column of a matrix
xdata<-matrix(c(x,y),ncol=2)
theta2<-function(x,xdata){mean(xdata[x,2])/mean(xdata[x,1])}
result4<-jackknife(1:ll,theta2,xdata)
result4
## $jack.se
## [1] 0.3062012
##
## $jack.bias
## [1] 0.02535994
##
## $jack.values
## [1] 2.288858 2.251348 2.470263 2.353274 2.466715 2.489895 2.303246
## [8] 2.200273 2.257263 2.235049
##
## $call
## jackknife(x = 1:ll, theta = theta2, xdata)
###################################bootstrap################################################
##simulated example##
#calculating the standard error of the median
#creating the data set by taking 100 observations
#from a normal distribution with mean 5 and stdev 3
#we have rounded each observation to nearest integer
ex.data <- round(rnorm(100,5,3))
ex.data[1:10]
## [1] 4 7 2 2 8 6 5 2 4 4
#obtaining 20 bootstrap samples
resamples <- lapply(1:20, function(i) sample(ex.data, replace = T))
#display the first resample#
resamples[1]
## [[1]]
## [1] 8 12 7 4 3 10 6 3 1 3 6 7 4 6 4 2 8 4 1 3 5 2 8
## [24] 9 7 2 6 2 2 12 4 3 2 6 4 5 2 2 7 5 9 5 2 10 7 3
## [47] 8 13 4 6 2 6 2 1 6 8 12 1 6 11 8 1 5 6 5 5 2 6 7
## [70] 6 5 6 2 2 2 2 6 0 6 4 4 1 4 7 3 3 0 4 4 2 4 4
## [93] 2 5 4 6 2 4 3 5
#calculating the median for each bootstrap sample
r.median <- sapply(resamples, median)
r.median
## [1] 4.0 5.0 4.5 4.0 4.0 5.0 5.0 5.0 5.0 5.0 4.0 4.0 5.0 4.0 4.0 4.0 4.0
## [18] 5.0 5.0 5.0
#calculating the standard deviation of the distribution of medians
sqrt(var(r.median))
## [1] 0.4993417
##easier way##
#using the boot function
library(boot)
samplemedian <- function(x, c) {
return(median(x[c]))
}
boot(ex.data, samplemedian, R=20) #use package boot
##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = ex.data, statistic = samplemedian, R = 20)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* 4 0.325 0.466651
##example 9.8 from Lohr's book##
library(SDaA) ##library(SDaA contain all the datasets in Lohr's book)
data(htpop)
htpop[1:10,]
## height gender
## 1 173 F
## 2 163 F
## 3 160 F
## 4 148 F
## 5 160 F
## 6 165 F
## 7 144 F
## 8 163 F
## 9 167 F
## 10 170 F
median(htpop$height) ##population median###
## [1] 168
data(htsrs) ##htsrs is a simple random sample taken from htpop###
htsrs[1:10,]
## rn height gender
## 1 257 159 F
## 2 1016 174 M
## 3 1264 186 M
## 4 817 158 F
## 5 374 178 F
## 6 1063 177 M
## 7 692 168 F
## 8 528 159 F
## 9 1104 170 M
## 10 1933 179 M
n<-nrow(htsrs) ##number of observations in htsrs is 200
n
## [1] 200
median(htsrs$height)
## [1] 169
par(mfrow=c(2,1))
hist(htpop$height)
hist(htsrs$height)
##find bootstrap variance, ci###
samplemedian <- function(x, c) {
return(median(x[c]))
}
b<-boot(htsrs$height, samplemedian, R=2000)
b
##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = htsrs$height, statistic = samplemedian, R = 2000)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* 169 0.26275 0.9500204
boot.ci(b, type="basic")
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 2000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = b, type = "basic")
##
## Intervals :
## Level Basic
## 95% (167.0, 170.5 )
## Calculations and Intervals on Original Scale