#### Example: Age at First Transplant
# enter data as a vector
age <- c(54, 42, 51, 54, 49, 56, 33, 58, 54, 64, 49)
summary(age)

# Histogram overlaid with kernel density curve
hist(age, freq = FALSE, breaks = 6)
points(density(age), type = "l")
rug(age)
# stem-and-leaf plot
stem(age, scale=2)

# t.crit
qt(1 - 0.05/2, df = length(age) - 1)

# look at help for t.test
help(t.test)
  # defaults include: alternative = "two.sided", conf.level = 0.95
t.summary <- t.test(age, mu = 50)
t.summary

##The assumption of normality of the sampling distribution appears reasonablly close,
 # using the bootstrap discussed earlier.
bs.one.samp.dist(age)

#### Example: one-sided test, tomoto data
tomato <- c(20.5, 18.5, 20.0, 19.5, 19.5, 21.0, 17.5
, 22.5, 20.0, 19.5, 18.5, 20.0, 18.0, 20.5)
par(mfrow=c(1,2))
# Histogram overlaid with kernel density curve
hist(tomato, freq = FALSE, breaks = 6)
points(density(tomato), type = "l")
rug(tomato)
boxplot(tomato)
# t.crit
qt(1 - 0.05, df = length(tomato) - 1)

t.summary <- t.test(tomato, mu = 20, alternative = "less")


#### Illustration of Central Limit Theorem, Uniform distribution
# demo.clt.unif(N, n)
# draws N samples of size n from Uniform(0,1)
# and plots the N means with a normal distribution overlay
demo.clt.unif <- function(N, n) {
# draw sample in a matrix with N columns and n rows
sam <- matrix(runif(N*n, 0, 1), ncol=N);
# calculate the mean of each column
sam.mean <- colMeans(sam)
# the sd of the mean is the SEM
sam.se <- sd(sam.mean)
# calculate the true SEM given the sample size n
true.se <- sqrt((1/12)/n)
# draw a histogram of the means
hist(sam.mean, freq = FALSE, breaks = 25
, main = paste("True SEM =", round(true.se, 4)
, ", Est SEM = ", round( sam.se, 4))
, xlab = paste("n =", n))
# overlay a density curve for the sample means
points(density(sam.mean), type = "l")
# overlay a normal distribution, bold and red
x <- seq(0, 1, length = 1000)
points(x, dnorm(x, mean = 0.5, sd = true.se), type = "l", lwd = 2, col = "red")
# place a rug of points under the plot
rug(sam.mean)
}
par(mfrow=c(2,2))
demo.clt.unif(10000, 1)
demo.clt.unif(10000, 6)
demo.clt.unif(10000, 30)
demo.clt.unif(10000, 100)


#### Illustration of Central Limit Theorem, exponential distribution
demo.clt.exp <- function(N, n) {
# draw sample in a matrix with N columns and n rows
sam <- matrix(rexp(N*n, 1), ncol=N);
# calculate the mean of each column
sam.mean <- colMeans(sam)
# the sd of the mean is the SEM
sam.se <- sd(sam.mean)
# calculate the true SEM given the sample size n
true.se <- sqrt(1/n)
# draw a histogram of the means
hist(sam.mean, freq = FALSE, breaks = 25
, main = paste("True SEM =", round(true.se, 4), ", Est SEM = ", round(sam.se, 4))
, xlab = paste("n =", n))
# overlay a density curve for the sample means
points(density(sam.mean), type = "l")
# overlay a normal distribution, bold and red
x <- seq(0, 5, length = 1000)
points(x, dnorm(x, mean = 1, sd = true.se), type = "l", lwd = 2, col = "red")
# place a rug of points under the plot
rug(sam.mean)
}
par(mfrow=c(2,2))
demo.clt.exp(10000, 1)
demo.clt.exp(10000, 6)
demo.clt.exp(10000, 30)
demo.clt.exp(10000, 100)

#### Normal vs t-distributions with a range of degrees-of-freedom
x <- seq(-8, 8, length = 1000)
par(mfrow=c(1,1))
plot(x, dnorm(x), type = "l", lwd = 2, col = "red"
, main = "Normal (red) vs t-dist with df=1, 2, 6, 12, 30, 100")
points(x, dt(x, 1), type = "l")
points(x, dt(x, 2), type = "l")
points(x, dt(x, 6), type = "l")
points(x, dt(x, 12), type = "l")
points(x, dt(x, 30), type = "l")
points(x, dt(x,100), type = "l")

#### Visual comparison of whether sampling distribution is close to Normal via Bootstrap
# a function to compare the bootstrap sampling distribution with
#   a normal distribution with mean and SEM estimated from the data

bs.one.samp.dist <- function(dat, N = 1e4) {
  n <- length(dat);
  # resample from data
  sam <- matrix(sample(dat, size = N * n, replace = TRUE), ncol=N);
  # draw a histogram of the means
  sam.mean <- colMeans(sam);
  # save par() settings
  old.par <- par(no.readonly = TRUE)
  # make smaller margins
  par(mfrow=c(2,1), mar=c(3,2,2,1), oma=c(1,1,1,1))
  # Histogram overlaid with kernel density curve
  hist(dat, freq = FALSE, breaks = 6
      , main = "Plot of data with smoothed density curve")
  points(density(dat), type = "l")
  rug(dat)

  hist(sam.mean, freq = FALSE, breaks = 25
      , main = "Bootstrap sampling distribution of the mean"
      , xlab = paste("Data: n =", n
                   , ", mean =", signif(mean(dat), digits = 5)
                   , ", se =", signif(sd(dat)/sqrt(n)), digits = 5))
  # overlay a density curve for the sample means
  points(density(sam.mean), type = "l")
  # overlay a normal distribution, bold and red
  x <- seq(min(sam.mean), max(sam.mean), length = 1000)
  points(x, dnorm(x, mean = mean(dat), sd = sd(dat)/sqrt(n))
       , type = "l", lwd = 2, col = "red")
  # place a rug of points under the plot
  rug(sam.mean)
  # restore par() settings
  par(old.par)
}

bs.one.samp.dist