#### Example: Age at First Transplant # enter data as a vector age <- c(54, 42, 51, 54, 49, 56, 33, 58, 54, 64, 49) summary(age) # Histogram overlaid with kernel density curve hist(age, freq = FALSE, breaks = 6) points(density(age), type = "l") rug(age) # stem-and-leaf plot stem(age, scale=2) # t.crit qt(1 - 0.05/2, df = length(age) - 1) # look at help for t.test help(t.test) # defaults include: alternative = "two.sided", conf.level = 0.95 t.summary <- t.test(age, mu = 50) t.summary ##The assumption of normality of the sampling distribution appears reasonablly close, # using the bootstrap discussed earlier. bs.one.samp.dist(age) #### Example: one-sided test, tomoto data tomato <- c(20.5, 18.5, 20.0, 19.5, 19.5, 21.0, 17.5 , 22.5, 20.0, 19.5, 18.5, 20.0, 18.0, 20.5) par(mfrow=c(1,2)) # Histogram overlaid with kernel density curve hist(tomato, freq = FALSE, breaks = 6) points(density(tomato), type = "l") rug(tomato) boxplot(tomato) # t.crit qt(1 - 0.05, df = length(tomato) - 1) t.summary <- t.test(tomato, mu = 20, alternative = "less") #### Illustration of Central Limit Theorem, Uniform distribution # demo.clt.unif(N, n) # draws N samples of size n from Uniform(0,1) # and plots the N means with a normal distribution overlay demo.clt.unif <- function(N, n) { # draw sample in a matrix with N columns and n rows sam <- matrix(runif(N*n, 0, 1), ncol=N); # calculate the mean of each column sam.mean <- colMeans(sam) # the sd of the mean is the SEM sam.se <- sd(sam.mean) # calculate the true SEM given the sample size n true.se <- sqrt((1/12)/n) # draw a histogram of the means hist(sam.mean, freq = FALSE, breaks = 25 , main = paste("True SEM =", round(true.se, 4) , ", Est SEM = ", round( sam.se, 4)) , xlab = paste("n =", n)) # overlay a density curve for the sample means points(density(sam.mean), type = "l") # overlay a normal distribution, bold and red x <- seq(0, 1, length = 1000) points(x, dnorm(x, mean = 0.5, sd = true.se), type = "l", lwd = 2, col = "red") # place a rug of points under the plot rug(sam.mean) } par(mfrow=c(2,2)) demo.clt.unif(10000, 1) demo.clt.unif(10000, 6) demo.clt.unif(10000, 30) demo.clt.unif(10000, 100) #### Illustration of Central Limit Theorem, exponential distribution demo.clt.exp <- function(N, n) { # draw sample in a matrix with N columns and n rows sam <- matrix(rexp(N*n, 1), ncol=N); # calculate the mean of each column sam.mean <- colMeans(sam) # the sd of the mean is the SEM sam.se <- sd(sam.mean) # calculate the true SEM given the sample size n true.se <- sqrt(1/n) # draw a histogram of the means hist(sam.mean, freq = FALSE, breaks = 25 , main = paste("True SEM =", round(true.se, 4), ", Est SEM = ", round(sam.se, 4)) , xlab = paste("n =", n)) # overlay a density curve for the sample means points(density(sam.mean), type = "l") # overlay a normal distribution, bold and red x <- seq(0, 5, length = 1000) points(x, dnorm(x, mean = 1, sd = true.se), type = "l", lwd = 2, col = "red") # place a rug of points under the plot rug(sam.mean) } par(mfrow=c(2,2)) demo.clt.exp(10000, 1) demo.clt.exp(10000, 6) demo.clt.exp(10000, 30) demo.clt.exp(10000, 100) #### Normal vs t-distributions with a range of degrees-of-freedom x <- seq(-8, 8, length = 1000) par(mfrow=c(1,1)) plot(x, dnorm(x), type = "l", lwd = 2, col = "red" , main = "Normal (red) vs t-dist with df=1, 2, 6, 12, 30, 100") points(x, dt(x, 1), type = "l") points(x, dt(x, 2), type = "l") points(x, dt(x, 6), type = "l") points(x, dt(x, 12), type = "l") points(x, dt(x, 30), type = "l") points(x, dt(x,100), type = "l") #### Visual comparison of whether sampling distribution is close to Normal via Bootstrap # a function to compare the bootstrap sampling distribution with # a normal distribution with mean and SEM estimated from the data bs.one.samp.dist <- function(dat, N = 1e4) { n <- length(dat); # resample from data sam <- matrix(sample(dat, size = N * n, replace = TRUE), ncol=N); # draw a histogram of the means sam.mean <- colMeans(sam); # save par() settings old.par <- par(no.readonly = TRUE) # make smaller margins par(mfrow=c(2,1), mar=c(3,2,2,1), oma=c(1,1,1,1)) # Histogram overlaid with kernel density curve hist(dat, freq = FALSE, breaks = 6 , main = "Plot of data with smoothed density curve") points(density(dat), type = "l") rug(dat) hist(sam.mean, freq = FALSE, breaks = 25 , main = "Bootstrap sampling distribution of the mean" , xlab = paste("Data: n =", n , ", mean =", signif(mean(dat), digits = 5) , ", se =", signif(sd(dat)/sqrt(n)), digits = 5)) # overlay a density curve for the sample means points(density(sam.mean), type = "l") # overlay a normal distribution, bold and red x <- seq(min(sam.mean), max(sam.mean), length = 1000) points(x, dnorm(x, mean = mean(dat), sd = sd(dat)/sqrt(n)) , type = "l", lwd = 2, col = "red") # place a rug of points under the plot rug(sam.mean) # restore par() settings par(old.par) } bs.one.samp.dist