############### Handout#3 ###############
###############Hypothesis Testing and Confidnce Intervals ###################################


################## Example 1 #####################


## Create the Data
x <- c(1,2,3,4)
y <- c(1,3,2,5)

## Fit the Estimated Regression Line
myfit <- lm(y~x)  ## y~x means use the model y = a + bx


## Assign the values to b_0 and b_1
b_0 <- myfit$coef[1]
b_1 <- myfit$coef[2] 


## Plot the data with the fitted LS line
plot(x,y, main="Fitted Line Plot")  
abline(b_0, b_1)


## 95% confidence intervals for beta_0 and beta_1
confint(myfit, level=.95)

####################################################

## Hypothesis tests for
##   H_0: beta_0=0 vs. H_a: beta_0 != 0 and
##   H_0: beta_1=0 vs. H_a: beta_1 != 0 
## Also get r^2
summary(myfit)

####################################################

## Make an ANOVA table and F-test
anova(myfit)

####################################################

## 95% CI for mean response when X=3.5
#    Use the formulae from your notes for illustration
#    See below for easier way using the predict(...) function.

## First calculate MSE
res <- myfit$residuals
sse <-  sum(res^2)
n <- length(res)
mse <- sse/(n-2)

## Now calculate sum(x_i - x_bar)^2
x_bar <- mean(x)
sum_x_dev_sqr <- sum((x-x_bar)^2)

## Now calculate s_{Y_hat}
s2_yhat <- mse*(1/n + (3.5-x_bar)^2/sum_x_dev_sqr)
s_yhat <- sqrt(s2_yhat)

## Calculate Y_hat
y_hat <- b_0 + b_1 *3.5

## Now make 95% CI for E(Y)
EY_CI <- c(y_hat - qt(.975, n-2)*s_yhat, y_hat + qt(.975, n-2)*s_yhat)
names(EY_CI) <- c("lower", "upper")
EY_CI

####################################################

## 95% CI for mean response when X=2.5
y_hat <- b_0 + b_1 *2.5
s2_yhat <- mse*(1/n + (2.5-x_bar)^2/sum_x_dev_sqr)
s_yhat <- sqrt(s2_yhat)
EY_CI <- c(y_hat - qt(.975, n-2)*s_yhat, y_hat + qt(.975, n-2)*s_yhat)
names(EY_CI) <- c("lower", "upper")
EY_CI

####################################################

## 95% Preciction Interval for new observation when X=3.5
y_hat <- b_0 + b_1 *3.5
s2_pred <- mse*(1 + 1/n + (3.5-x_bar)^2/sum_x_dev_sqr)
s_pred <- sqrt(s2_pred)
Y_PI <- c(y_hat - qt(.975, n-2)*s_pred, y_hat + qt(.975, n-2)*s_pred)
names(Y_PI) <- c("lower", "upper")
Y_PI


#################  Easier Way  ###################
newdata <- data.frame(x=3.5)
predict(myfit, newdata, interval="confidence", level=.95)
predict(myfit, newdata, interval="prediction", level=.95)
## Notice the results are the same as those above

## Below will do prediction and confience intervals for both x=2.5 and x=3.5
newdata <- data.frame(x=c(2.5, 3.5))
predict(myfit, newdata, interval="confidence", level=.95)
predict(myfit, newdata, interval="prediction", level=.95)


#########Example 2: mass and age data###########################################


## Read in data
ex.data <- read.table(file="W:/teaching/stat440540/data/CH1/CH01PR27.txt")
head(ex.data)
plot(ex.data)

names(ex.data)[1]<-"mass"
names(ex.data)[2]<-"age"


## How many observations
n <- nrow(ex.data)
n

## Fit the estimated regression Line
myfit <- lm(mass~age, data=ex.data)  ## Fit the model 'mass' = a + b*'age'
myfit

## Assign the estimates to b_0 and b_1
b_0 <- myfit$coef[1]
b_1 <- myfit$coef[2] 


## Plot the data with the fitted LS line
plot(ex.data$age, ex.data$mass, xlab="age", ylab="mass", main="Fitted Line Plot")  
abline(b_0, b_1)


## 95% confidence intervals for beta_0 and beta_1
confint(myfit, level=.95)

####################################################

## Hypothesis tests for
##   H_0: beta_0=0 vs. H_a: beta_0 != 0 and
##   H_0: beta_1=0 vs. H_a: beta_1 != 0 
summary(myfit, level=.95)

####################################################

## Make an ANOVA table and F-test
anova(myfit)

####################################################


## 95% CI for mean response when X=50
newdata <- data.frame(age=50)
predict(myfit, newdata, interval="confidence", level=.95)
##note: if you use myfit2<-lm(ex.data$mass~ex.data$age), predict(myfit2, newdata, ...), you will
##get prediction for all the 60 observations.


## 90% CI for mean response when X=50
newdata <- data.frame(age=50)
predict(myfit, newdata, interval="confidence", level=.90)


## 95% PI for new observation when X=50
newdata <- data.frame(age=50)
predict(myfit, newdata, interval="prediction", level=.95)

## 90% PI for new observation when X=50
newdata <- data.frame(age=50)
predict(myfit, newdata, interval="prediction", level=.90)

######################General approach to testing in regression##############################

myfit <- lm(mass~age, data=ex.data)  
myfitr <- lm(mass~1, data=ex.data)  
anova(myfit)
anova(myfitr)
anova(myfit,myfitr)