############### Handout#3 ############### ###############Hypothesis Testing and Confidnce Intervals ################################### ################## Example 1 ##################### ## Create the Data x <- c(1,2,3,4) y <- c(1,3,2,5) ## Fit the Estimated Regression Line myfit <- lm(y~x) ## y~x means use the model y = a + bx ## Assign the values to b_0 and b_1 b_0 <- myfit$coef[1] b_1 <- myfit$coef[2] ## Plot the data with the fitted LS line plot(x,y, main="Fitted Line Plot") abline(b_0, b_1) ## 95% confidence intervals for beta_0 and beta_1 confint(myfit, level=.95) #################################################### ## Hypothesis tests for ## H_0: beta_0=0 vs. H_a: beta_0 != 0 and ## H_0: beta_1=0 vs. H_a: beta_1 != 0 ## Also get r^2 summary(myfit) #################################################### ## Make an ANOVA table and F-test anova(myfit) #################################################### ## 95% CI for mean response when X=3.5 # Use the formulae from your notes for illustration # See below for easier way using the predict(...) function. ## First calculate MSE res <- myfit$residuals sse <- sum(res^2) n <- length(res) mse <- sse/(n-2) ## Now calculate sum(x_i - x_bar)^2 x_bar <- mean(x) sum_x_dev_sqr <- sum((x-x_bar)^2) ## Now calculate s_{Y_hat} s2_yhat <- mse*(1/n + (3.5-x_bar)^2/sum_x_dev_sqr) s_yhat <- sqrt(s2_yhat) ## Calculate Y_hat y_hat <- b_0 + b_1 *3.5 ## Now make 95% CI for E(Y) EY_CI <- c(y_hat - qt(.975, n-2)*s_yhat, y_hat + qt(.975, n-2)*s_yhat) names(EY_CI) <- c("lower", "upper") EY_CI #################################################### ## 95% CI for mean response when X=2.5 y_hat <- b_0 + b_1 *2.5 s2_yhat <- mse*(1/n + (2.5-x_bar)^2/sum_x_dev_sqr) s_yhat <- sqrt(s2_yhat) EY_CI <- c(y_hat - qt(.975, n-2)*s_yhat, y_hat + qt(.975, n-2)*s_yhat) names(EY_CI) <- c("lower", "upper") EY_CI #################################################### ## 95% Preciction Interval for new observation when X=3.5 y_hat <- b_0 + b_1 *3.5 s2_pred <- mse*(1 + 1/n + (3.5-x_bar)^2/sum_x_dev_sqr) s_pred <- sqrt(s2_pred) Y_PI <- c(y_hat - qt(.975, n-2)*s_pred, y_hat + qt(.975, n-2)*s_pred) names(Y_PI) <- c("lower", "upper") Y_PI ################# Easier Way ################### newdata <- data.frame(x=3.5) predict(myfit, newdata, interval="confidence", level=.95) predict(myfit, newdata, interval="prediction", level=.95) ## Notice the results are the same as those above ## Below will do prediction and confience intervals for both x=2.5 and x=3.5 newdata <- data.frame(x=c(2.5, 3.5)) predict(myfit, newdata, interval="confidence", level=.95) predict(myfit, newdata, interval="prediction", level=.95) #########Example 2: mass and age data########################################### ## Read in data ex.data <- read.table(file="W:/teaching/stat440540/data/CH1/CH01PR27.txt") head(ex.data) plot(ex.data) names(ex.data)[1]<-"mass" names(ex.data)[2]<-"age" ## How many observations n <- nrow(ex.data) n ## Fit the estimated regression Line myfit <- lm(mass~age, data=ex.data) ## Fit the model 'mass' = a + b*'age' myfit ## Assign the estimates to b_0 and b_1 b_0 <- myfit$coef[1] b_1 <- myfit$coef[2] ## Plot the data with the fitted LS line plot(ex.data$age, ex.data$mass, xlab="age", ylab="mass", main="Fitted Line Plot") abline(b_0, b_1) ## 95% confidence intervals for beta_0 and beta_1 confint(myfit, level=.95) #################################################### ## Hypothesis tests for ## H_0: beta_0=0 vs. H_a: beta_0 != 0 and ## H_0: beta_1=0 vs. H_a: beta_1 != 0 summary(myfit, level=.95) #################################################### ## Make an ANOVA table and F-test anova(myfit) #################################################### ## 95% CI for mean response when X=50 newdata <- data.frame(age=50) predict(myfit, newdata, interval="confidence", level=.95) ##note: if you use myfit2<-lm(ex.data$mass~ex.data$age), predict(myfit2, newdata, ...), you will ##get prediction for all the 60 observations. ## 90% CI for mean response when X=50 newdata <- data.frame(age=50) predict(myfit, newdata, interval="confidence", level=.90) ## 95% PI for new observation when X=50 newdata <- data.frame(age=50) predict(myfit, newdata, interval="prediction", level=.95) ## 90% PI for new observation when X=50 newdata <- data.frame(age=50) predict(myfit, newdata, interval="prediction", level=.90) ######################General approach to testing in regression############################## myfit <- lm(mass~age, data=ex.data) myfitr <- lm(mass~1, data=ex.data) anova(myfit) anova(myfitr) anova(myfit,myfitr)