#### Example: Income Data income <- c(7, 1110, 7, 5, 8, 12, 0, 5, 2, 2, 46, 7) # sort in decreasing order income <- sort(income, decreasing = TRUE) income summary(income) sd(income) par(mfrow=c(3,1)) # Histogram overlaid with kernel density curve hist(income, freq = FALSE, breaks = 1000) points(density(income), type = "l") rug(income) # violin plot library(vioplot) vioplot(income, horizontal=TRUE, col="gray") # boxplot boxplot(income, horizontal=TRUE) library(car) qqPlot(income, las = 1, id.n = 0, id.cex = 1, lwd = 1, main="QQ Plot, Income") library(BSDA) t.test(income) SIGN.test(income) #### Example: Age at First Heart Transplant age <- c(54, 42, 51, 54, 49, 56, 33, 58, 54, 64, 49) # sort in decreasing order age <- sort(age, decreasing = TRUE) age summary(age) sd(age) par(mfrow=c(3,1)) # Histogram overlaid with kernel density curve hist(age, freq = FALSE, breaks = 10) points(density(age), type = "l") rug(age) # violin plot library(vioplot) vioplot(age, horizontal=TRUE, col="gray") # boxplot boxplot(age, horizontal=TRUE) library(car) qqPlot(age, las = 1, id.n = 0, id.cex = 1, lwd = 1, main="QQ Plot, Income") #### Example: Made-up Data dat <- c(20, 18, 23, 5, 14, 8, 18, 22) # sort in decreasing order dat <- sort(dat, decreasing = TRUE) dat summary(dat) sd(dat) par(mfrow=c(3,1)) # Histogram overlaid with kernel density curve hist(dat, freq = FALSE, breaks = 10) points(density(dat), type = "l") rug(dat) # violin plot library(vioplot) vioplot(dat, horizontal=TRUE, col="gray") # boxplot boxplot(dat, horizontal=TRUE) par(mfrow=c(1,1)) library(car) qqPlot(dat, las = 1, id.n = 0, id.cex = 1, lwd = 1, main="QQ Plot, Income") t.test(dat, mu=10) # with continuity correction in the normal approximation for the p-value wilcox.test(dat, mu=10, conf.int=TRUE) # without continuity correction wilcox.test(dat, mu=10, conf.int=TRUE, correct=FALSE) #### Example: Sleep Remedies # Data and numerical summaries a <- c( 0.7, -1.6, -0.2, -1.2, 0.1, 3.4, 3.7, 0.8, 0.0, 2.0) b <- c( 1.9, 0.8, 1.1, 0.1, -0.1, 4.4, 5.5, 1.6, 4.6, 3.0) d <- b - a; sleep <- data.frame(a, b, d) summary(sleep$d) shapiro.test(sleep$d) # boxplot library(ggplot2) p3 <- ggplot(sleep, aes(x = "d", y = d)) p3 <- p3 + geom_hline(yintercept=0, colour="#BB0000", linetype="dashed") p3 <- p3 + geom_boxplot() p3 <- p3 + geom_point() p3 <- p3 + stat_summary(fun.y = mean, geom = "point", shape = 18, size = 4, alpha = 0.3) p3 <- p3 + coord_flip() print(p3) t.test(sleep$d, mu=0) # with continuity correction in the normal approximation for the p-value wilcox.test(sleep$d, mu=0, conf.int=TRUE) # can use the paired= option #wilcox.test(sleep$b, sleep$a, paired=TRUE, mu=0, conf.int=TRUE) # if don't assume symmetry, can use sign test #SIGN.test(sleep$d) #### Example: Comparison of Cooling Rates of Uwet and Walker Co. Meteorites Uwet <- c(0.21, 0.25, 0.16, 0.23, 0.47, 1.20, 0.29, 1.10, 0.16) Walker <- c(0.69, 0.23, 0.10, 0.03, 0.56, 0.10, 0.01, 0.02, 0.04, 0.22) met <- data.frame(Uwet=c(Uwet,NA), Walker) library(reshape2) met.long <- melt(met, variable.name = "site", value.name = "cool", na.rm=TRUE) # naming variables manually, the variable.name and value.name not working 11/2012 names(met.long) <- c("site", "cool") library(ggplot2) p <- ggplot(met.long, aes(x = site, y = cool, fill=site)) p <- p + geom_boxplot() p <- p + geom_point(position = position_jitter(w = 0.05, h = 0), alpha = 0.5) p <- p + stat_summary(fun.y = mean, geom = "point", shape = 3, size = 2) p <- p + coord_flip() p <- p + labs(title = "Cooling rates for samples of meteorites at two locations") p <- p + theme(legend.position="none") print(p) par(mfrow=c(1,2)) library(car) qqPlot(Walker, las = 1, id.n = 0, id.cex = 1, lwd = 1, main="QQ Plot, Walker") qqPlot(Uwet, las = 1, id.n = 0, id.cex = 1, lwd = 1, main="QQ Plot, Uwet") # numerical summaries summary(Uwet) c(sd(Uwet), IQR(Uwet), length(Uwet)) summary(Walker) c(sd(Walker), IQR(Walker), length(Walker)) t.test(Uwet, Walker, var.equal = TRUE) t.test(Uwet, Walker) wilcox.test(Uwet, Walker, conf.int = TRUE) rank(met.long$cool) by(rank(met.long$cool), met.long$site, summary) # note: the CI for ranks is not interpretable t.test(rank(met.long$cool) ~ met.long$site, var.equal = TRUE) #### Example: Newcombe's Data time <- c(24.828, 24.833, 24.834, 24.826, 24.824, 24.756 , 24.827, 24.840, 24.829, 24.816, 24.798, 24.822 , 24.824, 24.825, 24.823, 24.821, 24.830, 24.829 , 24.831, 24.824, 24.836, 24.819, 24.820, 24.832 , 24.836, 24.825, 24.828, 24.828, 24.821, 24.829 , 24.837, 24.828, 24.830, 24.825, 24.826, 24.832 , 24.836, 24.830, 24.836, 24.826, 24.822, 24.823 , 24.827, 24.828, 24.831, 24.827, 24.827, 24.827 , 24.826, 24.826, 24.832, 24.833, 24.832, 24.824 , 24.839, 24.824, 24.832, 24.828, 24.825, 24.825 , 24.829, 24.828, 24.816, 24.827, 24.829, 24.823) library(nortest) ad.test(time) # Histogram overlaid with kernel density curve Passage_df <- data.frame(time) p1 <- ggplot(Passage_df, aes(x = time)) # Histogram with density instead of count on y-axis p1 <- p1 + geom_histogram(aes(y=..density..), binwidth=0.001) p1 <- p1 + geom_density(alpha=0.1, fill="white") p1 <- p1 + geom_rug() # violin plot p2 <- ggplot(Passage_df, aes(x = "t", y = time)) p2 <- p2 + geom_violin(fill = "gray50") p2 <- p2 + geom_boxplot(width = 0.2, alpha = 3/4) p2 <- p2 + coord_flip() # boxplot p3 <- ggplot(Passage_df, aes(x = "t", y = time)) p3 <- p3 + geom_boxplot() p3 <- p3 + coord_flip() library(gridExtra) grid.arrange(grobs = list(p1, p2, p3), ncol=1) par(mfrow=c(1,1)) library(car) qqPlot(time, las = 1, id.n = 0, id.cex = 1, lwd = 1, main="QQ Plot, Time") bs.one.samp.dist(time) t.sum <- t.test(time) t.sum$conf.int diff(t.test(time)$conf.int) s.sum <- SIGN.test(time) s.sum[2,c(2,3)] diff(s.sum[2,c(2,3)]) w.sum <- wilcox.test(time, conf.int=TRUE) w.sum$conf.int diff(w.sum$conf.int) #### Example: Hydrocarbon (HC) Emissions Data emis <- read.table(text=" Pre-y63 y63-7 y68-9 y70-1 y72-4 2351 620 1088 141 140 1293 940 388 359 160 541 350 111 247 20 1058 700 558 940 20 411 1150 294 882 223 570 2000 211 494 60 800 823 460 306 20 630 1058 470 200 95 905 423 353 100 360 347 900 71 300 70 NA 405 241 223 220 NA 780 2999 190 400 NA 270 199 140 217 NA NA 188 880 58 NA NA 353 200 235 NA NA 117 223 1880 NA NA NA 188 200 NA NA NA 435 175 NA NA NA 940 85 NA NA NA 241 NA ", header=TRUE) #emis # convert to long format emis.long <- melt(emis, variable.name = "year", value.name = "hc", na.rm = TRUE ) # naming variables manually, the variable.name and value.name not working 11/2012 names(emis.long) <- c("year", "hc") # summary of each year by(emis.long$hc, emis.long$year, summary) # IQR and sd of each year by(emis.long$hc, emis.long$year, function(X) { c(IQR(X), sd(X), length(X)) }) # Plot the data using ggplot library(ggplot2) p <- ggplot(emis.long, aes(x = year, y = hc)) # plot a reference line for the global mean (assuming no groups) p <- p + geom_hline(yintercept = mean(emis.long$hc), colour = "black", linetype = "dashed", size = 0.3, alpha = 0.5) # boxplot, size=.75 to stand out behind CI p <- p + geom_boxplot(size = 0.75, alpha = 0.5) # points for observed data p <- p + geom_point(position = position_jitter(w = 0.05, h = 0), alpha = 0.5) # diamond at mean for each group p <- p + stat_summary(fun.y = mean, geom = "point", shape = 18, size = 6, colour = "red", alpha = 0.8) # confidence limits based on normal distribution p <- p + stat_summary(fun.data = "mean_cl_normal", geom = "errorbar", width = .2, colour = "red", alpha = 0.8) p <- p + labs(title = "Albuquerque automobile hydrocarbon emissions data") + ylab("hc (ppm)") # to reverse order that years print, so oldest is first on top p <- p + scale_x_discrete(limits = rev(levels(emis.long$year)) ) p <- p + coord_flip() p <- p + theme(legend.position="none") print(p) fit.e <- aov(hc ~ year, data = emis.long) summary(fit.e) fit.e # ANOVA of rank, for illustration that this is similar to what KW is doing fit.er <- aov(rank(hc) ~ year, data = emis.long) summary(fit.er) fit.er # KW ANOVA fit.ek <- kruskal.test(hc ~ year, data = emis.long) fit.ek # log scale emis.long$loghc <- log(emis.long$hc) # summary of each year by(emis.long$loghc, emis.long$year, summary) # IQR and sd of each year by(emis.long$loghc, emis.long$year, function(X) { c(IQR(X), sd(X), length(X)) }) # Plot the data using ggplot library(ggplot2) p <- ggplot(emis.long, aes(x = year, y = loghc)) # plot a reference line for the global mean (assuming no groups) p <- p + geom_hline(yintercept = mean(emis.long$loghc), colour = "black", linetype = "dashed", size = 0.3, alpha = 0.5) # boxplot, size=.75 to stand out behind CI p <- p + geom_boxplot(size = 0.75, alpha = 0.5) # points for observed data p <- p + geom_point(position = position_jitter(w = 0.05, h = 0), alpha = 0.5) # diamond at mean for each group p <- p + stat_summary(fun.y = mean, geom = "point", shape = 18, size = 6, colour = "red", alpha = 0.8) # confidence limits based on normal distribution p <- p + stat_summary(fun.data = "mean_cl_normal", geom = "errorbar", width = .2, colour = "red", alpha = 0.8) p <- p + labs(title = "Albuquerque automobile hydrocarbon emissions data (log scale)") p <- p + ylab("log(hc) (log(ppm))") # to reverse order that years print, so oldest is first on top p <- p + scale_x_discrete(limits = rev(levels(emis.long$year)) ) p <- p + coord_flip() p <- p + theme(legend.position="none") print(p) # ANOVA of rank, for illustration that this is similar to what KW is doing fit.le <- aov(loghc ~ year, data = emis.long) summary(fit.le) fit.le # KW ANOVA -- same conclusions as original scale, since based on ranks fit.lek <- kruskal.test(loghc ~ year, data = emis.long) fit.lek #### Example: Hodgkin's Disease Study hd <- read.table(text=" nc ahd ihd 5.37 3.96 5.37 5.80 3.04 10.60 4.70 5.28 5.02 5.70 3.40 14.30 3.40 4.10 9.90 8.60 3.61 4.27 7.48 6.16 5.75 5.77 3.22 5.03 7.15 7.48 5.74 6.49 3.87 7.85 4.09 4.27 6.82 5.94 4.05 7.90 6.38 2.40 8.36 9.24 5.81 5.72 5.66 4.29 6.00 4.53 2.77 4.75 6.51 4.40 5.83 7.00 NA 7.30 6.20 NA 7.52 7.04 NA 5.32 4.82 NA 6.05 6.73 NA 5.68 5.26 NA 7.57 NA NA 5.68 NA NA 8.91 NA NA 5.39 NA NA 4.40 NA NA 7.13 ", header=TRUE) #hd # convert to long format hd.long <- melt(hd, variable.name = "patient", value.name = "level", na.rm = TRUE ) # naming variables manually, the variable.name and value.name not working 11/2012 names(hd.long) <- c("patient", "level") # summary of each patient by(hd.long$level, hd.long$patient, summary) # IQR and sd of each patient by(hd.long$level, hd.long$patient, function(X) { c(IQR(X), sd(X), length(X)) }) # log scale hd.long$loglevel <- log(hd.long$level) # summary of each patient by(hd.long$loglevel, hd.long$patient, summary) # IQR and sd of each patient by(hd.long$loglevel, hd.long$patient, function(X) { c(IQR(X), sd(X), length(X)) }) # Plot the data using ggplot library(ggplot2) p <- ggplot(hd.long, aes(x = patient, y = level)) # plot a reference line for the global mean (assuming no groups) p <- p + geom_hline(yintercept = mean(hd.long$level), colour = "black", linetype = "dashed", size = 0.3, alpha = 0.5) # boxplot, size=.75 to stand out behind CI p <- p + geom_boxplot(size = 0.75, alpha = 0.5) # points for observed data p <- p + geom_point(position = position_jitter(w = 0.05, h = 0), alpha = 0.5) # diamond at mean for each group p <- p + stat_summary(fun.y = mean, geom = "point", shape = 18, size = 6, colour = "red", alpha = 0.8) # confidence limits based on normal distribution p <- p + stat_summary(fun.data = "mean_cl_normal", geom = "errorbar", width = .2, colour = "red", alpha = 0.8) p <- p + labs(title = "Plasma bradykininogen levels for three patient groups") p <- p + ylab("level (mg/ml)") # to reverse order that years print, so oldest is first on top p <- p + scale_x_discrete(limits = rev(levels(hd.long$patient)) ) p <- p + ylim(c(0,max(hd.long$level))) p <- p + coord_flip() p <- p + theme(legend.position="none") print(p) ## log scale # Plot the data using ggplot library(ggplot2) p <- ggplot(hd.long, aes(x = patient, y = loglevel)) # plot a reference line for the global mean (assuming no groups) p <- p + geom_hline(yintercept = mean(hd.long$loglevel), colour = "black", linetype = "dashed", size = 0.3, alpha = 0.5) # boxplot, size=.75 to stand out behind CI p <- p + geom_boxplot(size = 0.75, alpha = 0.5) # points for observed data p <- p + geom_point(position = position_jitter(w = 0.05, h = 0), alpha = 0.5) # diamond at mean for each group p <- p + stat_summary(fun.y = mean, geom = "point", shape = 18, size = 6, colour = "red", alpha = 0.8) # confidence limits based on normal distribution p <- p + stat_summary(fun.data = "mean_cl_normal", geom = "errorbar", width = .2, colour = "red", alpha = 0.8) p <- p + labs(title = "Plasma bradykininogen levels for three patient groups (log scale)") p <- p + ylab("log(level) (log(mg/ml))") # to reverse order that years print, so oldest is first on top p <- p + scale_x_discrete(limits = rev(levels(hd.long$patient)) ) p <- p + ylim(c(0,max(hd.long$loglevel))) p <- p + coord_flip() p <- p + theme(legend.position="none") print(p) # KW ANOVA fit.h <- kruskal.test(level ~ patient, data = hd.long) fit.h # with continuity correction in the normal approximation for the p-value wilcox.test(hd$nc , hd$ahd, conf.int=TRUE, conf.level = 0.9833) wilcox.test(hd$nc , hd$ihd, conf.int=TRUE, conf.level = 0.9833) wilcox.test(hd$ahd, hd$ihd, conf.int=TRUE, conf.level = 0.9833) #### Planned Comparisons # with continuity correction in the normal approximation for the p-value wilcox.test(emis$y63.7, emis$Pre.y63, conf.int=TRUE, conf.level = 0.9875) wilcox.test(emis$y68.9, emis$y63.7 , conf.int=TRUE, conf.level = 0.9875) wilcox.test(emis$y70.1, emis$y68.9 , conf.int=TRUE, conf.level = 0.9875) wilcox.test(emis$y72.4, emis$y70.1 , conf.int=TRUE, conf.level = 0.9875) #### Permutation tests # Calculated the observed difference in means # met.long includes both Uwet and Walker groups Tobs <- mean(met.long[(met.long$site == "Uwet" ), 2]) - mean(met.long[(met.long$site == "Walker"), 2]) Tobs # Plan: # Initialize a vector in which to store the R number of difference of means. # Calculate R differences in means for R permutations, storing the results. # Note that there are prod(1:19) = 10^17 total permutations, # but the R repetitions will serve as a good approximation. # Plot the permutation null distribution with an indication of the Tobs. # R = a large number of repetitions R <- 1e4 # initialize the vector of difference of means from the permutations Tperm <- rep(NA, R) # For each of R repetitions, permute the Uwet and Walker labels, # calculate the difference of means with the permuted labels, # and store the result in the i.R'th position of Tperm. for (i.R in 1:R) { # permutation of 19 = 9+10 integers 1, 2, ..., 19 ind.perm <- sample.int(nrow(met.long)) # identify as "TRUE" numbers 1, ..., 9 (the number of Uwet labels) lab.U <- (ind.perm <= sum(met.long$site == "Uwet")) #$ # identify as "TRUE" numbers 10, ..., 19 (the number of Walker labels) # that is, all the non-Uwet labels lab.W <- !lab.U # calculate the difference in means and store in Tperm at index i.R Tperm[i.R] <- mean(met.long[lab.U, 2]) - mean(met.long[lab.W, 2]) } # Plot the permutation null distribution with an indication of the Tobs. dat <- data.frame(Tperm) library(ggplot2) p <- ggplot(dat, aes(x = Tperm)) #p <- p + scale_x_continuous(limits=c(-20,+20)) p <- p + geom_histogram(aes(y=..density..), binwidth=0.01) p <- p + geom_density(alpha=0.1, fill="white") p <- p + geom_rug() # vertical line at Tobs p <- p + geom_vline(aes(xintercept=Tobs), colour="#BB0000", linetype="dashed") p <- p + labs(title = "Permutation distribution of difference in means, Uwet and Walker Meteorites") p <- p + xlab("difference in means (red line = observed difference in means)") print(p) # Calculate a two-sided p-value. p.upper <- sum((Tperm >= abs(Tobs))) / R p.upper p.lower <- sum((Tperm <= -abs(Tobs))) / R p.lower p.twosided <- p.lower + p.upper p.twosided # standard two-sample t-test with equal variances t.summary <- t.test(cool ~ site, data = met.long, var.equal = TRUE) t.summary # linear model form of t-test, "siteWalker" has estimate, se, t-stat, and p-value lm.summary <- lm(cool ~ site, data = met.long) summary(lm.summary) # permutation test version library(coin) # Fisher-Pitman permutation test oneway.summary <- oneway_test(cool ~ site, data = met.long) oneway.summary # # examples of extracting values from coins S4 class objects # expectation(oneway.summary) # covariance(oneway.summary) # pvalue(oneway.summary) # confint(oneway.summary) pvalue(oneway.summary) fit.e <- aov(hc ~ year, data = emis.long) summary(fit.e) library(coin) # Fisher-Pitman permutation test oneway.summary <- oneway_test(hc ~ year, data = emis.long) oneway.summary # these are the levels of the factor, ordered by their medians fac.lev <- levels(reorder(levels(emis.long$year) , -as.numeric(by(emis.long$loghc, emis.long$year, median))) ) fac.lev # create a matrix to store pairwise comparison p-values mc.pval <- matrix(NA , nrow = length(fac.lev) , ncol = length(fac.lev) , dimnames = list(fac.lev, fac.lev)) # diag is always 1, no group differs from itself diag(mc.pval) <- 1 mc.pval # loop over all pairs of factor levels, perform two-sample test, # and store p-value in matrix for (i1 in 1:(length(fac.lev) - 1)) { for (i2 in (i1 + 1):length(fac.lev)) { ## DEBUG - to make sure the indexing is working, you can print them: # print(cat(i1, i2)) library(coin) # Fisher-Pitman permutation test oneway.summary <- oneway_test(hc ~ year, data = subset(emis.long, (year == fac.lev[i1] | year == fac.lev[i2]))) # put p-value in matrix mc.pval[i1, i2] <- pvalue(oneway.summary) mc.pval[i2, i1] <- mc.pval[i1, i2] } } # p-values mc.pval # summary of pairwise comparisons # threshold is Bonferroni-corrected alpha=0.05 / 10 library(multcompView) multcompLetters( mc.pval , compare = "<" , threshold = 0.05 / choose(length(fac.lev), 2) , Letters = letters , reversed = FALSE) #### Density estimation # include time ranks 3 and above, that is, remove the lowest two values time2 <- time[(rank(time) >= 3)] old.par <- par(no.readonly = TRUE) # make smaller margins par(mfrow=c(5,1), mar=c(3,2,2,1), oma=c(1,1,1,1)) hist(time2, breaks=1 , main="1 break" , xlim=c(24.80,24.84), xlab=""); rug(time2) hist(time2, main="default" , xlim=c(24.80,24.84), xlab=""); rug(time2) hist(time2, breaks=10 , main="10 breaks" , xlim=c(24.80,24.84), xlab=""); rug(time2) hist(time2, breaks=20 , main="20 breaks" , xlim=c(24.80,24.84), xlab=""); rug(time2) hist(time2, breaks=100 , main="100 breaks", xlim=c(24.80,24.84), xlab=""); rug(time2) # restore par() settings par(old.par) par(mfrow=c(3,1)) # prob=TRUE scales the y-axis like a density function, total area = 1 hist(time2, prob=TRUE, main="") # apply a density function, store the result den = density(time2) # plot density line over histogram lines(den, col=2, lty=2, lwd=2) # extract the bandwidth (bw) from the density line b = round(den$bw, 4) title(main=paste("Default =", b), col.main=2) # undersmooth hist(time2, prob=TRUE, main="") lines(density(time2, bw=0.0004), col=3, lwd=2) text(17.5, .35, "", col=3, cex=1.4) title(main=paste("Undersmooth, BW = 0.0004"), col.main=3) # oversmooth hist(time2, prob=TRUE, main="") lines(density(time2, bw=0.008), col=4, lwd=2) title(main=paste("Oversmooth, BW = 0.008"), col.main=4) par(mfrow=c(1,1)) hist(time2, prob=TRUE, main="") # default kernel is Gaussian ("Normal") lines(density(time2) , col=2, lty=1, lwd=2) lines(density(time2, ker="epan"), col=3, lty=1, lwd=2) lines(density(time2, ker="rect"), col=4, lty=1, lwd=2) title(main="Gaussian, Epanechnikov, Rectangular") # other kernels include: "triangular", "biweight", "cosine", "optcosine" library(BSDA) I <- 10000 # number of iterations n <- seq(10,100,10) # sample sizes 10 through 100 in steps of size 10 power.t <- 1:10 #t test power.w <- 1:10 # wilcoxon power.s <- 1:10 # sign test for(j in 1:10) { decision.t <- 1:I decision.w <- 1:I decision.s <- 1:I for(i in 1:I) { x <- rnorm(n[j],1,3) pvalue.t <- t.test(x)$p.value pvalue.w <- wilcox.test(x)$p.value pvalue.s <- SIGN.test(x)$p.value decision.t[i] <- (pvalue.t < .05) # 1=correct decision decision.w[i] <- (pvalue.w < .05) #1=correct decision decision.s[i] <- (pvalue.s < .05) } power.t[j] <- mean(decision.t) # proportion of correct decisions power.w[j] <- mean(decision.w) power.s[j] <- mean(decision.s) } plot(power.t[j],type="n",xlim=c(0,100),ylim=c(0,1),main="",xlab="Sample size", ylab = "Power", cex.lab=1.3,cex.axis=1.3) points(n,power.t,type="l",lwd=2,col="black") points(n,power.w,type="l",lwd=2,lty=2,col="red") points(n,power.s,type="l",lwd=2,lty=3,col="orange") legend(0,1,legend=c("t-test","Wilcoxon","Sign test"),col=c("black","red","orange"),lty=c(1,2,3),cex=2,bty="n",lwd=c(2,2,2))