niehs <- read.table("http://faculty.washington.edu/kenrice/sisg/niehs.csv", header=T, sep=",") #check for missing all.ok <- complete.cases(niehs) table(all.ok) niehs[!all.ok,] names(niehs) myfunc <- function(x){ trt <- x[c(2,4,7,8,11,12)] ctrl <- x[c(3,5,6,9,10,13)] diff <- mean(trt-ctrl) pval <- t.test(trt, ctrl, paired=TRUE)$p.value c(diff=diff, p.value = pval) } system.time(my.output <- apply(niehs[all.ok,] , 1, myfunc)) Rprof("loop.prof") my.output <- apply(niehs[all.ok,] , 1, myfunc) Rprof(NULL) summaryRprof("loop.prof") niehsmat<-as.matrix(niehs[all.ok,]) system.time({ loop.output<-matrix(NA,nrow=1904,ncol=2) for(i in 1:nrow(niehsmat)){ trt <- niehsmat[i,c(2,4,7,8,11,12)] ctrl <- niehsmat[i,c(3,5,6,9,10,13)] test <- t.test(trt,ctrl,paired=TRUE) loop.output[i,1]<-test$estimate loop.output[i,2]<-test$p.value } }) #here's a volcano plot plot(my.output[1,] , -log(my.output[2,]), xlab="diff", ylab="log pval", xlim=c(-2.5, 2.5) ) abline(h=-log(c(0.05) ), lwd=2, col="red") ## fast t-test system.time({ trts <- niehs[,c(2,4,7,8,11,12)] ctrls <- niehs[,c(3,5,6,9,10,13)] diffs <- trts-ctrls deltas<-rowMeans(diffs) vars<-(6/5)*(rowMeans(diffs*diffs)-deltas*deltas)/6 tvalues<-deltas/sqrt(vars) pvalues<-pt(abs(tvalues),df=5,lower.tail=FALSE)*2 }) ## funnelplot plot(1/vars,deltas, ylim=c(-2.5,2.5),pch=19) onefp<-qnorm(0.5/1904,lower.tail=FALSE) curve(onefp/sqrt(x), add=TRUE,col="red") curve(-onefp/sqrt(x), add=TRUE,col="red") pt05<-qnorm(0.5/20, lower.tail=FALSE) curve(pt05/sqrt(x), add=TRUE,col="purple",n=300) curve(-pt05/sqrt(x), add=TRUE,col="purple",n=300) bonf<-qnorm(0.5/20/1904, lower.tail=FALSE) curve(bonf/sqrt(x), add=TRUE,col="royalblue",n=300) curve(-bonf/sqrt(x), add=TRUE,col="royalblue",n=300) legend("topright",col=c("red","purple","royalblue"), lty=1, legend=c("One false positive","p=0.05","Bonferroni")) title(main="Robustly null")