# Question 1

# To get started, do one simulation.
# Note we generate the *differences*, for 100 people.
mydiff <- rnorm(100, mean=2, sd=7)
t.test(mydiff)
str( t.test(mydiff) )
t.test(mydiff)$p.value

# Now repeat this 10000 times, and draw pictures of the output.
my.sims <- replicate(10000, {
  mydiff <- rnorm(100, 2, 7)
  t.test(mydiff)$p.value
})
hist(my.sims)
abline(v=0.05, lwd=2, col="red")

# What proportion is below 0.05?
table(my.sims < 0.05)
mean(my.sims < 0.05)
# So we get about 80% power (pretty good).

# And compare with the exact answer:
power.t.test(n=100, delta=2, sd=7, sig.level=0.05, type="one.sample")

##
## Question 2
##

# Read in the data.
bpdata <- read.table("bpdata.csv", sep=",", header=TRUE)

# Write a function to do one association test and report the p-value:
# regress the outcome on the SNP and extract the p-value for the slope.
one.p <- function(outcome, snp){
  model <- lm(outcome ~ as.numeric(snp))
  coef(summary(model))[2, 4]
}

# Write a function to do a collection of tests and report their minimum
# p-value (equivalent to reporting the maximum z- or t-statistic).
min.p <- function(outcome, snps){
  min(sapply(snps, function(snp) one.p(outcome, snp)))
}

# Do this once for our observed data.
min.p(bpdata$sbp, bpdata[, 5:15])

# Now try a few replications, with shuffled blood pressures -
# note the use of sample() below. Timing 3 replications and scaling
# by 1000/3 estimates how long 1000 replications will take.
system.time( replicate(3, min.p(sample(bpdata$sbp), bpdata[, 5:15])) ) * 1000/3

# NB this takes a while!
manyp <- replicate(1000, min.p(sample(bpdata$sbp), bpdata[, 5:15]))
hist(manyp)
abline(v=0.01625, col="red")  # our observed minimum p-value

table(manyp < 0.01625)
mean(manyp < 0.01625)  # this is the accurate (permutation-based) p-value

# Bonferroni correction (a conservative-but-quick estimate):
# multiply the observed minimum p-value by the number of tests (11 SNPs).
0.01625 * 11
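
# For comparison (a minimal sketch, not part of the original exercise):
# base R's p.adjust() implements the same Bonferroni correction, and
# additionally caps the adjusted p-value at 1. Using the observed
# minimum p-value and the 11 tests from above:
p.adjust(0.01625, method="bonferroni", n=11)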
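
# A rough check on Monte Carlo error (a sketch, assuming the 1000
# permutations stored in manyp above): the standard error of the
# estimated permutation p-value is sqrt(p*(1-p)/B) for B permutations,
# so the estimate is only accurate to roughly +/- 0.01 here.
# p.hat is a hypothetical name introduced for this illustration.
p.hat <- mean(manyp < 0.01625)
sqrt(p.hat * (1 - p.hat) / 1000)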