# Question 1

# To get started, do one simulation.
# Note we generate the *differences*, for 100 people.
mydiff <- rnorm(100, mean=2, sd=7)
t.test(mydiff)
str( t.test(mydiff) )
t.test(mydiff)$p.value

# Now repeat this 10000 times, and draw pictures of the output.
my.sims <- replicate(10000, {
  mydiff <- rnorm(100, 2, 7)
  t.test(mydiff)$p.value
})
hist(my.sims)
abline(v=0.05, lwd=2, col="red")

# What proportion is below 0.05?
table(my.sims < 0.05)
mean(my.sims < 0.05)
# So we get about 80% power (pretty good).

# And compare with the exact answer:
power.t.test(n=100, delta=2, sd=7, sig.level=0.05, type="one.sample")

##
## Question 2
##

# Read in the data.
bpdata <- read.table("bpdata.csv", sep=",", header=TRUE)

# Write a function to do one association test and report the p-value:
# regress the outcome on the SNP and extract the p-value for the slope.
one.p <- function(outcome, snp){
  model <- lm(outcome ~ as.numeric(snp))
  coef(summary(model))[2, 4]
}

# Write a function to do a collection of tests and report their minimum
# p-value (equivalent to reporting the maximum z- or t-statistic).
min.p <- function(outcome, snps){
  min(sapply(snps, function(snp) one.p(outcome, snp)))
}

# Do this once for our observed data.
min.p(bpdata$sbp, bpdata[, 5:15])

# Now try a few replications, with shuffled blood pressures -
# note the use of sample() below. Timing 3 replications and scaling
# by 1000/3 estimates how long 1000 replications will take.
system.time( replicate(3, min.p(sample(bpdata$sbp), bpdata[, 5:15])) ) * 1000/3

# NB this takes a while!
manyp <- replicate(1000, min.p(sample(bpdata$sbp), bpdata[, 5:15]))
hist(manyp)
abline(v=0.01625, col="red")  # our observed minimum p-value

table(manyp < 0.01625)
mean(manyp < 0.01625)  # this is the accurate (permutation-based) p-value

# Bonferroni correction (a conservative-but-quick estimate):
# multiply the observed minimum p-value by the number of tests (11 SNPs).
0.01625 * 11
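
# For comparison (a minimal sketch, not part of the original exercise):
# base R's p.adjust() implements the same Bonferroni correction, and
# additionally caps the adjusted p-value at 1. Using the observed
# minimum p-value and the 11 tests from above:
p.adjust(0.01625, method="bonferroni", n=11)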
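
# A rough check on Monte Carlo error (a sketch, assuming the 1000
# permutations stored in manyp above): the standard error of the
# estimated permutation p-value is sqrt(p*(1-p)/B) for B permutations,
# so the estimate is only accurate to roughly +/- 0.01 here.
# p.hat is a hypothetical name introduced for this illustration.
p.hat <- mean(manyp < 0.01625)
sqrt(p.hat * (1 - p.hat) / 1000)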