##
## General rules for programming simulation studies;
## 1. Try one dataset, understand how the analysis works
## 2. Replicate a few times, using the same steps (cut and paste!)
##    and check this works
## 3. Run for a larger number of replications
##

NPEOPLE <- 1000
NSNPS  <- 50  # we will increase this later

set.seed(7)

environ <- rpois(NPEOPLE, 10) # Poisson data, mean 10 (e.g. #drinks per week)
hist(environ)
outcome <- environ*(environ+rnorm(NPEOPLE))
hist(outcome)

plot(outcome~environ)
abline(lm(outcome~environ))
cor(outcome, environ)^2
# data is a bit "curvy"!

one.gene <- rbinom(NPEOPLE, 2, 0.3)
table(one.gene)

plot(outcome~factor(one.gene), varwidth=TRUE)

#"interaction" analysis tests whether environ effect differs by genotype

plot(outcome~jitter(environ, 1), col=one.gene + 1)
abline(lm(outcome~environ, subset=one.gene==0), col=1)
abline(lm(outcome~environ, subset=one.gene==1), col=2)
abline(lm(outcome~environ, subset=one.gene==2), col=3)

lm(outcome~one.gene*environ)
summary(lm(outcome~one.gene*environ))$coeff
summary(lm(outcome~one.gene*environ))$coeff[4,4]

##
## Replicate for many SNPs
##

many.genes <- matrix(rbinom(NPEOPLE*NSNPS,2,0.3), ncol=NSNPS)

# "main effects" analysis, i.e. no interactions
system.time({
betas    <- numeric(NSNPS) # creates empty vectors - rep(NA, NSNPS)
ses      <- numeric(NSNPS)
pvalues  <- numeric(NSNPS)
for(i in 1:NSNPS){
	model    <- lm(outcome~many.genes[,i])
	output   <- coef(summary(model))
	betas[i]   <- output[2,1]
	ses[i]     <- output[2,2]
	pvalues[i] <- output[2,4]
	}
})

qqplot(-log10(ppoints(NSNPS)), -log10(pvalues))
abline(0,1, lty=2)

# same data, for interaction analysis
system.time({
betas   <- numeric(NSNPS)
ses     <- numeric(NSNPS)
pvalues <- numeric(NSNPS)
for(i in 1:NSNPS){
    model      <- lm(outcome~environ*many.genes[,i])
    output     <- coef(summary(model))
    betas[i]   <- output[4,1]
    ses[i]     <- output[4,2]
    pvalues[i] <- output[4,4]
    }
})
qqplot(-log10(ppoints(NSNPS)), -log10(pvalues))
abline(0,1, lty=2)

median( (betas/ses)^2)/0.4549 # genomic control "lambda"
                              # 0.4549=qchisq(0.5, df=1)

# are we just being unlucky? Try the interaction analysis x10;

int.lambdas <- replicate(10, {
    environ    <- rpois(NPEOPLE, 10)
    outcome    <- environ*(environ+rnorm(NPEOPLE))

    many.genes <- matrix(rbinom(NPEOPLE*NSNPS,2,0.3), ncol=NSNPS)
    betas      <- numeric(NSNPS)
    ses        <- numeric(NSNPS)

    for(i in 1:NSNPS){
        model    <- lm(outcome~environ*many.genes[,i])
        output   <- coef(summary(model))
        betas[i] <- output[4,1]
        ses[i]   <- output[4,2]
    }
    median( (betas/ses)^2)/0.4549
    }    )

alarm()

int.lambdas	
hist(int.lambdas)
stripchart(int.lambdas, pch=1, main="replicate lambdas")

##
## use robust std errs to fix it; see the sandwich package
##

library("sandwich")

# NSNPS  <- 500 # around 2 mins, on my machine

system.time({
inter.lambdas.robust <-replicate(10,{
  environ    <- rpois(NPEOPLE, 10)
  outcome    <- environ*(environ+rnorm(NPEOPLE))

  many.genes <- matrix(rbinom(NPEOPLE*NSNPS,2,0.3), ncol=NSNPS)
  betas      <- numeric(NSNPS)
  ses        <- numeric(NSNPS)

  for(i in 1:NSNPS){
	  model    <- lm(outcome~environ*many.genes[,i])
	  output   <- coef(summary(model))
	  betas[i] <- output[4,1]
	  ses[i]   <- sqrt(vcovHC(model)[4,4])
	}
   median( (betas/ses)^2)/0.4549
})
})

alarm()

stripchart(inter.lambdas.robust , pch=1, main="replicate lambdas\n interaction effect, robust")

main.lambdas.robust <-replicate(10,{
  environ <- rpois(NPEOPLE, 10)
  outcome <- environ*(environ+rnorm(NPEOPLE))

  many.genes <- matrix(rbinom(NPEOPLE*NSNPS,2,0.3), ncol=NSNPS)
  betas      <- numeric(NSNPS)
  ses        <- numeric(NSNPS)

  for(i in 1:NSNPS){
     model    <-  lm(outcome~many.genes[,i])
     output   <- coef(summary(model))
     betas[i] <-  output[2,1]
     ses[i]   <- sqrt(vcovHC(model)[2,2])
	}
   median( (betas/ses)^2)/0.4549
})

stripchart(main.lambdas.robust , pch=1, main="replicate lambdas\nmain effects, robust")

stripchart(list(inter.plain=int.lambdas, 
                inter.robust=inter.lambdas.robust, 
                main.robust=main.lambdas.robust), pch=21)