##################################################################################################################
##################################################################################################################
### R Scripts: let's read Gene2 into R and analyze it using both SKAT and also count based collapsing

library(SKAT)

Z = as.matrix(read.table("Gene2.txt", header = F))
y.c = scan("Trait2.txt") ## read in the values for trait 1

maf = apply(Z,2,mean)/2 ## calculate the mean of each column and divide by 2 to get the maf
rvs = which(maf<0.03)  ## Set the threshold to be 0.03

obj<-SKAT_Null_Model(y.c ~ 1, out_type="C") ## calculates the NULL model, i.e. there are no variants
SKAT(Z[,rvs], obj)$p.value ## restrict attention to rare variants.  The null model has not changed.

weights = dbeta(maf[rvs], 1,25)
C = Z[,rvs]%*%weights ## calculate the collapsed variable
summary(lm(y.c~C))


##################################################################################################################
##################################################################################################################
### R Scripts: omnibus version of SKAT with several different rho values

SKAT(Z[,rvs], obj, r.corr=0)$p.value  ## same as running SKAT

SKAT(Z[,rvs], obj, r.corr=1)$p.value ## same as weighted count collapsing

SKAT(Z[,rvs], obj, r.corr=.5)$p.value ## something in between


##################################################################################################################
##################################################################################################################
### R Scripts: omnibus version of SKAT with "optimal" rho value

SKAT(Z[,rvs], obj, method="optimal")$p.value

SKAT(Z[,rvs], obj, method="optimal.adj")$p.value ## slightly better type I error control in the tails.



##################################################################################################################
##################################################################################################################
### R Scripts: sample power analysis.

data(SKAT.haplotypes) ## load sample chromosomes/haplotypes from R package
attach(SKAT.haplotypes)

## Power calculation
out.c<-Power_Continuous(Haplotype,SNPInfo$CHROM_POS, SubRegion.Length=5000,
                        N.Sample.ALL = 1000,
                        Causal.MAF.Cutoff=0.05, alpha = 2.5e-6, Causal.Percent= 20,
                        N.Sim=50, MaxBeta=2,Negative.Percent=20) ## note that we only do 50 sims for speed, in reality, you need more
out.c


Get_RequiredSampleSize(out.c, Power=0.8)  ## only tells us that we need more than 1000

## instead of a single sample size, now we run a grid
out.c<-Power_Continuous(Haplotype,SNPInfo$CHROM_POS, SubRegion.Length=5000,
                        N.Sample.ALL = 100*seq(15,25),
                        Causal.MAF.Cutoff=0.05, alpha = 2.5e-6, Causal.Percent= 20,
                        N.Sim=50, MaxBeta=2,Negative.Percent=20) ## note that we only do 50 sims for speed, in reality, you need more
out.c

Get_RequiredSampleSize(out.c, Power=0.8)  ## basically, interpolates the values across the grid we calculated.