################################################################################################################## ################################################################################################################## ### R Scripts: let's read Gene2 into R and analyze it using both SKAT and also count based collapsing library(SKAT) Z = as.matrix(read.table("Gene2.txt", header = F)) y.c = scan("Trait2.txt") ## read in the values for trait 1 maf = apply(Z,2,mean)/2 ## calculate the mean of each column and divide by 2 to get the maf rvs = which(maf<0.03) ## Set the threshold to be 0.03 obj<-SKAT_Null_Model(y.c ~ 1, out_type="C") ## calculates the NULL model, i.e. there are no variants SKAT(Z[,rvs], obj)$p.value ## restrict attention to rare variants. The null model has not changed. weights = dbeta(maf[rvs], 1,25) C = Z[,rvs]%*%weights ## calculate the collapsed variable summary(lm(y.c~C)) ################################################################################################################## ################################################################################################################## ### R Scripts: omnibus version of SKAT with several different rho values SKAT(Z[,rvs], obj, r.corr=0)$p.value ## same as running SKAT SKAT(Z[,rvs], obj, r.corr=1)$p.value ## same as weighted count collapsing SKAT(Z[,rvs], obj, r.corr=.5)$p.value ## something in between ################################################################################################################## ################################################################################################################## ### R Scripts: omnibus version of SKAT with "optimal" rho value SKAT(Z[,rvs], obj, method="optimal")$p.value SKAT(Z[,rvs], obj, method="optimal.adj")$p.value ## slightly better type I error control in the tails. ################################################################################################################## ################################################################################################################## ### R Scripts: sample power analysis. data(SKAT.haplotypes) ## load sample chromosomes/haplotypes from R package attach(SKAT.haplotypes) ## Power calculation out.c<-Power_Continuous(Haplotype,SNPInfo$CHROM_POS, SubRegion.Length=5000, N.Sample.ALL = 1000, Causal.MAF.Cutoff=0.05, alpha = 2.5e-6, Causal.Percent= 20, N.Sim=50, MaxBeta=2,Negative.Percent=20) ## note that we only do 50 sims for speed, in reality, you need more out.c Get_RequiredSampleSize(out.c, Power=0.8) ## only tells us that we need more than 1000 ## instead of a single sample size, now we run a grid out.c<-Power_Continuous(Haplotype,SNPInfo$CHROM_POS, SubRegion.Length=5000, N.Sample.ALL = 100*seq(15,25), Causal.MAF.Cutoff=0.05, alpha = 2.5e-6, Causal.Percent= 20, N.Sim=50, MaxBeta=2,Negative.Percent=20) ## note that we only do 50 sims for speed, in reality, you need more out.c Get_RequiredSampleSize(out.c, Power=0.8) ## basically, interpolates the values across the grid we calculated.