################################################################################################################## ################################################################################################################## ### R Scripts: let's read Gene1 into R and look at the MAFs. Z = as.matrix(read.table("Gene1.txt", header = F)) maf = apply(Z,2,mean)/2 ## calculate the mean of each column and divide by 2 to get the maf maf = colMeans(Z)/2 hist(maf) ## plot histogram hist(maf[which(maf<0.05)]) ## Zoomed histogram (focus on really small values) ################################################################################################################## ################################################################################################################## ### R Scripts: test for association between each variant in Gene1 and Trait1 y.c = scan("Trait1.txt") ## read in the values for trait 1 pvs = rep(NA, ncol(Z)) for (j in seq(ncol(Z))) { pvs[j] = summary(lm(y.c~Z[,j]))$coef[2,4] ## test for association between y.c and each SNP } p.adjust(pvs, "bonf") ## adjust p-values for multiple testing ################################################################################################################## ################################################################################################################## ### R Scripts: test for association between the variants in Gene1 and Trait1. rvs = which(maf<0.03) ## Set the threshold to be 0.03 ### CAST: Binary collapsing C = as.numeric(apply(Z[,rvs],1,sum)>0) ## figures out if each person has any rare variants, ## i.e. the sum of their variants in additive mode would be greater than 0. summary(lm(y.c~C)) ### MZ Test C = apply(Z[,rvs],1,sum) ## number of rare variants each person haz summary(lm(y.c~C)) ### Weighted Count weights = 1/sqrt(maf[rvs]) C = Z[,rvs]%*%weights ## Weighted sum summary(lm(y.c~C)) weights = 1/maf[rvs] ## alternative weights C = Z[,rvs]%*%weights summary(lm(y.c~C)) weights = (1/maf[rvs])^2 ## alternative weights C = Z[,rvs]%*%weights summary(lm(y.c~C)) weights = (1/maf[rvs])^(1/3) ## alternative weights C = Z[,rvs]%*%weights summary(lm(y.c~C)) ################################################################################################################## ################################################################################################################## ### R Scripts: test for association between the variants in Gene2 and Trait2. Z = as.matrix(read.table("Gene2.txt", header = F)) maf = apply(Z,2,mean)/2 ## calculate the mean of each column and divide by 2 to get the maf y.c = scan("Trait2.txt") ## read in the values for trait 1 rvs = which(maf<0.03) ## Set the threshold to be 0.03 ### CAST: Binary collapsing C = as.numeric(apply(Z[,rvs],1,sum)>0) ## figures out if each person has any rare variants, ## i.e. the sum of their variants in additive mode would be greater than 0. summary(lm(y.c~C)) ### MZ Test C = apply(Z[,rvs],1,sum) ## number of rare variants each person haz summary(lm(y.c~C)) ### Weighted Count weights = 1/sqrt(maf[rvs]) C = Z[,rvs]%*%weights ## Weighted sum summary(lm(y.c~C)) ################################################################################################################## ################################################################################################################## ### R Scripts: test for association between the variants in Gene3 and Trait3. Z = as.matrix(read.table("Gene3.txt", header = F)) maf = apply(Z,2,mean)/2 ## calculate the mean of each column and divide by 2 to get the maf y.c = scan("Trait3.txt") ## read in the values for trait 1 ### CAST: Binary collapsing rvs = which(maf<0.05) ## Set the threshold to be 0.05 C = as.numeric(apply(Z[,rvs],1,sum)>0) summary(lm(y.c~C)) rvs = which(maf<0.03) ## Set the threshold to be 0.03 C = as.numeric(apply(Z[,rvs],1,sum)>0) summary(lm(y.c~C)) rvs = which(maf<0.01) ## Set the threshold to be 0.01 C = as.numeric(apply(Z[,rvs],1,sum)>0) summary(lm(y.c~C)) rvs = which(maf<0.005) ## Set the threshold to be 0.005 C = as.numeric(apply(Z[,rvs],1,sum)>0) summary(lm(y.c~C)) ### MZ Test rvs = which(maf<0.05) ## Set the threshold to be 0.05 C = apply(Z[,rvs],1,sum) summary(lm(y.c~C)) rvs = which(maf<0.03) ## Set the threshold to be 0.03 C = apply(Z[,rvs],1,sum) summary(lm(y.c~C)) rvs = which(maf<0.01) ## Set the threshold to be 0.01 C = apply(Z[,rvs],1,sum) summary(lm(y.c~C)) rvs = which(maf<0.005) ## Set the threshold to be 0.005 C = apply(Z[,rvs],1,sum) summary(lm(y.c~C)) ### Weighted Count rvs = which(maf<0.05) ## Set the threshold to be 0.05 weights = 1/maf[rvs] C = Z[,rvs]%*%weights ## Weighted sum summary(lm(y.c~C)) weights = 1/maf ## DO NOT RESTRICT ANALYSIS TO RARE VARIANTS C = Z%*%weights summary(lm(y.c~C))