## setwd("C:\\Users\\mcwu\\Desktop\\Exercises\\Lecture10") ################################################################################################################## ################################################################################################################## ### R Scripts: let's analyze chr22 for association with Y1 using both SKAT and also count based collapsing library(SKAT) # Create the MW File File.Bed<-"./ExomeChr22.bed" File.Bim<-"./ExomeChr22.bim" File.Fam<-"./ExomeChr22.fam" File.Cov<-"./ExomeChr22.Cov" File.SetID<-"./ExomeChr22.SetID" File.SSD<-"./ExomeChr22.SSD" File.Info<-"./ExomeChr22.SSD.info" # To use binary ped files, you have to generate SSD file first. # If you already have a SSD file, you do not need to call this function. Generate_SSD_SetID(File.Bed, File.Bim, File.Fam, File.SetID, File.SSD, File.Info) ############################################## # Open SSD, and Run SKAT # FAM<-Read_Plink_FAM_Cov(File.Fam, File.Cov) obj<-SKAT_Null_Model(Y1~X1+X2, out_type="C", data=FAM) # # To use a SSD file, please open it first. # SSD.INFO<-Open_SSD(File.SSD, File.Info) # Number of samples SSD.INFO$nSample # Number of Sets SSD.INFO$nSets re.SKAT<-SKAT.SSD.All(SSD.INFO, obj) ## original SKAT re.Burden<-SKAT.SSD.All(SSD.INFO, obj,r.corr=1) ## weighted count based collapsing re.SKATO<-SKAT.SSD.All(SSD.INFO, obj, method="optimal.adj") ## optimal omnibus test ################################################################################################################## ################################################################################################################## ### Re-analyzing the transferrin data set: THIS WILL USE BOTH PLINK AND R SCRIPTS ### Make sure that you are in the correct directory (where the transferrin data set is) ### we will need to create the setID file: this is often the most irritating part ### We begin by downloading the refseq transcripts (this is in the refGene.txt file). Then the following code will ### generate the setIDs. ################################################################################################################## ### R script for generating gene list refgenes = read.table("refGene.txt", sep = "\t", as.is = T, quote = "", header = F) dim(refgenes) chrs = refgenes[,3] # chromosomes, but there is a lot of junk in the chromsome names here that needs to be filtered. chrs = sapply(strsplit(chrs,"[r_]"),"[", 2) omits = c(which(chrs == "Y"), which(chrs=="X")) ## drop sex chromosomes out = cbind(chrs[-omits], refgenes[-omits, 5], refgenes[-omits, 6], refgenes[-omits, 13], refgenes[-omits, 13]) write.table(out, file = "gene.list", sep = "\t", quote =F, row.name = F, col.name = F) ### Note that there are multiple transcripts per gene, so we will condense these. ### In reality, we could also do a transcript level analysis. ################################################################################################################## ### plink script for generating set ids (not quite ready for skat format yet) plink --bfile transferrin --make-set gene.list --make-set-collapse-group --write-set ################################################################################################################## ### R script for generating setIDs which can be fed to SKAT sets = scan("plink.set", what = "char") setname = sets[1] i = 2 sink("transferrin.SetID") while (1) { while (sets[i] != "END") { cat(setname,"\t",sets[i],"\n", sep = "") i = i+1 } if (i == length(sets)) break; i = i+1 setname = sets[i] i = i+1 } sink() ### NOTE: I HAVE NOT BEEN CAREFUL ABOUT MATCHING REFSEQ GENE DEFINITIONS TO THE VERSION OF THE ### GENOME USED IN THE TRANSFERRIN DATA!!! YOU WILL NEED TO DO THIS IF YOU HAVE YOUR OWN DATA. ################################################################################################################## ### R script for doing the analysis with SKAT # Create the MW File File.Bed<-"./Transferrin.bed" File.Bim<-"./Transferrin.bim" #File.Fam<-"./Transferrin.fam" File.Cov<-"./Tr.pheno" File.SetID<-"./Transferrin.SetID" File.SSD<-"./Transferrin.SSD" File.Info<-"./Transferrin.SSD.info" # To use binary ped files, you have to generate SSD file first. # If you already have a SSD file, you do not need to call this function. Generate_SSD_SetID(File.Bed, File.Bim, File.Fam, File.SetID, File.SSD, File.Info) ############################################## # Open SSD, and Run SKAT # phenotypes = read.table(File.Cov, header = F) y = phenotypes[,3] obj<-SKAT_Null_Model(y~1, out_type="C") # # To use a SSD file, please open it first. # SSD.INFO<-Open_SSD(File.SSD, File.Info) # Number of samples SSD.INFO$nSample # Number of Sets SSD.INFO$nSets re.SKAT<-SKAT.SSD.All(SSD.INFO, obj, kernel = "linear") ## note the use of the linear kernel