setwd( "C:/Users/kenrice/Desktop/SISG/SISG-14" ) ## ## SnpStats ## source("http://bioconductor.org/biocLite.R") biocLite("snpStats") ### only do once, per version of R library("snpStats") load("AMDchrom1snpStats.Rdata") # downloaded from the course site show(amd1) snpsum <- summary(amd1) snpsum # rows and columns sample.qc <- row.summary(amd1) plot(sample.qc) # plot of call rate vs heterozygosity cc.status <- rep(1:0, times=c(96,50) ) my.tests <- single.snp.tests(cc.status, snp.data=amd1) summary(my.tests) names(my.tests) qq.chisq(chi.squared(my.tests, df=1), df=1) ## ## ShortRead ## biocLite("ShortRead") biocLite("yeastRNASeq") # NB a big download library("ShortRead") library("yeastRNASeq") ## what files do we have? (copied from the question) files <- list.files(file.path(system.file(package = "yeastRNASeq"), "reads"), pattern = "bowtie", full.names = TRUE) files names(files) <- c("mut_1_f", "mut_2_f", "wt_1_f", "wt_2_f") ## read one of them read1 <- readAligned(files[1], type = "Bowtie") read1 ## or load the package and data library("yeastRNASeq") data(yeastAligned) ## first file, looks the same yeastAligned[["mut_1_f"]] ## extract positions pos <- lapply(yeastAligned, position) str(pos) ## plot four histograms par(mfrow=c(4,1)) lapply(pos, hist) ## extract quality scores and turn into a matrix a <- as( quality(yeastAligned[["mut_1_f"]]), "matrix" ) ## plot mean quality by read position par(mfrow=c(1,1)) plot(colMeans(a)) ## there are 400,000 sequences, so use hexbin to plot quality by genomic position library("hexbin") plot(hexbin(pos[[1]],rowMeans(a),xbins=100)) ## ## edgeR ## biocLite("edgeR") biocLite("pasilla") library("edgeR") library("pasilla") ## load the data data(pasillaExons) pasillaExons ## put them together y <- DGEList(assayData(pasillaExons)$counts, group=phenoData(pasillaExons)@data$condition) ## filter, as in slides keep <- rowSums(cpm(y)>1) >= 3 y <- y[keep,] y$samples$lib.size <- colSums(y$counts) ## sanity check as in slides plotMDS(y) ## analyse as in slides y <- estimateCommonDisp(y, verbose=TRUE) y <- estimateTagwiseDisp(y) et <- exactTest(y) top <- topTags(et) top