setwd( "C:/Users/kenrice/Desktop/SISG/SISG-14" )

##
## SnpStats
##

source("http://bioconductor.org/biocLite.R")
biocLite("snpStats") ### only do once, per version of R
library("snpStats")

load("AMDchrom1snpStats.Rdata") # downloaded from the course site

show(amd1)
snpsum <- summary(amd1)
snpsum  # rows and columns
sample.qc <- row.summary(amd1)
plot(sample.qc) # plot of call rate vs heterozygosity

cc.status <- rep(1:0, times=c(96,50) )

my.tests <- single.snp.tests(cc.status, snp.data=amd1)
summary(my.tests)

names(my.tests)
qq.chisq(chi.squared(my.tests, df=1), df=1)

##
## ShortRead
##

biocLite("ShortRead")
biocLite("yeastRNASeq") # NB a big download
library("ShortRead")
library("yeastRNASeq")

## what files do we have? (copied from the question)
files <- list.files(file.path(system.file(package = "yeastRNASeq"), "reads"), pattern = "bowtie", full.names = TRUE)
files

names(files) <- c("mut_1_f", "mut_2_f", "wt_1_f", "wt_2_f")

## read one of them
read1 <- readAligned(files[1], type = "Bowtie")
read1

## or load the package and data
library("yeastRNASeq")
data(yeastAligned)

## first file, looks the same
yeastAligned[["mut_1_f"]]

## extract positions
pos <- lapply(yeastAligned, position)
str(pos)

## plot four histograms
par(mfrow=c(4,1))
lapply(pos, hist)

## extract quality scores and turn into a matrix
a <- as( quality(yeastAligned[["mut_1_f"]]), "matrix" )

## plot mean quality by read position
par(mfrow=c(1,1))
plot(colMeans(a))

## there are 400,000 sequences, so use hexbin to plot quality by genomic position
library("hexbin")
plot(hexbin(pos[[1]],rowMeans(a),xbins=100))

##
## edgeR
##

biocLite("edgeR")
biocLite("pasilla")

library("edgeR")
library("pasilla")

## load the data
data(pasillaExons)
pasillaExons

## put them together
y <- DGEList(assayData(pasillaExons)$counts,
   group=phenoData(pasillaExons)@data$condition)
   
## filter, as in slides   
keep <- rowSums(cpm(y)>1) >= 3 
y <- y[keep,] 
y$samples$lib.size <- colSums(y$counts) 

## sanity check as in slides
plotMDS(y)

## analyse as in slides
y <- estimateCommonDisp(y, verbose=TRUE) 
y <- estimateTagwiseDisp(y) 
et <- exactTest(y) 
top <- topTags(et)
top