## Bonus material on haplotypes install.packages("haplo.stats") # if you need to library(haplo.stats) data(hla.demo) ?hla.demo # NB this is *not* SNP data (for once!) - it's highly multi-allelic summary(hla.demo) # first we select the genotypic data of interest; names(hla.demo) geno <- as.matrix(hla.demo[,c(17,18,21:24)]) # For brevity, we'll drop subjects with missing genotypes. # Then we run haplo.glm()'s own pre-processing code; keep <- !apply(is.na(geno) | geno==0, 1, any) hla.demo <- hla.demo[keep,] geno <- geno[keep,] # Above three lines drop the missing genotypes geno <- setupGeno(geno, miss.val=c(0,NA)) attributes(geno)$unique.alleles set.seed(4) #set R's random number generator to a specific position #(any position will do; knowing which you used makes analysis reproducible) h.glm1 <- haplo.glm(formula = resp ~ geno, data = hla.demo, allele.lev = attr(geno, "unique.alleles"), control = haplo.glm.control(haplo.freq.min = 0.025, haplo.min.info = 0.01, em.c = haplo.em.control(min.posterior = 0.001))) h.glm1 # gives output; refer to geno for the alleles set.seed(4) h.glm2 <- haplo.glm(formula = resp ~ geno, data = hla.demo, allele.lev = attr(geno, "unique.alleles"), control = haplo.glm.control(haplo.freq.min = 0.025/2, haplo.min.info = 0.01/2, em.c = haplo.em.control(min.posterior = 0.001/2))) h.glm2 # many more haplotypes! -but the main finding is broadly similar # now with logistic regression, for a binary phenotype h.glm3 <- haplo.glm(I(resp.cat=="low") ~ geno, family=binomial, data = hla.demo, allele.lev = attr(geno, "unique.alleles"), control = haplo.glm.control(haplo.freq.min = 0.025, haplo.min.info = 0.01, em.c = haplo.em.control(min.posterior = 0.001))) h.glm3 # really much the same story; one haplotype seems to be the only thing # plausibly affecting response set.seed(4) h.glm1.adj <- haplo.glm(formula = resp ~ geno + age + male, data = hla.demo, allele.lev = attr(geno, "unique.alleles"), control = haplo.glm.control(haplo.freq.min = 0.025, haplo.min.info = 0.01, em.c = haplo.em.control(min.posterior = 0.001))) h.glm1.adj # age and sex both seem unrelated to resp summary( glm(resp~age + male, data=hla.demo) ) # a sanity check