## ## Date: July 13, 2005 ## Author: Ken Rice ## ###### Program: SNP funnel plot ###### ## Funnel plots for collections of SNPs, ## starting from estimate/std.error output ## - could be used in many other circumstances ## THREE EXAMPLES FOLLOW CODE ################################################ ## DESCRIPTION OF THE INPUT ## 1) snp.names: names of the snps ## 2) estimates : estimates of the model (log(OR)) (do not include the constant/intercept term) ## 3) se : standard errors for the estimates ## OPTIONAL INPUTS ## 1) plot.name : used for the main title of the plot ## 2) pval.list : list of p-values for prediction intervals; default is c(0.05,0.01), set to NULL for no intervals ## 3) bonf.list : as pval.list but bonferroni adjusted ## 4) null.line : TRUE or FALSE; should a horizontal line be drawn through zero, the null value? ## 5) legend.flag : TRUE or FALSE; should a legend be drawn automatically? ## 6) ylim : y-axis limits; default is plus/minus the largest estimate * 1.1; it is suggested these are kept symmetric ## 7) pthresh: threshold p-value below which "interesting" points are labelled for identification. Default is 0.01, NULL turns off labelling ################################################ ## DESCRIPTION OF THE OUTPUT ## A funnel plot is returned, with a point representing each SNP. ## Points noticeably outside the central funnel represents SNPs which show more association than might be ## expected under random variation snpfunnel <- function( snp.names, estimates, se, plot.name="", pval.list=c(0.05, 0.01), bonf.list=c(0.05, 0.01), null.line=T, legend.flag=T, ylim= c(-1,1)*max(abs(estimates*1.1)), pthresh= 0.01, ...){ n <- length(snp.names) plot(x = 1/se^2, y=estimates, ylim=ylim, main=plot.name, xlab="1/std error^2", ylab="log OR est", ...) xlist <- seq(min(1/se^2), max(1/se^2), l=301) npval <- length(pval.list) nbonf <- length(bonf.list) # prediction intervals if(length(pval.list)!=0){ for(i in 1:npval){ lines(xlist, qnorm(1-pval.list[i]/2)/sqrt(xlist), lty=i, lwd=2) lines(xlist, -qnorm(1-pval.list[i]/2)/sqrt(xlist), lty=i, lwd=2) } pval.names <- paste("regular p=",pval.list) pval.lty <- 1:npval } else { pval.names <- NULL pval.lty <- NULL } # bonferroni intervals if(length(bonf.list)!=0){ for(i in 1:nbonf){ lines(xlist, qnorm(1-bonf.list[i]/2/n)/sqrt(xlist), lty=i, lwd=1) lines(xlist, -qnorm(1-bonf.list[i]/2/n)/sqrt(xlist), lty=i, lwd=1) } bonf.names <- paste("bonf adj p=", bonf.list) bonf.lty <- 1:nbonf } else { bonf.names <- NULL bonf.lty <- NULL } # null line if(null.line) abline(h=0, lty = max(npval, nbonf) + 1 ) # legend # NOTE; the new "topright" option for location in R v2.1 would be better than my auto-positioning # legend and labelling are only intended for a first (automated) effort; hand-craft your own for prettyness if(legend.flag){ legend( x = quantile(xlist, c(0.5, 1)), y = c(1,0.5)*max(abs(estimates*1.1)), legend= c( pval.names, bonf.names), lty= c(pval.lty, bonf.lty), lwd=rep(2:1, times=c(npval, nbonf) ) )} # labels if(!is.null(pthresh) ){ snp.label.flag <- abs(estimates/se) > qnorm(1-pthresh/2) text(x= 1/se[snp.label.flag]^2, y= estimates[snp.label.flag], labels=snp.names[snp.label.flag], pos=2+sign(estimates[snp.label.flag]), cex=0.7) } invisible() } ## ## Examples ## #First example from the paper, uses raw OR estimates and confidence intervals diabsnp <- read.table("diabsnp.txt", header=T) snpfunnel(diabsnp[,1], log(diabsnp[,2]), (log(diabsnp[,4])-log(diabsnp[,3]))/2/1.96) #Second analysis from the paper, using count data elyall <- read.table("elybasic.txt") elylist <- t(apply(elyall, 1,FUN=function(x){summary(glm(matrix(as.numeric(x[c(3,5,7,9,11,13)]),3,2)~I(1:3), family=binomial))$coefficients[2,1:2] })) snpfunnel(elyall[,1], elylist[,1], elylist[,2], pval.list=0.05, bonf.list=0.05, pthresh=0.05) #Third example from the paper - three sets of computations before plotting rheumall <- read.table("rheumsnp.txt") rheumlist1 <- t(apply(rheumall, 1,FUN=function(x){summary(glm(matrix(as.numeric(x),3,2)~I(1:3), family=binomial))$coefficients[2,1:2] })) rheumlist2 <- t(apply(rheumall, 1,FUN=function(x){summary(glm(matrix(as.numeric(x),3,2)~c(0,1,1), family=binomial))$coefficients[2,1:2] })) rheumlist3 <- t(apply(rheumall, 1,FUN=function(x){summary(glm(matrix(as.numeric(x),3,2)~c(0,0,1), family=binomial))$coefficients[2,1:2] })) rheumlist <- rbind(rheumlist1, rheumlist2, rheumlist3) rheumlist.names <- c( paste("trend",1:18), paste("dominant",1:18), paste("recessive",1:18)) snpfunnel(rheumlist.names, rheumlist[,1], rheumlist[,2], pch=rep(c(22,24,19), each=18) )