##
## Date: July 13, 2005
## Author: Ken Rice
##

###### Program: SNP funnel plot ######
## Funnel plots for collections of SNPs,
## starting from estimate/std.error output
## - could be used in many other circumstances

## THREE EXAMPLES FOLLOW CODE

################################################
##  DESCRIPTION OF THE INPUT
##  1) snp.names: names of the snps
##  2) estimates : estimates of the model (log(OR)) (do not include the constant/intercept term)
##  3) se : standard errors for the estimates

##  OPTIONAL INPUTS

##  1) plot.name : used for the main title of the plot
##  2) pval.list : list of p-values for prediction intervals; default is c(0.05,0.01), set to NULL for no intervals
##  3) bonf.list : as pval.list but bonferroni adjusted
##  4) null.line : TRUE or FALSE; should a horizontal line be drawn through zero, the null value?
##  5) legend.flag : TRUE or FALSE; should a legend be drawn automatically?
##  6) ylim : y-axis limits; default is plus/minus the largest estimate * 1.1; it is suggested these are kept symmetric
##  7) pthresh: threshold p-value below which "interesting" points are labelled for identification. Default is 0.01, NULL turns off labelling

################################################
##  DESCRIPTION OF THE OUTPUT

## A funnel plot is returned, with a point representing each SNP. 
## Points noticeably outside the central funnel represents SNPs which show more association than might be
## expected under random variation

snpfunnel <- function(	snp.names,
			estimates,
			se,
			plot.name="",
			pval.list=c(0.05, 0.01),
			bonf.list=c(0.05, 0.01),
			null.line=T,
			legend.flag=T,
			ylim= c(-1,1)*max(abs(estimates*1.1)), 
			pthresh= 0.01, ...){

	n <- length(snp.names)
	plot(x = 1/se^2, y=estimates, 
	ylim=ylim, main=plot.name, xlab="1/std error^2", ylab="log OR est", ...)
	
	xlist <- seq(min(1/se^2), max(1/se^2), l=301)
	npval <- length(pval.list)
	nbonf <- length(bonf.list)

	# prediction intervals
	if(length(pval.list)!=0){
		for(i in 1:npval){
			lines(xlist,  qnorm(1-pval.list[i]/2)/sqrt(xlist), lty=i, lwd=2)
			lines(xlist, -qnorm(1-pval.list[i]/2)/sqrt(xlist), lty=i, lwd=2)
			}
		pval.names <- paste("regular p=",pval.list)
		pval.lty <- 1:npval
	}
	else {
		pval.names <- NULL
		pval.lty <- NULL
	}

	# bonferroni intervals
	if(length(bonf.list)!=0){
		for(i in 1:nbonf){
			lines(xlist,  qnorm(1-bonf.list[i]/2/n)/sqrt(xlist), lty=i, lwd=1)
			lines(xlist, -qnorm(1-bonf.list[i]/2/n)/sqrt(xlist), lty=i, lwd=1)
			}
		bonf.names <- paste("bonf adj p=", bonf.list)
		bonf.lty <- 1:nbonf
	}
	else {
		bonf.names <- NULL
		bonf.lty <- NULL
	}

	# null line
	if(null.line) abline(h=0, lty = max(npval, nbonf) + 1 )

	# legend
	# NOTE; the new "topright" option for location in R v2.1 would be better than my auto-positioning
	# legend and labelling are only intended for a first (automated) effort; hand-craft your own for prettyness
	if(legend.flag){
		legend( x = quantile(xlist, c(0.5, 1)), y = c(1,0.5)*max(abs(estimates*1.1)),
			legend=	c( pval.names, bonf.names),
			lty= c(pval.lty, bonf.lty), lwd=rep(2:1, times=c(npval, nbonf) )
		)}

	# labels
	if(!is.null(pthresh) ){
		snp.label.flag <- abs(estimates/se) > qnorm(1-pthresh/2)
		text(x= 1/se[snp.label.flag]^2, y= estimates[snp.label.flag], labels=snp.names[snp.label.flag], pos=2+sign(estimates[snp.label.flag]), cex=0.7)
	}
invisible()
}

##
## Examples
##

#First example from the paper, uses raw OR estimates and confidence intervals

diabsnp <- read.table("diabsnp.txt", header=T)
snpfunnel(diabsnp[,1], log(diabsnp[,2]), (log(diabsnp[,4])-log(diabsnp[,3]))/2/1.96)

#Second analysis from the paper, using count data

elyall <- read.table("elybasic.txt")
elylist <- t(apply(elyall, 1,FUN=function(x){summary(glm(matrix(as.numeric(x[c(3,5,7,9,11,13)]),3,2)~I(1:3), family=binomial))$coefficients[2,1:2]
}))
snpfunnel(elyall[,1], elylist[,1], elylist[,2], pval.list=0.05, bonf.list=0.05, pthresh=0.05)

#Third example from the paper - three sets of computations before plotting

rheumall <- read.table("rheumsnp.txt")
rheumlist1 <- t(apply(rheumall, 1,FUN=function(x){summary(glm(matrix(as.numeric(x),3,2)~I(1:3), family=binomial))$coefficients[2,1:2] }))
rheumlist2 <- t(apply(rheumall, 1,FUN=function(x){summary(glm(matrix(as.numeric(x),3,2)~c(0,1,1), family=binomial))$coefficients[2,1:2] }))
rheumlist3 <- t(apply(rheumall, 1,FUN=function(x){summary(glm(matrix(as.numeric(x),3,2)~c(0,0,1), family=binomial))$coefficients[2,1:2] }))

rheumlist <- rbind(rheumlist1, rheumlist2, rheumlist3)
rheumlist.names <- c( paste("trend",1:18), paste("dominant",1:18), paste("recessive",1:18))

snpfunnel(rheumlist.names, rheumlist[,1], rheumlist[,2], pch=rep(c(22,24,19), each=18) )