#Andrea Cook #Cumulative Residual Method #################################################################################### #Function to run GLM and get parameters of interest glmresid<-function(Y,X) { if(is.null(X)) { model1<-glm(Y~rep(1,length(Y))+0,family=binomial()) } else { model1<-glm(Y~X,family=binomial()) } beta<-as.matrix(coef(model1)) fitted<-as.matrix(fitted.values(model1)) residual<-(Y-fitted) Iinv<-vcov(model1) return(list(residual=residual,beta=beta,fitted=fitted,Iinv=Iinv)) } #################################################################################### #Function to indicate if locations are in rectangle in.rec<-function(loc,x,y,b) { inrec<-rep(0,nrow(loc)) for(i in 1:nrow(loc)) { if(loc[i,1]>(x-b[1]) & loc[i,1]<=x & loc[i,2]>(y-b[2]) & loc[i,2]<=y) { inrec[i]<-1 } } return(inrec) } # Functions to create Random Discrete Distribution Matrixes func1to1<-function(nsims,length) { return(matrix(sample(c(-1,1),size=(nsims*length),replace=TRUE,prob=c(.5,.5)), ncol=nsims)) } #################################################################################### #Function to calculate w.hat W.hat<-function(Z,inrec,part2,residual) { n<-length(residual) what<-1/sqrt(n)*sum((inrec+part2)*residual*Z) return(what) } #################################################################################### #Function to run Cumulative Residual Tests library(MASS) CumRes.Bern<-function(Y,loc,X,b,jumps,xarea=NULL,yarea=NULL,nkeep=100,nsims=1000) { # Run Model on Data fitval<-glmresid(Y,X) residual<-fitval\$residual Beta<-fitval\$beta fitted<-fitval\$fitted Iinv<-fitval\$Iinv n<-length(Y) Xint<-as.matrix(cbind(rep(1,n),X)) # Obtain standard normal vectors Z<-t(mvrnorm(nsims,rep(0,n),diag(1,n))) D<-func1to1(nsims,n) if(is.null(xarea)|is.null(yarea)) { # Starting Point for x and y xstart<-min(loc[,1])+b[1] ystart<-min(loc[,2])+b[2] # Finishing Point for x and y xfin<-max(loc[,1]) yfin<-max(loc[,2]) } else { # Starting Point for x and y xstart<-xarea[1]+b[1] ystart<-yarea[1]+b[2] # Finishing Point for x and y xfin<-xarea[2] yfin<-yarea[2] } # Values of x and y to be evaluated x<-seq(xstart,xfin,by=jumps[1]) y<-seq(ystart,yfin,by=jumps[2]) # Run through all possible pairings of x and y xright<-NULL yright<-NULL Wobs<-NULL WhatZkeep<-NULL WhatDkeep<-NULL SlocZhat<-NULL SlocDhat<-NULL const<-(matrix(rep(exp(Xint%*%Beta)/(1+exp(Xint%*%Beta))^2,ncol(Xint)),nrow=n) *Xint) for(xi in 1:length(x)) { for(yi in 1:length(y)) { inrec<-in.rec(loc,x[xi],y[yi],b) if(sum(inrec)>0) { xright<-rbind(xright,x[xi]) yright<-rbind(yright,y[yi]) Wobs<-rbind(Wobs,1/sqrt(n)*sum(inrec*residual)) # Fixed Parts vt<-(-1)*t(inrec)%*%const part2<-t(vt%*%Iinv%*%t(Xint)) WhatZcur<-apply(Z,2,W.hat,inrec=inrec,part2=part2,residual=residual) WhatDcur<-apply(D,2,W.hat,inrec=inrec,part2=part2,residual=residual) # Sup of What SlocZhatcur<-rbind(SlocZhat,WhatZcur) SlocZhat<-apply(SlocZhatcur,2,max) SlocDhatcur<-rbind(SlocDhat,WhatDcur) SlocDhat<-apply(SlocDhatcur,2,max) # Keeping nkeep of What WhatZkeep<-rbind(WhatZkeep,WhatZcur[1:nkeep]) WhatDkeep<-rbind(WhatDkeep,WhatDcur[1:nkeep]) } } cat("Iteration = ", xi*yi, "\n") } # Calculate One-Sided Test Sloc<-max(Wobs) #Location of the highest hotspot maxloc<-cbind(xright[Wobs==Sloc], yright[Wobs==Sloc]) colnames(maxloc)<-c("x","y") pvalSZ<-sum(Sloc<=SlocZhat)/nsims pvalSD<-sum(Sloc<=SlocDhat)/nsims pval<-cbind(pvalSZ,pvalSD) colnames(pval)<-c("Z","D") # 70,80,90, and 95% Critical Value for What critZ<-quantile(SlocZhat,prob=c(.9,.95,.975)) critD<-quantile(SlocDhat,prob=c(.9,.95,.975)) crit<-cbind(critZ,critD) colnames(crit)<-c("Z","D") meanSlochat<-cbind(mean(SlocZhat),mean(SlocDhat)) colnames(meanSlochat)<-c("Z","D") quantSlocZhat<-quantile(SlocZhat,prob=c(.025,.05,.1,.5,.9,.95,.975)) quantSlocDhat<-quantile(SlocDhat,prob=c(.025,.05,.1,.5,.9,.95,.975)) quantSlochat<-cbind(quantSlocZhat,quantSlocDhat) colnames(quantSlochat)<-c("Z","D") return(list(Y=Y,X=X,loc=loc,Wobs=Wobs, WhatZkeep=WhatZkeep,WhatDkeep=WhatDkeep, xright=xright,yright=yright,b=b,nsims=nsims, Sloc=Sloc,pval=pval,maxloc=maxloc,crit=crit, meanSlochat=meanSlochat, quantSlochat=quantSlochat)) } ########################################################################## #Plotting a Square squarep<-function(x,y,b,density=0,border=NA,col="lightpink",lty=1) { x<-c(x-b[1],x-b[1],x,x) y<-c(y,y-b[2],y-b[2],y) polygon(x,y,density=density,border=border,col=col,lty=lty) } ### Plot Function ### plot.CR<-function(CRresult,ylim=NULL,xlim=NULL,xlab="X1",ylab="X2",main="Cumulative Residual",type="Discrete") { b<-CRresult\$b stratum<-CRresult\$stratum Y<-CRresult\$Y loc<-CRresult\$loc x<-loc[,1] y<-loc[,2] #Location of the highest hotspot maxloc<-CRresult\$maxloc if(type=="Discrete") { pvalS<-CRresult\$pval[2] # All areas with .05 significant level high residual clustering xyall<-cbind(CRresult\$xright[CRresult\$Wobs>=CRresult\$crit[3,2]], CRresult\$yright[CRresult\$Wobs>=CRresult\$crit[3,2]]) } if(type=="Normal") { pvalS<-CRresult\$pval[1] # All areas with .05 significant level high residual clustering xyall<-cbind(CRresult\$xright[CRresult\$Wobs>=CRresult\$crit[3,1]], CRresult\$yright[CRresult\$Wobs>=CRresult\$crit[3,1]]) } xall<-xyall[,1] yall<-xyall[,2] if(!is.null(xlim)) { xlim<-c(min(x),max(x)) } if(!is.null(ylim)) { ylim<-c(min(y),max(y)) } plot(x[Y==1],y[Y==1],xlab=xlab,ylab=ylab, main=main,pch=2,xlim=xlim,ylim=ylim) # Find Significant Areas for(j in 1:length(xall)) { squarep(xall[j],yall[j],b,col="lightgray",density=40) } points(x[Y==0],y[Y==0]) points(x[Y==1],y[Y==1],pch=2) squarep(maxloc[1],maxloc[2],b,border="red",lty=1) mtext(paste("P-value = ",pvalS),side=1,line=4) } plot.CRs<-function(CRresult,ylim=NULL,xlim=NULL,xlab="X1",ylab="X2", main="Cumulative Residual",type="Discrete") { par(omi=c(.5,.1,.5,0),mai=c(.8,.8,.25,.25),cex.axis=.75,cex.lab=.75) nf<-layout(matrix(c(1,2),nrow=1,byrow=T)) layout.show(nf) b<-CRresult\$b xright<-CRresult\$xright yright<-CRresult\$yright if(type=="Discrete"|type=="discrete"|type=="D") { tnum<-2 Whatkeep<-CRresult\$WhatDkeep } if(type=="Normal"|type=="normal"|type=="N") { tnum<-1 Whatkeep<-CRresult\$WhatZkeep } Wobs<-CRresult\$Wobs ##################################### # Obtain average x Whatkeep.x<-Whatkeep[order(xright),] Wobs.x<-Wobs[order(xright),] xright.o<-xright[order(xright),] num.x<-table(xright.o) xright.u<-c(min(xright.o)-b[1],unique(xright.o)) Whatkeep.x.max<-matrix(rep(0,ncol(Whatkeep.x)),nrow=1) Wobs.x.max<-0 i<-1 for(j in 1:length(num.x)) { if(num.x[j]==1) { Whatkeep.x.max<-rbind(Whatkeep.x.max,Whatkeep.x[i,]) Wobs.x.max<-rbind(Wobs.x.max,Wobs.x[i]) } else { Whatkeep.x.max<-rbind(Whatkeep.x.max,apply(Whatkeep.x[i:(i+num.x[j]-1),],2,max)) Wobs.x.max<-rbind(Wobs.x.max,max(Wobs.x[i:(i+num.x[j]-1)])) } i<-i+num.x[j] } xright.u<-c(xright.u,max(xright.o)+b[1]) Whatkeep.x.max<-rbind(Whatkeep.x.max,matrix(rep(0,ncol(Whatkeep.x)),nrow=1)) Wobs.x.max<-rbind(Wobs.x.max,0) if(is.null(xlim)) { xlim<-c(min(xright.u),max(xright.u)) } yax<-c(min(Whatkeep.x.max),max(Whatkeep.x.max)) plot(xright.u,Wobs.x.max,type="l",xlab="", ylab=expression(paste(SUP[X[2]] ," CUMULATIVE RESIDUALS")), xlim=xlim,ylim=yax) for(k in 1:30) { lines(xright.u,Whatkeep.x.max[,k],col="gray") } lines(xright.u,Wobs.x.max,type="l") mtext(paste("b1 = ",b[1]),side=3,line=0.1,cex=.75) mtext(main,outer=T,cex=1.5) mtext(paste(xlab),side=1,line=2,cex=.75) ##################################### # Obtain average y Whatkeep.y<-Whatkeep[order(yright),] Wobs.y<-Wobs[order(yright),] yright.o<-yright[order(yright),] num.y<-table(yright.o) yright.u<-c(min(yright.o)-b[2],unique(yright.o)) Whatkeep.y.max<-matrix(rep(0,ncol(Whatkeep.y)),nrow=1) Wobs.y.max<-0 i<-1 for(j in 1:length(num.y)) { Whatkeep.y.max<-rbind(Whatkeep.y.max,apply(Whatkeep.y[i:(i+num.y[j]-1),],2,max)) Wobs.y.max<-rbind(Wobs.y.max,max(Wobs.y[i:(i+num.y[j]-1)])) i<-i+num.y[j] } yright.u<-c(yright.u,max(yright.o)+b[2]) Whatkeep.y.max<-rbind(Whatkeep.y.max,matrix(rep(0,ncol(Whatkeep.y)),nrow=1)) Wobs.y.max<-rbind(Wobs.y.max,0) if(is.null(ylim)) { ylim<-c(min(yright.u),max(yright.u)) } yax<-c(min(Whatkeep.y.max),max(Whatkeep.y.max)) plot(yright.u,Wobs.y.max,type="l",xlab="", ylab=expression(paste(SUP[X[1]] ," CUMULATIVE RESIDUALS")), ylim=yax,xlim=ylim) for(k in 1:30) { lines(yright.u,Whatkeep.y.max[,k],col="gray") } lines(yright.u,Wobs.y.max,type="l") mtext(paste("b2 = ",b[2]),side=3,line=0.1,cex=.75) mtext(paste(ylab),side=1,line=2,cex=.75) mtext(paste("P-Value:",CRresult\$pval[tnum]),side=1,cex=.75,outer=T) }