# Andrea Cook # Cumulative Geographic Residual for Repeated Measures # CumRes.REP - Method when only a single address per repeated measure # CumRes.REPCum - Method when you want to weight previous location history # Must load MASS and geepack package library(MASS) library(geepack) #################################################################################### # Function to run Area-Level Repeated Measures Cumulative Geographic Residual Tests # Data Requirements: # Y = Vector of outcomes of the length of sum(K_i) where K_i is the number of # observations of individual i. # X = Matrix of covariates (Make NULL for no covariates) # id = unique identifier of each individual # time = vector of time points to indicate which repeated measure # loc = two column matrix of location of individual # model = formula for how you want to model the relationship to Y and X # corstr = correlation structure as accepted by geeglm # inarea = matrix of number of areas by number of potential clusters with # each column is an indicator function if the area is in cluster # bxy = matrix with number of areas by 3 with the first column being # b, half-edge length of square potential cluster, the second two columns # are the location of the center of the square of potential cluster # b, xstart, ystart, xfin, yfin only needed if inarea and bxy are not specified # b = a vector of 1/2 edge-length of square clusters to be looked at # xstart, ystart = values of location to start to create grid to look for clusters # xfin, yfin = values of location to finish creating grid to to look for clusters # nkeep = number of simulated Whats to keep # nsims = number of total simulated Whats CumRes.REP<-function(Y,X,id,time,loc,model=NULL,corstr="exch", inarea=NULL,bxy=NULL,b=NULL,xstart=NULL,ystart=NULL,xfin=NULL,yfin=NULL, tpoint=NULL,nkeep=100,nsims=1000) { n<-length(unique(id)) nM<-length(id) Y<-Y[order(id,time)] loc<-loc[order(id,time),] if(!is.null(X)) X<-X[order(id,time),] time<-time[order(id,time)] id<-id[order(id)] # Run Model on Data without covariates if(is.null(model)) { if(is.null(X)) model<-formula(Y~as.factor(time)) else model<-formula(Y~as.factor(time)+X) } model1<-geeglm(model,id=id,family=binomial,corstr=corstr) beta<-model1$coefficients alpha<-model1$geese$alpha Xmod<-model1$geese$X p<-ncol(Xmod) repeats<-model1$geese$clusz # Residuals resid<-model1$residuals # Obtain Fixed Components if(corstr=="exch") DV<-exchCOR(model1) if(corstr=="ar1") DV<-ar1COR(model1) if(corstr=="unst") DV<-unstCOR(model1) Di<-DV$D Vinvi<-DV$Vinv I<-DV$I # Obtain standard normal and discrete vectors Z<-t(mvrnorm(nsims,rep(0,n),diag(1,n))) if(is.null(inarea)|is.null(inarea)) { if(is.null(xstart)) xstart<-min(loc[,1]) if(is.null(ystart)) ystart<-min(loc[,2]) if(is.null(xfin)) xfin<-max(loc[,1]) if(is.null(yfin)) yfin<-max(loc[,2]) bxycur<-NULL for(i in 1:length(b)) { # Finishing Point for x and y xfin<-xfin+b[i]/2 yfin<-yfin+b[i]/2 # Values of x and y to be evaluated x<-seq(xstart,xfin,by=b[i]/2) y<-seq(ystart,yfin,by=b[i]/2) bxycur<-rbind(bxycur,cbind(b[i],expand.grid(x,y))) } remove(x,y) cat("Obtained Expanded Grid: dim = ",dim(bxycur),"\n") bxy<-NULL Wobs<-NULL What<-NULL # constant of nuin connuin<-Xmod*matrix(rep(exp(Xmod%*%beta)/(1+exp(Xmod%*%beta))^2,p),ncol=p) Iinv<-solve(I) # Loop through bxy to calculate Wobs, What for(j in 1:nrow(bxycur)) { inarea<-as.matrix(in.areaSQ(bxycur[j,],loc=loc)) # For specific time point if(!is.null(tpoint)) inarea<-inarea*as.numeric(time==tpoint) #Remove Potential Clusters Without Data if(sum(inarea)>0) { bxy<-rbind(bxy,bxycur[j,]) # Calculate Wobs Wobs<-rbind(Wobs,1/sqrt(n)*t(inarea)%*%resid) # Obtain Nu.in nuin<-(-1)*t(inarea)%*%(connuin) # Obtain fixedpart fixedpart<-NULL i<-1 k<-1 while(i<=n) { icur<-c(k:(k+repeats[i]-1)) fixedpart<-cbind(fixedpart,t(inarea[icur])%*%resid[icur]+ nuin%*%Iinv%*%t(Di[[i]])%*%Vinvi[[i]]%*%resid[icur]) k<-k+repeats[i] i<-i+1 } What<-rbind(What,1/sqrt(n)*fixedpart%*%Z) } if(round(j*.01)==j*.01) cat("j = ",j,"\n") } cat("Obtained What Distribution","\n") print(dim(What)) } else { # constant of nuin connuin<-Xmod*matrix(rep(exp(Xmod%*%beta)/(1+exp(Xmod%*%beta))^2,p),ncol=p) Iinv<-solve(I) # For specific time point if(!is.null(tpoint)) inarea<-inarea*as.numeric(time==tpoint) # Calculate Wobs Wobs<-rbind(Wobs,1/sqrt(n)*t(inarea)%*%resid) # Obtain Nu.in nuin<-(-1)*t(inarea)%*%(connuin) # Obtain fixedpart fixedpart<-NULL i<-1 k<-1 while(i<=n) { icur<-c(k:(k+repeats[i]-1)) fixedpart<-cbind(fixedpart,t(inarea[icur])%*%resid[icur]+ nuin%*%Iinv%*%t(Di[[i]])%*%Vinvi[[i]]%*%resid[icur]) k<-k+repeats[i] i<-i+1 } What<-1/sqrt(n)*fixedpart%*%Z cat("Obtained What Distribution","\n") print(dim(What)) } # Sup of What Slochat<-apply(What,2,max) # Keeping nkeep of What Whatkeep<-What[,1:nkeep] # Calculate One-Sided Test Sloc<-max(Wobs) pval<-sum(Sloc<=Slochat)/nsims # 70,80,90, and 95% Critical Value for W Slochat<-sort(Slochat) crit<-c(Slochat[round(.7*nsims)],Slochat[round(.8*nsims)], Slochat[round(.9*nsims)],Slochat[round(.95*nsims)]) meanSlochat<-mean(Slochat) quantSlochat<-quantile(Slochat,prob=c(.05,.1,.5,.9,.95)) return(list(Y=Y,Xmod=Xmod,loc=loc,Wobs=Wobs, Whatkeep=Whatkeep, bxy=bxy,nsims=nsims, Sloc=Sloc,tpoint=tpoint,pval=pval,crit=crit, meanSlochat=meanSlochat, quantSlochat=quantSlochat)) } #################################################################################### # Function to run Repeated Measures Cumulative Geographic Residual Test # allowing multiple addresses for a given time point (weight past history) # Data Requirements: # Y = Vector of outcomes of the length of sum(K_i) where K_i is the number of # observations of individual i. # X = Matrix of covariates (Make NULL for no covariates) # id = unique identifier of each individual # time = vector of time points to indicate which repeated measure # locx = vector of x-coordinates of all locations for individual i # locy = vector of y-coordinates of all locations for individual i # w = weight to be used for a given address at every time point # model = formula for how you want to model the relationship to Y and X # corstr = correlation structure as accepted by geeglm # inarea = matrix of number of areas by number of potential clusters with # each column is an indicator function if the area is in cluster # bxy = matrix with number of areas by 3 with the first column being # b, half-edge length of square potential cluster, the second two columns # are the location of the center of the square of potential cluster # b, xstart, ystart, xfin, yfin only needed if inarea and bxy are not specified # b = a vector of 1/2 edge-length of square clusters to be looked at # xstart, ystart = values of location to start to create grid to look for clusters # xfin, yfin = values of location to finish creating grid to to look for clusters # nkeep = number of simulated Whats to keep # nsims = number of total simulated Whats CumRes.REPCum<-function(Y,X,id,time,locx,locy,w,model=NULL,corstr="exch", inarea=NULL,bxy=NULL,b=NULL,xstart=NULL,ystart=NULL,xfin=NULL,yfin=NULL, tpoint=NULL,nkeep=100,nsims=1000) { n<-length(unique(id)) nM<-length(id) Y<-Y[order(id,time)] locx<-locx[order(id,time),] locy<-locy[order(id,time),] w<-w[order(id,time),] if(!is.null(X)) X<-X[order(id,time),] time<-time[order(id,time)] id<-id[order(id)] # Run Model on Data without covariates if(is.null(model)) { if(is.null(X)) model<-formula(Y~as.factor(time)) else model<-formula(Y~as.factor(time)+X) } model1<-geeglm(model,id=id,family=binomial,corstr=corstr) beta<-model1$coefficients alpha<-model1$geese$alpha Xmod<-model1$geese$X p<-ncol(Xmod) repeats<-model1$geese$clusz # Residuals resid<-model1$residuals # Obtain Fixed Components if(corstr=="exch") DV<-exchCOR(model1) if(corstr=="ar1") DV<-ar1COR(model1) if(corstr=="unst") DV<-unstCOR(model1) Di<-DV$D Vinvi<-DV$Vinv I<-DV$I # Obtain standard normal and discrete vectors Z<-t(mvrnorm(nsims,rep(0,n),diag(1,n))) if(is.null(xstart)) xstart<-min(locx) if(is.null(ystart)) ystart<-min(locy) if(is.null(xfin)) xfin<-max(locx) if(is.null(yfin)) yfin<-max(locy) if(is.null(inarea)|is.null(bxy)) { bxycur<-NULL for(i in 1:length(b)) { # Finishing Point for x and y xfin<-xfin+b[i]/2 yfin<-yfin+b[i]/2 # Values of x and y to be evaluated x<-seq(xstart,xfin,by=b[i]/2) y<-seq(ystart,yfin,by=b[i]/2) bxycur<-rbind(bxycur,cbind(b[i],expand.grid(x,y))) } remove(x,y) cat("Obtained Expanded Grid: dim = ",dim(bxycur),"\n") Wobs<-NULL What<-NULL # constant of nuin connuin<-Xmod*matrix(rep(exp(Xmod%*%beta)/(1+exp(Xmod%*%beta))^2,p),ncol=p) Iinv<-solve(I) # Loop through bxy to calculate Wobs, What for(j in 1:nrow(bxycur)) { inarea<-as.matrix(in.areaSQ(bxycur[j,],locx=locx,locy=locy)) # For specific time point if(!is.null(tpoint)) inarea<-inarea*as.numeric(time==tpoint) inarea<-inarea*w inarea<-as.matrix(apply(inarea,1,sum)) #Remove Potential Clusters Without Data if(sum(inarea)>0) { bxy<-rbind(bxy,bxycur[j,]) # Calculate Wobs Wobs<-rbind(Wobs,1/sqrt(n)*t(inarea)%*%resid) # Obtain Nu.in nuin<-(-1)*t(inarea)%*%(connuin) # Obtain fixedpart fixedpart<-NULL i<-1 k<-1 while(i<=n) { icur<-c(k:(k+repeats[i]-1)) fixedpart<-cbind(fixedpart,t(inarea[icur])%*%resid[icur]+ nuin%*%Iinv%*%t(Di[[i]])%*%Vinvi[[i]]%*%resid[icur]) k<-k+repeats[i] i<-i+1 } What<-rbind(What,1/sqrt(n)*fixedpart%*%Z) } if(round(j*.01)==j*.01) cat("j = ",j,"\n") } cat("Obtained What Distribution","\n") print(dim(What)) } else { Wobs<-NULL What<-NULL # constant of nuin connuin<-Xmod*matrix(rep(exp(Xmod%*%beta)/(1+exp(Xmod%*%beta))^2,p),ncol=p) Iinv<-solve(I) # For specific time point if(!is.null(tpoint)) inarea<-inarea*as.numeric(time==tpoint) inarea<-inarea*w # Calculate Wobs Wobs<-rbind(Wobs,1/sqrt(n)*t(inarea)%*%resid) # Obtain Nu.in nuin<-(-1)*t(inarea)%*%(connuin) # Obtain fixedpart fixedpart<-NULL i<-1 k<-1 while(i<=n) { icur<-c(k:(k+repeats[i]-1)) fixedpart<-cbind(fixedpart,t(inarea[icur])%*%resid[icur]+ nuin%*%Iinv%*%t(Di[[i]])%*%Vinvi[[i]]%*%resid[icur]) k<-k+repeats[i] i<-i+1 } What<-1/sqrt(n)*fixedpart%*%Z cat("Obtained What Distribution","\n") print(dim(What)) } # Sup of What Slochat<-apply(What,2,max) # Keeping nkeep of What Whatkeep<-What[,1:nkeep] # Calculate One-Sided Test Sloc<-max(Wobs) pval<-sum(Sloc<=Slochat)/nsims # 70,80,90, and 95% Critical Value for W Slochat<-sort(Slochat) crit<-c(Slochat[round(.7*nsims)],Slochat[round(.8*nsims)], Slochat[round(.9*nsims)],Slochat[round(.95*nsims)]) meanSlochat<-mean(Slochat) quantSlochat<-quantile(Slochat,prob=c(.05,.1,.5,.9,.95)) return(list(Y=Y,Xmod=Xmod,locx=locx,locy=locy,Wobs=Wobs, Whatkeep=Whatkeep, bxy=bxy,nsims=nsims, Sloc=Sloc,tpoint=tpoint,pval=pval,crit=crit, meanSlochat=meanSlochat, quantSlochat=quantSlochat)) } ###################################################################################### ###################################################################################### # The following functions are called from the main functions CumRes.REP and CumRes.REPCUM ###################################################################################### # Function to indicate if points are within or outside of area in.areaSQ<-function(bxy,loc) { bxy<-as.numeric(bxy) area<-(((bxy[2]-bxy[1])<=loc[,1])& ((bxy[2]+bxy[1])>=loc[,1])& ((bxy[3]-bxy[1])<=loc[,2])& ((bxy[3]+bxy[1])>=loc[,2])) return(as.numeric(area)) } #################################################################################### # Functions to create Random Discrete Distribution Matrixes func1to1<-function(nsims,length) { return(matrix(sample(c(-1,1),size=(nsims*length),replace=TRUE,prob=c(.5,.5)), ncol=nsims)) } ################################################################################### # Model parameters when using unstructured covariance unstCOR<-function(model) { X<-model$geese$X beta<-model$coefficients p<-length(beta) fitted<-model$fitted.values alpha<-model$geese$alpha repeats<-model$geese$clusz # Create R matrix with all times K<-max(repeats) Rmax<-diag(1,nrow=K) Rmax[lower.tri(Rmax)]<-alpha Rmax<-t(Rmax) Rmax[lower.tri(Rmax)]<-alpha D<-NULL Vinv<-NULL I<-matrix(0,nrow=p,ncol=p) i<-1 j<-1 while(i<=length(repeats)) { curi<-c(j:(j+repeats[i]-1)) Dcur<-X[curi,]*matrix(rep(exp(X[curi,]%*%beta)/(1+exp(X[curi,]%*%beta))^2,p),ncol=p) D<-c(D,list(Dcur)) Acur<-diag(fitted[curi]*(1-fitted[curi])) if(repeats[i]==K) { Vcur<-solve(sqrt(Acur)%*%Rmax%*%sqrt(Acur)) Vinv<-c(Vinv,list(Vcur)) } else { # Indicator vector covariates are missing IM<-as.numeric(apply(X[curi,],2,sum)>0) # Reference Group missing IM[1]<-sum(apply(X[curi,2:K],1,sum)==0) Rcur<-Rmax[IM>0,IM>0] Vcur<-solve(sqrt(Acur)%*%Rcur%*%sqrt(Acur)) Vinv<-c(Vinv,list(Vcur)) } I<-I+t(Dcur)%*%Vcur%*%Dcur j<-j+repeats[i] i<-i+1 } return(list(D=D,Vinv=Vinv,I=I)) } # Model parameters using AR1 ar1COR<-function(model) { X<-model$geese$X beta<-model$coefficients p<-length(beta) fitted<-model$fitted.values rho<-model$geese$alpha repeats<-model$geese$clusz # Create R matrix with all times K<-max(repeats) Rmax<-diag(1,nrow=K) alphaA<-NULL for(par in 1:(K-1)) { alphaA<-c(alphaA,rho^par) } alpha<-NULL for(par in (K-1):1) { alpha<-c(alpha,alphaA[1:par]) } Rmax[lower.tri(Rmax)]<-alpha Rmax<-t(Rmax) Rmax[lower.tri(Rmax)]<-alpha D<-NULL Vinv<-NULL I<-matrix(0,nrow=p,ncol=p) i<-1 j<-1 while(i<=length(repeats)) { curi<-c(j:(j+repeats[i]-1)) Dcur<-X[curi,]*matrix(rep(exp(X[curi,]%*%beta)/(1+exp(X[curi,]%*%beta))^2,p),ncol=p) D<-c(D,list(Dcur)) Acur<-diag(fitted[curi]*(1-fitted[curi])) if(repeats[i]==K) { Vcur<-solve(sqrt(Acur)%*%Rmax%*%sqrt(Acur)) Vinv<-c(Vinv,list(Vcur)) } else { # Indicator vector covariates are missing IM<-as.numeric(apply(X[curi,],2,sum)>0) # Reference Group missing IM[1]<-sum(apply(X[curi,2:K],1,sum)==0) Rcur<-Rmax[IM>0,IM>0] Vcur<-solve(sqrt(Acur)%*%Rcur%*%sqrt(Acur)) Vinv<-c(Vinv,list(Vcur)) } I<-I+t(Dcur)%*%Vcur%*%Dcur j<-j+repeats[i] i<-i+1 } return(list(D=D,Vinv=Vinv,I=I)) } # Model parameters using exch exchCOR<-function(model) { X<-model$geese$X beta<-model$coefficients p<-length(beta) fitted<-model$fitted.values alpha<-model$geese$alpha repeats<-model$geese$clusz # Create R matrix with all times K<-max(repeats) Rmax<-matrix(alpha,nrow=K,ncol=K) diag(Rmax)<-1 D<-NULL Vinv<-NULL I<-matrix(0,nrow=p,ncol=p) i<-1 j<-1 while(i<=length(repeats)) { curi<-c(j:(j+repeats[i]-1)) Dcur<-X[curi,]*matrix(rep(exp(X[curi,]%*%beta)/(1+exp(X[curi,]%*%beta))^2,p),ncol=p) D<-c(D,list(Dcur)) Acur<-diag(fitted[curi]*(1-fitted[curi])) if(repeats[i]==K) { Vcur<-solve(sqrt(Acur)%*%Rmax%*%sqrt(Acur)) Vinv<-c(Vinv,list(Vcur)) } else { # Indicator vector covariates are missing IM<-as.numeric(apply(X[curi,],2,sum)>0) # Reference Group missing IM[1]<-sum(apply(X[curi,2:K],1,sum)==0) Rcur<-Rmax[IM>0,IM>0] Vcur<-solve(sqrt(Acur)%*%Rcur%*%sqrt(Acur)) Vinv<-c(Vinv,list(Vcur)) } I<-I+t(Dcur)%*%Vcur%*%Dcur j<-j+repeats[i] i<-i+1 } return(list(D=D,Vinv=Vinv,I=I)) }