# Andrea Cook
# Cumulative Geographic Residual for Repeated Measures
# CumRes.REP - Method when only a single address per repeated measure
# CumRes.REPCum - Method when you want to weight previous location history

# Must load MASS and geepack package
library(MASS)
library(geepack)
	
####################################################################################
# Function to run Area-Level Repeated Measures Cumulative Geographic Residual Tests
# Data Requirements:
# Y = Vector of outcomes of the length of sum(K_i) where K_i is the number of 
#	observations of individual i. 
# X = Matrix of covariates (Make NULL for no covariates)
# id = unique identifier of each individual
# time = vector of time points to indicate which repeated measure
# loc = two column matrix of location of individual
# model = formula for how you want to model the relationship to Y and X
# corstr = correlation structure as accepted by geeglm
# inarea = matrix of number of areas by number of potential clusters with
#           each column is an indicator function if the area is in cluster
# bxy = matrix with number of areas by 3 with the first column being
#        b, half-edge length of square potential cluster, the second two columns
#	  are the location of the center of the square of potential cluster
# b, xstart, ystart, xfin, yfin only needed if inarea and bxy are not specified 
# b = a vector of 1/2 edge-length of square clusters to be looked at
# xstart, ystart = values of location to start to create grid to look for clusters
# xfin, yfin = values of location to finish creating grid to to look for clusters
# nkeep = number of simulated Whats to keep
# nsims = number of total simulated Whats
CumRes.REP<-function(Y,X,id,time,loc,model=NULL,corstr="exch",
		inarea=NULL,bxy=NULL,b=NULL,xstart=NULL,ystart=NULL,xfin=NULL,yfin=NULL,
		tpoint=NULL,nkeep=100,nsims=1000)
{
	n<-length(unique(id))
	nM<-length(id)
	Y<-Y[order(id,time)]
	loc<-loc[order(id,time),]
	if(!is.null(X)) X<-X[order(id,time),]
	time<-time[order(id,time)]
	id<-id[order(id)]
	
	# Run Model on Data without covariates
	if(is.null(model)) 
	{
		if(is.null(X)) model<-formula(Y~as.factor(time))
		else model<-formula(Y~as.factor(time)+X)
	}	

	model1<-geeglm(model,id=id,family=binomial,corstr=corstr)
	beta<-model1$coefficients
	alpha<-model1$geese$alpha
	Xmod<-model1$geese$X
	p<-ncol(Xmod)
	repeats<-model1$geese$clusz
	
	# Residuals
	resid<-model1$residuals

	# Obtain Fixed Components
	if(corstr=="exch") DV<-exchCOR(model1)
	if(corstr=="ar1")  DV<-ar1COR(model1)
	if(corstr=="unst") DV<-unstCOR(model1)
	
	Di<-DV$D
	Vinvi<-DV$Vinv
	I<-DV$I
	
	# Obtain standard normal and discrete vectors
	Z<-t(mvrnorm(nsims,rep(0,n),diag(1,n)))

	if(is.null(inarea)|is.null(inarea))
	{
	  if(is.null(xstart)) xstart<-min(loc[,1])
	  if(is.null(ystart)) ystart<-min(loc[,2])
	  if(is.null(xfin)) xfin<-max(loc[,1])
	  if(is.null(yfin)) yfin<-max(loc[,2])

	  bxycur<-NULL
	  for(i in 1:length(b))
	  {
	 	# Finishing Point for x and y
		xfin<-xfin+b[i]/2
		yfin<-yfin+b[i]/2

		# Values of x and y to be evaluated
		x<-seq(xstart,xfin,by=b[i]/2)
		y<-seq(ystart,yfin,by=b[i]/2)

		bxycur<-rbind(bxycur,cbind(b[i],expand.grid(x,y)))
	  }
	  remove(x,y)
	  cat("Obtained Expanded Grid: dim = ",dim(bxycur),"\n")
	
	  bxy<-NULL
	  Wobs<-NULL
	  What<-NULL

	  # constant of nuin
	  connuin<-Xmod*matrix(rep(exp(Xmod%*%beta)/(1+exp(Xmod%*%beta))^2,p),ncol=p)
	  Iinv<-solve(I)

	  # Loop through bxy to calculate Wobs, What	
	  for(j in 1:nrow(bxycur))
	  {	
		inarea<-as.matrix(in.areaSQ(bxycur[j,],loc=loc))

		# For specific time point
		if(!is.null(tpoint)) inarea<-inarea*as.numeric(time==tpoint)

		#Remove Potential Clusters Without Data
		if(sum(inarea)>0)
		{
			bxy<-rbind(bxy,bxycur[j,])		
		
			# Calculate Wobs
			Wobs<-rbind(Wobs,1/sqrt(n)*t(inarea)%*%resid)

			# Obtain Nu.in
			nuin<-(-1)*t(inarea)%*%(connuin)
		
			# Obtain fixedpart
			fixedpart<-NULL
			i<-1
			k<-1
			while(i<=n)
			{	
				icur<-c(k:(k+repeats[i]-1))
				fixedpart<-cbind(fixedpart,t(inarea[icur])%*%resid[icur]+
					nuin%*%Iinv%*%t(Di[[i]])%*%Vinvi[[i]]%*%resid[icur])
				k<-k+repeats[i]
				i<-i+1
			}
			What<-rbind(What,1/sqrt(n)*fixedpart%*%Z)
		}
		if(round(j*.01)==j*.01) cat("j = ",j,"\n")
	  }
	  cat("Obtained What Distribution","\n")
	  print(dim(What))
	}
	else
	{
	  # constant of nuin
	  connuin<-Xmod*matrix(rep(exp(Xmod%*%beta)/(1+exp(Xmod%*%beta))^2,p),ncol=p)
	  Iinv<-solve(I)

	  # For specific time point
	  if(!is.null(tpoint)) inarea<-inarea*as.numeric(time==tpoint)

	  # Calculate Wobs
	  Wobs<-rbind(Wobs,1/sqrt(n)*t(inarea)%*%resid)

	  # Obtain Nu.in
	  nuin<-(-1)*t(inarea)%*%(connuin)
		
	  # Obtain fixedpart
	  fixedpart<-NULL
	  i<-1
	  k<-1
	  while(i<=n)
	  {	
		icur<-c(k:(k+repeats[i]-1))
		fixedpart<-cbind(fixedpart,t(inarea[icur])%*%resid[icur]+
			nuin%*%Iinv%*%t(Di[[i]])%*%Vinvi[[i]]%*%resid[icur])
		k<-k+repeats[i]
		i<-i+1
	  }
	  What<-1/sqrt(n)*fixedpart%*%Z
	  cat("Obtained What Distribution","\n")
	  print(dim(What))
	}				
	# Sup of What
	Slochat<-apply(What,2,max)
	
	# Keeping nkeep of What
	Whatkeep<-What[,1:nkeep]

	# Calculate One-Sided Test
	Sloc<-max(Wobs)

	pval<-sum(Sloc<=Slochat)/nsims

	# 70,80,90, and 95% Critical Value for W
	Slochat<-sort(Slochat)

	crit<-c(Slochat[round(.7*nsims)],Slochat[round(.8*nsims)],
		Slochat[round(.9*nsims)],Slochat[round(.95*nsims)])

	meanSlochat<-mean(Slochat)
	quantSlochat<-quantile(Slochat,prob=c(.05,.1,.5,.9,.95))

	return(list(Y=Y,Xmod=Xmod,loc=loc,Wobs=Wobs,
		 Whatkeep=Whatkeep,
		 bxy=bxy,nsims=nsims,
		 Sloc=Sloc,tpoint=tpoint,pval=pval,crit=crit,
		 meanSlochat=meanSlochat,
		 quantSlochat=quantSlochat))
}

####################################################################################
# Function to run Repeated Measures Cumulative Geographic Residual Test 
# allowing multiple addresses for a given time point (weight past history)
# Data Requirements:
# Y = Vector of outcomes of the length of sum(K_i) where K_i is the number of 
#	observations of individual i. 
# X = Matrix of covariates (Make NULL for no covariates)
# id = unique identifier of each individual
# time = vector of time points to indicate which repeated measure
# locx = vector of x-coordinates of all locations for individual i 
# locy = vector of y-coordinates of all locations for individual i
# w = weight to be used for a given address at every time point
# model = formula for how you want to model the relationship to Y and X
# corstr = correlation structure as accepted by geeglm
# inarea = matrix of number of areas by number of potential clusters with
#           each column is an indicator function if the area is in cluster
# bxy = matrix with number of areas by 3 with the first column being
#        b, half-edge length of square potential cluster, the second two columns
#	  are the location of the center of the square of potential cluster
# b, xstart, ystart, xfin, yfin only needed if inarea and bxy are not specified 
# b = a vector of 1/2 edge-length of square clusters to be looked at
# xstart, ystart = values of location to start to create grid to look for clusters
# xfin, yfin = values of location to finish creating grid to to look for clusters
# nkeep = number of simulated Whats to keep
# nsims = number of total simulated Whats
CumRes.REPCum<-function(Y,X,id,time,locx,locy,w,model=NULL,corstr="exch",
		inarea=NULL,bxy=NULL,b=NULL,xstart=NULL,ystart=NULL,xfin=NULL,yfin=NULL,
		tpoint=NULL,nkeep=100,nsims=1000)
{
	n<-length(unique(id))
	nM<-length(id)
	Y<-Y[order(id,time)]
	locx<-locx[order(id,time),]
	locy<-locy[order(id,time),]
	w<-w[order(id,time),]
	if(!is.null(X)) X<-X[order(id,time),]
	time<-time[order(id,time)]
	id<-id[order(id)]
	
	# Run Model on Data without covariates
	if(is.null(model)) 
	{
		if(is.null(X)) model<-formula(Y~as.factor(time))
		else model<-formula(Y~as.factor(time)+X)
	}	

	model1<-geeglm(model,id=id,family=binomial,corstr=corstr)
	beta<-model1$coefficients
	alpha<-model1$geese$alpha
	Xmod<-model1$geese$X
	p<-ncol(Xmod)
	repeats<-model1$geese$clusz
	
	# Residuals
	resid<-model1$residuals

	# Obtain Fixed Components
	if(corstr=="exch") DV<-exchCOR(model1)
	if(corstr=="ar1")  DV<-ar1COR(model1)
	if(corstr=="unst") DV<-unstCOR(model1)
	
	Di<-DV$D
	Vinvi<-DV$Vinv
	I<-DV$I
	
	# Obtain standard normal and discrete vectors
	Z<-t(mvrnorm(nsims,rep(0,n),diag(1,n)))

	if(is.null(xstart)) xstart<-min(locx)
	if(is.null(ystart)) ystart<-min(locy)
	if(is.null(xfin)) xfin<-max(locx)
	if(is.null(yfin)) yfin<-max(locy)

	if(is.null(inarea)|is.null(bxy))
	{
	  bxycur<-NULL
	  for(i in 1:length(b))
	  {
		# Finishing Point for x and y
		xfin<-xfin+b[i]/2
		yfin<-yfin+b[i]/2

		# Values of x and y to be evaluated
		x<-seq(xstart,xfin,by=b[i]/2)
		y<-seq(ystart,yfin,by=b[i]/2)

		bxycur<-rbind(bxycur,cbind(b[i],expand.grid(x,y)))
	  }
	  remove(x,y)
	  cat("Obtained Expanded Grid: dim = ",dim(bxycur),"\n")
	
	  Wobs<-NULL
	  What<-NULL

	  # constant of nuin
	  connuin<-Xmod*matrix(rep(exp(Xmod%*%beta)/(1+exp(Xmod%*%beta))^2,p),ncol=p)
	  Iinv<-solve(I)

	  # Loop through bxy to calculate Wobs, What	
	  for(j in 1:nrow(bxycur))
	  {	
		inarea<-as.matrix(in.areaSQ(bxycur[j,],locx=locx,locy=locy))

		# For specific time point
		if(!is.null(tpoint)) inarea<-inarea*as.numeric(time==tpoint)

		inarea<-inarea*w
		inarea<-as.matrix(apply(inarea,1,sum))
		#Remove Potential Clusters Without Data
		if(sum(inarea)>0)
		{
			bxy<-rbind(bxy,bxycur[j,])		
		
			# Calculate Wobs
			Wobs<-rbind(Wobs,1/sqrt(n)*t(inarea)%*%resid)

			# Obtain Nu.in
			nuin<-(-1)*t(inarea)%*%(connuin)
		
			# Obtain fixedpart
			fixedpart<-NULL
			i<-1
			k<-1
			while(i<=n)
			{	
				icur<-c(k:(k+repeats[i]-1))
				fixedpart<-cbind(fixedpart,t(inarea[icur])%*%resid[icur]+
					nuin%*%Iinv%*%t(Di[[i]])%*%Vinvi[[i]]%*%resid[icur])
				k<-k+repeats[i]
				i<-i+1
			}
			What<-rbind(What,1/sqrt(n)*fixedpart%*%Z)
		}
		if(round(j*.01)==j*.01) cat("j = ",j,"\n")
	  }
	  cat("Obtained What Distribution","\n")
	  print(dim(What))
	}
	else
	{
	  Wobs<-NULL
	  What<-NULL

	  # constant of nuin
	  connuin<-Xmod*matrix(rep(exp(Xmod%*%beta)/(1+exp(Xmod%*%beta))^2,p),ncol=p)
	  Iinv<-solve(I)

	  # For specific time point
	  if(!is.null(tpoint)) inarea<-inarea*as.numeric(time==tpoint)

	  inarea<-inarea*w
		
	  # Calculate Wobs
	  Wobs<-rbind(Wobs,1/sqrt(n)*t(inarea)%*%resid)

	  # Obtain Nu.in
	  nuin<-(-1)*t(inarea)%*%(connuin)

	  # Obtain fixedpart
	  fixedpart<-NULL
	  i<-1
	  k<-1
	  while(i<=n)
	  {	
		icur<-c(k:(k+repeats[i]-1))
		fixedpart<-cbind(fixedpart,t(inarea[icur])%*%resid[icur]+
			nuin%*%Iinv%*%t(Di[[i]])%*%Vinvi[[i]]%*%resid[icur])
		k<-k+repeats[i]
		i<-i+1
	  }
	  What<-1/sqrt(n)*fixedpart%*%Z
	  cat("Obtained What Distribution","\n")
	  print(dim(What))
	}
			
	# Sup of What
	Slochat<-apply(What,2,max)
	
	# Keeping nkeep of What
	Whatkeep<-What[,1:nkeep]

	# Calculate One-Sided Test
	Sloc<-max(Wobs)

	pval<-sum(Sloc<=Slochat)/nsims

	# 70,80,90, and 95% Critical Value for W
	Slochat<-sort(Slochat)

	crit<-c(Slochat[round(.7*nsims)],Slochat[round(.8*nsims)],
		Slochat[round(.9*nsims)],Slochat[round(.95*nsims)])

	meanSlochat<-mean(Slochat)
	quantSlochat<-quantile(Slochat,prob=c(.05,.1,.5,.9,.95))

	return(list(Y=Y,Xmod=Xmod,locx=locx,locy=locy,Wobs=Wobs,
		 Whatkeep=Whatkeep,
		 bxy=bxy,nsims=nsims,
		 Sloc=Sloc,tpoint=tpoint,pval=pval,crit=crit,
		 meanSlochat=meanSlochat,
		 quantSlochat=quantSlochat))
}

######################################################################################
######################################################################################
# The following functions are called from the main functions CumRes.REP and CumRes.REPCUM
######################################################################################
# Function to indicate if points are within or outside of area
in.areaSQ<-function(bxy,loc)
{
	bxy<-as.numeric(bxy)
	area<-(((bxy[2]-bxy[1])<=loc[,1])&
	       ((bxy[2]+bxy[1])>=loc[,1])&
	       ((bxy[3]-bxy[1])<=loc[,2])&
	       ((bxy[3]+bxy[1])>=loc[,2]))
	return(as.numeric(area))	
}

####################################################################################
# Functions to create Random Discrete Distribution Matrixes
func1to1<-function(nsims,length)
{
	return(matrix(sample(c(-1,1),size=(nsims*length),replace=TRUE,prob=c(.5,.5)),
		ncol=nsims))

}

###################################################################################
# Model parameters when using unstructured covariance
unstCOR<-function(model)
{
	X<-model$geese$X
	beta<-model$coefficients
	p<-length(beta)
	fitted<-model$fitted.values
	alpha<-model$geese$alpha

	repeats<-model$geese$clusz

	# Create R matrix with all times
	K<-max(repeats)
	Rmax<-diag(1,nrow=K)
	Rmax[lower.tri(Rmax)]<-alpha
	Rmax<-t(Rmax)
	Rmax[lower.tri(Rmax)]<-alpha

	D<-NULL
	Vinv<-NULL
	I<-matrix(0,nrow=p,ncol=p)
	i<-1
	j<-1
	while(i<=length(repeats))
	{
		curi<-c(j:(j+repeats[i]-1))
		Dcur<-X[curi,]*matrix(rep(exp(X[curi,]%*%beta)/(1+exp(X[curi,]%*%beta))^2,p),ncol=p)
		D<-c(D,list(Dcur))
		Acur<-diag(fitted[curi]*(1-fitted[curi]))
		if(repeats[i]==K)
		{
			Vcur<-solve(sqrt(Acur)%*%Rmax%*%sqrt(Acur))
			Vinv<-c(Vinv,list(Vcur))
		}
		else
		{
			# Indicator vector covariates are missing
			IM<-as.numeric(apply(X[curi,],2,sum)>0)
			# Reference Group missing
			IM[1]<-sum(apply(X[curi,2:K],1,sum)==0)
			Rcur<-Rmax[IM>0,IM>0]
			Vcur<-solve(sqrt(Acur)%*%Rcur%*%sqrt(Acur))
			Vinv<-c(Vinv,list(Vcur))
		}
		I<-I+t(Dcur)%*%Vcur%*%Dcur
		j<-j+repeats[i]
		i<-i+1
	}
	return(list(D=D,Vinv=Vinv,I=I))
}

# Model parameters using AR1
ar1COR<-function(model)
{
	X<-model$geese$X
	beta<-model$coefficients
	p<-length(beta)
	fitted<-model$fitted.values
	rho<-model$geese$alpha

	repeats<-model$geese$clusz

	# Create R matrix with all times
	K<-max(repeats)
	Rmax<-diag(1,nrow=K)
	alphaA<-NULL
	for(par in 1:(K-1))
	{
		alphaA<-c(alphaA,rho^par)
	}
	alpha<-NULL
	for(par in (K-1):1)
	{
		alpha<-c(alpha,alphaA[1:par])
	}
	
	Rmax[lower.tri(Rmax)]<-alpha
	Rmax<-t(Rmax)
	Rmax[lower.tri(Rmax)]<-alpha

	D<-NULL
	Vinv<-NULL
	I<-matrix(0,nrow=p,ncol=p)
	i<-1
	j<-1
	while(i<=length(repeats))
	{
		curi<-c(j:(j+repeats[i]-1))
		Dcur<-X[curi,]*matrix(rep(exp(X[curi,]%*%beta)/(1+exp(X[curi,]%*%beta))^2,p),ncol=p)
		D<-c(D,list(Dcur))
		Acur<-diag(fitted[curi]*(1-fitted[curi]))
		if(repeats[i]==K)
		{
			Vcur<-solve(sqrt(Acur)%*%Rmax%*%sqrt(Acur))
			Vinv<-c(Vinv,list(Vcur))
		}
		else
		{
			# Indicator vector covariates are missing
			IM<-as.numeric(apply(X[curi,],2,sum)>0)
			# Reference Group missing
			IM[1]<-sum(apply(X[curi,2:K],1,sum)==0)
			Rcur<-Rmax[IM>0,IM>0]
			Vcur<-solve(sqrt(Acur)%*%Rcur%*%sqrt(Acur))
			Vinv<-c(Vinv,list(Vcur))
		}
		I<-I+t(Dcur)%*%Vcur%*%Dcur
		j<-j+repeats[i]
		i<-i+1
	}
	return(list(D=D,Vinv=Vinv,I=I))
}

# Model parameters using exch
exchCOR<-function(model)
{
	X<-model$geese$X
	beta<-model$coefficients
	p<-length(beta)
	fitted<-model$fitted.values
	alpha<-model$geese$alpha

	repeats<-model$geese$clusz

	# Create R matrix with all times
	K<-max(repeats)
	Rmax<-matrix(alpha,nrow=K,ncol=K)
	diag(Rmax)<-1

	D<-NULL
	Vinv<-NULL
	I<-matrix(0,nrow=p,ncol=p)
	i<-1
	j<-1
	while(i<=length(repeats))
	{
		curi<-c(j:(j+repeats[i]-1))
		Dcur<-X[curi,]*matrix(rep(exp(X[curi,]%*%beta)/(1+exp(X[curi,]%*%beta))^2,p),ncol=p)
		D<-c(D,list(Dcur))
		Acur<-diag(fitted[curi]*(1-fitted[curi]))
		if(repeats[i]==K)
		{
			Vcur<-solve(sqrt(Acur)%*%Rmax%*%sqrt(Acur))
			Vinv<-c(Vinv,list(Vcur))
		}
		else
		{
			# Indicator vector covariates are missing
			IM<-as.numeric(apply(X[curi,],2,sum)>0)
			# Reference Group missing
			IM[1]<-sum(apply(X[curi,2:K],1,sum)==0)
			Rcur<-Rmax[IM>0,IM>0]
			Vcur<-solve(sqrt(Acur)%*%Rcur%*%sqrt(Acur))
			Vinv<-c(Vinv,list(Vcur))
		}
		I<-I+t(Dcur)%*%Vcur%*%Dcur
		j<-j+repeats[i]
		i<-i+1
	}
	return(list(D=D,Vinv=Vinv,I=I))
}