########################################################################
# Program written by Peter Gilbert, January 2003, modified February 2006
#
#########################################################################
# This program carries out the procedures described in Gilbert, Bosch, 
# Hudgens (2003, Biometrics) for assessing possible differences in viral load between 
# vaccine and placebo recipients in the always infected principal stratum.
# This function returns the estimated Average Causal Effect (ACE)
# of vaccine on viral load (placebo - vaccine) with 95% 2-sided confidence limits, 
# as described in Section 3.3 of Gilbert, Bosch, Hudgens (2003).  
#
# This function also returns a 1-sided p-value for the nonparametric
# mean-based statistic TM for testing whether the vaccine causes
# higher viral loads (i.e., test H0: ACE >= 0 vs H1: ACE < 0, which this
# function tests if beta is fixed >=0); or for testing whether the vaccine
# causes lower viral loads (i.e., test H0: ACE <= 0 vs H1: ACE > 0, which 
# this function tests if beta is fixed < 0)
#########################################################################
#
###################
# Input values:
###################
# b = number of bootstrap repetitions
# Nn1 = number randomized to vaccine group
# Nn0 = number randomized to placebo group
# y1.samp = vector of viral loads in vaccine group, 1 per subject
# y0.samp = vector of viral loads in placebo group, 1 per subject
# beta = a real number, presumed amount of selection bias
#        beta = 0 reflects no selection bias, beta > 0 reflects
#        selection bias towards higher viral loads in vaccinees,
#        beta < 0 reflects selection bias towards lower viral loads
#        in vaccinees. beta = 100 and beta = -100 are the extreme
#        amounts of bias considered by Hudgens, Hoering, and Self (2003).
#        The tests are 1-sided with type I error rate 0.05, and the
#        95% confidence interval is 2-sided.
###################
# Output values
###################
# The program writes output to a file named output.dat.
#
# Eleven values are outputted to 11 columns:
# 1. beta, the fixed sensitivity parameter
# 2. b, the number of bootstrap replications
# 3. n1, the number of infected vaccine recipients
# 4. n0, the number of infected placebo recipients
# 5. estimated.ves, the estimated vaccine efficacy to prevent infection
# 6. pvalM = the 1-sided p-value for the mean-based test statistic
# 9. TM = the Estimated Average Causal Effect (ACE) of vaccine on mean viral load
# 10. lowCI = lower 95% confidence limit for the ACE causal estimand at beta
# 11. upCI  = upper 95% confidence limit for the ACE causal estimand at beta
#####################################################################
#
#
# Functions used in the Main Program 
#####################################################################
## Function to find alpha, given beta and VEhat ##
##  and the empirical distribution for the   ##
##  viral loads in the control arm (Nonparametric)
#####################################################################

getalphanonpar <- function(inbeta, inve, y0.samp) {

if (abs(inbeta) > 99) { return(NA) }

else {

f <- function(alpha, beta, ve, vlc) {

w <- function(vl, alpha, beta) exp(alpha + beta*vl) / (1 + exp(alpha + beta*vl))

 sumve <- sum(1-w(vlc,alpha,beta))/length(vlc)

(sumve - ve)**2

}

w <- function(vl, alpha, beta) exp(alpha + beta*vl) / (1 + exp(alpha + beta*vl))

alp <- optimize(f, c(-50,50), beta=inbeta, ve=inve, vlc=y0.samp)$minimum

sumve <- sum(1-w(y0.samp,alp,inbeta))/length(y0.samp)
if (abs(sumve-inve) > 0.01) {
alp <- optimize(f, c(0,50), beta=inbeta, ve=inve, vlc=y0.samp)$minimum
sumve <- sum(1-w(y0.samp,alp,inbeta))/length(y0.samp)
if (abs(sumve-inve) > 0.01) {
alp <- optimize(f, c(-50,0), beta=inbeta, ve=inve, vlc=y0.samp)$minimum
}
}

return(alp)

}
}

#########################################################################
# Function for performing nonparametric bootstrap to compute the
# critical values for the TM test statistic
#
# B = number of bootstrap samples
#########################################################################
 
boot.nonpara<- function(B,y0,Nn0,Nn1,n0,n1,ve,beta,TM)  {

b.TM <- rep(0,B)

j<-0
repeat
{
b.m1       <-  sum(rpois( Nn1, n1/ Nn1))
b.m0       <-  sum(rpois( Nn0, n0/ Nn0))

if (b.m1==0) next
if (b.m0==0) next

boot.ves <- 1 - ((b.m1/Nn1)/(b.m0/Nn0))
if (boot.ves < 0) {boot.ves <- 0}

if (beta > 99) {

b.y0 <- sample(y0,size=b.m0,replace=T)

if (boot.ves <= 0)
  {y1.s<-y0}
  else
  { int <- round(m0*boot.ves)
    y1.s<- sort(y0)[(int+1):m0]}

b.y1 <- sample(y1.s,size=b.m1,replace=T)

}

if (beta < -99) {

b.y0 <- sample(y0,size=b.m0,replace=T)

if (boot.ves <= 0)
  {y1.s<-y0}
  else
  {int <- round(m0*(1-boot.ves))
    y1.s<- sort(y0)[1:(int-1)]}

b.y1 <- sample(y1.s,size=b.m1,replace=T)

}

if (abs(beta) <= 99) {

b.y0 <- sample(y0,size=b.m0,replace=T)

alpha <- getalphanonpar(beta,ve,y0)

# all the data points in y0.samp are unique:

probsplac <- rep(1/length(y0),length(y0))

probsvacc <- weight(y0,alpha,beta)*probsplac

b.y1 <- sample(y0,size=b.m1,prob=probsvacc,replace=T)

}

##########################################################################


if(j==B) break
j<-j+1
#cat(" Bootstrap #:", j, "\n")

boot.alpha <- getalphanonpar(beta,boot.ves,b.y0)

b.TM[j]  <- TMstat(b.y0,b.y1,boot.alpha,beta,boot.ves)

}

##########################################################################
# evaluate bootstrap p-value
#########################################################################

if (beta >= 0) {
x1<-length(b.TM[b.TM <= TM])/B
}

if (beta < 0) {
x1<-length(b.TM[b.TM >= TM])/B
}

return(x1)

}

#########################################################################
# Function for computing a 95% confidence interval about the
# Average Causal Effect (ACE) of vaccine on viral load.
# Returns the 95% CI for the mean difference in viral load
# (placebo - vaccine) in the principal stratum of subjects
# who would be HIV infected under either randomization assignment. 
# B = number of bootstrap samples
#########################################################################

boot.ci <- function(B,y0,y1,Nn0,Nn1,n0,n1,ve,beta)  {

b.TM <- rep(0,B)
b.TMl <- rep(0,B)
b.TMu <- rep(0,B)

j<-0
repeat
{
b.m1       <-  sum(rpois( Nn1, n1/ Nn1))
b.m0       <-  sum(rpois( Nn0, n0/ Nn0))

if (b.m1==0) next
if (b.m0==0) next

b.y1 <- sample(y1,size=b.m1,replace=T)
b.y0 <- sample(y0,size=b.m0,replace=T)

##########################################################################

if(j==B) break
j<-j+1

boot.ves <- 1 - ((b.m1/Nn1)/(b.m0/Nn0))
if (boot.ves < 0) {boot.ves <- 0}

boot.alpha <- getalphanonpar(beta,boot.ves,b.y0)

b.TM[j]  <- TMstat(b.y0,b.y1,boot.alpha,beta,boot.ves)

}

##########################################################################
# evaluate bootstrap p-value
#########################################################################

x5 <- quantile(b.TM,probs=0.025)
x6 <- quantile(b.TM,probs=0.975)

return(c(x5,x6))

}


############
############
# Function for calculating $\widehat F<-V(y)$ or \widehat F<-p(y)$ 
# (empirical distributions)
############################################

empdf <- function(y,ysamp) {

n <- length(ysamp)

vect <- rep(1,n)

x <- sum(vect[ysamp <= y])/n

return(x)

}

###########
###########
# Function for calculating $\widehat F<-p(y|beta)$
#############################################

lowbnddf <- function(y,ysamp,alpha,beta,ve) {

if (beta > 99) {

e.A  <- quantile(ysamp,ve)

return(ifelse(y < e.A,0,(empdf(y,ysamp) - ve)/(1 - ve)))

}

if (beta < -99) {

e.A  <- quantile(ysamp,1-ve)

return(ifelse(y > e.A,1,empdf(y,ysamp)/(1 - ve)))

}

if (abs(beta) <= 99) {

return(ifelse(y < min(ysamp),0,sum(weight(ysamp[ysamp <= y],alpha,beta))/
Weight(ysamp,alpha,beta)))
}

}


############
############
# logistic weight function
#################################

weight <- function(y,alpha,beta) {

return(exp(alpha + beta*y)/(1 + exp(alpha + beta*y)))

}

##############
##############
# Normalizing constant
##################################

Weight <- function(ysamp,alpha,beta) {

return(sum(weight(ysamp,alpha,beta)))

}


###############
###############
# Nonparametric mean shift test statistic
###############

TMstat <- function(y0.samp,y1.samp,alpha,beta,estimated.ves) {

m0 <- length(y0.samp)

mn.samp1 <- mean(y1.samp)

if (abs(beta) <= 99) {

mn.samp0 <- sum(weight(y0.samp,alpha,beta)*y0.samp)/
                Weight(y0.samp,alpha,beta)
}

if (beta > 99) {

if (estimated.ves < 0) {mn.samp0 <- mean(y0.samp) } 

else

{int <- round(m0*estimated.ves) 
 mn.samp0 <- mean(sort(y0.samp)[(int+1):m0])}
}

if (beta < -99) {

if (estimated.ves < 0) {mn.samp0 <- mean(y0.samp) }

else

{int <- round(m0*(1-estimated.ves))
 mn.samp0 <- mean(sort(y0.samp)[1:(int-1)])}

}

return(-(mn.samp1 - mn.samp0)) 

}

################################################
# Main Program
################################################
################################################
################
################

viralloadprogram <- function (b,Nn1,Nn0,y1.samp,y0.samp,beta) {

###################################################
# Input variables
#
# b number of bootstrap repetitions
# Nn1 = number randomized to vaccine group
# Nn0 = number randomized to placebo group
# y1.samp = vector of viral loads in vaccine group, 1 per subject
# y0.samp = vector of viral loads in placebo group, 1 per subject
# beta: presumed amount of selection bias
#       beta = 0 reflects no selection bias, beta > 0 reflects
#       selection bias towards higher viral loads in vaccinees,
#       beta < 0 reflects selection bias towards lower viral loads
#       in vaccinees. beta = 100 and beta = -100 are the extreme
#       amounts of bias considered by Hudgens, Hoering, and Self (2003).
#       The tests are 1-sided with type I error rate 0.05, and the
#       95% confidence interval is 2-sided
#########################################################################

# Numbers infected in the two groups:
n1 <- length(y1.samp)
n0 <- length(y0.samp)

# Remove NAs from the viral load samples
y1.samp <- y1.samp[!is.na(y1.samp)]
y0.samp <- y0.samp[!is.na(y0.samp)]

# Test statistic:
TM <- 0

# Critical values of test statistic:
cvnM <- 0

  estimated.ves <- 1 - ((n1/Nn1)/(n0/Nn0))
  estimated.ves.nontrunc <- estimated.ves
# Truncate the estimated ves to 0 if it is negative:
  if (estimated.ves < 0)  {estimated.ves <- 0}
 
# Calculate alpha given beta:
  alpha <- getalphanonpar(beta,estimated.ves,y0.samp)
  
# Evaluate the test statistic:
  TM  <-  TMstat(y0.samp,y1.samp,alpha,beta,estimated.ves)  

  cat(paste("Test stat TM = ",TM),"\n")

# Compute the 1-sided p-value for the nonparametric mean test statistic:
  x <- boot.nonpara(b,y0.samp, Nn0, Nn1, n0, n1, estimated.ves, beta,TM)

  pvalM <- x[1]

# Compute the 95% CI for the ACE:
   x <- boot.ci(b,y0.samp, y1.samp, Nn0, Nn1, n0, n1, estimated.ves, beta)

   lowCI <- x[1]
   upCI <-  x[2]

   x <-round(c(beta,b,n1,n0,estimated.ves.nontrunc,pvalM,TM,lowCI,upCI),5)

   cat(paste("P-value for TM statistic = ",pvalM),"\n")
   cat(paste("Estimated ACE(beta), LL 95% CI, UL 95% CI = ",TM,lowCI,upCI),"\n")

   write(x,file="output.dat",ncolumns=length(x),append=T)

}


##############################################
##############################################