########################################################################
# Program written by Peter Gilbert and Ron Bosch, January 2003
#########################################################################
# This program carries out the semiparametric procedures described in Gilbert, Bosch, 
# Hudgens (2003, Biometrics) for assessing possible differences in viral load between 
# vaccine and placebo recipients in the always infected principal stratum.
# Three nonparametric test statistics are computed: the mean-based statistic, 
# the Kolmogorov-Smirnov-type statistic, and the Anderson-Darling-type statistic.
# The sensitivity parameter beta is entered by the user, which can range between 
# -100 and 100. For beta = 100, the mean-based test is equivalent to the 
# nonparametric mean-based test of Hudgens, Hoering, and Self (2003, Stat Med) [using 
# the 'Controls Only' bootstrap procedure] for  testing for vaccine harm (increasing 
# viral load). For beta = -100, the mean-based test is equivalent to the 
# nonparametric mean-based test of Hudgens, Hoering, and Self (2003, Stat Med) [using 
# the 'Controls Only' bootstrap procedure] for testing for vaccine benefit
# (decreasing viral load).  This function returns 1-sided p-values for each of
# the 3 test statistics, and returns the estimated Average Causal Effect (ACE)
# of vaccine on viral load (placebo - vaccine) with 95% 2-sided confidence limits, 
# as described in Section 3.3 of Gilbert, Bosch, Hudgens (2003).  For beta >= 0
# the 1-sided statistics test for vaccine harm; for beta < 0 the 1-sided statistics 
# test for vaccine benefit.
#########################################################################
#
###################
# Input values:
###################
# b = number of bootstrap repetitions
# Nn1 = number randomized to vaccine group
# Nn0 = number randomized to placebo group
# y1.samp = vector of viral loads in vaccine group, 1 per subject
# y0.samp = vector of viral loads in placebo group, 1 per subject
# alpha1  = The 2-sided confidence intervals about the ACE(beta) have
#           coverage (1-alpha1)x100%.
# beta = a real number, the presumed amount of selection bias
#        beta = 0 reflects no selection bias, beta > 0 reflects
#        selection bias towards higher viral loads in vaccinees,
#        beta < 0 reflects selection bias towards lower viral loads
#        in vaccinees. beta = 100 and beta = -100 are the extreme
#        amounts of bias considered by Hudgens, Hoering, and Self (2003, Stat Med).
#
#                  NOTE: For beta >= 0, the 1-sided test statistics are for an 
#                        alternative where the vaccine increases viral load (harms).
#
#                        For beta < 0, the 1-sided test statistics are for an 
#                        alternative where the vaccine descreases viral load (helps)
#
###################
# Output values
###################
# The program writes output to a file named output.dat.
#
# Eleven  values are outputted to 11 columns:
# 1. beta
# 2. b
# 3. n1, the number of infected vaccine recipients
# 4. n0, the number of infected placebo recipients
# 5. estimated.ves, the estimated vaccine efficacy to prevent infection
# 6. pvalM = the 1-sided p-value for the mean-based test statistic
# 7. pvalKS = the 1-sided p-value for the Kolmogorov-Smirnov-type test statistic
# 8. pvalAD = the 1-sided p-value for the Anderson-Darling-type test statistic
# 9. TM = the Estimated Average Causal Effect (ACE) of vaccine on mean viral load
# 10. lowCI = lower 95% confidence limit for the ACE causal estimand
# 11. upCI  = upper 95% confidence limit for the ACE causal estimand
#####################################################################
#
#
# Functions used in the Main Program 
#####################################################################
## Function to find alpha, given beta and VE ##
##  and the empirical distribution for the   ##
##  viral loads in the control arm (Nonparametric)
#####################################################################

getalphanonpar <- function(inbeta, inve, y0.samp) {

if (abs(inbeta) > 99) { return(NA) }

else {

f <- function(alpha, beta, ve, vlc) {

w <- function(vl, alpha, beta) exp(alpha + beta*vl) / (1 + exp(alpha + beta*vl))

 sumve <- sum(1-w(vlc,alpha,beta))/length(vlc)

(sumve - ve)**2

}

w <- function(vl, alpha, beta) exp(alpha + beta*vl) / (1 + exp(alpha + beta*vl))

alp <- optimize(f, c(-50,50), beta=inbeta, ve=inve, vlc=y0.samp)$minimum

sumve <- sum(1-w(y0.samp,alp,inbeta))/length(y0.samp)
if (abs(sumve-inve) > 0.01) {
alp <- optimize(f, c(0,50), beta=inbeta, ve=inve, vlc=y0.samp)$minimum
sumve <- sum(1-w(y0.samp,alp,inbeta))/length(y0.samp)
if (abs(sumve-inve) > 0.01) {
alp <- optimize(f, c(-50,0), beta=inbeta, ve=inve, vlc=y0.samp)$minimum
}
}

return(alp)

}
}

#########################################################################
# Function for performing nonparametric bootstrap to compute the
# critical values for the 3 test statistics
#
# B = number of bootstrap samples
#########################################################################
 
boot.nonpara<- function(B,y0,Nn0,Nn1,n0,n1,ve,beta,TM,TKS,TAD)  {

b.TM <- rep(0,B)
b.TKS <- rep(0,B)
b.TAD <- rep(0,B)
#b.TPM <- rep(0,B)

j<-0
repeat
{
b.m1       <-  sum(rpois( Nn1, n1/ Nn1))
b.m0       <-  sum(rpois( Nn0, n0/ Nn0))

if (b.m1==0) next
if (b.m0==0) next

boot.ves <- 1 - ((b.m1/Nn1)/(b.m0/Nn0))
if (boot.ves < 0) {boot.ves <- 0}

if (beta > 99) {

b.y0 <- sample(y0,size=b.m0,replace=T)

if (boot.ves <= 0)
  {y1.s<-y0}
  else
  { int <- round(m0*boot.ves)
    y1.s<- sort(y0)[(int+1):m0]}

b.y1 <- sample(y1.s,size=b.m1,replace=T)

}

if (beta < -99) {

b.y0 <- sample(y0,size=b.m0,replace=T)

if (boot.ves <= 0)
  {y1.s<-y0}
  else
  {int <- round(m0*(1-boot.ves))
    y1.s<- sort(y0)[1:(int-1)]}

b.y1 <- sample(y1.s,size=b.m1,replace=T)

}

if (abs(beta) <= 99) {

b.y0 <- sample(y0,size=b.m0,replace=T)

alpha <- getalphanonpar(beta,ve,y0)

# all the data points in y0.samp are unique:

probsplac <- rep(1/length(y0),length(y0))

probsvacc <- weight(y0,alpha,beta)*probsplac

b.y1 <- sample(y0,size=b.m1,prob=probsvacc,replace=T)

}

##########################################################################


if(j==B) break
j<-j+1
#cat(" Bootstrap #:", j, "\n")

boot.alpha <- getalphanonpar(beta,boot.ves,b.y0)

b.TM[j]  <- TMstat(b.y0,b.y1,boot.alpha,beta,boot.ves)
b.TKS[j] <- TKSstat(b.y0,b.y1,boot.alpha,beta,boot.ves)
b.TAD[j] <- TADstat(b.y0,b.y1,boot.alpha,beta,boot.ves)
#b.TPM[j] <- TPMstat(b.y0,b.y1,boot.alpha,beta,boot.ves)

}

##########################################################################
# evaluate bootstrap p-value
#########################################################################

if (beta >= 0) {
x1<-length(b.TM[b.TM <= TM])/B
#x4<-length(b.TPM[b.TPM <= TPM])/B
}

if (beta < 0) {
x1<-length(b.TM[b.TM >= TM])/B
#x4<-length(b.TPM[b.TPM >= TPM])/B
}

x2<-length(b.TKS[b.TKS >= TKS])/B
x3<-length(b.TAD[b.TAD >= TAD])/B

return(c(x1,x2,x3))

}

#########################################################################
# Function for computing a (1-alpha1)x100% confidence interval about the
# Average Causal Effect (ACE) of vaccine on viral load.
# Returns the 95% CI for the mean difference in viral load
# (placebo - vaccine) in the principal stratum of subjects
# who would be HIV infected under either randomization assignment. 
# B = number of bootstrap samples
#########################################################################

boot.ci <- function(B,y0,y1,Nn0,Nn1,n0,n1,ve,alpha1,beta)  {

b.TM <- rep(0,B)

j<-0
repeat
{
b.m1       <-  sum(rpois( Nn1, n1/ Nn1))
b.m0       <-  sum(rpois( Nn0, n0/ Nn0))

if (b.m1==0) next
if (b.m0==0) next

b.y1 <- sample(y1,size=b.m1,replace=T)
b.y0 <- sample(y0,size=b.m0,replace=T)

##########################################################################

if(j==B) break
j<-j+1

boot.ves <- 1 - ((b.m1/Nn1)/(b.m0/Nn0))
if (boot.ves < 0) {boot.ves <- 0}

boot.alpha <- getalphanonpar(beta,boot.ves,b.y0)

b.TM[j]  <- TMstat(b.y0,b.y1,boot.alpha,beta,boot.ves)

}

##########################################################################
# evaluate bootstrap p-value
#########################################################################

x5 <- quantile(b.TM,probs=alpha1/2)
x6 <- quantile(b.TM,probs=1-alpha1/2)

return(c(x5,x6))

}


############
############
# Function for calculating $\widehat F_V(y)$ or \widehat F_p(y)$ 
# (empirical distributions)
############################################

empdf <- function(y,ysamp) {

n <- length(ysamp)

vect <- rep(1,n)

x <- sum(vect[ysamp <= y])/n

return(x)

}

###########
###########
# Function for calculating $\widehat F_p(y|beta)$
#############################################

lowbnddf <- function(y,ysamp,alpha,beta,ve) {

if (beta > 99) {

e.A  <- quantile(ysamp,ve)

return(ifelse(y < e.A,0,(empdf(y,ysamp) - ve)/(1 - ve)))

}

if (beta < -99) {

e.A  <- quantile(ysamp,1-ve)

return(ifelse(y > e.A,1,empdf(y,ysamp)/(1 - ve)))

}

if (abs(beta) <= 99) {

return(ifelse(y < min(ysamp),0,sum(weight(ysamp[ysamp <= y],alpha,beta))/
Weight(ysamp,alpha,beta)))
}

}


############
############
# logistic weight function
#################################

weight <- function(y,alpha,beta) {

return(exp(alpha + beta*y)/(1 + exp(alpha + beta*y)))

}

##############
##############
# Normalizing constant
##################################

Weight <- function(ysamp,alpha,beta) {

return(sum(weight(ysamp,alpha,beta)))

}


###############
###############
# Nonparametric mean shift test statistic
###############

TMstat <- function(y0.samp,y1.samp,alpha,beta,estimated.ves) {

m0 <- length(y0.samp)

mn.samp1 <- mean(y1.samp)

if (abs(beta) <= 99) {

mn.samp0 <- sum(weight(y0.samp,alpha,beta)*y0.samp)/
                Weight(y0.samp,alpha,beta)
}

if (beta > 99) {

if (estimated.ves < 0) {mn.samp0 <- mean(y0.samp) } 

else

{int <- round(m0*estimated.ves) 
 mn.samp0 <- mean(sort(y0.samp)[(int+1):m0])}
}

if (beta < -99) {

if (estimated.ves < 0) {mn.samp0 <- mean(y0.samp) }

else

{int <- round(m0*(1-estimated.ves))
 mn.samp0 <- mean(sort(y0.samp)[1:(int-1)])}

}

return(-(mn.samp1 - mn.samp0)) 

}

###############
###############
# Kolmogorov-Smirnov test statistic, for 1-sided alternative
###############

TKSstat <- function(y0.samp,y1.samp,alpha,beta,ve) {

fullysamp <- c(y0.samp,y1.samp)  

maxval <- 0

for (i in 1:length(fullysamp)) {

x <- ifelse(beta>=0,1,-1)*(lowbnddf(fullysamp[i],y0.samp,alpha,beta,ve) - empdf(fullysamp[i],y1.samp))

if (x > maxval) {maxval <- x} 

}

M <- (length(y0.samp)*length(y1.samp))/(length(y0.samp) + length(y1.samp))

maxval <- maxval*sqrt(M)

return(max(maxval,0)) 

}


###############
###############
# Anderson-Darling test statistic, for 1-sided alternative
###############

TADstat <- function(y0.samp,y1.samp,alpha,beta,ve) {

x <- 0

m <- length(y0.samp)
n <- length(y1.samp)
N <- m + n

fullysamp <- sort(c(y0.samp,y1.samp))  

probslowbnd <- rep(0,length(fullysamp))
probsempdf <- rep(0,length(fullysamp))

probslowbnd[1] <- lowbnddf(fullysamp[1],y0.samp,alpha,beta,ve)
probsempdf[1] <- empdf(fullysamp[1],y1.samp)

for (i in 2:length(fullysamp)) {

probslowbnd[i] <- lowbnddf(fullysamp[i],y0.samp,alpha,beta,ve) - lowbnddf(fullysamp[i-1],y0.samp,alpha,beta,ve)

probsempdf[i] <- empdf(fullysamp[i],y1.samp) - empdf(fullysamp[i-1],y1.samp)

}

for (i in 1:length(fullysamp)) {

temp <- (m/N)*lowbnddf(fullysamp[i],y0.samp,alpha,beta,ve) + (n/N)*empdf(fullysamp[i],y1.samp)

if (temp*(1-temp) > 0) {

y <- ifelse(beta>=0,1,-1)*(lowbnddf(fullysamp[i],y0.samp,alpha,beta,ve) - empdf(fullysamp[i],y1.samp))

x <- x + (ifelse(y > 0,y**2,0)/(temp*(1 - temp)))*((m/N)*probslowbnd[i] + (n/N)*probsempdf[i])

}

}

M <- (m*n)/N
 
x <- x*M

return(x)

} 


###############
###############
# Parametric mean shift test statistic, for 1-sided alternative
###############

#TPMstat <- function(y0.samp,y1.samp,alpha,beta,ve) {
#
#fullysamp <- c(y0.samp,y1.samp)
#
#x <- 0
#
#if (beta > 99) {
#
#e.A  <- qnorm(ve, mean(y0.samp), sqrt(var(y0.samp)))
#
#x <- -mean(y1.samp) + (mean(y0.samp) 
#- (dnorm((e.A - mean(y0.samp))/sqrt(var(y0.samp)))/
#(1-pnorm((e.A - mean(y0.samp))/sqrt(var(y0.samp)))))*sqrt(var(y0.samp)))
#
#}
#
#if (beta < -99) {
#
#e.A  <- qnorm(1-ve, mean(y0.samp), sqrt(var(y0.samp)))
#
#x <- -mean(y1.samp) + (mean(y0.samp)
#- (dnorm((mean(y0.samp) - e.A)/sqrt(var(y0.samp)))/
#(1-pnorm((mean(y0.samp) - e.A)/sqrt(var(y0.samp)))))*sqrt(var(y0.samp)))
#
#}
#
#if (abs(beta) <= 99) {
#
#k <- 6
#bandwidth <- 0.01
#sum <- 0
#mu0 <- mean(y0.samp)
#sd0 <- sqrt(var(y0.samp))
#midp <- mu0 - k*sd0
#while (midp < mu0 + k*sd0) {
# sum <- sum + midp*weight(midp,alpha,beta)*
#                   dnorm(midp,mean=mu0,sd=sd0)*
#                   bandwidth
# midp <- midp + bandwidth
#}
#
#sum2 <- 0
#mu1 <- mean(y1.samp)
#sd1 <- sqrt(var(y1.samp))
#midp <- mu1 - k*sd1
#while (midp < mu1 + k*sd1) {
# sum2 <- sum2 + midp*dnorm(midp,mean=mu1,sd=sd1)*
#                   bandwidth
# midp <- midp + bandwidth
#}
#
#x <- -(mean(y1.samp) - sum/(1-ve))
#
#}
#
#return(x) 
#
#}

################################################
# Main Program
################################################
################################################
################
################

viralloadprogram <- function(b,Nn1,Nn0,y1.samp,y0.samp,alpha1,beta) {

###################################################
# Input variables
#
# b number of bootstrap repetitions
# Nn1 = number randomized to vaccine group
# Nn0 = number randomized to placebo group
# y1.samp = vector of viral loads in vaccine group, 1 per subject
# y0.samp = vector of viral loads in placebo group, 1 per subject
# alpha1  = The 2-sided confidence intervals about the ACE(beta) have
#           coverage (1-alpha1)x100%.
# beta = a real number, the presumed amount of selection bias
#        beta = 0 reflects no selection bias, beta > 0 reflects
#        selection bias towards higher viral loads in vaccinees,
#        beta < 0 reflects selection bias towards lower viral loads
#        in vaccinees. beta = 100 and beta = -100 are the extreme
#        amounts of bias considered by Hudgens, Hoering, and Self (2003, Stat Med).
#########################################################################

# Numbers infected in the two groups:
n1 <- length(y1.samp)
n0 <- length(y0.samp)

# Remove missing values from the viral load samples
y1.samp <- y1.samp[!is.na(y1.samp)]
y0.samp <- y0.samp[!is.na(y0.samp)]

# Test statistics:
TM <- 0
TKS <- 0
TAD <- 0
#TPM <- 0

# Critical values of test statistics:
cvnM <- 0
cvnKS <- 0
cvnAD <- 0
#cvnPM <- 0

  estimated.ves <- 1 - ((n1/Nn1)/(n0/Nn0))
# Truncate the estimated ves to 0 if it is negative:
  if (estimated.ves < 0)  {estimated.ves <- 0}
 
# Calculate alpha given beta:
  alpha <- getalphanonpar(beta,estimated.ves,y0.samp)
  
# Evaluate the test statistics:
  TM  <-  TMstat(y0.samp,y1.samp,alpha,beta,estimated.ves)  
  TKS <- TKSstat(y0.samp,y1.samp,alpha,beta,estimated.ves)
  TAD <- TADstat(y0.samp,y1.samp,alpha,beta,estimated.ves)
#  TPM <- TPMstat(y0.samp,y1.samp,alpha,beta,estimated.ves)

  cat(paste("Test stats TM, TKS, TAD = ",TM,TKS,TAD),"\n")

# Compute the 1-sided p-values for the test statistics:
  x <- boot.nonpara(b,y0.samp, Nn0, Nn1, n0, n1, estimated.ves, beta,TM,TKS,TAD)

  pvalM <- x[1]
  pvalKS <- x[2]
  pvalAD <- x[3]
#  pvalPM <- x[4]

# Compute the (1-alpha)x100% CI for the ACE:
   x <- boot.ci(b,y0.samp, y1.samp, Nn0, Nn1, n0, n1, estimated.ves, alpha1, beta)

   lowCI <- x[1]
   upCI <-  x[2]

   x <-round(c(beta,b,n1,n0,estimated.ves,pvalM,pvalKS,pvalAD,TM,lowCI,upCI),5)

   cat(paste("P-values for TM, KS, AD statistics = ",pvalM,pvalKS,pvalAD),"\n")
   cat(paste("Estimated ACE, LL 95% CI, UL 95% CI = ",TM,lowCI,upCI),"\n")

   write(x,file="output.dat",ncolumns=length(x),append=T)

}


##############################################
##############################################