######################################################################## # Program written by Peter Gilbert and Ron Bosch, January 2003 ######################################################################### # This program carries out the semiparametric procedures described in Gilbert, Bosch, # Hudgens (2003, Biometrics) for assessing possible differences in viral load between # vaccine and placebo recipients in the always infected principal stratum. # Three nonparametric test statistics are computed: the mean-based statistic, # the Kolmogorov-Smirnov-type statistic, and the Anderson-Darling-type statistic. # The sensitivity parameter beta is entered by the user, which can range between # -100 and 100. For beta = 100, the mean-based test is equivalent to the # nonparametric mean-based test of Hudgens, Hoering, and Self (2003, Stat Med) [using # the 'Controls Only' bootstrap procedure] for testing for vaccine harm (increasing # viral load). For beta = -100, the mean-based test is equivalent to the # nonparametric mean-based test of Hudgens, Hoering, and Self (2003, Stat Med) [using # the 'Controls Only' bootstrap procedure] for testing for vaccine benefit # (decreasing viral load). This function returns 1-sided p-values for each of # the 3 test statistics, and returns the estimated Average Causal Effect (ACE) # of vaccine on viral load (placebo - vaccine) with 95% 2-sided confidence limits, # as described in Section 3.3 of Gilbert, Bosch, Hudgens (2003). For beta >= 0 # the 1-sided statistics test for vaccine harm; for beta < 0 the 1-sided statistics # test for vaccine benefit. ######################################################################### # ################### # Input values: ################### # b = number of bootstrap repetitions # Nn1 = number randomized to vaccine group # Nn0 = number randomized to placebo group # y1.samp = vector of viral loads in vaccine group, 1 per subject # y0.samp = vector of viral loads in placebo group, 1 per subject # alpha1 = The 2-sided confidence intervals about the ACE(beta) have # coverage (1-alpha1)x100%. # beta = a real number, the presumed amount of selection bias # beta = 0 reflects no selection bias, beta > 0 reflects # selection bias towards higher viral loads in vaccinees, # beta < 0 reflects selection bias towards lower viral loads # in vaccinees. beta = 100 and beta = -100 are the extreme # amounts of bias considered by Hudgens, Hoering, and Self (2003, Stat Med). # # NOTE: For beta >= 0, the 1-sided test statistics are for an # alternative where the vaccine increases viral load (harms). # # For beta < 0, the 1-sided test statistics are for an # alternative where the vaccine descreases viral load (helps) # ################### # Output values ################### # The program writes output to a file named output.dat. # # Eleven values are outputted to 11 columns: # 1. beta # 2. b # 3. n1, the number of infected vaccine recipients # 4. n0, the number of infected placebo recipients # 5. estimated.ves, the estimated vaccine efficacy to prevent infection # 6. pvalM = the 1-sided p-value for the mean-based test statistic # 7. pvalKS = the 1-sided p-value for the Kolmogorov-Smirnov-type test statistic # 8. pvalAD = the 1-sided p-value for the Anderson-Darling-type test statistic # 9. TM = the Estimated Average Causal Effect (ACE) of vaccine on mean viral load # 10. lowCI = lower 95% confidence limit for the ACE causal estimand # 11. upCI = upper 95% confidence limit for the ACE causal estimand ##################################################################### # # # Functions used in the Main Program ##################################################################### ## Function to find alpha, given beta and VE ## ## and the empirical distribution for the ## ## viral loads in the control arm (Nonparametric) ##################################################################### getalphanonpar <- function(inbeta, inve, y0.samp) { if (abs(inbeta) > 99) { return(NA) } else { f <- function(alpha, beta, ve, vlc) { w <- function(vl, alpha, beta) exp(alpha + beta*vl) / (1 + exp(alpha + beta*vl)) sumve <- sum(1-w(vlc,alpha,beta))/length(vlc) (sumve - ve)**2 } w <- function(vl, alpha, beta) exp(alpha + beta*vl) / (1 + exp(alpha + beta*vl)) alp <- optimize(f, c(-50,50), beta=inbeta, ve=inve, vlc=y0.samp)$minimum sumve <- sum(1-w(y0.samp,alp,inbeta))/length(y0.samp) if (abs(sumve-inve) > 0.01) { alp <- optimize(f, c(0,50), beta=inbeta, ve=inve, vlc=y0.samp)$minimum sumve <- sum(1-w(y0.samp,alp,inbeta))/length(y0.samp) if (abs(sumve-inve) > 0.01) { alp <- optimize(f, c(-50,0), beta=inbeta, ve=inve, vlc=y0.samp)$minimum } } return(alp) } } ######################################################################### # Function for performing nonparametric bootstrap to compute the # critical values for the 3 test statistics # # B = number of bootstrap samples ######################################################################### boot.nonpara<- function(B,y0,Nn0,Nn1,n0,n1,ve,beta,TM,TKS,TAD) { b.TM <- rep(0,B) b.TKS <- rep(0,B) b.TAD <- rep(0,B) #b.TPM <- rep(0,B) j<-0 repeat { b.m1 <- sum(rpois( n1, n1/ Nn1)) b.m0 <- sum(rpois( n0, n0/ Nn0)) if (b.m1==0) next if (b.m0==0) next boot.ves <- 1 - ((b.m1/Nn1)/(b.m0/Nn0)) if (boot.ves < 0) {boot.ves <- 0} if (beta > 99) { b.y0 <- sample(y0,size=b.m0,replace=T) if (boot.ves <= 0) {y1.s<-y0} else { int <- round(m0*boot.ves) y1.s<- sort(y0)[(int+1):m0]} b.y1 <- sample(y1.s,size=b.m1,replace=T) } if (beta < -99) { b.y0 <- sample(y0,size=b.m0,replace=T) if (boot.ves <= 0) {y1.s<-y0} else {int <- round(m0*(1-boot.ves)) y1.s<- sort(y0)[1:(int-1)]} b.y1 <- sample(y1.s,size=b.m1,replace=T) } if (abs(beta) <= 99) { b.y0 <- sample(y0,size=b.m0,replace=T) alpha <- getalphanonpar(beta,ve,y0) # all the data points in y0.samp are unique: probsplac <- rep(1/length(y0),length(y0)) probsvacc <- weight(y0,alpha,beta)*probsplac b.y1 <- sample(y0,size=b.m1,prob=probsvacc,replace=T) } ########################################################################## if(j==B) break j<-j+1 #cat(" Bootstrap #:", j, "\n") boot.alpha <- getalphanonpar(beta,boot.ves,b.y0) b.TM[j] <- TMstat(b.y0,b.y1,boot.alpha,beta,boot.ves) b.TKS[j] <- TKSstat(b.y0,b.y1,boot.alpha,beta,boot.ves) b.TAD[j] <- TADstat(b.y0,b.y1,boot.alpha,beta,boot.ves) #b.TPM[j] <- TPMstat(b.y0,b.y1,boot.alpha,beta,boot.ves) } ########################################################################## # evaluate bootstrap p-value ######################################################################### if (beta >= 0) { x1<-length(b.TM[b.TM <= TM])/B #x4<-length(b.TPM[b.TPM <= TPM])/B } if (beta < 0) { x1<-length(b.TM[b.TM >= TM])/B #x4<-length(b.TPM[b.TPM >= TPM])/B } x2<-length(b.TKS[b.TKS >= TKS])/B x3<-length(b.TAD[b.TAD >= TAD])/B return(c(x1,x2,x3)) } ######################################################################### # Function for computing a (1-alpha1)x100% confidence interval about the # Average Causal Effect (ACE) of vaccine on viral load. # Returns the 95% CI for the mean difference in viral load # (placebo - vaccine) in the principal stratum of subjects # who would be HIV infected under either randomization assignment. # B = number of bootstrap samples ######################################################################### boot.ci <- function(B,y0,y1,Nn0,Nn1,n0,n1,ve,alpha1,beta) { b.TM <- rep(0,B) j<-0 repeat { b.m1 <- sum(rpois( n1, n1/ Nn1)) b.m0 <- sum(rpois( n0, n0/ Nn0)) if (b.m1==0) next if (b.m0==0) next b.y1 <- sample(y1,size=b.m1,replace=T) b.y0 <- sample(y0,size=b.m0,replace=T) ########################################################################## if(j==B) break j<-j+1 boot.ves <- 1 - ((b.m1/Nn1)/(b.m0/Nn0)) if (boot.ves < 0) {boot.ves <- 0} boot.alpha <- getalphanonpar(beta,boot.ves,b.y0) b.TM[j] <- TMstat(b.y0,b.y1,boot.alpha,beta,boot.ves) } ########################################################################## # evaluate bootstrap p-value ######################################################################### x5 <- quantile(b.TM,probs=alpha1/2) x6 <- quantile(b.TM,probs=1-alpha1/2) return(c(x5,x6)) } ############ ############ # Function for calculating $\widehat F_V(y)$ or \widehat F_p(y)$ # (empirical distributions) ############################################ empdf <- function(y,ysamp) { n <- length(ysamp) vect <- rep(1,n) x <- sum(vect[ysamp <= y])/n return(x) } ########### ########### # Function for calculating $\widehat F_p(y|beta)$ ############################################# lowbnddf <- function(y,ysamp,alpha,beta,ve) { if (beta > 99) { e.A <- quantile(ysamp,ve) return(ifelse(y < e.A,0,(empdf(y,ysamp) - ve)/(1 - ve))) } if (beta < -99) { e.A <- quantile(ysamp,1-ve) return(ifelse(y > e.A,1,empdf(y,ysamp)/(1 - ve))) } if (abs(beta) <= 99) { return(ifelse(y < min(ysamp),0,sum(weight(ysamp[ysamp <= y],alpha,beta))/ Weight(ysamp,alpha,beta))) } } ############ ############ # logistic weight function ################################# weight <- function(y,alpha,beta) { return(exp(alpha + beta*y)/(1 + exp(alpha + beta*y))) } ############## ############## # Normalizing constant ################################## Weight <- function(ysamp,alpha,beta) { return(sum(weight(ysamp,alpha,beta))) } ############### ############### # Nonparametric mean shift test statistic ############### TMstat <- function(y0.samp,y1.samp,alpha,beta,estimated.ves) { m0 <- length(y0.samp) mn.samp1 <- mean(y1.samp) if (abs(beta) <= 99) { mn.samp0 <- sum(weight(y0.samp,alpha,beta)*y0.samp)/ Weight(y0.samp,alpha,beta) } if (beta > 99) { if (estimated.ves < 0) {mn.samp0 <- mean(y0.samp) } else {int <- round(m0*estimated.ves) mn.samp0 <- mean(sort(y0.samp)[(int+1):m0])} } if (beta < -99) { if (estimated.ves < 0) {mn.samp0 <- mean(y0.samp) } else {int <- round(m0*(1-estimated.ves)) mn.samp0 <- mean(sort(y0.samp)[1:(int-1)])} } return(-(mn.samp1 - mn.samp0)) } ############### ############### # Kolmogorov-Smirnov test statistic, for 1-sided alternative ############### TKSstat <- function(y0.samp,y1.samp,alpha,beta,ve) { fullysamp <- c(y0.samp,y1.samp) maxval <- 0 for (i in 1:length(fullysamp)) { x <- ifelse(beta>=0,1,-1)*(lowbnddf(fullysamp[i],y0.samp,alpha,beta,ve) - empdf(fullysamp[i],y1.samp)) if (x > maxval) {maxval <- x} } M <- (length(y0.samp)*length(y1.samp))/(length(y0.samp) + length(y1.samp)) maxval <- maxval*sqrt(M) return(max(maxval,0)) } ############### ############### # Anderson-Darling test statistic, for 1-sided alternative ############### TADstat <- function(y0.samp,y1.samp,alpha,beta,ve) { x <- 0 m <- length(y0.samp) n <- length(y1.samp) N <- m + n fullysamp <- sort(c(y0.samp,y1.samp)) probslowbnd <- rep(0,length(fullysamp)) probsempdf <- rep(0,length(fullysamp)) probslowbnd[1] <- lowbnddf(fullysamp[1],y0.samp,alpha,beta,ve) probsempdf[1] <- empdf(fullysamp[1],y1.samp) for (i in 2:length(fullysamp)) { probslowbnd[i] <- lowbnddf(fullysamp[i],y0.samp,alpha,beta,ve) - lowbnddf(fullysamp[i-1],y0.samp,alpha,beta,ve) probsempdf[i] <- empdf(fullysamp[i],y1.samp) - empdf(fullysamp[i-1],y1.samp) } for (i in 1:length(fullysamp)) { temp <- (m/N)*lowbnddf(fullysamp[i],y0.samp,alpha,beta,ve) + (n/N)*empdf(fullysamp[i],y1.samp) if (temp*(1-temp) > 0) { y <- ifelse(beta>=0,1,-1)*(lowbnddf(fullysamp[i],y0.samp,alpha,beta,ve) - empdf(fullysamp[i],y1.samp)) x <- x + (ifelse(y > 0,y**2,0)/(temp*(1 - temp)))*((m/N)*probslowbnd[i] + (n/N)*probsempdf[i]) } } M <- (m*n)/N x <- x*M return(x) } ############### ############### # Parametric mean shift test statistic, for 1-sided alternative ############### #TPMstat <- function(y0.samp,y1.samp,alpha,beta,ve) { # #fullysamp <- c(y0.samp,y1.samp) # #x <- 0 # #if (beta > 99) { # #e.A <- qnorm(ve, mean(y0.samp), sqrt(var(y0.samp))) # #x <- -mean(y1.samp) + (mean(y0.samp) #- (dnorm((e.A - mean(y0.samp))/sqrt(var(y0.samp)))/ #(1-pnorm((e.A - mean(y0.samp))/sqrt(var(y0.samp)))))*sqrt(var(y0.samp))) # #} # #if (beta < -99) { # #e.A <- qnorm(1-ve, mean(y0.samp), sqrt(var(y0.samp))) # #x <- -mean(y1.samp) + (mean(y0.samp) #- (dnorm((mean(y0.samp) - e.A)/sqrt(var(y0.samp)))/ #(1-pnorm((mean(y0.samp) - e.A)/sqrt(var(y0.samp)))))*sqrt(var(y0.samp))) # #} # #if (abs(beta) <= 99) { # #k <- 6 #bandwidth <- 0.01 #sum <- 0 #mu0 <- mean(y0.samp) #sd0 <- sqrt(var(y0.samp)) #midp <- mu0 - k*sd0 #while (midp < mu0 + k*sd0) { # sum <- sum + midp*weight(midp,alpha,beta)* # dnorm(midp,mean=mu0,sd=sd0)* # bandwidth # midp <- midp + bandwidth #} # #sum2 <- 0 #mu1 <- mean(y1.samp) #sd1 <- sqrt(var(y1.samp)) #midp <- mu1 - k*sd1 #while (midp < mu1 + k*sd1) { # sum2 <- sum2 + midp*dnorm(midp,mean=mu1,sd=sd1)* # bandwidth # midp <- midp + bandwidth #} # #x <- -(mean(y1.samp) - sum/(1-ve)) # #} # #return(x) # #} ################################################ # Main Program ################################################ ################################################ ################ ################ viralloadprogram <- function(b,Nn1,Nn0,y1.samp,y0.samp,alpha1,beta) { ################################################### # Input variables # # b number of bootstrap repetitions # Nn1 = number randomized to vaccine group # Nn0 = number randomized to placebo group # y1.samp = vector of viral loads in vaccine group, 1 per subject # y0.samp = vector of viral loads in placebo group, 1 per subject # alpha1 = The 2-sided confidence intervals about the ACE(beta) have # coverage (1-alpha1)x100%. # beta = a real number, the presumed amount of selection bias # beta = 0 reflects no selection bias, beta > 0 reflects # selection bias towards higher viral loads in vaccinees, # beta < 0 reflects selection bias towards lower viral loads # in vaccinees. beta = 100 and beta = -100 are the extreme # amounts of bias considered by Hudgens, Hoering, and Self (2003, Stat Med). ######################################################################### # Numbers infected in the two groups: n1 <- length(y1.samp) n0 <- length(y0.samp) # Remove missing values from the viral load samples y1.samp <- y1.samp[!is.na(y1.samp)] y0.samp <- y0.samp[!is.na(y0.samp)] # Test statistics: TM <- 0 TKS <- 0 TAD <- 0 #TPM <- 0 # Critical values of test statistics: cvnM <- 0 cvnKS <- 0 cvnAD <- 0 #cvnPM <- 0 estimated.ves <- 1 - ((n1/Nn1)/(n0/Nn0)) # Truncate the estimated ves to 0 if it is negative: if (estimated.ves < 0) {estimated.ves <- 0} # Calculate alpha given beta: alpha <- getalphanonpar(beta,estimated.ves,y0.samp) # Evaluate the test statistics: TM <- TMstat(y0.samp,y1.samp,alpha,beta,estimated.ves) TKS <- TKSstat(y0.samp,y1.samp,alpha,beta,estimated.ves) TAD <- TADstat(y0.samp,y1.samp,alpha,beta,estimated.ves) # TPM <- TPMstat(y0.samp,y1.samp,alpha,beta,estimated.ves) cat(paste("Test stats TM, TKS, TAD = ",TM,TKS,TAD),"\n") # Compute the 1-sided p-values for the test statistics: x <- boot.nonpara(b,y0.samp, Nn0, Nn1, n0, n1, estimated.ves, beta,TM,TKS,TAD) pvalM <- x[1] pvalKS <- x[2] pvalAD <- x[3] # pvalPM <- x[4] # Compute the (1-alpha)x100% CI for the ACE: x <- boot.ci(b,y0.samp, y1.samp, Nn0, Nn1, n0, n1, estimated.ves, alpha1, beta) lowCI <- x[1] upCI <- x[2] x <-round(c(beta,b,n1,n0,estimated.ves,pvalM,pvalKS,pvalAD,TM,lowCI,upCI),5) cat(paste("P-values for TM, KS, AD statistics = ",pvalM,pvalKS,pvalAD),"\n") cat(paste("Estimated ACE, LL 95% CI, UL 95% CI = ",TM,lowCI,upCI),"\n") write(x,file="output.dat",ncolumns=length(x),append=T) } ############################################## ##############################################