#########################################################################
# R FUNCTION: Plotconfidenceband.loghazardratio.r
#
# DESCRIPTION: When sourced in R, this function:
#
# (1) Inputs a 2-arm clinical trial dataset in the file datafile;
#
# (2) Calls the function confidenceband.loghazardratio() contained in the file 
# confidenceband.loghazardratio.r, to compute the estimated log hazard ratio
# (treatment 2/treatment 1) and 95% pointwise and 80% and 95% simultaenous confidence 
# intervals about the log hazard ratio over time;  and 
#
# (3) Plots the estimated log hazard ratio over time with 95% pointwise and
# 80%, 95% simultaneous confidence intervals; this plot is written to the 
# file figureloghazardratio.ps
#
# The method used to compute the simultaneous confidence intervals is described
# in Gilbert P, Wei LJ, Kosorok MR, Clemens JD. Simultaneous inference on the 
# contrast of two hazard functions with censored observations. Biometrics, 2002; 
# 58:773-780.
#########################################################################
#
# INPUT variables into the function Plotconfidenceband.loghazardratio.r
#
# datafile = The name of the text file that contains the dataset, with
#                n rows (n = number of subjects in the trial) and 3 columns.
#                First column: the minimum of the censoring time and failure time.
#                Second column: failure indicator: 0 if censored, 1 if event.
#                Third column: treatment indicator: 1 = treatment 1 (e.g., control), 
#                2 = treatment 2 (e.g., active). This variable must be coded as 1 and 2.
#
# [u1,u2]  = the time interval over which the confidence band is computed
# [t1,t2]  = the time interval over which the confidence band could be computed (at least
#            as wide as [u1,u2]). t1 should be chosen larger than both of the smallest
#            infection times for each group. t2 should be chosen smaller than both of 
#            the largest infection times for each group.
#            In practice it is often fine to set t1=u1; t2=u2
# N        = number of simulated Gaussian variates used for computing the
#            simultaneous confidence bands (N = 1000 is adequate)
# band11   = second derivative bandwidth for treatment 1, as on page 249 of Andersen, Borgan,
#            Gill, and Keiding (1993), used to calculate the optimal bandwidth band1
#            for the treatment 1 sample.
#            If band11 = 0, the optimal band11 is calculated using a bootstrap procedure.
# band1    = the bandwidth for the first sample. The default value is 
#            the optimal bandwidth, calculated using the formula  in ABGK (1993)
# band12   = second derivative bandwidth for treatment 2, as on page 249 of Andersen, Borgan,
#            Gill, and Keiding (1993), used to calculate the optimal bandwidth band2
#            for the treatment 2 sample.
#            If band12 = 0, the optimal band12 is calculated using a bootstrap procedure.
# band2    = the bandwidth for the second sample. The default value is
#            the optimal bandwidth, calculated using the formula  in ABGK (1993)
# ngrid    = the number of time-points spanning [t1,t2] for computing
#            the log hazard ratio and the confidence bands. The default value is 50.
# biasadjust = T means that the hazard rate estimates are adjusted by the asymptotic
#            bias correction term (4.2.27) in ABGK.
# tailsl   = T means that the method described on page 251 of
#            ABGK is used to calculate the log hazard ratio estimate
#            and the confidence intervals/bands in the lower tail [t1,u1]
# tailsu   = T means that the method described on page 251 of
#	     ABGK is used to calculate the log hazard ratio estimate
#	     and the confidence intervals/bands in the upper tail [u2,t2]
# nboot    = Number of bootstrap samples used for the cross-validation
#            procedure for estimating the bandwidth parameters used to
#            estimate the second derivatives band11 of the hazard functions
# Nv       = 0 implies that an asymptotic variance formula will be used for the 
#            variance function v()
#          > 0 implies that the simulation procedure will be used to calculate
#            the variance function v(). In this case, Nv is the number of iterations
#            used to calculate the simulated estimate of v.
# yliml    = lower limit of y-axis. If yliml=0, then it is set at the value of
#            the lower 95% simultaneous confidence band at t1.
# ylimu    = upper limit of y-axis. If ylimu=0, then it is set at the value of
#            the upper 95% simultaneous confidence band at t2.
###################################################################################

Plotconfidenceband.loghazardratio <- function(datafile,Rx,u1,u2,t1,t2,N=1000,band11=0,
band1=0,band12=0,band2=0,ngrid=50,biasadjust=T,tailsl=T,tailsu=T,nboot=500,Nv=0,yliml=0,ylimu=0) {

tempmat <- matrix(scan(datafile),ncol=3,byrow=T)

time <- tempmat[,1]
delta <- tempmat[,2]
Rx <- tempmat[,3]

# Make sure t1<=u1 and u2<=t2
if (t1 > u1) { u1 <- t1 }
if (t2 > u2) { u2 <- t2 }

# Make sure t1 is big enough

m1 <- sort(time[Rx==1 & delta==1])[2]
m2 <- sort(time[Rx==2 & delta==1])[2]
t1 <- max(m1,m2)

# Make sure t2 is small enough

m1 <- sort(time[Rx==1 & delta==1])[length(sort(time[Rx==1 & delta==1]))-1]
m2 <- sort(time[Rx==2 & delta==1])[length(sort(time[Rx==2 & delta==1]))-1]
t2 <- min(m1,m2)

## Compute the estimates and bands:

source('confidenceband.loghazardratio.r')

answermat <- confidenceband.loghazardratio(time,delta,Rx,u1,u2,t1,t2,N,band11,band1,band12,band2,
                                           ngrid,biasadjust,tailsl,tailsu,nboot,Nv)

time <- answermat[,1]
loghazratio <- answermat[,2]
lowint <- answermat[,3]
upint <- answermat[,4]
lowband <- answermat[,5]
upband <- answermat[,6]
lowband80 <- answermat[,9]
upband80 <- answermat[,10]

yliml <- ifelse(yliml==0,quantile(lowband,.025),yliml)
ylimu <- ifelse(ylimu==0,quantile(upband,.975),ylimu)

postscript("figureloghazratio.ps",horizontal=T)
par(las=1,cex=1.2)
plot(time,lowband80,xlab="Time",ylab="Log hazard ratio",ylim=c(yliml,ylimu),type='n')

lines(time,loghazratio,lwd=2.3,col="red")
lines(time[!is.na(lowint)],lowint[!is.na(lowint)],lty=2,lwd=2.3,col="navy")
lines(time[!is.na(upint)],upint[!is.na(upint)],lty=2,lwd=2.3,col="navy")
lines(time[!is.na(lowband80)],lowband80[!is.na(lowband80)],lty=3,lwd=2.3,col="purple")
lines(time[!is.na(upband80)],upband80[!is.na(upband80)],lty=3,lwd=2.3,col="purple")
lines(time[!is.na(lowband)],lowband[!is.na(lowband)],lty=4,lwd=2.3,col="green")
lines(time[!is.na(upband)],upband[!is.na(upband)],lty=4,lwd=2.3,col="green")

abline(h=0)
legend(quantile(time,.05),yliml + 0.2*(ylimu-yliml),legend=c("95% pointwise CIs","80% simultaneous CIs","95% simultaneous CIs"),lty=c(2,3,4),col=c("navy","purple","green"),lwd=2.3)

title(main="Estimate of log hazard ratio over time",cex=1.2)
dev.off()

}

#############################################################################