README1006440004230000170000000332707117205235010365 0 0hughesusers 1 0 This file contains programs written in SPlus and Fortran to implement the methods described in "Analysis of a Randomized Trial to Prevent Vertical Transmission of HIV-1" by J.P. Hughes and B.A. Richardson, JASA, Dec. 2000. This software can be freely used and redistributed for non-commercial purposes only. The following programs are included in this tarfile: README - this file combined.s - does nonparametric estimate of joint distribution of HIV infection time and death (see Hughes and Richardson, JASA, Dec. 2000 (tent.)) combined.f - associated fortran routines called by combined.s weibspl.s - does semiparametric estimate of joint distribution of HIV infection time and death (see Hughes and Richardson, JASA, Dec. 2000 (tent.)) weibspl.f - associated fortran routines called by weibspl.s example.s - example S session example.sdata - S formatted data and result objects Both programs (combined and weibspl) are documented in the respective .s program file. To use either program you need to do the following (% is the unix prompt and > is the Splus prompt). % f77 -O -c combined.f % f77 -O -c weibspl.f % Splus > source("combined.s") > source("weibspl.s") Expect some variation across systems - I did all this on a Decstation running Ultrix with Splus version 3.1. In particular, note that 1) both combined.s and weibspl.s include dyn.load() functions to load the Fortran object files. You may have to change this for your system. 2) weibspl.s loads the Therneau survival package, which is standard on later versions of S. The function survfit() (and supporting routines) is used from this package. Report problems/comments to Jim Hughes (hughes@biostat.washington.edu) combined.s1006440004230000170000002133007034201755007767 037777777777 1 0 combined <- function(hl,hr,dl,dr,possfp,test0,result0,start,phi=1.0, fixphi=T,eps=.000001,quiet=T,info=F){ # # Generate empirical pdf for joint distribution of hiv infection and death # where either one or both may be interval censored. Uses a self-consistent # algorithm combined with vertex exchange for identifying zero mass points. # phi, the specificity at time 0 (birth), may also be estimated from the data. # Diagnostic plots are offered to determine behavior of the information matrix. # # INPUT # Required: # hl,hr = left and right endpoints, respectively, of the interval # during which hiv infection occurred. Open on the left, # closed on the right (hl,hr]. Time -1 means infected at or # before birth; time 25 means HIV-free at death/24 months. See # section 2 of Hughes and Richardson for details. # dl,dr = left and right endpoints, respectively, of the interval # during which death occurred. Open on the left, # closed on the right (hl,hr]. Time 25 means death occurred # after 24 months. # possfp = is this a possible false positive test (i.e. positive at time 0, # and never tested negative later) # test0 = was a test done at time 0? # result0 = result at time 0 (if done) # # Optional: # start = list giving initial value information; must consist of the # following 4 components (most easily obtained from a prior # call to combined8; see OUTPUT, below): # hiv.mass.pts # death.mass.pts # initial.set # f0 # phi = specificity (default is 1.0) # fixphi = if T, fix phi at the initial value; if false, estimate phi # eps = average change in parameters to stop the SC iterations; may # be automatically decreased to achieve small lagrange multipliers # quiet = do you want intermediate output during iterations? # info = compute the information matrix? (Note: it is not # clear that information is useful for this estimate since # asymptotic normality probably does not hold in general) # # Note: # Missing values not allowed # # OUTPUT # A list with the following components: # hiv.mass.pts - m x 2 matrix of support intervals (left,right] of # HIV infection time # death.mass.pts - n x 2 matrix of support intervals (left,right] of death time # initial.set - m row by n column matrix; the initial mass on the i,j'th # support set (formed by the intersection of hiv.mass.pts and # death.mass.pts) is given f0[initial.set[i,j]]; a "0" in the # initial.set matrix means there is no mass on that support # set # f0 - initial estimates of mass (see initial.set) # phi - final estimate of specificity (or initial value, if fixed) # final.set - m row by n column matrix; the final mass on the i,j'th # support set (formed by the intersection of hiv.mass.pts and # death.mass.pts) is given f[final.set[i,j]]; a "0" in the # final.set matrix means there is no mass on that support # set # f - final estimates of mass (see final.set) # var - covariance matrix for f (use with caution; see note under "info" above) # lagrange - lagrange multipliers (see Gentleman and Geyer) # dyn.load("combined.o") n <- length(hl) hl <- ifelse(hl==hr,hl-.0000001,hl) dl <- ifelse(dl==dr,dl-.0000001,dl) # # Initial values if (missing(start)) { # # Get maximal number of possible support intervals for HIV infection; # Picks the intervals (hleft,hright] such that hleft \in hl, # hright \in hr, and there is no other hl or hr between hleft and hright. hleft <- unique(hl) names(hleft) <- rep(0,length(hleft)) hright <- unique(hr) if (sum(possfp)>0) hright <- unique(c(0,hright)) names(hright) <- rep(1,length(hright)) hint <- sort(c(hright,hleft)) hdiff <- diff(as.integer(names(hint))) hleft <- hint[c(hdiff,0)==1] hright <- hint[c(0,hdiff)==1] nh <- length(hleft) # # Get maximal number of possible support intervals for death times; # Picks the intervals (dleft,dright] such that dleft \in dl, # dright \in dr, and there is no other dl or dr between dleft and dright. dleft <- unique(dl) names(dleft) <- rep(0,length(dleft)) dright <- unique(dr) names(dright) <- rep(1,length(dright)) dint <- sort(c(dright,dleft)) ddiff <- diff(as.integer(names(dint))) dleft <- dint[c(ddiff,0)==1] dright <- dint[c(0,ddiff)==1] nd <- length(dleft) # # Set up the alpha matrix and get initial values for f # maxp <- sum(outer(hright,dright,"<=")) + length(dright) - 1 np <- 0 ierr <- 0 initial.set <- matrix(0,nh,nd) alpha <- matrix(0,n,maxp) f0 <- rep(0,maxp) storage.mode(initial.set) <- "integer" storage.mode(alpha) <- "integer" # Find the maximal support set and set up alpha matrix z1 <- .Fortran("alpha", as.integer(n), as.integer(nh), as.double(hleft), as.double(hright), as.integer(nd), as.double(dleft), as.double(dright), as.double(hl), as.double(hr), as.double(dl), as.double(dr), as.integer(maxp), set=initial.set, np=as.integer(np), alpha=alpha, f0=as.double(f0), ierr=as.integer(ierr)) if (z1$ierr==1) stop(paste("Too many support points; np=",z1$np)) np <- z1$np f0 <- z1$f0[1:np] initial.set <- matrix(z1$set,nh,nd) alpha <- matrix(z1$alpha,n,np) } else { hleft <- start$hiv.mass.pts[,1] hright <- start$hiv.mass.pts[,2] nh <- length(hleft) dleft <- start$death.mass.pts[,1] dright <- start$death.mass.pts[,2] nd <- length(dleft) initial.set <- start$initial.set f0 <- start$f0 np <- length(f0) alpha <- matrix(0,n,np) storage.mode(initial.set) <- "integer" storage.mode(alpha) <- "integer" # set up alpha matrix z2 <- .Fortran("alpha2", as.integer(n), as.integer(nh), as.double(hleft), as.double(hright), as.integer(nd), as.double(dleft), as.double(dright), as.double(hl), as.double(hr), as.double(dl), as.double(dr), as.integer(np), initial.set, alpha=alpha) alpha <- matrix(z2$alpha,n,np) } # Does this support point correspond to HIV infection at birth? t.zero <- ifelse(match(1:np,initial.set[1,],nomatch=0)>0,1,0) d <- rep(0,np) nu <- rep(0,n) a <- matrix(0,n,np) # SC steps: cat("Starting self-consistency steps...\n") storage.mode(alpha) <- "integer" storage.mode(a) <- "double" z <- .Fortran("selfcst2", as.integer(n), as.integer(np), phi=as.double(phi), as.integer(possfp), as.integer(test0), as.integer(result0), as.integer(t.zero), alpha, a, f0=as.double(f0), nu=as.double(nu), d=as.double(d), as.integer(fixphi), as.integer(quiet), as.double(eps)) phi <- z$phi f0 <- z$f0 nu <- z$nu u <- n-z$d f <- f0[f0>0] final.set <- matrix(match(initial.set,(1:np)[f0>0],nomatch=0), nrow=dim(initial.set)[1]) if (info) { np1 <- sum(f0>0)-1 ind <- (1:np)[f0>0] Z <- rbind(rep(-1,np1),diag(np1)) alf <- (alpha*(1-phi)^outer(possfp,1-t.zero))[,ind]/nu k <- 0 cat("Do you want a variance stability plot? ") ans <- readline() if (ans=="y"|ans=="yes") { motif() repeat{ kvals <- eval(parse(prompt="Enter a vector of k values: ")) nk <- length(kvals) kvar <- matrix(NA,np1,nk) for (k in 1:nk) { kvar[,k]<-sqrt(diag(solve(t(Z)%*%(t(alf)%*%alf+diag(kvals[k]/f))%*%Z))) } plot(c(min(kvals),max(kvals)),c(min(kvar,na.rm=T),max(kvar,na.rm=T)), type="n",xlab="k",ylab="Var") for (i in 1:np1) { lines(kvals,kvar[i,]) # text(0,kvar[i,1],paste(i),adj=0,cex=.5) } cat("Another plot? ") ans <- readline() if (ans=="n"|ans=="no") break } dev.off() k <- eval(parse(prompt="Enter k: ")) } var <- solve(t(Z)%*%(t(alf)%*%alf+diag(k/f))%*%Z) cov <- -apply(var,1,sum) var <- cbind(c(sum(var),cov),rbind(cov,var)) } else { var <- NULL } list(hiv.mass.pts = cbind(hleft,hright), death.mass.pts = cbind(dleft,dright), initial.set = initial.set, f0 = f0, phi = phi, final.set = final.set, f = f, var=var, lagrange = u) } ariation across systems - I did all this on a Decstation running Ultrix with Splus version 3.1. In particular, note that 1) both combined.s and weibspl.s include dyn.load() functions to load the Fortran object files. You may have to change this for your system. 2) weibspl.s loads the Therneaucombined.f1006440004230000170000001364707032746506007774 037777777777 1 0 subroutine alpha(n,nh,hleft,hright,nd,dleft,dright,hl,hr,dl,dr, * maxp,set,np,alf,f,ierr) PARAMETER (MAXN = 2000) integer n,nh,nd,np,ierr,maxp integer a(MAXN),alf(n,maxp),set(nh,nd) real*8 hleft(*),hright(*),dleft(*),dright(*) real*8 hl(*),hr(*),dl(*),dr(*),f(*) real*8 sum logical same c c Routine to set up a (alpha) for self-consitency computations. First c dimension of a is observations, second dimension is hiv infection times, c third dimension is death times. Force a(.,i,j) = 0 for i > j, except for c i = nh, which corresponds to not infected. c c This routine also eliminates any time points which are not associated c with at least one failure and time points with non-unique solutions. c np = 0 do 50 j = 1,nh do 50 k = 1,nd if (hleft(j).le.dright(k).or.j.eq.nh) then c if (hright(j).le.dright(k).or.j.eq.nh) then c call makea(n,j,k,hleft,hright,dleft,dright,hl,hr,dl,dr,a) do 60 i = 1,np if (same(n,a,alf(1,i))) then set(j,k) = i goto 50 endif 60 continue np = np + 1 if (np.gt.maxp) then ierr = 1 return endif set(j,k) = np do 70 i = 1,n 70 alf(i,np) = a(i) c endif 50 continue c c Generate initial estimate of f c do 100 i = 1,np 100 f(i) = 0 do 200 i = 1,n sum = 0.0 do 210 j = 1,np 210 sum = sum + n*alf(i,j) do 220 j = 1,np 220 f(j) = f(j) + alf(i,j)/sum 200 continue return end subroutine alpha2(n,nh,hleft,hright,nd,dleft,dright,hl,hr,dl,dr, * maxp,set,alf) PARAMETER (MAXN = 2000) integer n,nh,nd,maxp integer a(MAXN),alf(n,maxp),set(nh,nd) real*8 hleft(*),hright(*),dleft(*),dright(*) real*8 hl(*),hr(*),dl(*),dr(*) c c Routine to set up a (alpha) for self-consitency computations. First c dimension of a is observations, second dimension is hiv infection times, c third dimension is death times. Force a(.,i,j) = 0 for i > j, except for c i = nh, which corresponds to not infected. c do 50 j = 1,nh do 50 k = 1,nd if (hleft(j).le.dright(k).or.j.eq.nh) then c call makea(n,j,k,hleft,hright,dleft,dright,hl,hr,dl,dr,a) do 70 i = 1,n 70 alf(i,set(j,k)) = a(i) c endif 50 continue c return end logical function same(n,a,b) integer n,a(*),b(*) c same=.FALSE. do 100 i = 1,n if (a(i).ne.b(i)) return 100 continue same=.TRUE. return end subroutine makea(n,j,k,hleft,hright,dleft,dright,hl,hr,dl,dr,a) integer n,a(*),j,k real*8 hleft(*),hright(*),dleft(*),dright(*) real*8 hl(*),hr(*),dl(*),dr(*) c do 100 i = 1,n if ( & (hl(i).le.hleft(j).and.hr(i).ge.hright(j)) & .and. & (dl(i).le.dleft(k).and.dr(i).ge.dright(k)) & ) then a(i) = 1 else a(i) = 0 endif 100 continue return end subroutine selfcst2(n,np,phi,possfp,test0,result0, * tzero,alpha,a,f,nu,d,fixphi,quiet,eps) integer n,np,quiet,possfp(*),test0(*),tzero(*),result0(*) integer mn,mx,alpha(n,np),fixphi real*8 f(np),eps,phi,nu(n),d(np),a(n,np) real*8 delta,fpr,zero,one,Q1,dmx,dmn,num,denom real*8 y,phinum,phiden data zero/0.0d0/,one/1.0d0/ c c Self-consistency algorithm with vertex exchange to find zero mass points c 1 continue c phinum = zero phiden = zero do 5 j = 1,np d(j) = zero 5 continue do 1000 i = 1,n fpr = 1.0 if (possfp(i).eq.1) fpr = 1.0-phi do 10 j = 1,np if (tzero(j).eq.0) then a(i,j) = alpha(i,j)*fpr else a(i,j) = alpha(i,j) endif 10 continue nu(i) = zero y = zero do 15 j = 1,np nu(i) = nu(i) + a(i,j)*f(j) 15 continue c if (nu(i).eq.zero) then c call intpr("i",1,i,1) c call dblepr("nu(i)",5,nu(i),1) c endif do 20 j = 1,np d(j) = d(j) + a(i,j)/nu(i) y = y + a(i,j)*f(j)*tzero(j)/nu(i) 20 continue y = (1.0 - y)*test0(i) phinum = phinum+y*(1-result0(i)) phiden = phiden+y 1000 continue c call dblepr("d",1,d,np) c dmn=n dmx=zero do 30 j = 1,np if (d(j).gt.dmx.and.f(j).gt.zero) then dmx=d(j) mx=j endif if (d(j).lt.dmn.and.f(j).gt.zero) then dmn=d(j) mn=j endif 30 continue c c call intpr("mx",2,mx,1) c call dblepr("dmx",3,dmx,1) c Q1 = zero do 2000 i = 1,n num = f(mn)*(a(i,mx) - a(i,mn)) if (num.eq.zero) goto 2000 denom = nu(i)+num if (denom.eq.zero) then c don't do vertex exchange! it would eliminate a critical mass point Q1 = -1.0 goto 150 else Q1 = Q1 + num/denom endif 2000 continue c 150 continue c call dblepr("Q1",2,Q1,1) if (Q1.lt.zero) then c do an EM step c delta = zero do 60 j = 1,np f(j) = d(j)*f(j)/n c delta = delta + (f(j)*(1.0-n/d(j)))**2 60 continue c delta = sqrt(delta/np) delta = dmx/n - one c call dblepr("f",1,f,np) if (quiet.eq.0) call dblepr("EM: delta",9,delta,1) if (fixphi.eq.0) phi = phinum/phiden c call dblepr("phi",3,phi,1) if (delta.lt.eps) return else c else do a vertex exchange step c call intpr("mn",2,mn,1) c call intpr("mx",2,mx,1) c call dblepr("Q1",2,Q1,1) f(mx) = f(mx) + f(mn) f(mn) = zero c call dblepr("f",1,f,np) if (quiet.eq.0) call intpr("VE: mass point set to 0.0 ",26,mn,1) endif goto 1 end Too many support points; np=",z1$np)) np <- z1$np f0 <- z1$f0[1:np] initial.set weibspl.s1006440004230000170000001565007034201702007654 037777777777 1 0 weibspl <- function(hl,hr,dtime,dstatus,possfp,phi=1.0,knots, start,eps=1e-6,print.level=0,...){ # # Generate empirical pdf for joint distribution of hiv infection and death # where death is known or right censored and HIV infection is interval # censored. Uses NPMLE for marginal distribiution of death time and # Weibull distribution (with time-varying coefficients) for conditional # distribution of time of HIV infection given death time. The algorithm # alternates between maximizing the observed likelihood for HIV infection # time given death time and the marginal distribution of death time (see # Hughes and Richardson, 1999, for details). # # INPUT # Required: # hl,hr = left and right endpoints, respectively, of the interval # during which hiv infection occurred. Open on the left, # closed on the right (hl,hr]. Time -1 means infected at or # before birth; time >24 means HIV-free at death/24 months. See # section 2 of Hughes and Richardson for details. # dtime = time of death or right censoring # dstatus = was it a death (dstatus = 1) or censoring (dstatus = 0) # possfp = is this a possible false positive test (i.e. positive at time 0, # and never tested negative later) # knots = either a single vector which gives common knotpoints for all # parameters; or a list with components p, b, c giving points # to put knots # at for parameters p, b and c (ignored if start is given) # # Optional: # start = list giving initial value information; must consist of the # following components (see Output, below): # death.support # death.mass # hiv.basis # hiv.par # phi = specificity of test at time 0 (default is 1.0) # eps = average change in parameters to stop the iterations # print.level = do you want intermediate output during iterations? # ... = optional arguments to be passed on to nlmin # # Note: # Missing values not allowed # # OUTPUT # A list with the following components: # loglike = final value of the observed likelihood # hiv.par = list giving the parameters of the splines for HIV infection # time distribution. Components of the list are: # p= vector of spline parameters for prob of escaping # in-utero infection # b= vector of spline parameters for Weibull scale # c= vector of spline parameters for Weibull shape # hiv.basis = list giving the basis for the splines for HIV infection # time distribution. Components of the list are: # npar = vector of length 3 giving #knots + 2 for escape prob., # Weibsull scale and shape (p,b,c), respectively # knots = list with components: # p = location of knots for p (NULL if no knots) # b = location of knots for b (NULL if no knots) # c = location of knots for c (NULL if no knots) # basis = list with components: # p= basis space for p (column dimension = npar[1]) # b= basis space for b (column dimension = npar[2]) # c= basis space for c (column dimension = npar[3]) # death.support = vector of support times for the death time distribution # death.mass = vector of mass estimates for the death time distribution # phi = value of phi used in the estimation # delta = sum of squared differences of parameters in the last two iterations # # load Therneau's survival routines library(survival, first = T) library.dynam("survival", "survival_l.o") # dyn.load("weibspl.o") n <- length(hl) if (missing(start)) { temp <- summary(survfit(Surv(dtime,dstatus) ~ 1)) df <- -diff(c(1,temp$surv)) dpts <- temp$time if (is.list(knots)) { pknots <- knots$p bknots <- knots$b cknots <- knots$c } else { pknots <- knots bknots <- knots cknots <- knots } np <- c(2+length(pknots),2+length(bknots),2+length(cknots)) pX <- t(ns(dpts,knots=pknots,intercept=T)) bX <- t(ns(dpts,knots=bknots,intercept=T)) cX <- t(ns(dpts,knots=cknots,intercept=T)) u <- rep(0,sum(np)) } else { df <- start$death.mass dpts <- start$death.support np <- start$hiv.basis$npar pknots <- start$hiv.basis$knots$p bknots <- start$hiv.basis$knots$b cknots <- start$hiv.basis$knots$c pX <- t(start$hiv.basis$basis$p) bX <- t(start$hiv.basis$basis$b) cX <- t(start$hiv.basis$basis$c) u <- c(start$hiv.par$p,start$hiv.par$b,start$hiv.par$c) } nd <- length(df) storage.mode(pX) <- "double" storage.mode(bX) <- "double" storage.mode(cX) <- "double" assign("n",n,frame=1) assign("np",np,frame=1) assign("nd",nd,frame=1) assign("hl",hl,frame=1) assign("hr",hr,frame=1) assign("dtime",dtime,frame=1) assign("dstatus",dstatus,frame=1) assign("pX",pX,frame=1) assign("bX",bX,frame=1) assign("cX",cX,frame=1) assign("dpts",dpts,frame=1) assign("phi",phi,frame=1) assign("possfp",possfp,frame=1) repeat{ #maximize g(s|t) assign("df",df,frame=1) temp <- nlmin(obslike,u,print.level=print.level,...) pnrm <- sum((u-temp$x)^2) u <- temp$x #maximize f(t) temp <- .Fortran("fgiveng", as.integer(n), as.integer(nd), as.double(hl), as.double(hr), as.double(dtime), as.integer(dstatus), as.integer(np[1]), as.integer(np[2]), as.integer(np[3]), pX, bX, cX, as.double(dpts), df=as.double(df), as.double(u), as.double(phi), as.integer(possfp), as.integer(print.level), as.double(eps)) pnrm <- pnrm + sum((df-temp$df)^2) df <- temp$df if (sqrt(pnrm/(nd+sum(np)))