ccfit <- function(formula, weights, idel, icoh, istr, str.sz,...) ### ccfit <- function(formula, idel, icoh, istr, str.sz) ### ### Fits case-cohort design using Cox PH model ### ### idel contains indicator of failure (the same as given in formula) ### icoh contains indicator of subcohort ### istr contains stratum number (must be 1,...,K) ### str.sz is a vector of K stratum sizes for the whole cohort ### (must be previously restricted to the relevant subset) { options(contrasts=c(factor="contr.treatment",ordered="contr.poly")) ## indicate sampled controls samctr <- icoh==1 & idel==0 ## calculate no. of sampled controls in strata subc.sz <- table(istr[samctr]) ## weight cases by 1, controls by 1/empirical probability of sampling ## weights <- ifelse(idel==1,1,(str.sz/subc.sz)[istr]) ## fit weighted Cox model fitc <- coxph(formula=formula,weights=weights,x=TRUE) ## calculate unweighted score residuals scres <- residuals(fitc,type="score",weighted=FALSE) ## divide by number of sampled controls if(is.null(dim(scres))) { # just a vector, one covariate ## calculate sums of residuals of sampled controls by stratum str.mean <- rowsum(scres[samctr],istr[samctr],reorder=TRUE) str.mean <- str.mean/subc.sz ## calculate terms to be squared adj.vec <- (scres-str.mean[istr]) } else # true matrix { ## calculate sums of residuals of sampled controls by stratum str.mean <- rowsum(scres[samctr,],istr[samctr],reorder=TRUE) str.mean <- sweep(str.mean,1,subc.sz,"/") ## calculate terms to be squared adj.vec <- (scres-str.mean[istr,]) } ## calculate stratum-specific weights wgterm <- (str.sz-subc.sz)*str.sz/(subc.sz^2) wgmat <- diag((1-idel)*icoh*wgterm[istr]) ## square and sum the terms to get extra case-cohort variance sigex <- t(adj.vec)%*%wgmat%*%adj.vec ## reassign the variances varcox <- fitc$var fitc$naive.var <- varcox fitc$var <- varcox+varcox%*%sigex%*%varcox ## output: fitc fitc } ###################################################### # Schematic example of running ccfit # # x = the expensive covariate of interest, # z = covariates to adjust for # time = the right-censored failure time # delta = the failure indicator, # xK = the sampling stratum, # subcohort = the indicator of whether in the random subcohort # The above 5 variables all have length equal to the number of subjects # # Also, alphaK is a K-vector containing the K sampling fractions # alpha1, ..., alphaK for the K strata. # Each alphak is a scalar constant. # # Code once the variables are defined: keep <- !is.na(x) & !is.na(z) icoh <- subcohort[keep] idel <- c(delta)[keep] samctr <- icoh==1 & idel==0 istr <- xK # Must be an integer 1, 2, ... , K # calculate no. of sampled controls in strata, subc.sz subc.sz <- table(istr[samctr]) # str.sz.m is a vector of K stratum sizes for the whole cohort # (must be previously restricted to the relevant subset) str.sz.m <- table(istr[samctr])*c(1/alphaK) # weight cases by 1, controls by 1/empirical probability of sampling weights <- ifelse(idel==1,1,(str.sz.m/subc.sz)[istr]) fit <- ccfit(Surv(time[keep],delta[keep]) ~ x[keep] + z[keep], weights=weights,idel=idel,icoh=icoh,istr=istr,str.sz=str.sz.m)