ccfit <- function(formula, weights, idel, icoh, istr, str.sz,...)
### ccfit <- function(formula, idel, icoh, istr, str.sz)
###
### Fits case-cohort design using Cox PH model
###
### idel contains indicator of failure (the same as given in formula)
### icoh contains indicator of subcohort
### istr contains stratum number (must be 1,...,K)
### str.sz is a vector of K stratum sizes for the whole cohort
###        (must be previously restricted to the relevant subset)
{
options(contrasts=c(factor="contr.treatment",ordered="contr.poly"))
  ## indicate sampled controls
samctr <- icoh==1 & idel==0
  ## calculate no. of sampled controls in strata
subc.sz <- table(istr[samctr])  
  ## weight cases by 1, controls by 1/empirical probability of sampling
  ##  weights <- ifelse(idel==1,1,(str.sz/subc.sz)[istr])
  ## fit weighted Cox model
  fitc <- coxph(formula=formula,weights=weights,x=TRUE)
  ## calculate unweighted score residuals
  scres <- residuals(fitc,type="score",weighted=FALSE)
  ## divide by number of sampled controls
  if(is.null(dim(scres)))
    { # just a vector, one covariate
      ## calculate sums of residuals of sampled controls by stratum
      str.mean <- rowsum(scres[samctr],istr[samctr],reorder=TRUE)
      str.mean <- str.mean/subc.sz
      ## calculate terms to be squared
      adj.vec <- (scres-str.mean[istr])
    }
  else # true matrix
    {
      ## calculate sums of residuals of sampled controls by stratum
      str.mean <- rowsum(scres[samctr,],istr[samctr],reorder=TRUE)
      str.mean <- sweep(str.mean,1,subc.sz,"/")
      ## calculate terms to be squared
      adj.vec <- (scres-str.mean[istr,])
    }
  ## calculate stratum-specific weights
  wgterm <- (str.sz-subc.sz)*str.sz/(subc.sz^2)
  wgmat <- diag((1-idel)*icoh*wgterm[istr])
  ## square and sum the terms to get extra case-cohort variance
  sigex <- t(adj.vec)%*%wgmat%*%adj.vec
  ## reassign the variances
  varcox <- fitc$var
  fitc$naive.var <- varcox
  fitc$var <- varcox+varcox%*%sigex%*%varcox
  ## output: fitc
  fitc
}

######################################################
# Schematic example of running ccfit
#
# x = the expensive covariate of interest, 
# z = covariates to adjust for
# time = the right-censored failure time
# delta = the failure indicator, 
# xK = the sampling stratum,
# subcohort = the indicator of whether in the random subcohort
# The above 5 variables all have length equal to the number of subjects
#
# Also, alphaK is a K-vector containing the K sampling fractions
# alpha1, ..., alphaK for the K strata.
# Each alphak is a scalar constant.
#
# Code once  the variables are defined:

keep <- !is.na(x) & !is.na(z)
icoh <- subcohort[keep] 
idel <- c(delta)[keep]
samctr <- icoh==1 & idel==0
istr <- xK # Must be an integer 1, 2, ... , K
# calculate no. of sampled controls in strata, subc.sz 
subc.sz <- table(istr[samctr])
# str.sz.m is a vector of K stratum sizes for the whole cohort
#        (must be previously restricted to the relevant subset)
str.sz.m <- table(istr[samctr])*c(1/alphaK)
# weight cases by 1, controls by 1/empirical probability of sampling
weights <- ifelse(idel==1,1,(str.sz.m/subc.sz)[istr])
fit <- ccfit(Surv(time[keep],delta[keep]) ~ x[keep] + z[keep],
weights=weights,idel=idel,icoh=icoh,istr=istr,str.sz=str.sz.m)