######################################################## ## ## check data validality ## ######################################################## check.data <- function() { rtn <- TRUE if(nAlpha > nCovariate) { cat("Error: size of parameter alpha can not great then number of covariates!\n\n") rtn <- FALSE } if(nBeta > nCovariate) { cat("Error: size of parameter beta can not great then number of covariates!\n\n") rtn <- FALSE } if(nGamma > nCovariate) { cat("Error: size of parameter gamma can not great then number of covariates!\n\n") rtn <- FALSE } if(sum(XalphaIndicator)!=nAlpha) { cat("Error: the alpha indicator XalphaIndicator is not consistent with nAlpha!\n\n") rtn <- FALSE } if(sum(XbetaIndicator)!=nBeta) { cat("Error: the beta indicator XbetaIndicator is not consistent with nBeta!\n\n") rtn <- FALSE } if(sum(XgammaIndicator)!=nGamma) { cat("Error: the gamma indicator XgammaIndicator is not consistent with nGamma!\n\n") rtn <- FALSE } return(rtn) } ######################################################## ## ## calculation variance of parameters in regression model ## ######################################################## varPar <- function(beta, gamma, Xbeta, Xgamma, Y) { ind <- Y<1e-6 g <- as.vector(fg(beta, gamma, Xbeta, Xgamma)) zz <- Xbeta[!ind,] / g[!ind] beta.var <- t(zz) %*% zz beta.var <- try(solve(beta.var),TRUE) if(is.character(beta.var)) { return(beta.var) } zz <- Xgamma[!ind,] gamma.var <- t(zz) %*% zz / 2 gamma.var <- try(solve(gamma.var),TRUE) if(is.character(gamma.var)) { return(gamma.var) } B.beta <- cbind(nSample*beta.var, matrix(0, nBeta, nGamma)) B.gamma <- cbind(matrix(0, nGamma, nBeta), nSample*gamma.var) return(list(B.beta=B.beta, B.gamma=B.gamma)) } ######################################################## ## ## MLE estimator for beta and gamma ## define estimating equation and jacobi ## ######################################################## # estimating equation for MLE bg.mle <- function(x, Xbeta, Xgamma, Y) { beta <- x[1:nBeta] gamma <- x[(nBeta+1):(nBeta+nGamma)] ind <- Y<1e-6 g <- as.vector(fg(beta, gamma, Xbeta, Xgamma)) g[ind] <- 0 e <- as.vector(fhinv(Y) - fm(beta, Xbeta)) / g e[ind] <- 0 # ith row of fval stores \Psi_i fval <- matrix(0, nSample, nBeta+nGamma) temp <- (e*e-1) / g fval[, 1:nBeta] <- ( e / g ) * Xbeta + temp * dfg.beta(beta,gamma,Xbeta, Xgamma) fval[, (nBeta+1):(nBeta+nGamma)] <- temp * dfg.gamma(beta,gamma,Xbeta, Xgamma) fval[ind,] <- 0 return(colSums(fval)) } # jacobi of estimating equation for MLE # # de.beta = - dfm.beta/g - (e/g) * dfg.beta # de.gamma = - (e/g) * dfg.gamma # dbg.mle <- function(x, Xbeta, Xgamma, Y) { D11 <- matrix(0, nBeta, nBeta) D12 <- matrix(0, nBeta, nGamma) D21 <- matrix(0, nGamma, nBeta) D22 <- matrix(0, nGamma, nGamma) beta <- x[1:nBeta] gamma <- x[(nBeta+1):(nBeta+nGamma)] ind <- Y<1e-6 g <- as.vector(fg(beta, gamma, Xbeta, Xgamma)) g[ind] <- 0 e <- as.vector(fhinv(Y) - fm(beta, Xbeta)) / g e[ind] <- 0 k1 <- 1/(g*g) k2 <- 2*e*k1 k3 <- (3*e*e-1)*k1 k4 <- (e*e-1)/g for(i in 1:nSample) { if(!ind[i]) { dm <- dfm.beta(beta, Xbeta[i,]) dg1 <- dfg.beta(beta, gamma, Xbeta[i,], Xgamma[i,]) dg2 <- dfg.gamma(beta, gamma, Xbeta[i,], Xgamma[i,]) D11 <- D11 - outer(Xbeta[i,], k1[i]*dm + k2[i]*dg1) - outer(dg1, k2[i]*dm + k3[i]*dg1) + k4[i]*d2fg.beta(beta,gamma,Xbeta[i,],Xgamma[i,]) D12 <- D12 - outer(Xbeta[i,], k2[i]*dg2) - outer(dg1, k3[i]*dg2) + k4[i]*d2fg.betagamma(beta,gamma,Xbeta[i,],Xgamma[i,]) D21 <- D21 - outer(dg2, k2[i]*dm + k3[i]*dg1) + k4[i]*d2fg.gammabeta(beta,gamma,Xbeta[i,],Xgamma[i,]) D22 <- D22 - outer(dg2, k3[i]*dg2) + k4[i]*d2fg.gamma(beta,gamma,Xbeta[i,],Xgamma[i,]) } } return(rbind(cbind(D11,D12), cbind(D21,D22))) } ######################################################## ## ## MLE estimator for beta and gamma ## define negative log-likehood function and gradient ## ######################################################## # negative log-likelihood function = -logfe + logfg # input variable is composed by beta and gamma bg.negl <- function(x, Xbeta, Xgamma, Y) { beta <- x[1:nBeta] gamma <- x[(nBeta+1):(nBeta+nGamma)] ind <- Y<1e-6 g <- as.vector(fg(beta, gamma, Xbeta, Xgamma)) g[ind] <- 0 e <- as.vector(fhinv(Y) - fm(beta, Xbeta)) / g e[ind] <- 0 temp <- logfg(beta, gamma, Xbeta, Xgamma) + 0.5 * e^2 temp[ind] <- 0 return(sum(temp)) } # gradient of negative likelihood function bg.dnegl <- function(x, Xbeta, Xgamma, Y) { beta <- x[1:nBeta] gamma <- x[(nBeta+1):(nBeta+nGamma)] ind <- Y<1e-6 g <- as.vector(fg(beta, gamma, Xbeta, Xgamma)) g[ind] <- 0 e <- as.vector(fhinv(Y) - fm(beta, Xbeta)) / g e[ind] <- 0 # ith row of fval stores \Psi_i fval <- matrix(0, nSample, nBeta+nGamma) temp <- (e*e-1) / g fval[, 1:nBeta] <- ( e / g ) * Xbeta + temp * dfg.beta(beta,gamma,Xbeta, Xgamma) fval[, (nBeta+1):(nBeta+nGamma)] <- temp * dfg.gamma(beta,gamma,Xbeta, Xgamma) fval[ind,] <- 0 return(-colSums(fval)) } ######################################################## ## ## estimate mean on the original space ## ######################################################## # estimate parameters alpha, beta and gamma estpar <- function(Y, Xalpha, Xbeta, Xgamma, alpha.init, beta.init, gamma.init) { ## ## estimate zero observation parameter alpha and its variance ## # define formula zeroprob <- as.numeric(Y<1e-6) formu <- "zeroprob ~ Xalpha[,2]" if(nAlpha>=3) for(i in 3:nAlpha) formu <- paste(formu, "+Xalpha[,", i, "]", sep="") formu <- as.formula(formu) # call glm() to obtain estimator of alpha and its variance esttemp <- try(summary(glm(formu, family=binomial, start=alpha.init)),TRUE) if(is.character(esttemp)) { return(esttemp) } est <- esttemp$coefficients alpha <- est[,1] alpha.std <- est[,2] names(alpha) <- NULL names(alpha.std) <- NULL ## ## estimate non-zero observation parameters beta and gamma ## if(option.bg==2) { tol <- 1e-10 iter <- 0 diff <- 1 bStop <- FALSE x.new <- c(beta.init,gamma.init) while(diff>tol & !bStop) { iter <- iter + 1 x.old <- x.new f.old <- bg.mle(x.old, Xbeta, Xgamma, Y) jacobi <- dbg.mle(x.old, Xbeta, Xgamma, Y) jacobiinv <- try(solve(jacobi),silent=TRUE) if(is.character(jacobiinv)) { bStop <- TRUE break } x.new <- x.old - jacobiinv %*% f.old if(any(is.nan(x.new)) | any(is.infinite(x.new))) { bStop <- TRUE break } temp <- x.old - x.new diff <- t(temp) %*% temp } if(bStop) { est<- optim(c(beta.init,gamma.init), bg.negl, bg.dnegl, method="BFGS", lower=-Inf, upper=Inf, control=list(maxit=1000), hessian=FALSE, Xbeta, Xgamma, Y)$par beta <- as.vector(est[1:nBeta]) gamma <- as.vector(est[(nBeta+1):(nBeta+nGamma)]) } else { beta <- x.old[1:nBeta] gamma <- x.old[(nBeta+1):(nBeta+nGamma)] } } else if(option.bg==1) { est<- optim(c(beta.init,gamma.init), bg.negl, bg.dnegl, method="BFGS", lower=-Inf, upper=Inf, control=list(maxit=1000), hessian=FALSE, Xbeta, Xgamma, Y)$par beta <- as.vector(est[1:nBeta]) gamma <- as.vector(est[(nBeta+1):(nBeta+nGamma)]) } ## ## calculate variance of beta and gamma ## temp <- try(varPar(beta, gamma, Xbeta, Xgamma, Y),TRUE) if(is.character(temp)) { return(temp) } B.beta <- temp$B.beta # size nBeta-by-(nBeta+nGamma) B.gamma <- temp$B.gamma # size nGamma-by-(nBeta+nGamma) beta.var <- B.beta[,1:nBeta] / nSample gamma.var <- B.gamma[,(nBeta+1):(nBeta+nGamma)] / nSample beta.std <- sqrt(diag(beta.var)) gamma.std <- sqrt(diag(gamma.var)) return(list(alpha=alpha, beta=beta, gamma=gamma, B.beta=B.beta, B.gamma=B.gamma, alpha.std=alpha.std, beta.std=beta.std, gamma.std=gamma.std)) } # output estimators of parameters alpha, beta, and gamma outpar <- function(estpar) { cat("estimator of parameter alpha =\n") cat(estpar$alpha, "\n\n") cat("std of alpha estimator =\n") cat(estpar$alpha.std, "\n\n") cat("estimator of parameter beta =\n") cat(estpar$beta, "\n\n") cat("std of beta estimator =\n") cat(estpar$beta.std, "\n\n") cat("estimator of parameter theta =\n") cat(estpar$gamma, "\n\n") cat("std of theta estimator =\n") cat(estpar$gamma.std, "\n\n") } # calculate internal and external estimators of the mean ME <- function(Y, Xalpha, Xbeta, Xgamma, x0, alpha, beta, gamma, B.beta, B.gamma, r=0.05) { x0alpha <- x0[XalphaIndicator>0] x0beta <- x0[XbetaIndicator>0] x0gamma <- x0[XgammaIndicator>0] ## ## calculate entites ## ind <- Y<1e-6 pi0 <- exp(x0alpha %*% alpha) pi0 <- pi0 / (1+pi0) # scalar pi <- exp(Xalpha %*% alpha) pi <- pi / (1+pi) pi <- as.vector(pi) # size nSample-by-1 g0 <- as.vector(fg(beta, gamma, x0beta, x0gamma)) # scalar g <- as.vector(fg(beta, gamma, Xbeta, Xgamma)) # size nSample-by-1 dg0.beta <- dfg.beta(beta, gamma, x0beta, x0gamma) # size 1-by-nBeta dg.beta <- dfg.beta(beta, gamma, Xbeta, Xgamma) # size nSample-by-nBeta dg0.gamma <- dfg.gamma(beta, gamma, x0beta, x0gamma)# size 1-by-nGamma dg.gamma <- dfg.gamma(beta, gamma, Xbeta, Xgamma) # size nSample-by-nGamma u <- t(matrix(x0beta, nBeta, nSample)) - (g0/g)*Xbeta # size nSample-by-nBeta v <- t(matrix(dg0.beta, nBeta, nSample)) - (g0/g)*dg.beta # size nSample-by-nBeta tau <- t(matrix(dg0.gamma, nGamma, nSample)) - (g0/g)*dg.gamma # size nSample-by-nGamma m0 <- fm(beta, x0beta) # scalar m <- fm(beta, Xbeta) # size nSample-by-1 m[ind] <- 0 e <- as.vector(( fhinv(Y) - m ) / g) # size nSample-by-1 e[ind] <- 0 eta <- as.vector(m0 + e * g0) # size nSample-by-1 eta[ind] <- 0 heta <- fh(eta) # size nSample-by-1 heta[ind] <- 0 dheta <- dfh(eta) # size nSample-by-1 dheta[ind] <- 0 Eh <- mean(heta/(1-pi)) Edh <- mean(dheta/(1-pi)) Eedh <- mean(dheta*e/(1-pi)) Psi <- matrix(0, nSample, nBeta+nGamma) Psi[, 1:nBeta] <- ( e / g ) * Xbeta + ( (e*e-1) / g ) * dg.beta Psi[, (nBeta+1):(nBeta+nGamma)] <- ( (e*e-1) / g ) * dg.gamma Psi[ind,] <- 0 ## ## estimate means ## mean.ex <- mean(heta) * (1-pi0) / (1-mean(pi)) mean.in <- mean(heta/(1-pi)) * (1-pi0) mean.ex <- as.vector(mean.ex) mean.in <- as.vector(mean.in) ## ## external estimator variance ## nDim <- nBeta+nGamma w <- matrix(0, 1+nDim, 1) w[1] <- 1 w[2:(1+nDim)] <- t(B.beta) %*% ( Edh*colSums((1-pi)*u)/nSample + Eedh*colSums((1-pi)*v)/nSample ) w[2:(1+nDim)] <- w[2:(1+nDim)] + t(B.gamma) %*% ( Eedh*colSums((1-pi)*tau)/nSample ) temp <- heta - (1-pi)*Eh omega <- rbind( temp, t(Psi)) sigma <- omega %*% t(omega) / nSample var.ex <- t(w) %*% sigma %*% w temp <- (1-pi0)/(1-mean(pi)) var.ex <- var.ex * temp * temp / nSample d <- colSums(Xalpha*pi*(1-pi))/nSample d <- x0alpha*pi0*(1-pi0) - d*(1-pi0)/(1-mean(pi)) d <- d * Eh d <- as.matrix(d) temp <- Xalpha * sqrt(pi*(1-pi)) A <- t(temp) %*% temp / nSample A.inv <- solve(A) var.ex <- var.ex + t(d) %*% A.inv %*% d / nSample temp <- sqrt(var.ex)*qnorm(1-r/2) CI.ex <- c( mean.ex-temp, mean.ex+temp ) ## ## internal estimator variance ## w <- matrix(0, 1+nDim, 1) w[1] <- 1 w[2:(1+nDim)] <- t(B.beta) %*% ( Edh*colSums(u)/nSample + Eedh*colSums(v)/nSample ) w[2:(1+nDim)] <- w[2:(1+nDim)] + t(B.gamma) %*% ( Eedh*colSums(tau)/nSample ) temp <- heta/(1-pi) - Eh temp[ind] <- 0 omega <- rbind( temp, t(Psi)) sigma <- omega %*% t(omega) / nSample var.in <- t(w) %*% sigma %*% w temp <- (1-pi0) var.in <- var.in * temp * temp / nSample d <- colSums(Xalpha*pi)/nSample d <- x0alpha*pi0*(1-pi0) - d*(1-pi0) d <- d * Eh d <- as.matrix(d) var.in <- var.in + t(d) %*% A.inv %*% d / nSample temp <- sqrt(var.in)*qnorm(1-r/2) CI.in <- c( mean.in-temp, mean.in+temp ) ## ## return ## return(list(mean.ex=mean.ex, mean.in=mean.in, var.ex=var.ex, var.in=var.in, std.ex=sqrt(var.ex), std.in=sqrt(var.in), CI.ex=CI.ex, CI.in=CI.in)) } ######################################################## ## ## quantile of empirical distribution ## ######################################################## qemp <- function(obsv, r) { obsv <- as.vector(sort(obsv)) k <- r * length(obsv) a <- floor(k) b <- ceiling(k) if(a==0) a<-b val <- obsv[a] + (obsv[b]-obsv[a])*(k-a) return(val) } ######################################################## ## ## main function ## ######################################################## estmean <- function(estpar, result, tscore) { ## ## estimate means ## est <- ME(Y, Xalpha, Xbeta, Xgamma, cov, estpar$alpha, estpar$beta, estpar$gamma, estpar$B.beta, estpar$B.gamma, confidence.level) mean.ex <- est$mean.ex mean.in <- est$mean.in var.ex <- est$var.ex var.in <- est$var.in # estimator # confidence interval of assumed normal distribution i <- 1 result[i,1] <- mean.ex result[i,2] <- est$std.ex temp <- qnorm(1-confidence.level/2)*sqrt(var.ex) result[i,3] <- mean.ex - temp result[i,4] <- mean.ex + temp result[i,5] <- mean.in result[i,6] <- est$std.in temp <- qnorm(1-confidence.level/2)*sqrt(var.in) result[i,7] <- mean.in - temp result[i,8] <- mean.in + temp ## ## bootstrap procedure ## if(option.ConfInt==1) { # save original data X.orig <- X Y.orig <- Y iter <- 1 itercount <- 1 while(iter <= nBootstrap) { # generate bootstrap sample ind.bootstrap <- sample(1:nSample, nSample, replace=TRUE) X <- X.orig[ind.bootstrap,] Y <- Y.orig[ind.bootstrap] Xalpha <- X[, XalphaIndicator>0] Xbeta <- X[, XbetaIndicator>0] Xgamma <- X[, XgammaIndicator>0] # estimation for bootstrap sample estpar <- estpar(Y, Xalpha, Xbeta, Xgamma, alpha.init, beta.init, gamma.init) est <- try(ME(Y, Xalpha, Xbeta, Xgamma, cov, estpar$alpha, estpar$beta, estpar$gamma, estpar$B.beta, estpar$B.gamma, confidence.level), TRUE) if(is.character(est)) { cat(i, icount, iter, itercount, est, "\n\n") itercount <- itercount + 1 next } bmean.ex <- est$mean.ex bmean.in <- est$mean.in bvar.ex <- est$var.ex bvar.in <- est$var.in # calculate t-score tscore[iter,1] <- (bmean.ex - mean.ex) / sqrt(bvar.ex) tscore[iter,2] <- (bmean.in - mean.in) / sqrt(bvar.in) # go to next bootstrap iteration iter <- iter + 1 } # restore original data X <- X.orig Y <- Y.orig Xalpha <- X[, XalphaIndicator>0] Xbeta <- X[, XbetaIndicator>0] Xgamma <- X[, XgammaIndicator>0] # confidence interval of empirical t-score result[i,9] <- mean.ex - qemp(tscore[1:nBootstrap,1], 1-confidence.level/2)*sqrt(var.ex) result[i,10] <- mean.ex - qemp(tscore[1:nBootstrap,1], confidence.level/2)*sqrt(var.ex) result[i,11] <- mean.in - qemp(tscore[1:nBootstrap,2], 1-confidence.level/2)*sqrt(var.in) result[i,12] <- mean.in - qemp(tscore[1:nBootstrap,2], confidence.level/2)*sqrt(var.in) } # the estimations of mean should be great then or equal to zero result <- pmax(result, 0) outresult(result) } ## ## output simulation results ## outresult <- function(result) { sink(resultfile, append=TRUE) cat("Covariate =", cov[-1], "\n\n") cat("Externally weighted estimator:\n") cat(" mean =", result[1], "\n") cat(" standard deviation =", result[2], "\n") cat(" ", (1-confidence.level)*100, "% confidence interval = [", result[3], ", ", result[4], "]\n", sep="") if(option.ConfInt==1) cat((1-confidence.level)*100, "% bootstrap confidence interval = [", result[9], ", ", result[10], "]\n", sep="") cat("\n") cat("Internally weighted estimator:\n") cat(" mean =", result[5], "\n") cat(" standard deviation =", result[6], "\n") cat(" ", (1-confidence.level)*100, "% confidence interval = [", result[7], ", ", result[8], "]\n", sep="") if(option.ConfInt==1) cat((1-confidence.level)*100, "% bootstrap confidence interval = [", result[11], ", ", result[12], "]\n", sep="") cat("----------------------------------------------------------\n\n") sink() }