########################################################## ## ## ## Simulation study for "Estimating the retransformed ## ## mean in a heteroscedastic two-part model" by Welsh ## ## and Zhou. ## ## ## ## Developed by Hao Cheng for Andrew Zhou ## ## Date: December 10, 2004 ## ## ## ########################################################## #----- clear environment ----- rm(list=ls()) print(Sys.time()) ########################################################## ## ## ## simulation setting ## ## ## ########################################################## #----- input data, generate covariates and response ----- inputfile <- "example.dat" if(!file.exists(inputfile)) stop("File ", inputfile, " does not exist. Stop!") dataset <- as.matrix(read.table(inputfile)) X <- cbind(1, dataset[,-1]) Y <- as.vector(dataset[,1]) #----- sample size ----- nSample <- nrow(X) #----- number of covariates ----- # nCovariate = 1 + (number of actual covariates) # covariates are given in format (1, covariate_1, ..., covariate_n), # where n = nCovariate - 1 nCovariate <- ncol(X) #----- number of data sets ----- nDataset <- 1 #----- (1-r) confidence interval ----- confidence.level <- 0.05 #----- option on calculating confidence interval of mean estimators ----- # option.ConfInt = 0: use formulas given in the paper # option.ConfInt = 1: use boostrap option.ConfInt = 1 #----- size of bootstrap samples ----- nBootstrap <- 100 #----- option on estimating parameters beta and gamma ----- # option.bg = 0: maximum-likelihood estimator, solve estimating equation # option.bg = 1: maximum-likelihood estimator, maximize log-likelihood function option.bg = 1 #----- random number generator seed ----- seed <- nSample+10 set.seed(seed) ########################################################## ## ## ## simulation scenario definition ## ## ## ########################################################## #----- define covariates used for zero response (alpha) computation ----- # # nAlpha: number of covariates used to calculate alpha # nAlpha <= nCovariate # # XalphaIndicator is a vector that has nCovariate components. # It shows which components of covariates are used # when estimating parameter alpha # sum(indicator) must equal nAlpha nAlpha <- nCovariate XalphaIndicator <- rep(1,nCovariate) #----- define covariates used for estimating mean in transformed space (beta) ----- nBeta <- nCovariate XbetaIndicator <- rep(1,nCovariate) #----- define covariates used for estimating variance in transformed space (gamma) ----- nGamma <- nCovariate XgammaIndicator <- rep(1,nCovariate) #----- real parameter values ----- # For real data, they are initial guess of parameters # For simulation, they are real parameter values used to generate data sets alpha.0 <- rep(0, nCovariate) beta.0 <- rep(0, nCovariate) gamma.0 <- rep(0, nCovariate) #----- load functions ----- source("estmean.r") #----- check paramer setting validality ----- if(!check.data()) stop() ########################################################## ## ## ## specify transformation function h() ## ## ## ########################################################## # transformation function h() fh <- function(x) { return(exp(x)) } # inverse of transformation function h() fhinv <- function(x) { return(log(x)) } # derivative of transformation function h() dfh <- function(x) { return(exp(x)) } ########################################################## ## ## ## specify mean function m() in transformed space ## ## ## ## mean is only related with parameter beta ## ## both beta and x have nBeta componets ## ## ## ########################################################## # mean in transformed space # m = beta*x fm <- function(beta, xbeta) { return(xbeta %*% beta) } # partial derivative of fm on beta dfm.beta <- function(beta, xbeta) { return(xbeta) } ########################################################## ## ## ## specify vatiance function g() in transformed space ## ## ## ## variance is related with parameters beta and gamma ## ## both beta and xbeta have nBeta componets ## ## both gamma and xgamma have nGamma components ## ## ## ########################################################## # variance in transformed space # g = exp( gamma*x / 2 ) fg <- function(beta, gamma, xbeta, xgamma) { fval <- exp(0.5 * xgamma %*% gamma) return(fval) } # partial derivative of fg on beta dfg.beta <- function(beta, gamma, xbeta, xgamma) { y <- xbeta * 0 return(y) } # partial derivative of fg on gamma dfg.gamma <- function(beta, gamma, xbeta, xgamma) { y <- xgamma y <- y * as.vector(0.5 * exp(0.5 * xgamma %*% gamma)) return(y) } # 2nd order partial derivative of fg on beta d2fg.beta <- function(beta, gamma, xbeta, xgamma) { y <- matrix(0, nBeta, nBeta) return(y) } # 2nd order partial derivative of fg on beta and gamma d2fg.betagamma <- function(beta, gamma, xbeta, xgamma) { y <- matrix(0, nBeta, nGamma) return(y) } # 2nd order partial derivative of fg on gamma and beta d2fg.gammabeta <- function(beta, gamma, xbeta, xgamma) { y <- matrix(0, nGamma, nBeta) return(y) } # 2nd order partial derivative of fg on gamma d2fg.gamma <- function(beta, gamma, xbeta, xgamma) { k1 <- as.vector(0.25 * exp(0.5 * xgamma %*% gamma)) D <- k1 * outer(xgamma, xgamma) return(D) } # log of fg logfg <- function(beta, gamma, xbeta, xgamma) { return(0.5 * xgamma %*% gamma) } ########################################################## ## ## ## execute computation ## ## ## ########################################################## # allocate memory to store results for each dataset if(option.ConfInt==0) { result <- matrix(0, nDataset, 8) } if(option.ConfInt==1) { result <- matrix(0, nDataset, 12) tscore <- matrix(0, nBootstrap, 2) } # generate covariates Xalpha <- X[, XalphaIndicator>0] Xbeta <- X[, XbetaIndicator>0] Xgamma <- X[, XgammaIndicator>0] ## ## estimate parameters ## alpha.init <- alpha.0[XalphaIndicator>0] beta.init <- beta.0[XbetaIndicator>0] gamma.init <- gamma.0[XgammaIndicator>0] par <- estpar(Y, Xalpha, Xbeta, Xgamma, alpha.init, beta.init, gamma.init) parfile <- paste(unlist(strsplit(inputfile,".",fixed=TRUE))[1], "_par.dat", sep="") sink(parfile) outpar(par) sink() resultfile <- paste(unlist(strsplit(inputfile,".",fixed=TRUE))[1], "_result.dat", sep="") file.remove(resultfile) sink(resultfile) cat("Source data file: ", inputfile, "\n") cat("Number of observations: ", nSample, "\n") cat("Number of covariates: ", nCovariate-1, "\n") cat("----------------------------------------------------------\n\n") sink() #----- covariates value ----- cov <- c(1, 55, 1, 0, 50) estmean(par, result, tscore) print(Sys.time())