##########################################################
##                                                      ##
##  Simulation study for "Estimating the retransformed  ##
##  mean in a heteroscedastic two-part model" by Welsh  ##
##  and Zhou.                                           ##
##                                                      ##
##  Developed by Hao Cheng for Andrew Zhou              ##
##  Date: December 10, 2004                             ##
##                                                      ##
##########################################################



#----- clear environment -----
rm(list=ls())
print(Sys.time())



##########################################################
##                                                      ##
## simulation setting                                   ##
##                                                      ##
##########################################################

#----- input data, generate covariates and response -----
inputfile <- "example.dat"
if(!file.exists(inputfile))
    stop("File ", inputfile, " does not exist. Stop!")
dataset <- as.matrix(read.table(inputfile))
X <- cbind(1, dataset[,-1])
Y <- as.vector(dataset[,1])

#----- sample size -----
nSample <- nrow(X)

#----- number of covariates -----
#      nCovariate = 1 + (number of actual covariates)
#      covariates are given in format (1, covariate_1, ..., covariate_n),
#      where n = nCovariate - 1
nCovariate <- ncol(X)

#----- number of data sets -----
nDataset <- 1

#----- (1-r) confidence interval -----
confidence.level <- 0.05

#----- option on calculating confidence interval of mean estimators -----
#      option.ConfInt = 0: use formulas given in the paper
#      option.ConfInt = 1: use boostrap
option.ConfInt = 1

#----- size of bootstrap samples -----
nBootstrap <- 100

#----- option on estimating parameters beta and gamma -----
#      option.bg = 0: maximum-likelihood estimator, solve estimating equation
#      option.bg = 1: maximum-likelihood estimator, maximize log-likelihood function
option.bg = 1

#----- random number generator seed -----
seed <- nSample+10
set.seed(seed)


##########################################################
##                                                      ##
## simulation scenario definition                       ##
##                                                      ##
##########################################################


#----- define covariates used for zero response (alpha) computation -----
#
#      nAlpha: number of covariates used to calculate alpha
#      nAlpha <= nCovariate
#
#      XalphaIndicator is a vector that has nCovariate components.
#      It shows which components of covariates are used
#      when estimating parameter alpha
#      sum(indicator) must equal nAlpha
nAlpha <- nCovariate
XalphaIndicator <- rep(1,nCovariate)

#----- define covariates used for estimating mean in transformed space (beta) -----
nBeta <- nCovariate
XbetaIndicator <- rep(1,nCovariate)

#----- define covariates used for estimating variance in transformed space (gamma) -----
nGamma <- nCovariate
XgammaIndicator <- rep(1,nCovariate)

#----- real parameter values -----
#      For real data, they are initial guess of parameters
#      For simulation, they are real parameter values used to generate data sets
alpha.0 <- rep(0, nCovariate)
beta.0 <- rep(0, nCovariate)
gamma.0 <- rep(0, nCovariate)


#----- load functions -----
source("estmean.r")

#----- check paramer setting validality -----
if(!check.data()) stop()




##########################################################
##                                                      ##
## specify transformation function h()                  ##
##                                                      ##
##########################################################

# transformation function h()
fh <- function(x)
{
    return(exp(x))
}


# inverse of transformation function h()
fhinv <- function(x)
{
    return(log(x))
}


# derivative of transformation function h()
dfh <- function(x)
{
    return(exp(x))
}



##########################################################
##                                                      ##
## specify mean function m() in transformed space       ##
##                                                      ##
## mean is only related with parameter beta             ##
## both beta and x have nBeta componets                 ##
##                                                      ##
##########################################################

# mean in transformed space
# m = beta*x
fm <- function(beta, xbeta)
{
    return(xbeta %*% beta)
}

# partial derivative of fm on beta
dfm.beta <- function(beta, xbeta)
{
    return(xbeta)
}



##########################################################
##                                                      ##
## specify vatiance function g() in transformed space   ##
##                                                      ##
## variance is related with parameters beta and gamma   ##
## both beta and xbeta have nBeta componets             ##
## both gamma and xgamma have nGamma components         ##
##                                                      ##
##########################################################

# variance in transformed space
# g = exp( gamma*x / 2 )
fg <- function(beta, gamma, xbeta, xgamma)
{
    fval <- exp(0.5 * xgamma %*% gamma)
    return(fval)
}

# partial derivative of fg on beta
dfg.beta <- function(beta, gamma, xbeta, xgamma)
{
    y <- xbeta * 0
    return(y)
}

# partial derivative of fg on gamma
dfg.gamma <- function(beta, gamma, xbeta, xgamma)
{
    y <- xgamma
    y <- y * as.vector(0.5 * exp(0.5 * xgamma %*% gamma))
    return(y)
}

# 2nd order partial derivative of fg on beta
d2fg.beta <- function(beta, gamma, xbeta, xgamma)
{
    y <- matrix(0, nBeta, nBeta)
    return(y)
}

# 2nd order partial derivative of fg on beta and gamma
d2fg.betagamma <- function(beta, gamma, xbeta, xgamma)
{
    y <- matrix(0, nBeta, nGamma)
    return(y)
}

# 2nd order partial derivative of fg on gamma and beta
d2fg.gammabeta <- function(beta, gamma, xbeta, xgamma)
{
    y <- matrix(0, nGamma, nBeta)
    return(y)
}

# 2nd order partial derivative of fg on gamma
d2fg.gamma <- function(beta, gamma, xbeta, xgamma)
{
    k1 <- as.vector(0.25 * exp(0.5 * xgamma %*% gamma))
    D <- k1 * outer(xgamma, xgamma)
    return(D)
}

# log of fg
logfg <- function(beta, gamma, xbeta, xgamma)
{
    return(0.5 * xgamma %*% gamma)
}



##########################################################
##                                                      ##
## execute computation                                  ##
##                                                      ##
##########################################################

# allocate memory to store results for each dataset
if(option.ConfInt==0)
{
    result <- matrix(0, nDataset, 8)
}
if(option.ConfInt==1)
{
    result <- matrix(0, nDataset, 12)
    tscore <- matrix(0, nBootstrap, 2)
}

# generate covariates
Xalpha <- X[, XalphaIndicator>0]
Xbeta <- X[, XbetaIndicator>0]
Xgamma <- X[, XgammaIndicator>0]

##
## estimate parameters
##
alpha.init <- alpha.0[XalphaIndicator>0]
beta.init <- beta.0[XbetaIndicator>0]
gamma.init <- gamma.0[XgammaIndicator>0]
par <- estpar(Y, Xalpha, Xbeta, Xgamma, alpha.init, beta.init, gamma.init)

parfile <- paste(unlist(strsplit(inputfile,".",fixed=TRUE))[1], "_par.dat", sep="")
sink(parfile)
outpar(par)
sink()

resultfile <- paste(unlist(strsplit(inputfile,".",fixed=TRUE))[1], "_result.dat", sep="")
file.remove(resultfile)
sink(resultfile)
cat("Source data file:       ", inputfile, "\n")
cat("Number of observations: ", nSample, "\n")
cat("Number of covariates:   ", nCovariate-1, "\n")
cat("----------------------------------------------------------\n\n")
sink()

#----- covariates value -----
cov <- c(1, 55, 1, 0, 50)

estmean(par, result, tscore)
print(Sys.time())