# ============================================================================ #
# Basic source code for
# "Determining Growth Determinants: Default Priors and Predictive Performance
#  in Bayesian Model Averaging"
# Theo Eicher, Chris Papageorgiou and Adrian Raftery
# Journal of Applied Econometrics, 26, 1, (February 2011): 30-55
#
# Programming by Adrian Raftery, Drew Creal, Amanda Cox
# LPS program by Jennifer Hoeting
# CRPS program by Tilmann Gneiting
#
# Date 4/29/2007
# ============================================================================ #

# The code below calls bicreg() from the BMA package and leaps() from the
# leaps package, so both packages must be installed and loaded.
library(BMA)
library(leaps)

bma.compare <- function(x, y, wt = NULL, strict = FALSE, OR = 1000, maxCol = 31,
                        drop.factor.levels = TRUE, nbest = 10, compare = TRUE,
                        performance = FALSE, performance.compare = FALSE,
                        performance.only = FALSE, percent.split = 0.8,
                        percent.cov = 0.9, define.V = TRUE, g.prior = 1,
                        nu = NULL, lambda = NULL, phi = NULL, delta = NULL,
                        gam = NULL, PMsize = NULL, in.index = NULL,
                        out.index = NULL)
{
    y <- as.matrix(y)
    if(is.null(wt)) {
        wt <- rep(1, length(y))
    }

    # The four modes are mutually exclusive; the flags below enforce that
    if(performance.only==TRUE) {
        compare <- FALSE
        performance.compare <- FALSE
        performance <- FALSE
    }
    if(performance==TRUE) {
        compare <- FALSE
        performance.compare <- FALSE
    }
    if(performance.compare==TRUE) {
        compare <- FALSE
    }

    # ===================================================================== #
    # Function to split the data into "held.out" and "used" samples
    # ===================================================================== #
    get.indices <- function(obs, percent.split, save.x)
    {
        accept <- FALSE
        while(accept==FALSE) {
            indices <- seq(from=1,to=obs,by=1)
            in.index <- sort(sample(indices,ceiling(obs*(percent.split)),replace=FALSE))
            out.index <- sort(indices[-in.index])
            # Reject a split if any regressor is constant in either subsample
            test.variance1 <- diag(var(save.x[in.index,]))
            test.variance2 <- diag(var(save.x[out.index,]))
            temp1 <- 0
            for(i in 1:length(test.variance1)) {
                if(test.variance1[i]!=0) { temp1 <- temp1 + 1 }
            }
            temp2 <- 0
            for(i in 1:length(test.variance2)) {
                if(test.variance2[i]!=0) { temp2 <- temp2 + 1 }
            }
            if((temp1==length(test.variance1))&&(temp2==length(test.variance2))) {
                # Accept the indices
                accept <- TRUE
            }
        }
        result <- list(in.index = as.numeric(in.index),out.index = as.numeric(out.index))
        return(result)
    }

    # ============================================================== #
    # Function to calculate the marginal likelihood
    # ============================================================== #
    log.marginal <- function(Y, X.data, model.vect, phi, nu, lambda, delta,
                             gam, define.V, g.prior)
    {
        n <- length(Y)
        k <- ncol(X.data) + 1
        p <- sum(model.vect)
        ones <- rep(1, n)
        X <- as.matrix(cbind(ones, X.data[, model.vect]))
        Y <- as.matrix(Y)
        # Create the matrix of new RHS variables
        mu <- as.matrix(rep(0,ncol(X)))

        # "g" may be different for each model
        if(is.null(phi)) {
            g.temp <- find.g(Y, X.data, n, k, p + 1, g.prior, delta, gam)
            phi.temp <- sqrt(1/(g.temp*n))
        } else {
            phi.temp <- phi
            g.temp <- 1/((phi.temp^2)*n)
        }
        V.temp <- define.V
        if(g.prior==11) { V.temp <- TRUE }

        # Create the prior covariance matrix
        if(V.temp==TRUE) {
            # Raftery, Madigan, and Hoeting's (1997) definition of V
            V <- as.matrix(diag(c(1, rep(phi.temp^2, p))))
        } else {
            # Fernandez, Ley, and Steel's (2001) definition of V
            V <- solve(t(X)%*%X)*(1/g.temp)
            V[,1] <- 0
            V[1,] <- 0
            V[1,1] <- 10000 * var(Y)   # var(Y): the archived source used the global y
        }

        # Update the sufficient statistics
        det <- diag(ones) + X %*% V %*% t(X)
        divs <- prod(eigen(det, TRUE, TRUE)$values)^0.5

        # Compute the marginal likelihood
        result <- lgamma((nu + n)/2) + (nu/2)*log(nu*lambda) - (n/2)*log(pi) -
                  lgamma((nu/2)) - log(divs) -
                  ((nu + n)/2)*log((t(Y) %*% solve(det, Y)) + (nu * lambda))
        return(result)
    }
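    # ------------------------------------------------------------------ #
    # For reference: with beta ~ N(0, sigma^2 V) and nu*lambda/sigma^2 ~
    # chi^2_nu, the integrated likelihood computed above is the
    # multivariate-t density
    #
    #   p(Y | M) = Gamma((nu+n)/2) (nu*lambda)^(nu/2) /
    #              [ Gamma(nu/2) pi^(n/2) |I + X V X'|^(1/2)
    #                * (Y'(I + X V X')^{-1} Y + nu*lambda)^((nu+n)/2) ],
    #
    # evaluated on the log scale (cf. Raftery, Madigan and Hoeting 1997).
    # ------------------------------------------------------------------ #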
    # ============================================================== #
    # Function to calculate the coefficients
    # ============================================================== #
    calculate.coefficients <- function(Y, X.data, model.vect, phi, nu, lambda,
                                       delta, gam, define.V, g.prior)
    {
        Y <- as.matrix(Y)
        X.data <- as.matrix(X.data)
        k <- ncol(X.data) + 1
        n <- dim(Y)[1]
        ones <- rep(1, n)
        p <- sum(model.vect)
        num.vars <- ncol(X.data) + 1

        # Create a matrix of explanatory variables
        X <- as.matrix(cbind(ones, X.data[, model.vect]))

        # "g" may be different for each model
        if(is.null(phi)) {
            g.temp <- find.g(Y, X.data, n, k, p + 1, g.prior, delta, gam)
            phi.temp <- sqrt(1/(g.temp*n))
        } else {
            phi.temp <- phi
            g.temp <- 1/((phi.temp^2)*n)
        }
        V.temp <- define.V
        if(g.prior==11) { V.temp <- TRUE }

        # Calculate the hyperparameter V for the variance of the betas using
        # one of the two methods
        if(V.temp==TRUE) {
            # Raftery, Madigan, and Hoeting's (1997) definition of V
            V <- diag(c(1, rep(phi.temp^2, p)))
        } else {
            # Fernandez, Ley, and Steel's (2001) definition of V
            V <- solve(t(X)%*%X)*(1/g.temp)
            V[,1] <- 0
            V[1,] <- 0
            V[1,1] <- 10000 * var(Y)   # var(Y): the archived source used the global y
        }

        # These formulas are on page 26 of Jennifer Hoeting's (1994) dissertation
        inv.V <- solve(V)
        V.prime <- solve(inv.V + t(X)%*%X)
        # Note that the hyperparameter for Beta, which is "MU" in Hoeting (1994),
        # is a vector of zeros; therefore it is not included here.
        # Note by Adrian 4/27/07: In principle this should be included.
        mu.prime <- as.matrix(V.prime%*%(t(X)%*%Y))

        # Update the sufficient statistics for the variance
        nu.prime <- nu + n
        lambda.prime <- (1/(nu.prime))*(nu*lambda + t(Y)%*%Y -
                        t(t(X)%*%Y)%*%solve(t(X)%*%X + inv.V)%*%(t(X)%*%Y))

        # Calculate the mean and variance of the vector of coefficients,
        # mapped back into the positions of the full coefficient vector
        index <- as.matrix(1:(num.vars-1))
        index <- c(1,index[model.vect]+1)
        ebi <- matrix(0, num.vars, 1)
        ebi[index] <- mu.prime
        sebi <- matrix(0, num.vars, 1)
        sebi[index] <- sqrt(diag(as.vector((lambda.prime*nu.prime/(nu.prime - 2)))*V.prime))
        varbi <- matrix(0, nrow=num.vars, ncol=num.vars)
        varbi[index,index] <- as.vector((lambda.prime*nu.prime/(nu.prime - 2)))*V.prime

        # Return the mean and variance for each of these parameters
        result <- list(ebi=ebi,sebi=sebi,varbi=varbi)
        return(result)
    }

    # ============================================================================== #
    # Function to compute the correlations between regressors and then order them
    # ============================================================================== #
    compute.correlations <- function(x)
    {
        correl_mat <- cor(x)
        k <- 2
        correl_vect <- 0
        correl_names <- c("dummy","names")
        for(i in 1:nrow(correl_mat)) {
            for(j in k:ncol(correl_mat)) {
                vect_names <- c(dimnames(correl_mat)[[1]][i], dimnames(correl_mat)[[2]][j])
                correl_names <- rbind(correl_names,vect_names)
                correl_vect <- rbind(correl_vect,correl_mat[i,j])
            }
            # NOTE: the remainder of this function was garbled in the archived
            # source; the lines below are a reconstruction that walks the
            # upper triangle and orders the pairs by absolute correlation.
            if(k < ncol(correl_mat)) { k <- k + 1 }
        }
        correl_vect <- as.matrix(correl_vect[-1])
        correl_names <- correl_names[-1, , drop = FALSE]
        ordering <- order(-abs(correl_vect))
        result <- cbind(correl_names[ordering, ], correl_vect[ordering])
        return(result)
    }

    # ============================================================================== #
    # Function to reduce the design matrix to at most maxCol columns by
    # backward elimination, as in bicreg from the BMA package. The lines
    # down to the start of the while loop were garbled in the archived
    # source and are reconstructed from bicreg.
    # ============================================================================== #
    dropcols <- function(x, y, wt, maxCol = 31)
    {
        x1.ldf <- data.frame(x, y = y)
        temp.wt <- wt
        lm.out <- lm(y ~ ., data = x1.ldf, weights = temp.wt)
        form.vars <- all.vars(formula(lm.out))[-1]
        any.dropped <- FALSE
        dropped.which <- NULL
        while(length(lm.out$coefficients) > maxCol) {
            any.dropped <- TRUE
            droplm <- drop1(lm.out, test = "none")
            dropped <- row.names(droplm)[which.min(droplm$RSS[-1]) + 1]
            dropped.index <- match(dropped, form.vars)
            form.vars <- form.vars[-dropped.index]
            formla <- formula(paste("y", "~", paste(form.vars, collapse = " + "),
                              sep = " "))
            lm.out <- lm(formla, data = x1.ldf, weights = temp.wt)
            dropped.which <- c(dropped.which, dropped)
        }
        new.var.names <- names(lm.out$coefficients)
        return(list(mm = model.matrix(lm.out)[, -1, drop = FALSE],
                    any.dropped = any.dropped, dropped = dropped.which,
                    var.names = new.var.names))
    }

    # ======================================================================== #
    # This section gets rid of NAs in the data
    # ======================================================================== #
    y <- as.matrix(y)
    cl <- match.call()
    x <- data.frame(x)
    if (is.null(dimnames(x))) {
        dimnames(x) <- list(NULL, paste("X", 1:ncol(x), sep = ""))
    }
    y <- as.numeric(y)
    options(contrasts = c("contr.treatment", "contr.treatment"))
    xnames <- dimnames(x)[[2]]
    x2 <- na.omit(data.frame(x))
    used <- match(row.names(data.frame(x)), row.names(x2))
    omitted <- seq(nrow(x))[is.na(used)]
    if (length(omitted) > 0) {
        wt <- wt[-omitted]
        x <- x2
        y <- y[-omitted]
        warning(paste("There were", length(omitted), "records deleted due to NAs"))
    }

    # ======================================================================== #
    # This section reduces the number of columns in "x" to maxCol
    # ======================================================================== #
    if (drop.factor.levels) {
        cdf <- cbind.data.frame(y = y, x)
        mm <- model.matrix(formula(cdf), data = cdf)[, -1, drop = FALSE]
        x <- mm
    }
    xx <- dropcols(x, y, wt, maxCol)
    xnames <- xx$var.names[-1]
    x <- xx$mm
    reduced <- xx$any.dropped
    dropped <- NULL
    if (reduced) {
        dropped <- xx$dropped
    }
    nvar <- length(x[1, ])

    # Get the mean and standard deviation before normalizing the data
    mean.y <- mean(y)
    sd.y <- sd(y)
    mean.x <- as.matrix(apply(x,2,mean))
    sd.x <- as.matrix(apply(x,2,sd))   # column SDs; sd() no longer accepts matrices

    # Normalize the data
    save.y <- y
    save.x <- x
    y <- as.matrix(scale(y))
    x <- as.matrix(scale(x))
    obs <- nrow(y)

    # This is the default for Raftery, Madigan and Hoeting
    if(g.prior==11) {
        r2 <- summary(lm(save.y ~ ., data = data.frame(save.y,save.x)))$r.squared
        if(r2 < 0.9) {
            # Relatively informative priors
            new.nu <- 2.58
            new.lambda <- 0.28
        } else {
            # Relatively non-informative priors
            new.nu <- 0.2
            new.lambda <- 0.1684
        }
        if(is.null(nu)) { nu <- new.nu }
        if(is.null(lambda)) { lambda <- new.lambda }
    } else {
        phi <- NULL
    }
    if(is.null(lambda)) { lambda <- 1e-08 }
    if(is.null(nu)) { nu <- 1e-08 }

    # ============================================================================== #
    # This is the main function that runs bicreg
    # ============================================================================== #
    run.bicreg <- function(x, y, wt, strict, OR, nbest, define.V, phi, nu,
                           lambda, delta, gam, PMsize, mean.y, mean.x, sd.y,
                           sd.x, g.prior, save.y, save.x)
    {
        # ==================================================================== #
        # This section runs the leaps algorithm that computes a set of best
        # models from the set of all possible models.
        # ==================================================================== #
        nvar <- length(x[1, ])
        if (nvar > 2) {
            a <- leaps(x, y, wt = wt, method = "r2", names = dimnames(x)[[2]],
                       strictly.compatible = FALSE, nbest = nbest)
            a$r2 <- pmin(pmax(0, a$r2), 0.999)
            x.lm <- cbind.data.frame(y = y,
                        as.data.frame(x[, a$which[2, , drop = FALSE]]), w = wt)
            lm.fix <- lm(y ~ . - w, weights = w, data = x.lm)
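            # leaps() computes R2 on a slightly different basis than lm()
            # when weights are supplied; the "magic" adjustment below
            # rescales the leaps R2 values so that the refitted model above
            # reproduces its lm() R-squared.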
            r2.fix <- summary(lm.fix)$r.sq
            N <- ncol(x)
            magic <- N * log(1 - a$r2[2]) - N * log(1 - r2.fix)
            a$r2 <- 1 - (1 - a$r2) * exp(-magic/N)
            r2 <- round(c(0, a$r2) * 100, 3)
            size <- c(1, a$size)
            which <- rbind(rep(FALSE, ncol(x)), a$which)
            templabs <- t(matrix(rep(colnames(which), times = nrow(which)),
                          ncol = nrow(which)))
            templabs[!which] <- ""
            label <- apply(templabs, 1, paste, collapse = "")
            label[1] <- "NULL"
        } else {
            # With one or two regressors, enumerate the models directly
            r2 <- bic <- NULL
            nmod <- switch(ncol(x), 2, 4)
            bic <- label <- rep(0, nmod)
            model.fits <- as.list(rep(0, nmod))
            which <- matrix(c(FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE,
                            TRUE), nmod, nmod/2)
            size <- c(1, 2, 2, 3)[1:nmod]
            sep <- if (all(nchar(dimnames(x)[[2]]) == 1)) "" else ","
            for (k in 1:nmod) {
                if (k == 1) {
                    label[k] <- "NULL"
                    lm1 <- lm(y ~ 1, weights = wt)
                } else {
                    label[k] <- paste(dimnames(x)[[2]][which[k, ]], collapse = sep)
                    x.lm <- cbind.data.frame(y = y,
                                x = x[, which[k, , drop = FALSE]], wt = wt)
                    lm1 <- lm(y ~ . - wt, data = x.lm, weights = wt)
                }
                r2[k] <- summary(lm1)$r.sq * 100
            }
        }

        # ==================================================================== #
        # Calculate the marginal likelihood for each of the models while
        # adjusting for the prior on the model size.
        # ==================================================================== #
        n <- length(y)
        # Here I call the function that calculates the marginal likelihood
        log.marg <- as.matrix(apply(which, 1, function(which)
                        log.marginal(as.matrix(y), as.matrix(x), which, phi,
                                     nu, lambda, delta, gam, define.V,
                                     g.prior)))
        if(is.null(PMsize)) {
            pmw <- rep(1, length(size))
        } else {
            # Binomial model-size prior with expected size PMsize
            kj <- apply(which, 1, sum)
            K <- ncol(which)
            pmw <- (PMsize/K)^kj * ((1 - PMsize/K)^(K-kj))
        }
        log.marg <- log.marg + log(pmw)

        # ==================================================================== #
        # This section calculates and applies Occam's Window
        # ==================================================================== #
        # base::which is found here despite the local matrix of the same name,
        # because R skips non-function bindings when resolving a call
        occam <- which(-1*log.marg - min(-1*log.marg) < log(OR))
        r2 <- r2[occam]
        size <- size[occam]
        label <- label[occam]
        which <- which[occam, , drop = FALSE]
        log.marg <- log.marg[occam]

        # ==================================================================== #
        # This section calculates the posterior probability using the
        # marginal likelihoods
        # ==================================================================== #
        postprob <- exp(log.marg)/sum(exp(log.marg))
        postprob[is.na(postprob)] <- 1
        order.marg <- order(-1*log.marg, size, label)
        r2 <- r2[order.marg]
        size <- size[order.marg]
        label <- label[order.marg]
        which <- which[order.marg, , drop = FALSE]
        log.marg <- log.marg[order.marg]
        postprob <- postprob[order.marg]

        # ==================================================================== #
        # If "strict=TRUE", the algorithm reduces the set of models even
        # further by excluding those models that have better models nested
        # within them.
        # ==================================================================== #
        if (strict) {
            nmod <- length(log.marg)
            if (nmod > 1) {
                occam <- rep(TRUE, nmod)
                for (k in (2:nmod)) {
                    for (j in (1:(k - 1))) {
                        which.diff <- which[k, ] - which[j, ]
                        if(all(which.diff >= 0)) { occam[k] <- FALSE }
                    }
                }
                r2 <- r2[occam]
                size <- size[occam]
                label <- label[occam]
                nmod <- sum(occam)
                which <- which[occam, , drop = FALSE]
                log.marg <- log.marg[occam]
                postprob <- postprob[occam]
                postprob <- postprob/sum(postprob)
            }
        }
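        # The next step averages over the retained models: the posterior
        # inclusion probability of regressor i is
        #     P(beta_i != 0 | D) = sum over models containing i of P(M_k | D),
        # reported below in percent.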
        # ==================================================================== #
        # This step computes the probability that the regression coefficient
        # is not equal to zero.
        # ==================================================================== #
        probne0 <- round(100 * t(which) %*% as.matrix(postprob), 1)

        # ==================================================================== #
        # This section calculates the expected value of each regressor for
        # each model as well as the standard deviation.
        # ==================================================================== #
        nmod <- length(log.marg)
        model.fits <- as.list(rep(0, nmod))
        EbiMk <- matrix(rep(0, nmod * (nvar + 1)), nrow = nmod)
        sebiMk <- matrix(rep(0, nmod * (nvar + 1)), nrow = nmod)
        for (k in (1:nmod)) {
            # (The archived source had identical if/else branches here;
            # they are merged into a single call.)
            result <- calculate.coefficients(as.matrix(y), as.matrix(x),
                          which[k,], phi, nu, lambda, delta, gam, define.V,
                          g.prior)
            EbiMk[k,] <- result$ebi
            sebiMk[k,] <- result$sebi
        }

        # ================================================= #
        # Adjust the estimates due to normalization  -Drew
        # ================================================= #
        EbiMk.save <- EbiMk
        for(i in 1:nmod) {
            for(j in 1:(nvar+1)) {
                if(j==1) {
                    # Map the standardized intercept back to the original scale
                    EbiMk[i,j] <- mean.y + sd.y*(EbiMk[i,j] -
                                  sum((EbiMk[i,2:(nvar+1)]*mean.x)/sd.x))
                    # NB: applying the same location shift to the standard
                    # errors follows the archived source; strictly, only the
                    # scale transformation applies to a standard deviation
                    sebiMk[i,j] <- mean.y + sd.y*(sebiMk[i,j] -
                                   sum((sebiMk[i,2:(nvar+1)]*mean.x)/sd.x))
                } else {
                    sebiMk[i,j] <- (sebiMk[i,j]*sd.y)/(sd.x[j-1])
                    EbiMk[i,j] <- (EbiMk[i,j]*sd.y)/sd.x[j-1]
                }
            }
        }

        # BMA posterior moments: E[b|D] = sum_k P(M_k|D) E[b|M_k,D], and
        # Var[b|D] = sum_k P(M_k|D) (se_k^2 + (E_k - E[b|D])^2)
        Ebi <- rep(0, (nvar + 1))
        SDbi <- rep(0, (nvar + 1))
        CEbi <- Ebi
        CSDbi <- SDbi
        for (i in 1:(nvar + 1)) {
            if ((i == 1) || (sum(which[, (i - 1)] != 0))) {
                Ebi[i] <- as.numeric(sum(postprob * EbiMk[, i]))
                SDbi[i] <- sqrt(postprob %*% (sebiMk[, i]^2) +
                                postprob %*% ((EbiMk[, i] - Ebi[i])^2))
                if (i == 1) {
                    CEbi[i] <- Ebi[i]
                    CSDbi[i] <- SDbi[i]
                } else {
                    # Conditional moments: renormalize over the models that
                    # actually include regressor i - 1
                    sel <- which[, i - 1]
                    cpp <- postprob[sel]/sum(postprob[sel])
                    CEbi[i] <- as.numeric(sum(cpp * EbiMk[sel, i]))
                    CSDbi[i] <- sqrt(cpp %*% (sebiMk[sel, i]^2) +
                                     cpp %*% ((EbiMk[sel, i] - CEbi[i])^2))
                }
            }
        }

        # ==================================================================== #
        # Recalculate the R squared (on the normalized data, using the
        # posterior mean coefficients for each model)
        # ==================================================================== #
        r2 <- matrix(rep(0,nmod),nrow=nmod)
        for(i in 1:nmod) {
            temp <- as.matrix(cbind(matrix(rep(1,nrow(x))),x))
            resid <- as.matrix(y) - temp%*%as.matrix(EbiMk.save[i,])
            r2[i] <- 1 - t(resid)%*%resid/(t(as.matrix(y))%*%as.matrix(y))
        }
        n <- length(y)
        adj.r2 <- 1 - (1-r2)*((n-1)/(n - (size)))
        xnames <- dimnames(as.data.frame(x))[[2]]

        # ============================================================================== #
        # This section collects the output into a "list" variable and
        # returns the list.
        # ============================================================================== #
        dimnames(which) <- list(NULL, dimnames(x)[[2]])
        dimnames(EbiMk) <- list(NULL, c("Int", dimnames(x)[[2]]))
        dimnames(sebiMk) <- list(NULL, c("Int", dimnames(x)[[2]]))
        result <- list(postprob = as.matrix(postprob), namesx = xnames,
            label = label, r2 = r2, adj.r2 = adj.r2, phi = phi,
            log.marg = as.matrix(log.marg), size = (size - 1), which = which,
            probne0 = c(probne0), postmean = as.matrix(Ebi),
            postsd = as.matrix(SDbi), condpostmean = CEbi,
            condpostsd = CSDbi, ols = EbiMk, se = sebiMk, reduced = reduced,
            dropped = dropped, call = cl, n.models = length(postprob),
            n.vars = length(probne0), nu = nu, lambda = lambda, V = define.V,
            g.prior = g.prior, delta = delta, gam = gam, PMsize = PMsize)
        return(result)
    }

    # ================================================================== #
    # This section calls the above function
    # ================================================================== #
    if(compare==TRUE) {
        # If the user specifies the first prior, the program defaults to bicreg
        if(g.prior==1) {
            nvar <- ncol(save.x)
            result.alt <- bicreg(save.x, save.y, wt, strict, OR,
                                 maxCol=maxCol, nbest=nbest)
            result.hpdi <- compute.HPDI(as.matrix(result.alt$postprob),
                               as.matrix(result.alt$ols),
                               as.matrix(result.alt$se), length(y))
            hpdi90.alt <- result.hpdi$hpdi90
            hpdi95.alt <- result.hpdi$hpdi95
            hpdi99.alt <- result.hpdi$hpdi99
            nu.alt <- 0
            lambda.alt <- 0
            V.alt <- "TRUE"
            PMsize.alt <- NULL
            g.alt <- 1
            phi.alt <- 1
            delta.alt <- NULL
            gam.alt <- NULL
            r2.alt <- as.matrix(as.numeric(result.alt$r2/100))
            adj.r2.alt <- 1 - (1-result.alt$r2/100)*((length(save.y)-1)/
                          (length(save.y) - (result.alt$size)))
            log.marg.alt <- result.alt$bic
        } else {
            nvar <- ncol(x)
            # Run bicreg using the priors sent in by the user
            result.alt <- run.bicreg(x, y, wt, strict, OR, nbest, define.V,
                              phi, nu, lambda, delta, gam, PMsize, mean.y,
                              mean.x, sd.y, sd.x, g.prior, save.y, save.x)
            result.hpdi <- compute.HPDI(as.matrix(result.alt$postprob),
                               as.matrix(result.alt$ols),
                               as.matrix(result.alt$se), (length(y)+nu))
            hpdi90.alt <- result.hpdi$hpdi90
            hpdi95.alt <- result.hpdi$hpdi95
            hpdi99.alt <- result.hpdi$hpdi99
            nu.alt <- result.alt$nu
            lambda.alt <- result.alt$lambda
            V.alt <- result.alt$V
            PMsize.alt <- result.alt$PMsize
            g.alt <- result.alt$g.prior
            delta.alt <- result.alt$delta
            phi.alt <- NULL
            gam.alt <- result.alt$gam
            adj.r2.alt <- result.alt$adj.r2
            r2.alt <- as.matrix(as.numeric(result.alt$r2))
            log.marg.alt <- result.alt$log.marg
        }

        # Run bicreg using the standard settings => UIP
        result.UIP <- bicreg(save.x, save.y, wt, strict, OR, nbest=nbest,
                             maxCol=maxCol)
        nvar <- ncol(save.x)
        result.hpdi <- compute.HPDI(as.matrix(result.UIP$postprob),
                           as.matrix(result.UIP$ols),
                           as.matrix(result.UIP$se), length(y))
        hpdi90.UIP <- result.hpdi$hpdi90
        hpdi95.UIP <- result.hpdi$hpdi95
        hpdi99.UIP <- result.hpdi$hpdi99
        # Compute the adjusted R squared for the UIP results
        adj.r2.UIP <- 1 - (1-result.UIP$r2/100)*((length(y)-1)/
                      (length(y) - (result.UIP$size)))
        # Compute the difference in inclusion probability for each regressor
        prob.diff <- as.matrix(result.alt$probne0 - result.UIP$probne0)
        # Compute the difference in mean for each of the regressors
        postmean.diff <- as.matrix(result.alt$postmean[1:(nvar+1)] -
                         result.UIP$postmean[1:(nvar+1)])
        # Compute the correlations between the right hand side variables
        correlations <- compute.correlations(save.x)

        result <- list(obs = length(y), nu.alt = nu.alt,
            lambda.alt = lambda.alt, V.alt = V.alt, PMsize.alt = PMsize.alt,
            g.alt = g.alt,
            phi.alt = phi.alt, delta.alt = delta.alt, gam.alt = gam.alt,
            hpdi90.alt = hpdi90.alt, hpdi95.alt = hpdi95.alt,
            hpdi99.alt = hpdi99.alt, hpdi90.UIP = hpdi90.UIP,
            hpdi95.UIP = hpdi95.UIP, hpdi99.UIP = hpdi99.UIP,
            postmean.alt = as.matrix(result.alt$postmean[1:(nvar+1)]),
            postsd.alt = as.matrix(result.alt$postsd[1:(nvar+1)]),
            probne0.alt = as.matrix(result.alt$probne0),
            postmean.UIP = as.matrix(result.UIP$postmean[1:(nvar+1)]),
            postsd.UIP = as.matrix(result.UIP$postsd[1:(nvar+1)]),
            probne0.UIP = as.matrix(result.UIP$probne0),
            probne0.diff = prob.diff, postmean.diff = postmean.diff,
            correlations = correlations, postprob.alt = result.alt$postprob,
            postprob.UIP = as.matrix(result.UIP$postprob),
            namesx = result.alt$namesx, label.alt = result.alt$label,
            label.UIP = result.UIP$label, r2.alt = r2.alt,
            r2.UIP = as.matrix(result.UIP$r2), adj.r2.alt = adj.r2.alt,
            adj.r2.UIP = as.matrix(adj.r2.UIP), log.marg.alt = log.marg.alt,
            log.marg.UIP = as.matrix(result.UIP$bic),
            size.alt = (result.alt$size - 1), size.UIP = (result.UIP$size - 1),
            which.alt = result.alt$which, which.UIP = result.UIP$which,
            condpostmean.alt = result.alt$condpostmean,
            condpostmean.UIP = result.UIP$condpostmean,
            condpostsd.alt = result.alt$condpostsd,
            condpostsd.UIP = result.UIP$condpostsd,
            ols.alt = result.alt$ols, ols.UIP = result.UIP$ols,
            se.alt = result.alt$se, se.UIP = result.UIP$se,
            reduced = reduced, dropped = dropped, call = cl,
            n.models.alt = length(result.alt$postprob),
            n.models.UIP = length(result.UIP$postprob),
            n.vars = length(result.alt$probne0))
        class(result) <- "bma.compare"
    } else if(performance.compare==TRUE) {
        # If the user specifies the first prior, the program defaults to bicreg
        if(g.prior==1) {
            nvar <- ncol(save.x)
            result.alt <- bicreg(save.x, save.y, wt, strict, OR,
                                 maxCol=maxCol, nbest=nbest)
            result.hpdi <- compute.HPDI(as.matrix(result.alt$postprob),
                               as.matrix(result.alt$ols),
                               as.matrix(result.alt$se), length(y))
            hpdi90.alt <- result.hpdi$hpdi90
            hpdi95.alt <- result.hpdi$hpdi95
            hpdi99.alt <- result.hpdi$hpdi99
            nu.alt <- 0
            lambda.alt <- 0
            V.alt <- "TRUE"
            PMsize.alt <- NULL
            g.alt <- 1
            delta.alt <- NULL
            gam.alt <- NULL
            r2.alt <- as.matrix(as.numeric(result.alt$r2/100))
            adj.r2.alt <- 1 - ((1-result.alt$r2/100)*(length(save.y)-1)/
                          (length(save.y) - (result.alt$size-1) - 1))
            log.marg.alt <- result.alt$bic
        } else {
            nvar <- ncol(x)
            # Run bicreg using the priors sent in by the user
            result.alt <- run.bicreg(x, y, wt, strict, OR, nbest, define.V,
                              phi, nu, lambda, delta, gam, PMsize, mean.y,
                              mean.x, sd.y, sd.x, g.prior, save.y, save.x)
            result.hpdi <- compute.HPDI(as.matrix(result.alt$postprob),
                               as.matrix(result.alt$ols),
                               as.matrix(result.alt$se), (length(y)+nu))
            hpdi90.alt <- result.hpdi$hpdi90
            hpdi95.alt <- result.hpdi$hpdi95
            hpdi99.alt <- result.hpdi$hpdi99
            nu.alt <- result.alt$nu
            lambda.alt <- result.alt$lambda
            V.alt <- result.alt$V
            PMsize.alt <- result.alt$PMsize
            g.alt <- result.alt$g.prior
            delta.alt <- result.alt$delta
            gam.alt <- result.alt$gam
            r2.alt <- as.matrix(as.numeric(result.alt$r2))
            adj.r2.alt <- result.alt$adj.r2
            log.marg.alt <- result.alt$log.marg
        }

        # Run bicreg using the standard settings => UIP
        result.UIP <- bicreg(save.x, save.y, wt, strict, OR, nbest=nbest,
                             maxCol=maxCol)
        nvar <- ncol(save.x)
        result.hpdi <- compute.HPDI(as.matrix(result.UIP$postprob),
                           as.matrix(result.UIP$ols),
                           as.matrix(result.UIP$se), length(y))
        hpdi90.UIP <- result.hpdi$hpdi90
        hpdi95.UIP <- result.hpdi$hpdi95
        hpdi99.UIP <- result.hpdi$hpdi99
        # Compute the difference in R squared across the top model
        r2.diff <- result.alt$r2[1] - result.UIP$r2[1]/100
        adj.r2.UIP <- 1 - (1-result.UIP$r2/100)*((length(y)-1)/
                      (length(y) - (result.UIP$size)))
        # Compute the difference in inclusion probability for each regressor
        prob.diff <- as.matrix(result.alt$probne0 - result.UIP$probne0)
        # Compute the difference in mean for each of the regressors
        postmean.diff <- as.matrix(result.alt$postmean[1:(nvar+1)] -
                         result.UIP$postmean[1:(nvar+1)])

        # If the user didn't send in indices, then randomly split the data
        if(is.null(in.index)||is.null(out.index)) {
            # Call the function that splits the data
            new.indices <- get.indices(obs,percent.split,save.x)
            in.index <- new.indices$in.index
            out.index <- new.indices$out.index
        }

        # Separate the data into two groups
        used.y <- as.matrix(y[in.index])
        save.used.y <- as.matrix(save.y[in.index])
        used.x <- x[in.index,]
        save.used.x <- as.matrix(save.x[in.index,])
        used.wt <- wt[in.index]
        held.out.y <- as.matrix(y[out.index])
        held.out.x <- x[out.index,]
        save.held.out.y <- as.matrix(save.y[out.index])
        save.held.out.x <- save.x[out.index,]
        held.out.wt <- wt[out.index]
        used.mean.y <- mean(save.y[in.index])
        used.sd.y <- sqrt(var(save.y[in.index]))
        used.mean.x <- as.matrix(apply(save.x[in.index,],2,mean))
        used.sd.x <- apply(save.x[in.index,],2,sd)   # column SDs

        # If the user selects the first prior, then run bicreg as usual
        if(g.prior==1) {
            result.alt.split <- bicreg(save.used.x, save.used.y, used.wt,
                                    strict=strict, OR=OR, maxCol=maxCol,
                                    nbest=nbest)
            # Run LPS on the alternative model
            result.alt.LPS <- compute.LPS.PC.CRPS(used.y, used.x, held.out.y,
                                  held.out.x,
                                  models.list = result.alt.split$which,
                                  result.alt.split$postprob,
                                  define.V = TRUE, phi = 1, nu = 1e-08,
                                  lambda = 1e-08, delta = NULL, gam = NULL,
                                  percent.coverage = percent.cov, g.prior = 1)
        } else {
            # Run bicreg using the priors sent in by the user
            result.alt.split <- run.bicreg(used.x, used.y, used.wt, strict,
                                    OR, nbest, define.V, phi, nu, lambda,
                                    delta, gam, PMsize, used.mean.y,
                                    used.mean.x, used.sd.y, used.sd.x,
                                    g.prior, save.used.y, save.used.x)
            # Run LPS on the alternative model
            result.alt.LPS <- compute.LPS.PC.CRPS(used.y, used.x, held.out.y,
                                  held.out.x,
                                  models.list = result.alt.split$which,
                                  result.alt.split$postprob,
                                  define.V = result.alt.split$V,
                                  phi = result.alt.split$phi,
                                  nu = result.alt.split$nu,
                                  lambda = result.alt.split$lambda,
                                  delta = result.alt.split$delta,
                                  gam = result.alt.split$gam,
                                  percent.coverage = percent.cov,
                                  g.prior = result.alt.split$g.prior)
        }

        # Run bicreg using the standard settings
        result.UIP.split <- bicreg(save.used.x, save.used.y, used.wt,
                                strict=strict, OR=OR, maxCol=maxCol,
                                nbest=nbest)
        # Run LPS using the standard settings
        result.UIP.LPS <- compute.LPS.PC.CRPS(used.y, used.x, held.out.y,
                              held.out.x,
                              models.list = result.UIP.split$which,
                              result.UIP.split$postprob,
                              define.V = TRUE, phi = 1, nu = 1e-08,
                              lambda = 1e-08, delta = NULL, gam = NULL,
                              percent.coverage = percent.cov, g.prior = 1)

        # Compute the difference in the log predictive score
        lps.diff <- as.matrix(result.alt.LPS$lps - result.UIP.LPS$lps)
        # Compute the difference in predictive coverage
        pc.diff <- as.matrix(result.alt.LPS$predictive.coverage -
                   result.UIP.LPS$predictive.coverage)

        result <- list(obs = length(y), percent.cov = percent.cov,
            percent.split = percent.split, r2.diff = r2.diff,
            nu.alt = nu.alt, lambda.alt = lambda.alt, V.alt = V.alt,
            PMsize.alt = PMsize.alt, g.alt = g.alt, delta.alt = delta.alt,
            gam.alt = gam.alt,
            lps.per.model.UIP = result.UIP.LPS$lps.per.model,
            lps.per.model.alt = result.alt.LPS$lps.per.model,
            hpdi90.alt = hpdi90.alt, hpdi95.alt = hpdi95.alt,
            hpdi99.alt = hpdi99.alt, hpdi90.UIP = hpdi90.UIP,
            hpdi95.UIP = hpdi95.UIP, hpdi99.UIP = hpdi99.UIP,
            lps.UIP = result.UIP.LPS$lps, lps.alt = result.alt.LPS$lps,
            pc.UIP = result.UIP.LPS$predictive.coverage,
            pc.alt = result.alt.LPS$predictive.coverage,
            crps.UIP = result.UIP.LPS$crps, crps.alt = result.alt.LPS$crps,
            mse.alt = result.alt.LPS$mse, mse.UIP = result.UIP.LPS$mse,
            lps.diff = lps.diff, pc.diff = pc.diff,
            in.index = as.matrix(in.index), out.index = as.matrix(out.index),
            postmean.alt = as.matrix(result.alt$postmean[1:(nvar+1)]),
            postsd.alt = as.matrix(result.alt$postsd[1:(nvar+1)]),
            probne0.alt = as.matrix(result.alt$probne0),
            postmean.UIP = as.matrix(result.UIP$postmean[1:(nvar+1)]),
            postsd.UIP = as.matrix(result.UIP$postsd[1:(nvar+1)]),
            probne0.UIP = as.matrix(result.UIP$probne0),
            probne0.diff = prob.diff, postmean.diff = postmean.diff,
            postprob.alt = result.alt$postprob,
            postprob.UIP = result.UIP$postprob,
            postprob.alt.split = result.alt.split$postprob,
            postprob.UIP.split = result.UIP.split$postprob,
            namesx = result.alt$namesx, namesx.alt = result.alt$namesx,
            namesx.UIP = result.UIP$namesx,
            label.alt = result.alt$label, label.UIP = result.UIP$label,
            r2.alt = r2.alt, r2.UIP = result.UIP$r2,
            adj.r2.alt = adj.r2.alt, adj.r2.UIP = adj.r2.UIP,
            log.marg.alt = as.matrix(log.marg.alt),
            log.marg.UIP = result.UIP$bic,
            size.alt = (result.alt$size - 1), size.UIP = (result.UIP$size - 1),
            which.alt = result.alt$which, which.UIP = result.UIP$which,
            reduced = reduced, dropped = dropped, call = cl,
            ols.alt = result.alt$ols, ols.UIP = result.UIP$ols,
            se.alt = result.alt$se, se.UIP = result.UIP$se,
            n.models.alt = length(result.alt$postprob),
            n.models.UIP = length(result.UIP$postprob),
            n.models.alt.split = length(result.alt.split$postprob),
            n.models.UIP.split = length(result.UIP.split$postprob),
            n.vars = length(result.alt$probne0))
        class(result) <- "bma.performance.compare"
    } else if(performance==TRUE) {
        # If the user didn't send in indices, then randomly split the data
        if(is.null(in.index)||is.null(out.index)) {
            # Call the function that splits the data
            new.indices <- get.indices(obs,percent.split,save.x)
            in.index <- new.indices$in.index
            out.index <- new.indices$out.index
        }

        # Separate the data into two groups
        used.y <- as.matrix(y[in.index])
        save.used.y <- as.matrix(save.y[in.index])
        used.x <- x[in.index,]
        save.used.x <- as.matrix(save.x[in.index,])
        used.wt <- wt[in.index]
        held.out.y <- as.matrix(y[out.index])
        held.out.x <- as.matrix(x[out.index,])
        save.held.out.y <- as.matrix(save.y[out.index])
        save.held.out.x <- as.matrix(save.x[out.index,])
        held.out.wt <- wt[out.index]
        used.mean.y <- mean(save.y[in.index])
        used.sd.y <- sqrt(var(save.y[in.index]))
        used.mean.x <- as.matrix(apply(save.x[in.index,],2,mean))
        used.sd.x <- apply(save.x[in.index,],2,sd)   # column SDs

        # If the user selects the first prior, then run bicreg as usual
        if(g.prior==1) {
            result.alt.split <- bicreg(save.used.x, save.used.y, used.wt,
                                    strict=strict, OR=OR, maxCol=maxCol,
                                    nbest=nbest)
            # Run LPS on the alternative model
            result.alt.LPS <- compute.LPS.PC.CRPS(used.y, used.x, held.out.y,
                                  held.out.x,
                                  models.list = result.alt.split$which,
                                  models.post = result.alt.split$postprob,
                                  define.V = TRUE, phi = 1, nu = 1e-08,
                                  lambda = 1e-08, delta = NULL, gam = NULL,
                                  percent.coverage = percent.cov, g.prior = 1)
            nu.alt <- 0
            lambda.alt <- 0
            V.alt <- "TRUE"
            PMsize.alt <- NULL
            g.alt <- 1
            delta.alt <- NULL
            gam.alt <- NULL
            log.marg.alt <- result.alt.split$bic
            adj.r2.alt <- 1 - (1-result.alt.split$r2/100)*
                          ((length(save.used.y)-1)/
                          (length(save.used.y) - (result.alt.split$size)))
        } else {
            # Run bicreg using the priors sent in by the user
            result.alt.split <- run.bicreg(used.x, used.y, used.wt, strict,
                                    OR, nbest, define.V, phi, nu, lambda,
                                    delta, gam, PMsize, used.mean.y,
                                    used.mean.x, used.sd.y, used.sd.x,
                                    g.prior, save.used.y, save.used.x)
            # Run LPS on the alternative model
            result.alt.LPS <- compute.LPS.PC.CRPS(used.y, used.x, held.out.y,
                                  held.out.x,
                                  models.list = result.alt.split$which,
                                  models.post = result.alt.split$postprob,
                                  define.V = result.alt.split$V,
                                  phi = result.alt.split$phi,
                                  nu = result.alt.split$nu,
                                  lambda = result.alt.split$lambda,
                                  delta = result.alt.split$delta,
                                  gam = result.alt.split$gam,
                                  percent.coverage = percent.cov,
                                  g.prior = result.alt.split$g.prior)
            nu.alt <- result.alt.split$nu
            lambda.alt <- result.alt.split$lambda
            V.alt <- result.alt.split$V
            PMsize.alt <- result.alt.split$PMsize
            g.alt <- result.alt.split$g.prior
            delta.alt <- result.alt.split$delta
            gam.alt <- result.alt.split$gam
            log.marg.alt <- result.alt.split$log.marg
            adj.r2.alt <- result.alt.split$adj.r2
        }

        # Run bicreg using the standard settings
        result.UIP.split <- bicreg(save.used.x, save.used.y, used.wt,
                                strict=strict, OR=OR, maxCol=maxCol,
                                nbest=nbest)
        # Run LPS using the standard settings
        result.UIP.LPS <- compute.LPS.PC.CRPS(used.y, used.x, held.out.y,
                              held.out.x,
                              models.list = result.UIP.split$which,
                              models.post = result.UIP.split$postprob,
                              define.V = TRUE, phi = 1, nu = 1e-08,
                              lambda = 1e-08, delta = NULL, gam = NULL,
                              percent.coverage = percent.cov, g.prior = 1)
        adj.r2.UIP <- 1 - (1-(result.UIP.split$r2/100))*
                      ((length(save.used.y)-1)/
                      (length(save.used.y) - (result.UIP.split$size)))

        # Compute the difference in the log predictive score
        lps.diff <- as.matrix(result.alt.LPS$lps - result.UIP.LPS$lps)
        # Compute the difference in predictive coverage
        pc.diff <- as.matrix(result.alt.LPS$predictive.coverage -
                   result.UIP.LPS$predictive.coverage)

        result <- list(obs = length(used.y), percent.cov = percent.cov,
            percent.split = percent.split, nu.alt = nu.alt,
            adj.r2.alt = adj.r2.alt, adj.r2.UIP = adj.r2.UIP,
            lambda.alt = lambda.alt, V.alt = V.alt, delta.alt = delta.alt,
            gam.alt = gam.alt, PMsize.alt = PMsize.alt, g.alt = g.alt,
            lps.per.model.UIP = result.UIP.LPS$lps.per.model,
            lps.per.model.alt = result.alt.LPS$lps.per.model,
            lps.UIP = result.UIP.LPS$lps, lps.alt = result.alt.LPS$lps,
            pc.UIP = result.UIP.LPS$predictive.coverage,
            pc.alt = result.alt.LPS$predictive.coverage,
            crps.UIP = result.UIP.LPS$crps, crps.alt = result.alt.LPS$crps,
            mse.alt = result.alt.LPS$mse, mse.UIP = result.UIP.LPS$mse,
            lps.diff = lps.diff, pc.diff = pc.diff,
            in.index = as.matrix(in.index), out.index = as.matrix(out.index),
            postprob.alt = as.matrix(result.alt.split$postprob),
            postprob.UIP = as.matrix(result.UIP.split$postprob),
            namesx.alt = result.alt.split$namesx,
            namesx.UIP = result.UIP.split$namesx,
            label.alt = result.alt.split$label,
            label.UIP = result.UIP.split$label,
            r2.alt = result.alt.split$r2, r2.UIP = result.UIP.split$r2,
            log.marg.alt = as.matrix(log.marg.alt),
            log.marg.UIP = result.UIP.split$bic,
            size.alt = (result.alt.split$size - 1),
            size.UIP = (result.UIP.split$size - 1),
            which.alt = result.alt.split$which,
            which.UIP = result.UIP.split$which,
            reduced = reduced, dropped = dropped, call = cl,
            ols.alt = result.alt.split$ols, ols.UIP = result.UIP.split$ols,
            se.alt = result.alt.split$se, se.UIP = result.UIP.split$se,
            n.models.alt = length(result.alt.split$postprob),
            n.models.UIP = length(result.UIP.split$postprob),
            n.vars = length(result.alt.split$probne0))
        class(result) <- "bma.performance"
    } else if(performance.only==TRUE) {
        # If the user didn't send in indices, then randomly split the data
        if(is.null(in.index)||is.null(out.index)) {
            new.indices <- get.indices(obs,percent.split,save.x)
            in.index <- new.indices$in.index
            out.index <- new.indices$out.index
        }

        # Separate the data into two groups
        used.y <- as.matrix(y[in.index])
        save.used.y <- as.matrix(save.y[in.index])
        used.x <- x[in.index,]
        save.used.x <- as.matrix(save.x[in.index,])
        used.wt <- wt[in.index]
        held.out.y <- as.matrix(y[out.index])
        held.out.x <- x[out.index,]
        save.held.out.y <- as.matrix(save.y[out.index])
        save.held.out.x <- save.x[out.index,]
        held.out.wt <- wt[out.index]
        used.mean.y <- mean(save.y[in.index])
        used.sd.y <- sqrt(var(save.y[in.index]))
        used.mean.x <- as.matrix(apply(save.x[in.index,],2,mean))
        used.sd.x <- apply(save.x[in.index,],2,sd)   # column SDs

        # If the user selects the first prior, then run bicreg as usual
        if(g.prior==1) {
            result.split <- bicreg(save.used.x, save.used.y, used.wt,
                                strict=strict, OR=OR, maxCol=maxCol,
                                nbest=nbest)
            # Run LPS on the fitted models
            result.LPS <- compute.LPS.PC.CRPS(used.y, used.x, held.out.y,
                              held.out.x, models.list = result.split$which,
                              models.post = result.split$postprob,
                              define.V = TRUE, phi = 1, nu = 1e-08,
                              lambda = 1e-08, delta = NULL, gam = NULL,
                              percent.coverage = percent.cov, g.prior = 1)
            nu <- 0
            lambda <- 0
            V <- "TRUE"
            PMsize <- NULL
            g <- 1
            delta <- NULL
            gam <- NULL
            log.marg <- result.split$bic
            adj.r2 <- 1 - (1-(result.split$r2/100))*((length(used.y)-1)/
                      (length(used.y) - (result.split$size)))
        } else {
            # Run bicreg using the priors sent in by the user
            result.split <- run.bicreg(used.x, used.y, used.wt, strict, OR,
                                nbest, define.V, phi, nu, lambda, delta, gam,
                                PMsize, used.mean.y, used.mean.x, used.sd.y,
                                used.sd.x, g.prior, save.used.y, save.used.x)
            # Run LPS on the fitted models
            result.LPS <- compute.LPS.PC.CRPS(used.y, used.x, held.out.y,
                              held.out.x, models.list = result.split$which,
                              models.post = result.split$postprob,
                              define.V = result.split$V,
                              phi = result.split$phi, nu = result.split$nu,
                              lambda = result.split$lambda,
                              delta = result.split$delta,
                              gam = result.split$gam,   # was result.alt$gam, which does not exist here
                              percent.coverage = percent.cov,
                              g.prior = result.split$g.prior)
            nu <- result.split$nu
            lambda <- result.split$lambda
            V <- result.split$V
            PMsize <- result.split$PMsize
            g <- result.split$g.prior
            gam <- result.split$gam
            delta <- result.split$delta
            log.marg <- result.split$log.marg
            adj.r2 <- result.split$adj.r2
        }

        result <- list(obs = length(used.y), percent.cov = percent.cov,
            percent.split = percent.split, adj.r2 = adj.r2, nu = nu,
            lambda = lambda, V = V, PMsize = PMsize, g = g, delta = delta,
            gam = gam, lps.per.model = result.LPS$lps.per.model,
            lps = result.LPS$lps, pc = result.LPS$predictive.coverage,
            crps = result.LPS$crps, mse = result.LPS$mse,
            in.index = as.matrix(in.index), out.index = as.matrix(out.index),
            postprob = as.matrix(result.split$postprob),
            namesx = result.split$namesx, label = result.split$label,
            r2 = result.split$r2, log.marg = as.matrix(log.marg),
            size = (result.split$size - 1), which = result.split$which,
            reduced = reduced, dropped = dropped, call = cl,
            ols = result.split$ols, se = result.split$se,
            n.models = length(result.split$postprob),
            n.vars = length(result.split$probne0))
        class(result) <- "bma.performance.only"
    } else {
        # Standard single-prior run
        # Compute the correlations
        correlations <- compute.correlations(save.x)
        if(g.prior==1) {
            result.normal <- bicreg(save.x, save.y, wt=wt, strict=strict,
                                 OR=OR, maxCol=maxCol, nbest=nbest)
            r2 <- as.matrix(as.numeric(result.normal$r2/100))
            adj.r2 <- 1 - (1-result.normal$r2/100)*((length(y)-1)/
                      (length(y) - (result.normal$size)))
            nvar <- ncol(save.x)
            result.hpdi <- compute.HPDI(as.matrix(result.normal$postprob),
                               as.matrix(result.normal$ols),
                               as.matrix(result.normal$se), length(y))
            hpdi90 <- result.hpdi$hpdi90
            hpdi95 <- result.hpdi$hpdi95
            hpdi99 <- result.hpdi$hpdi99
            nu <- 0
            lambda <- 0
            phi <- 1
            PMsize <- NULL
            V <- TRUE
            delta <- NULL
            gam <- gam
            g.prior <- 1
            log.marg <- result.normal$bic
        } else {
            result.normal <- run.bicreg(x, y, wt, strict, OR, nbest,
                                 define.V, phi, nu, lambda, delta, gam,
                                 PMsize, mean.y, mean.x, sd.y, sd.x, g.prior,
                                 save.y, save.x)
            adj.r2 <- result.normal$adj.r2
            result.hpdi <- compute.HPDI(as.matrix(result.normal$postprob),
                               as.matrix(result.normal$ols),
                               as.matrix(result.normal$se), (length(y)+nu))
            hpdi90 <- result.hpdi$hpdi90
            hpdi95 <- result.hpdi$hpdi95
            hpdi99 <- result.hpdi$hpdi99
            nu <- result.normal$nu
            lambda <- result.normal$lambda
            PMsize <- result.normal$PMsize
            V <- result.normal$V
            delta <- result.normal$delta
            gam <- result.normal$gam
            g.prior <- result.normal$g.prior
            phi <- result.normal$phi
            log.marg <- result.normal$log.marg
            r2 <- as.matrix(as.numeric(result.normal$r2))
        }

        result <- list(obs = length(y), correlations = correlations, nu = nu,
            lambda = lambda, PMsize = PMsize, V = V, g.prior = g.prior,
            phi = phi, gam = gam, delta = delta,
            postprob = as.matrix(as.numeric(result.normal$postprob)),
            namesx = result.normal$namesx, hpdi90 = hpdi90, hpdi95 = hpdi95,
            hpdi99 = hpdi99, label = result.normal$label, r2 = r2,
            log.marg = as.matrix(log.marg),
            adj.r2 = as.matrix(as.numeric(adj.r2)),
            size = (result.normal$size - 1), which = result.normal$which,
            probne0 = as.matrix(result.normal$probne0),
            postmean = as.matrix(as.numeric(result.normal$postmean[1:(nvar+1)])),
            postsd = as.matrix(as.numeric(result.normal$postsd[1:(nvar+1)])),
            condpostmean = as.matrix(as.numeric(result.normal$condpostmean)),
            condpostsd = as.matrix(as.numeric(result.normal$condpostsd)),
            ols = result.normal$ols, se = result.normal$se,
            reduced = result.normal$reduced, dropped = result.normal$dropped,
            call = result.normal$call, n.models = result.normal$n.models,
            n.vars = result.normal$n.vars)
        class(result) <- "bma.regular"
    }

    # ================================================= #
    # Return the output
    # ================================================= #
    return(result)
}

# ================================================================ #
# Function to calculate the highest posterior density intervals
# ================================================================ #
compute.HPDI <- function(postprob, postmean, postsd, dof)
{
    postmean <- as.matrix(postmean)
    nmod <- nrow(postmean)
    nvar <- ncol(postmean)
    hpdi.left <- matrix(rep(0,3*nvar),nrow=nvar,ncol=3)
    hpdi.right <- matrix(rep(0,3*nvar),nrow=nvar,ncol=3)
    left.interval <- c(0.05,0.025,0.005)
    right.interval <- c(0.95,0.975,0.995)
    tol <- 1e-10

    # Run a bisection optimizer to compute the quantiles of the mixture
    for(i in 1:nvar) {
        for(j in 1:3) {
            a <- -100
            b <- 100
            s <- sign(left.prob.mass(left.interval[j],a,postprob,
                      postmean[,i],postsd[,i],dof))
            x <- (a + b)/2
            d <- (b - a)/2
            while(d>tol) {
                d <- d/2
                if(s==sign(left.prob.mass(left.interval[j],x,postprob,
                           postmean[,i],postsd[,i],dof))) {
                    x <- x + d
                } else {
                    x <- x - d
                }
            }
            hpdi.left[i,j] <- x

            a <- -100
            b <- 100
            s <- sign(right.prob.mass(right.interval[j],a,postprob,
                      postmean[,i],postsd[,i],dof))
            x <- (a + b)/2
            d <- (b - a)/2
            while(d>tol) {
                d <- d/2
                if(s==sign(right.prob.mass(right.interval[j],x,postprob,
                           postmean[,i],postsd[,i],dof))) {
                    x <- x + d
                } else {
                    x <- x - d
                }
            }
            hpdi.right[i,j] <- x
        }
    }
    hpdi90 <- cbind(hpdi.left[,1],hpdi.right[,1])
    hpdi95 <- cbind(hpdi.left[,2],hpdi.right[,2])
    hpdi99 <- cbind(hpdi.left[,3],hpdi.right[,3])
    result <- list(hpdi90=hpdi90,hpdi95=hpdi95,hpdi99=hpdi99)
    return(result)
}

# ================================================================ #
# Function to compute the leftward probability mass
# ================================================================ #
left.prob.mass <- function(CI, f.eval, postprob, postmean, postsd, dof)
{
    cum.prob <- 0
    if(f.eval<0) {
        for(i in 1:nrow(postprob)) {
            if(postsd[i]!=0) {
                f.eval.normed <- (f.eval - postmean[i])/postsd[i]
                cum.prob <- cum.prob + postprob[i]*pt(f.eval.normed,dof)
            }
        }
        result <- cum.prob - CI
    } else {
        if(f.eval==0) {
            for(i in 1:nrow(postprob)) {
                if(postsd[i]!=0) {
                    f.eval.normed <- (f.eval - postmean[i])/postsd[i]
                    cum.prob <- cum.prob + postprob[i]*pt(f.eval.normed,dof)
                } else {
                    cum.prob <- cum.prob + postprob[i]
                }
            }
            if((cum.prob>=CI)) {
                result <- 0
            } else {
                result <- cum.prob - CI
            }
        } else {
            for(i in 1:nrow(postprob)) {
                if(postsd[i]!=0) {
                    f.eval.normed <- (f.eval - postmean[i])/postsd[i]
                    cum.prob <- cum.prob + postprob[i]*pt(f.eval.normed,dof)
                } else {
                    cum.prob <- cum.prob + postprob[i]
                }
            }
            result <- cum.prob - CI
        }
    }
    return(result)
}

# ================================================================ #
# Function to compute the rightward probability mass. In the archived
# source this was a line-for-line copy of left.prob.mass (its header
# comment also said "leftward"), so it is defined as an alias here;
# the behavior is unchanged.
# ================================================================ #
right.prob.mass <- left.prob.mass

# ================================================================ #
# Function to calculate the correct value of "g" from FLS's paper
# ================================================================ #
find.g <- function(y, x, n, k, kj, g.prior, delta.sp, gam.sp)
{
    if(g.prior==1) {
        g <- 1/n
        names(g) <- "Prior 1"
    }
    if(g.prior==2) {
        g <- kj/n
        names(g) <- "Prior 2"
    }
    if(g.prior==3) {
        g <- (k^(1/kj))/n
        names(g) <- "Prior 3"
    }
    if(g.prior==4) {
        g <- sqrt(1/n)
        names(g) <- "Prior 4"
    }
    if(g.prior==5) {
        g <- sqrt(kj/n)
        names(g) <- "Prior 5"
    }
    if(g.prior==6) {
        g <- 1/(log(n)^3)
        names(g) <- "Prior 6"
    }
    if(g.prior==7) {
        g <- (log(kj + 1))/log(n)
        names(g) <- "Prior 7"
    }
    if(g.prior==8) {
        if((is.null(delta.sp))&&(is.null(gam.sp))) {
            if((kj<=15)&&(kj>1)) {
                delta <- 0.15
                gam <- 0.65
            } else {
                stop('The user must specify delta and gam')
            }
        } else {
            delta <- delta.sp
            gam <- gam.sp
        }
        g <- (delta*(gam^(1/kj)))/(1 - delta*gam^(1/kj))
        names(g) <- "Prior 8"
    }
    if(g.prior==9) {
        g <- 1/(k^2)
        names(g) <- "Prior 9"
    }
    if(g.prior==10) {
        g <- 1/max(n,k^2)
        names(g) <- "Prior 10"
    }
    if(g.prior==11) {
        # RMH default: phi depends on the fit of the full model
        r2 <- summary(lm(y ~ ., data = data.frame(y,x)))$r.squared
        if(r2<0.9) {
            new.phi <- 2.85
        } else {
            new.phi <- 9.2
        }
        g <- 1/((new.phi^2)*n)
        names(g) <- "Prior 11"
    }
    if(g.prior==12) {
        g <- 1/n
        names(g) <- "Prior 12"
    }
    return(g)
}

# ================================================================ #
# Function to calculate the log posterior predictive ordinate
# ================================================================ #
calculate.ppo <- function(held.out.y, Z, V.prime, mu.prime, lambda.prime,
                          nu.prime)
{
    p <- 1
    # The posterior predictive distribution is a Student t; evaluate its
    # log density at the held-out observation
    log.pred <- lgamma((nu.prime + p)/2) - lgamma(nu.prime/2) -
                (p/2)*log(nu.prime*pi) -
                0.5*log(lambda.prime*(Z%*%V.prime%*%t(Z) + 1)) -
                ((nu.prime + p)/2)*log(1 + (1/nu.prime)*
                ((held.out.y - Z%*%mu.prime)^2)/
                (lambda.prime*(Z%*%V.prime%*%t(Z)+1)))
    return(log.pred)
}
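# A quick illustration of find.g. The numbers are made up for the example
# (n = 88 observations, k = 31 candidate regressors, a model with kj = 8
# parameters); the block is wrapped in if (FALSE) so that sourcing this
# file does not execute it.
if (FALSE) {
    find.g(NULL, NULL, n = 88, k = 31, kj = 8, g.prior = 3,
           delta.sp = NULL, gam.sp = NULL)   # (31^(1/8))/88, labeled "Prior 3"
    find.g(NULL, NULL, n = 88, k = 31, kj = 8, g.prior = 10,
           delta.sp = NULL, gam.sp = NULL)   # 1/max(88, 31^2) = 1/961
}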
# ================================================================ #
# Function to calculate the log predictive score (LPS), predictive
# coverage, and CRPS on the held-out data
# ================================================================ #
compute.LPS.PC.CRPS <- function(used.y, used.x, held.out.y, held.out.x,
                                models.list, models.post, define.V, phi, nu,
                                lambda, delta, gam, g.prior, percent.coverage)
{
    used.x <- data.frame(used.x)
    n.models <- nrow(as.matrix(models.list))
    obs <- nrow(as.matrix(held.out.y))
    models.post <- as.matrix(models.post)

    # ===================================================== #
    # Calculate the sufficient statistics for the posterior
    # predictive distributions, which are Student t
    # ===================================================== #
    # Storage matrices for the posterior predictive ordinates
    ppo.mat <- matrix(0,obs,n.models)
    post.mat <- matrix(0,obs,n.models)
    post.pred.mean <- matrix(0,obs,n.models)
    post.pred.var <- matrix(0,obs,n.models)

    for(j in 1:n.models) {
        n <- length(used.y)
        ones <- rep(1, n)
        k <- ncol(used.x) + 1
        # Create the matrix of used RHS variables
        X <- as.matrix(cbind(ones, used.x[, models.list[j,]]))
        Y <- as.matrix(used.y)
        # Prior mean for the coefficients
        mu <- as.matrix(rep(0,ncol(X)))
        p <- ncol(X)

        # "g" may be different for each model
        if(is.null(phi)) {
            g.temp <- find.g(used.y, used.x, n, k, p, g.prior, delta, gam)
            phi.temp <- sqrt(1/(g.temp*n))
        } else {
            phi.temp <- phi
            g.temp <- 1/((phi.temp^2)*n)
        }
        V.temp <- define.V
        if(g.prior==11) { V.temp <- TRUE }

        # Create the prior covariance matrix
        if(V.temp==TRUE) {
            # Raftery, Madigan, and Hoeting's (1997) method
            V <- as.matrix(diag(c(1, rep(phi.temp^2, p-1))))
        } else {
            # Fernandez, Ley, and Steel's (2001) method
            V <- solve(t(X)%*%X)*(1/g.temp)
            V[,1] <- 0
            V[1,] <- 0
            V[1,1] <- 10000 * var(Y)   # var(Y); the archived source referenced an undefined y
        }

        # Update the sufficient statistics
        V.inv <- as.matrix(solve(V))
        V.prime <- as.matrix(solve(V.inv + t(X)%*%X))
        mu.prime <- as.matrix(V.prime%*%(t(X)%*%Y + V.inv%*%mu))
        nu.prime <- nu + n
        lambda.prime <- (1/(nu.prime))*(nu*lambda + t(mu)%*%V.inv%*%mu +
                        t(Y)%*%Y - t(t(X)%*%Y + V.inv%*%mu)%*%
                        solve(t(X)%*%X + V.inv)%*%(t(X)%*%Y + V.inv%*%mu))

        # =================================================================== #
        # Loop around each of the "held.out" observations for this model
        # =================================================================== #
        for(i in 1:obs) {
            XX <- held.out.x[i,]
            Z <- t(as.matrix(c(1,as.matrix(XX[models.list[j,]]))))
            post.pred.mean[i,j] <- Z%*%mu.prime
            post.pred.var[i,j] <- (nu.prime/(nu.prime-2))*
                                  (lambda.prime*(Z%*%V.prime%*%t(Z)+1))
            result <- calculate.ppo(held.out.y[i], Z, V.prime, mu.prime,
                                    lambda.prime, nu.prime)
            ppo.mat[i,j] <- result
            post.mat[i,j] <- exp(ppo.mat[i,j] + log(models.post[j]))
        }
        dof <- nu + n
    }

    # ================================================================== #
    # Calculate the predictive coverage (PC) and the mean squared error
    # ================================================================== #
    bma.PC <- 0
    pred.quantiles <- matrix(rep(0,2*obs),nrow=obs,ncol=2)
    mse <- matrix(rep(0,obs),nrow=obs,ncol=1)
    for(i in 1:obs) {
        result <- compute.PC(held.out.y[i], models.post, post.pred.mean[i,],
                      post.pred.var[i,], dof, percent.coverage)
        pred.quantiles[i,1] <- result$left
        pred.quantiles[i,2] <- result$right
        if((held.out.y[i]>=pred.quantiles[i,1])&&
           (held.out.y[i]<=pred.quantiles[i,2])) {
            bma.PC <- bma.PC + 1
        }
        # Squared error of the model-averaged predictive mean
        mse[i] <- (held.out.y[i] - sum(post.pred.mean[i,]*t(models.post)))^2
    }
    bma.PC <- bma.PC/obs
    mse <- sum(mse)/obs
    # ================================================================== #
    # Calculate the continuous ranked probability score (CRPS)
    # ================================================================== #
    crps <- compute.CRPS(held.out.y,models.post,post.pred.mean,post.pred.var,dof)

    # ================================================================== #
    # Calculate the log predictive score (LPS): minus the summed log of
    # the model-averaged predictive densities
    # ================================================================== #
    lps <- -sum(log(as.matrix(apply(post.mat,1,sum))))
    # Calculate the score for each individual model
    lps.per.model <- -apply(ppo.mat,2,sum)

    # ================================================================== #
    # Collect the output into a list variable and return it
    # ================================================================== #
    result <- list(lps = lps, crps = crps,
        lps.per.model = as.matrix(as.numeric(lps.per.model)),
        predictive.coverage = bma.PC, mse = mse,
        percent.coverage = percent.coverage)
    return(result)
}

# ================================================================ #
# Function to calculate the predictive interval endpoints used in
# the predictive-coverage calculation
# ================================================================ #
compute.PC <- function(held.out.y, models.post, post.pred.mean,
                       post.pred.var, dof, percent.coverage)
{
    postmean <- as.matrix(models.post)
    nmod <- nrow(postmean)
    per.cov.right <- 1 - (1 - percent.coverage)/2
    per.cov.left <- 1 - per.cov.right
    tol <- 1e-10

    # Run a bisection optimizer to compute the quantiles
    a <- -100
    b <- 100
    s <- sign(compute.prob.mass(per.cov.left, a, models.post,
              post.pred.mean, post.pred.var, dof))
    x <- (a + b)/2
    d <- (b - a)/2
    while(d>tol) {
        d <- d/2
        if(s==sign(compute.prob.mass(per.cov.left, x, models.post,
                   post.pred.mean, post.pred.var, dof))) {
            x <- x + d
        } else {
            x <- x - d
        }
    }
    left <- x

    a <- -100
    b <- 100
    s <- sign(compute.prob.mass(per.cov.right, a, models.post,
              post.pred.mean, post.pred.var, dof))
    x <- (a + b)/2
    d <- (b - a)/2
    while(d>tol) {
        d <- d/2
        if(s==sign(compute.prob.mass(per.cov.right, x, models.post,
                   post.pred.mean, post.pred.var, dof))) {
            x <- x + d
        } else {
            x <- x - d
        }
    }
    right <- x

    result <- list(left=left,right=right)
    return(result)
}

compute.prob.mass <- function(CI, f.eval, models.post, post.pred.mean,
                              post.pred.var, dof)
{
    # Mixture-of-t CDF at f.eval minus the target probability CI. (The
    # f.eval < 0 and f.eval >= 0 branches of the archived source were
    # identical, so they are merged here.)
    cum.prob <- 0
    post.pred.mean <- as.matrix(post.pred.mean)
    post.pred.var <- as.matrix(post.pred.var)
    postprob <- as.matrix(models.post)
    for(i in 1:nrow(postprob)) {
        if(post.pred.var[i]!=0) {
            f.eval.normed <- (f.eval - post.pred.mean[i])/sqrt(post.pred.var[i])
            cum.prob <- cum.prob + postprob[i]*pt(f.eval.normed,dof)
        }
    }
    result <- cum.prob - CI
    return(result)
}

# ================================================================ #
# Function to split the data into "held.out" and "used" samples
# (a top-level copy, identical to the version nested in bma.compare)
# ================================================================ #
get.indices <- function(obs, percent.split, save.x)
{
    accept <- FALSE
    while(accept==FALSE) {
        indices <- seq(from=1,to=obs,by=1)
        in.index <- sort(sample(indices,ceiling(obs*(percent.split)),replace=FALSE))
        out.index <- sort(indices[-in.index])
        test.variance1 <- diag(var(save.x[in.index,]))
        test.variance2 <- diag(var(save.x[out.index,]))
        temp1 <- 0
        for(i in 1:length(test.variance1)) {
            if(test.variance1[i]!=0) { temp1 <- temp1 + 1 }
        }
        temp2 <- 0
        for(i in 1:length(test.variance2)) {
            if(test.variance2[i]!=0) { temp2 <- temp2 + 1 }
        }
        if((temp1==length(test.variance1))&&(temp2==length(test.variance2))) {
            # Accept the indices
            accept <- TRUE
        }
    }
    result <- list(in.index = as.numeric(in.index),out.index = as.numeric(out.index))
    return(result)
}
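# A small check of compute.PC on a two-component t mixture (all numbers
# here are illustrative, not from the paper). The bisection should return
# interval endpoints whose mixture CDF values are 0.05 and 0.95.
if (FALSE) {
    compute.PC(held.out.y = 0.3,
               models.post = matrix(c(0.6, 0.4)),
               post.pred.mean = c(0, 0.5),
               post.pred.var = c(1, 1),
               dof = 50, percent.coverage = 0.9)
}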
# ========================================================================= #
# Function to compute the CRPS by simulation: draw from the mixture
# predictive distribution and use the sample-based form
#   CRPS = E|X - y| - (1/2) E|X - X'|
# ========================================================================= #
compute.CRPS <- function(held.out.y, models.post, post.pred.mean,
                         post.pred.var, dof)
{
    held.out.y <- as.matrix(held.out.y)
    models.post <- as.matrix(models.post)
    post.pred.mean <- as.matrix(post.pred.mean)
    obs <- length(held.out.y)
    n.models <- length(models.post)
    post.pred.sd <- sqrt(as.matrix(post.pred.var))
    n.sim <- 1000
    # Number of draws per model, proportional to its posterior probability
    n.draws <- round(models.post*n.sim)
    n.sim.true <- sum(n.draws)

    # Simulate from each model's predictive t distribution, observation by
    # observation. (The archived source indexed the mean and sd matrices
    # with a single subscript, which picked the wrong elements when there
    # was more than one held-out observation; the column indexing below
    # uses each observation's own predictive moments.)
    mix.mat <- matrix(post.pred.mean[,1] +
                   post.pred.sd[,1]*rt(obs*n.draws[1],df=dof),
                   nrow=obs,ncol=n.draws[1])
    for(i in 2:n.models) {
        temp <- matrix(post.pred.mean[,i] +
                    post.pred.sd[,i]*rt(obs*n.draws[i],df=dof),
                    nrow=obs,ncol=n.draws[i])
        mix.mat <- cbind(mix.mat,temp)
    }
    # Matrix with held.out.y[r] in every column of row r. (The archived
    # rep(..., each = n.sim.true) fill mismatched the rows for obs > 1.)
    obs.mat <- matrix(held.out.y, nrow=obs, ncol=n.sim.true)

    # E|X - y| term
    vector.1 <- abs(mix.mat - obs.mat)
    vector.1 <- as.matrix(apply(vector.1,1,mean))
    # E|X - X'| term
    vector.2 <- 0
    for(i in 1:n.sim.true) {
        for(j in 1:n.sim.true) {
            vector.2 <- vector.2 + abs(mix.mat[,i] - mix.mat[,j])
        }
    }
    vector.2 <- vector.2/(2*(n.sim.true)^2)
    crps <- mean(vector.1 - vector.2)
    return(crps)
}
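# ========================================================================= #
# Example usage (a minimal sketch; the data set, file name, and column
# choices below are placeholders, not part of the original code).
# ========================================================================= #
if (FALSE) {
    dat <- read.csv("growth_data.csv")      # hypothetical file
    y <- dat[, 1]                           # growth rate (response)
    x <- dat[, -1]                          # candidate regressors

    # Default mode: compare a user-chosen g prior against the UIP baseline
    fit <- bma.compare(x, y, g.prior = 3)

    # Out-of-sample performance of a single prior on an 80/20 random split
    perf <- bma.compare(x, y, performance.only = TRUE, g.prior = 3,
                        percent.split = 0.8, percent.cov = 0.9)
    perf$lps; perf$pc; perf$crps; perf$mse
}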