library("BMA") print("Loading 2SBMA version 21, last updated September 8, 2009") ##---- Changes Log ----------------------- ## ## v 1: First versions that were sent to T. Eicher ## v 2: Updated code to allow for additional BIC options ## v 3: Made this robust ## v 4: Took care of data frame/cbind issue when X is passed as NULL ## v 5: Added a routine for two independent endogenous variables ## v 6: Worked on the nested routine ## v 7: Updated output from nested routine ## v 8: Made a nested version of the two variables 2SBMA ## v 9: Updated two variable 2SBMA to allow bicreg options ## v10: Put in a summary routine ## v11: Fixed a bug in summary routine ## v12: Modified summary routine ## v13: Added Sargan tests, included residuals ## v14: Updated top models double routine ## v15: Included Cragg-Donald Statistic ## v16: Updated Summary routines ## v17: More updates of Summary routines ## v18: Update to Cragg-Donald Statistic ## v19: Included conditional sd calculations ## v20: Implemented several of Theo's suggestions on presentation ## also corrected a number of changes to variance calculations ## v21: Fixed a bug related to conditional means ##-------------------------------------------- TSBMA.posterior <- function(Y,X,W,Z,...) { n <- length(Y) if(!is.null(X)) { dfXZ <- data.frame(cbind(X, Z)) }else{ dfXZ <- data.frame(Z) } a.s1 <- bicreg(dfXZ, W,...) if(a.s1$reduced)dfXZ <- as.matrix(dfXZ[,-match(a.s1$dropped, names(dfXZ))]) nmodels.s1 <- length(a.s1$postprob) W.tilde.k <- as.matrix(cbind(1,dfXZ)) %*% t(a.s1$ols) W.tilde <- W.tilde.k %*% a.s1$postprob a.s2 <- bicreg(cbind(W.tilde, X), Y,...) return(list(s1 = a.s1, s2 = a.s2)) } TSBMA.Double <- function(Y,X,W1,Z1,W2,Z2,...) { n <- length(Y) if(!is.null(X)) { dfXZ1 <- data.frame(cbind(X, Z1)) }else{ dfXZ1 <- data.frame(Z1) } a.s1.w1 <- bicreg(dfXZ1, W1,...) if(a.s1.w1$reduced)dfXZ1 <- as.matrix(dfXZ1[,-match(a.s1.w1$dropped, names(dfXZ1))]) nmodels.s1.w1 <- length(a.s1.w1$postprob) W1.tilde.k <- as.matrix(cbind(1,dfXZ1)) %*% t(a.s1.w1$ols) W1.tilde <- W1.tilde.k %*% a.s1.w1$postprob if(!is.null(X)) { dfXZ2 <- data.frame(cbind(X, Z2)) }else{ dfXZ2 <- data.frame(Z2) } a.s1.w2 <- bicreg(dfXZ2, W2,...) if(a.s1.w2$reduced)dfXZ2 <- as.matrix(dfXZ2[,-match(a.s1.w2$dropped, names(dfXZ2))]) nmodels.s1.w2 <- length(a.s1.w2$postprob) W2.tilde.k <- as.matrix(cbind(1,dfXZ2)) %*% t(a.s1.w2$ols) W2.tilde <- W2.tilde.k %*% a.s1.w2$postprob a.s2 <- bicreg(cbind(W1.tilde, W2.tilde, X), Y,...) return(list(s1.w1 = a.s1.w1, s1.w2 = a.s1.w2, s2 = a.s2)) } TSBMA.Double.Nested <- function(Y,X,W1,Z1,W2,Z2,...) { n <- length(Y) p.X <- dim(X)[2] p.Z <- dim(Z1)[2] ##------------- First Stage with first endogenous variable ----------------- if(!is.null(X)) { dfXZ1 <- data.frame(cbind(X, Z1)) }else{ dfXZ1 <- data.frame(Z1) } a.s1.w1 <- bicreg(dfXZ1, W1,...)## OR = 1e+12, maxCol = dim(X)[2] + dim(Z)[2] + 1,nbest = 4)##...) if(a.s1.w1$reduced)dfXZ1 <- as.matrix(dfXZ1[,-match(a.s1.w1$dropped, names(dfXZ1))]) nmodels.s1.w1 <- length(a.s1.w1$postprob) ##-------------------------------------------------------------------------- ##------------ First Stage with second endogenous variable ----------------- if(!is.null(X)) { dfXZ2 <- data.frame(cbind(X, Z2)) }else{ dfXZ2 <- data.frame(Z2) } a.s1.w2 <- bicreg(dfXZ2, W2,...)## OR = 1e+12, maxCol = dim(X)[2] + dim(Z)[2] + 1,nbest = 4)##...) if(a.s1.w2$reduced)dfXZ2 <- as.matrix(dfXZ2[,-match(a.s1.w2$dropped, names(dfXZ2))]) nmodels.s1.w2 <- length(a.s1.w2$postprob) ##--------------------------------------------------------------------------- ##------------- Set-up for second stage ------------------------------------- prob.include.w1 <- sum(a.s1.w1$postprob[a.s1.w1$size > 1]) prob.include.w2 <- sum(a.s1.w2$postprob[a.s1.w2$size > 1]) Pi.i <- matrix(0, nmodels.s1.w1, nmodels.s1.w2) Beta.hat.istar <- array(dim = c(3 + dim(X)[2],nmodels.s1.w1, nmodels.s1.w2)) Beta.hat.istar.cond <- array(dim = c(3 + dim(X)[2],nmodels.s1.w1, nmodels.s1.w2), data = 0) for(i in 1:nmodels.s1.w1) { for(j in 1:nmodels.s1.w2) { if( (a.s1.w1$size[i] > 1) && (a.s1.w2$size[j] > 1)) { Pi.i[i,j] <- a.s1.w1$postprob[i] * a.s1.w2$postprob[j] / prob.include.w1 / prob.include.w2 } } } W1.tilde.k <- as.matrix(cbind(1,dfXZ1)) %*% t(a.s1.w1$ols) W2.tilde.k <- as.matrix(cbind(1,dfXZ2)) %*% t(a.s1.w2$ols) keep.models <- NULL keep.models$probs <- rep(0, 100) keep.models$s2 <- matrix(0, 100, dim(X)[2] + 3) keep.models$sargan <- rep(0, 100) keep.models$cd <- rep(0, 100) keep.models$cd.pval <- rep(0, 100) keep.models$sy <- rep(0,100) keep.models$s2.bic <- rep(0, 100) keep.models$s2.r2 <- rep(0, 100) keep.models$s2.nvar <- rep(0, 100) keep.models$s1.w1 <- rep(0, 100) keep.models$s1.w2 <- rep(0, 100) sargan.pval <- 0 cd.pval <- 0 s2 <- NULL s2$ols <- rep(0, 2 + 1 + dim(X)[2]) s2$var <- rep(0, 2 + 1 + dim(X)[2]) s2$var.correct <- rep(0, 2 + 1 + dim(X)[2]) s2$cond.var.correct <- rep(0, 2 + 1 + dim(X)[2]) s2$cond.var <- rep(0, 2 + 1 + dim(X)[2]) s2$prob <- rep(0, 2 + 1 + dim(X)[2]) cd.stat.avg <- 0 sy.stat.avg <- 0 ##---------------------------------------------------------------------------- ##------------ Run Second Stage ---------------------------------------------- for(i in 1:nmodels.s1.w1) { for(j in 1:nmodels.s1.w2) { print(paste("Model", i, "of", nmodels.s1.w1, "and", j, "of", nmodels.s1.w2, date())) if(a.s1.w1$size[i] > 1 && a.s1.w2$size[j] > 1) { ##---------- Run this combination ------------------ a.temp <- suppressWarnings(bicreg(cbind(W1.tilde.k[,i], W2.tilde.k[,j],X), Y,...))##OR = 1e+12, maxCol = dim(X)[2] + dim(Z)[2] + 1,nbest = 4))## ...) ##--------------------------------------------------- ##---------- Calculate On-the-fly results ------------ s2$ols <- s2$ols + a.temp$postmean * Pi.i[i,j] s2$var <- s2$var + (a.temp$postsd^2 + a.temp$postmean^2) * Pi.i[i,j] s2$var.correct <- s2$var.correct + Pi.i[i,j] * a.temp$postsd^2 s2$cond.var.correct <- s2$cond.var.correct + Pi.i[i,j] * a.temp$condpostsd^2 Beta.hat.istar[,i,j] <- a.temp$postmean Beta.hat.istar.cond[,i,j] <- a.temp$condpostmean s2$cond.var <- s2$cond.var + Pi.i[i,j] * (a.temp$condpostsd^2 + a.temp$condpostmean^2) s2$prob <- s2$prob + c(100,a.temp$probne0) * (a.s1.w1$postprob[i] / prob.include.w1) * (a.s1.w2$postprob[j] / prob.include.w2) / 100 tot.probs <- a.temp$postprob * Pi.i[i,j] ##------------------------------------------------------ ##-------- Go through individual models ---------------- for(k in 1:length(tot.probs)) { ## print(c(i,j,k)) ##------------- Set up for tests -------------------------------- W <- cbind(W1,W2) W.tilde <- cbind(W1.tilde.k[,i],W2.tilde.k[,j]) Z.include <- (1:p.Z)[ (a.s1.w1$which[i,(p.X + 1):(p.X + p.Z)] + a.s1.w2$which[j,(p.X + 1):(p.X + p.Z)]) > 0] if(length(Z.include) == 0)Z.include <- NULL Z.s1 <- Z[,Z.include,drop = FALSE] X.index.s2 <- (1:p.X)[ a.temp$which[k, 3:(p.X + 2)] ] if(length(X.index.s2) == 0)X.index.s2 <- NULL X.s2 <- X[,X.index.s2,drop = FALSE] X.in.s1 <- (1:p.X)[(a.s1.w1$which[i,1:p.X] + a.s1.w2$which[j,1:p.X]) > 0] X.instruments.index <- setdiff(X.in.s1, X.index.s2) if(length(X.instruments.index) == 0)X.instruments.index <- NULL X.instruments <- X[,X.instruments.index, drop = FALSE] if(dim(X.instruments)[2] + dim(Z.s1)[2] != 0) { s2.ols <- a.temp$ols[k,c(1:3,3 + X.index.s2)] sargan.this <- get.sargan.probs(Y,X.s2,cbind(Z.s1,X.instruments),W.tilde,s2.ols) sargan.pval <- sargan.pval + tot.probs[k] * sargan.this min.eval <- get.CD.probs(X.s2, cbind(Z.s1, X.instruments), W) CD.stat <- n * min.eval SY.stat <- (n - dim(X.instruments)[2] - dim(Z.s1)[2] - dim(X.s2)[2] - 1) / (dim(X.instruments)[2] + dim(Z.s1)[2]) cd.stat.avg <- cd.stat.avg + tot.probs[k] * CD.stat sy.stat.avg <- sy.stat.avg + tot.probs[k] * SY.stat cd.pval.this <- (1 - pchisq(CD.stat, dim(Z.s1)[2] + dim(X.instruments)[2] - 1)) cd.pval <- cd.pval + tot.probs[k] * cd.pval.this } min.saved <- min(keep.models$probs) if(min.saved < tot.probs[k]) { index.min <- order(keep.models$probs)[1] keep.models$probs[index.min] <- tot.probs[k] keep.models$s2[index.min, ] <- a.temp$ols[k, ] keep.models$sargan[index.min] <- sargan.this keep.models$cd[index.min] <- CD.stat keep.models$cd.pval[index.min] <- cd.pval.this keep.models$sy[index.min] <- SY.stat keep.models$s2.bic[index.min] <- a.temp$bic[k] keep.models$s2.r2[index.min] <- a.temp$r2[k] keep.models$s2.nvar[index.min] <- a.temp$size[k] keep.models$s1.w1[index.min] <- i keep.models$s1.w2[index.min] <- j } ##--------------------------------------------------------------- } ##------------------------------------------------------------------------------ } } } ##---------------------------------------------------------------------------- for(i in 1:(3 + p.X))s2$condmean[i] <- sum(Beta.hat.istar.cond[i,,] * Pi.i) for(i in 1:nmodels.s1.w1) { for(j in 1:nmodels.s1.w2) { if(a.s1.w1$size[i] > 1 && a.s1.w2$size[j] > 1) { s2$var.correct <- s2$var.correct + Pi.i[i,j] * (Beta.hat.istar[,i,j] - s2$ols)^2 s2$cond.var.correct <- s2$cond.var.correct + Pi.i[i,j] * (Beta.hat.istar.cond[,i,j] - s2$condmean)^2 } } } ##------------------------------------------------------------------------------ return(list(keep.models = keep.models, s2 = s2, sargan.pval = sargan.pval, cd.pval = cd.pval, cd.stat.avg = cd.stat.avg, sy.stat.avg = sy.stat.avg, s1.w1 = a.s1.w1, s1.w2 = a.s1.w2)) } TSBMA.nested <- function(Y, X, W, Z,...) { n <- length(Y) a.s1 <- bicreg(cbind(X,Z), W)##,...) prob.include <- sum(a.s1$postprob[a.s1$size != 0]) nmodels.s1 <- length(a.s1$postprob) W.tilde.k <- as.matrix(cbind(1,X,Z)) %*% t(a.s1$ols) a.s2 <- list() for(i in 1:nmodels.s1) { if(a.s1$size[i] != 0) { a.s2[[i]] <- bicreg(cbind(W.tilde.k[,i], X), Y)##,...) } } a.s2.summary <- NULL a.s2.summary$ols <- rep(0, 1 + 1 + dim(X)[2]) a.s2.summary$var <- rep(0, 1 + 1 + dim(X)[2]) a.s2.summary$prob <- rep(0, 1 + 1 + dim(X)[2]) sargan.probs <- get.sargan.probs(Y,X,Z,W.tilde.k,a.s1,a.s2) for(i in 1:nmodels.s1) { if(a.s1$size[i] != 0) { a.s2.summary$ols <- a.s2.summary$ols + a.s2[[i]]$postmean * a.s1$postprob[i] / prob.include a.s2.summary$var <- a.s2.summary$var + (a.s1$postprob[i] / prob.include) * (a.s2[[i]]$postsd^2 + a.s2[[i]]$postmean^2) a.s2.summary$prob <- a.s2.summary$prob + c(100,a.s2[[i]]$probne0) * a.s1$postprob[i] / prob.include / 100 } } a.s2.summary$var <- a.s2.summary$var - a.s2.summary$ols^2 return(list(s1 = a.s1, s2 = a.s2, a.s2.summary = a.s2.summary,sargan.probs = sargan.probs)) }##TSBMA.nested ##------------- These routines give the C-D p-value ------------------ proj.mat <- function(A) { return( A %*% solve(t(A) %*% A) %*% t(A)) } M.mat <- function(A) { P <- proj.mat(A) return(diag(dim(P)[1]) - P) } A.resid <- function(A,X) { M.X <- M.mat(X) return(M.X %*% A) } get.CD.probs <- function(X.this,Z.this,W) { X.this <- as.matrix(X.this) Z.this <- as.matrix(Z.this) W <- as.matrix(W) N <- dim(X.this)[1] K.1 <- dim(Z.this)[2] K.2 <- dim(X.this)[2] Z.bar <- cbind(X.this,Z.this) if(K.2 == 0) { Z.perp <- Z.this W.perp <- W }else{ Z.perp <- M.mat(X.this) %*% Z.this W.perp <- M.mat(X.this) %*% W } P.Z.perp <- proj.mat(Z.perp) Sigma.VV <- t(W) %*% M.mat(Z.bar) %*% W ## G.T <- t((W.resid.Z.bar)^(-1/2)) %*% W.resid.Z.resid %*% W.resid.Z.bar^(-1/2) * ((N - K.1 - K.2)) / K.1 G.T <- t(solve(chol(Sigma.VV))) %*% t(W.perp) %*% P.Z.perp %*% W.perp %*% solve(chol(Sigma.VV)) return(min(eigen(G.T)$values)) }##get.CD.probs get.min.eval.stat <- function(X.this,Z.this,W) { X.this <- as.matrix(X.this) Z.this <- as.matrix(Z.this) W <- as.matrix(W) N <- dim(X.this)[1] K.1 <- dim(Z.this)[2] K.2 <- dim(X.this)[2] Z.bar <- cbind(X.this,Z.this) W.resid.Z.bar <- t(W) %*% (M.mat(Z.bar)) %*% W if(K.2 == 0) { Z.resid.X <- Z.this W.resid.X <- W }else{ Z.resid.X <- M.mat(X.this) %*% Z.this W.resid.X <- M.mat(X.this) %*% W } Z.resid.X.proj <- proj.mat(Z.resid.X) W.resid.Z.resid <- t(W.resid.X) %*% Z.resid.X.proj %*% W.resid.X ## G.T <- t((W.resid.Z.bar)^(-1/2)) %*% W.resid.Z.resid %*% W.resid.Z.bar^(-1/2) * ((N - K.1 - K.2)) / K.1 G.T <- t(chol(solve(W.resid.Z.bar))) %*% W.resid.Z.resid %*% chol(solve(W.resid.Z.bar)) return(min(eigen(G.T)$values)) }##get.min.eval.stat ##------------------------------------------------------------------------- get.sargan.probs <- function(Y,X.this,Z.this,W.tilde, ols) { n <- length(Y) Probs <- NULL p.X <- dim(X.this)[2] p.Z <- dim(Z.this)[2] X.this <- as.matrix(X.this) Z.this <- as.matrix(Z.this) ehat <- Y - cbind(1, W.tilde,X.this) %*% ols test.stat <- n * summary(lm(ehat ~ cbind(X.this,Z.this)))$r.sq prob <- 1 - pchisq(test.stat, dim(X.this)[2] + dim(Z.this)[2] - dim(W.tilde)[2]) return(prob) }##get.sargan.probs top.models.nested.Double <- function(a, n.models, W1.text, W2.text, X.text,n) { p.X <- length(X.text) top.n <- order(a$keep.models$probs, decreasing = TRUE)[1:n.models] tbl.s2 <- matrix(a$keep.models$s2[top.n,], p.X + 3, n.models, byrow = TRUE) tbl.nvar <- a$keep.models$s2.nvar[top.n] tbl.r2 <- a$keep.models$s2.r2[top.n] tbl.bic <- a$keep.models$s2.bic[top.n] tbl.probs <- a$keep.models$probs[top.n] tbl.sarg <- a$keep.models$sargan[top.n] tbl.cd <- a$keep.models$cd[top.n] tbl.cd.pval <- a$keep.models$cd.pval[top.n] tbl.sy <- a$keep.models$sy[top.n] tbl.s1.w1 <- a$keep.models$s1.w1[top.n] tbl.s1.w2 <- a$keep.models$s1.w2[top.n] tbl.s1.w1.bic <- a$s1.w1$bic[tbl.s1.w1] tbl.s1.w1.r2 <- a$s1.w1$r2[tbl.s1.w1] tbl.s1.w2.bic <- a$s1.w2$bic[tbl.s1.w2] tbl.s1.w2.r2 <- a$s1.w2$r2[tbl.s1.w2] tbl.mods <- rbind(tbl.s2, tbl.nvar, tbl.r2, tbl.bic, tbl.probs, tbl.sarg, tbl.cd, tbl.cd.pval, tbl.sy, tbl.s1.w1, tbl.s1.w1.bic, tbl.s1.w1.r2, tbl.s1.w2, tbl.s1.w2.bic, tbl.s1.w2.r2) tbl.p <- c(a$s2$prob, rep(0,14)) tbl.ev <- c(a$s2$ols, rep(0,4),a$sargan, a$cd.stat.avg,a$cd.pval,a$sy.stat.avg,rep(0,6)) tbl.sd <- c(sqrt(a$s2$var.correct), rep(0,14)) tbl.cev <- c(a$s2$condmean, rep(0,14)) tbl.csd <- c(sqrt(a$s2$cond.var.correct), rep(0,14)) tbl.s2 <- cbind(tbl.p, tbl.ev, tbl.sd, tbl.cev, tbl.csd, tbl.mods) rownames(tbl.s2) <- c("Int", W1.text, W2.text, X.text, "nvar", "Gen_r2", "bic", "probs", "Sargan", "C-D","C-D Pval","SY", "W1_Mod", "W1_BIC", "W1_R2", "W2_Mod", "W2_BIC", "W2_R2") c.names <- c("p!=0", "EV", "SD", "CEV", "CSD") for(i in 1:n.models)c.names <- c(c.names, i) colnames(tbl.s2) <- c.names cat(paste("There were",n.obs,"observations")) cat("\n") return(round(tbl.s2,5)) }##top.models.nested.double