## Using overimputation to compute coverage of multiple imputation ## prediction intervals, with plotting in tile ## ## note: takes a while to run ## ## Turnout example using data from WA 2004 governor's race, ## median household income data (2004, Census) and ## mostly-imputed college attainment rates (2005, Census -- earliest avail) ## ## Christopher Adolph ## 29 November 2016 faculty.washington.edu/cadolph # Clear memory rm(list=ls()) # Load libraries library(Amelia) # for multiple imputation library(mice) # for multiple imputation library(simcf) # for simulation library(tile) # for visualization library(RColorBrewer) # for color selection # Set seed set.seed(10) # Simulation settings nimp <- 100 ## Get nice colors brewer <- brewer.pal(9, "Set1") red <- brewer[1] blue <- brewer[2] purple <- brewer[4] orange <- brewer[5] brown <- brewer[7] green <- brewer[3] nicegray <- "gray45" purple1 <- rgb(110, 32, 208, maxColorValue = 255) purple2 <- rgb(148, 91, 221, maxColorValue = 255) purple3 <- rgb(195, 45, 184, maxColorValue = 255) purple4 <- rgb(209, 103, 212, maxColorValue = 255) ## Load data file <- "wa2004govWithCovariates.csv" data <- read.csv(file, header=TRUE) ## Process data data$college25 <- data$college25/100 data$gradRate05 <- data$gradRate05/100 data$nonvotes <- data$regvoters - data$votes obsCases <- (1:nrow(data))[!is.na(data$college25)] obsCollege <- data$college25[obsCases] obsN <- length(obsCases) miOverimp <- miceOverimp <- matrix(NA, nrow=nimp, ncol=obsN) for (i in 1:obsN) { data0 <- data ## Knock out next obs college data0$college25[obsCases[i]] <- NA ## Multiple imputation of missing education data -- Amelia miData <- amelia(data0, m=nimp, idvars=c(1,7), logs=2:4, lgstc=5:6, parallel = "multicore") ## Extract imp0 <- rep(NA, nimp) for (j in 1:nimp) { imp0[j] <- miData$imputations[[j]]$college25[obsCases[i]] } miOverimp[,i] <- imp0 ## MI by MICE with PMM miceData <- mice(data0, m=nimp) ## Extract miceOverimp[,i] <- as.numeric(miceData$imp$college25[as.character(obsCases[i]),]) } # Plot coverage of prediction intervals cis <- seq(0.05, 0.95, 0.05) coverageMI <- coverageMICE <- rep(NA, length(cis)) for (k in 1:length(cis)) { coverageMI[k] <- mean((obsCollege>=apply(miOverimp, 2, quantile, probs=(1-cis[k])/2))& (obsCollege<=apply(miOverimp, 2, quantile, probs=(1+cis[k])/2))) coverageMICE[k] <- mean((obsCollege>=apply(miceOverimp, 2, quantile, probs=(1-cis[k])/2))& (obsCollege<=apply(miceOverimp, 2, quantile, probs=(1+cis[k])/2))) } coverageMItrace <- pointsTile(x=cis, y=coverageMI, col=purple1, size=1.6, alpha=0.6, pch=16, clip="off", plot=1) coverageMICEtrace <- pointsTile(x=cis, y=coverageMICE, col=purple3, size=1.6, alpha=0.6, pch=16, clip="off", plot=1) diagline <- linesTile(x=c(0,1), y=c(0,1), lex=1, col="black", layer=20, plot=1) miLab1 <- textTile(labels="Amelia", x=0.15, y=0.92, col=purple1, cex=1.1, plot=1) miceLab1 <- textTile(labels="MICE PMM", x=0.15, y=0.83, col=purple3, cex=1.1, plot=1) miCovLab1 <- textTile(labels=sprintf("%.0f", 100*coverageMI), x=cis, y=coverageMI, cex=0.75, col="white", alpha=1, plot=1) miceCovLab1 <- textTile(labels=sprintf("%.0f", 100*coverageMICE), x=cis, y=coverageMICE, cex=0.75, col="white", alpha=1, plot=1) file <- "coverageMIturnoutExtra" tile(coverageMItrace, diagline, miLab1, miCovLab1, limits=c(-0.025,1.025,-0.025,1.025), output=list(file=file, width=5), yaxis=list(major=FALSE, labels=paste0(seq(0,100,20),"%")), xaxis=list(major=FALSE, labels=paste0(seq(0,100,20),"%")), xaxistitle=list(labels1="Prediction interval"), plottitle=list(labels1="Coverage rate", x=0.01175, y=1.5), height=list(xaxistitle=3), width=list(yaxistitle=3.5, yaxis.labelspace=-1.0) ) file <- "coverageMIMICEturnoutExtra" tile(coverageMItrace, coverageMICEtrace, diagline, miLab1, miceLab1, miceCovLab1, limits=c(-0.025,1.025,-0.025,1.025), output=list(file=file, width=5), yaxis=list(major=FALSE, labels=paste0(seq(0,100,20),"%")), xaxis=list(major=FALSE, labels=paste0(seq(0,100,20),"%")), xaxistitle=list(labels1="Prediction interval"), plottitle=list(labels1="Coverage rate", x=0.01175, y=1.5), height=list(xaxistitle=3), width=list(yaxistitle=3.5, yaxis.labelspace=-1.0) )