## Using overimputation to compute coverage of multiple imputation
## prediction intervals, with plotting in tile
##
##   note: takes a while to run
##
## Turnout example using data from WA 2004 governor's race,
## median household income data (2004, Census) and 
## mostly-imputed college attainment rates (2005, Census -- earliest avail)
##
## Christopher Adolph
## 29 November 2016   faculty.washington.edu/cadolph

# Clear memory
rm(list=ls())


# Load libraries
library(Amelia)         # for multiple imputation
library(mice)           # for multiple imputation
library(simcf)          # for simulation
library(tile)           # for visualization
library(RColorBrewer)   # for color selection

# Set seed
set.seed(10)

# Simulation settings
nimp <- 100

## Get nice colors
brewer <- brewer.pal(9, "Set1")
red <- brewer[1]
blue <- brewer[2]
purple <- brewer[4]    
orange <- brewer[5]  
brown <- brewer[7]   
green <- brewer[3]   
nicegray <- "gray45"

purple1 <- rgb(110, 32, 208, maxColorValue = 255)
purple2 <- rgb(148, 91, 221, maxColorValue = 255)
purple3 <- rgb(195, 45, 184, maxColorValue = 255)
purple4 <- rgb(209, 103, 212, maxColorValue = 255)



## Load data
file <- "wa2004govWithCovariates.csv"
data <- read.csv(file, header=TRUE)

## Process data
data$college25 <- data$college25/100
data$gradRate05 <- data$gradRate05/100
data$nonvotes <- data$regvoters - data$votes

obsCases <- (1:nrow(data))[!is.na(data$college25)]

obsCollege <- data$college25[obsCases]

obsN <- length(obsCases)

miOverimp <- miceOverimp <- matrix(NA, nrow=nimp, ncol=obsN)

for (i in 1:obsN) {

    data0 <- data
    
    ## Knock out next obs college
    data0$college25[obsCases[i]] <- NA

    ## Multiple imputation of missing education data -- Amelia
    miData <- amelia(data0, m=nimp, idvars=c(1,7), logs=2:4, lgstc=5:6,
                     parallel = "multicore")

    ## Extract
    imp0 <- rep(NA, nimp)
    for (j in 1:nimp) {
        imp0[j] <- miData$imputations[[j]]$college25[obsCases[i]]
    }
    miOverimp[,i] <- imp0
    
    ## MI by MICE with PMM
    miceData <- mice(data0, m=nimp)

    ## Extract
    miceOverimp[,i] <- as.numeric(miceData$imp$college25[as.character(obsCases[i]),])
    
}


# Plot coverage of prediction intervals

cis <- seq(0.05, 0.95, 0.05)
coverageMI <- coverageMICE <- rep(NA, length(cis))

for (k in 1:length(cis)) {
    coverageMI[k] <- mean((obsCollege>=apply(miOverimp, 2, quantile, probs=(1-cis[k])/2))&
                              (obsCollege<=apply(miOverimp, 2, quantile, probs=(1+cis[k])/2)))
    coverageMICE[k] <- mean((obsCollege>=apply(miceOverimp, 2, quantile, probs=(1-cis[k])/2))&
                              (obsCollege<=apply(miceOverimp, 2, quantile, probs=(1+cis[k])/2)))
}

coverageMItrace <- pointsTile(x=cis,
                              y=coverageMI,
                              col=purple1,
                              size=1.6,
                              alpha=0.6,
                              pch=16,
                              clip="off",
                              plot=1)

coverageMICEtrace <- pointsTile(x=cis,
                                y=coverageMICE,
                                col=purple3,
                                size=1.6,
                                alpha=0.6,
                                pch=16,
                                clip="off",
                                plot=1)

diagline <- linesTile(x=c(0,1), y=c(0,1),
                      lex=1,
                      col="black",
                      layer=20,
                      plot=1)


miLab1 <- textTile(labels="Amelia",
                   x=0.15,
                   y=0.92,
                   col=purple1,
                   cex=1.1,
                   plot=1)

miceLab1 <- textTile(labels="MICE PMM",
                   x=0.15,
                   y=0.83,
                   col=purple3,
                   cex=1.1,
                   plot=1)

miCovLab1 <- textTile(labels=sprintf("%.0f", 100*coverageMI),
                      x=cis,
                      y=coverageMI,
                      cex=0.75,
                      col="white",
                      alpha=1,
                      plot=1)


miceCovLab1 <- textTile(labels=sprintf("%.0f", 100*coverageMICE),
                      x=cis,
                      y=coverageMICE,
                      cex=0.75,
                      col="white",
                      alpha=1,
                      plot=1)



file <- "coverageMIturnoutExtra"
tile(coverageMItrace, diagline,
     miLab1, miCovLab1,
     limits=c(-0.025,1.025,-0.025,1.025),
     output=list(file=file, width=5),
     yaxis=list(major=FALSE, labels=paste0(seq(0,100,20),"%")),
     xaxis=list(major=FALSE, labels=paste0(seq(0,100,20),"%")),
     xaxistitle=list(labels1="Prediction interval"),
     plottitle=list(labels1="Coverage rate",
         x=0.01175, y=1.5),
     height=list(xaxistitle=3),
     width=list(yaxistitle=3.5, yaxis.labelspace=-1.0)
     )


file <- "coverageMIMICEturnoutExtra"
tile(coverageMItrace, coverageMICEtrace, diagline,
     miLab1, miceLab1, miceCovLab1,
     limits=c(-0.025,1.025,-0.025,1.025),
     output=list(file=file, width=5),
     yaxis=list(major=FALSE, labels=paste0(seq(0,100,20),"%")),
     xaxis=list(major=FALSE, labels=paste0(seq(0,100,20),"%")),
     xaxistitle=list(labels1="Prediction interval"),
     plottitle=list(labels1="Coverage rate",
         x=0.01175, y=1.5),
     height=list(xaxistitle=3),
     width=list(yaxistitle=3.5, yaxis.labelspace=-1.0)
     )