# Four helpful goodness of fit functions
# for binary choice and ordered probit
# Chris Adolph
# chrisadolph.com
#
# (in the below, the null model assumes the model category always occurs)
#
# 1. percent correctly predicted, binary glm
#
# pcp.glm(res,             # an estimated glm object
#         y,               # the response data as a vector
#         type = "model"   # or "null" for the null model PCP
#         )                #   or "improve" for degree of improvement from null model
#
# 2. percent correctly predicted, ordered probit (requires simcf package from my website)
#
# pcp.oprobit(x,               # matrix of covariates (leave out the constant)
#             y,               # the response data as a vector
#             b,               # ordered probit coefficients, with cutpoints at the end
#             constant=1,      # location of the constant in the b vector;
#                                 if you used polr(), set to NA for no constant
#             ncat=3,          # number of possible outcomes
#             type = "model"   # or "null" for the null model PCP
#             )                #   or "improve" for degree of improvement from null model
#
# 3. concordance index (area under ROC), binary glm
#     - more useful for rare events than PCP
#
# concord.glm(res,             # an estimated glm object
#             y,               # the response data as a vector
#             type = "model"   # or "null" for the null model concordance
#             )                #   or "improve" for degree of improvement from null model
#
# 4. concordance index (area under ROC), ordered probit (requires simcf package from my website)
#     - more useful for rare events than PCP
#
# concord.oprobit(x,               # matrix of covariates (leave out the constant)
#                 y,               # the response data as a vector
#                 b,               # ordered probit coefficients, with cutpoints at the end
#                 constant=1,      # location of the constant in the b vector;
#                                     if you used polr(), set to NA for no constant
#                 ncat=3,          # number of possible outcomes
#                 type = "model"   # or "null" for the null model concordance
#                 )                #   or "improve" for degree of improvement from null model
#
pcp.glm <- function(res, y, type="model") { # other types:  null, improve
  
  pcp <- mean(round(predict(res,type="response"))==y)
  pcpNull <- max(mean(y), mean(1-y))
  pcpImprove <- (pcp-pcpNull)/(1-pcpNull)

  if (type=="model")
    return(pcp)
  if (type=="null")
    return(pcpNull)
  if (type=="improve")
    return(pcpImprove)
}

pcp.oprobit <- function(x, y, b, constant=1, ncat=3, type="model") { # other types:  null, improve

  require(simcf)
  b <- matrix(b,nrow=100,ncol=length(b),byrow=TRUE)
  simy <- oprobitsimev(x, b, constant=constant, cat=ncat)

  cats <- sort(unique(y))
  
  predcatN <- cats[rev(order(table(y)))][1]

  n <- length(y)
  pcp <- pcpNull <- predcatM <- rep(NA,n)
  for (i in 1:n) {
    predcatM[i] <- cats[rev(order(simy$pe[i,]))][1]
    pcp[i] <- predcatM[i]==y[i]
    pcpNull[i] <- predcatN==y[i]
  }
  
  pcp <- mean(pcp)
  pcpNull <- mean(pcpNull)
  pcpImprove <- (pcp-pcpNull)/(1-pcpNull)
  
  if (type=="model")
    return(pcp)
  if (type=="null")
    return(pcpNull)
  if (type=="improve")
    return(pcpImprove)
  
}


concord.glm <- function(res, y, type="model") { # other types:  null, improve
  require(verification)
  if (type=="model") {
    yhat <- predict(res,type="response")
    concord <- roc.area(y, yhat)$A
  }
  if (type=="null") {    
    yhat <- rep(max(mean(y), mean(1-y)), length(y))
    concord <- roc.area(y, yhat)$A
  }
  if (type=="improve") {
    yhat <- predict(res,type="response")
    model <- roc.area(y, yhat)$A
    yhat <- rep( max(mean(y), mean(1-y)), length(y))
    null <- roc.area(y, yhat)$A
    concord <- (model-null)/(1-null)
  }
  concord
}


concord.oprobit <- function(x, y, b, constant=1, ncat=3, type="model") { # other types:  null, improve
  require(simcf)
  require(verification)
  b <- matrix(b,nrow=100,ncol=length(b),byrow=TRUE)
  simy <- oprobitsimev(x, b, constant=constant, cat=ncat)
  cats <- sort(unique(y))

  if (type=="model") {
    model <- rep(NA,ncat)
    for (j in 1:ncat) {
      yhat <- simy$pe[,j]
      model[j] <- roc.area(as.numeric(y==cats[j]), yhat)$A
    }
    concord <- mean(model) 
  }

  if (type=="null") {
    null <- rep(NA,ncat)
    for (j in 1:ncat) {
      probs <- rep(mean(y==cats[j]), length(y))      
      null[j] <- roc.area(as.numeric(y==cats[j]), probs)$A
    }
    concord <- mean(null) 
  }
  
  if (type=="improve") {
    improve <- rep(NA,ncat)
    for (j in 1:ncat) {
      probs <- rep(mean(y==cats[j]), length(y))
      null <- roc.area(as.numeric(y==cats[j]), probs)$A
      yhat <- simy$pe[,j]
      model <- roc.area(as.numeric(y==cats[j]), yhat)$A
      improve[j] <- (model-null)/(1-null)
    }
    concord <- mean(improve) 
  }
  
  concord
}