## k-fold CV prediction functions for ## Binomial regression, Beta-binomial regression ## Quasibinomial regression, and linear regression ## ## Christopher Adolph faculty.washington.edu/cadolph ## 18 November 2016 kfoldcvBinom <- function(obj, model, data, k=10, runs=1000) { m <- dim(data)[1] predAll <- matrix(NA, nrow=m, ncol=runs) for (j in 1:runs) { pred <- rep(NA, m) sampleM <- sample(m, m, replace=FALSE) ## Make folds foldsize <- rep(trunc(m/k), k) foldsize[k] <- foldsize[k] + m%%k folds <- list() s <- 0 for (i in 1:k) { folds[[i]] <- sampleM[(s+1):(s+foldsize[i])] s <- sum(foldsize[1:i]) } ## Loop over folds for (i in 1:k) { train <- unlist(folds[-i]) test <- unlist(folds[i]) i.binom <- glm(model, data=data[train,], family=binomial) pred[test] <- predict(i.binom, newdata=data[test,], type="response") } predAll[,j] <- pred } apply(predAll, 1, mean) } kfoldcvNorm <- function(obj, model, data, k=10, runs=1000) { m <- dim(data)[1] predAll <- matrix(NA, nrow=m, ncol=runs) for (j in 1:runs) { pred <- rep(NA, m) sampleM <- sample(m, m, replace=FALSE) ## Make folds foldsize <- rep(trunc(m/k), k) foldsize[k] <- foldsize[k] + m%%k folds <- list() s <- 0 for (i in 1:k) { folds[[i]] <- sampleM[(s+1):(s+foldsize[i])] s <- sum(foldsize[1:i]) } ## Loop over folds for (i in 1:k) { train <- unlist(folds[-i]) test <- unlist(folds[i]) i.norm <- glm(model, data=data[train,]) pred[test] <- predict(i.norm, newdata=data[test,], type="response") } predAll[,j] <- pred } apply(predAll, 1, mean) } kfoldcvQuasibin <- function(obj, model, data, k=10, runs=1000) { m <- dim(data)[1] predAll <- matrix(NA, nrow=m, ncol=runs) for (j in 1:runs) { pred <- rep(NA, m) sampleM <- sample(m, m, replace=FALSE) ## Make folds foldsize <- rep(trunc(m/k), k) foldsize[k] <- foldsize[k] + m%%k folds <- list() s <- 0 for (i in 1:k) { folds[[i]] <- sampleM[(s+1):(s+foldsize[i])] s <- sum(foldsize[1:i]) } ## Loop over folds for (i in 1:k) { train <- unlist(folds[-i]) test <- unlist(folds[i]) i.quasibin <- glm(model, data=data[train,], family=quasibinomial) pred[test] <- predict(i.quasibin, newdata=data[test,], type="response") } predAll[,j] <- pred } apply(predAll, 1, mean) } kfoldcvBetabin <- function(obj, model, data, k=10, runs=1000) { m <- dim(data)[1] predAll <- matrix(NA, nrow=m, ncol=runs) for (j in 1:runs) { pred <- rep(NA, m) sampleM <- sample(m, m, replace=FALSE) ## Make folds foldsize <- rep(trunc(m/k), k) foldsize[k] <- foldsize[k] + m%%k folds <- list() s <- 0 for (i in 1:k) { folds[[i]] <- sampleM[(s+1):(s+foldsize[i])] s <- sum(foldsize[1:i]) } ## Loop over folds for (i in 1:k) { train <- unlist(folds[-i]) test <- unlist(folds[i]) i.betabin <- vglm(model, data=data[train,], family=betabinomial) pred[test] <- predict(i.betabin, newdata=data[test,], type="response") } predAll[,j] <- pred } apply(predAll, 1, mean) }