#
#
# R CODE FOR REPRODUCING THE FIGURES AND ANALYSES IN JW'S 
#       "BAYESIAN AND FREQUENTIST REGRESSION ANALYSIS" CHAPTER 12
# CODE WRITTEN BY JON WAKEFIELD, UNLESS OTHERWISE STATED
#
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#
# Example at the end of Section 12.2
#
library(lasso2)
data(Prostate)
attach(Prostate)
library(mgcv)
gammod <- gam(lpsa~s(x=lcavol,k=7,fx=F,bs="cr",m=2)+
      s(x=lweight,k=7,fx=F,bs="cr",m=2)+s(x=age,k=7,fx=F,bs="cr",m=2)+
      s(x=lbph,k=7,fx=F,bs="cr",m=2)+s(x=lcp,k=7,fx=F,bs="cr",m=2)+
      svi+gleason+s(x=pgg45,k=7,fx=F,bs="cr",m=2),method="GCV.Cp")
#
# Fig 12.1(a)
#
pdf("ProstateSmoothFig1.pdf",h=4,w=4)
plot(gammod,select=1,ylab="Fitted Cubic Spline",xlab="log(can vol)",shade=T,seWithMean=T)
dev.off()
#
# Fig 12.1(b)
#
pdf("ProstateSmoothFig2.pdf",h=4,w=4)
plot(gammod,select=2,ylab="Fitted Cubic Spline",xlab="log(weight)",shade=T,seWithMean=T)
dev.off()
#
# Fig 12.1(c)
#
pdf("ProstateSmoothFig3.pdf",h=4,w=4)
plot(gammod,select=3,ylab="Fitted Cubic Spline",xlab="Age",shade=T,seWithMean=T)
dev.off()
#
# Fig 12.1(d)
#
pdf("ProstateSmoothFig4.pdf",h=4,w=4)
plot(gammod,select=4,ylab="Fitted Cubic Spline",xlab="log(BPH)",shade=T,seWithMean=T)
dev.off()
#
# Fig 12.1(e)
#
pdf("ProstateSmoothFig5.pdf",h=4,w=4)
plot(gammod,select=5,ylab="Fitted Cubic Spline",xlab="log(cap pen)",shade=T,seWithMean=T)
dev.off()
#
# Fig 12.1(f)
#
pdf("ProstateSmoothFig6.pdf",h=4,w=4)
plot(gammod,select=6,ylab="Fitted Cubic Spline",xlab="PGS45",shade=T,seWithMean=T)
dev.off()
# 
# Prostate Cancer Example in Section 12.3.2
# 
# Linear model first
#
linmod <- lm(lpsa~.,data=Prostate)
wgrid <- seq(min(Prostate$lweight),max(Prostate$lweight),length=25)
cgrid <- seq(min(Prostate$lcavol),max(Prostate$lcavol),length=25)
zvals <- matrix(0,nrow=length(cgrid),ncol=length(wgrid))
for (i in 1:length(cgrid)){
    for (j in 1:length(wgrid)){
    	zvals[i,j] <- coef(linmod)[3]*wgrid[i] + coef(linmod)[2]*cgrid[j]
    }
}
#
# Fig 12.2(a)
#
pdf("Prostate-2dpersp.pdf",h=8,w=8)
persp(x=wgrid,y=cgrid,z=zvals,theta=35,phi=25,xlab="log(can vol)",
      ylab="log(weight)",zlab="Fitted Linear")
dev.off()
#
# Now TPRS
#
gammod <- gam(lpsa~s(lcavol,lweight,bs="tp")+s(x=age,k=7,fx=F,bs="cr",m=2)+
          s(x=lbph,k=7,fx=F,bs="cr",m=2)+s(x=lcp,k=7,fx=F,bs="cr",m=2)+
          svi+gleason+s(x=pgg45,k=7,fx=F,bs="cr",m=2),method="GCV.Cp")
#
# Fig 12.2(b)
#
pdf("TPRSprost1.pdf",h=8,w=8)
plot(gammod,select=1,pers=T,xlab="log(can vol)",ylab="log(weight)",
     main="Fitted TPRS",theta=35,phi=25)
dev.off()
#
# Now tensor products
#
gammod2 <- gam(lpsa~te(lcavol,lweight,k=c(6,6))+s(x=age,k=7,fx=F,bs="cr",m=2)+
    s(x=lbph,k=7,fx=F,bs="cr",m=2)+s(x=lcp,k=7,fx=F,bs="cr",m=2)+svi+gleason+
    s(x=pgg45,k=7,fx=F,bs="cr",m=2),method="GCV.Cp")
#
# Fig 12.2(c)
#
pdf("TensProdprost1.pdf",h=8,w=8)
plot(gammod2,select=1,pers=T,xlab="log(can vol)",ylab="log(weight)",
     main="Fitted Tensor Product",theta=35,phi=25)
dev.off()
summary(gammod2) # For effective degrees of freedom of tensor product = 12.4
#
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Varying coefficient model for the ethanol data -- example at the
# end of Section 12.6
#
library(SemiPar)
data(ethanol)
attach(ethanol)
nint <- 9
quantE <- midE <- NULL
quantE[1] <- min(E)-.01
quantE[2:nint] <- quantile(E,p=seq(1:(nint-1))/nint)
quantE[nint+1] <- max(E)+.01
#
# Fig 12.3
#
pdf("ethfig1.pdf")
par(mfrow=c(3,3))
betas <- matrix(0,nrow=nint,ncol=2)
for (i in 1:nint){
    y <- NOx[E>quantE[i]&E<=quantE[i+1]]
    x <- C[E>quantE[i]&E<=quantE[i+1]]
    midE[i] <- quantE[i] +(quantE[i+1]-quantE[i])/2
    plot(y~x,ylim=c(min(NOx),max(NOx)),xlim=c(min(C),max(C)),xlab="C",ylab="NOx")
    mod <- lm(y~x)
    abline(coef(mod))
    betas[i,] <- coef(mod)
    cat("Quintile n beta: ",i,length(y),coef(mod),"\n")
}
dev.off()
inter <- 1.29
Elims <- c(.51,1.25)
ylims1 <- c(-2.5,2)
ylims2 <- c(-.03,.15)
#
modgam2 <- gam(NOx~s(E,bs="cr")+s(E,by=C,bs="cr"))
summary(modgam2) # effective degrees of freedom of 6.4 and 4.7 on the intercept and slope smooths
#
# Fig 12.4(a)
#
pdf("ethvarycoeffig1.pdf",h=4,w=4)
par(mfrow=c(1,1))
plot.gam(modgam2,select=1,se=F,ylab="Intercept(E)",ylim=ylims1,xlim=Elims)
points(midE,adjust)
dev.off()
#
# Fig 12.4(b)
#
pdf("ethvarycoeffig2.pdf",h=4,w=4)
par(mfrow=c(1,1))
plot.gam(modgam2,select=2,se=F,ylab="Slope(E)",ylim=ylims2,xlim=Elims)
points(midE,betas[,2])
dev.off()
#
# Fitted surface
#
Eval <- seq(min(E),max(E),length=25)
Cval <- seq(min(C),max(C),length=25)
fitf <- matrix(0,nrow=length(Eval),ncol=length(Cval))
newd <- data.frame(C=seq(min(C),max(C),length=100),E=seq(min(E),max(E),length=100))
for (i in 1:length(Eval)){
    for (j in 1:length(Cval)){
    	newd <- data.frame(C=Cval[j],E=Eval[i])
    	fitf[i,j] <- predict.gam(modgam2,newdata=newd)
    }
}
#
# Fig 12.5
#
pdf("ethvaryingimage.pdf",h=5,w=5)
par(mfrow=c(1,1))
image(Eval,Cval,fitf,xlab="E",ylab="C",col=gray((0:32)/32))
dev.off()
#
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Regression trees
#
x1 <- seq(0,1)
x2 <- seq(0,1)
#
# Fig 12.6(a)
#
pdf("regtreefig0.pdf",h=5,w=5) 
par(pty="s")
plot(x1,x2,type="n",xlab="",ylab="",axes=F)
lines(x=c(0,1),y=c(0,0))
lines(x=c(0,0),y=c(0,1))
lines(x=c(0,1),y=c(1,1))
lines(x=c(1,1),y=c(0,1))
axis(side=1,at=c(0.5),labels=expression(x[1]),tick=F,cex.axis=2)
axis(side=2,at=c(0.5),labels=expression(x[2]),tick=F,cex.axis=2)
lines(x=c(0,.25),y=c(0,.6))
lines(x=c(0,.5),y=c(.7,.5))
lines(x=c(0.4,.6),y=c(1,0))
lines(x=c(.54,1),y=c(.3,.7))
dev.off()
#
# Fig 12.6(b)
#
pdf("regtreefig1.pdf",h=5,w=5) 
par(pty="s")
plot(x1,x2,type="n",xlab="",ylab="",axes=F)
axis(side=1,at=c(0.5),labels=expression(x[1]),tick=F,cex.axis=2.0)
 axis(side=2,at=c(0.5),labels=expression(x[2]),tick=F,cex.axis=2.0)
lines(x=c(0,1),y=c(0,0))
lines(x=c(0,0),y=c(0,1))
lines(x=c(0,1),y=c(1,1))
lines(x=c(1,1),y=c(0,1))
lines(x=c(0,.3),y=c(.6,.6))
lines(x=c(0.3,.9),y=c(.4,.4))
lines(x=c(0.3,.9),y=c(.75,.75))
lines(x=c(0.3,.3),y=c(.4,.75))
lines(x=c(0.9,.9),y=c(0,.9))
lines(x=c(0.5,1),y=c(.9,.9))
lines(x=c(0.5,.5),y=c(0.9,1))
dev.off()
#
# Fig 12.7
#
pdf("regtreefig2.pdf")
par(pty="s")
x1 <- seq(0,1)
x2 <- seq(0,1)
plot(x1,x2,type="n",xlab=expression(x[1]),ylab=expression(x[2]),axes=F)
lines(x=c(0,1),y=c(0,0))
lines(x=c(0,0),y=c(0,1))
lines(x=c(0,1),y=c(1,1))
lines(x=c(1,1),y=c(0,1))
t1 <- .3
lines(x=c(0,1),y=c(t1,t1))
t2 <- .2
lines(x=c(t2,t2),y=c(0,t1))
t3 <- .8
lines(x=c(t3,t3),y=c(t1,1))
t4 <- .7
lines(x=c(0,t3),y=c(t4,t4))
axis(2,at=c(t1,t4),labels=c(expression(t[1]),expression(t[4])),las=1,tick=F)
axis(1,at=c(t2,t3),labels=c(expression(t[2]),expression(t[3])),las=1,tick=F)
text(x=t2/2,y=t1/2,label=expression(R[1]))
text(x=t2+(1-t2)/2,y=t1/2,label=expression(R[2]))
text(x=t3+(1-t3)/2,y=t1+(1-t1)/2,label=expression(R[3]))
text(x=t3/2,y=t1+(t4-t1)/2,label=expression(R[4]))
text(x=t3/2,y=t4+(1-t4)/2,label=expression(R[5]))
dev.off()
#
# Fig 12.8
#
pdf("regtreefig3.pdf",h=6,w=6) 
x1 <- seq(0,1)
x2 <- seq(0,1)
par(pty="s")
plot(x1,x2,type="n",xlab="",ylab="",axes=F)
height <- .25
lev1height <- .95
lev2height <- lev1height - height
lev3height <- lev1height - 2*height
lev4height <- lev1height - 3*height
vline1 <- 0
vline2 <- .2
vline3 <- .4
vline4 <- .5
vline5 <- .6
vline6 <- .65
vline7 <- .8
vline8 <- 1
lines(x=c(vline2,vline7),y=c(lev1height,lev1height))
lines(x=c(vline7,vline7),y=c(lev1height,lev2height))
lines(x=c(vline2,vline2),y=c(lev1height,lev2height))
lines(x=c(vline1,vline3),y=c(lev2height,lev2height))
lines(x=c(vline5,vline8),y=c(lev2height,lev2height))
lines(x=c(vline1,vline1),y=c(lev2height,lev3height))
lines(x=c(vline3,vline3),y=c(lev2height,lev3height))
lines(x=c(vline5,vline5),y=c(lev2height,lev3height))
lines(x=c(vline8,vline8),y=c(lev2height,lev3height))
lines(x=c(vline4,vline4),y=c(lev4height,lev3height))
lines(x=c(vline7,vline7),y=c(lev4height,lev3height))
lines(x=c(vline4,vline7),y=c(lev3height,lev3height))
lines(x=c(vline2+(vline7-vline2)/2,vline2+(vline7-vline2)/2),y=c(lev1height-.01,lev1height+.01))
eps <- .04
eps2 <- .09
cexno <- .7
text(x=vline2+(vline7-vline2)/2,y=lev1height+eps,label=expression(x[2]<=t[1]),
     family="Arial",cex=cexno)
text(x=vline2-eps2,y=lev2height+eps,label=expression(x[1]<=t[2]),family="Arial",cex=cexno)
text(x=vline8-eps2,y=lev2height+eps,label=expression(x[1]<=t[3]),family="Arial",cex=cexno)
text(x=vline5-eps2,y=lev3height+eps,label=expression(x[2]<=t[4]),family="Arial",cex=cexno)
text(x=vline1,y=lev3height-eps,label=expression(R[1]),cex=cexno,font=3)
text(x=vline3,y=lev3height-eps,label=expression(R[2]),cex=cexno,font=3)
text(x=vline4,y=lev4height-eps,label=expression(R[4]),cex=cexno,font=3)
text(x=vline7,y=lev4height-eps,label=expression(R[5]),cex=cexno,font=3)
text(x=vline8,y=lev3height-eps,label=expression(R[3]),cex=cexno,font=3)
dev.off()
#
#------------------------------------------------------------------
# Function to create fitted surface for Fig 12.9
#
predict.example <- function(x1,x2){
	t1 <- .3; t2 <- .2; t3 <- .8; t4 <- .7
        R1 <- -3; R2 <- -1; R3 <- 0; R4 <- 1; R5 <- 3
	if (x2 <= t1){
	   if (x1 <= t2) predict.example <- R1
	   else if (x1 > t2) predict.example <- R2
	}
	else if (x2 > t1){
	   if (x1 > t3) predict.example <- R3
	   else if (x1 <= t3){
		if (x2 <= t4) predict.example <- R4
		else if (x2 > t4) predict.example <- R5
           }
        }
}
x1val <- seq(0,1,length=100)
x2val <- seq(0,1,length=100)
fitf <- matrix(0,nrow=length(x1val),ncol=length(x2val))
for (i in 1:length(x1val)){
    for (j in 1:length(x2val)){
        fitf[i,j] <- predict.example(x1val[i],x2val[j])
    }
}
library(lattice)
library(reshape)
M <- melt(data.frame(x1val,fitf,check.names=FALSE),id=1,variable="x2val")
M2 <- M
M2$x2val <- as.numeric(M$x2val)/100
#
# Fig 12.9
#
pdf("regtreefig4.pdf",h=6,w=6) 
par(mfrow=c(1,1))
trellis.par.set("axis.line",list(col=NA,lty=1,lwd=1))
wireframe(fitf~x1val*x2val,data=M2,xlab=expression(x[1]),ylab=expression(x[2]),zlab="y",
screen=list(z=30,x=-60),colkey=F,col="grey",scales=list(arrows=F))
dev.off()
#
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Regreesion tree prostate data example in Section 12.7
#
#-------------------------------------------------------------------------------
# My version of function to plot CV versus complexity in Fig 12.10
#
myplotcp <- function (x, minline = TRUE, lty = 3, col = 1, upper = c("size", 
    "splits", "none"), ...) 
{
    dots <- list(...)
    if (!inherits(x, "rpart")) 
        stop("Not legitimate rpart object")
    upper <- match.arg(upper)
    p.rpart <- x$cptable
    if (ncol(p.rpart) < 5L) 
        stop("cptable does not contain cross-validation results")
    xstd <- p.rpart[, 5L]
    xerror <- p.rpart[, 4L]
    nsplit <- p.rpart[, 2L]
    ns <- seq_along(nsplit)
    cp0 <- p.rpart[, 1L]
    cp <- sqrt(cp0 * c(Inf, cp0[-length(cp0)]))
    if (!"ylim" %in% names(dots)) 
        dots$ylim <- c(min(xerror - xstd) - 0.1, max(xerror + 
            xstd) + 0.1)
    do.call(plot, c(list(ns, xerror, axes = FALSE, xlab = "Complexity", 
        ylab = "CV Estimate", type = "o"), dots))
    box()
    axis(2, ...)
    segments(ns, xerror - xstd, ns, xerror + xstd)
    axis(1L, at = ns, lab = as.character(signif(cp, 2L)), ...)
    switch(upper, size = {
        axis(3L, at = ns, lab = as.character(nsplit + 1), ...)
        mtext("Size of Tree", side = 3, line = 3)
    }, splits = {
        axis(3L, at = ns, lab = as.character(nsplit), ...)
        mtext("number of splits", side = 3, line = 3)
    }, )
    minpos <- min(seq_along(xerror)[xerror == min(xerror)])
    if (minline) 
        abline(h = (xerror + xstd)[minpos], lty = lty, col = col)
    invisible()
}
#
library(rpart)
library(lasso2)
data(Prostate)
attach(Prostate)
treefit <- rpart(lpsa~.,data=Prostate,method="anova",control=list(minsplit=5,minbucket=3,cp=0.0))
#
# Plot of tree -- very cluttered and not included in book
#
plot(treefit,margin=.07,uniform=T,compress=T)
text(treefit,use.n=TRUE,cex=1)
# Details of fit
printcp(treefit)
# Now prune the tree
prunedtreefit <- prune(treefit,cp=treefit$cptable[which.min(treefit$cptable[,"xerror"]),"CP"])
#
# Fig 12.10
#
pdf("TreeProstateFig1.pdf",h=6,w=6) 
par(pty="s")
myplotcp(treefit,minline=F)
dev.off()
#
# Fig 12.11
#
pdf("TreeProstateFig2.pdf",h=6,w=6) 
par(pty="s")
plot(prunedtreefit,margin=.07,uniform=T)
text(prunedtreefit,use.n=TRUE,cex=1)
dev.off()
#
# Ethanol with regression trees
#
library(SemiPar)
data(ethanol)
attach(ethanol)
library(rpart)
TREEmod <- rpart(NOx~E+C,method="anova")
#
# Hierarchical tree etc initial plots: not in book
#
par(mfrow=c(2,2))
plot(TREEmod,margin=.1)
text(TREEmod)
plot(NOx~E)
lines(loess.smooth(y=NOx,x=E,span=.3))
# Lines are from tree model
abline(v=1.095,col="red",lty=2)
abline(v=0.796,col="red",lty=2)
abline(v=0.646,col="red",lty=2)
abline(v=1.023,col="red",lty=2)
plot(NOx~C)
lines(loess.smooth(y=NOx,x=C))
abline(v=8.25,col="red",lty=2)
# Cross-validation plot
plotcp(TREEmod)
#
# Now prune the tree
#
prunedtreefit <- prune(TREEmod,cp=TREEmod$cptable[which.min(TREEmod$cptable[,"xerror"]),"CP"])
#
Eval <- seq(min(E),max(E),length=100)
Cval <- seq(min(C),max(C),length=100)
fitf <- matrix(0,nrow=length(Eval),ncol=length(Cval))
#
# Create fitted values using a sledgehammer
#
for (i in 1:length(Eval)){
    for (j in 1:length(Cval)){
        if (Eval[i] >= 1.095) {fitf[i,j] <- 0.8753}
        else {
          if (Eval[i] < 0.796){ fitf[i,j]<-ifelse (Eval[i]<0.646,0.6402,1.651)}
          else {
             if (Eval[i] >= 1.023) {fitf[i,j] <- 2.16}
             else{ fitf[i,j] <- ifelse(Cval[j] > 8.25,3.023,3.544) }
       }}}
}
#
# Image plot
#
# Fig 12.12(a)
#
pdf("EthanolTree.pdf",h=5,w=5) 
par(mfrow=c(1,1))
image(Eval,Cval,fitf,xlab="E",ylab="C",col=gray((0:32)/32))
dev.off()
#
#
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Ethanol MARS
#
# First illustrate the use of the earth package
#
library(SemiPar)
data(ethanol)
attach(ethanol)
library(earth)
Emod <- earth(NOx~E+C,data=ethanol)
Eval <- seq(min(E),max(E),length=25)
Cval <- seq(min(C),max(C),length=25)
fitf <- matrix(0,nrow=length(Eval),ncol=length(Cval))
for (i in 1:length(Eval)){
    for (j in 1:length(Cval)){
    	fitf[i,j] <- predict(Emod,newdata=c(Eval[i],Cval[j]))
    }
}
#
# Fig 12.12(b)
#
pdf("ethMARSfig1.pdf",h=5,w=5) 
par(mfrow=c(1,1))
image(Eval,Cval,fitf,xlab="E",ylab="C",col=gray((0:32)/32))
dev.off()
# Perspective version: not in book
par(mfrow=c(1,1))
persp(Eval,Cval,fitf,theta=30,phi=30,ticktype="detailed")
#
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Linear and quadratic discriminant analysis of BPD and birthweight data
# in Section 12.8.2
#
#----------------------------------------------------------
# Define expit functions to obtain fitted logistic linear regression on [0,1]
#
expit <- function(x,b0,b1){
      expit <- exp(b0+b1*x)/( 1+exp(b0+b1*x) )}
#----------------------------------------------------------
# Define expit functions to obtain fitted logistic quadratic regression on [0,1]
#
expit2 <- function(x,b0,b1,b2){
      expit <- exp(b0+b1*x+b2*x*x)/( 1+exp(b0+b1*x+b2*x*x) )}
bw <- read.table("birthweight.txt",header=T)
birthweight <- bw$birthweight
BPD <- bw$BPD
birthweight2 <- birthweight^2
#
# Linear and quadratic logistic regression
#
lrmod1 <- glm(BPD~birthweight,family=binomial)
lrmod2 <- glm(BPD~birthweight+birthweight2,family=binomial)
#
# Fig 12.13
#
pdf("bwlogitpredfig1.pdf",height=4,width=6)
plot(birthweight,BPD,pch="|",xlab="Birthweight (grams)")
x <- seq(min(birthweight),max(birthweight))
lines(x,expit(x,b0=lrmod1$coeff[1],b1=lrmod1$coeff[2]),lty=2)
lines(x,expit2(x,b0=lrmod2$coeff[1],b1=lrmod2$coeff[2],b2=lrmod2$coef[3]),lty=3)
abline(h=0.5,lty=1,col="grey")
abline(v=-lrmod1$coeff[1]/lrmod1$coeff[2],lty=2)
xquad1 <- (-lrmod2$coeff[2] + sqrt(lrmod2$coeff[2]**2-
          4*lrmod2$coeff[1]*lrmod2$coeff[3]))/(2*lrmod2$coeff[3])
xquad2 <- (-lrmod2$coeff[2] - sqrt(lrmod2$coeff[2]**2-
          4*lrmod2$coeff[1]*lrmod2$coeff[3]))/(2*lrmod2$coeff[3])
abline(v=xquad2,lty=3)
legend(x=1100,y=.95,legend=c("Linear Logistic","Quadratic Logistic"),lty=2:3,bty="n")
dev.off()
#
# Examination of normality within each disease classification
#
# Fig 12.14(a)
#
pdf("BPDqqBPD0.pdf",h=4,w=4)
qqnorm(birthweight[BPD==0],main="")
dev.off()
#
# Fig 12.14(b)
#
pdf("BPDqqBPD1.pdf",h=4,w=4)
qqnorm(birthweight[BPD==1],main="")
dev.off()
#
# Empirical estimates of prior probabilities of being in each group
#
pi1 <- sum(BPD)/length(BPD)
pi0 <- 1-pi1
#
# Empirical estimates of means and common variance (for LDA)
#
mu0 <- mean(birthweight[BPD==0])
mu1 <- mean(birthweight[BPD==1])
Sigma <- (sum((birthweight[BPD==0]-mu0)^2) + sum((birthweight[BPD==1]-mu1)^2))/(length(BPD)-2)
#
# 
LDAa0 <- -2*log(pi0) + mu0^2/Sigma
LDAa1 <- -2*log(pi1) + mu1^2/Sigma
LDAb0 <- -2*mu0/Sigma
LDAb1 <- -2*mu1/Sigma
xseq <- seq(min(birthweight),max(birthweight),1)
line0 <- LDAa0 + LDAb0*xseq
line1 <- LDAa1 + LDAb1*xseq
alpha0 <- log(pi1/pi0) + 0.5*mu0*mu0/Sigma - 0.5*mu1*mu1/Sigma
alpha1 <- mu1/Sigma - mu0/Sigma
cat("LDA boundary = ",-alpha0/alpha1,"\n") 
#
# Fig 12.15
#
pdf("BPDlindis.pdf",h=5,w=5)
plot(line0~xseq,type="n",ylab="-2 log Pr(Y=k | x)",xlab="Birthweight, x",
     ylim=c(min(line0,line1),max(line0,line1)))
lines(x=xseq,y=line0,lty=2)
lines(x=xseq,y=line1,lty=3)
abline(v=-alpha0/alpha1)
legend("topright",legend=c("k=0","k=1"),lty=2:3,bty="n")
dev.off()
#
# Variance estimates for QDA
#
Sigma0 <- sum((birthweight[BPD==0]-mu0)^2)/(length(birthweight[BPD==0])-1)
Sigma1 <- sum((birthweight[BPD==1]-mu1)^2)/(length(birthweight[BPD==1])-1)
# Quadratic discrimant
a <- (.5/Sigma0 - .5/Sigma1)
b <- (mu1/Sigma1 - mu0/Sigma0)
c <- log(pi1) - log(pi0) + .5*log(Sigma0) - .5*log(Sigma1) + .5*mu0^2/Sigma0 - .5*mu1^2/Sigma1
root1 <- .5*(-b+sqrt(b^2-4*a*c))/a
root2 <- .5*(-b-sqrt(b^2-4*a*c))/a
qdafun <- qda(BPD~birthweight) # The quick way of doing it!
#
# Fig 12.16
#
pdf("BPDquaddiscrim.pdf",h=4,w=6)
plot(birthweight,BPD,pch="|",xlab="Birthweight (grams)")
xseq <- seq(min(birthweight),max(birthweight),1)
scalenorm0 <- dnorm(xseq,mean=mu0,sd=sqrt(Sigma0))
max0 <- max(scalenorm0)
scalenorm1 <- dnorm(xseq,mean=mu1,sd=sqrt(Sigma1))
max1 <- max(scalenorm1)
points(xseq,pi0*scalenorm0/max0,type="l")
points(xseq,pi1*scalenorm1/max1,type="l")
abline(v=root2,lty=2)
dev.off()
#
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# KDE and classification applied to BPD and birthweight in Section 12.8.3
#
library(sm)
myevalpts <- seq(min(birthweight),max(birthweight),1)
#
# Default smoothing parameter estimation is assuming normal distributions
#
sm0 <- sm.density(birthweight[BPD==0],display="none",eval.points=myevalpts)
sm1 <- sm.density(birthweight[BPD==1],display="none",eval.points=myevalpts)
sm0$h # Smoothing parameter for BPD=0
sm1$h # Smoothing parameter for BPD=1
#
# Fig 12.17(a)
#
pdf("KDEBPDfig1.pdf",h=5,w=5)
par(mfrow=c(1,1))
plot(birthweight,BPD,type="n",ylim=c(0,max(sm0$est,sm1$est)),
xlim=c(min(sm0$eval,sm1$eval),max(sm0$eval,sm1$eval)),ylab="",xlab="Birthweight, x")
points(sm0$eval,sm0$est,type="l")
points(sm1$eval,sm1$est,type="l",lty=2)
legend("topright",legend=c("k=0","k=1"),bty="n",lty=1:2,lwd=2)
dev.off()
pi1 <- sum(BPD)/length(BPD)
pi0 <- 1-pi1
#
yvals0 <- log(sm0$est/sm1$est)-log(pi0/pi1)
ind0 <- min(which(yvals0>0))
#
# Fig 12.17(b)
#
pdf("KDEBPDfig2.pdf",h=5,w=5)
plot(myevalpts,log(sm0$est/sm1$est)-log(pi0/pi1),type="l",xlab="Birthweight,x",
   ylab="log[Pr(Y=1|x)/Pr(Y=0|x)]")
abline(h=0,lty=2)
abline(v=myevalpts[ind0],lty=2)
dev.off()
#
# CV approach to smoothing parameter selection
#
h0hat <- hcv(birthweight[BPD==0],display="add") # CV estimate for BPD=0
h1hat <- hcv(birthweight[BPD==1],display="add") # CV estimate for BPD=1
sm0h <- sm.density(birthweight[BPD==0],h=h0hat,display="none",
    eval.points=myevalpts)
sm1h <- sm.density(birthweight[BPD==1],h=h1hat,display="none",
    eval.points=myevalpts)
#
# Fig 12.17(c)
#
pdf("KDEBPDfig3.pdf",h=5,w=5)
par(mfrow=c(1,1))
plot(birthweight,BPD,type="n",xlim=c(min(sm0h$eval,sm1h$eval),max(sm0h$eval,sm1h$eval)),
ylab="",xlab="Birthweight, x",ylim=c(0,max(sm0h$est,sm1h$est)))
points(sm0h$eval,sm0h$est,type="l")
points(sm1h$eval,sm1h$est,type="l",lty=2)
legend("topright",legend=c("k=0","k=1"),bty="n",lty=1:2,lwd=2)
dev.off()
#
# Fig 12.17(d)
#
pdf("KDEBPDfig4.pdf",h=5,w=5)
yvals1 <- log(sm0h$est/sm1h$est)-log(pi0/pi1)
ind1 <- min(which(yvals1>0))
plot(myevalpts,log(sm0h$est/sm1h$est)-log(pi0/pi1),type="l",xlab="Birthweight,x",
   ylab="log[Pr(Y=1|x)/Pr(Y=0|x)]")
abline(v=myevalpts[ind1],lty=2)
abline(h=0,lty=2)
dev.off()
#
# Now plug-in method of smoothing parameter estimation (Sheather-Jones)
#
sm0hsj <- sm.density(birthweight[BPD==0],method="sj",display="none",
    eval.points=myevalpts)
sm1hsj <- sm.density(birthweight[BPD==1],method="sj",display="none",
    eval.points=myevalpts)
sm0hsj$h # SJ estimation for BPD=0
sm1hsj$h # SJ estimation for BPD=1
#
# Fig 12.17(e)
#
pdf("KDEBPDfig5.pdf",h=5,w=5)
par(mfrow=c(1,1))
plot(birthweight,BPD,type="n",xlim=c(min(sm0hsj$eval,sm1hsj$eval),max(sm0hsj$eval,sm1hsj$eval)),
ylab="",xlab="Birthweight, x",ylim=c(0,max(sm0hsj$est,sm1hsj$est)))
points(sm0hsj$eval,sm0hsj$est,type="l")
points(sm1hsj$eval,sm1hsj$est,type="l",lty=2)
legend("topright",legend=c("k=0","k=1"),bty="n",lty=1:2,lwd=2)
dev.off()
#
# Fig 12.17(f)
#
pdf("KDEBPDfig6.pdf",h=5,w=5)
yvals2 <- log(sm0hsj$est/sm1hsj$est)-log(pi0/pi1)
ind2 <- min(which(yvals2>0))
myevalpts[ind2]
plot(myevalpts,log(sm0hsj$est/sm1hsj$est)-log(pi0/pi1),type="l",xlab="Birthweight,x",
   ylab="log[Pr(Y=1|x)/Pr(Y=0|x)]")
abline(v=myevalpts[ind2],lty=2)
abline(h=0,lty=2)
dev.off()
#
# Classification trees in Section 12.8.4
#
#
pseq <- seq(0.0001,.99999,.001)
misclass <- NULL
for (i in 1:length(pseq)){
    misclass[i] <- 1-max(pseq[i],1-pseq[i])}
#
# Cost complexity plot
#
# Fig 12.18
#
pdf("CostComplexBinary.pdf",h=5,w=5)
plot(misclass~pseq,xlab=expression(p[j]),type="l",ylab="Lack of Fit")
points(2*pseq*(1-pseq)~pseq,type="l",lty=2)
dev <- -pseq*log(pseq) - (1-pseq)*log(1-pseq)
dev <- 0.5*dev/max(dev)
points(dev~pseq,type="l",lty=3)
legend("bottom",legend=c("Misclassification Error","Gini Index","Deviance"),lty=c(1,2,3),bty="n")
dev.off()
#
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Motivation for bagging in 12.8.5: 3 random bootstrap samples -- will differ from
# book figures as different samples.
#
# First bootstrap sample
#
library(rpart)
library(lasso2)
data(Prostate)
attach(Prostate)
scramble <- sample(c(1:97),size=97,replace=T)
traini <- scramble[1:97]
traindat <- Prostate[traini,]
treefit1 <- rpart(lpsa~.,data=traindat,method="anova",
control=list(minsplit=15,minbucket=10,cp=0.0))
prunedtreefit1 <- prune(treefit1,cp=treefit$cptable[which.min(treefit$cptable[,"xerror"]),"CP"])
plotcp(treefit1,minline=F)
#
# Fig 12.19(a)
#
pdf("TreeProstateBAG1.pdf",h=6,w=6)
par(pty="s")
plot(prunedtreefit1,margin=.07,uniform=T)
text(prunedtreefit1,use.n=TRUE,cex=1)
dev.off()
# 
# Second bootstrap sample
#
scramble <- sample(c(1:97),size=97,replace=T)
traini <- scramble[1:97]
traindat2 <- Prostate[traini,]
treefit2 <- rpart(lpsa~.,data=traindat2,method="anova",control=list(minsplit=15,minbucket=10,cp=0.0))
prunedtreefit2 <- prune(treefit2,cp=treefit$cptable[which.min(treefit$cptable[,"xerror"]),"CP"])
#
# Fig 12.19(b)
#
pdf("TreeProstateBAG2.pdf",h=6,w=6)
par(pty="s")
plot(prunedtreefit2,margin=.07,uniform=T)
text(prunedtreefit2,use.n=TRUE,cex=1)
dev.off()
# 
# Third bootstrap sample
#
scramble <- sample(c(1:97),size=97,replace=T)
traini <- scramble[1:97]
traindat3 <- Prostate[traini,]
treefit3 <- rpart(lpsa~.,data=traindat3,method="anova",control=list(minsplit=15,minbucket=10,cp=0.0))
prunedtreefit3 <- prune(treefit3,cp=treefit$cptable[which.min(treefit$cptable[,"xerror"]),"CP"])
#
# Fig 12.19(bc
#
pdf("TreeProstateBAG3.pdf",h=6,w=6)
par(pty="s")
plot(prunedtreefit3,margin=.07,uniform=T)
text(prunedtreefit3,use.n=TRUE,cex=1)
dev.off()
#
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Outcome after head injury in Section 12.8: Code written by Theresa Smith
#
library(MASS)
library(BMA)
library(rpart)
library(randomForest)
library(ipred)
#
##############Run once to set stuff up##########################
#
# Head injury data
#
yagg <- c(9,5,5,7,58,11,32,12,19,6,21,14,45,7,61,15,7,12,19,25,20,7,42,17)
zagg <- c(47,77,11,24,29,24,13,16,15,44,18,38,11,16,11,21,1,6,2,15,0,2,7,7)
haem <- (c(0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1))
pup <- (c(0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1))
coma <- (c(0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1))
agec <- as.factor(c(0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2))
#               
head <- data.frame(yagg,zagg,haem,pup,coma,agec)
#####un stack data######
####One row for each observation so that we can do train-test###
outcome <- factor(c(rep(1,sum(yagg)),rep(0,sum(zagg))),labels=c(0,1))
haem2 <- (c(rep(haem,yagg),rep(haem,zagg)))
pup2 <- (c(rep(pup,yagg),rep(pup,zagg)))
coma2 <- (c(rep(coma,yagg),rep(coma,zagg)))
age2 <- factor(c(rep(agec,yagg),rep(agec,zagg)), labels=c(0,1,2))
head2 <- data.frame(outcome,haem2,pup2,coma2,age2)
colnames(head2) <- c("outcome",colnames(head)[3:6])
#
# Create matrix of 0's and 1's to keep track of interactions and model elements#######
#
design.it<-function(k){
  design<-matrix(nrow=2^k,ncol=k)
  for(i in 1:k){
    design[,i]=rep(rep(c(0,1),each=2^(k-i)),2^(i-1))
  }
  return(design)
}
#
###is a subset of b? ##############
#
is.subset<-function(a,b,k){
  yes<-1
  for(i in 1:k)
    {
      yes=yes*(a[i]<=b[i])
    }
  return(yes)
}
#
#####is this a hierarchical  model? ########
#
is.hier<-function(int.mat,model,p,k){
  yes=1
  for(i in 1:p){
    if(model[i]){
      for(j in 1:p){
        if(is.subset(int.mat[j,],int.mat[i,],k)){
          yes= yes * model[j]
        }
      }
    }
  }
  return(yes)
}
#
##matrix to help identify all possible interactions####
#
interactions <- design.it(4)[-1,]
order <- apply(interactions,1,sum)
keep = order<3
interactions <- interactions[keep,] 
vars <- names(head)[-c(1,2)]
colnames(interactions) = vars
varints <- apply(interactions, 1, function(x){paste(colnames(interactions)[as.logical(x)], 
           collapse=":")})
#
##matrix to identify which interactions to include####
#
possible.model <- design.it(10)
rownames(possible.model) <- 1:1024
check.hier <- function(c){
  return(is.hier(interactions,c,10,4))
}
#
##keep models that are hierarchical
#
check.it <- apply(possible.model,1,check.hier)
keep <- which(check.it==1)
hier.models <- possible.model[keep,]
rownames(hier.models) <- 1:113
#
#####find aic for all heirachical models#########
#
get.aic<-function(model,var,hdata,bicflag){
  my.formula <- as.formula(paste(c("outcome ~ 1", var[as.logical(model)]), collapse=" + "))
  my.model <- glm(my.formula, family = binomial, data=hdata)
  return((1-bicflag)*extractAIC(my.model)[2]+bicflag*extractAIC(my.model,k=log(dim(hdata)[1]))[2])
}
#
######classification error########
#
class.error <- function(truth,pred){
Bsum00 <- sum((pred==0) & (truth==0)) 
Bsum01 <- sum((pred==1) & (truth==0)) 
Bsum10 <- sum((pred==0) & (truth==1)) 
Bsum11 <- sum((pred==1) & (truth==1)) 
Btesterr <- 100*(Bsum01+Bsum10)/length(truth)
return(Btesterr)
}

######needed for classification with logistic regression####
expit<-function(a){return(exp(a)/(1+exp(a)))}
###############################
prettytable<-function(visited,models,vars=varints,howmany=5){
	howmany=min(howmany,length(visited))
	topmod<-vector("list",length=howmany)
	for(i in 1:howmany){
	topmod[[i]]<-vars[as.logical(models[names(sort(visited))[i],])]
	}
	return(topmod)
}
#
###############End Run Once####################
#
B <- 100
null.err<-main.err<-sat.err<-aic.err<-bic.err<-aic.unres.err<-bic.unres.err<-tree.err<-bag.err<-rf.err<-rep(0,B)
aicmods<-bicmods<-uaicmods<-ubicmods<-rep(0,B)
for(i in 1:B){
#
######break up data 70% train 30% test########
 train<-sample(1:931,652)
 head.train<-head2[train,]
 head.test<-head2[-train,]
#####null model##########
nullmod<-glm(outcome~1, family = binomial, data=head.train)
nulp<-predict(nullmod,head.test[,-1])
nulp<-expit(nulp)
nulregp<-as.numeric(nulp>.5)
null.err[i]=class.error(head.test[,1],nulregp)
#######main only############
mainmod<-glm(outcome~., family = binomial, data=head.train)
mp<-predict(mainmod,head.test[,-1])
mp<-expit(mp)
mregp<-as.numeric(mp>.5)
main.err[i]=class.error(head.test[,1],mregp)
#####saturated#######


##calculate aic for each hierarchical model####
get.my.aic<-function(c){return(get.aic(c,varints,head.train,bicflag=0))}

aic.vals<-apply(hier.models,1,get.my.aic)

##which is the best?###
abest.model<-varints[as.logical(hier.models[which.min(aic.vals),])]
amy.formula<-as.formula(paste(c("outcome ~1", abest.model), collapse=" + "))
amy.model<-glm(amy.formula, family = binomial, data=head.train)
aicmods[i]=which.min(aic.vals)
#####test error for best model#########
alrp1<-predict(amy.model,head.test[,-1])
alrp<-expit(alrp1)
alogregp<-as.numeric(alrp>.5)
aic.err[i]=class.error(head.test[,1],alogregp)

#######Unrestricted Models############
aic.ur<-apply(possible.model,1,get.my.aic)
##which is the best?###
aubest.model<-varints[as.logical(possible.model[which.min(aic.ur),])]
aumy.formula<-as.formula(paste(c("outcome ~1", aubest.model), collapse=" + "))
aumy.model<-glm(aumy.formula, family = binomial, data=head.train)
uaicmods[i]=which.min(aic.ur)
#####test error for best model#########
aulrp1<-predict(aumy.model,head.test[,-1])
aulrp<-expit(aulrp1)
aulogregp<-as.numeric(aulrp>.5)
aic.unres.err[i]=class.error(head.test[,1],aulogregp)


##calculate aic for each hierarchical model####
get.my.bic<-function(c){return(get.aic(c,varints,head.train,bicflag=1))}

bic.vals<-apply(hier.models,1,get.my.bic)

##which is the best?###
best.model<-varints[as.logical(hier.models[which.min(bic.vals),])]
my.formula<-as.formula(paste(c("outcome ~1", best.model), collapse=" + "))
my.model<-glm(my.formula, family = binomial, data=head.train)

#####test error for best model#########
bicp<-predict(my.model,head.test[,-1])
bicp<-expit(bicp)
bicregp<-as.numeric(bicp>.5)
bic.err[i]=class.error(head.test[,1],bicregp)
bicmods[i]=which.min(bic.vals)


#######Unrestricted Models############
bic.ur<-apply(possible.model,1,get.my.bic)
##which is the best?###
ubest.model<-varints[as.logical(possible.model[which.min(bic.ur),])]
umy.formula<-as.formula(paste(c("outcome ~1", ubest.model), collapse=" + "))
umy.model<-glm(umy.formula, family = binomial, data=head.train)
ubicmods[i]=which.min(bic.ur)
#####test error for best model#########
ubicp<-predict(umy.model,head.test[,-1])
ubicp<-expit(ubicp)
ubicregp<-as.numeric(ubicp>.5)
bic.unres.err[i]=class.error(head.test[,1],ubicregp)

#####regression tree########
treefit <- rpart(outcome~haem+pup+coma+agec,data=head.train,method="class",control=list(minsplit=5,minbucket=3,cp=.001))  
prunedtreefit <- prune(treefit,cp=treefit$cptable[which.min(treefit$cptable[,"xerror"]),"CP"])
treep <- predict(prunedtreefit,head.test[,-1],type="class")
tree.err[i]=class.error(head.test[,1],treep)
#
######### Random Forests##########
#
RFmod <- randomForest(as.factor(outcome)~haem+pup+coma+agec,data=head.train,importance=TRUE, 
        type="classification")
RFp <- predict(RFmod,head.test[,-1],type="class")
rf.err[i] = class.error(head.test[,1],RFp)
#
######Bagging#########
#
bagmod <- bagging(outcome~.,data=head.train,method="class",coob=TRUE)
bagp <- predict(bagmod,head.test[,-1],type="class")
bag.err[i] = class.error(head.test[,1],bagp)
if (floor(i/5)==ceiling(i/5)) cat(i," iterations completed\n")
}
#
#
######average error all in one place: results are slightly different because seeds are different
#
Error <- rbind(null.err,main.err, aic.err,bic.err,aic.unres.err,bic.unres.err,tree.err,
         bag.err,rf.err)
apply(Error,1,mean)
sqrt(apply(Error,1,var))
#
aic.picks<-table(aicmods)
bic.picks<-table(bicmods)
uaic.picks<-table(uaicmods)
ubic.picks<-table(ubicmods)
####more like a list than a table########
prettytable(aic.picks,hier.models)
prettytable(bic.picks,hier.models)
prettytable(uaic.picks,possible.model)
prettytable(ubic.picks,possible.model)
#
# A couple of figures based on the last split: these will differ from the book versions
# because of different splits
#
# Fig 12.20
#
pdf("RFhead1.pdf")
varImpPlot(RFmod,main="",scale=F)
dev.off()
#
# Fig 12.21
#
pdf("RFhead2.pdf")
plot(RFmod$err[,1],typ="l",xlab="Trees",ylab="oob error")
dev.off()