This page contains R code and output from the examples in the paper. In all cases, the data are random samples from the freely-available NHANES dataset, and the variables used are;
To run the code, the following packages must first be installed;
install.packages(c("beeswarm","vioplot","lattice"))
install.packages(pkgs="snippets",repos="http://www.rforge.net/")
Following this, cut-and-paste the code for each figure into an R session.
nhanessmall <- read.csv("http://faculty.washington.edu/kenrice/heartgraphs/nhanessmall.csv", na=".")
with(nhanessmall, stripchart(BPXSAR, vertical=FALSE, pch=1, xlab="Systolic BP (mmHg)"))
library("beeswarm")
with(nhanessmall, beeswarm(BPXSAR, breaks=NA, vertical=FALSE, pch=1, method="center", xlab="Systolic BP (mmHg)"))
nhanesmedium <- read.csv("http://faculty.washington.edu/kenrice/heartgraphs/nhanesmedium.csv", na=".")
with(nhanesmedium, stripchart(BPXSAR, vertical=FALSE, pch=1, xlab="Systolic BP (mmHg)"))#, cex=1))
library("beeswarm")
with(nhanesmedium, beeswarm(BPXSAR, breaks=NA, vertical=FALSE, pch=1, method="center", xlab="Systolic BP (mmHg)"))
with(nhanesmedium, beeswarm(BPXSAR, breaks=seq(min(BPXSAR), max(BPXSAR), 1), vertical=FALSE, pch=1, method="center", xlab="Systolic BP (mmHg)"))
# adding superimposed points and legend:
points(x=mean(nhanesmedium$BPXSAR), y=0.6, pch=16, col="blue")
lines(x=confint(lm(BPXSAR~1, data=nhanesmedium)), y=c(0.6,0.6), pch=16, col="blue")
legend("topright", col="blue",pch=16, lty=1, "Sample mean & 95% conf interval", bty="n")
nhaneslarge <- read.csv("http://faculty.washington.edu/kenrice/heartgraphs/nhaneslarge.csv", na=".")
library("vioplot")
vioplot(nhaneslarge$BPXSAR, h=3, names="", horizontal=TRUE, col="grey90", drawRect=FALSE)
mtext(side=1, line=3, "Systolic BP (mmHg)")
points(x=mean(nhaneslarge$BPXSAR), y=1, pch=16, col="blue")
points(x=median(nhaneslarge$BPXSAR), y=1, pch=18, col="red")
legend("topright", pch=c(16,18), col=c("blue","red"), c("Sample mean","Sample median"), bty="n")
with(nhaneslarge, hist(BPXSAR, n=30, col="grey", main="", xlab="Systolic BP (mmHg)"))
nhaneslarge <- read.csv("http://faculty.washington.edu/kenrice/heartgraphs/nhaneslarge.csv", na=".")
barplot(as.matrix(table(nhaneslarge$BPXSAR>140)/1000), beside=FALSE, horiz=TRUE, xlim=c(0,1),
xlab="Proportion with Systolic BP <= 140 mmHg\n")
dotchart( 1-mean( nhaneslarge$BPXSAR>140), xlim=c(0.03,0.97), lcolor=NA,
xlab="Proportion with Systolic BP <= 140 mmHg,\nwith corresponding 95% conf interval", pch=1)
lines(x = 1-binom.test( sum(nhaneslarge$BPXSAR>140), 1000)$conf.int, y=c(1,1))
par(mar=c(4,2,4,0.3)+0.3, xpd=TRUE)
barplot(as.matrix(table(nhaneslarge$race_ethc)/1000), beside=FALSE, horiz=TRUE, xlim=c(0,1),
xlab="Proportion\n")
legend("top", inset=c(0, -0.5), fill=gray.colors(4), horiz=TRUE, bty="n", levels(nhaneslarge$race_ethc))
par(mar=c(5,2,0,0.3)+0.3)
dotchart( as.matrix(rev(table(nhaneslarge$race_ethc)))/1000,
xlim=c(0.03,0.97), lcolor=NA, gcolor="white", xlab="Proportion,\nwith corresponding 95% conf interval")
for(i in 1:4){
lines(x = binom.test( sum(as.numeric(nhaneslarge$race_ethc)==i), 1000)$conf.int,
y=5-c(i,i))
}
nhaneslarge <- read.csv("http://faculty.washington.edu/kenrice/heartgraphs/nhaneslarge.csv", na=".")
par(mar=c(0,0,0,0))
pie(table(nhaneslarge$BPXSAR>140), labels=c("SBP<=140 mmHg ", "SBP>140 mmHg"), col=gray.colors(2))
pie(table(nhaneslarge$race_ethc), col=gray.colors(4))
nhanessmall <- read.csv("http://faculty.washington.edu/kenrice/heartgraphs/nhanessmall.csv", na=".")
nhanesmedium <- read.csv("http://faculty.washington.edu/kenrice/heartgraphs/nhanesmedium.csv", na=".")
nhaneslarge <- read.csv("http://faculty.washington.edu/kenrice/heartgraphs/nhaneslarge.csv", na=".")
#empty plot to set up axes, labels etc:
boxplot(DR1TFOLA ~gender, ylim=c(0,2200), type="n", border=0,
data=nhanessmall,xlab="",ylab=quote("Folate intake"~(mu*g/day)) )
#code to add points:
library("beeswarm")
beeswarm(DR1TFOLA ~gender,
data=nhanessmall,method="center", breaks=NA, add=TRUE)
boxplot(DR1TFOLA ~gender, ylim=c(0,2200), type="n", border=0,
data=nhanesmedium,xlab="",ylab=quote("Folate intake"~(mu*g/day)) )
beeswarm(DR1TFOLA ~gender, data=nhanesmedium,method="center", add=TRUE)
library("sm")
library("vioplot")
vioplot( subset(nhaneslarge, gender=="Male")$DR1TFOLA,
subset(nhaneslarge, gender=="Female")$DR1TFOLA,
h=20, ylim=c(0,2200), names=c("Male","Female"), col="grey90", drawRect=FALSE)
mtext(side=2, line=3, cex=0.75, quote("Folate intake"~(mu*g/day)))
# add lines for the RDA values, and a legend
segments(.8,400,1.2,400,lwd=2,lty=2,col="grey30")
segments(1.8,400,2.2,400,lwd=2,lty=2,col="grey30")
segments(1.8,600,2.2,600,lwd=2,lty=1,col="grey30")
legend("topright",legend=c("RDA: pregnant women", "RDA: other adults"),lty=1:2,lwd=2,bty="n",col="grey30")
nhanesmedium <- read.csv("http://faculty.washington.edu/kenrice/heartgraphs/nhanesmedium.csv", na=".")
#empty plot to set up axes - same as the larger dataset
plot(BPXSAR~RIDAGEYR, data=nhaneslarge, xlab="Age (years)",ylab="Systolic BP (mmHg)",type="n")
points(BPXSAR~RIDAGEYR,data=nhanesmedium)
# fit the linear model, add the line and 95% CI
lmmodel <- lm(BPXSAR~RIDAGEYR, data=nhanesmedium)
mfit <- predict(lmmodel,se=TRUE) # assumes homoskedasticity
mii <- order(nhanesmedium$RIDAGEYR)
polygon(nhanesmedium$RIDAGEYR[c(mii,rev(mii))],
mfit$fit[c(mii,rev(mii))]+mfit$se.fit[c(mii,rev(mii))]*rep(c(-1.96,1.96),each=200),
col="#00000050",border=NA)
lines(nhanesmedium$RIDAGEYR[mii],mfit$fit[mii],type="l")
nhaneslarge <- read.csv("http://faculty.washington.edu/kenrice/heartgraphs/nhaneslarge.csv", na=".")
# plotting the larger dataset;
plot(BPXSAR~RIDAGEYR,data=nhaneslarge,xlab="Age (years)",ylab="Systolic BP (mmHg)",col="grey")
# spline fit plus prediction interval
library("splines")
splinemodel <- lm(BPXSAR~ns(RIDAGEYR,3),data=nhaneslarge,)
fit <- predict(splinemodel,se=TRUE)
ii <- order(nhaneslarge$RIDAGEYR)
polygon(nhaneslarge$RIDAGEYR[c(ii,rev(ii))],
fit$fit[c(ii,rev(ii))]+fit$se.fit[c(ii,rev(ii))]*rep(c(-1.96,1.96), each=1000),
col="#00000050",border=NA)
lines(nhaneslarge$RIDAGEYR[ii],fit$fit[ii],type="l")
nhaneslarge <- read.csv("http://faculty.washington.edu/kenrice/heartgraphs/nhaneslarge.csv", na=".")
par(las=3, xpd=TRUE, mar=c(8,4,0.5,4)+0.3)
with(nhaneslarge,
plot(y=cut(BPXSAR,c(0,110,140,250)), x=race_ethc, ylab="Category of Systolic BP (mmHg)", xlab=""))
mtext(side=4, "Proportion", line=2.5)
# tabulate the proportions
tabcounts <- with(nhaneslarge, table( race_ethc, cutbp=cut(BPXSAR,c(0,110,140,250)) ))
tabprop <- apply(tabcounts, 1, function(x){x/sum(x)})
tabd <- data.frame(prop = as.vector(tabprop),
race_ethc=rep(attr(tabcounts, "dimnames")[[1]], each=3),
cut_sbp=rep(attr(tabcounts, "dimnames")[[2]], 4))
par(las=3, xpd=TRUE, mar=c(8,4,3.5,4)+0.3)
# plot the proportions, label axes, and add legend
with(tabd,
plot(prop ~ rep(1:4, each=3), col=gray.colors(3)[as.numeric(cut_sbp)], pch=19, xlab="", ylab="Proportion",
axes=F, ylim=c(0,1))
)
axis(side=2)
axis(side=1, at=1:4, labels=attr(tabcounts, "dimnames")[[1]], las=2)
box()
par(xpd=TRUE)
legend("top", inset=c(0, -0.30), col=grey.colors(3), pch=19, bty="n",
paste(attr(tabcounts, "dimnames")[[2]],"mmHg"))
nhaneslarge <- read.csv("http://faculty.washington.edu/kenrice/heartgraphs/nhaneslarge.csv", na=".")
library("snippets")
tabbp <- with(nhaneslarge,
table(cut(BPXDI1,c(0,60,70,80,120)), cut(BPXDI2,c(0,60,70,80,120)))
)
par(mar=c(6,7,0,0)+0.1)
fd.matrix(as.matrix(tabbp), las=2)
abline(0,1,lty=3) # 45-degree line
mtext(side=1, line=5, "Diastolic BP range (mmHg)\n1st measurement")
mtext(side=2, line=5, "Diastolic BP range (mmHg)\n2nd measurement")
par(las=1, mar=c(4,7,0,0)+0.3)
nhanesmedium <- read.csv("http://faculty.washington.edu/kenrice/heartgraphs/nhanesmedium.csv", na=".")
expit <- function(x){exp(x)/(1+exp(x))}
library("beeswarm")
# plot the data
beeswarm(RIDAGEYR~I(BPXDAR>70),data=nhanesmedium, method="center", breaks=NA, horizontal=TRUE, xlab="Age (years)",
ylab="", labels=c("<= 70mmHg","> 70mmHg"))
# fit the logistic model, and add fitted values with 95% CI
glmmodel <- glm(BPXDAR>70~RIDAGEYR,data=nhanesmedium, family=binomial)
mfit <- predict(glmmodel,se=TRUE)
mii <- order(nhanesmedium$RIDAGEYR)
polygon(x=nhanesmedium$RIDAGEYR[c(mii,rev(mii))],
y=1+expit(mfit$fit[c(mii,rev(mii))]+mfit$se.fit[c(mii,rev(mii))]*rep(c(-1.96,1.96),each=200)),
col="#00000050",border=NA)
lines(nhanesmedium$RIDAGEYR[mii],1+expit(mfit$fit[mii]),type="l")
mtext(side=2, line=6, "Diastolic BP (mmHg)", las=3)
#plot the data
beeswarm(RIDAGEYR~I(BPXDAR>70),data=nhanesmedium, method="center", breaks=NA, horizontal=TRUE, xlab="Age (years)",
ylab="", labels=c("<= 70mmHg","> 70mmHg"))
# fit the spline-based model, adding fitted values with 95% CI
splinemodel <- glm(BPXDAR>70~ns(RIDAGEYR,3),data=nhanesmedium, family=binomial)
mfit <- predict(splinemodel,se=TRUE)
mii <- order(nhanesmedium$RIDAGEYR)
polygon(x=nhanesmedium$RIDAGEYR[c(mii,rev(mii))],
y=1+expit(mfit$fit[c(mii,rev(mii))]+mfit$se.fit[c(mii,rev(mii))]*rep(c(-1.96,1.96),each=200)),
col="#00000050",border=NA)
lines(nhanesmedium$RIDAGEYR[mii],1+expit(mfit$fit[mii]),type="l")
mtext(side=2, line=6, "Diastolic BP (mmHg)", las=3)
nhanesmedium <- read.csv("http://faculty.washington.edu/kenrice/heartgraphs/nhanesmedium.csv", na=".")
par(las=1, mar=c(4,6,1,0.5)) # horizontal axis labels, margin sizes
expit <- function(x){exp(x)/(1+exp(x))}
mycuts <- c(60,70,80)
cut_bp <- with(nhanesmedium, cut(BPXDAR, c(0,mycuts,150)))
plot(I(BPXDAR>70)~RIDAGEYR,data=nhanesmedium,xlab="Age (years)", ylab="Proportion,\nbased on logistic regression", type="n")
# add lines for multiple fitted models
for(i in 1:4){
thiscat <- as.numeric(cut_bp)==i
glmmodel <- glm(thiscat~RIDAGEYR,data=nhanesmedium, family=binomial)
mfit <- predict(glmmodel, se=TRUE, newdata=nhanesmedium)
mii <- order(nhanesmedium$RIDAGEYR)
lines(x=(nhanesmedium$RIDAGEYR)[mii],y=expit((mfit$fit)[mii]),type="l", lty=i)
}
# legend with right-justified labels
temp <- legend("top", lty=1:4, legend=rep(" ",4), bty="n",
text.width=strwidth(paste(levels(cut_bp)," mmHg", sep="")[4]),
xjust=1, yjust=1)
text(temp$rect$left + temp$rect$w, temp$text$y,
paste(levels(cut_bp)," mmHg", sep=""), pos = 2)
plot(I(BPXDAR>70)~RIDAGEYR,data=nhanesmedium,xlab="Age (years)", ylab="Proportion,\nbased on logistic regression", type="n")
# add lines for multiple fitted models;
for(i in 1:3){
glmmodel <- glm(BPXDAR>mycuts[i]~RIDAGEYR, data=nhanesmedium, family=binomial)
mfit <- predict(glmmodel,se=TRUE)
mii <- order(nhanesmedium$RIDAGEYR)
lines(x=nhanesmedium$RIDAGEYR[mii],y=expit(mfit$fit[mii]),type="l", lty=i)
}
legend("topleft", lty=1:3, legend=paste(">", mycuts, " mmHg", sep=""), bty="n")
library("lattice", warn.conflicts=FALSE)
nhaneslarge <- read.csv("http://faculty.washington.edu/kenrice/heartgraphs/nhaneslarge.csv", na=".")
xyplot(BPXSAR~BMXBMI|cut(RIDAGEYR,c(0,30,55,100), labels=c("<= 30 years","31-55 years", ">55 years")),groups=gender,
col.symbol="grey60",
data=nhaneslarge,xlab=quote("Body Mass Index"~(kg/m^2)),ylab="Systolic BP (mmHg)",
panel=panel.superpose,
panel.groups=function(x,y,group.number,...){
panel.xyplot(x,y,...)
panel.loess(x,y,col.line="black",lty=group.number,lwd=2, alpha=1)
},
layout=c(3,1),
par.settings = list(
superpose.line = list( col="black", lty=1:2, lwd=2 )
),
auto.key=list(space="top", columns=2,
title="Sex", cex.title=1,
lines=TRUE, points=FALSE)
)