This page contains Stata code and output, equivalent to the examples in the paper. In all cases, the data are random samples from the freely-available NHANES dataset, and the variables used are;
To run the code, the following packages must first be installed;
ssc install vioplot
ssc install stripplot
ssc install spineplot
Following this, cut-and-paste the code for each figure into an R session.
insheet using http://faculty.washington.edu/kenrice/heartgraphs/nhanessmall.csv, clear
label variable bpxsar "Systolic Blood Pressure (mmHg)"
stripplot bpxsar
stripplot bpxsar, stack center
insheet using http://faculty.washington.edu/kenrice/heartgraphs/nhanesmedium.csv, clear
label variable bpxsar "Systolic Blood Pressure (mmHg)"
stripplot bpxsar
stripplot bpxsar, stack center
stripplot bpxsar, width(1) stack center
insheet using http://faculty.washington.edu/kenrice/heartgraphs/nhaneslarge.csv, clear
label variable bpxsar "Systolic Blood Pressure (mmHg)"
vioplot bpxsar, horizontal
histogram bpxsar
insheet using http://faculty.washington.edu/kenrice/heartgraphs/nhaneslarge.csv, clear
generate normotens = bpxsar <= 140
label define normotensl 1 "<= 140 mmHg" 0 "> 140 mmHg"
label values normotens normotensl
generate sample = 1
graph hbar (percent) sample, over(normotens) asyvars stack
insheet using http://faculty.washington.edu/kenrice/heartgraphs/nhaneslarge.csv, clear
generate normotens = bpxsar <= 140
collapse (mean) est=normotens (sebinomial) stderr=normotens
generate loCI = est - 1.96*stderr
generate hiCI = est + 1.96*stderr
generate x = 1
label variable x "Estimate"
label variable loCI "95% Conf Int"
label variable hiCI "95% Conf Int"
twoway (scatter x est) (rspike loCI hiCI x, horizontal), ///
xscale( r(0 1) ) xtick( 0(0.2)1 ) xlabel( 0(0.2)1) yscale(off)
insheet using http://faculty.washington.edu/kenrice/heartgraphs/nhaneslarge.csv, clear
generate sample = 1
graph hbar (percent) sample, over(race_ethc) asyvars stack
insheet using http://faculty.washington.edu/kenrice/heartgraphs/nhaneslarge.csv, clear
generate x=1
collapse (count) x, by(race_ethc)
generate est = x/1000
generate stderr = sqrt(est*(1-est)/1000)
generate loCI = est - 1.96*stderr
generate hiCI = est + 1.96*stderr
generate study = _n
label variable study "Estimate"
label variable loCI "95% Conf Int"
label variable hiCI "95% Conf Int"
twoway (scatter study est) (rspike loCI hiCI study, horizontal), ///
xscale( r(0 1) ) xtick( 0(0.2)1 ) xlabel( 0(0.2)1) ///
ylabel( 1 "Black non-Hispanic" 2 "Hispanic" 3 "Other" 4 "White non-Hispanic", angle(0)) ///
ytitle("")
insheet using http://faculty.washington.edu/kenrice/heartgraphs/nhaneslarge.csv, clear
generate normotens = bpxsar <= 140
label define normotensl 1 "<= 140 mmHg" 0 "> 140 mmHg"
label values normotens normotensl
generate x = 1
graph pie x, over(normotens)
insheet using http://faculty.washington.edu/kenrice/heartgraphs/nhaneslarge.csv, clear
generate x = 1
graph pie x, over(race_ethc)
insheet using http://faculty.washington.edu/kenrice/heartgraphs/nhanessmall.csv, clear
label variable dr1tfola "Folate intake (ug/day)"
label define sexl 1 "Male" 2 "Female"
label values riagendr sexl
stripplot dr1tfola, vertical over(riagendr)
insheet using http://faculty.washington.edu/kenrice/heartgraphs/nhanesmedium.csv, clear
label variable dr1tfola "Folate intake (ug/day)"
label define sexl 1 "Male" 2 "Female"
label values riagendr sexl
stripplot dr1tfola, width(20) vertical stack center over(riagendr)
insheet using http://faculty.washington.edu/kenrice/heartgraphs/nhaneslarge.csv, clear
label variable dr1tfola "Folate intake (ug/day)"
label define sexl 1 "Male" 2 "Female"
label values riagendr sexl
vioplot dr1tfola, vertical over(riagendr) ytitle("Folate intake (ug/day)")
insheet using http://faculty.washington.edu/kenrice/heartgraphs/nhanesmedium.csv, clear
label variable bpxsar "Systolic Blood Pressure (mmHg)"
label variable ridageyr "Age (years)"
twoway (scatter bpxsar ridageyr, msymbol(Oh)) (lfitci bpxsar ridageyr), legend(off) ytitle("Systolic Blood Pressure (mmHg)")
insheet using http://faculty.washington.edu/kenrice/heartgraphs/nhaneslarge.csv, clear
label variable bpxsar "Systolic Blood Pressure (mmHg)"
label variable ridageyr "Age (years)"
mkspline spage = ridageyr, cubic
quietly regress bpxsar spage*
predict fit, xb
predict stderr, stdp
generate lb = fit - 1.96*stderr
generate ub = fit + 1.96*stderr
sort ridageyr
twoway scatter bpxsar ridageyr || rarea lb ub ridageyr || line fit ridageyr, legend(off) ytitle("Systolic Blood Pressure (mmHg)")
insheet using http://faculty.washington.edu/kenrice/heartgraphs/nhaneslarge.csv, clear
egen sbpcut = cut(bpxsar), at(0 110 140 250) label
encode race_ethc, generate(race_ethc2)
label variable sbpcut "Category of Sys Blood Pressure"
label variable race_ethc2 "Race/Ethnicity"
spineplot sbpcut race_ethc2, xlabel(, angle(v) axis(2))
insheet using http://faculty.washington.edu/kenrice/heartgraphs/nhaneslarge.csv, clear
generate x=1
egen sbpcut = cut(bpxsar), at(0 110 140 250) label
collapse (count) x, by(race_ethc sbpcut)
egen prop = pc(x), by(race_ethc)
replace prop = prop/100
encode race_ethc, generate(race_ethc2)
label variable prop "Proportion"
label variable race_ethc2 "Race/Ethnicity"
stripplot prop, over(race_ethc2) separate(sbpcut) vertical xlabel(, angle(v)) legend( title("Category of Sys Blood Pressure"))
insheet using http://faculty.washington.edu/kenrice/heartgraphs/nhaneslarge.csv, clear
egen dbpcut1 = cut(bpxdi1), at(0 60 70 80 120) label
egen dbpcut2 = cut(bpxdi2), at(0 60 70 80 120) label
generate x=1
collapse (count) x, by(dbpcut1 dbpcut2)
twoway (scatter dbpcut1 dbpcut2 [weight=x], msize(1.5) msymbol(S)) ///
(function y=x, range(-1 4) ), ///
aspectratio(1) ///
xlabel(0 "0-" 1 "60-" 2 "70-" 3 "80-") ///
ylabel(0 "0-" 1 "60-" 2 "70-" 3 "80-") ///
xscale( range(-1 4)) ///
yscale( range(-1 4)) ///
xtitle("Category of Dias BP, Measure #2") ///
ytitle("Category of Dias BP, Measure #1") ///
legend(off)
insheet using http://faculty.washington.edu/kenrice/heartgraphs/nhanesmedium.csv, clear
generate htn = bpxdar > 70
label variable ridageyr "Age (years)"
label variable htn "Diastolic BP (mmHg)/Proportion"
label define htnl 1 "> 70 mmHg" 0 "<= 70 mmHg"
label values htn htnl
logistic htn ridageyr
predict fitted, xb
predict fitted_se, stdp
generate est = exp(fitted)/(1+exp(fitted))
generate hici = exp(fitted+fitted_se*1.96)/(1+exp(fitted+fitted_se*1.96))
generate loci = exp(fitted-fitted_se*1.96)/(1+exp(fitted-fitted_se*1.96))
sort ridageyr
stripplot ridageyr, stack center over(htn)
graph addplot rarea hici loci ridageyr
graph addplot line est ridageyr, yscale( range(0 1)) ylabel(0 "<= 70mmHg 0" 0.5 1 ">70mmHg 1")
insheet using http://faculty.washington.edu/kenrice/heartgraphs/nhanesmedium.csv, clear
generate htn = bpxdar > 70
label variable ridageyr "Age (years)"
label variable htn "Dichotomized Diastolic BP (mmHg), Proportion"
label define htnl 1 "> 70 mmHg" 0 "<= 70 mmHg"
label values htn htnl
mkspline spage = ridageyr, cubic
quietly logistic htn spage*
predict fitted, xb
predict fitted_se, stdp
generate est = exp(fitted)/(1+exp(fitted))
generate hici = exp(fitted+fitted_se*1.96)/(1+exp(fitted+fitted_se*1.96))
generate loci = exp(fitted-fitted_se*1.96)/(1+exp(fitted-fitted_se*1.96))
sort ridageyr
stripplot ridageyr, stack center over(htn)
graph addplot rarea hici loci ridageyr
graph addplot line est ridageyr, yscale( range(0 1)) ylabel(0 "<= 70mmHg 0" 0.5 1 ">70mmHg 1")
insheet using http://faculty.washington.edu/kenrice/heartgraphs/nhaneslarge.csv, clear
egen dbpcut = cut(bpxdar), at(0 60 70 80 120) label
forvalues i = 0(1)3{
generate level`i' = dbpcut==`i'
quietly logit level`i' ridageyr
predict fitxb`i', xb
generate fitp`i' = exp(fitxb`i')/(1+exp(fitxb`i'))
local temp : label (dbpcut) `i'
label variable fitp`i' "`temp'"
}
sort ridageyr
label variable ridageyr "Age (years)"
line fitp* ridageyr, ytitle("Proportion, based on log'c regression")
insheet using http://faculty.washington.edu/kenrice/heartgraphs/nhaneslarge.csv, clear
egen dbpcut = cut(bpxdar), at(0 60 70 80 120) label
forvalues i = 0(1)2{
generate level`i' = dbpcut>`i'
quietly logit level`i' ridageyr
predict fitxb`i', xb
generate fitp`i' = exp(fitxb`i')/(1+exp(fitxb`i'))
}
label variable ridageyr "Age (years)"
label variable fitp0 "> 60 mmHg"
label variable fitp1 "> 70 mmHg"
label variable fitp2 "> 80 mmHg"
sort ridageyr
line fitp* ridageyr, ytitle("Proportion, based on log'c regression")
insheet using http://faculty.washington.edu/kenrice/heartgraphs/nhaneslarge.csv, clear
egen agecut = cut(ridageyr), at(0 30 55 100) label
label variable bmxbmi "BMI (kg/m^2)"
twoway (scatter bpxsar bmxbmi, msymbol(o) mcolor(gs14) subtitle("Age Category:", prefix)) ///
(lowess bpxsar bmxbmi if riagendr==1, lwidth(thick)) ///
(lowess bpxsar bmxbmi if riagendr==2, lwidth(thick) lpattern(dash)) ///
, by(agecut, cols(3) compact note("")) ///
ytitle("Systolic Blood Pressure (mmHg)") legend(order(2 "Male" 3 "Female"))