# # R Commands used with NWTS balanced sample to generate results for IEA course notes # # Set working directory (will depend on local configuration) # setwd("P:\\LaTeX\\talks\\2006-09\\IEA08") # # Load previously installed packages # library(survival) library(survey) library(Epi) # # Load NWTS data and documentation # load(system.file("doc","nwts.rda",package="survey")) # # See codesheet.txt on course website for data documentation # nwts # print out grouped data for all 4088 subjects (complete data) nwtsb # print out grouped data for 1142 in balanced sample nwts.ex<-read.table("nwts.ex.txt",header=T) # read expanded complete dataset from course website nwtsb.ex<-read.table("nwtsb.ex.txt",header=T) # read expanded balanced sample data from website # NB: nwtsb.ex contains records for all 4088 subjects sampled at Phase I # but has known values of histol only for those sampled at Phase II # # Add sampling weights and ID variable to balanced dataset # nwtsb.ex<-within(nwtsb.ex,wt<-1) nwtsb.ex<-within(nwtsb.ex,{ wt[instit==1&cc==0]<-3262/316 id<-1:length(cc) in.ccs<-!is.na(histol)} ) # # Verify the weights stat.table(list(cc,instit),mean(wt),data=nwtsb.ex) # # Declare stage and histol as factors # nwts.ex<-within(nwts.ex,{ stage<-factor(stage,labels=c("I","II","III","IV")) histol<-factor(histol,labels=c("FH","UH"))}) nwtsb.ex<-within(nwtsb.ex,{ stage<-factor(stage,labels=c("I","II","III","IV")) histol<-factor(histol,labels=c("FH","UH"))}) # xtabs(~cc+histol,data=nwts.ex) # 2x2 table of complete data effx(cc,exposure=histol,type="binary",data=nwts.ex) # odds ratio effx(cc,exposure=histol,strata=stage,type="binary",data=nwts.ex) # odds ratio effx(cc,exposure=histol,type="binary",weights=wt,data=nwtsb.ex) # odds ratio effx(cc,exposure=histol,strata=factor(stage),type="binary",weights=wt,data=nwtsb.ex) # odds ratio # # Analyses of complete data (log odds ratio, log risk ratio) # summary(glm(cc~histol,family=binomial,data=nwts.ex)) summary(glm(cc~histol,family=binomial(link="log"),data=nwts.ex)) # # Treat Phase II data as simple random (Bernoulli) sample with known sampling weights # dsingle<-svydesign(id=~id,weights=~wt,data=subset(nwtsb.ex,!is.na(histol))) summary(dsingle) # # Two phase sampling design (finite population stratified sample at Phase II) # dtwophs<-twophase(id=list(~id,~id),subset=~in.ccs, strata=list(NULL,~interaction(instit,cc)),data=nwtsb.ex) summary(dtwophs) # # Log odds ratio and log risk ratio with conservative (Bernoulli) standard errors # summary(svyglm(cc~histol,family=binomial,design=dsingle)) summary(svyglm(cc~histol,family=binomial(link="log"),design=dsingle)) # # Log odds ratio and log risk ratio with standard two-phase standard errors # summary(svyglm(cc~histol,family=binomial,design=dtwophs)) summary(svyglm(cc~histol,family=binomial(link="log"),design=dtwophs)) # # Fit full logistic model (stage/histol) to complete data and each sampling design # main<-glm(cc~stage/histol,family=binomial,data=nwts.ex) summary(main) round(ci.lin(main,Exp=T)[,5:7],4) single<-svyglm(cc~stage/histol,family=binomial,design=dsingle) summary(single) round(ci.lin(single,Exp=T)[,5:7],4) twophs<-svyglm(cc~stage/histol,family=binomial,design=dtwophs) summary(twophs) round(ci.lin(twophs,Exp=T)[,5:7],4) # # Adjust weights by post-stratification on stage (in addition to instit and cc) # dpostst<-twophase(id=list(~id,~id),subset=~in.ccs, strata=list(NULL,~interaction(stage,instit,cc)),data=nwtsb.ex) summary(dpostst) postst<-svyglm(cc~stage/histol,family=binomial,design=dpostst) summary(postst) round(ci.lin(postst,Exp=T)[,5:7],4) # # Obtain same results by calibration of two-phase design to stage distribution # within each combination of instit and cc # dcalibr<-calibrate(dtwophs,formula=~interaction(stage,instit,cc)) summary(dcalibr) calibr<-svyglm(cc~stage/histol,family=binomial,design=dcalibr) summary(calibr) round(ci.lin(calibr,Exp=T)[,5:7],4) # # Obtain same results again by estimation of weights using the same # 2x2x4 = 16 strata formed by interaction of instit x cc x stage # destima<-estWeights(nwtsb.ex,formula=~interaction(stage,instit,cc),subset=~in.ccs) summary(destima) estima<-svyglm(cc~stage/histol,family=binomial,design=destima) summary(estima) round(ci.lin(estima,Exp=T)[,5:7],4) # # Test for significance of interaction coefficients # main.add<-glm(cc~stage+histol,family=binomial,data=nwts.ex) anova(main.add,main,test="Chisq") main.interact<-glm(cc~stage+histol+stage:histol,family=binomial,data=nwts.ex) regTermTest(main.interact,~stage:histol) interact<-svyglm(cc~stage+histol+stage:histol,family=binomial,design=dpostst) regTermTest(interact,~stage:histol) # # Compare Phase I and Phase II SEs # var<-vcov(postst) var1<-attr(var,"phases")$phase1 var2<-attr(var,"phases")$phase2 round(cbind(sqrt(diag(var1)),sqrt(diag(var2))),4) # # Check that Phase II SE is zero when fit model with stage alone # stage.alone<-svyglm(cc~stage,family=binomial,design=dpostst) var<-vcov(stage.alone) var1<-attr(var,"phases")$phase1 var2<-attr(var,"phases")$phase2 round(cbind(sqrt(diag(var1)),sqrt(diag(var2))),4) # # This doesn't work with non-calibrated design # stage.alone<-svyglm(cc~stage,family=binomial,design=dtwophs) var<-vcov(stage.alone) var1<-attr(var,"phases")$phase1 var2<-attr(var,"phases")$phase2 round(cbind(sqrt(diag(var1)),sqrt(diag(var2))),4)