# Session 6 # reading in data from the web (or could give a file location on disk) lawschool <- read.table("http://faculty.washington.edu/kenrice/rintro/lawschool.txt", header=TRUE) # q1 plot(LSAT~GPA, data=lawschool) # q2 orig.cor <- with(lawschool, cor(LSAT, GPA)) orig.cor # q3 (first with a for() loop) set.seed(1650) # Battle of Dunbar # do everything for one resample; n <- dim(lawschool)[1] # i.e. 15 myrows <- sample(1:n, replace=TRUE) # not a shuffle lawschool.boot <- lawschool[myrows,] lawschool.boot with(lawschool.boot, cor(LSAT, GPA)) many.cors <- rep(NA, 50) for(i in 1:50){ myrows <- sample(1:n, replace=TRUE) # not a shuffle lawschool.boot <- lawschool[myrows,] many.cors[i] <- with(lawschool.boot, cor(LSAT, GPA)) } hist(many.cors) summary(many.cors) # now with a repeat() loop set.seed(1650) # Battle of Dunbar many.cors <- rep(NA, 50) count <- 1 repeat({ myrows <- sample(1:n, replace=TRUE) lawschool.boot <- lawschool[myrows,] many.cors[count] <- with(lawschool.boot, cor(LSAT, GPA)) count <- count + 1 if( count == 50 ) break }) hist(many.cors) summary(many.cors) # !beware - uses the value of count at the start of the repeat() iteration # now with a while() loop set.seed(1650) # Battle of Dunbar many.cors <- rep(NA, 50) count <- 1 while(count<=50)({ myrows <- sample(1:n, replace=TRUE) lawschool.boot <- lawschool[myrows,] many.cors[count] <- with(lawschool.boot, cor(LSAT, GPA)) count <- count + 1 }) hist(many.cors) summary(many.cors) # q4 - using a for() loop # wrap all the q3 code inside a loop of numbers of samples - 50/200/800/3200; # (and add some prettified output) par(mfrow=c(2,2)) set.seed(1651) # Battle of Worcester for(bignumber in c(50, 200, 800, 3200)){ many.cors <- rep(NA, bignumber) for(i in 1:bignumber){ myrows <- sample(1:n, replace=TRUE) # not a shuffle lawschool.boot <- lawschool[myrows,] many.cors[i] <- with(lawschool.boot, cor(LSAT, GPA)) } hist(many.cors, xlim=c(0,1), main="") print(summary(many.cors)) # and a fancy title that includes numeric summaries title( paste("Mean =",round(mean(many.cors),2),"SD =",round(sd(many.cors),2) )) if(bignumber==3200){ # pretty output in concatanated form this time cat("\nFinal estimate:\n") # \n is carriage return cat(paste( round(orig.cor,2), "(", round(quantile(many.cors, 0.025),2), ",", round(quantile(many.cors, 0.975),2), ")\n")) } } # bootstrapping a function of the original estimate is straightforward # - in this case, atanh() of the correlation - Fisher's Z transform # (doing this using other statistical approaches is lots of work) length(many.cors) quantile(many.cors, c(0.025, 0.975) ) abline(v= quantile(many.cors, c(0.025, 0.975) ), col="thistle", lwd=2) par(mfrow=c(1,1)) hist(atanh(many.cors)) abline(v= quantile(atanh(many.cors), c(0.025, 0.975) ), col="thistle", lwd=2) summary( atanh(many.cors) )