R : Copyright 2004, The R Foundation for Statistical Computing Version 1.9.1 (2004-06-21), ISBN 3-900051-00-3 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for a HTML browser interface to help. Type 'q()' to quit R. > # > # exer5-Q2.q > # > # -------------------------------------------------- > # > # PURPOSE: Generate correlated data and use WLS > # > # DATE: 08 Feb 2007 > # > # AUTHOR: P. Heagerty > # > # -------------------------------------------------- > # > m <- 200 > beta <- c( 100.0 , -5.0 ) > sigma <- 1.00 > D <- matrix( c( 100, 0, 0, 2 ), 2, 2 ) > print(D) [,1] [,2] [1,] 100 0 [2,] 0 2 > # > X <- cbind( 1, c(1:10) ) > Sigma <- sigma^2 * diag(rep(1,10)) + X%*%D%*%t(X) > Sigma <- (Sigma + t(Sigma))/2 > # > ols.sigma2 <- mean( diag( Sigma ) ) > # > mu <- as.vector( X%*%beta ) > # > Cc <- chol( Sigma ) > max( abs( t(Cc)%*%Cc - Sigma ) ) [1] 5.684342e-14 > max( abs( Cc%*%t(Cc) - Sigma ) ) [1] 1098.408 > # > ##### > ##### generate data directly as MVN > ##### > # > y <- NULL > x <- NULL > id <- NULL > for( i in 1:200 ){ + yi <- mu + as.vector( t(Cc)%*%matrix(rnorm(10),ncol=1) ) + y <- c( y, yi ) + x <- c( x, c(1:10) ) + id <- c( id, rep(i,10) ) + } > # > ##### > ##### generate data and store RanInt RanSlope > ##### > # > y <- NULL > x <- NULL > id <- NULL > RanInt <- rep( NA, 200 ) > RanSlope <- rep( NA, 200 ) > for( i in 1:200 ){ + b0 <- sqrt(100)*rnorm(1) + b1 <- sqrt(2)*rnorm(1) + yi <- mu + b0 + b1*c(1:10) + rnorm(10) + y <- c( y, yi ) + x <- c( x, c(1:10) ) + id <- c( id, rep(i,10) ) + RanInt[i] <- b0 + RanSlope[i] <- b1 + } > # > ##### > ##### WLS function for balanced data > ##### assumes Sigma2 is true sigma > ##### > # > wls.balanced <- function( id, y, X, Sigma, Sigma2 ){ + nsubjects <- length(table(id)) + W <- solve(Sigma) + XtWX <- matrix( 0, ncol(X), ncol(X) ) + B <- matrix( 0, ncol(X), ncol(X) ) + XtWY <- matrix( 0, ncol(X), 1 ) + uid <- unique(id) + for( i in 1:nsubjects ){ + keep <- id==uid[i] + yi <- matrix( y[ keep ], ncol=1 ) + XtWX <- XtWX + t(X)%*%W%*%X + XtWY <- XtWY + t(X)%*%W%*%yi + B <- B + t(X)%*% W %*% Sigma2 %*% t(W) %*% X + } + beta <- as.vector( solve( XtWX )%*%XtWY ) + list(beta=beta, A=XtWX, B=B) + } > # > Sigma0 <- diag( rep( ols.sigma2 ,10 ) ) > Sigma1 <- D[1,1] + diag( rep( sigma , 10 ) ) > Sigma2 <- Sigma > # > cat("Complete Data: WLS Estimators\n") Complete Data: WLS Estimators > round(wls.balanced( id, y, X, Sigma0, Sigma2 )$beta,2) [1] 99.37 -5.01 > round(wls.balanced( id, y, X, Sigma1, Sigma2 )$beta,2) [1] 99.37 -5.01 > round(wls.balanced( id, y, X, Sigma2, Sigma2 )$beta,2) [1] 99.37 -5.01 > # > ##### > ##### WLS for unbalanced using a missing indicator > ##### > # > wls.unbalanced <- function( id, y, X, Sigma,Sigma2, missing ){ + nsubjects <- length(table(id[!missing])) + XtWX <- matrix( 0, ncol(X), ncol(X) ) + XtWY <- matrix( 0, ncol(X), 1 ) + B <- matrix( 0, ncol(X), ncol(X) ) + uid <- unique(id[!missing]) + for( i in 1:nsubjects ){ + keep <- id==uid[i] + mi <- as.logical( missing[keep] ) + yi <- matrix( y[ keep ], ncol=1 ) + yi <- matrix( yi[ !mi ], ncol=1 ) + Xi <- matrix( X[ !mi, ], ncol=2 ) + Wi <- solve( Sigma[ !mi, !mi ] ) + Sig2i <- Sigma2[!mi, !mi] + XtWX <- XtWX + t(Xi)%*%Wi%*%Xi + XtWY <- XtWY + t(Xi)%*%Wi%*%yi + B <- B + t(Xi)%*% Wi %*% Sig2i %*% t(Wi) %*% Xi + } + beta <- as.vector( solve( XtWX )%*%XtWY ) + list(beta=beta, A=XtWX, B=B) + } > > # > ##### > ##### MCAR data (part a) > ##### > # > MCAR <- rep(0,2000) > MCAR[sample(2000,.2*2000,replace=F)] <- 1 > mcar.0 <- (wls.unbalanced( id, y, X, Sigma0, Sigma2, MCAR)) > mcar.1 <- (wls.unbalanced( id, y, X, Sigma1, Sigma2, MCAR)) > mcar.2 <- (wls.unbalanced( id, y, X, Sigma2, Sigma2, MCAR)) > > cat("Incomplete Data (MCAR): WLS Estimators\n") Incomplete Data (MCAR): WLS Estimators > round(mcar.0$beta,2) [1] 99.12 -4.98 > round(mcar.1$beta,2) [1] 99.53 -5.04 > round(mcar.2$beta,2) [1] 99.40 -5.02 > > # > ##### > ##### MCAR asymptotic relative efficiencies (part b) > ##### > # > ivar <- solve(mcar.0$A) %*% (mcar.0$B) %*% solve(mcar.0$A) > wvar <- solve(mcar.1$A) %*% (mcar.1$B) %*% solve(mcar.1$A) > ### check that these two the same: > solve(mcar.2$A) [,1] [,2] [1,] 0.5033019210 -0.0004742324 [2,] -0.0004742324 0.0100854708 > solve(mcar.2$A) %*% (mcar.2$B) %*% solve(mcar.2$A) [,1] [,2] [1,] 0.5033019210 -0.0004742324 [2,] -0.0004742324 0.0100854708 > > ivar [,1] [,2] [1,] 0.5785461 -0.01228970 [2,] -0.0122897 0.01258213 > wvar [,1] [,2] [1,] 0.524829249 -0.003797547 [2,] -0.003797547 0.010658756 > cat("Incomplete Data (MCAR): WLS Estimator Efficiency\n") Incomplete Data (MCAR): WLS Estimator Efficiency > solve(mcar.2$A)[1,1]/ivar[1,1] [1] 0.8699426 > solve(mcar.2$A)[2,2]/ivar[2,2] [1] 0.8015711 > solve(mcar.2$A)[1,1]/wvar[1,1] [1] 0.9589822 > solve(mcar.2$A)[2,2]/wvar[2,2] [1] 0.9462147 > > # > ##### > ##### MAR data > ##### > # > cut.point <- 65.0 > nsubjects <- length(table(id)) > uid <- unique(id) > missing <- NULL > for( i in 1:nsubjects ){ + yi <- y[ id==uid[i] ] + if( min(yi) < cut.point ){ + if( max(yi) < cut.point ){ + index <- c( 0, rep( 1, 9 ) ) + }else{ + index <- min( c(1:10)[ yi > ##### number of cases eliminated due to MAR > sum(missing) [1] 543 > > # > cat("Incomplete Data (MAR): WLS Estimators\n") Incomplete Data (MAR): WLS Estimators > round(wls.unbalanced( id, y, X, Sigma0, Sigma2 , missing )$beta,2) [1] 94.04 -2.89 > round(wls.unbalanced( id, y, X, Sigma1, Sigma2 , missing )$beta,2) [1] 97.48 -4.31 > round(wls.unbalanced( id, y, X, Sigma2, Sigma2 , missing )$beta,2) [1] 99.37 -5.01 > # > ##### > ##### informative missingness!!! > ##### > # > generate.crm <- function( eta ){ + mmm <- exp( eta )/( 1 + exp( eta ) ) + qqq <- 1-mmm + sss <- exp( cumsum( log(qqq) ) ) + fff <- c( 1 - sss, 1 ) + uuu <- runif(1) + ddd <- c(1:length(fff))[ ( uuu < fff ) & ( uuu >= c(0,fff[-length(fff)]) ) ] + ddd + } > # > ddd <- rep( NA, 200 ) > for( j in 1:200 ){ + ddd[j] <- generate.crm( -2.5 - (c(1:9)/2.5)*RanSlope[j] - 0.1*RanInt[j] ) + } > # > dvec <- rep( ddd, rep(10,200) ) > missing <- rep( 0, length(y) ) > missing[ rep( c(1:10), 200 ) > dvec ] <- 1 > # > ##### number of cases eliminated due to ID > # > sum(missing) [1] 743 > # > cat("Incomplete Data (NI): WLS Estimators\n") Incomplete Data (NI): WLS Estimators > round(wls.unbalanced( id, y, X, Sigma0, Sigma2 , missing )$beta,2) [1] 97.79 -3.50 > round(wls.unbalanced( id, y, X, Sigma1, Sigma2 , missing )$beta,2) [1] 97.78 -4.10 > round(wls.unbalanced( id, y, X, Sigma2, Sigma2 , missing )$beta,2) [1] 99.16 -4.82 > # > ##### > ##### show this scenario > ##### > # > postscript( file="exer5-Q2-plot1.ps", horiz=F ) > par( mfrow=c(2,1) ) > plot( jitter(x), y, pch=".", ylim=range(y) ) > title("Complete Data") > plot( jitter(x[!missing]), y[!missing], pch=".", ylim=range(y) ) > title("Observed Data") > graphics.off() > # > postscript( file="exer5-Q2-plot2.ps", horiz=F ) > par( mfrow=c(2,1) ) > plot( RanInt, ddd, pch=1 ) > title("Dropout vs RanInt") > plot( RanSlope, ddd, pch=1 ) > title("Dropout vs RanSlope") > graphics.off() > # > # end-of-file... > > > >