R : Copyright 2004, The R Foundation for Statistical Computing
Version 1.9.1  (2004-06-21), ISBN 3-900051-00-3

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for a HTML browser interface to help.
Type 'q()' to quit R.

> #
> # exer5-Q2.q
> #
> # --------------------------------------------------
> #
> # PURPOSE:  Generate correlated data and use WLS
> #
> # DATE:   08 Feb 2007
> #
> # AUTHOR:  P. Heagerty
> #
> # --------------------------------------------------
> #
> m <- 200
> beta <- c( 100.0 , -5.0 )
> sigma <- 1.00
> D <- matrix( c( 100, 0, 0, 2 ), 2, 2 )
> print(D)
     [,1] [,2]
[1,]  100    0
[2,]    0    2
> #
> X <- cbind( 1, c(1:10) )
> Sigma <- sigma^2 * diag(rep(1,10)) + X%*%D%*%t(X)
> Sigma <- (Sigma + t(Sigma))/2
> #
> ols.sigma2 <- mean( diag( Sigma ) )
> #
> mu <- as.vector( X%*%beta )
> #
> Cc <- chol( Sigma )
> max( abs( t(Cc)%*%Cc - Sigma ) )
[1] 5.684342e-14
> max( abs( Cc%*%t(Cc) - Sigma ) )
[1] 1098.408
> #
> #####
> ##### generate data directly as MVN
> #####
> #
> y <- NULL
> x <- NULL
> id <- NULL
> for( i in 1:200 ){
+   yi <- mu + as.vector( t(Cc)%*%matrix(rnorm(10),ncol=1) )
+   y <- c( y, yi )
+   x <- c( x, c(1:10) )
+   id <- c( id, rep(i,10) )
+ }
> #
> #####
> ##### generate data and store RanInt RanSlope
> #####
> #
> y <- NULL
> x <- NULL
> id <- NULL
> RanInt <- rep( NA, 200 )
> RanSlope <- rep( NA, 200 )
> for( i in 1:200 ){
+   b0 <- sqrt(100)*rnorm(1)
+   b1 <- sqrt(2)*rnorm(1)  
+   yi <- mu + b0 + b1*c(1:10) + rnorm(10)
+   y <- c( y, yi )
+   x <- c( x, c(1:10) )
+   id <- c( id, rep(i,10) )
+   RanInt[i] <- b0
+   RanSlope[i] <- b1
+ }
> #
> #####
> ##### WLS function for balanced data
> ##### assumes Sigma2 is true sigma
> #####
> #
> wls.balanced <- function( id, y, X, Sigma, Sigma2 ){
+   nsubjects <- length(table(id))
+   W <- solve(Sigma)
+   XtWX <- matrix( 0, ncol(X), ncol(X) )
+   B <- matrix( 0, ncol(X), ncol(X) )
+   XtWY <- matrix( 0, ncol(X), 1 )
+   uid <- unique(id)
+   for( i in 1:nsubjects ){
+ 	keep <- id==uid[i] 
+   	yi <- matrix( y[ keep ], ncol=1 )
+ 	XtWX <- XtWX + t(X)%*%W%*%X
+ 	XtWY <- XtWY + t(X)%*%W%*%yi
+         B <- B + t(X)%*% W %*% Sigma2 %*% t(W) %*% X
+   }
+   beta <- as.vector( solve( XtWX )%*%XtWY )
+ list(beta=beta, A=XtWX, B=B)
+ }
> #
> Sigma0 <- diag( rep( ols.sigma2 ,10 ) )
> Sigma1 <- D[1,1] + diag( rep( sigma , 10 ) )
> Sigma2 <- Sigma
> #
> cat("Complete Data:  WLS Estimators\n")
Complete Data:  WLS Estimators
> round(wls.balanced( id, y, X, Sigma0, Sigma2  )$beta,2)
[1] 99.37 -5.01
> round(wls.balanced( id, y, X, Sigma1, Sigma2  )$beta,2)
[1] 99.37 -5.01
> round(wls.balanced( id, y, X, Sigma2, Sigma2  )$beta,2)
[1] 99.37 -5.01
> #
> #####
> ##### WLS for unbalanced using a missing indicator
> #####
> #
> wls.unbalanced <- function( id, y, X, Sigma,Sigma2, missing ){
+   nsubjects <- length(table(id[!missing]))
+   XtWX <- matrix( 0, ncol(X), ncol(X) )
+   XtWY <- matrix( 0, ncol(X), 1 )
+   B <- matrix( 0, ncol(X), ncol(X) )
+   uid <- unique(id[!missing])
+   for( i in 1:nsubjects ){
+ 	keep <- id==uid[i] 
+         mi <- as.logical( missing[keep] )
+   	yi <- matrix( y[ keep ], ncol=1 )
+ 	yi <- matrix( yi[ !mi ], ncol=1 )
+ 	Xi <- matrix( X[ !mi, ], ncol=2 )
+ 	Wi <- solve( Sigma[ !mi, !mi ] )
+         Sig2i <- Sigma2[!mi, !mi]
+ 	XtWX <- XtWX + t(Xi)%*%Wi%*%Xi
+ 	XtWY <- XtWY + t(Xi)%*%Wi%*%yi
+         B <- B + t(Xi)%*% Wi %*% Sig2i %*% t(Wi) %*% Xi
+   }
+   beta <- as.vector( solve( XtWX )%*%XtWY )
+  list(beta=beta, A=XtWX, B=B)
+ }
> 
> #
> #####
> ##### MCAR data (part a)
> #####
> #
> MCAR <- rep(0,2000)
> MCAR[sample(2000,.2*2000,replace=F)] <- 1
> mcar.0 <- (wls.unbalanced( id, y, X, Sigma0, Sigma2, MCAR))     
> mcar.1 <- (wls.unbalanced( id, y, X, Sigma1, Sigma2, MCAR))  
> mcar.2 <- (wls.unbalanced( id, y, X, Sigma2, Sigma2, MCAR))
> 
> cat("Incomplete Data (MCAR):  WLS Estimators\n")
Incomplete Data (MCAR):  WLS Estimators
> round(mcar.0$beta,2)
[1] 99.12 -4.98
> round(mcar.1$beta,2)        
[1] 99.53 -5.04
> round(mcar.2$beta,2)
[1] 99.40 -5.02
> 
> #
> #####
> ##### MCAR asymptotic relative efficiencies (part b)
> #####
> #
> ivar <- solve(mcar.0$A) %*% (mcar.0$B) %*%  solve(mcar.0$A)       
> wvar <- solve(mcar.1$A) %*% (mcar.1$B) %*%  solve(mcar.1$A)
> ### check that these two the same:
>  solve(mcar.2$A)
              [,1]          [,2]
[1,]  0.5033019210 -0.0004742324
[2,] -0.0004742324  0.0100854708
>  solve(mcar.2$A) %*% (mcar.2$B) %*%  solve(mcar.2$A) 
              [,1]          [,2]
[1,]  0.5033019210 -0.0004742324
[2,] -0.0004742324  0.0100854708
> 
> ivar
           [,1]        [,2]
[1,]  0.5785461 -0.01228970
[2,] -0.0122897  0.01258213
> wvar
             [,1]         [,2]
[1,]  0.524829249 -0.003797547
[2,] -0.003797547  0.010658756
> cat("Incomplete Data (MCAR):  WLS Estimator Efficiency\n")
Incomplete Data (MCAR):  WLS Estimator Efficiency
> solve(mcar.2$A)[1,1]/ivar[1,1]
[1] 0.8699426
> solve(mcar.2$A)[2,2]/ivar[2,2]
[1] 0.8015711
> solve(mcar.2$A)[1,1]/wvar[1,1]   
[1] 0.9589822
> solve(mcar.2$A)[2,2]/wvar[2,2]        
[1] 0.9462147
>         
> #
> #####
> ##### MAR data 
> #####
> #
> cut.point <- 65.0
> nsubjects <- length(table(id))
> uid <- unique(id)
> missing <- NULL
> for( i in 1:nsubjects ){
+   yi <- y[ id==uid[i] ]
+   if( min(yi) < cut.point ){
+ 	if( max(yi) < cut.point ){
+  	 	index <- c( 0, rep( 1, 9 ) )
+ 	}else{
+ 		index <- min( c(1:10)[ yi<cut.point ] )
+ 		mi <- rep( 0, 10 )
+ 	        if( index<10 ){ mi <- c( rep( 0, index), rep(1,10-index) ) }
+ 	}
+   }else{
+ 	mi <- rep( 0, 10 )
+  }
+ missing <- c( missing, mi )
+ }
> 
> ##### number of cases eliminated due to MAR
> sum(missing)
[1] 543
> 
> #
> cat("Incomplete Data (MAR):  WLS Estimators\n")
Incomplete Data (MAR):  WLS Estimators
> round(wls.unbalanced( id, y, X, Sigma0, Sigma2 , missing )$beta,2)
[1] 94.04 -2.89
> round(wls.unbalanced( id, y, X, Sigma1, Sigma2 , missing )$beta,2)
[1] 97.48 -4.31
> round(wls.unbalanced( id, y, X, Sigma2, Sigma2 , missing )$beta,2)
[1] 99.37 -5.01
> #
> #####
> ##### informative missingness!!!
> #####
> #
> generate.crm <- function( eta ){
+   mmm <- exp( eta )/( 1 + exp( eta ) )
+   qqq <- 1-mmm
+   sss <- exp( cumsum( log(qqq) ) )
+   fff <- c( 1 - sss, 1 )
+   uuu <- runif(1)
+   ddd <- c(1:length(fff))[ ( uuu < fff ) & ( uuu >= c(0,fff[-length(fff)]) ) ]
+   ddd
+ }
> #
> ddd <- rep( NA, 200 )
> for( j in 1:200 ){
+   ddd[j] <- generate.crm( -2.5 - (c(1:9)/2.5)*RanSlope[j] - 0.1*RanInt[j] )
+ }
> #
> dvec <- rep( ddd, rep(10,200) )
> missing <- rep( 0, length(y) )
> missing[ rep( c(1:10), 200 ) > dvec ] <- 1
> #
> ##### number of cases eliminated due to ID
> #
> sum(missing)
[1] 743
> #
> cat("Incomplete Data (NI):  WLS Estimators\n")
Incomplete Data (NI):  WLS Estimators
> round(wls.unbalanced( id, y, X, Sigma0, Sigma2 , missing )$beta,2)
[1] 97.79 -3.50
> round(wls.unbalanced( id, y, X, Sigma1, Sigma2 , missing )$beta,2)
[1] 97.78 -4.10
> round(wls.unbalanced( id, y, X, Sigma2, Sigma2 , missing )$beta,2)
[1] 99.16 -4.82
> #
> #####
> ##### show this scenario
> #####
> #
> postscript( file="exer5-Q2-plot1.ps", horiz=F )
> par( mfrow=c(2,1) )
> plot( jitter(x), y, pch=".", ylim=range(y) )
> title("Complete Data")
> plot( jitter(x[!missing]), y[!missing], pch=".", ylim=range(y) )
> title("Observed Data")
> graphics.off()
> #
> postscript( file="exer5-Q2-plot2.ps", horiz=F )
> par( mfrow=c(2,1) )
> plot( RanInt, ddd, pch=1 )
> title("Dropout vs RanInt")
> plot( RanSlope, ddd, pch=1 )
> title("Dropout vs RanSlope")
> graphics.off()
> #
> # end-of-file...
> 
> 
> 
>