# Non-stationary ARIMA & ECM estimation, fiting, and interpretation # Chris Adolph # # Revised 29 July 2014 rm(list=ls()) # Load libraries library(Zelig) # For Zelig simulation procedures & approval data library(tseries) # For unit root tests library(lmtest) # For Breusch-Godfrey LM test of serial correlation library(urca) # For estimating cointegration models library(simcf) # For counterfactual simulation via ldvsimev() library(MASS) # For mvrnorm() source("TSplotHelper.R") # Helper function for counterfactual time series plots # Load data # US Presidential approval data (Bush, Monthly, 2/2001--6/2006) # Includes average oil price data ($/barrel?) # # Variable names: month year approve disapprove unsure # sept.oct.2001 iraq.war avg.price data(approval) attach(approval) # Create a random walk set.seed(1) phony <- rnorm(length(approve)) for (i in 2:length(phony)) phony[i] <- phony[i-1] + rnorm(1) # Look at the time series pdf("tsapproval.pdf",width=6,height=3.25) plot(approve,type="l",ylab="Percent Approving",xlab="Time", main = "US Presidential Approval") lines(x=c(8,8),y=c(-1000,1000),col="red") lines(x=c(26,26),y=c(-1000,1000),col="blue") text("9/11",x = 10, y = 40, col="red",cex=0.7) text("Iraq \n War",x = 28, y = 40, col="blue",cex=0.7) dev.off() pdf("tsprice.pdf",width=6,height=3.25) plot(avg.price,type="l",ylab="$ per Barrel",xlab="Time", main = "Average Price of Oil") lines(x=c(8,8),y=c(-1000,1000),col="red") lines(x=c(26,26),y=c(-1000,1000),col="blue") text("9/11",x = 10, y = 175, col="red",cex=0.7) text("Iraq \n War",x = 28, y = 175, col="blue",cex=0.7) dev.off() # Look at the ACF pdf("acfapproval.pdf",width=6,height=3.25) acf(approve) dev.off() pdf("acfprice.pdf",width=6,height=3.25) acf(avg.price) dev.off() # Look at the PACF pdf("pacfapproval.pdf",width=6,height=3.25) pacf(approve) dev.off() pdf("pacfprice.pdf",width=6,height=3.25) pacf(avg.price) dev.off() # Check for a unit root PP.test(approve) adf.test(approve) PP.test(avg.price) adf.test(avg.price) # Consider the first difference of each variable approveLag <- c(NA, approve[1:(length(approve)-1)]) approveDiff <- approve - approveLag avg.priceLag <- c(NA, avg.price[1:(length(avg.price)-1)]) avg.priceDiff <- avg.price - avg.priceLag # Look at the DIFFERENCED time series pdf("tsapprovalDiff.pdf",width=6,height=3.25) plot(approveDiff,type="l",ylab="Change in Percent Approving",xlab="Time", main = "US Presidential Approval") lines(x=c(8,8),y=c(-1000,1000),col="red") lines(x=c(26,26),y=c(-1000,1000),col="blue") text("9/11",x = 10, y = 15, col="red",cex=0.7) text("Iraq \n War",x = 28, y = 15, col="blue",cex=0.7) dev.off() pdf("tspriceDiff.pdf",width=6,height=3.25) plot(avg.priceDiff,type="l",ylab="Change in $ per Barrel",xlab="Time", main = "Average Price of Oil") lines(x=c(8,8),y=c(-1000,1000),col="red") lines(x=c(26,26),y=c(-1000,1000),col="blue") text("9/11",x = 10, y = -30, col="red",cex=0.7) text("Iraq \n War",x = 28, y = -30, col="blue",cex=0.7) dev.off() # Look at the new ACF pdf("acfapprovalDiff.pdf",width=6,height=3.25) acf(approveDiff, na.action=na.pass) dev.off() pdf("acfpriceDiff.pdf",width=6,height=3.25) acf(avg.priceDiff, na.action=na.pass) dev.off() # Look at the new PACF pdf("pacfapprovalDiff.pdf",width=6,height=3.25) pacf(approveDiff, na.action=na.pass) dev.off() pdf("pacfpriceDiff.pdf",width=6,height=3.25) pacf(avg.priceDiff, na.action=na.pass) dev.off() # Check for a unit root in differenced time series PP.test(as.vector(na.omit(approveDiff))) adf.test(na.omit(approveDiff)) PP.test(as.vector(na.omit(avg.priceDiff))) adf.test(na.omit(avg.priceDiff)) ################################################################# ## Model 1a: ARIMA(0,1,0) model of approve ## ## Estimate an ARIMA(0,1,0) using arima xcovariates <- cbind(sept.oct.2001, iraq.war, avg.price) arima.res1a <- arima(approve, order = c(0,1,0), xreg = xcovariates, include.mean = TRUE ) print(arima.res1a) # Extract estimation results from arima.res1a pe.1a <- arima.res1a$coef # parameter estimates (betas) se.1a <- sqrt(diag(arima.res1a$var.coef)) # standard errors ll.1a <- arima.res1a$loglik # log likelihood at its maximum sigma2hat.1a <- arima.res1a$sigma2 # standard error of the regression aic.1a <- arima.res1a$aic # Akaike Information Criterion resid.1a <- arima.res1a$resid # residuals ################################################################# ## Model 1c: ARIMA(1,1,0) model of approve ## ## Estimate an ARIMA(1,1,0) using arima xcovariates <- cbind(sept.oct.2001, iraq.war, avg.price) arima.res1b <- arima(approve, order = c(1,1,0), xreg = xcovariates, include.mean = TRUE ) print(arima.res1b) # Extract estimation results from arima.res1a pe.1b <- arima.res1b$coef # parameter estimates (betas) se.1b <- sqrt(diag(arima.res1b$var.coef)) # standard errors ll.1b <- arima.res1b$loglik # log likelihood at its maximum sigma2hat.1b <- arima.res1b$sigma2 # standard error of the regression aic.1b <- arima.res1b$aic # Akaike Information Criterion resid.1b <- arima.res1b$resid # residuals ################################################################# ## Model 1b: ARIMA(2,1,2) model of approve ## ## Estimate an ARIMA(2,1,2) using arima xcovariates <- cbind(sept.oct.2001, iraq.war, avg.price) arima.res1c <- arima(approve, order = c(2,1,2), xreg = xcovariates, include.mean = TRUE ) print(arima.res1c) # Extract estimation results from arima.res1a pe.1c <- arima.res1c$coef # parameter estimates (betas) se.1c <- sqrt(diag(arima.res1c$var.coef)) # standard errors ll.1c <- arima.res1c$loglik # log likelihood at its maximum sigma2hat.1c <- arima.res1c$sigma2 # standard error of the regression aic.1c <- arima.res1c$aic # Akaike Information Criterion resid.1c <- arima.res1c$resid # residuals ################################################################# ## Model 1d: ARIMA(0,1,0) model of approve including a spurrious regressor ## ## Estimate an ARIMA(0,1,0) using arima xcovariates <- cbind(sept.oct.2001, iraq.war, avg.price, phony) arima.res1d <- arima(approve, order = c(0,1,0), xreg = xcovariates, include.mean = TRUE ) print(arima.res1d) # Extract estimation results from arima.res1a pe.1d <- arima.res1d$coef # parameter estimates (betas) se.1d <- sqrt(diag(arima.res1d$var.coef)) # standard errors ll.1d <- arima.res1d$loglik # log likelihood at its maximum sigma2hat.1d <- arima.res1d$sigma2 # standard error of the regression aic.1d <- arima.res1d$aic # Akaike Information Criterion resid.1d <- arima.res1d$resid # residuals # Based on ACF, PACF, and AIC, let's select Model 1a to be Model 1 arima.res1 <- arima.res1a ## What would happen if we used linear regression on a single lag of approval? lm.res1e <- lm(approve ~ approveLag + sept.oct.2001 + iraq.war + avg.price) print(summary(lm.res1e)) # linear regression with a spurious regressor? lm.res1f <- lm(approve ~ approveLag + sept.oct.2001 + iraq.war + avg.price + phony) print(summary(lm.res1f)) # Check LS result for serial correlation in the first or second order bgtest(lm.res1e,1) bgtest(lm.res1f,2) #### Because Zelig's arima simulator isn't currently available, #### Let's use ldvsimev(). # First we need the simulated parameters sims <- 10000 pe <- arima.res1$coef vc <- arima.res1$var.coef simparams <- mvrnorm(sims, pe, vc) simbetas <- simparams[, (1+sum(arima.res1$arma[1:2])):ncol(simparams)] #simphi <- simparams[, 1:arima.res1$arma[1]] # if AR(1) or greater # Choose counterfactual periods periodsToSim <- seq(from=26, to=65, by=1) nPeriodsToSim <- length(periodsToSim) ## Create hypothetical covariates over these periods # Start with factual values # For ARIMA, need to enter these covariates in same order as model model <- approve ~ sept.oct.2001 + iraq.war + avg.price selectdata <- extractdata(model, approval, na.rm = TRUE) perioddata <- selectdata[periodsToSim,] # Treat factual data as the baseline counterfactual xhyp0 <- subset(perioddata, select = -approve ) # Suppose no Iraq War occurred xhyp <- subset(perioddata, select = -approve ) xhyp$iraq.war <- rep(0,nPeriodsToSim) # Leave out phi because this specification has no AR component # phi <- simphi # Construction of prior lags depends on order of ARIMA # Only need initialY for I(1) # Only need lagY for AR(1) or higher initialY <- selectdata$approve[periodsToSim[1] - 1] # if ARIMA(1,1,0), instead: selectdata$approve[periodsToSim[1] - 2] lagY <- selectdata$approve[periodsToSim[1] - 1] # if ARIMA(1,1,0), append: - selectdata$approve[periodsToSim[1] - 2] # Simulate expected values of Y out to periods # given hypothetical values of X, and an initial level of Y # No Iraq scenario noIraq.ev1 <- ldvsimev(xhyp, # The matrix of hypothetical x's simbetas, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # Column containing the constant; # no constant because model is differenced #phi=phi, # estimated AR parameters; length must match lagY #lagY=lagY, # lags of y, most recent last transform="diff", # "log" to undo log transformation, # "diff" to under first differencing # "difflog" to do both initialY=initialY # for differenced models, lag of level of y ) # Iraq scenario (factual) Iraq.ev1 <- ldvsimev(xhyp0, # The matrix of hypothetical x's simbetas, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # Column containing the constant; # no constant because model is differenced #phi=phi, # estimated AR parameters; length must match lagY #lagY=lagY, # lags of y, most recent last transform="diff", # "log" to undo log transformation, # "diff" to under first differencing # "difflog" to do both initialY=initialY # for differenced models, lag of level of y ) at.xaxis <- seq(from = -1, to = length(approve[1:25]) + nPeriodsToSim, by = 12) lab.xaxis <- seq(from = 2001, by = 1, length.out = length(at.xaxis)) pdf("noIraqARIMA.pdf",width=6,height=3.25) ctrfactTS(observed = approve[1:25], predicted = noIraq.ev1$pe, lower = noIraq.ev1$lower, upper = noIraq.ev1$upper, #se = NULL, predicted0 = Iraq.ev1$pe, lower0 = Iraq.ev1$lower, upper0 = Iraq.ev1$upper, #se0 = NULL, factual = approve[26:65], at.xaxis = at.xaxis, lab.xaxis = lab.xaxis, #ylim = c(0, 100), main = "Ctrfactual Effect of No Iraq War from ARIMA(0,1,0)", xlab = "Year", ylab = "Presidential Approval (%)", col = "red", col0 = "blue") dev.off() # What if we used an ARMA model in this case (ignored possible # unit root)? Would we get better long run estimates? ################################################################# ## Model 2a: ARIMA(1,0,0) model of approve ## ## Estimate an ARIMA(1,0,0) using arima xcovariates <- cbind(sept.oct.2001, iraq.war, avg.price) arima.res2a <- arima(approve, order = c(1,0,0), xreg = xcovariates, include.mean = TRUE ) print(arima.res2a) # Extract estimation results from arima.res1a pe.2a <- arima.res2a$coef # parameter estimates (betas) se.2a <- sqrt(diag(arima.res2a$var.coef)) # standard errors ll.2a <- arima.res2a$loglik # log likelihood at its maximum sigma2hat.2a <- arima.res2a$sigma2 # standard error of the regression aic.2a <- arima.res2a$aic # Akaike Information Criterion resid.2a <- arima.res2a$resid # residuals # First we need the simulated parameters sims <- 10000 pe <- arima.res2a$coef vc <- arima.res2a$var.coef simparams <- mvrnorm(sims, pe, vc) simbetas <- simparams[, (1+sum(arima.res2a$arma[1:2])):ncol(simparams)] simphi <- simparams[, 1:arima.res2a$arma[1]] # if AR(1) or greater # Choose counterfactual periods periodsToSim <- seq(from=26, to=65, by=1) nPeriodsToSim <- length(periodsToSim) ## Create hypothetical covariates over these periods # Start with factual values # For ARIMA, need to enter these covariates in same order as model model <- approve ~ sept.oct.2001 + iraq.war + avg.price selectdata <- extractdata(model, approval, na.rm = TRUE) perioddata <- selectdata[periodsToSim,] # Treat factual data as the baseline counterfactual xhyp0 <- subset(perioddata, select = -approve ) # Suppose no Iraq War occurred xhyp <- subset(perioddata, select = -approve ) xhyp$iraq.war <- rep(0,nPeriodsToSim) # Construction of prior lags depends on order of ARIMA # Only need initialY for I(1) # Only need lagY for AR(1) or higher lagY <- selectdata$approve[periodsToSim[1] - 1] # Simulate expected values of Y out to periods # given hypothetical values of X, and an initial level of Y # No Iraq scenario noIraq.ev1 <- ldvsimev(xhyp, # The matrix of hypothetical x's simbetas, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=1, # Column containing the constant; phi=mean(simphi), # estimated AR parameters; length must match lagY lagY=lagY # lags of y, most recent last ) # Iraq scenario (factual) Iraq.ev1 <- ldvsimev(xhyp0, # The matrix of hypothetical x's simbetas, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=1, # Column containing the constant; phi=mean(simphi), # estimated AR parameters; length must match lagY lagY=lagY # lags of y, most recent last ) at.xaxis <- seq(from = -1, to = length(approve[1:25]) + nPeriodsToSim, by = 12) lab.xaxis <- seq(from = 2001, by = 1, length.out = length(at.xaxis)) pdf("noIraqARMA.pdf",width=6,height=3.25) ctrfactTS(observed = approve[1:25], predicted = noIraq.ev1$pe, lower = noIraq.ev1$lower, upper = noIraq.ev1$upper, #se = NULL, predicted0 = Iraq.ev1$pe, lower0 = Iraq.ev1$lower, upper0 = Iraq.ev1$upper, #se0 = NULL, factual = approve[26:65], at.xaxis = at.xaxis, lab.xaxis = lab.xaxis, #ylim = c(0, 100), main = "Ctrfactual Effect of No Iraq War (Red) from AR(1)", xlab = "Year", ylab = "Presidential Approval (%)", col = "red", col0 = "blue") dev.off() ############# # What if we used the linear regression model with a lagged DV? lm.res3 <- lm(approve ~ approveLag + sept.oct.2001 + iraq.war + avg.price) print(summary(lm.res3)) # Extract estimation results from arima.res1a pe.3 <- coef(lm.res3) # parameter estimates vc.3 <- vcov(lm.res3) # variance-covariance (of params) se.3 <- sqrt(diag(vc.3)) # standard errors # First we need the simulated parameters sims <- 10000 simparams <- mvrnorm(sims, pe.3, vc.3) # Pluck out the lag parameter simphi <- simparams[,2] # Get the rest of the betas simbetas <- simparams[,c(1,3:ncol(simparams))] # Choose counterfactual periods periodsToSim <- seq(from=26, to=65, by=1) nPeriodsToSim <- length(periodsToSim) ## Create hypothetical covariates over these periods # Start with factual values (note I leave out the lag; # ldvsimev() will construct it) model <- approve ~ sept.oct.2001 + iraq.war + avg.price selectdata <- extractdata(model, approval, na.rm = TRUE) perioddata <- selectdata[periodsToSim,] # Treat factual data as the baseline counterfactual xhyp0 <- subset(perioddata, select = -approve ) # Suppose no Iraq War occurred xhyp <- subset(perioddata, select = -approve ) xhyp$iraq.war <- rep(0,nPeriodsToSim) # Construction of prior lags depends on order of ARIMA # Only need initialY for I(1) # Only need lagY for AR(1) or higher lagY <- selectdata$approve[periodsToSim[1] - 1] # Simulate expected values of Y out to periods # given hypothetical values of X, and an initial level of Y # No Iraq scenario noIraq.ev1 <- ldvsimev(xhyp, # The matrix of hypothetical x's simbetas, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=1, # Column containing the constant; phi=mean(simphi), # estimated AR parameters; length must match lagY lagY=lagY # lags of y, most recent last ) # Iraq scenario (factual) Iraq.ev1 <- ldvsimev(xhyp0, # The matrix of hypothetical x's simbetas, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=1, # Column containing the constant; phi=mean(simphi), # estimated AR parameters; length must match lagY lagY=lagY # lags of y, most recent last ) at.xaxis <- seq(from = -1, to = length(approve[1:25]) + nPeriodsToSim, by = 12) lab.xaxis <- seq(from = 2001, by = 1, length.out = length(at.xaxis)) pdf("noIraqLS.pdf",width=6,height=3.25) ctrfactTS(observed = approve[1:25], predicted = noIraq.ev1$pe, lower = noIraq.ev1$lower, upper = noIraq.ev1$upper, #se = NULL, predicted0 = Iraq.ev1$pe, lower0 = Iraq.ev1$lower, upper0 = Iraq.ev1$upper, #se0 = NULL, factual = approve[26:65], at.xaxis = at.xaxis, lab.xaxis = lab.xaxis, #ylim = c(0, 100), main = "Ctrfactual Effect of No Iraq War (Red) from LS with Lagged DV", xlab = "Year", ylab = "Presidential Approval (%)", col = "red", col0 = "blue") dev.off() ################################################ # Cointegration analysis cointvars <- cbind(approve,avg.price) ecm.test1 <- ca.jo(cointvars, ecdet = "const", type="eigen", K=2, spec="longrun", dumvar=cbind(sept.oct.2001,iraq.war) ) ecm.res1 <- cajorls(ecm.test1, r = 1, reg.number = 1) summary(ecm.res1$rlm) ######################################################## # Cointegration analysis with a spurious regressor cointvars <- cbind(approve,avg.price,phony) ecm.test1 <- ca.jo(cointvars, ecdet = "const", type="eigen", K=2, spec="longrun", dumvar=cbind(sept.oct.2001,iraq.war) ) ecm.res1 <- cajorls(ecm.test1, r = 1, reg.number = 1) summary(ecm.res1$rlm)