# Dynamic Panel Data Models (Arellano-Bond, etc.) # for fixed effects in short-T panels # # Model Fitting and Interpretation via Simulation # # Christopher Adolph faculty.washington.edu/cadolph # 8 June 2015 # Clear memory rm(list=ls()) # Load libraries library(plm) # Econometrics package for linear panel models library(tseries) # For ADF unit root test library(simcf) # For panel functions and simulators library(tile) # For visualization of model inference library(RColorBrewer) # For nice colors library(MASS) # For mvrnorm() source("helperCigs.R") # For graphics functions # Load cigarette consumption data (Jonathan Gruber, MIT) # Variables (see codebook): # state year cpi pop packpc income tax avgprs taxs data <- read.csv("cigarette.csv") # Quick inflation adjustment to 1995 dollars inflAdjust <- function(x,cpi,year,target) { unique(cpi[year==target])*x/cpi } data$income95 <- with(data, inflAdjust(income, cpi, year, 1995)) data$tax95 <- with(data, inflAdjust(tax, cpi, year, 1995)) data$avgprs95 <- with(data, inflAdjust(avgprs, cpi, year, 1995)) data$taxs95 <- with(data, inflAdjust(taxs, cpi, year, 1995)) # Create per capita income (in k) data$income95pc <- data$income95/data$pop # Create pretax price, 1995 dollars data$pretax95 <- data$avgprs95 - data$taxs95 # Look at some time series stateList <- unique(as.character(data$state)) nstate <- length(stateList) yearList <- unique(data$year) nyear <- length(yearList) packpcMat <- taxs95Mat <- popMat <- NULL for (istate in 1:nstate) { curstate <- stateList[istate] packpcMat <- cbind(packpcMat, data$packpc[as.character(data$state)==curstate]) taxs95Mat <- cbind(taxs95Mat, data$taxs95[as.character(data$state)==curstate]) popMat <- cbind(popMat, data$pop[as.character(data$state)==curstate]) } packpcWAvg <- apply(packpcMat*popMat, 1, sum)/apply(popMat, 1, sum) taxs95WAvg <- apply(taxs95Mat*popMat, 1, sum)/apply(popMat, 1, sum) packpcTrace <- polylinesTile(y=packpcMat, x=matrix(yearList, nrow=nyear, ncol=nstate), id=matrix(1:nstate, nrow=nyear, ncol=nstate, byrow=TRUE), col="gray75", plot=1) taxs95Trace <- polylinesTile(y=taxs95Mat/100, x=matrix(yearList, nrow=nyear, ncol=nstate), id=matrix(1:nstate, nrow=nyear, ncol=nstate, byrow=TRUE), col="gray75", plot=2) packpcLabels <- textTile(x=rep(1995.75, nstate), y=packpcMat[11,], labels=labelTopBottomK(stateList, packpcMat[11,], 3), cex=0.8, col="gray75", plot=1) taxs95Labels <- textTile(x=rep(1995.75, nstate), y=taxs95Mat[11,]/100, labels=labelTopBottomK(stateList, taxs95Mat[11,], 3), cex=0.8, col="gray75", plot=2) packpcWAvgTrace <- linesTile(y=packpcWAvg, x=yearList, col="red", plot=1) taxs95WAvgTrace <- linesTile(y=taxs95WAvg/100, x=yearList, col="red", plot=2) packpcWAvgLabel <- textTile(y=packpcWAvg[11], x=1995.75, labels="US", cex=0.8, col="red", plot=1) taxs95WAvgLabel <- textTile(y=taxs95WAvg[11]/100, x=1995.75, labels="US", cex=0.8, col="red", plot=2) tile(packpcTrace, taxs95Trace, packpcLabels, taxs95Labels, packpcWAvgTrace, taxs95WAvgTrace, packpcWAvgLabel, taxs95WAvgLabel, limits=rbind(c(1984.5, 1996.5, 45, 210), c(1984.5, 1996.5, 0.25, 1.20)), xaxis=list(at=c(1986, 1988, 1990, 1992, 1994)), yaxis=list(log=TRUE, major=FALSE), plottitle=list(labels1="Packs consumed per capita", labels2="Avg taxes per pack, $1995", y=1), height=list(plottile=3), output=list(file="cigByState", width=8) ) # Are there trends or lags? *to do # Look at some unit root tests *to do # Alternative model specifications model1 <- packpc ~ income95pc + avgprs95 model2 <- packpc ~ income95pc + pretax95 + taxs95 model3 <- log(packpc) ~ log(income95pc) + log(avgprs95) # Simple linear models lm.res1 <- lm(model1, data) lm.res2 <- lm(model2, data) lm.res3 <- lm(model3, data) # Do plm model *to do # Do RE model *to do # Panel based diagnostics available in the plm library # (This package recently expanded to contain many many panel data tests # for serial correlation, fixed effects, and unit roots) # First, create a plm data frame (special data frame that "knows" the # unit variable and time variable pdata <- pdata.frame(data, index=c("state", "year")) # Do an panel unit root test on the undifferenced cigarette data; # there are many options; see ?purtest # Note: for some reason this isn't working ##purtest(packpc~1, data=pdata, test="ips") ##### # Estimate Arellano-Bond GMM for fixed effects with lagged DV # # pgmm needs formulas in a specific format: # 1. in the first part of the RHS, include lags of DV and covariates, as shown # 2. in the second part, include the panel data instruments (99 here means use # up to the 99th lag of the difference as an instrument) # 3. in an optional (not shown) third part of the RHS, include any other instruments # # note that pgmm formulas construct lag() properly for panel data, # though lag() usually doesn't pgmmformula.1a <- packpc ~ lag(packpc, 1) + income95pc + avgprs95 | lag(packpc, 2:99) # We'll run GMM with only unit fixed effects, # but we could include period fixed effects as well by setting effect to "two-way" # (often a good practice in short T panels) pgmm.res1a <- pgmm(pgmmformula.1a, data = pdata, effect = "individual", # should consider two-way for small T transformation = "d") # should do ld if T is small, but only "d" works for T=3 summary(pgmm.res1a) # use summary2() from the helper file if T<5 to avoid a bug in summary.pgmm() # Good Sargan test, Good AR(2) test # Let's consider alternative sets of instruments; concern: distant lags are weak instruments pgmmformula.1b <- packpc ~ lag(packpc, 1) + income95pc + avgprs95 | lag(packpc, 2:5) pgmm.res1b <- pgmm(pgmmformula.1b, data = pdata, effect = "individual", transformation = "d") summary(pgmm.res1b) # Poor Sargan test, Good AR(2) test # Keeping just the most recent two instruments makes no substantive difference pgmmformula.1c <- packpc ~ lag(packpc, 1) + income95pc + avgprs95 | lag(packpc, 2:3) pgmm.res1c <- pgmm(pgmmformula.1c, data = pdata, effect = "individual", transformation = "d") summary(pgmm.res1c) # Poor Sargan test, Good AR(2) test # Slight difference with one instrument, but not substatively noteworthy? pgmmformula.1d <- packpc ~ lag(packpc, 1) + income95pc + avgprs95 | lag(packpc, 2) pgmm.res1d <- pgmm(pgmmformula.1d, data = pdata, effect = "individual", transformation = "d") summary(pgmm.res1d) # Poor Sargan test, Good AR(2) test # Try system GMM with all lags pgmm.res1e <- pgmm(pgmmformula.1a, data = pdata, effect = "individual", transformation = "ld") summary(pgmm.res1e) # Good Sargan test, Good AR(2) test # Try system GMM with only recent lag pgmm.res1f <- pgmm(pgmmformula.1d, data = pdata, effect = "individual", transformation = "ld") summary(pgmm.res1f) # Poor Sargan test, Good AR(2) test # Try difference GMM with two way effects pgmm.res1g <- pgmm(pgmmformula.1a, data = pdata, effect = "twoways", transformation = "d") summary(pgmm.res1g) # Good Sargan test, Good AR(2) test, Wald supports 2-way # Try system GMM with two way effects pgmm.res1h <- pgmm(pgmmformula.1a, data = pdata, effect = "twoways", transformation = "ld") summary(pgmm.res1h) # Good Sargan test, Good AR(2) test, Wald supports 2-way # Now consider last two models with alternative specifications pgmmformula.2a <- packpc ~ lag(packpc, 1) + income95pc + pretax95 + taxs95 | lag(packpc, 2:99) pgmmformula.3a <- log(packpc) ~ lag(log(packpc), 1) + log(income95pc) + log(avgprs95) | lag(log(packpc), 2:99) pgmmformula.4a <- log(packpc) ~ lag(log(packpc), 1) + log(income95pc) + log(pretax95) + log(taxs95) | lag(log(packpc), 2:99) # Model 2: Unique tax effects # Try difference GMM with two way effects pgmm.res2g <- pgmm(pgmmformula.2a, data = pdata, effect = "twoways", transformation = "d") summary(pgmm.res2g) # Good Sargan test, Good AR(2) test, Wald supports 2-way # Try system GMM with two way effects pgmm.res2h <- pgmm(pgmmformula.2a, data = pdata, effect = "twoways", transformation = "ld") summary(pgmm.res2h) # Good Sargan test, Good AR(2) test, Wald supports 2-way # Model 3: Elasticity specification # Try difference GMM with only unit fixed effects pgmm.res3a <- pgmm(pgmmformula.3a, data = pdata, effect = "individual", transformation = "d") summary(pgmm.res3a) # Good Sargan test, Good AR(2) test # Try system GMM with all lags pgmm.res3e <- pgmm(pgmmformula.3a, data = pdata, effect = "individual", transformation = "ld") summary(pgmm.res3e) # Good Sargan test, Good AR(2) test # Try difference GMM with two way effects pgmm.res3g <- pgmm(pgmmformula.3a, data = pdata, effect = "twoways", transformation = "d") summary(pgmm.res3g) # Good Sargan test, Good AR(2) test, Wald supports 2-way # Try system GMM with two way effects pgmm.res3h <- pgmm(pgmmformula.3a, data = pdata, effect = "twoways", transformation = "ld") summary(pgmm.res3h) # Good Sargan test, Good AR(2) test, Wald supports 2-way # Model 4: Elasticity specification, components of price # Try difference GMM with two way effects pgmm.res4g <- pgmm(pgmmformula.4a, data = pdata, effect = "twoways", transformation = "d") summary(pgmm.res4g) # Good Sargan test, Good AR(2) test, Wald supports 2-way # Try system GMM with two way effects pgmm.res4h <- pgmm(pgmmformula.4a, data = pdata, effect = "twoways", transformation = "ld" ) summary(pgmm.res4h) # Good Sargan test, Good AR(2) test, Wald supports 2-way ####### # Simulate conditional forecasts from 8 different models # 1a, 1e, 1g, 1h, 3a, 3e, 3g, 3h # Forecast for 3 years from 1996 to 1998 periods.out <- 3 sims <- 1000 # How big a change in price to simulate? # How about "double" the average tax in the most recent year? summary(pdata$taxs95[pdata$year==1995]) # The average (and median) tax is about 60 cents/pack sd(pdata$taxs95[pdata$year==1995]) # A 60 cent increase would also be about 3 sd's, # and raise the tax to a bit more than the max observed # Other possibilities: # (2) A 10 cent increase # (3) Raise every sate to the max observed for any state in 1995 (112.60 cents) # Construct the year dummies yearfe <- makeFEdummies(pdata$year) yearfe <- yearfe[,3:ncol(yearfe)] # Why drop first 2 col's? yearlist <- unique(pdata$year) yearlist <- yearlist[3:length(yearlist)] colnames(yearfe) <- paste0("y",yearlist) # Construct formulas -- without year dummies (1a) formula.1a <- packpc ~ income95pc + avgprs95 -1 # Construct formulas -- without year dummies but with intercept (1e) formula.1e <- packpc ~ income95pc + avgprs95 # Construct formulas -- with year dummies (1g) formula <- "packpc ~ income95pc + avgprs95 -1" datayearfe <- cbind(pdata,yearfe) yearfenames <- NULL for (i in 1:ncol(yearfe)) { formula <- paste0(formula,"+ y",yearlist[i]," ") yearfenames <- c(yearfenames,paste0("y",yearlist[i])) } names(datayearfe) <- c(names(data),yearfenames) formula.1g <- as.formula(formula) # Construct formulas -- with year dummies and intercept (1h) formula <- "packpc ~ income95pc + avgprs95" datayearfe <- cbind(pdata,yearfe) yearfenames <- NULL for (i in 1:ncol(yearfe)) { formula <- paste0(formula,"+ y",yearlist[i]," ") yearfenames <- c(yearfenames,paste0("y",yearlist[i])) } names(datayearfe) <- c(names(data),yearfenames) formula.1h <- as.formula(formula) # Population in 1995 in average state avgpop1995 <- mean(pdata$pop[pdata$year==1995]) # Forecast: Model 1a, +60 # Simulate parameters simparam.1a <- mvrnorm(sims, coefficients(pgmm.res1a), vcovHC(pgmm.res1a)) simphis.1a <- simparam.1a[,1] simbetas.1a <- simparam.1a[,2:ncol(simparam.1a)] # Make matrix of hypothetical x's: # Assume an average state raised taxes 60 cents starting 1996 # # Make matrix of hypothetical x's: covariates xhyp.1a <- cfMake(formula.1a,datayearfe, periods.out) # pgmm uses covariates in differenced form # so we want most of them to be 0 (no change) # exceptions: # (1) changes in covariates of interest # (2) differenced time dummies require special care xhyp.1a$x <- xhyp.1a$xpre <- 0*xhyp.1a$x xhyp.1a <- cfChange(xhyp.1a, "avgprs95", x=60, scen=1) # We can "ignore" the state fixed effects for now and add them later # because model is total linear # Create baseline scenario xbase.1a <- xhyp.1a xbase.1a$x <- xbase.1a$xpre # We need a lag of the price per pack lagY.1a <- NULL # Hypothetical previous change in Y for simulation for (i in 1:length(pgmm.res1a$model)) lagY.1a <- c(lagY.1a, as.data.frame(pgmm.res1a$model[[i]])["1995",]$packpc) lagY.1a <- mean(lagY.1a, na.rm=TRUE) # Hypothetical initial level of Y for simulation initialY <- mean(pdata$packpc[pdata$year==1995], na.rm=TRUE) # Simulate expected values of Y (on original level scale) # out to periods.out given hypothetical future values of X, # initial lags of the change in Y, and an initial level of Y sim.ev1a <- ldvsimev(xhyp.1a, # The matrix of hypothetical x's simbetas.1a, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # NA indicates no constant! phi=simphis.1a, # estimated AR parameters; length must match lagY lagY=lagY.1a, # lags of y, most recent last transform="diff", # "log" to undo log transformation, # "diff" to under first differencing # "difflog" to do both initialY=initialY # for differenced models, the lag of the level of y ) # Simulate expected values of Y given no change in covariates sim.base1a <- ldvsimev(xbase.1a, # The matrix of hypothetical x's simbetas.1a, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # NA indicates no constant! phi=simphis.1a, # estimated AR parameters; length must match lagY lagY=lagY.1a, # lags of y, most recent last transform="diff", # "log" to undo log transformation, # "diff" to under first differencing # "difflog" to do both initialY=initialY # for differenced models, the lag of the level of y ) # Simulate first differences in y # out to periods.out given hypothetical future values of x, xpre, # and initial lags of the change in y sim.fd1a <- ldvsimfd(xhyp.1a, # The matrix of hypothetical x's simbetas.1a, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # Column containing the constant # set to NA for no constant phi=simphis.1a, # estimated AR parameters; length must match lagY lagY=lagY.1a, # lags of y, most recent last transform="diff", # Model is differenced initialY=initialY # Redundant in this case (fd of linear differenced Y) ) # Simulate relative risks in y # out to periods.out given hypothetical future values of x, xpre, # and initial lags of the change in y sim.rr1a <- ldvsimrr(xhyp.1a, # The matrix of hypothetical x's simbetas.1a, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # Column containing the constant # set to NA for no constant phi=simphis.1a, # estimated AR parameters; length must match lagY lagY=lagY.1a, # lags of y, most recent last transform="diff", # Model is differenced initialY=initialY # Required for differenced Y in ldvsimrr ) # Forecast: Model 1e, +60 # Simulate parameters simparam.1e <- mvrnorm(sims, coefficients(pgmm.res1e), vcovHC(pgmm.res1e)) simphis.1e <- simparam.1e[,1] simbetas.1e <- simparam.1e[,2:ncol(simparam.1e)] # System GMM does NOT difference the covariates # Make matrix of hypothetical x's: # Assume an average state raised taxes 60 cents starting 1996 # # Make matrix of hypothetical x's: covariates xhyp.1e <- cfMake(formula.1a, datayearfe, periods.out) # system pgmm uses covariates in *level* form # -> back to our usual use of simcf; note apply to all 3 periods! xhyp.1e <- cfChange(xhyp.1e, "avgprs95", x=60 + mean(pdata$avgprs95), scen=1:3) # State fixed effects are not removed from the covariates, # but from the instruments (so we can ignore them here) # Create baseline scenario xbase.1e <- xhyp.1e xbase.1e$x <- xbase.1e$xpre # We need a lag of the price per pack, now in levels # But the code above to extract it from the pgmm object won't work! lagY.1e <- mean(pdata$packpc[pdata$year==1995], na.rm=TRUE) # Hypothetical initial level of Y for simulation initialY <- mean(pdata$packpc[pdata$year==1995], na.rm=TRUE) # Simulate expected values of Y (on original level scale) # out to periods.out given hypothetical future values of X, # initial lags of the change in Y, and an initial level of Y sim.ev1e <- ldvsimev(xhyp.1e, # The matrix of hypothetical x's simbetas.1e, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # NA indicates no constant! phi=simphis.1e, # estimated AR parameters; length must match lagY lagY=lagY.1e, # lags of y, most recent last transform="none", # NOTE: System GMM is not differenced! initialY=initialY ) # Simulate expected values of Y given no change in covariates sim.base1e <- ldvsimev(xbase.1e, # The matrix of hypothetical x's simbetas.1e, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # NA indicates no constant! phi=simphis.1e, # estimated AR parameters; length must match lagY lagY=lagY.1e, # lags of y, most recent last transform="none" # NOTE: System GMM is not differenced! ) # Simulate first differences in y # out to periods.out given hypothetical future values of x, xpre, # and initial lags of the change in y sim.fd1e <- ldvsimfd(xhyp.1e, # The matrix of hypothetical x's simbetas.1e, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # Column containing the constant # set to NA for no constant phi=simphis.1e, # estimated AR parameters; length must match lagY lagY=lagY.1e, # lags of y, most recent last transform="none" # NOTE: System GMM is not differenced! ) # Simulate relative risks in y # out to periods.out given hypothetical future values of x, xpre, # and initial lags of the change in y sim.rr1e <- ldvsimrr(xhyp.1e, # The matrix of hypothetical x's simbetas.1e, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # Column containing the constant # set to NA for no constant phi=simphis.1e, # estimated AR parameters; length must match lagY lagY=lagY.1e, # lags of y, most recent last transform="none" # NOTE: System GMM is not differenced! ) # Forecast: Model 1g, +60 # Simulate parameters simparam.1g <- mvrnorm(sims, coefficients(pgmm.res1g), vcovHC(pgmm.res1g)) simphis.1g <- simparam.1g[,1] simbetas.1g <- simparam.1g[,2:ncol(simparam.1g)] # Make matrix of hypothetical x's: # Assume an average state raised taxes 60 cents starting 1996 # # Issues -- we need to somehow include the state and year FEs: # Let's set the state to be an "average" state in 1995, # and year to be like the last year (1995) # Make matrix of hypothetical x's: covariates xhyp.1g <- cfMake(formula.1g, datayearfe, periods.out) # pgmm uses covariates in differenced form # so we want most of them to be 0 (no change) # exceptions: # (1) changes in covariates of interest # (2) differenced time dummies require special care xhyp.1g$x <- xhyp.1g$xpre <- 0*xhyp.1g$x xhyp.1g <- cfChange(xhyp.1g, "avgprs95", x=60, scen=1) # The lines below would "repeat" the 1995 year effect in each # subsequent period -- but this is an aberrant year effect #xhyp.1g <- cfChange(xhyp.1g, "y1994", x=-1, xpre=-1, scen=1:3) #xhyp.1g <- cfChange(xhyp.1g, "y1995", x=1, xpre=1, scen=1:3) # The current trend seems to start in 1993; we will average over the # the last three years of year effects: # = 1/3*( (tau93 - tau92) - (tau94 - tau93) - (tau95 - tau94) ) # = 1/3*tau95 - 1/3*tau92 xhyp.1g <- cfChange(xhyp.1g, "y1992", x=-1/3, xpre=-1/3, scen=1:3) xhyp.1g <- cfChange(xhyp.1g, "y1995", x=1/3, xpre=1/3, scen=1:3) # We can "ignore" the state fixed effects for now and add them later # because model is total linear # Create baseline scenario xbase.1g <- xhyp.1g xbase.1g$x <- xbase.1g$xpre # We need a lag of the price per pack lagY.1g <- NULL # Hypothetical previous change in Y for simulation for (i in 1:length(pgmm.res1g$model)) lagY.1g <- c(lagY.1g, as.data.frame(pgmm.res1g$model[[i]])["1995",]$packpc) lagY.1g <- mean(lagY.1g, na.rm=TRUE) # Hypothetical initial level of Y for simulation initialY <- mean(pdata$packpc[pdata$year==1995], na.rm=TRUE) # Simulate expected values of Y (on original level scale) # out to periods.out given hypothetical future values of X, # initial lags of the change in Y, and an initial level of Y sim.ev1g <- ldvsimev(xhyp.1g, # The matrix of hypothetical x's simbetas.1g, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # NA indicates no constant! phi=simphis.1g, # estimated AR parameters; length must match lagY lagY=lagY.1g, # lags of y, most recent last transform="diff", # "log" to undo log transformation, # "diff" to under first differencing # "difflog" to do both initialY=initialY # for differenced models, the lag of the level of y ) # Simulate expected values of Y given no change in covariates sim.base1g <- ldvsimev(xbase.1g, # The matrix of hypothetical x's simbetas.1g, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # NA indicates no constant! phi=simphis.1g, # estimated AR parameters; length must match lagY lagY=lagY.1g, # lags of y, most recent last transform="diff", # "log" to undo log transformation, # "diff" to under first differencing # "difflog" to do both initialY=initialY # for differenced models, the lag of the level of y ) # Simulate first differences in y # out to periods.out given hypothetical future values of x, xpre, # and initial lags of the change in y sim.fd1g <- ldvsimfd(xhyp.1g, # The matrix of hypothetical x's simbetas.1g, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # Column containing the constant # set to NA for no constant phi=simphis.1g, # estimated AR parameters; length must match lagY lagY=lagY.1g, # lags of y, most recent last transform="diff", # Model is differenced initialY=initialY # Redundant in this case (fd of linear differenced Y) ) # Simulate relative risks in y # out to periods.out given hypothetical future values of x, xpre, # and initial lags of the change in y sim.rr1g <- ldvsimrr(xhyp.1g, # The matrix of hypothetical x's simbetas.1g, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # Column containing the constant # set to NA for no constant phi=simphis.1g, # estimated AR parameters; length must match lagY lagY=lagY.1g, # lags of y, most recent last transform="diff", # Model is differenced initialY=initialY # Required for differenced Y in ldvsimrr ) # Forecast: Model 1h, +60 # Simulate parameters simparam.1h <- mvrnorm(sims, coefficients(pgmm.res1h), vcovHC(pgmm.res1h)) simphis.1h <- simparam.1h[,1] simbetas.1h <- simparam.1h[,2:ncol(simparam.1h)] # System GMM does NOT difference the covariates # -> with 2-way effects, the model has a constant, # which pgmm() puts in an odd place # Move the constant to the front of the matrix! simbetas.1h <- cbind(simbetas.1h[,3], simbetas.1h[,-3]) # Make matrix of hypothetical x's: # Assume an average state raised taxes 60 cents starting 1996 # # Issues -- we need to somehow include the state and year FEs: # Let's set the state to be an "average" state in 1995, # and year to be like the last year (1995) # Make matrix of hypothetical x's: covariates xhyp.1h <- cfMake(formula.1h, datayearfe, periods.out) # system pgmm uses covariates in *level* form # -> back to our usual use of simcf; note apply to all 3 periods! xhyp.1h <- cfChange(xhyp.1h, "avgprs95", x=60 + mean(pdata$avgprs95), scen=1:3) # The current trend seems to start in 1993; we will average over the # the last three years of year effects: xhyp.1h <- cfChange(xhyp.1h, "y1987", x=0, xpre=0, scen=1:3) xhyp.1h <- cfChange(xhyp.1h, "y1988", x=0, xpre=0, scen=1:3) xhyp.1h <- cfChange(xhyp.1h, "y1989", x=0, xpre=0, scen=1:3) xhyp.1h <- cfChange(xhyp.1h, "y1990", x=0, xpre=0, scen=1:3) xhyp.1h <- cfChange(xhyp.1h, "y1991", x=0, xpre=0, scen=1:3) xhyp.1h <- cfChange(xhyp.1h, "y1992", x=0, xpre=0, scen=1:3) xhyp.1h <- cfChange(xhyp.1h, "y1993", x=1/3, xpre=1/3, scen=1:3) xhyp.1h <- cfChange(xhyp.1h, "y1994", x=1/3, xpre=1/3, scen=1:3) xhyp.1h <- cfChange(xhyp.1h, "y1995", x=1/3, xpre=1/3, scen=1:3) # State fixed effects are not removed from the covariates, # but from the instruments (so we can ignore them here) # Create baseline scenario xbase.1h <- xhyp.1h xbase.1h$x <- xbase.1h$xpre # We need a lag of the price per pack, now in levels # But the code above to extract it from the pgmm object won't work! lagY.1h <- mean(pdata$packpc[pdata$year==1995], na.rm=TRUE) # Hypothetical initial level of Y for simulation initialY <- mean(pdata$packpc[pdata$year==1995], na.rm=TRUE) # Simulate expected values of Y (on original level scale) # out to periods.out given hypothetical future values of X, # initial lags of the change in Y, and an initial level of Y sim.ev1h <- ldvsimev(xhyp.1h, # The matrix of hypothetical x's simbetas.1h, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=1, # NOTE: System GMM has a constant! # You will need to note the column of the constant in simbetas phi=simphis.1h, # estimated AR parameters; length must match lagY lagY=lagY.1h, # lags of y, most recent last transform="none" # NOTE: System GMM is not differenced! ) # Simulate expected values of Y given no change in covariates sim.base1h <- ldvsimev(xbase.1h, # The matrix of hypothetical x's simbetas.1h, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=1, # NOTE: System GMM has a constant! # You will need to note the column of the constant in simbetas phi=simphis.1h, # estimated AR parameters; length must match lagY lagY=lagY.1h, # lags of y, most recent last transform="none" # NOTE: System GMM is not differenced! ) # Simulate first differences in y # out to periods.out given hypothetical future values of x, xpre, # and initial lags of the change in y sim.fd1h <- ldvsimfd(xhyp.1h, # The matrix of hypothetical x's simbetas.1h, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=1, # Column containing the constant # set to NA for no constant phi=simphis.1h, # estimated AR parameters; length must match lagY lagY=lagY.1h, # lags of y, most recent last transform="none" # NOTE: System GMM is not differenced! ) # Simulate relative risks in y # out to periods.out given hypothetical future values of x, xpre, # and initial lags of the change in y sim.rr1h <- ldvsimrr(xhyp.1h, # The matrix of hypothetical x's simbetas.1h, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=1, # Column containing the constant # set to NA for no constant phi=simphis.1h, # estimated AR parameters; length must match lagY lagY=lagY.1h, # lags of y, most recent last transform="none" # NOTE: System GMM is not differenced! ) ####### # Try log specification # Forecast: Model 3a, +60 # Simulate parameters simparam.3a <- mvrnorm(sims, coefficients(pgmm.res3a), vcovHC(pgmm.res3a)) simphis.3a <- simparam.3a[,1] simbetas.3a <- simparam.3a[,2:ncol(simparam.3a)] # Make matrix of hypothetical x's: # Assume an average state raised taxes 60 cents starting 1996 # # Make matrix of hypothetical x's: covariates xhyp.3a <- cfMake(formula.1a, datayearfe, periods.out) # pgmm uses covariates in differenced form # so we want most of them to be 0 (no change) # exceptions: # (1) changes in covariates of interest # (2) differenced time dummies require special care xhyp.3a$x <- xhyp.3a$xpre <- 0*xhyp.3a$x # Need log version of differenced key covariate (doubling tax in avg state) meanPrice95 <- mean(pdata$avgprs95[pdata$year==1995], na.rm=TRUE) meanTaxs95 <- mean(pdata$taxs95[pdata$year==1995], na.rm=TRUE) xhyp.3a <- cfChange(xhyp.3a, "avgprs95", x=log(meanPrice95+meanTaxs95) - log(meanPrice95), scen=1) # We can "ignore" the state fixed effects for now and add them later # because model is total linear # Create baseline scenario xbase.3a <- xhyp.3a xbase.3a$x <- xbase.3a$xpre # We need a lag of the price per pack lagY.3a <- NULL # Hypothetical previous change in Y for simulation for (i in 1:length(pgmm.res3a$model)) lagY.3a <- c(lagY.3a, as.data.frame(pgmm.res3a$model[[i]])["1995",1]) lagY.3a <- mean(lagY.3a, na.rm=TRUE) # Hypothetical initial level of Y for simulation initialY <- mean(pdata$packpc[pdata$year==1995], na.rm=TRUE) # Simulate expected values of Y (on original level scale) # out to periods.out given hypothetical future values of X, # initial lags of the change in Y, and an initial level of Y sim.ev3a <- ldvsimev(xhyp.3a, # The matrix of hypothetical x's simbetas.3a, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # NA indicates no constant! phi=simphis.3a, # estimated AR parameters; length must match lagY lagY=lagY.3a, # lags of y, most recent last transform="difflog", # "log" to undo log transformation, # "diff" to under first differencing # "difflog" to do both initialY=initialY # for differenced models, the lag of the level of y ) # Simulate expected values of Y given no change in covariates sim.base3a <- ldvsimev(xbase.3a, # The matrix of hypothetical x's simbetas.3a, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # NA indicates no constant! phi=simphis.3a, # estimated AR parameters; length must match lagY lagY=lagY.3a, # lags of y, most recent last transform="difflog", # "log" to undo log transformation, # "diff" to under first differencing # "difflog" to do both initialY=initialY # for differenced models, the lag of the level of y ) # Simulate first differences in y # out to periods.out given hypothetical future values of x, xpre, # and initial lags of the change in y sim.fd3a <- ldvsimfd(xhyp.3a, # The matrix of hypothetical x's simbetas.3a, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # Column containing the constant # set to NA for no constant phi=simphis.3a, # estimated AR parameters; length must match lagY lagY=lagY.3a, # lags of y, most recent last transform="difflog",# Model is differenced initialY=initialY # Required in this case (fd of differenced log Y) ) # Simulate relative risks in y # out to periods.out given hypothetical future values of x, xpre, # and initial lags of the change in y sim.rr3a <- ldvsimrr(xhyp.3a, # The matrix of hypothetical x's simbetas.3a, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # Column containing the constant # set to NA for no constant phi=simphis.3a, # estimated AR parameters; length must match lagY lagY=lagY.3a, # lags of y, most recent last transform="difflog",# Model is differenced initialY=initialY # Required for differenced Y in ldvsimrr ) # Forecast: Model 3e, +60 # Because system GMM is in levels, it is convenient to # handle logging through the formula combined with simcf formula.3e <- log(packpc) ~ log(income95pc) + log(avgprs95) -1 # Simulate parameters simparam.3e <- mvrnorm(sims, coefficients(pgmm.res3e), vcovHC(pgmm.res3e)) simphis.3e <- simparam.3e[,1] simbetas.3e <- simparam.3e[,2:ncol(simparam.3e)] # System GMM does NOT difference the covariates # Make matrix of hypothetical x's: # Assume an average state raised taxes 60 cents starting 1996 # # Make matrix of hypothetical x's: covariates xhyp.3e <- cfMake(formula.3e, datayearfe, periods.out) # system pgmm uses covariates in *level* form # -> back to our usual use of simcf; note apply to all 3 periods! xhyp.3e <- cfChange(xhyp.3e, "avgprs95", x=60 + mean(pdata$avgprs95), scen=1:3) # State fixed effects are not removed from the covariates, # but from the instruments (so we can ignore them here) # Create baseline scenario xbase.3e <- xhyp.3e xbase.3e$x <- xbase.3e$xpre # We need a lag of the price per pack, now in logged levels # But the code above to extract it from the pgmm object won't work! # Getting this right is crucial lagY.3e <- log(mean(pdata$packpc[pdata$year==1995], na.rm=TRUE)) # Hypothetical initial level of Y for simulation # Still in linear levels initialY <- mean(pdata$packpc[pdata$year==1995], na.rm=TRUE) # Simulate expected values of Y (on original level scale) # out to periods.out given hypothetical future values of X, # initial lags of the change in Y, and an initial level of Y sim.ev3e <- ldvsimev(xhyp.3e, # The matrix of hypothetical x's simbetas.3e, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # NA indicates no constant! phi=simphis.3e, # estimated AR parameters; length must match lagY lagY=lagY.3e, # lags of y, most recent last transform="log" # NOTE: System GMM is not differenced! ) # Simulate expected values of Y given no change in covariates sim.base3e <- ldvsimev(xbase.3e, # The matrix of hypothetical x's simbetas.3e, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # NA indicates no constant! phi=simphis.3e, # estimated AR parameters; length must match lagY lagY=lagY.3e, # lags of y, most recent last transform="log", # NOTE: System GMM is not differenced! ) # Simulate first differences in y # out to periods.out given hypothetical future values of x, xpre, # and initial lags of the change in y sim.fd3e <- ldvsimfd(xhyp.3e, # The matrix of hypothetical x's simbetas.3e, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # Column containing the constant # set to NA for no constant phi=simphis.3e, # estimated AR parameters; length must match lagY lagY=lagY.3e, # lags of y, most recent last transform="log" # NOTE: System GMM is not differenced! ) # Simulate relative risks in y # out to periods.out given hypothetical future values of x, xpre, # and initial lags of the change in y sim.rr3e <- ldvsimrr(xhyp.3e, # The matrix of hypothetical x's simbetas.3e, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # Column containing the constant # set to NA for no constant phi=simphis.3e, # estimated AR parameters; length must match lagY lagY=lagY.3e, # lags of y, most recent last transform="log" # NOTE: System GMM is not differenced! ) # Forecast: Model 3g, +60 simparam.3g <- mvrnorm(sims, coefficients(pgmm.res3g), vcovHC(pgmm.res3g)) simphis.3g <- simparam.3g[,1] simbetas.3g <- simparam.3g[,2:ncol(simparam.3g)] # Make matrix of hypothetical x's: # Assume an average state raised taxes 60 cents starting 1996 # # Issues -- we need to somehow include the state and year FEs: # Let's set the state to be an "average" state in 1995, # and year to be like the last year (1995) # Make matrix of hypothetical x's: covariates # Still use the 1g formula (no logs) -- we will handle logging manually # to get the differences of logs right xhyp.3g <- cfMake(formula.1g, datayearfe, periods.out) # pgmm uses covariates in differenced form # so we want most of them to be 0 (no change) # exceptions: # (1) changes in covariates of interest # (2) differenced time dummies need special care xhyp.3g$x <- xhyp.3g$xpre <- 0*xhyp.3g$x # Need log version of differenced key covariate (doubling tax in avg state) meanPrice95 <- mean(pdata$avgprs95[pdata$year==1995], na.rm=TRUE) meanTaxs95 <- mean(pdata$taxs95[pdata$year==1995], na.rm=TRUE) xhyp.3g <- cfChange(xhyp.3g, "avgprs95", x=log(meanPrice95+meanTaxs95) - log(meanPrice95), scen=1) # The lines below would "repeat" the 1995 year effect in each # subsequent period -- but this is an aberrant year effect #xhyp.1g <- cfChange(xhyp.1g, "y1994", x=-1, xpre=-1, scen=1:3) #xhyp.1g <- cfChange(xhyp.1g, "y1995", x=1, xpre=1, scen=1:3) # The current trend seems to start in 1993; we will average over the # the last three years of year effects: # = 1/3*( (tau93 - tau92) - (tau94 - tau93) - (tau95 - tau94) ) # = 1/3*tau95 - 1/3*tau92 xhyp.3g <- cfChange(xhyp.3g, "y1992", x=-1/3, xpre=-1/3, scen=1:3) xhyp.3g <- cfChange(xhyp.3g, "y1995", x=1/3, xpre=1/3, scen=1:3) # We can "ignore" the state fixed effects for now and add them later # because model is total linear # Create baseline scenario xbase.3g <- xhyp.3g xbase.3g$x <- xbase.3g$xpre # We need a lag of the price per pack lagY.3g <- NULL # Hypothetical previous change in Y for simulation for (i in 1:length(pgmm.res3g$model)) lagY.3g <- c(lagY.3g, as.data.frame(pgmm.res3g$model[[i]])["1995",1]) lagY.3g <- mean(lagY.3g, na.rm=TRUE) initialY <- mean(pdata$packpc[pdata$year==1995], na.rm=TRUE) # Hypothetical initial level of Y for simulation # Simulate expected values of Y (on original level scale) # out to periods.out given hypothetical future values of X, # initial lags of the change in Y, and an initial level of Y sim.ev3g <- ldvsimev(xhyp.3g, # The matrix of hypothetical x's simbetas.3g, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # NA indicates no constant! phi=simphis.3g, # estimated AR parameters; length must match lagY lagY=lagY.3g, # lags of y, most recent last transform="difflog", # "log" to undo log transformation, # "diff" to under first differencing # "difflog" to do both initialY=initialY # for differenced models, the lag of the level of y ) # Simulate expected values of Y given no change in covariates sim.base3g <- ldvsimev(xbase.3g, # The matrix of hypothetical x's simbetas.3g, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # NA indicates no constant! phi=simphis.3g, # estimated AR parameters; length must match lagY lagY=lagY.3g, # lags of y, most recent last transform="difflog", # "log" to undo log transformation, # "diff" to under first differencing # "difflog" to do both initialY=initialY # for differenced models, the lag of the level of y ) # Simulate first differences in y (as cumulated changes) # out to periods.out given hypothetical future values of x, xpre, # and initial lags of the change in y sim.fd3g <- ldvsimfd(xhyp.3g, # The matrix of hypothetical x's simbetas.3g, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # Column containing the constant # set to NA for no constant phi=simphis.3g, # estimated AR parameters; length must match lagY lagY=lagY.3g, # lags of y, most recent last transform="difflog",# Model is differenced logs initialY=initialY # Required in this case (fd of differenced log Y) ) # Simulate relative risks in y # out to periods.out given hypothetical future values of x, xpre, # and initial lags of the change in y sim.rr3g <- ldvsimrr(xhyp.3g, # The matrix of hypothetical x's simbetas.3g, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # Column containing the constant # set to NA for no constant phi=simphis.3g, # estimated AR parameters; length must match lagY lagY=lagY.3g, # lags of y, most recent last transform="difflog",# Model is differenced logs initialY=initialY # Required for differenced Y in ldvsimrr ) # Forecast: Model 3h, +60 # Because system GMM is in levels, it is convenient to # handle logging through the formula combined with simcf formula <- "log(packpc) ~ log(income95pc) + log(avgprs95)" datayearfe <- cbind(pdata,yearfe) yearfenames <- NULL for (i in 1:ncol(yearfe)) { formula <- paste0(formula,"+ y",yearlist[i]," ") yearfenames <- c(yearfenames,paste0("y",yearlist[i])) } names(datayearfe) <- c(names(data),yearfenames) formula.3h <- as.formula(formula) # Simulate parameters simparam.3h <- mvrnorm(sims, coefficients(pgmm.res3h), vcovHC(pgmm.res3h)) simphis.3h <- simparam.3h[,1] simbetas.3h <- simparam.3h[,2:ncol(simparam.3h)] # System GMM does NOT difference the covariates # -> the model has a constant, which pgmm() puts in an odd place # Move the constant to the front of the matrix! simbetas.3h <- cbind(simbetas.3h[,3], simbetas.3h[,-3]) # Make matrix of hypothetical x's: # Assume an average state raised taxes 60 cents starting 1996 # # Issues -- we need to somehow include the state and year FEs: # Let's set the state to be an "average" state in 1995, # and year to be like the last year (1995) # Make matrix of hypothetical x's: covariates xhyp.3h <- cfMake(formula.3h, datayearfe, periods.out) # system pgmm uses covariates in *level* form # -> back to our usual use of simcf; let simcf handle logging here. # The current trend seems to start in 1993; we will average over the # the last three years of year effects: xhyp.3h <- cfChange(xhyp.3h, "avgprs95", x=60 + mean(pdata$avgprs95), scen=1:3) xhyp.3h <- cfChange(xhyp.3h, "y1987", x=0, xpre=0, scen=1:3) xhyp.3h <- cfChange(xhyp.3h, "y1988", x=0, xpre=0, scen=1:3) xhyp.3h <- cfChange(xhyp.3h, "y1989", x=0, xpre=0, scen=1:3) xhyp.3h <- cfChange(xhyp.3h, "y1990", x=0, xpre=0, scen=1:3) xhyp.3h <- cfChange(xhyp.3h, "y1991", x=0, xpre=0, scen=1:3) xhyp.3h <- cfChange(xhyp.3h, "y1992", x=0, xpre=0, scen=1:3) xhyp.3h <- cfChange(xhyp.3h, "y1993", x=1/3, xpre=1/3, scen=1:3) xhyp.3h <- cfChange(xhyp.3h, "y1994", x=1/3, xpre=1/3, scen=1:3) xhyp.3h <- cfChange(xhyp.3h, "y1995", x=1/3, xpre=1/3, scen=1:3) # State fixed effects are not removed from the covariates, # but from the instruments (so we can ignore them here) # Create baseline scenario xbase.3h <- xhyp.3h xbase.3h$x <- xbase.3h$xpre # We need a lag of the price per pack, now in logged levels # But the code above to extract it from the pgmm object won't work! # Getting this right is crucial lagY.3h <- log(mean(pdata$packpc[pdata$year==1995], na.rm=TRUE)) # Hypothetical initial level of Y for simulation # Still in linear levels initialY <- mean(pdata$packpc[pdata$year==1995], na.rm=TRUE) # Simulate expected values of Y (on original level scale) # out to periods.out given hypothetical future values of X, # initial lags of the change in Y, and an initial level of Y sim.ev3h <- ldvsimev(xhyp.3h, # The matrix of hypothetical x's simbetas.3h, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=1, # NOTE: System GMM with two-way effects has a constant! # You will need to note the column of the constant in simbetas phi=simphis.3h, # estimated AR parameters; length must match lagY lagY=lagY.3h, # lags of y, most recent last transform="log" # NOTE: System GMM is not differenced! ) # Simulate expected values of Y given no change in covariates sim.base3h <- ldvsimev(xbase.3h, # The matrix of hypothetical x's simbetas.3h, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=1, # NOTE: System GMM with two-way effects has a constant! # You will need to note the column of the constant in simbetas phi=simphis.3h, # estimated AR parameters; length must match lagY lagY=lagY.3h, # lags of y, most recent last transform="log" # NOTE: System GMM is not differenced! ) # Simulate first differences in y # out to periods.out given hypothetical future values of x, xpre, # and initial lags of the change in y sim.fd3h <- ldvsimfd(xhyp.3h, # The matrix of hypothetical x's simbetas.3h, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=1, # Column containing the constant # set to NA for no constant phi=simphis.3h, # estimated AR parameters; length must match lagY lagY=lagY.3h, # lags of y, most recent last transform="log" # NOTE: System GMM is not differenced! ) # Simulate relative risks in y # out to periods.out given hypothetical future values of x, xpre, # and initial lags of the change in y sim.rr3h <- ldvsimrr(xhyp.3h, # The matrix of hypothetical x's simbetas.3h, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=1, # Column containing the constant # set to NA for no constant phi=simphis.3h, # estimated AR parameters; length must match lagY lagY=lagY.3h, # lags of y, most recent last transform="log" # NOTE: System GMM is not differenced! ) # Make plots of expected values, first differences, and percent changes # using custom tile code in helperCigs.R # Hypothetical initial level of Y for simulation initialY <- mean(pdata$packpc[pdata$year==1995], na.rm=TRUE) # axis limits limitsEV <- c(50, 105) limitsFD <- c(-40, 5) limitsRR <- c(-40, 5) # Model 1a Lineplot: Packs cigLineplots(sim.ev1a, sim.base1a, sim.fd1a, sim.rr1a, limitsEV, limitsFD, limitsRR, initialY, "Cigarette Taxes & Consumption: 1a. Linear Difference GMM, Individual Effects", "m1aEVFDRR" ) # Model 1e Lineplot: Packs cigLineplots(sim.ev1e, sim.base1e, sim.fd1e, sim.rr1e, limitsEV, limitsFD, limitsRR, initialY, "Cigarette Taxes & Consumption: 1e. Linear System GMM, Individual Effects", "m1eEVFDRR" ) # Model 1g Lineplot: Packs cigLineplots(sim.ev1g, sim.base1g, sim.fd1g, sim.rr1g, limitsEV, limitsFD, limitsRR, initialY, "Cigarette Taxes & Consumption: 1g. Linear Difference GMM, Two-Way Effects", "m1gEVFDRR" ) # Model 1h Lineplot: Packs cigLineplots(sim.ev1h, sim.base1h, sim.fd1h, sim.rr1h, limitsEV, limitsFD, limitsRR, initialY, "Cigarette Taxes & Consumption: 1h. Linear System GMM, Two-Way Effects", "m1hEVFDRR" ) # Model 3a Lineplot: Packs cigLineplots(sim.ev3a, sim.base3a, sim.fd3a, sim.rr3a, limitsEV, limitsFD, limitsRR, initialY, "Cigarette Taxes & Consumption: 3a. Log-Log Difference GMM, Individual Effects", "m3aEVFDRR" ) # Model 3e Lineplot: Packs cigLineplots(sim.ev3e, sim.base3e, sim.fd3e, sim.rr3e, limitsEV, limitsFD, limitsRR, initialY, "Cigarette Taxes & Consumption: 3e. Log-Log System GMM, Individual Effects", "m3eEVFDRR" ) # Model 3g Lineplot: Packs cigLineplots(sim.ev3g, sim.base3g, sim.fd3g, sim.rr3g, limitsEV, limitsFD, limitsRR, initialY, "Cigarette Taxes & Consumption: 3g. Log-Log Difference GMM, Two-Way Effects", "m3gEVFDRR" ) # Model 3h Lineplot: Packs cigLineplots(sim.ev3h, sim.base3h, sim.fd3h, sim.rr3h, limitsEV, limitsFD, limitsRR, initialY, "Cigarette Taxes & Consumption: 3h. Log-Log System GMM, Two-Way Effects", "m3hEVFDRR" ) ##### # Ropeladder to show model robustness (with CIs) : FD k <- 1 traceDiffGMM1yearFD <- ropeladder(x=c(sim.fd1a$pe[k], sim.fd1g$pe[k], sim.fd3a$pe[k], sim.fd3g$pe[k]), lower=c(sim.fd1a$lower[k], sim.fd1g$lower[k], sim.fd3a$lower[k], sim.fd3g$lower[k]), upper=c(sim.fd1a$upper[k], sim.fd1g$upper[k], sim.fd3a$upper[k], sim.fd3g$upper[k]), labels=c("Linear / no trend", "Linear / year effects", "Log-Log / no trend", "Log-Log / year effects "), sublabels="diff GMM", entryheight=0.25, subentryheight=0.8, sublabelsX=.14, plot=1 ) traceSysGMM1yearFD <- ropeladder(x=c(sim.fd1e$pe[k], sim.fd1h$pe[k], sim.fd3e$pe[k], sim.fd3h$pe[k]), lower=c(sim.fd1e$lower[k], sim.fd1h$lower[k], sim.fd3e$lower[k], sim.fd3h$lower[k]), upper=c(sim.fd1e$upper[k], sim.fd1h$upper[k], sim.fd3e$upper[k], sim.fd3h$upper[k]), sublabels="sys GMM", sublabelsX=.14, plot=1 ) k <- 3 traceDiffGMM3yearFD <- ropeladder(x=c(sim.fd1a$pe[k], sim.fd1g$pe[k], sim.fd3a$pe[k], sim.fd3g$pe[k]), lower=c(sim.fd1a$lower[k], sim.fd1g$lower[k], sim.fd3a$lower[k], sim.fd3g$lower[k]), upper=c(sim.fd1a$upper[k], sim.fd1g$upper[k], sim.fd3a$upper[k], sim.fd3g$upper[k]), sublabelsX=-45, plot=2 ) traceSysGMM3yearFD <- ropeladder(x=c(sim.fd1e$pe[k], sim.fd1h$pe[k], sim.fd3e$pe[k], sim.fd3h$pe[k]), lower=c(sim.fd1e$lower[k], sim.fd1h$lower[k], sim.fd3e$lower[k], sim.fd3h$lower[k]), upper=c(sim.fd1e$upper[k], sim.fd1h$upper[k], sim.fd3e$upper[k], sim.fd3h$upper[k]), sublabelsX=-45, plot=2 ) vertmark <- linesTile(x=c(0,0), y=c(0,1), plot=1:2) tile(traceDiffGMM1yearFD, traceSysGMM1yearFD, traceDiffGMM3yearFD, traceSysGMM3yearFD, vertmark, limits=rbind(c(-50,10), c(-50,10)), xaxis=list(at1=seq(-30,0,10), at2=seq(-40,0,10)), topaxis=list(at1=seq(-30,0,10), at2=seq(-40,0,10), add=TRUE), topaxistitle=list(labels1="Change in Packs pc, +1 years", labels2="Change in Packs pc, +3 years"), gridlines=list(type="xt"), output=list(file="FDrobustnessCigsHC", width=8.5) ) # Make a ropeladder to show model robustness (with CIs) : RR # helper function to convert risk ratios to percent change rr2pc <- function(x) 100*(x-1) k <- 1 traceDiffGMM1yearRR <- ropeladder(x=rr2pc(c(sim.rr1a$pe[k], sim.rr1g$pe[k], sim.rr3a$pe[k], sim.rr3g$pe[k])), lower=rr2pc(c(sim.rr1a$lower[k], sim.rr1g$lower[k], sim.rr3a$lower[k], sim.rr3g$lower[k])), upper=rr2pc(c(sim.rr1a$upper[k], sim.rr1g$upper[k], sim.rr3a$upper[k], sim.rr3g$upper[k])), labels=c("Linear / no trend", "Linear / year effects", "Log-Log / no trend", "Log-Log / year effects "), sublabels="diff GMM", entryheight=0.25, subentryheight=0.8, sublabelsX=.14, plot=1 ) traceSysGMM1yearRR <- ropeladder(x=rr2pc(c(sim.rr1e$pe[k], sim.rr1h$pe[k], sim.rr3e$pe[k], sim.rr3h$pe[k])), lower=rr2pc(c(sim.rr1e$lower[k], sim.rr1h$lower[k], sim.rr3e$lower[k], sim.rr3h$lower[k])), upper=rr2pc(c(sim.rr1e$upper[k], sim.rr1h$upper[k], sim.rr3e$upper[k], sim.rr3h$upper[k])), sublabels="sys GMM", sublabelsX=.14, plot=1 ) k <- 3 traceDiffGMM3yearRR <- ropeladder(x=rr2pc(c(sim.rr1a$pe[k], sim.rr1g$pe[k], sim.rr3a$pe[k], sim.rr3g$pe[k])), lower=rr2pc(c(sim.rr1a$lower[k], sim.rr1g$lower[k], sim.rr3a$lower[k], sim.rr3g$lower[k])), upper=rr2pc(c(sim.rr1a$upper[k], sim.rr1g$upper[k], sim.rr3a$upper[k], sim.rr3g$upper[k])), sublabelsX=-45, plot=2 ) traceSysGMM3yearRR <- ropeladder(x=rr2pc(c(sim.rr1e$pe[k], sim.rr1h$pe[k], sim.rr3e$pe[k], sim.rr3h$pe[k])), lower=rr2pc(c(sim.rr1e$lower[k], sim.rr1h$lower[k], sim.rr3e$lower[k], sim.rr3h$lower[k])), upper=rr2pc(c(sim.rr1e$upper[k], sim.rr1h$upper[k], sim.rr3e$upper[k], sim.rr3h$upper[k])), sublabelsX=-45, plot=2 ) vertmark <- linesTile(x=c(0,0), y=c(0,1), plot=1:2) tile(traceDiffGMM1yearRR, traceSysGMM1yearRR, traceDiffGMM3yearRR, traceSysGMM3yearRR, vertmark, limits=rbind(c(-50,10), c(-50,10)), xaxis=list(at1=seq(-30,0,10), at2=seq(-40,0,10)), topaxis=list(at1=seq(-30,0,10), at2=seq(-40,0,10), add=TRUE), topaxistitle=list(labels1="% Change in Packs pc, +1 years", labels2="% Change in Packs pc, +3 years"), gridlines=list(type="xt"), output=list(file="RRrobustnessCigsHC", width=8.5) ) ##### Supplement ##### Simulate year effects, all else equal, model 3g allPeriods <- 1987:1995 # All else equal Forecast: Model 3g, all periods # Make matrix of hypothetical x's: covariates # Still use the 1g formula (no logs) -- we will handle logging manually # to get the differences of logs right xhyp0.3g <- cfMake(formula.1g, datayearfe, length(allPeriods)) # Set everything to zero for a "static" model with no changes in covariates xhyp0.3g$x <- xhyp0.3g.xpre <- 0*xhyp0.3g$x # Extract the actual averages across states from the analyzed data; xhypMat <- Reduce("+", pgmm.res3g$model)/length(pgmm.res3g$model) # Pull the year effects from the above and insert as counterfactuals; # check the indexes manually! xhyp0.3g$x[,4:ncol(xhyp0.3g$x)] <- xhypMat[,5:ncol(xhypMat)] xhyp0.3g$xpre[,4:ncol(xhyp0.3g$x)] <- xhypMat[,5:ncol(xhypMat)] # We need a lag of the price per pack lagY0.3g <- NULL # Hypothetical previous change in Y for simulation for (i in 1:length(pgmm.res3g$model)) lagY0.3g <- c(lagY0.3g, as.data.frame(pgmm.res3g$model[[i]])["1987",1]) lagY0.3g <- mean(lagY0.3g, na.rm=TRUE) initialY0 <- mean(pdata$packpc[pdata$year==1986], na.rm=TRUE) # Hypothetical initial level of Y for simulation # Simulate expected values of Y (on original level scale) # out to periods.out given hypothetical future values of X, # initial lags of the change in Y, and an initial level of Y sim.ev3g <- ldvsimev(xhyp0.3g, # The matrix of hypothetical x's simbetas.3g, # The matrix of simulated betas ci=0.95, # Desired confidence interval constant=NA, # NA indicates no constant! phi=simphis.3g, # estimated AR parameters; length must match lagY lagY=lagY0.3g, # lags of y, most recent last transform="difflog", # "log" to undo log transformation, # "diff" to under first differencing # "difflog" to do both initialY=initialY0 # for differenced models, the lag of the level of y ) # Plot results using tile colFits <- brewer.pal(3,"Set1")[2:3] yrsTrace <- scatter(x=1987:1995, y=sim.ev3g$pe, ylower=sim.ev3g$lower, yupper=sim.ev3g$upper, pch=1, size=.8, plot=1 ) yrsTraceWLS <- scatter(x=1987:1995, y=sim.ev3g$pe, ylower=sim.ev3g$lower, yupper=sim.ev3g$upper, fit=list(method="wls", weights=1/sim.ev3g$se^2, col=colFits[1]), pch=1, size=.8, plot=1 ) yrsTraceLOESS <- scatter(x=1987:1995, y=sim.ev3g$pe, ylower=sim.ev3g$lower, yupper=sim.ev3g$upper, fit=list(method="loess", weights=1/sim.ev3g$se^2, col=colFits[2]), pch=1, size=.8, plot=1 ) yrsLabel <- textTile(labels="EV and 95% CI", x=1988.75, y=105, plot=1) wlsLabel <- textTile(labels="WLS trend and 95% CI", x=1988.75, y=100, col=colFits[1], plot=1) loessLabel <- textTile(labels="Loess trend and 95% CI", x=1988.75, y=100, col=colFits[2], plot=1) tile(yrsTrace, yrsLabel, output = list(file="yearEffectsModel3g", width=5.5), limits = c(1986.5,1995.5,95,140), yaxis=list(major=FALSE), xaxistitle = list(labels="Year"), yaxistitle = list(labels="Trend due to year effects only", x=0), height=list(plot="golden"), width=list(null=5, # widen plot area for visibility yaxis.labelspace=0) ) tile(yrsTraceWLS, yrsLabel, wlsLabel, output = list(file="yearEffectsModel3gWLS", width=5.5), limits = c(1986.5,1995.5,95,140), yaxis=list(major=FALSE), xaxistitle = list(labels="Year"), yaxistitle = list(labels="Trend due to year effects only", x=0), height=list(plot="golden"), width=list(null=5, # widen plot area for visibility yaxis.labelspace=0) ) tile(yrsTraceLOESS, yrsLabel, loessLabel, output = list(file="yearEffectsModel3gLOESS", width=5.5), limits = c(1986.5,1995.5,95,140), yaxis=list(major=FALSE), xaxistitle = list(labels="Year"), yaxistitle = list(labels="Trend due to year effects only", x=0), height=list(plot="golden"), width=list(null=5, # widen plot area for visibility yaxis.labelspace=0) )