# tsconcepts.ssc script file for examples used in Time Series Concepts chapter # updated for FM 2.0 # # created: November 14, 2001 # revised: July 7, 2007 # added example using new predict method for arima objects # #################################################################### # # Example: simulate gaussian white noise # set.seed(101) y = rnorm(100,sd=1) # # estimate sample moments # y.bar = mean(y) g.hat = acf(y,lag.max=10,type="covariance",plot=F) r.hat = acf(y,lag.max=10,type="correlation",plot=F) # # plot data and SACF # par(mfrow=c(1,2)) tsplot(y,ylab="y") acf.plot(r.hat) par(mfrow=c(1,1)) # # test for normality using normalTest # qqnorm(y) qqline(y) normalTest(y,method="sw") normalTest(y,method="jb") # # Example: sample moments of daily cc returns on Microsoft stock # r.msft = getReturns(DowJones30[,"MSFT"],type="continuous") r.msft@title = "Daily returns on Microsoft" sample.2000 = (positions(r.msft) > timeDate("12/31/1999") & positions(r.msft) < timeDate("1/1/2001")) par(mfrow=c(2,2)) plot(r.msft[sample.2000],ylab="r.msft") r.acf = acf(r.msft[sample.2000]) hist(seriesData(r.msft)) qqnorm(seriesData(r.msft)) par(mfrow=c(1,1)) histPlot(r.msft,strip.text="MSFT monthly return") qqPlot(r.msft,strip.text="MSFT monthly return") # test for white noise using autocorTest args(autocorTest.default) autocorTest(r.msft,lag.n=10,method="lb") # # Example: simulate stationary AR(1) with mu=1, phi=0.75 and sigma=1 # Note: set a seed to create reproducable results # set.seed(101) e = rnorm(100,sd=1) e.start = rnorm(25,sd=1) y.ar1 = 1 + arima.sim(model=list(ar=0.75),n=100, innov=e,start.innov=e.start) mean(y.ar1) var(y.ar1) # # compute true ACF # gamma.j = rep(0.75,10)^seq(10) # # plot data # par(mfrow=c(2,2)) tsplot(y.ar1,main="Simulated AR(1)") abline(h=1) tsplot(gamma.j,type="h",main="ACF and IRF for AR(1)", ylab="Autocorrelation",xlab="lag") tmp = acf(y.ar1,lag.max=10) par(mfrow=c(1,1)) # compute half life log(0.5)/log(0.75) # # Example: ACF for interest rate spreads (from term structure and exchange rates) # uscn.id = 100*(lexrates.dat[,"USCNF"]-lexrates.dat[,"USCNS"]) colIds(uscn.id) = "USCNID" uscn.id@title = "US/CA 30 day interest rate differential" par(mfrow=c(2,1)) plot(uscn.id,reference.grid=F) abline(h=0) tmp = acf(uscn.id) par(mfrow=c(1,1)) lag.plot(uscn.id,lags=8,layout=c(2,4)) # # example of AR process # colIds(varex.ts) smpl = (positions(varex.ts) > timeDate("12/31/1960")) irate.real = varex.ts[smpl,"RF.REAL"] par(mfrow=c(2,2)) acf.plot(acf(irate.real,plot=F)) plot(irate.real,main="Monthly Real Interest Rate") tmp = acf(irate.real,type="partial") par(mfrow=c(1,1)) # detrended US industrial production smpl = (positions(varex.ts) > timeDate("1/1/1960")) ip = as.matrix(log(seriesData(IP.dat[smpl,]))) trend = 1:nrow(ip) ipg = residuals(lm(ip~trend)) acf(ipg,type="partial") # estimate AR model using splus function ar args(ar) irate.real = irate.real - mean(irate.real) ar.fit = ar(irate.real,order.max=6) names(ar.fit) ar.fit$order ar.fit$ar sum(ar.fit$ar) abs(polyroot(c(1,-ar.fit$ar))) # # simulate signal + noise model # set.seed(112) eps = rnorm(100,sd=1) eta = rnorm(100,sd=0.5) z = cumsum(eta) y = z + eps dy = diff(y) par(mfrow=c(2,2)) tsplot(y,main="Signal plus noise",ylab="y") tsplot(dy,main="1st difference",ylab="dy") tmp = acf(dy) tmp = acf(dy,type="partial") par(mfrow=c(1,1)) q.val = 0.5^2 rho1 = -1/(q.val + 2) q.val rho1 # # equally weighted portfolio of Dow Jones 30 assets # ret.DJ30 = getReturns(DowJones30,type="discrete") ret.mat = as.matrix(seriesData(ret.DJ30)) wts = rep(1/30,30) ret.port = ret.mat%*%wts # # create overlapping annual returns for msft # sp500.mret = getReturns(singleIndex.dat[,"SP500"], type="continuous") sp500.mret@title = "Monthly returns on S&P 500 Index" sp500.aret = aggregateSeries(sp500.mret,moving=12,FUN=sum) sp500.aret@title = "Monthly Annual returns on S&P 500 Index" par(mfrow=c(2,2)) plot(sp500.mret) plot(sp500.aret) acf.plot(acf(sp500.aret,plot=F)) acf.plot(acf(sp500.aret,type="partial",plot=F)) par(mfrow=c(1,1)) # # estimate ARMA(1,1) model for uscn.id # uscn.id.dm = uscn.id - mean(uscn.id) arma11.mod = list(ar=0.75,ma=0) arma11.fit = arima.mle(uscn.id.dm,model=arma11.mod) class(arma11.fit) names(arma11.fit) arma11.fit std.errs = sqrt(diag(arma11.fit$var.coef)) names(std.errs) = colIds(arma11.fit$var.coef) std.errs # EZ: 07/05/2007 # note: in splus 8, there is a summary method for objects of class # "arima" which shows standard errors and t-stats. There is also # a residuals() function summary(arma11.fit) plot(residuals(arma11.fit), reference.grid=F, main="Residuals from ARMA(1,1) fit") abline(h=0) seriesPlot(residuals(arma11.fit), one.plot=T, ref.line=T) # # estimate ARMA(1,1) with a constant # arma11.fit2 = arima.mle(uscn.id,model=arma11.mod,xreg=1) arma11.fit2 std.errs = sqrt(diag(arma11.fit$var.coef)) names(std.errs) = colIds(arma11.fit$var.coef) std.errs plot(arma11.fit) graphsheet() # produce h-step ahead forecasts end(uscn.id.dm) fcst.dates = timeSeq("7/1/1996","6/1/1997", by="months",format="%b %Y") uscn.id.dm.fcst = arima.forecast(uscn.id.dm,n=12, model=arma11.fit$model,future.positions=fcst.dates) names(uscn.id.dm.fcst) uscn.id.dm.fcst[[1]] # EZ: 7/5/2007. Problem in arima.forecast when timeSeries is # extended to accomodate forecast observations. Need to coerce # NA data to class matrix in order for function to work n <- length(fcst.dates) x.future <- timeSeries(data = as.matrix(rep(NA, n)), positions = fcst.dates) x.fore <- concat(uscn.id.dm, x.future) # EZ: 7/5/2007 # In FM 3.0, there is a predict() method for objects of class # "arima" which produces a FM object of class "forecast" args(predict.arima) uscn.id.dm.fcst = predict(arma11.fit, n.predict=12) class(uscn.id.dm.fcst) uscn.id.dm.fcst summary(uscn.id.dm.fcst) plot(uscn.id.dm.fcst, xold=uscn.id.dm, n.old=6) smpl = positions(uscn.id.dm) >= timeDate("6/1/1995") plot(uscn.id.dm[smpl,],uscn.id.dm.fcst$mean, uscn.id.dm.fcst$mean+2*uscn.id.dm.fcst$std.err, uscn.id.dm.fcst$mean-2*uscn.id.dm.fcst$std.err, plot.args=list(lty=c(1,2,3,3))) # # estimate AR(2) using OLS # ar2.fit = OLS(USCNID~ar(2),data=uscn.id) ar2.fit abs(polyroot(c(1,-ar2.fit$coef[2:3]))) # simulate ARCH(1) process rt = simulate.garch(model=list(a.value=0.1, arch=0.8), n=250, rseed=196) class(rt) names(rt) par(mfrow=c(2,1)) tsplot(rt$et,main="Simulated returns") tsplot(rt$sigma.t,main="Simulated volatility") par(mfrow=c(1,1)) # # estimating the long-run variance of a time series # y(t) = 1 + 0.75y(t-1) + e(t) lrv.true = 1/((1-0.75)^2) set.seed(101) e = rnorm(100,sd=1) y.ar1 = 1 + arima.sim(model=list(ar=0.75),innov=e) # autoregressive estimate ar1.fit = OLS(y.ar1~ar(1)) rho.hat = coef(ar1.fit)[2] sig2.hat = sum(residuals(ar1.fit)^2)/ar1.fit$df.resid lrv.ar1 = sig2.hat/(1-rho.hat)^2 as.numeric(lrv.ar1) se.ybar.ar1 = sqrt(lrv.ar1/100) # Newey-West estimate args(asymp.var) 4*(100/100)^(2/9) lrv.nw = asymp.var(y.ar1,bandwidth=4) lrv.nw sqrt(lrv.nw/100) # # compute long-run variance for overlapping returns # # simulate random walk set.seed(201) e = rnorm(100) z.rw = cumsum(e) # # Example: Variance ratios # args(varRatioTest) # note: x is log prices # Variance ratios for Dow Jones Industrial average from 1960 - 1990 # note: if hetero=F then first variance ratio is significant VR.djia = varRatioTest(djia[timeEvent("1/1/1960","1/1/1990"),"close"], n.periods=60,unbiased=T,hetero=T) class(VR.djia) names(VR.djia) VR.djia plot(VR.djia) # Variance ratios for individual Dow Jones stocks from 1991 - 2001 VR.DJ30 = varRatioTest(DowJones30,n.periods=5,unbiased=T,hetero=T) plot(VR.DJ30) # # Example: simulate trend stationary AR(1) with mu=1,delta=0.25, phi=0.75 and sigma=1 # set.seed(101) y.tsar1 = 1 + 0.25*seq(100) + arima.sim(model=list(ar=0.75),n=100) tsplot(y.tsar1,ylab="y") abline(a=1,b=0.25) set.seed(101) u.ar1 = arima.sim(model=list(ar=0.75),n=100) y1 = cumsum(u.ar1) y1.d = 1 + 0.25*seq(100)+ y1 y2 = rep(0,100) for (i in 3:100) { y2[i] = 2*y2[i-1] - y2[i-2] + u.ar1[i] } par(mfrow=c(2,2)) tsplot(u.ar1,main="I(0) innovations") tsplot(y1,main="I(1) process") tsplot(y1.d,main="I(1) process with drift") abline(a=1,b=0.25) tsplot(y2,main="I(2) process") par(mfrow=c(1,1)) # # plot financial data # uscn.spot = lexrates.dat[,"USCNS"] sp500 = singleIndex.dat[,"SP500"] par(mfrow=c(2,2)) plot.timeSeries(uscn.spot,main="Log US/CA spot exchange rate",reference.grid=F) plot.timeSeries(log(sp500),main="Log S&P 500 index",reference.grid=F) plot.timeSeries(rf.30day,main="Nominal 30 day T-bill rate",reference.grid=F) plot.timeSeries(log(CPI.dat),main="Log of US CPI",reference.grid=F) par(mfrow=c(1,1)) # # long memory example # # simulate fractional white noise process set.seed(394) y.fwn = simulate.FARIMA(list(d=0.3), 500) par(mfrow=c(2,1)) tsplot(y.fwn) tmp = acf(y.fwn,lag.max=50) par(mfrow=c(1,1)) msft.aret = abs(getReturns(DowJones30[,"MSFT"])) uscn.id = 100*(lexrates.dat[,"USCNF"]- lexrates.dat[,"USCNS"]) colIds(uscn.id) = "USCNID" par(mfrow=c(2,1)) tmp = acf(msft.aret,lag.max=100) tmp = acf(uscn.id,lag.max=50) par(mfrow=c(1,1)) ######################################################################################### # multivariate time series ######################################################################################### # # construct time series of first four asset returns in DowJones30.dat # Y = getReturns(DowJones30[,1:4],type="continuous") colIds(Y) # compute means and variances and correlations colMeans(seriesData(Y)) # colMeans doesn't have method for timeSeries var(Y) # var does have a method for timeSeries! cor(Y) colVars(seriesData(Y)) colStdevs(seriesData(Y)) # # compute autocovariance and autocorrelation matrices # Ghat = acf(Y[,1:2],lag.max=5,type="covariance",plot=F) Rhat = acf(Y[,1:2],lag.max=5,plot=F) Rhat acf.plot(Rhat) Ghat$acf[1,,] Rhat$acf[1,,] Ghat$acf[2,,] Rhat$acf[2,,] par(mfrow=c(1,1)) # # simulate VAR(1) model: note can't use filter() for efficiency # nobs = 250 e.var = rmvnorm(nobs,sd=c(1,1),rho=0.5) e.var = t(e.var) pi1 = matrix(c(0.7,0.2,0.2,0.7),2,2,byrow=T) # VAR(1) coef matrix mu.vec = c(1,2) # mean vector c.vec = as.vector((diag(2)-pi1)%*%mu.vec) # intercept in VAR y.var = matrix(0,2,nobs) y.var[,1] = mu.vec # initial value is mean for (i in 2:nobs) { y.var[,i] = c.vec + pi1%*%y.var[,i-1]+e.var[,i] } y.var = t(y.var) dimnames(y.var) = list(NULL,c("y1","y2")) eigen(pi1,only.values=T) colMeans(y.var) tsplot(y.var) # # compute long-run variance # M.T = floor(4*(nrow(Y)/100)^(2/9)) lrv.nw = asymp.var(Y,bandwidth=M.T) lrv.nw sqrt(diag(lrv.nw))