# tsconcepts.ssc			script file for examples used in Time Series Concepts chapter
#							updated for FM 2.0
#
# created: November 14, 2001
# revised: July 7, 2007
#	added example using new predict method for arima objects
#
####################################################################
# 
# Example: simulate gaussian white noise
#

set.seed(101)				
y = rnorm(100,sd=1)

#
# estimate sample moments
#

y.bar = mean(y)
g.hat = acf(y,lag.max=10,type="covariance",plot=F)
r.hat = acf(y,lag.max=10,type="correlation",plot=F)

#
# plot data and SACF
#

par(mfrow=c(1,2))
tsplot(y,ylab="y")
acf.plot(r.hat)
par(mfrow=c(1,1))

#
# test for normality using normalTest
#

qqnorm(y)
qqline(y)

normalTest(y,method="sw")
normalTest(y,method="jb")

#
# Example: sample moments of daily cc returns on Microsoft stock
#

r.msft = getReturns(DowJones30[,"MSFT"],type="continuous")
r.msft@title = "Daily returns on Microsoft"
sample.2000 = (positions(r.msft) > timeDate("12/31/1999")
& positions(r.msft) < timeDate("1/1/2001"))
par(mfrow=c(2,2))
plot(r.msft[sample.2000],ylab="r.msft")
r.acf = acf(r.msft[sample.2000])
hist(seriesData(r.msft))			
qqnorm(seriesData(r.msft))
par(mfrow=c(1,1))

histPlot(r.msft,strip.text="MSFT monthly return")
qqPlot(r.msft,strip.text="MSFT monthly return")

# test for white noise using autocorTest
args(autocorTest.default)
autocorTest(r.msft,lag.n=10,method="lb")

#
# Example: simulate stationary AR(1) with mu=1, phi=0.75 and sigma=1
# Note: set a seed to create reproducable results
#

set.seed(101)
e = rnorm(100,sd=1)
e.start = rnorm(25,sd=1)
y.ar1 = 1 + arima.sim(model=list(ar=0.75),n=100,
innov=e,start.innov=e.start)
mean(y.ar1)
var(y.ar1)		

#
# compute true ACF
#

gamma.j = rep(0.75,10)^seq(10)

#
# plot data
#

par(mfrow=c(2,2))
tsplot(y.ar1,main="Simulated AR(1)")
abline(h=1)
tsplot(gamma.j,type="h",main="ACF and IRF for AR(1)",
ylab="Autocorrelation",xlab="lag")
tmp = acf(y.ar1,lag.max=10)
par(mfrow=c(1,1))

# compute half life
log(0.5)/log(0.75)

#
# Example: ACF for interest rate spreads (from term structure and exchange rates)
#

uscn.id = 100*(lexrates.dat[,"USCNF"]-lexrates.dat[,"USCNS"])
colIds(uscn.id) = "USCNID"
uscn.id@title = "US/CA 30 day interest rate differential"
par(mfrow=c(2,1))
plot(uscn.id,reference.grid=F)
abline(h=0)
tmp = acf(uscn.id)
par(mfrow=c(1,1))

lag.plot(uscn.id,lags=8,layout=c(2,4))

# 
# example of AR process
#

colIds(varex.ts)
smpl = (positions(varex.ts) > timeDate("12/31/1960"))
irate.real = varex.ts[smpl,"RF.REAL"]
par(mfrow=c(2,2))
acf.plot(acf(irate.real,plot=F))
plot(irate.real,main="Monthly Real Interest Rate")
tmp = acf(irate.real,type="partial")
par(mfrow=c(1,1))

# detrended US industrial production
smpl = (positions(varex.ts) > timeDate("1/1/1960"))
ip = as.matrix(log(seriesData(IP.dat[smpl,])))
trend = 1:nrow(ip)
ipg = residuals(lm(ip~trend))

acf(ipg,type="partial")

# estimate AR model using splus function ar
args(ar)
irate.real = irate.real - mean(irate.real)
ar.fit = ar(irate.real,order.max=6)
names(ar.fit)
ar.fit$order
ar.fit$ar

sum(ar.fit$ar)
abs(polyroot(c(1,-ar.fit$ar)))

# 
# simulate signal + noise model
#

set.seed(112)
eps = rnorm(100,sd=1)
eta = rnorm(100,sd=0.5)
z = cumsum(eta)
y = z + eps
dy = diff(y)
par(mfrow=c(2,2))
tsplot(y,main="Signal plus noise",ylab="y")
tsplot(dy,main="1st difference",ylab="dy")
tmp = acf(dy)
tmp = acf(dy,type="partial")
par(mfrow=c(1,1))

q.val = 0.5^2
rho1 = -1/(q.val + 2)
q.val
rho1
#
# equally weighted portfolio of Dow Jones 30 assets
#

ret.DJ30 = getReturns(DowJones30,type="discrete")
ret.mat = as.matrix(seriesData(ret.DJ30))
wts = rep(1/30,30)
ret.port = ret.mat%*%wts

#
# create overlapping annual returns for msft
#

sp500.mret = getReturns(singleIndex.dat[,"SP500"],
type="continuous")
sp500.mret@title = "Monthly returns on S&P 500 Index"
sp500.aret = aggregateSeries(sp500.mret,moving=12,FUN=sum)
sp500.aret@title = "Monthly Annual returns on S&P 500 Index"
par(mfrow=c(2,2))
plot(sp500.mret)
plot(sp500.aret)
acf.plot(acf(sp500.aret,plot=F))
acf.plot(acf(sp500.aret,type="partial",plot=F))
par(mfrow=c(1,1))

#
# estimate ARMA(1,1) model for uscn.id
#

uscn.id.dm = uscn.id - mean(uscn.id)
arma11.mod = list(ar=0.75,ma=0)
arma11.fit = arima.mle(uscn.id.dm,model=arma11.mod)
class(arma11.fit)
names(arma11.fit)
arma11.fit
std.errs = sqrt(diag(arma11.fit$var.coef))
names(std.errs) = colIds(arma11.fit$var.coef)
std.errs

# EZ: 07/05/2007
# note: in splus 8, there is a summary method for objects of class
# "arima" which shows standard errors and t-stats. There is also
# a residuals() function
summary(arma11.fit)
plot(residuals(arma11.fit), reference.grid=F, 
     main="Residuals from ARMA(1,1) fit")
abline(h=0)
seriesPlot(residuals(arma11.fit), one.plot=T, ref.line=T)

#
# estimate ARMA(1,1) with a constant
#

arma11.fit2 = arima.mle(uscn.id,model=arma11.mod,xreg=1)
arma11.fit2
std.errs = sqrt(diag(arma11.fit$var.coef))
names(std.errs) = colIds(arma11.fit$var.coef)
std.errs

plot(arma11.fit)
graphsheet()

# produce h-step ahead forecasts
end(uscn.id.dm)
fcst.dates = timeSeq("7/1/1996","6/1/1997",
by="months",format="%b %Y")
uscn.id.dm.fcst = arima.forecast(uscn.id.dm,n=12,
model=arma11.fit$model,future.positions=fcst.dates)
names(uscn.id.dm.fcst)
uscn.id.dm.fcst[[1]]

# EZ: 7/5/2007. Problem in arima.forecast when timeSeries is 
# extended to accomodate forecast observations. Need to coerce
# NA data to class matrix in order for function to work

	n <- length(fcst.dates)
		x.future <- timeSeries(data = as.matrix(rep(NA, n)), 
			positions = fcst.dates)
		x.fore <- concat(uscn.id.dm, x.future)

# EZ: 7/5/2007
# In FM 3.0, there is a predict() method for objects of class
# "arima" which produces a FM object of class "forecast"
args(predict.arima)
uscn.id.dm.fcst = predict(arma11.fit, n.predict=12)
class(uscn.id.dm.fcst)
uscn.id.dm.fcst
summary(uscn.id.dm.fcst)
plot(uscn.id.dm.fcst, xold=uscn.id.dm, n.old=6)



smpl = positions(uscn.id.dm) >= timeDate("6/1/1995")
plot(uscn.id.dm[smpl,],uscn.id.dm.fcst$mean,
uscn.id.dm.fcst$mean+2*uscn.id.dm.fcst$std.err,
uscn.id.dm.fcst$mean-2*uscn.id.dm.fcst$std.err,
plot.args=list(lty=c(1,2,3,3)))

# 
# estimate AR(2) using OLS
#

ar2.fit = OLS(USCNID~ar(2),data=uscn.id)
ar2.fit
abs(polyroot(c(1,-ar2.fit$coef[2:3])))

# simulate ARCH(1) process

rt = simulate.garch(model=list(a.value=0.1, arch=0.8), 
     n=250, rseed=196)
class(rt)
names(rt)
par(mfrow=c(2,1))
tsplot(rt$et,main="Simulated returns")
tsplot(rt$sigma.t,main="Simulated volatility")
par(mfrow=c(1,1))

#
# estimating the long-run variance of a time series
# y(t) = 1 + 0.75y(t-1) + e(t)

lrv.true = 1/((1-0.75)^2)
set.seed(101)
e = rnorm(100,sd=1)
y.ar1 = 1 + arima.sim(model=list(ar=0.75),innov=e)

# autoregressive estimate
ar1.fit = OLS(y.ar1~ar(1))
rho.hat = coef(ar1.fit)[2]
sig2.hat = sum(residuals(ar1.fit)^2)/ar1.fit$df.resid
lrv.ar1 = sig2.hat/(1-rho.hat)^2
as.numeric(lrv.ar1)
se.ybar.ar1 = sqrt(lrv.ar1/100)

# Newey-West estimate
args(asymp.var)
4*(100/100)^(2/9)
lrv.nw = asymp.var(y.ar1,bandwidth=4)
lrv.nw
sqrt(lrv.nw/100)

#
# compute long-run variance for overlapping returns
#

# simulate random walk
set.seed(201)
e = rnorm(100)
z.rw = cumsum(e)

#
# Example: Variance ratios
#
args(varRatioTest)
# note: x is log prices

# Variance ratios for Dow Jones Industrial average from 1960 - 1990
# note: if hetero=F then first variance ratio is significant
VR.djia = varRatioTest(djia[timeEvent("1/1/1960","1/1/1990"),"close"],
n.periods=60,unbiased=T,hetero=T)
class(VR.djia)
names(VR.djia)
VR.djia
plot(VR.djia)

# Variance ratios for individual Dow Jones stocks from 1991 - 2001
VR.DJ30 = varRatioTest(DowJones30,n.periods=5,unbiased=T,hetero=T)
plot(VR.DJ30)



#
# Example: simulate trend stationary AR(1) with mu=1,delta=0.25, phi=0.75 and sigma=1
#

set.seed(101)
y.tsar1 = 1 + 0.25*seq(100) + 
arima.sim(model=list(ar=0.75),n=100)
tsplot(y.tsar1,ylab="y")
abline(a=1,b=0.25)

set.seed(101)
u.ar1 = arima.sim(model=list(ar=0.75),n=100)
y1 = cumsum(u.ar1)
y1.d = 1 + 0.25*seq(100)+ y1
y2 = rep(0,100)
for (i in 3:100) {
	y2[i] = 2*y2[i-1] - y2[i-2] + u.ar1[i]
}
par(mfrow=c(2,2))
tsplot(u.ar1,main="I(0) innovations")
tsplot(y1,main="I(1) process")
tsplot(y1.d,main="I(1) process with drift")
abline(a=1,b=0.25)
tsplot(y2,main="I(2) process")
par(mfrow=c(1,1))

#
# plot financial data
#

uscn.spot = lexrates.dat[,"USCNS"]
sp500 = singleIndex.dat[,"SP500"]
par(mfrow=c(2,2))
plot.timeSeries(uscn.spot,main="Log US/CA spot exchange rate",reference.grid=F)
plot.timeSeries(log(sp500),main="Log S&P 500 index",reference.grid=F)
plot.timeSeries(rf.30day,main="Nominal 30 day T-bill rate",reference.grid=F)
plot.timeSeries(log(CPI.dat),main="Log of US CPI",reference.grid=F)
par(mfrow=c(1,1))

#
# long memory example
#

# simulate fractional white noise process
set.seed(394)
y.fwn = simulate.FARIMA(list(d=0.3), 500)
par(mfrow=c(2,1))
tsplot(y.fwn)
tmp = acf(y.fwn,lag.max=50)
par(mfrow=c(1,1))

msft.aret = abs(getReturns(DowJones30[,"MSFT"]))
uscn.id = 100*(lexrates.dat[,"USCNF"]-
lexrates.dat[,"USCNS"])
colIds(uscn.id) = "USCNID"

par(mfrow=c(2,1))
tmp = acf(msft.aret,lag.max=100)
tmp = acf(uscn.id,lag.max=50)
par(mfrow=c(1,1))

#########################################################################################
# 								multivariate time series
#########################################################################################

# 
# construct time series of first four asset returns in DowJones30.dat
#
Y = getReturns(DowJones30[,1:4],type="continuous")
colIds(Y)

# compute means and variances and correlations
colMeans(seriesData(Y))			# colMeans doesn't have method for timeSeries
var(Y)								# var does have a method for timeSeries!
cor(Y)
colVars(seriesData(Y))
colStdevs(seriesData(Y))

#
# compute autocovariance and autocorrelation matrices
#

Ghat = acf(Y[,1:2],lag.max=5,type="covariance",plot=F)
Rhat = acf(Y[,1:2],lag.max=5,plot=F)
Rhat
acf.plot(Rhat)
Ghat$acf[1,,]
Rhat$acf[1,,]
Ghat$acf[2,,]
Rhat$acf[2,,]
par(mfrow=c(1,1))

# 
# simulate VAR(1) model: note can't use filter() for efficiency
#

nobs = 250
e.var = rmvnorm(nobs,sd=c(1,1),rho=0.5)
e.var = t(e.var)
pi1 = matrix(c(0.7,0.2,0.2,0.7),2,2,byrow=T)		# VAR(1) coef matrix
mu.vec = c(1,2)											# mean vector 
c.vec = as.vector((diag(2)-pi1)%*%mu.vec) 			# intercept in VAR
y.var = matrix(0,2,nobs)
y.var[,1] = mu.vec										# initial value is mean
for (i in 2:nobs) {
	y.var[,i] = c.vec + pi1%*%y.var[,i-1]+e.var[,i]
}
y.var = t(y.var)
dimnames(y.var) = list(NULL,c("y1","y2"))
eigen(pi1,only.values=T)
colMeans(y.var)
tsplot(y.var)

#
# compute long-run variance
#

M.T = floor(4*(nrow(Y)/100)^(2/9))
lrv.nw = asymp.var(Y,bandwidth=M.T)
lrv.nw
sqrt(diag(lrv.nw))