---
title: "Estimation of CER Model"
author: "Eric Zivot"
date: "Thursday, February 17, 2016"
output: slidy_presentation
---


## Set options and load packages

```{r}
options(digits=3, width=70)
# install IntroCompFinR package from R-forge
# use install.packages("IntroCompFinR", repos="http://R-Forge.R-project.org")
library(IntroCompFinR)
library(mvtnorm)
library(PerformanceAnalytics)
library(zoo)
Sys.setenv(TZ="UTC")
```

## Get data from package IntroCompfinR

```{r}
# get data from IntroCompFin package
data(msftDailyPrices, sp500DailyPrices, sbuxDailyPrices)
msftPrices = to.monthly(msftDailyPrices, OHLC=FALSE)
sp500Prices = to.monthly(sp500DailyPrices, OHLC=FALSE)
sbuxPrices = to.monthly(sbuxDailyPrices, OHLC=FALSE)
# set sample to match book chapter
smpl = "1998-01::2012-05"
msftPrices = msftPrices[smpl]
sp500Prices = sp500Prices[smpl]
sbuxPrices = sbuxPrices[smpl]
# calculate returns
msftRetS = na.omit(Return.calculate(msftPrices, method="simple"))
sp500RetS = na.omit(Return.calculate(sp500Prices, method="simple"))
sbuxRetS = na.omit(Return.calculate(sbuxPrices, method="simple"))
msftRetC = log(1 + msftRetS)
sp500RetC = log(1 + sp500RetS)
sbuxRetC = log(1 + sbuxRetS)
# merged data set
cerRetS = merge(msftRetS, sbuxRetS, sp500RetS)
cerRetC = merge(msftRetC, sbuxRetC, sp500RetC)
colnames(cerRetS) = colnames(cerRetC) = c("MSFT", "SBUX", "SP500")
head(cerRetC, n=3)

```

## Plot 3 assets

```{r}
my.panel <- function(...) {
  lines(...)
  abline(h=0)
}
plot.zoo(cerRetC, col="blue", lwd=2, main="", panel=my.panel, type="l")

```

## Plot 3 assets

```{r}
pairs(coredata(cerRetC), col="blue", cex=1.25, pch=16)
```

## CER model parameter estimates = sample descriptive statistics

```{r}
# Estimate mean (mu)
muhat = apply(cerRetC,2,mean)
muhat
# Estimate variance (sigma^2)
sigma2hat = apply(cerRetC,2,var)
sigma2hat
# Estimate volatility (sigma)
sigmahat = apply(cerRetC,2,sd)
sigmahat
```

## Show mean-volatility tradeoff

```{r}
plot(sigmahat, muhat, col="blue", pch=16, cex=2,
     ylim=c(0, 0.015), xlim=c(0, 0.12))
text(sigmahat, muhat, labels=colnames(cerRetC), pos=4)
```

Assets with highest means also have highest volatilities

## Estimate covariance and correlation matrices

```{r}
covmat = var(cerRetC)
covmat
cormat = cor(cerRetC)
cormat

```

## Extract unique values from covariance and correlation matrices

```{r}
covhat = covmat[lower.tri(covmat)]
rhohat = cormat[lower.tri(cormat)]
names(covhat) <- names(rhohat) <- 
c("msft,sbux","msft,sp500","sbux,sp500")
covhat
rhohat
```

## Estimated standard errors for means

```{r}
n.obs = nrow(cerRetC)
seMuhat = sigmahat/sqrt(n.obs)
cbind(muhat, seMuhat)

```

Estimated SE values are large compared to estimates. Do not estimate mean values very well.

## Estimated standard errors for variances and standard deviations

```{r}
seSigma2hat = sigma2hat/sqrt(n.obs/2)
seSigmahat = sigmahat/sqrt(2*n.obs)
cbind(sigma2hat, seSigma2hat, sigmahat, seSigmahat)

```

Estimated SE values for volatility are small compared to estimates. Estimate volatility well.

## Estimated standard errors for correlations

```{r}
seRhohat = (1-rhohat^2)/sqrt(n.obs)
cbind(rhohat, seRhohat)

```

Estimated standard errors for correlations are somewhat small compared to estimates. Notice how standard error is smallest for the largest correlation estimate.

## Approximate 95% confidence intervals for $\mu$

```{r}
lowerMu = muhat - 2*seMuhat
upperMu = muhat + 2*seMuhat
widthMu = upperMu - lowerMu
cbind(lowerMu, upperMu, widthMu)
```

Wide 95% confidence intervals for the mean implies imprecise estimates. Notice that the confidence interval width is smallest for the S&P 500 index (why?)

## Approximate 95% confidence intervals for $\sigma^2$ and $\sigma$

```{r}
# confidence intervals for sigma^2
lowerSigma2 = sigma2hat - 2*seSigma2hat
upperSigma2 = sigma2hat + 2*seSigma2hat
widthSigma2 = upperSigma2 - lowerSigma2
cbind(lowerSigma2, upperSigma2, widthSigma2)
# confidence intervals for sigma
lowerSigma = sigmahat - 2*seSigmahat
upperSigma = sigmahat + 2*seSigmahat
widthSigma = upperSigma - lowerSigma
cbind(lowerSigma, upperSigma, widthSigma)
```

Confidence intervals for sigma are narrow. Estimates are precise.

## Approximate 95% confidence intervals for $\rho$

```{r}
lowerRho = rhohat - 2*seRhohat
upperRho = rhohat + 2*seRhohat
widthRho = upperRho - lowerRho
cbind(lowerRho, upperRho, widthRho)
```

Confidence intervals are not too wide and contain all positive values. Hence, estimates are moderately precise.

## Stylized facts for the estimation of CER model parameters

* The expected return is not estimated very precisely
    + Large standard errors relative to size of mean estimates
* Standard deviations and correlations are estimated more precisely than the expected return

## Monte Carlo Simulation Loop

```{r}
mu = 0.05
sigma = 0.10
n.obs = 100
n.sim = 1000
set.seed(111)
sim.means = rep(0,n.sim)  # initialize vectors
mu.lower = rep(0,n.sim)
mu.upper = rep(0,n.sim)
qt.975 = qt(0.975, n.obs-1)
for (sim in 1:n.sim) {
	sim.ret = rnorm(n.obs,mean=mu,sd=sigma)
	sim.means[sim] = mean(sim.ret)
	se.muhat = sd(sim.ret)/sqrt(n.obs)
	mu.lower[sim] = sim.means[sim]-qt.975*se.muhat
	mu.upper[sim] = sim.means[sim]+qt.975*se.muhat
}
```

## First 10 simulated samples from CER model

```{r, echo=FALSE}
n.sim2 = 10
set.seed(111)
sim.mat = matrix(0, n.obs, n.sim2)
for (sim in 1:n.sim2) {
  sim.mat[,sim] = rnorm(n.obs, mean=mu, sd=sigma)
}
my.panel <- function(...) {
  lines(...)
  abline(h=0)
}
plot(as.zoo(sim.mat), col="blue", lwd=2, main="", panel=my.panel)
```

## Histogram of 1000 Monte Carlo estimates of $\mu$

```{r}
hist(sim.means, col="cornflowerblue", ylim=c(0,40), 
     main="", xlab="muhat", probability=TRUE)
abline(v=mean(sim.means), col="white", lwd=4, lty=2)
# overlay normal curve
x.vals = seq(0.02, 0.08, length=100)
lines(x.vals, dnorm(x.vals, mean=mu, sd=sigma/sqrt(100)), col="orange", lwd=2)
```

Approximate expected value, bias, SE, and 95% CI coverage probability

```{r}
mean(sim.means)       # MC expected value
mean(sim.means) - mu  # MC bias
sd(sim.means)         # MC SE 
sigma/sqrt(n.obs)     # true SE(muhat)
# 95% CI coverage probability
in.interval = mu >= mu.lower & mu <= mu.upper
sum(in.interval)/n.sim
```

## Histograms of 1000 Monte Carlo estimates of $\sigma^2$ and $\sigma$

```{r, echo=FALSE}
mu = 0.05
sigma = 0.10
n.obs = 100
n.sim = 1000
set.seed(111)
sim.means = rep(0,n.sim)  # initialize vectors
sim.sigma2 = rep(0,n.sim)
sigma2.lower = rep(0,n.sim)
sigma2.upper = rep(0,n.sim)
sim.sigma = rep(0,n.sim)
sigma.lower = rep(0,n.sim)
sigma.upper = rep(0,n.sim)
for (sim in 1:n.sim) {
	sim.ret = rnorm(n.obs,mean=mu,sd=sigma)
	sim.means[sim] = mean(sim.ret)
	sim.sigma2[sim] = var(sim.ret)
	sim.sigma[sim] = sqrt(sim.sigma2[sim])
  sigma2.lower[sim] = sim.sigma2[sim] - 2*sim.sigma2[sim]/sqrt(n.obs/2)
	sigma2.upper[sim] = sim.sigma2[sim] + 2*sim.sigma2[sim]/sqrt(n.obs/2)
	sigma.lower[sim] = sim.sigma[sim] - 2*sim.sigma[sim]/sqrt(n.obs*2)
	sigma.upper[sim] = sim.sigma[sim] + 2*sim.sigma[sim]/sqrt(n.obs*2)
}
par(mfrow=c(1,2))
  hist(sim.sigma2, col="cornflowerblue", xlab="sigma2 hat values", 
       main="sigma2 hat", probability=T)
	abline(v=mean(sim.sigma2), col="white", lwd=4, lty=2)
  # overlay normal distribution
  x.vals = seq(0.005, 0.016, length=100)
  lines(x.vals, dnorm(x.vals, mean=sigma^2, sd=sigma^2/sqrt(100/2)), col="orange", lwd=2)
	hist(sim.sigma, col="cornflowerblue", xlab="sigma hat values", 
       main="sigma hat", probability=T)
	abline(v=mean(sim.sigma), col="white", lwd=4, lty=2)
  # overlay normal distribution
  x.vals = seq(0.07, 0.13, length=100)
  lines(x.vals, dnorm(x.vals, mean=sigma, sd=sigma/sqrt(2*100)), col="orange", lwd=2)
par(mfrow=c(1,1))
```

## Approximate expected value, bias, SE, and 95% CI coverage probability

```{r}
# Evaluation of bias
mean(sim.sigma2)           # true sigma^2 = 0.01
mean(sim.sigma2) - sigma^2 # MC bias for sigma^2
mean(sim.sigma)            # true sigma = 0.1
mean(sim.sigma) - sigma    # MC bias for sigma
# Evaluation of SE
sd(sim.sigma2)         # MC SE for sigma^2
sigma^2/sqrt(n.obs/2)  # Asymptotic SE
sd(sim.sigma)          # MC SE for sigma
sigma/sqrt(2*n.obs)    # Asymptotic SE
# Evaluation of 95% Confidence Interval for sigma2
in.interval = sigma^2 >= sigma2.lower & sigma^2 <= sigma2.upper
sum(in.interval)/n.sim
# Evaluation of 95% Confidence Interval for sigma
in.interval = sigma >= sigma.lower & sigma <= sigma.upper
sum(in.interval)/n.sim
```

* Monte Carlo SE values are close to asymptotic SE values. 
* Monte Carlo SE values are more accurate (provided number of simulations is large)
* Monte Carlo 95% confidence interval coverage is close to 95%

## Histogram of 1000 Monte Carlo estimates of $\rho$

```{r, echo=FALSE}
# generate 1000 samples from CER and compute correlations
n.obs = 100
n.sim = 1000
set.seed(111)
sim.corrs = rep(0, n.sim)  # initialize vectors

muvec = c(0.05, 0.03)
sigmavec = c(0.1, 0.05)
names(muvec) = names(sigmavec) = c("Asset.1", "Asset.2")
rho12 = 0.75
Sigma = diag(sigmavec^2)
Sigma[1,2] = Sigma[2,1] = rho12*sigmavec[1]*sigmavec[2]
rho.lower = rep(0,n.sim)  
rho.upper = rep(0,n.sim)
for (sim in 1:n.sim) {
	sim.ret = rmvnorm(n.obs,mean=muvec, sigma=Sigma)
	sim.corrs[sim] = cor(sim.ret)[1,2]
	se.rhohat = (1 - sim.corrs[sim]^2)/sqrt(n.obs)
	rho.lower[sim] = sim.corrs[sim] - 2*se.rhohat
	rho.upper[sim] = sim.corrs[sim] + 2*se.rhohat
}
# histogram of MC estimates of rho
hist(sim.corrs, xlab="rhohat values", col="cornflowerblue", 
     main="rhohat", probability=TRUE)
abline(v=rho12, lwd=4, col="white")
x.vals = seq(0.5, 0.9, length=100)
lines(x.vals, dnorm(x.vals, mean=rho12, sd=(1-rho12^2)/sqrt(n.obs)), 
      col="orange", lwd=2)
```

True value is `r rho12`

## Approximate expected value, bias, SE, and 95% CI coverage probability

```{r}
mean(sim.corrs)          # true value is 0.75
mean(sim.corrs) - rho12  # bias
sd(sim.corrs)            # MC SE
(1-rho12^2)/sqrt(n.obs)  # Asymptotic SE
# Evaluation of 95% Confidence Interval for rho
in.interval = (rho12 >= rho.lower) & (rho12 <= rho.upper)
sum(in.interval)/n.sim
```

## Estimating simple return quantiles from the CER Model

```{r}
n.obs = length(msftRetC)
muhatS = colMeans(cerRetS)
sigmahatS = apply(cerRetS, 2, sd)
qhat.05 = muhatS + sigmahatS*qnorm(0.05)
qhat.01 = muhatS + sigmahatS*qnorm(0.01)
seQhat.05 = (sigmahatS/sqrt(n.obs))*sqrt(1 + 0.5*qnorm(0.05)^2)
seQhat.01 = (sigmahatS/sqrt(n.obs))*sqrt(1 + 0.5*qnorm(0.01)^2)
seQhat.001 = (sigmahatS/sqrt(n.obs))*sqrt(1 + 0.5*qnorm(0.001)^2)
qhat.001 = muhatS + sigmahatS*qnorm(0.001)
cbind(qhat.05, seQhat.05, qhat.01, seQhat.01, qhat.001, seQhat.001)

```

SE values for 1% quantiles and bigger than SE values for 5% quantiles

## 95% Confidence Intervals for simple return quantiles

```{r}
# 5% quantiles
lowerQhat.05 = qhat.05 - 2*seQhat.05
upperQhat.05 = qhat.05 + 2*seQhat.05
widthQhat.05 = upperQhat.05 - lowerQhat.05
cbind(lowerQhat.05, upperQhat.05, widthQhat.05)
# 1% quantiles
lowerQhat.01 = qhat.01 - 2*seQhat.01
upperQhat.01 = qhat.01 + 2*seQhat.01
widthQhat.01 = upperQhat.01 - lowerQhat.01
cbind(lowerQhat.01, upperQhat.01, widthQhat.01)
# 95% CI for .1% quantile
lowerQhat.001 = qhat.001 - 2*seQhat.001
upperQhat.001 = qhat.001 + 2*seQhat.001
widthQhat.001 = upperQhat.001 - lowerQhat.001
cbind(lowerQhat.001, upperQhat.001, widthQhat.001)
```

95% confidence intervals are somewhat large

## Histograms of 1000 Monte Carlo estimates of $\hat{q}_{\alpha}$ and $VaR_{\alpha}$ 

```{r, echo=FALSE}
mu = 0.05
sigma = 0.10
W0 = 100000
n.obs = 100
n.sim = 1000
set.seed(111)
sim.q = matrix(0,n.sim,2)  # initialize vectors
colnames(sim.q) = c("q.05", "q.01")
sim.VaR = matrix(0,n.sim,2)  # initialize vectors
colnames(sim.VaR) = c("VaR.05", "VaR.01")
q.05 = mu + sigma*qnorm(0.05)
q.01 = mu + sigma*qnorm(0.01)
VaR.05 = W0*q.05
VaR.01 = W0*q.01
for (sim in 1:n.sim) {
	sim.ret = rnorm(n.obs,mean=mu,sd=sigma)
	muhat = mean(sim.ret)
	sigmahat = sd(sim.ret)
	sim.q[sim, "q.05"] = muhat + sigmahat*qnorm(0.05)
	sim.q[sim, "q.01"] = muhat + sigmahat*qnorm(0.01)
	sim.VaR[sim, "VaR.05"] = W0*sim.q[sim, "q.05"]
	sim.VaR[sim, "VaR.01"] = W0*sim.q[sim, "q.01"]
}
par(mfrow=c(2,2))
  # q.05
  hist(sim.q[, "q.05"], col="cornflowerblue", xlab="qhat.05 values", 
       main="qhat.05", probability=T)
	abline(v=mean(sim.q[, "q.05"]), col="white", lwd=4, lty=2)
  # overlay normal distribution
  x.vals = seq(-0.16, 0.06, length=100)
  lines(x.vals, dnorm(x.vals, mean=q.05, sd=((sigma/sqrt(n.obs))*sqrt(1 + 0.5*qnorm(0.05)^2))),
                      col="orange", lwd=2)
  # q.01
	hist(sim.q[, "q.01"], col="cornflowerblue", xlab="qhat.01 values", 
       main="qhat.01", probability=T)
	abline(v=mean(sim.q[, "q.01"]), col="white", lwd=4, lty=2)
  # overlay normal distribution
  x.vals = seq(-0.24, -0.12, length=100)
  lines(x.vals, dnorm(x.vals, mean=q.01, sd=((sigma/sqrt(n.obs))*sqrt(1 + 0.5*qnorm(0.01)^2))),
        col="orange", lwd=2)
  # VaR.05
  hist(sim.VaR[, "VaR.05"], col="cornflowerblue", xlab="VaRhat.05 values", 
       main="VaRhat.05", probability=T)
	abline(v=mean(sim.VaR[, "VaR.05"]), col="white", lwd=4, lty=2)
  # overlay normal distribution
  x.vals = seq(-16000, 6000, length=100)
  lines(x.vals, dnorm(x.vals, mean=VaR.05, sd=((W0*sigma/sqrt(n.obs))*sqrt(1 + 0.5*qnorm(0.05)^2))),
                      col="orange", lwd=2)
  # VaR.01
 	hist(sim.VaR[, "VaR.01"], col="cornflowerblue", xlab="VaRhat.01 values", 
       main="VaRhat.01", probability=T)
	abline(v=mean(sim.VaR[, "VaR.01"]), col="white", lwd=4, lty=2)
  # overlay normal distribution
  x.vals = seq(-24000, -12000, length=100)
  lines(x.vals, dnorm(x.vals, mean=VaR.01, sd=((W0*sigma/sqrt(n.obs))*sqrt(1 + 0.5*qnorm(0.01)^2))),
        col="orange", lwd=2)
par(mfrow=c(1,1))
```

The Monte Carlo estimates of bias are:

```{r}
colMeans(sim.q) - c(q.05, q.01)
colMeans(sim.VaR) - c(VaR.05, VaR.01)
```

A comparison of the Monte Carlo sample standard deviations with the analytic standard errors is:

```{r}
se.qhat.05 = (sigma/sqrt(n.obs))*sqrt(1 + 0.5*qnorm(0.05)^2)
se.qhat.01 = (sigma/sqrt(n.obs))*sqrt(1 + 0.5*qnorm(0.01)^2)
se.VaRhat.05 = (W0*sigma/sqrt(n.obs))*sqrt(1 + 0.5*qnorm(0.05)^2)
se.VaRhat.01 = (W0*sigma/sqrt(n.obs))*sqrt(1 + 0.5*qnorm(0.01)^2)
apply(sim.q, 2, sd)
c(se.qhat.05, se.qhat.01)
apply(sim.VaR, 2, sd)
c(se.VaRhat.05, se.VaRhat.01)
```