--- title: "Estimation of CER Model" author: "Eric Zivot" date: "Thursday, February 17, 2016" output: slidy_presentation --- ## Set options and load packages ```{r} options(digits=3, width=70) # install IntroCompFinR package from R-forge # use install.packages("IntroCompFinR", repos="http://R-Forge.R-project.org") library(IntroCompFinR) library(mvtnorm) library(PerformanceAnalytics) library(zoo) Sys.setenv(TZ="UTC") ``` ## Get data from package IntroCompfinR ```{r} # get data from IntroCompFin package data(msftDailyPrices, sp500DailyPrices, sbuxDailyPrices) msftPrices = to.monthly(msftDailyPrices, OHLC=FALSE) sp500Prices = to.monthly(sp500DailyPrices, OHLC=FALSE) sbuxPrices = to.monthly(sbuxDailyPrices, OHLC=FALSE) # set sample to match book chapter smpl = "1998-01::2012-05" msftPrices = msftPrices[smpl] sp500Prices = sp500Prices[smpl] sbuxPrices = sbuxPrices[smpl] # calculate returns msftRetS = na.omit(Return.calculate(msftPrices, method="simple")) sp500RetS = na.omit(Return.calculate(sp500Prices, method="simple")) sbuxRetS = na.omit(Return.calculate(sbuxPrices, method="simple")) msftRetC = log(1 + msftRetS) sp500RetC = log(1 + sp500RetS) sbuxRetC = log(1 + sbuxRetS) # merged data set cerRetS = merge(msftRetS, sbuxRetS, sp500RetS) cerRetC = merge(msftRetC, sbuxRetC, sp500RetC) colnames(cerRetS) = colnames(cerRetC) = c("MSFT", "SBUX", "SP500") head(cerRetC, n=3) ``` ## Plot 3 assets ```{r} my.panel <- function(...) { lines(...) abline(h=0) } plot.zoo(cerRetC, col="blue", lwd=2, main="", panel=my.panel, type="l") ``` ## Plot 3 assets ```{r} pairs(coredata(cerRetC), col="blue", cex=1.25, pch=16) ``` ## CER model parameter estimates = sample descriptive statistics ```{r} # Estimate mean (mu) muhat = apply(cerRetC,2,mean) muhat # Estimate variance (sigma^2) sigma2hat = apply(cerRetC,2,var) sigma2hat # Estimate volatility (sigma) sigmahat = apply(cerRetC,2,sd) sigmahat ``` ## Show mean-volatility tradeoff ```{r} plot(sigmahat, muhat, col="blue", pch=16, cex=2, ylim=c(0, 0.015), xlim=c(0, 0.12)) text(sigmahat, muhat, labels=colnames(cerRetC), pos=4) ``` Assets with highest means also have highest volatilities ## Estimate covariance and correlation matrices ```{r} covmat = var(cerRetC) covmat cormat = cor(cerRetC) cormat ``` ## Extract unique values from covariance and correlation matrices ```{r} covhat = covmat[lower.tri(covmat)] rhohat = cormat[lower.tri(cormat)] names(covhat) <- names(rhohat) <- c("msft,sbux","msft,sp500","sbux,sp500") covhat rhohat ``` ## Estimated standard errors for means ```{r} n.obs = nrow(cerRetC) seMuhat = sigmahat/sqrt(n.obs) cbind(muhat, seMuhat) ``` Estimated SE values are large compared to estimates. Do not estimate mean values very well. ## Estimated standard errors for variances and standard deviations ```{r} seSigma2hat = sigma2hat/sqrt(n.obs/2) seSigmahat = sigmahat/sqrt(2*n.obs) cbind(sigma2hat, seSigma2hat, sigmahat, seSigmahat) ``` Estimated SE values for volatility are small compared to estimates. Estimate volatility well. ## Estimated standard errors for correlations ```{r} seRhohat = (1-rhohat^2)/sqrt(n.obs) cbind(rhohat, seRhohat) ``` Estimated standard errors for correlations are somewhat small compared to estimates. Notice how standard error is smallest for the largest correlation estimate. ## Approximate 95% confidence intervals for $\mu$ ```{r} lowerMu = muhat - 2*seMuhat upperMu = muhat + 2*seMuhat widthMu = upperMu - lowerMu cbind(lowerMu, upperMu, widthMu) ``` Wide 95% confidence intervals for the mean implies imprecise estimates. Notice that the confidence interval width is smallest for the S&P 500 index (why?) ## Approximate 95% confidence intervals for $\sigma^2$ and $\sigma$ ```{r} # confidence intervals for sigma^2 lowerSigma2 = sigma2hat - 2*seSigma2hat upperSigma2 = sigma2hat + 2*seSigma2hat widthSigma2 = upperSigma2 - lowerSigma2 cbind(lowerSigma2, upperSigma2, widthSigma2) # confidence intervals for sigma lowerSigma = sigmahat - 2*seSigmahat upperSigma = sigmahat + 2*seSigmahat widthSigma = upperSigma - lowerSigma cbind(lowerSigma, upperSigma, widthSigma) ``` Confidence intervals for sigma are narrow. Estimates are precise. ## Approximate 95% confidence intervals for $\rho$ ```{r} lowerRho = rhohat - 2*seRhohat upperRho = rhohat + 2*seRhohat widthRho = upperRho - lowerRho cbind(lowerRho, upperRho, widthRho) ``` Confidence intervals are not too wide and contain all positive values. Hence, estimates are moderately precise. ## Stylized facts for the estimation of CER model parameters * The expected return is not estimated very precisely + Large standard errors relative to size of mean estimates * Standard deviations and correlations are estimated more precisely than the expected return ## Monte Carlo Simulation Loop ```{r} mu = 0.05 sigma = 0.10 n.obs = 100 n.sim = 1000 set.seed(111) sim.means = rep(0,n.sim) # initialize vectors mu.lower = rep(0,n.sim) mu.upper = rep(0,n.sim) qt.975 = qt(0.975, n.obs-1) for (sim in 1:n.sim) { sim.ret = rnorm(n.obs,mean=mu,sd=sigma) sim.means[sim] = mean(sim.ret) se.muhat = sd(sim.ret)/sqrt(n.obs) mu.lower[sim] = sim.means[sim]-qt.975*se.muhat mu.upper[sim] = sim.means[sim]+qt.975*se.muhat } ``` ## First 10 simulated samples from CER model ```{r, echo=FALSE} n.sim2 = 10 set.seed(111) sim.mat = matrix(0, n.obs, n.sim2) for (sim in 1:n.sim2) { sim.mat[,sim] = rnorm(n.obs, mean=mu, sd=sigma) } my.panel <- function(...) { lines(...) abline(h=0) } plot(as.zoo(sim.mat), col="blue", lwd=2, main="", panel=my.panel) ``` ## Histogram of 1000 Monte Carlo estimates of $\mu$ ```{r} hist(sim.means, col="cornflowerblue", ylim=c(0,40), main="", xlab="muhat", probability=TRUE) abline(v=mean(sim.means), col="white", lwd=4, lty=2) # overlay normal curve x.vals = seq(0.02, 0.08, length=100) lines(x.vals, dnorm(x.vals, mean=mu, sd=sigma/sqrt(100)), col="orange", lwd=2) ``` Approximate expected value, bias, SE, and 95% CI coverage probability ```{r} mean(sim.means) # MC expected value mean(sim.means) - mu # MC bias sd(sim.means) # MC SE sigma/sqrt(n.obs) # true SE(muhat) # 95% CI coverage probability in.interval = mu >= mu.lower & mu <= mu.upper sum(in.interval)/n.sim ``` ## Histograms of 1000 Monte Carlo estimates of $\sigma^2$ and $\sigma$ ```{r, echo=FALSE} mu = 0.05 sigma = 0.10 n.obs = 100 n.sim = 1000 set.seed(111) sim.means = rep(0,n.sim) # initialize vectors sim.sigma2 = rep(0,n.sim) sigma2.lower = rep(0,n.sim) sigma2.upper = rep(0,n.sim) sim.sigma = rep(0,n.sim) sigma.lower = rep(0,n.sim) sigma.upper = rep(0,n.sim) for (sim in 1:n.sim) { sim.ret = rnorm(n.obs,mean=mu,sd=sigma) sim.means[sim] = mean(sim.ret) sim.sigma2[sim] = var(sim.ret) sim.sigma[sim] = sqrt(sim.sigma2[sim]) sigma2.lower[sim] = sim.sigma2[sim] - 2*sim.sigma2[sim]/sqrt(n.obs/2) sigma2.upper[sim] = sim.sigma2[sim] + 2*sim.sigma2[sim]/sqrt(n.obs/2) sigma.lower[sim] = sim.sigma[sim] - 2*sim.sigma[sim]/sqrt(n.obs*2) sigma.upper[sim] = sim.sigma[sim] + 2*sim.sigma[sim]/sqrt(n.obs*2) } par(mfrow=c(1,2)) hist(sim.sigma2, col="cornflowerblue", xlab="sigma2 hat values", main="sigma2 hat", probability=T) abline(v=mean(sim.sigma2), col="white", lwd=4, lty=2) # overlay normal distribution x.vals = seq(0.005, 0.016, length=100) lines(x.vals, dnorm(x.vals, mean=sigma^2, sd=sigma^2/sqrt(100/2)), col="orange", lwd=2) hist(sim.sigma, col="cornflowerblue", xlab="sigma hat values", main="sigma hat", probability=T) abline(v=mean(sim.sigma), col="white", lwd=4, lty=2) # overlay normal distribution x.vals = seq(0.07, 0.13, length=100) lines(x.vals, dnorm(x.vals, mean=sigma, sd=sigma/sqrt(2*100)), col="orange", lwd=2) par(mfrow=c(1,1)) ``` ## Approximate expected value, bias, SE, and 95% CI coverage probability ```{r} # Evaluation of bias mean(sim.sigma2) # true sigma^2 = 0.01 mean(sim.sigma2) - sigma^2 # MC bias for sigma^2 mean(sim.sigma) # true sigma = 0.1 mean(sim.sigma) - sigma # MC bias for sigma # Evaluation of SE sd(sim.sigma2) # MC SE for sigma^2 sigma^2/sqrt(n.obs/2) # Asymptotic SE sd(sim.sigma) # MC SE for sigma sigma/sqrt(2*n.obs) # Asymptotic SE # Evaluation of 95% Confidence Interval for sigma2 in.interval = sigma^2 >= sigma2.lower & sigma^2 <= sigma2.upper sum(in.interval)/n.sim # Evaluation of 95% Confidence Interval for sigma in.interval = sigma >= sigma.lower & sigma <= sigma.upper sum(in.interval)/n.sim ``` * Monte Carlo SE values are close to asymptotic SE values. * Monte Carlo SE values are more accurate (provided number of simulations is large) * Monte Carlo 95% confidence interval coverage is close to 95% ## Histogram of 1000 Monte Carlo estimates of $\rho$ ```{r, echo=FALSE} # generate 1000 samples from CER and compute correlations n.obs = 100 n.sim = 1000 set.seed(111) sim.corrs = rep(0, n.sim) # initialize vectors muvec = c(0.05, 0.03) sigmavec = c(0.1, 0.05) names(muvec) = names(sigmavec) = c("Asset.1", "Asset.2") rho12 = 0.75 Sigma = diag(sigmavec^2) Sigma[1,2] = Sigma[2,1] = rho12*sigmavec[1]*sigmavec[2] rho.lower = rep(0,n.sim) rho.upper = rep(0,n.sim) for (sim in 1:n.sim) { sim.ret = rmvnorm(n.obs,mean=muvec, sigma=Sigma) sim.corrs[sim] = cor(sim.ret)[1,2] se.rhohat = (1 - sim.corrs[sim]^2)/sqrt(n.obs) rho.lower[sim] = sim.corrs[sim] - 2*se.rhohat rho.upper[sim] = sim.corrs[sim] + 2*se.rhohat } # histogram of MC estimates of rho hist(sim.corrs, xlab="rhohat values", col="cornflowerblue", main="rhohat", probability=TRUE) abline(v=rho12, lwd=4, col="white") x.vals = seq(0.5, 0.9, length=100) lines(x.vals, dnorm(x.vals, mean=rho12, sd=(1-rho12^2)/sqrt(n.obs)), col="orange", lwd=2) ``` True value is `r rho12` ## Approximate expected value, bias, SE, and 95% CI coverage probability ```{r} mean(sim.corrs) # true value is 0.75 mean(sim.corrs) - rho12 # bias sd(sim.corrs) # MC SE (1-rho12^2)/sqrt(n.obs) # Asymptotic SE # Evaluation of 95% Confidence Interval for rho in.interval = (rho12 >= rho.lower) & (rho12 <= rho.upper) sum(in.interval)/n.sim ``` ## Estimating simple return quantiles from the CER Model ```{r} n.obs = length(msftRetC) muhatS = colMeans(cerRetS) sigmahatS = apply(cerRetS, 2, sd) qhat.05 = muhatS + sigmahatS*qnorm(0.05) qhat.01 = muhatS + sigmahatS*qnorm(0.01) seQhat.05 = (sigmahatS/sqrt(n.obs))*sqrt(1 + 0.5*qnorm(0.05)^2) seQhat.01 = (sigmahatS/sqrt(n.obs))*sqrt(1 + 0.5*qnorm(0.01)^2) seQhat.001 = (sigmahatS/sqrt(n.obs))*sqrt(1 + 0.5*qnorm(0.001)^2) qhat.001 = muhatS + sigmahatS*qnorm(0.001) cbind(qhat.05, seQhat.05, qhat.01, seQhat.01, qhat.001, seQhat.001) ``` SE values for 1% quantiles and bigger than SE values for 5% quantiles ## 95% Confidence Intervals for simple return quantiles ```{r} # 5% quantiles lowerQhat.05 = qhat.05 - 2*seQhat.05 upperQhat.05 = qhat.05 + 2*seQhat.05 widthQhat.05 = upperQhat.05 - lowerQhat.05 cbind(lowerQhat.05, upperQhat.05, widthQhat.05) # 1% quantiles lowerQhat.01 = qhat.01 - 2*seQhat.01 upperQhat.01 = qhat.01 + 2*seQhat.01 widthQhat.01 = upperQhat.01 - lowerQhat.01 cbind(lowerQhat.01, upperQhat.01, widthQhat.01) # 95% CI for .1% quantile lowerQhat.001 = qhat.001 - 2*seQhat.001 upperQhat.001 = qhat.001 + 2*seQhat.001 widthQhat.001 = upperQhat.001 - lowerQhat.001 cbind(lowerQhat.001, upperQhat.001, widthQhat.001) ``` 95% confidence intervals are somewhat large ## Histograms of 1000 Monte Carlo estimates of $\hat{q}_{\alpha}$ and $VaR_{\alpha}$ ```{r, echo=FALSE} mu = 0.05 sigma = 0.10 W0 = 100000 n.obs = 100 n.sim = 1000 set.seed(111) sim.q = matrix(0,n.sim,2) # initialize vectors colnames(sim.q) = c("q.05", "q.01") sim.VaR = matrix(0,n.sim,2) # initialize vectors colnames(sim.VaR) = c("VaR.05", "VaR.01") q.05 = mu + sigma*qnorm(0.05) q.01 = mu + sigma*qnorm(0.01) VaR.05 = W0*q.05 VaR.01 = W0*q.01 for (sim in 1:n.sim) { sim.ret = rnorm(n.obs,mean=mu,sd=sigma) muhat = mean(sim.ret) sigmahat = sd(sim.ret) sim.q[sim, "q.05"] = muhat + sigmahat*qnorm(0.05) sim.q[sim, "q.01"] = muhat + sigmahat*qnorm(0.01) sim.VaR[sim, "VaR.05"] = W0*sim.q[sim, "q.05"] sim.VaR[sim, "VaR.01"] = W0*sim.q[sim, "q.01"] } par(mfrow=c(2,2)) # q.05 hist(sim.q[, "q.05"], col="cornflowerblue", xlab="qhat.05 values", main="qhat.05", probability=T) abline(v=mean(sim.q[, "q.05"]), col="white", lwd=4, lty=2) # overlay normal distribution x.vals = seq(-0.16, 0.06, length=100) lines(x.vals, dnorm(x.vals, mean=q.05, sd=((sigma/sqrt(n.obs))*sqrt(1 + 0.5*qnorm(0.05)^2))), col="orange", lwd=2) # q.01 hist(sim.q[, "q.01"], col="cornflowerblue", xlab="qhat.01 values", main="qhat.01", probability=T) abline(v=mean(sim.q[, "q.01"]), col="white", lwd=4, lty=2) # overlay normal distribution x.vals = seq(-0.24, -0.12, length=100) lines(x.vals, dnorm(x.vals, mean=q.01, sd=((sigma/sqrt(n.obs))*sqrt(1 + 0.5*qnorm(0.01)^2))), col="orange", lwd=2) # VaR.05 hist(sim.VaR[, "VaR.05"], col="cornflowerblue", xlab="VaRhat.05 values", main="VaRhat.05", probability=T) abline(v=mean(sim.VaR[, "VaR.05"]), col="white", lwd=4, lty=2) # overlay normal distribution x.vals = seq(-16000, 6000, length=100) lines(x.vals, dnorm(x.vals, mean=VaR.05, sd=((W0*sigma/sqrt(n.obs))*sqrt(1 + 0.5*qnorm(0.05)^2))), col="orange", lwd=2) # VaR.01 hist(sim.VaR[, "VaR.01"], col="cornflowerblue", xlab="VaRhat.01 values", main="VaRhat.01", probability=T) abline(v=mean(sim.VaR[, "VaR.01"]), col="white", lwd=4, lty=2) # overlay normal distribution x.vals = seq(-24000, -12000, length=100) lines(x.vals, dnorm(x.vals, mean=VaR.01, sd=((W0*sigma/sqrt(n.obs))*sqrt(1 + 0.5*qnorm(0.01)^2))), col="orange", lwd=2) par(mfrow=c(1,1)) ``` The Monte Carlo estimates of bias are: ```{r} colMeans(sim.q) - c(q.05, q.01) colMeans(sim.VaR) - c(VaR.05, VaR.01) ``` A comparison of the Monte Carlo sample standard deviations with the analytic standard errors is: ```{r} se.qhat.05 = (sigma/sqrt(n.obs))*sqrt(1 + 0.5*qnorm(0.05)^2) se.qhat.01 = (sigma/sqrt(n.obs))*sqrt(1 + 0.5*qnorm(0.01)^2) se.VaRhat.05 = (W0*sigma/sqrt(n.obs))*sqrt(1 + 0.5*qnorm(0.05)^2) se.VaRhat.01 = (W0*sigma/sqrt(n.obs))*sqrt(1 + 0.5*qnorm(0.01)^2) apply(sim.q, 2, sd) c(se.qhat.05, se.qhat.01) apply(sim.VaR, 2, sd) c(se.VaRhat.05, se.VaRhat.01) ```