# cerModelExamples.r scripts for examples in the CER model chapter # # author: Eric Zivot # created: January 30, 2002 # update History: # July 26, 2011 # updated examples for summer 2011. # # data for examples # cerExample dataframe with month prices on Microsoft, Starbucks and SP500 # from June 1992 through October 2000. Price data taken from Yahoo # # R function used # abline draw line on graph # acf compute sample autocovariances or autocorrelations # args determine agruments of a function # boxplot compute boxplot # cbind combine data objects vertically # class determine class of object # colnames get column names from object # cor compute sample correlation matrix # density compute smoothed histogram # dnorm compute density from normal distribution # end get end date of time series # help invoke help system # hist compute histogram # legend add legend to graph # length compute column length of matrix # library load R package # mean compute sample mean # names show names of object # pairs all pairwise scatterplots # par set graphics parameters # plot generic plot function # pnorm compute normal CDF # points add points to a graph # qqline add line to qq-plot # qqnorm qq-plot against normal distribution # quantile compute empirical quantiles # range compute range = max - min # rmvnorm generate multivariate normal random numbers # rnorm generate normal random numbers # seq generate sequence of numbers # set.seed set random number seed # sort sort data # start get start date of time series # sd compute sample standard deviation # ts.plot time series plot # var compute sample variance or covariance matrix # ? invoke help system # # R Packages Used # zoo # PerformanceAnalytics # mvtnorm # options(digits=4, width=70) # load R packages library("PerformanceAnalytics") library("mvtnorm") #library("zoo") #library("TSA") # # Example data sets # cerExample data.frame contain price data on sbux, msft and sp500 from # June 1992 - October 2000. Source data is in the csv file cerExample.csv # that can be downloaded from # # http://faculty.washington.edu/ezivot/econ424/424notes.htm # # read prices from csv file on class webpage cerExample.df = read.csv(file="http://faculty.washington.edu//ezivot//econ424//cerExample.csv") # create zooreg object - regularly spaced zoo object cerExample.z = zooreg(data=as.matrix(cerExample.df), start=c(1992,6), end=c(2000,10), frequency=12) index(cerExample.z) = as.yearmon(index(cerExample.z)) cerExample.z = as.zoo(cerExample.z) colnames(cerExample.z) start(cerExample.z) end(cerExample.z) # create cc returns from prices returns.z = diff(log(cerExample.z)) colnames(returns.z) start(returns.z) end(returns.z) head(returns.z) # # plot prices and returns for Microsoft # par(mfrow=c(2,1)) plot(cerExample.z[,"msft"], col="blue", lwd=2, ylab="price", main="Monthly Prices on MSFT") plot(log(cerExample.z[,"msft"]),col="blue", lwd=2, ylab="log price") par(mfrow=c(1,1)) plot(returns.z[,"msft"],ylab="cc return", main="Monthly cc returns on Microsoft", col="blue", lwd=2) abline(h=0) # graphically summarize empirical distribution # # put data in matrix and compute mean and standard deviation # returns.mat = coredata(returns.z) mu.msft = mean(returns.mat[,"msft"]) sigma.msft = sd(returns.mat[,"msft"]) mu.msft sigma.msft par(mfrow=c(1,2)) plot(returns.z[,"msft"],ylab="returns", main="", col="blue", lwd=2) abline(h=0) hist(returns.mat[, "msft"], main="", xlab="returns", col="slateblue1") par(mfrow=c(1,1)) par(mfrow=c(2,2)) hist(returns.mat[,"msft"], xlab="return",ylab="frequency", main="Monthly cc returns on MSFT", col="slateblue1") boxplot(returns.mat[,"msft"], col="slateblue1") plot(density(returns.mat[,"msft"]),type="l",xlab="return",ylab="density", lwd=2, col="slateblue1", main="smoothed densiy") qqnorm(returns.mat[,"msft"], col="blue") qqline(returns.mat[,"msft"]) par(mfrow=c(1,1)) # # Illustrate simulation of random numbers using inverse CDF technique # xx = seq(from=-4, to=4, length=250) plot(xx,pnorm(xx), type="l", ylab="u", xlab="x", lwd=3) # # simulate CER model calibrated to monthly returns on MSFT # mu = 0.03 sd.e = 0.10 nobs = 100 set.seed(111) sim.e = rnorm(nobs, mean=0, sd=sd.e) sim.ret = mu + sim.e # plot simulated returns par(mfrow=c(1,2)) ts.plot(sim.ret, main="", xlab="months",ylab="return", lwd=2, col="blue") abline(h=mu) hist(sim.ret, main="", xlab="returns", col="slateblue1") par(mfrow=c(1,1)) # graphically summarize empirical distribution of simulated data par(mfrow=c(2,2)) hist(sim.ret, xlab="return",ylab="frequency", main="Simulated returns from CER model", col="slateblue1") boxplot(sim.ret, col="slateblue1") plot(density(sim.ret),type="l",xlab="return",ylab="density", col="slateblue1", lwd=2, main="smoothed densiy") qqnorm(sim.ret, col="slateblue1") qqline(sim.ret) par(mfrow=c(1,1)) # compare msft data to simulated gaussian data boxplot(returns.z[,"msft"], sim.ret, col="slateblue1", names=c("MSFT","GWN")) # compute quantiles and compare to normal quantiles pvals = c(0.01,0.05,0.25,0.5,0.75,0.95,0.99) quantile(probs=pvals,sim.ret) qnorm(p=pvals,mean=mu,sd=sd) # compute descriptive statistics mean(sim.ret) sd(sim.ret) skewness(sim.ret) kurtosis(sim.ret) # plot autocorrelations tmp = acf(sim.ret) # # simulate random walk model with initial log Price = 1 # mu = 0.03 sd.e = 0.10 nobs = 100 set.seed(111) sim.e = rnorm(nobs, mean=0, sd=sd.e) sim.p = 1 + mu*seq(nobs) + cumsum(sim.e) sim.P = exp(sim.p) par(mfrow=c(2,1)) ts.plot(sim.p, col="blue",lwd=2, ylim=c(-2, 4), ylab="log price") lines( (1+mu*seq(nobs)), lty="dotted", col="black", lwd=2) lines(cumsum(sim.e), col="orange", lty="dashed", lwd=2) abline(h=0) legend(x="topleft",legend=c("p(t)","E[p(t)]","p(t)-E[p(t)]"), lty=c("solid","dotted","dashed"), col=c("blue","black","orange"), lwd=2, cex=c(0.75,0.75,0.75)) ts.plot(sim.P, lwd=2, col="blue", ylab="price") par(mfrow=c(1,1)) # # plot returns on msft, sbux and sp500 # msft.ret = returns.z[,"msft"] sbux.ret = returns.z[,"sbux"] sp500.ret = returns.z[,"sp500"] # panel function to put horizontal lines at zero in each panel my.panel <- function(...) { lines(...) abline(h=0) } plot(returns.z, col="blue", lwd=2, main="", panel=my.panel) plot(returns.z, plot.type="single", lwd=2, col=1:3) legend(x="bottomleft", legend=colnames(returns.z), col=1:3, lwd=2) abline(h=0) pairs(returns.mat, col="blue") # multivariate simulation mu = c(0.03,0.03,0.01) sig2.msft = 0.018 sig2.sbux = 0.011 sig2.sp500 = 0.001 sig.msft.sbux = 0.004 sig.msft.sp500 = 0.002 sig.sbux.sp500 = 0.002 Sigma = matrix(c(sig2.sbux, sig.msft.sbux, sig.sbux.sp500, sig.msft.sbux, sig2.msft, sig.msft.sp500, sig.sbux.sp500, sig.msft.sp500, sig2.sp500), nrow=3, ncol=3, byrow=TRUE) nobs = 100 set.seed(123) returns.sim = rmvnorm(nobs, mean=mu, sigma=Sigma) colnames(returns.sim) = c("sbux", "msft", "sp500") plot(as.zoo(returns.sim), lwd=2, col="blue", main="", ylab=c("sbux", "msft", "sp500"), panel=my.panel) plot(as.zoo(returns.sim), lwd=2, main="", ylab=c("sbux", "msft", "sp500"), plot.type="single", col=c("black","red","green")) abline(h=0) legend(x="topleft", legend=colnames(returns.sim), col=1:3, lwd=2) pairs(returns.sim, col="blue") # # plot pdf's for unbiased estimator with high variance, and for # biased estimator with low variance # x.min = -3 x.max = 3 npts = 100 x.vals = seq(from=x.min,to=x.max,length=npts) pdf1 = dnorm(x.vals,mean=0,sd=1) pdf2 = dnorm(x.vals,mean=0.1,sd=0.25) ylimits = range(pdf1,pdf2) plot(x.vals,pdf1,ylim=ylimits,type="l",lty=1,ylab="pdf", xlab="estimate value", lwd=2) segments(x0=0, y0=0, x1=0, y1=dnorm(0), lwd=2) lines(x.vals, pdf2, lty=2, lwd=2, col="red") legend(x="topleft",legend=c("theta.hat 1","theta.hat 2"), lty=1:2, lwd=c(2,2), col=c(1,2)) segments(x0=0.1, y0=0, x1=0.1, y1=dnorm(0.1, mean=0.1, sd=0.25), lwd=2, col="red") # # estimate parameters from CER model # options(digits=4) muhat.vals = apply(returns.mat,2,mean) muhat.vals sigma2hat.vals = apply(returns.mat,2,var) sigma2hat.vals sigmahat.vals = apply(returns.mat,2,sd) sigmahat.vals cov.mat = var(returns.z) cov.mat cor.mat = cor(returns.z) cor.mat covhat.vals = cov.mat[lower.tri(cov.mat)] rhohat.vals = cor.mat[lower.tri(cor.mat)] names(covhat.vals) <- names(rhohat.vals) <- c("sbux,msft","sbux,sp500","msft,sp500") covhat.vals rhohat.vals # compute estimated standard error for mean nobs = nrow(returns.mat) nobs se.muhat = sigmahat.vals/sqrt(nobs) se.muhat # compute t-ratios muhat.vals/se.muhat # evaluate sampling distribution of muhat x.vals = seq(3, -3, length=100) pdf1 = dnorm(x.vals) pdf2 = dnorm(x.vals, sd=1/sqrt(10)) pdf3 = dnorm(x.vals, sd=1/sqrt(50)) ub = max(pdf1, pdf2, pdf3) plot(x.vals, pdf1, ylim=c(0, ub), type="l", lwd=2) lines(x.vals, pdf2, lwd=2, col=2, lty=2) lines(x.vals, pdf3, lwd=2, col=3, lty=3) legend(x="topleft", legend=c("pdf: T=1", "pdf: T=10", "pdf: T=50"), lty=1:3, col=1:3, lwd=2) # compute exact 95% confidence intervals t.975 = qt(0.975, df=99) mu.lower = muhat.vals - t.975*se.muhat mu.upper = muhat.vals + t.975*se.muhat mu.width = mu.upper - mu.lower cbind(mu.lower,mu.upper,mu.width) # compute approx 95% confidence intervals mu.lower = muhat.vals - 2*se.muhat mu.upper = muhat.vals + 2*se.muhat mu.width = mu.upper - mu.lower cbind(mu.lower,mu.upper,mu.width) # compute estimated standard errors for variance and sd se.sigma2hat = sigma2hat.vals/sqrt(nobs/2) se.sigmahat = sigmahat.vals/sqrt(2*nobs) se.sigma2hat se.sigmahat # compute approx 95% confidence intervals sigma2.lower = sigma2hat.vals - 2*se.sigma2hat sigma2.upper = sigma2hat.vals + 2*se.sigma2hat sigma2.width = sigma2.upper - sigma2.lower cbind(sigma2.lower,sigma2.upper,sigma2.width) sigma.lower = sigmahat.vals - 2*se.sigmahat sigma.upper = sigmahat.vals + 2*se.sigmahat sigma.width = sigma.upper - sigma.lower cbind(sigma.lower,sigma.upper,sigma.width) # compute estimated standard errors for correlation se.rhohat = (1-rhohat.vals^2)/sqrt(nobs) se.rhohat # compute approx 95% confidence intervals rho.lower = rhohat.vals - 2*se.rhohat rho.upper = rhohat.vals + 2*se.rhohat rho.width = rho.upper - rho.lower cbind(rho.lower,rho.upper,rho.width) # # Monte Carlo evaluation of unbiasedness # # generate 10 simulated samples from CER model and plot # r = 0.05 + e, e ~ iid N(0, 0.10^2) mu = 0.05 sd = 0.10 n.obs = 100 n.sim = 10 set.seed(111) sim.ret = matrix(0, n.obs, n.sim) for (sim in 1:n.sim) { sim.ret[,sim] = rnorm(n.obs,mean=mu,sd=sd) } ts.plot(sim.ret,ylab="returns", col=1:10, lty=1:10) abline(h=0) plot(as.zoo(sim.ret), col="blue", lwd=2, main="", panel=my.panel) # generate 1000 samples from CER and evaluate sampling properties of muhat mu = 0.05 sd = 0.10 n.obs = 100 n.sim = 1000 set.seed(111) sim.means = rep(0,n.sim) # initialize vectors mu.lower = rep(0,n.sim) mu.upper = rep(0,n.sim) qt.975 = qt(0.975, nobs-1) for (sim in 1:n.sim) { sim.ret = rnorm(n.obs,mean=mu,sd=sd) sim.means[sim] = mean(sim.ret) se.muhat = sd(sim.ret)/sqrt(n.obs) mu.lower[sim] = sim.means[sim]-qt.975*se.muhat mu.upper[sim] = sim.means[sim]+qt.975*se.muhat } mean(sim.means) sd(sim.means) in.interval = mu >= mu.lower & mu <= mu.upper sum(in.interval)/n.sim hist(sim.means, col="slateblue1", ylim=c(0,40), main="", xlab="muhat", probability=T) abline(v=mean(sim.means), col="white", lwd=4, lty=2) # overlay normal curve x.vals = seq(0.02, 0.08, length=100) lines(x.vals, dnorm(x.vals, mean=mu, sd=sd/sqrt(100)), col="orange", lwd=2) # compute expected value of estimates and bias mean(sim.means) mean(sim.means) - mu sd(sim.means) se.muhat["msft"] mu = 0.05 sd = 0.10 n.obs = 100 n.sim = 1000 set.seed(111) sim.means = rep(0,n.sim) # initialize vectors sim.vars = rep(0,n.sim) sim.sds = rep(0,n.sim) for (sim in 1:n.sim) { sim.ret = rnorm(n.obs,mean=mu,sd=sd) sim.means[sim] = mean(sim.ret) sim.vars[sim] = var(sim.ret) sim.sds[sim] = sqrt(sim.vars[sim]) } # compute expected value of estimates and bias mean(sim.means) mean(sim.means) - mu mean(sim.vars) mean(sim.vars) - sd^2 mean(sim.sds) mean(sim.sds) - sd par(mfrow=c(2,2)) hist(sim.means, col="slateblue1", xlab="mu hat values", main="mu hat") abline(v=mean(sim.means), col="white", lwd=4, lty=2) hist(sim.vars, col="slateblue1", xlab="sigma2 hat values", main="sigma2 hat") abline(v=mean(sim.vars), col="white", lwd=4, lty=2) hist(sim.sds, col="slateblue1", xlab="sigma hat values", main="sigma hat") abline(v=mean(sim.sds), col="white", lwd=4, lty=2) par(mfrow=c(1,1)) # # compute Monte Carlo standard errors and compare with # asymptotic formulas based on true values # sd(sim.means) sd/sqrt(nobs) sd(sim.vars) sd^2/sqrt(nobs/2) sd(sim.sds) sd/sqrt(2*nobs) # # plot pdf for different sample sizes # x.min = -3 x.max = 3 npts = 100 x.vals = seq(from=x.min,to=x.max,length=npts) pdf1 = dnorm(x.vals,mean=0,sd=1) pdf2 = dnorm(x.vals,mean=0,sd=1/sqrt(10)) pdf3 = dnorm(x.vals,mean=0,sd=1/sqrt(50)) ylimits = range(pdf1,pdf2,pdf3) plot(x.vals,pdf1,ylim=ylimits,type="l",lty=1,ylab="pdf", lwd=2,xlab="estimate value") lines(x.vals,pdf2,type="l", lty=1, col=2, lwd=2) lines(x.vals,pdf3,type="l", lty=1, col=3, lwd=2) legend(1.5,2.5,legend=c("pdf T=1","pdf T=10","pdf T=50"), lty=rep(1,3),col=1:3, lwd=rep(2,3)) # # LLN # nobs = 1000 # Draw iid N(0.03,0.11) values set.seed(123) z = rnorm(1000, mean=0.03, sd=0.11) mu.hat.n = cumsum(z)/1:nobs ts.plot(mu.hat.n, main="Consistency of Sample Mean from CER Model", ylab="Sample Mean", xlab="Sample Size, T") abline(h=0.03) # # Illustration of CLT for muhat, sigma2hat and sigmahat # # generate 1000 samples from CER and compute sample statistics mu = 0.03 sd = 0.11 n.obs = 25 n.sim = 1000 set.seed(111) sim.means = rep(0,n.sim) # initialize vectors sim.vars = rep(0,n.sim) sim.sds = rep(0,n.sim) for (sim in 1:n.sim) { sim.ret = rnorm(n.obs,mean=mu,sd=sd) sim.means[sim] = mean(sim.ret) sim.vars[sim] = var(sim.ret) sim.sds[sim] = sqrt(sim.vars[sim]) } # plot pdfs par(mfrow=c(2,2)) hist(sim.means,xlab="mu hat", col="slateblue1") abline(v=mu, lwd=2, col="white") hist(sim.vars,xlab="sigma2 hat", col="slateblue1") abline(v=sd^2, lwd=2, col="white") hist(sim.sds,xlab="sigma hat", col="slateblue1") abline(v=sd, lwd=2, col="white") par(mfrow=c(1,1)) # # use Monte Carlo to evaluate confidence interval coverage # # generate 1000 samples from CER and compute sample statistics n.sim = 1000 set.seed(111) mu.lower = rep(0,n.sim) # initialize vectors mu.upper = rep(0,n.sim) for (sim in 1:n.sim) { sim.ret = rnorm(n.obs,mean=mu,sd=sd) mu.hat = mean(sim.ret) se.muhat = sd(sim.ret)/sqrt(n.obs) mu.lower[sim] = mu.hat - 2*se.muhat mu.upper[sim] = mu.hat + 2*se.muhat } in.interval = (mu >= mu.lower) & (mu <= mu.upper) sum(in.interval)/n.sim # # plot actual data # plot(returns.z, lwd=c(2,2,2), col=c(1,2,5), lty=c(1,1,1)) plot(returns.z, plot.type = "single", col=c(1,2,5), lwd=2) abline(h=0) legend(x="bottomleft", legend=c("SBUX","MSFT","SP500"), lwd=c(2,2,2), col=c(1,2,5), lty=c(1,1,1)) # # simulate data for three asset returns # nobs = 100 set.seed(123) sim.e = rmvnorm(nobs,mean=rep(0,3),cov=cov.mat) sim.ret = muhat.vals + sim.e colIds(sim.ret) = paste(colIds(returns.mat),".sim",sep="") # plot data and scatterplots ts.plot(sim.ret,main="Simulated return data", lty=rep(1,3),lwd=rep(2,3),col=c(1,2,5)) legend(0,-0.2,legend=colIds(sim.ret), lty=rep(1,3),lwd=rep(2,3),col=c(1,2,5)) abline(h=0) pairs(sim.ret) # compare with actual data ts.plot(returns.mat,main="Actual return data", lty=rep(1,3),lwd=rep(2,3),col=c(1,2,5)) legend(0,-0.2,legend=colIds(sim.ret), lty=rep(1,3),lwd=rep(2,3),col=c(1,2,5)) abline(h=0) pairs(returns.mat) # generate 1000 samples from CER and compute correlations n.obs = 100 n.sim = 1000 set.seed(111) sim.corrs = matrix(0,n.sim,3) # initialize vectors colIds(sim.corrs) = c("sbux,msft","sbux,sp500","msft,sp500") for (sim in 1:n.sim) { sim.ret = rmvnorm(n.obs,mean=muhat.vals,sigma=cov.mat) cor.mat = cor(sim.ret) sim.corrs[sim,] = cor.mat[lower.tri(cor.mat)] } par(mfrow=c(2,2)) hist(sim.corrs[,1], xlab="rhohat(sbux,msft)", col="slateblue1", main="sbux,msft") abline(v=rhohat.vals[1], lwd=4, col="white") hist(sim.corrs[,2], xlab="rhohat(sbux,sp500)", col="slateblue1", main="sbux,sp500") abline(v=rhohat.vals[2], lwd=4, col="white") hist(sim.corrs[,3], xlab="rhohat(msft,sp500)", col="slateblue1", main="msft,sp500") abline(v=rhohat.vals[3], lwd=4, col="white") par(mfrow=c(1,1)) # MC means and standard deviations colMeans(sim.corrs) colStdevs(sim.corrs) se.rhohat # # Compute rolling mean and SD values: using rolling functions from zoo # class(returns.z) colIds(returns.z) start(returns.z) end(returns.z) MSFT.z = returns.z[,"msft"] class(MSFT.z) plot(MSFT.z,reference.grid=F) # compute rolling means of width 24 months roll.mean.24 = aggregateSeries(MSFT.z,moving=23,adj=1,FUN=mean) roll.mean.24 plot(roll.mean.24,reference.grid=F, main="Rolling 24 month mean estimates") abline(h=muhat.vals["msft"]) plot(roll.mean.24,MSFT.z,reference.grid=F, main="MSFT returns and rolling 24 month means") abline(h=muhat.vals["msft"]) # compute rolling sds of width 24 months roll.sd.24 = aggregateSeries(MSFT.z,moving=23,adj=1,FUN=sd) roll.sd.24 plot(roll.sd.24,reference.grid=F, main="Rolling 24 month SD estimates") abline(h=sigmahat.vals["msft"]) plot(roll.sd.24,MSFT.z,reference.grid=F, main="MSFT returns and rolling 24 month sds") abline(h=muhat.vals["msft"]) # # Estimate Quantiles and VaR from CER model # # use R function qnorm to compute quantiles from standard normal distribution args(qnorm) qhat.05 = muhat.vals + sigmahat.vals*qnorm(0.05) qhat.05 qnorm(0.05,muhat.vals,sigmahat.vals) W0 = 100000 VaRhat.05 = (exp(qhat.05)-1)*W0 VaRhat.05 # VaR example from slides muhat = 0.02 sigmahat = 0.10 qhat.05 = muhat + sigmahat*qnorm(0.05) W0 = 10000 VaRhat.05 = (exp(qhat.05)-1)*W0 VaRhat.05