# extremeValues.ssc script file for examples used in
# Modeling Extreme Values chapter
# Updated for FM 2.0 February 2005
# Updated for FM 3.0 July 2007
#
# author: Eric Zivot
# created: February 27, 2002
# revised: February 28, 2005
# revised: July 5, 2007
#
####################################################################
#
# motivating example of extreme value theory
#
# create percentage return data
spto87 = getReturns(sp.raw, type="discrete", percentage=T)
par(mfrow=c(2,1))
plot(sp.raw, main="Daily Closing Prices")
plot(spto87, main="Daily Percentage Returns")
par(mfrow=c(1,1))
#
# plot GEV distributions and densities
#
# CDFs
z.vals = seq(-5, 5, length=200)
cdf.f = ifelse((z.vals > -2), pgev(z.vals, xi=0.5), 0)
cdf.w = ifelse((z.vals < 2), pgev(z.vals, xi=-0.5), 1)
cdf.g = exp(-exp(-z.vals))
plot(z.vals, cdf.w, type="l", xlab="z", ylab="H(z)")
lines(z.vals, cdf.f, type="l", lty=2)
lines(z.vals, cdf.g, type="l", lty=3)
legend(-5, 1, legend=c("Weibull H(-0.5,0,1)", "Frechet H(0.5,0,1)",
  "Gumbel H(0,0,1)"), lty=1:3)
# PDFs
pdf.f = ifelse((z.vals > -2), dgev(z.vals, xi=0.5), 0)
pdf.w = ifelse((z.vals < 2), dgev(z.vals, xi=-0.5), 0)
pdf.g = exp(-exp(-z.vals))*exp(-z.vals)
plot(z.vals, pdf.w, type="l", xlab="z", ylab="h(z)")
lines(z.vals, pdf.f, type="l", lty=2)
lines(z.vals, pdf.g, type="l", lty=3)
legend(-5.25, 0.4, legend=c("Weibull H(-0.5,0,1)", "Frechet H(0.5,0,1)",
  "Gumbel H(0,0,1)"), lty=1:3)
#
# Analysis of Maxima data
#
# analysis of S&P 500 daily returns
# Jan 5, 1960 - Oct 16, 1987
#
# plot -1*(daily returns) up to 1987
class(spto87)
plot(-spto87)
qqPlot(spto87, strip.text="Daily returns on S&P 500",
  xlab="Quantiles of standard normal",
  ylab="Quantiles of S&P 500")
qqnorm(spto87)
# compute annual maxima using aggregateSeries
# and plot descriptive statistics
annualMax.sp500 = aggregateSeries(-spto87, by="years", FUN=max)
Xn = sort(seriesData(annualMax.sp500))
par(mfrow=c(2,2))
plot(annualMax.sp500)
hist(seriesData(annualMax.sp500), xlab="Annual maximum")
plot(Xn, -log(-log(ppoints(Xn))), xlab="Annual maximum")
tmp = records(-spto87)
par(mfrow=c(1,1))
# estimate GEV CDF using annual blocks of daily returns
gev.fit.year = gev(-spto87, block="year")
class(gev.fit.year)
names(gev.fit.year)
# number of blocks
gev.fit.year$n
# plot block maxima
plot(gev.fit.year$data,
  main="Annual block maxima of negative daily returns",
  ylab="Mn")
# MLEs and estimated asymptotic standard errors
gev.fit.year$par.ests
gev.fit.year$par.ses
# print method added in FM 2.0
gev.fit.year
# 95% CI for xi
gev.fit.year$par.ests[1] - 2*gev.fit.year$par.ses[1]
gev.fit.year$par.ests[1] + 2*gev.fit.year$par.ses[1]
#
# plot method: this is better executed from the command line
#
# par(mfrow=c(1,2))
# plot(gev.fit.year)
#
# follow Coles and plot a histogram and density of the block
# maxima standardized by the fitted GEV location and scale
#
# standardized block maxima
Zn = (seriesData(gev.fit.year$data) - gev.fit.year$par.ests["mu"])/
  gev.fit.year$par.ests["sigma"]
Zn = sort(Zn)
# use dgev to compute density values for the standardized maxima;
# one way to show the histogram and density on a single plot is
# sketched below
gev.density = dgev(Zn, xi=gev.fit.year$par.ests["xi"])
plot(Zn, gev.density, type="l")
hist(Zn)
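# overlay the histogram and the fitted GEV density on one plot
# (a sketch; assumes hist() accepts probability=T so the bar
# heights are on the density scale, and uses the fitted xi with
# the defaults mu=0, sigma=1 since Zn has been standardized)
z.grid = seq(min(Zn) - 0.5, max(Zn) + 0.5, length=100)
hist(Zn, probability=T, xlim=range(z.grid),
  xlab="Zn", main="Standardized annual maxima")
lines(z.grid, dgev(z.grid, xi=gev.fit.year$par.ests["xi"]))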
# do qq-plot against Gumbel for standardized maxima
# see Embrechts et al., p. 293
plot(Zn, -log(-log(ppoints(Zn))))
# compute the probability that next year's maximum is a new record
1 - pgev(max(gev.fit.year$data),
  xi=gev.fit.year$par.ests["xi"],
  mu=gev.fit.year$par.ests["mu"],
  sigma=gev.fit.year$par.ests["sigma"])
# estimate 40-year return level
rlevel.year.40 = rlevel.gev(gev.fit.year, k.blocks=40,
  type="profile")
class(rlevel.year.40)
names(rlevel.year.40)
rlevel.year.40$rlevel
rlevel.year.40 = rlevel.gev(gev.fit.year, k.blocks=40,
  type="RetLevel")
names(rlevel.year.40)
# corresponding daily return quantile, assuming roughly 260
# trading days per year
(1 - (1/40))^(1/260)
# estimate GEV CDF using quarterly blocks of daily returns
gev.fit.quarter = gev(-spto87, block="quarter")
gev.fit.quarter$n
gev.fit.quarter$par.ests
gev.fit.quarter$par.ses
# print method added in FM 2.0
gev.fit.quarter
# 95% CI for xi
gev.fit.quarter$par.ests[1] - 2*gev.fit.quarter$par.ses[1]
gev.fit.quarter$par.ests[1] + 2*gev.fit.quarter$par.ses[1]
# probability that next quarter's maximum exceeds all previous
# maxima
1 - pgev(max(gev.fit.quarter$data),
  xi=gev.fit.quarter$par.ests["xi"],
  mu=gev.fit.quarter$par.ests["mu"],
  sigma=gev.fit.quarter$par.ests["sigma"])
# 40-quarter return level
rlevel.40.q = rlevel.gev(gev.fit.quarter, k.blocks=40)
# 40-year (160-quarter) return level
rlevel.160.q = rlevel.gev(gev.fit.quarter, k.blocks=160,
  type="RetLevel")
rlevel.160.q
# estimate GEV CDF for maxima of positive returns (losses on
# short positions) using semester blocks
gev.fit.sa = gev(spto87, block="semester")
names(gev.fit.sa)
# number of blocks
gev.fit.sa$n
# block maxima
gev.fit.sa$data
# MLEs and estimated asymptotic standard errors
gev.fit.sa$par.ests
gev.fit.sa$par.ses
# fit Gumbel distribution to annual maxima
gumbel.fit.year = gumbel(-spto87, block="year")
class(gumbel.fit.year)
names(gumbel.fit.year)
gumbel.fit.year$par.ests
gumbel.fit.year$par.ses
# print method added in FM 2.0
gumbel.fit.year
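# a quick check of the Gumbel restriction (xi = 0) against the
# full GEV fit via a likelihood ratio test; a sketch that assumes
# both fit objects return the minimized negative log-likelihood
# in component nllh.final, as in the original EVIS functions
lr.stat = 2*(gumbel.fit.year$nllh.final - gev.fit.year$nllh.final)
lr.stat
# p-value from the chi-squared(1) reference distribution
1 - pchisq(lr.stat, 1)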
#
# The following is better executed from the command line.
#
# par(mfrow=c(1,2))
# plot(gumbel.fit.year)
#
# 1 - pgev(max(gumbel.fit.year$data), xi=0,
#   mu=gumbel.fit.year$par.ests["mu"],
#   sigma=gumbel.fit.year$par.ests["sigma"])
#
# rlevel.gev(gumbel.fit.year, k.blocks=40)
# par(mfrow=c(1,1))
#
####################################################################
# Modeling Excesses Over Thresholds
# using the Danish Fire Loss Data and S&P 500 returns
####################################################################
# simulated Gaussian data calibrated to S&P 500 returns
set.seed(123)
norm.data = rnorm(numRows(spto87), mean=mean(spto87),
  sd=colStdevs(spto87))
tsplot(norm.data, main="Simulated Gaussian data")
summaryStats(norm.data)
plot(danish, main="Fire Loss Insurance Claims",
  ylab="Millions of Danish Krone")
# plot various GPD CDFs and densities
# CDFs
par(mfrow=c(1,2))
y.vals = seq(0, 8, length=200)
cdf.p = pgpd(y.vals, xi=0.5)
cdf.p2 = ifelse((y.vals < 2), pgpd(y.vals, xi=-0.5), 1)
cdf.e = 1 - exp(-y.vals)
plot(y.vals, cdf.p, type="l", xlab="y", ylab="G(y)",
  ylim=c(0,1))
lines(y.vals, cdf.e, type="l", lty=2)
lines(y.vals, cdf.p2, type="l", lty=3)
legend(1, 0.2, legend=c("Pareto G(0.5,1)", "Exponential G(0,1)",
  "Pareto II G(-0.5,1)"), lty=1:3)
# PDFs
pdf.p = dgpd(y.vals, xi=0.5)
pdf.p2 = ifelse((y.vals < 2), dgpd(y.vals, xi=-0.5), 0)
pdf.e = exp(-y.vals)
plot(y.vals, pdf.p, type="l", xlab="y", ylab="g(y)",
  ylim=c(0,1))
lines(y.vals, pdf.e, type="l", lty=2)
lines(y.vals, pdf.p2, type="l", lty=3)
legend(2, 1, legend=c("Pareto g(0.5,1)", "Exponential g(0,1)",
  "Pareto II g(-0.5,1)"), lty=1:3)
par(mfrow=c(1,1))
# exploratory analysis
# qq-plots with exponential reference distribution
par(mfrow=c(1,2))
qplot(-spto87, threshold=1, main="S&P 500 negative returns")
qplot(danish, threshold=10, main="Danish fire losses")
par(mfrow=c(1,1))
qplot(norm.data, threshold=1, main="simulated Gaussian returns")
# mean excess plots
par(mfrow=c(1,2))
me.sp500 = meplot(-spto87)
me.danish = meplot(danish)
class(me.sp500)
colIds(me.sp500)
par(mfrow=c(1,1))
# plot is essentially linear with zero slope for values above 1,
# indicating an exponential tail
me.norm = meplot(norm.data)
# fit GPD to S&P 500 negative returns with threshold u=1
# (only a plot method is available besides the print method)
gpd.sp500.1 = gpd(-spto87, threshold=1)
class(gpd.sp500.1)
names(gpd.sp500.1)
gpd.sp500.1$upper.converged
gpd.sp500.1$upper.thresh
gpd.sp500.1$n.upper.exceed
gpd.sp500.1$p.less.upper.thresh
gpd.sp500.1$upper.par.ests
gpd.sp500.1$upper.par.ses
# note: FM 2.0 includes a print method but no summary method
gpd.sp500.1
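# sanity check on the fit: the GPD tail estimator is
#   Fbar(x) = (Nu/n)*(1 + xi*(x - u)/beta)^(-1/xi)  for x > u
# a sketch evaluating it at x = 2 (assumes the scale parameter is
# named "beta" in upper.par.ests, following the EVIS conventions)
xi.hat = gpd.sp500.1$upper.par.ests["xi"]
beta.hat = gpd.sp500.1$upper.par.ests["beta"]
u = gpd.sp500.1$upper.thresh
Nu = gpd.sp500.1$n.upper.exceed
n = numRows(spto87)
# estimated probability of a one-day loss greater than 2 percent
(Nu/n)*(1 + xi.hat*(2 - u)/beta.hat)^(-1/xi.hat)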
# This is better executed from the command line.
# plot(gpd.sp500.1)
shape(-spto87, end=600)
# compare with Carmona's shape.plot and gpd.tail functions
# note: gpd.tail fits lower and upper tails simultaneously
shape.plot(-spto87, from=0.9)
shape.plot(spto87)
me.sp500 = meplot(spto87)
gpd.sp500.2tail = gpd.tail(spto87, upper=1, lower=-1)
class(gpd.sp500.2tail)
gpd.sp500.2tail
# results for the lower tail match the EVIS function
# note: no standard errors are computed
# fit GPD to Danish fire insurance data
gpd.danish.10 = gpd(danish, threshold=10)
gpd.danish.10$n.upper.exceed
gpd.danish.10$p.less.upper.thresh
gpd.danish.10$upper.par.ests
gpd.danish.10$upper.par.ses
# FM 2.0 print method
gpd.danish.10
par(mfrow=c(1,2))
tailplot(gpd.danish.10)
shape(danish)
par(mfrow=c(1,1))
# estimating VaR and ES for S&P 500 data
# use quant, gpd.q, gpd.sfall, riskmeasures
# note: gpd.q and gpd.sfall are designed to be used after a call
# to tailplot
riskmeasures(gpd.sp500.1, p=c(0.95, 0.99))
gpd.q(0.99, ci.type="likelihood")
gpd.q(0.99, ci.type="wald")
gpd.sfall(0.99)
gpd.sfall(0.99, ci.type="wald")
# compute VaR and ES assuming normally distributed data
# (confidence intervals still need to be added)
riskmeasures.normal <- function(data, p=c(0.95, 0.99)) {
  mu = colMeans(data)
  sd = colStdevs(data)
  # normal VaR: mu + sd*z_p
  q = mu + sd*qnorm(p)
  # normal ES: mu + sd*dnorm(z_p)/(1 - pnorm(z_p))
  sq = (q - mu)/sd
  sf = mu + sd*dnorm(sq)/(1 - pnorm(sq))
  cbind(p, quantile=q, sfall=sf)
}
riskmeasures.normal(-spto87)
# show confidence intervals graphically
tailplot(gpd.sp500.1)
gpd.q(0.99, plot=T)
gpd.sfall(0.99, plot=T)
quant(-spto87, p=0.99)
# estimating VaR and ES for Danish fire loss data
riskmeasures(gpd.danish.10, c(0.95, 0.99))
danish.mu = mean(danish)
danish.sd = sqrt(var(danish))
var.95 = danish.mu + danish.sd*qnorm(0.95)
var.99 = danish.mu + danish.sd*qnorm(0.99)
var.95
var.99
z95 = (var.95 - danish.mu)/danish.sd
z99 = (var.99 - danish.mu)/danish.sd
es.95 = danish.mu + danish.sd*dnorm(z95)/(1 - pnorm(z95))
es.99 = danish.mu + danish.sd*dnorm(z99)/(1 - pnorm(z99))
es.95
es.99
tailplot(gpd.danish.10)
gpd.q(0.99, plot=T)
gpd.sfall(0.99, plot=T)
quant(danish, p=0.99)
#
# Hill analysis of S&P 500 data
#
args(hill)
hill(-spto87, option="xi", end=500)
hill(spto87, option="xi", end=500)
#
# Hill analysis of Danish loss data
#
class(danish)
plot(danish)
hill.danish = hill(danish, option="xi")
idx = (hill.danish$threshold >= 9.8 &
  hill.danish$threshold <= 10.2)
hill.danish[idx, ]
hill.danish.q = hill(danish, option="quantile", p=0.99,
  end=500)
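# the Hill estimate at a fixed number of upper order statistics
# can also be computed by hand; a sketch for the k = 100 largest
# Danish losses, using one common convention
#   xi.hill = (1/k)*sum(log X_(1..k)) - log X_(k+1)
# with X_(1) >= X_(2) >= ... the descending order statistics
# (assumes sort/rev work on seriesData output, as with Xn above)
x.desc = rev(sort(seriesData(danish)))
k = 100
xi.hill = mean(log(x.desc[1:k])) - log(x.desc[k+1])
xi.hill
# implied tail index alpha = 1/xi
1/xi.hill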