# extremeValues.ssc script file for examples used in
# Modeling Extreme Values chapter
# Updated for FM 2.0 February 2005
# Updated for FM 3.0 July 2007
#
# author: Eric Zivot
# created: February 27, 2002
# revised: February 28, 2005
# revised: July 5, 2007
#
####################################################################
#
# motivating example of extreme value theory
#
# create percentage return data
spto87 = getReturns(sp.raw, type="discrete", percentage=T)
par(mfrow=c(2,1))
plot(sp.raw, main="Daily Closing Prices")
plot(spto87, main="Daily Percentage Returns")
par(mfrow=c(1,1))
#
# plot GEV distributions and densities
#
# CDFs
z.vals = seq(-5, 5, length=200)
cdf.f = ifelse((z.vals > -2), pgev(z.vals, xi=0.5), 0)
cdf.w = ifelse((z.vals < 2), pgev(z.vals, xi=-0.5), 1)
cdf.g = exp(-exp(-z.vals))
plot(z.vals, cdf.w, type="l", xlab="z", ylab="H(z)")
lines(z.vals, cdf.f, type="l", lty=2)
lines(z.vals, cdf.g, type="l", lty=3)
legend(-5, 1, legend=c("Weibull H(-0.5,0,1)", "Frechet H(0.5,0,1)",
  "Gumbel H(0,0,1)"), lty=1:3)
# PDFs
pdf.f = ifelse((z.vals > -2), dgev(z.vals, xi=0.5), 0)
pdf.w = ifelse((z.vals < 2), dgev(z.vals, xi=-0.5), 0)
pdf.g = exp(-exp(-z.vals))*exp(-z.vals)
plot(z.vals, pdf.w, type="l", xlab="z", ylab="h(z)")
lines(z.vals, pdf.f, type="l", lty=2)
lines(z.vals, pdf.g, type="l", lty=3)
legend(-5.25, 0.4, legend=c("Weibull H(-0.5,0,1)", "Frechet H(0.5,0,1)",
  "Gumbel H(0,0,1)"), lty=1:3)
#
# Analysis of Maxima data
#
# analysis of S&P 500 daily returns
# Jan 5, 1960 - Oct 16, 1987
#
# plot -1*(daily returns) up to 1987
class(spto87)
plot(-spto87)
qqPlot(spto87, strip.text="Daily returns on S&P 500",
  xlab="Quantiles of standard normal",
  ylab="Quantiles of S&P 500")
qqnorm(spto87)
# compute annual maxima using aggregateSeries
# and plot descriptive statistics
annualMax.sp500 = aggregateSeries(-spto87, by="years", FUN=max)
Xn = sort(seriesData(annualMax.sp500))
par(mfrow=c(2,2))
plot(annualMax.sp500)
hist(seriesData(annualMax.sp500), xlab="Annual maximum")
plot(Xn, -log(-log(ppoints(Xn))), xlab="Annual maximum")
tmp = records(-spto87)
par(mfrow=c(1,1))
# estimate GEV CDF using annual blocks of daily returns
gev.fit.year = gev(-spto87, block="year")
class(gev.fit.year)
names(gev.fit.year)
# number of blocks
gev.fit.year$n
# plot block maxima
plot(gev.fit.year$data,
  main="Annual block maxima of negative daily returns",
  ylab="Mn")
# MLEs and estimated asymptotic standard errors
gev.fit.year$par.ests
gev.fit.year$par.ses
# print method added in FM 2.0
gev.fit.year
# 95% CI for xi
gev.fit.year$par.ests[1] - 2*gev.fit.year$par.ses[1]
gev.fit.year$par.ests[1] + 2*gev.fit.year$par.ses[1]
#
# plot method: this is better executed from the command line
#
# par(mfrow=c(1,2))
# plot(gev.fit.year)
#
# follow Coles and plot a histogram and density of the block
# maxima standardized by the fitted GEV location and scale
#
# standardized block maxima
Zn = (seriesData(gev.fit.year$data) - gev.fit.year$par.ests["mu"])/
  gev.fit.year$par.ests["sigma"]
Zn = sort(Zn)
# use dgev to compute density values for the standardized maxima;
# one way to show the histogram and density on a single plot is
# sketched below
gev.density = dgev(Zn, xi=gev.fit.year$par.ests["xi"])
plot(Zn, gev.density, type="l")
hist(Zn)
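# overlay the histogram and the fitted GEV density on one plot
# (a sketch; assumes hist() accepts probability=T so the bar
# heights are on the density scale, and uses the fitted xi with
# the defaults mu=0, sigma=1 since Zn has been standardized)
z.grid = seq(min(Zn) - 0.5, max(Zn) + 0.5, length=100)
hist(Zn, probability=T, xlim=range(z.grid),
  xlab="Zn", main="Standardized annual maxima")
lines(z.grid, dgev(z.grid, xi=gev.fit.year$par.ests["xi"]))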
# do qq-plot against Gumbel for standardized maxima
# see Embrechts et al., p. 293
plot(Zn, -log(-log(ppoints(Zn))))
# compute the probability that next year's maximum is a new record
1 - pgev(max(gev.fit.year$data),
  xi=gev.fit.year$par.ests["xi"],
  mu=gev.fit.year$par.ests["mu"],
  sigma=gev.fit.year$par.ests["sigma"])
# estimate 40-year return level
rlevel.year.40 = rlevel.gev(gev.fit.year, k.blocks=40,
  type="profile")
class(rlevel.year.40)
names(rlevel.year.40)
rlevel.year.40$rlevel
rlevel.year.40 = rlevel.gev(gev.fit.year, k.blocks=40,
  type="RetLevel")
names(rlevel.year.40)
# corresponding daily return quantile, assuming roughly 260
# trading days per year
(1 - (1/40))^(1/260)
# estimate GEV CDF using quarterly blocks of daily returns
gev.fit.quarter = gev(-spto87, block="quarter")
gev.fit.quarter$n
gev.fit.quarter$par.ests
gev.fit.quarter$par.ses
# print method added in FM 2.0
gev.fit.quarter
# 95% CI for xi
gev.fit.quarter$par.ests[1] - 2*gev.fit.quarter$par.ses[1]
gev.fit.quarter$par.ests[1] + 2*gev.fit.quarter$par.ses[1]
# probability that next quarter's maximum exceeds all previous
# maxima
1 - pgev(max(gev.fit.quarter$data),
  xi=gev.fit.quarter$par.ests["xi"],
  mu=gev.fit.quarter$par.ests["mu"],
  sigma=gev.fit.quarter$par.ests["sigma"])
# 40-quarter return level
rlevel.40.q = rlevel.gev(gev.fit.quarter, k.blocks=40)
# 40-year (160-quarter) return level
rlevel.160.q = rlevel.gev(gev.fit.quarter, k.blocks=160,
  type="RetLevel")
rlevel.160.q
# estimate GEV CDF for maxima of positive returns (losses on
# short positions) using semester blocks
gev.fit.sa = gev(spto87, block="semester")
names(gev.fit.sa)
# number of blocks
gev.fit.sa$n
# block maxima
gev.fit.sa$data
# MLEs and estimated asymptotic standard errors
gev.fit.sa$par.ests
gev.fit.sa$par.ses
# fit Gumbel distribution to annual maxima
gumbel.fit.year = gumbel(-spto87, block="year")
class(gumbel.fit.year)
names(gumbel.fit.year)
gumbel.fit.year$par.ests
gumbel.fit.year$par.ses
# print method added in FM 2.0
gumbel.fit.year
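# a quick check of the Gumbel restriction (xi = 0) against the
# full GEV fit via a likelihood ratio test; a sketch that assumes
# both fit objects return the minimized negative log-likelihood
# in component nllh.final, as in the original EVIS functions
lr.stat = 2*(gumbel.fit.year$nllh.final - gev.fit.year$nllh.final)
lr.stat
# p-value from the chi-squared(1) reference distribution
1 - pchisq(lr.stat, 1)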
#
# The following is better executed from the command line.
#
# par(mfrow=c(1,2))
# plot(gumbel.fit.year)
#
# 1 - pgev(max(gumbel.fit.year$data), xi=0,
#   mu=gumbel.fit.year$par.ests["mu"],
#   sigma=gumbel.fit.year$par.ests["sigma"])
#
# rlevel.gev(gumbel.fit.year, k.blocks=40)
# par(mfrow=c(1,1))
#
####################################################################
# Modeling Excesses Over Thresholds
# using the Danish Fire Loss Data and S&P 500 returns
####################################################################
# simulated Gaussian data calibrated to S&P 500 returns
set.seed(123)
norm.data = rnorm(numRows(spto87), mean=mean(spto87),
  sd=colStdevs(spto87))
tsplot(norm.data, main="Simulated Gaussian data")
summaryStats(norm.data)
plot(danish, main="Fire Loss Insurance Claims",
  ylab="Millions of Danish Krone")
# plot various GPD CDFs and densities
# CDFs
par(mfrow=c(1,2))
y.vals = seq(0, 8, length=200)
cdf.p = pgpd(y.vals, xi=0.5)
cdf.p2 = ifelse((y.vals < 2), pgpd(y.vals, xi=-0.5), 1)
cdf.e = 1 - exp(-y.vals)
plot(y.vals, cdf.p, type="l", xlab="y", ylab="G(y)",
  ylim=c(0,1))
lines(y.vals, cdf.e, type="l", lty=2)
lines(y.vals, cdf.p2, type="l", lty=3)
legend(1, 0.2, legend=c("Pareto G(0.5,1)", "Exponential G(0,1)",
  "Pareto II G(-0.5,1)"), lty=1:3)
# PDFs
pdf.p = dgpd(y.vals, xi=0.5)
pdf.p2 = ifelse((y.vals < 2), dgpd(y.vals, xi=-0.5), 0)
pdf.e = exp(-y.vals)
plot(y.vals, pdf.p, type="l", xlab="y", ylab="g(y)",
  ylim=c(0,1))
lines(y.vals, pdf.e, type="l", lty=2)
lines(y.vals, pdf.p2, type="l", lty=3)
legend(2, 1, legend=c("Pareto g(0.5,1)", "Exponential g(0,1)",
  "Pareto II g(-0.5,1)"), lty=1:3)
par(mfrow=c(1,1))
# exploratory analysis
# qq-plots with exponential reference distribution
par(mfrow=c(1,2))
qplot(-spto87, threshold=1, main="S&P 500 negative returns")
qplot(danish, threshold=10, main="Danish fire losses")
par(mfrow=c(1,1))
qplot(norm.data, threshold=1, main="simulated Gaussian returns")
# mean excess plots
par(mfrow=c(1,2))
me.sp500 = meplot(-spto87)
me.danish = meplot(danish)
class(me.sp500)
colIds(me.sp500)
par(mfrow=c(1,1))
# plot is essentially linear with zero slope for values above 1,
# indicating an exponential tail
me.norm = meplot(norm.data)
# fit GPD to S&P 500 negative returns with threshold u=1
# (only a plot method is available besides the print method)
gpd.sp500.1 = gpd(-spto87, threshold=1)
class(gpd.sp500.1)
names(gpd.sp500.1)
gpd.sp500.1$upper.converged
gpd.sp500.1$upper.thresh
gpd.sp500.1$n.upper.exceed
gpd.sp500.1$p.less.upper.thresh
gpd.sp500.1$upper.par.ests
gpd.sp500.1$upper.par.ses
# note: FM 2.0 includes a print method but no summary method
gpd.sp500.1
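# sanity check on the fit: the GPD tail estimator is
#   Fbar(x) = (Nu/n)*(1 + xi*(x - u)/beta)^(-1/xi)  for x > u
# a sketch evaluating it at x = 2 (assumes the scale parameter is
# named "beta" in upper.par.ests, following the EVIS conventions)
xi.hat = gpd.sp500.1$upper.par.ests["xi"]
beta.hat = gpd.sp500.1$upper.par.ests["beta"]
u = gpd.sp500.1$upper.thresh
Nu = gpd.sp500.1$n.upper.exceed
n = numRows(spto87)
# estimated probability of a one-day loss greater than 2 percent
(Nu/n)*(1 + xi.hat*(2 - u)/beta.hat)^(-1/xi.hat)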
# This is better executed from the command line.
# plot(gpd.sp500.1)
shape(-spto87, end=600)
# compare with Carmona's shape.plot and gpd.tail functions
# note: gpd.tail fits lower and upper tails simultaneously
shape.plot(-spto87, from=0.9)
shape.plot(spto87)
me.sp500 = meplot(spto87)
gpd.sp500.2tail = gpd.tail(spto87, upper=1, lower=-1)
class(gpd.sp500.2tail)
gpd.sp500.2tail
# results for the lower tail match the EVIS function
# note: no standard errors are computed
# fit GPD to Danish fire insurance data
gpd.danish.10 = gpd(danish, threshold=10)
gpd.danish.10$n.upper.exceed
gpd.danish.10$p.less.upper.thresh
gpd.danish.10$upper.par.ests
gpd.danish.10$upper.par.ses
# FM 2.0 print method
gpd.danish.10
par(mfrow=c(1,2))
tailplot(gpd.danish.10)
shape(danish)
par(mfrow=c(1,1))
# estimating VaR and ES for S&P 500 data
# use quant, gpd.q, gpd.sfall, riskmeasures
# note: gpd.q and gpd.sfall are designed to be used after a call
# to tailplot
riskmeasures(gpd.sp500.1, p=c(0.95, 0.99))
gpd.q(0.99, ci.type="likelihood")
gpd.q(0.99, ci.type="wald")
gpd.sfall(0.99)
gpd.sfall(0.99, ci.type="wald")
# compute VaR and ES assuming normally distributed data
# (confidence intervals still need to be added)
riskmeasures.normal <- function(data, p=c(0.95, 0.99)) {
  mu = colMeans(data)
  sd = colStdevs(data)
  # normal VaR: mu + sd*z_p
  q = mu + sd*qnorm(p)
  # normal ES: mu + sd*dnorm(z_p)/(1 - pnorm(z_p))
  sq = (q - mu)/sd
  sf = mu + sd*dnorm(sq)/(1 - pnorm(sq))
  cbind(p, quantile=q, sfall=sf)
}
riskmeasures.normal(-spto87)
# show confidence intervals graphically
tailplot(gpd.sp500.1)
gpd.q(0.99, plot=T)
gpd.sfall(0.99, plot=T)
quant(-spto87, p=0.99)
# estimating VaR and ES for Danish fire loss data
riskmeasures(gpd.danish.10, c(0.95, 0.99))
danish.mu = mean(danish)
danish.sd = sqrt(var(danish))
var.95 = danish.mu + danish.sd*qnorm(0.95)
var.99 = danish.mu + danish.sd*qnorm(0.99)
var.95
var.99
z95 = (var.95 - danish.mu)/danish.sd
z99 = (var.99 - danish.mu)/danish.sd
es.95 = danish.mu + danish.sd*dnorm(z95)/(1 - pnorm(z95))
es.99 = danish.mu + danish.sd*dnorm(z99)/(1 - pnorm(z99))
es.95
es.99
tailplot(gpd.danish.10)
gpd.q(0.99, plot=T)
gpd.sfall(0.99, plot=T)
quant(danish, p=0.99)
#
# Hill analysis of S&P 500 data
#
args(hill)
hill(-spto87, option="xi", end=500)
hill(spto87, option="xi", end=500)
#
# Hill analysis of Danish loss data
#
class(danish)
plot(danish)
hill.danish = hill(danish, option="xi")
idx = (hill.danish$threshold >= 9.8 &
  hill.danish$threshold <= 10.2)
hill.danish[idx, ]
hill.danish.q = hill(danish, option="quantile", p=0.99,
  end=500)
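# the Hill estimate at a fixed number of upper order statistics
# can also be computed by hand; a sketch for the k = 100 largest
# Danish losses, using one common convention
#   xi.hill = (1/k)*sum(log X_(1..k)) - log X_(k+1)
# with X_(1) >= X_(2) >= ... the descending order statistics
# (assumes sort/rev work on seriesData output, as with Xn above)
x.desc = rev(sort(seriesData(danish)))
k = 100
xi.hill = mean(log(x.desc[1:k])) - log(x.desc[k+1])
xi.hill
# implied tail index alpha = 1/xi
1/xi.hill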