# SV_mcl_est.ssc					script file for estimation of stochastic volatility model
#
# author: Eric Zivot
# created: September 24, 2002
# updates:
# May 18, 2003
#	Fixed loglike.fun.SV.
# May 15, 2003
#	Fixed SsfMcLik. Does not utilize anti-thetic variables
#
# September 26, 2002
#	Modified SsfApproxModel to have s.mIndex as an argument
#	Fixed some typos
# September 25, 2002
#	Added comments
#	Siem Jan: Please check the loop in SsfApproxModel. In particular,
#	check to see if any of the elements to the state space object
#	ssf need to be modified from inside the loop
#	
#
# notes:
# Code is adapted from OX code in sv_mcl_est.ox
# available at www.ssfpack.com
# Uses functions from Ssfnong.ssc
#

SsfApproxModel = function(iD,s.mY,s.vTheta,s.dScale,ssf,s.mIndex,
							   n.iter=50,tol=10^-7) {
	##
	## construct Gaussian approximating model
	##
	## inputs:
	## iD			character id for density. Valid choices are
	## 				d.normal, d.poisson, d.sv, d.svm, d.exp, d.t, 
	##				d.total
	## s.mY		vector of data
	## s.vTheta	vector of signals, theta.tilde = Z*alpha.tilde
	## s.dScale	average variance parameter for SV model
	## ssf			list containing mimimal state space representation
	##				or valid state space object for linear Gaussian
	##				approximating model
	## s.mIndex	index for time-varying variances in state-space
	## n.iter		number of iterations
	## tol			convergence tolerance
	## output:
	## mret		list with components y.tilde and H.tilde 
	##				for linear Gaussian approximating model (lgam)
	##
	## notes:
	## 				See Durbin and Koopman (2001) ch 11, pgs. 191-195
	iD = casefold(iD)								# make all lowercase
	s.mY = as.matrix(s.mY)
	s.vTheta = as.matrix(s.vTheta)
	mynz = ifelse(s.mY == 0, 0.1, s.mY)			# replace 0 with small value
	mret = SsfNonG.Expansion.SV(iD=iD,vY=mynz,vTheta=s.vTheta,
			args=s.dScale)							# [y.tilde, H.tilde]
	ssf$mJOmega = s.mIndex							# index for time varying vars
	ssf$mX = mret$H.tilde							# time varying variances for LGAM
	ssf$cT = nrow(ssf$mX)						
	ssf$cX = 1
	for (i in 1:n.iter) {
		m.s = SsfCondDens(mY=mret$y.tilde,ssf=ssf,task="STSMO")
		s.vTheta = m.s$response					# updated theta.tilde
		mtemp = SsfNonG.Expansion.SV(iD=iD,vY=mynz,vTheta=s.vTheta,
				 args=s.dScale)
		dconv.y = abs((mtemp$y.tilde - mret$y.tilde)/mret$y.tilde)
		dconv.H = abs((mtemp$H.tilde - mret$H.tilde)/mret$H.tilde)
		dconv = max(dconv.y,dconv.H)
		mret = mtemp
		ssf$mX = mret$H.tilde						# add updated time varying data
		if (dconv < tol) break
	}
	mret 													# [y.tilde, H.tilde]
}

SsfMCLik = function(s.mY, mApprox, s.dScale, ssf, s.mIndex,
						s.iM=10, s.iAnti=1, s.seed=123) {
	##
	## Construct log-likelihood using MC integration
	##
	## inputs:
	## s.mY			vector of data
	## mApprox		list containing y.tilde and H.tilde from SsfApproxModel
	## s.dScale		scale parameter
	## ssf				list containing mimimal state space representation
	##					or valid state space object
	##	s.mIndex		index for time varying variances in ssf. Replaces
	##					mJOmega
	##	s.iM			number of MC simulations
	##	s.ianti		number of antithetic draws
	##	s.seed			seed for random number generator
	##
	## Note: this version does not use anti-thetic variables - See SsfNonG.Antithetic
	##
	s.mY = as.matrix(s.mY)
	n = nrow(mApprox$y.tilde)
	vyhat = mApprox$y.tilde
	ssf$mX = mApprox$H.tilde								# time varying variances
	ssf$mJOmega = s.mIndex
	
	## evaluate LGAM log-likelihood
	##
	tmp = SsfLoglike(mY=vyhat,ssf=ssf)
	dg = tmp$loglike		
	
	m.s = SsfCondDens(mY=mApprox$y.tilde,ssf=ssf,task="STSMO")
	
	## compute lnp(y|theta) - lng(y|theta) based on smoothed theta|y
	##
	dlhat = SsfNonG.LogDensity.SV(iD="d.sv",mY=s.mY,vTheta=m.s$state,args=s.dScale) -
	  SsfNonG.LogDensity.SV(iD="d.normal",mY=s.mY,vTheta=(vyhat-m.s$state),
								 args=mApprox$H.tilde)			
	if(s.iM == 0) return(dg + dlhat)
	
	## start MC integration via importance sampling
	##
	mw = matrix(0,s.iM,1)
	kf.lgam = KalmanFil(mY=vyhat,ssf=ssf,task="DSSIM")
	set.seed(s.seed)
	for (i in 1:s.iM) {
		## simulate theta from LGAM using disturbance smoother
		## here: state component = state disturbance
		
		m.s = SimSmoDraw(kf=kf.lgam,ssf=ssf,task="DSSIM")
		a =  vyhat - m.s$response										# simulated theta
		
		## use trick to improve numerical stability of weights: exp(x)*exp(y(i)-x)=exp(y(i)), 
		## use exp(y(i)-x) as weight. missing factor exp(x) doesn't matter when averages are computed below
		## mw[i] is proportional to p(y|theta)/g(y|theta)
		##
		mw[i] = exp(SsfNonG.LogDensity.SV(iD="d.sv",mY=s.mY,vTheta=a,args=s.dScale)
		- SsfNonG.LogDensity.SV(iD="d.normal",mY=s.mY,vTheta=m.s$response,
								args=mApprox$H.tilde) - dlhat)
			
	}
	mn = mean(mw)			
	vr = var(mw)
	## note: the true weight is mw*exp(dlhat) => true mean = mean(mw) + dlhat. This is why
	## dlhat is added to log-likelihood calculation below
	##
	tmp = dg + dlhat + log(mn) + (0.5*vr/(s.iM*mn^2))
	as.numeric(tmp)
}

loglike.fun.SV = function (parm, mY = NULL,s.seed=123)
{
	## inputs:
	## parm			3 x 1 vector of parameters: parm[1] = logit transform of phi,
	##					parm[2] = log variance of state innovation, parm[3] = average
	##					variance of data
	## mY				n x 1 vector of data
	## outputs:
	## loglike			log-likelihood function for the sample
	phi = exp(parm[1])/(1 + exp(parm[1]))	# AR(1) coef
	dsig = exp(parm[2])							# sd of n(t)					
	s.dScale = exp(2*parm[3])					# ave var coef
	ssf.sv = GetSsfArma(ar=phi,sigma=dsig)
	s.mIndex = matrix(-1,2,2)
	s.mIndex[2,2] = 1	# index for measurement eqn error
	## compute approximating model
	##
	s.vTheta = matrix(0,nrow(mY),1)
	mApprox = SsfApproxModel("d.sv",mY,s.vTheta,s.dScale,ssf.sv,s.mIndex)
	loglik = SsfMCLik(mY,mApprox,s.dScale,ssf.sv,s.mIndex,s.iM=10,s.iAnti=1,s.seed)
	-loglik
}

##
## Function to compute smoothed estimate of state vector based on 
## Monte Carlo Integration with Importance sampling
##

DrawMean = function(s.mY,s.vTheta,s.dScale,ssf,s.mIndex,n.iter=50,tol=10^-7,
						s.iM=10,s.iSeed) 
{
	## compute smoothed estimate of state vector based on Monte Carlo
	## integration using importance sampling
	## inputs:
	## s.mY		vector of data
	## s.vTheta	vector of signals, theta.tilde = Z*alpha.tilde
	## s.dScale	average variance parameter for SV model
	## ssf			list containing mimimal state space representation
	##				or valid state space object for linear Gaussian
	##				approximating model
	## s.mIndex	index for time-varying variances in state-space
	## n.iter		number of iterations
	## tol			convergence tolerance
	## s.iM		scalar, number of Monte Carlo draws
	## s.iSeed		scalar, seed for random numbers
	## outputs:
	## ans			list with following components: sd.hat = smoothed estimate
	##				of volatility, lower = lower 95% confidence band, upper = upper
	##				95% confidence band.
	## remarks:
	##	A trick is used to make sure that importance weights do not explode
	
	## compute Gaussian approximating model (LGAM)
	##
	mApprox = mApprox = SsfApproxModel(iD="d.sv",s.mY=s.mY,s.vTheta=s.vTheta,
							s.dScale=s.dScale,ssf=ssf,s.mIndex=s.mIndex,
							n.iter=n.iter,tol=tol)
	ssf$mJOmega = s.mIndex							# index for time varying vars
	ssf$mX=mApprox$H.tilde							# time varying variances for LGAM
	ssf$cT = nrow(ssf$mX)						
	ssf$cX = 1

	## compute smoothed state and response estimates
	## state = response for LGAM
	##
	m.s = SsfCondDens(mY=mApprox$y.tilde,ssf=ssf,task="STSMO")
	
	## compute lnp(y|theta) - lng(y|theta) based on smoothed theta|y
	##
	dlhat = SsfNonG.LogDensity.SV(iD="d.sv",mY=s.mY,vTheta=m.s$state,args=s.dScale) -
	  SsfNonG.LogDensity.SV(iD="d.normal",mY=s.mY,vTheta=(mApprox$y.tilde-m.s$state),
								     args=mApprox$H.tilde)
						
	## start MC integration via importance sampling
	##
	mw = matrix(0,s.iM,1)
	mcum = matrix(0,nrow(s.mY),2)
	kf.lgam = KalmanFil(mY=mApprox$y.tilde,ssf=ssf,task="DSSIM")
	set.seed(s.iSeed)
	for (i in 1:s.iM) {
		## simulate theta from LGAM
		## Here: state component = state disturbance
		m.s = SimSmoDraw(kf=kf.lgam,ssf=ssf,task="DSSIM")
		a =  mApprox$y.tilde - m.s$response			# smoothed estimate of theta
		
		## use trick to improve numerical stability of weights: exp(x)*exp(y(i)-x)=exp(y(i)), 
		## use exp(y(i)-x) as weight. missing factor exp(x) doesn't matter when averages are computed below
		## mw[i] is proportional to p(y|theta)/g(y|theta)
		##
		mw[i] = exp(SsfNonG.LogDensity.SV(iD="d.sv",mY=s.mY,vTheta=a,args=s.dScale)
		- SsfNonG.LogDensity.SV(iD="d.normal",mY=s.mY,vTheta=m.s$response,
								args=mApprox$H.tilde) - dlhat)
		
		mcum[,1] = mcum[,1] + mw[i]*a					# used for mean
		mcum[,2] = mcum[,2] + mw[i]*(a*a)				# used for var
	} 
	## compute weighted averages
	##
	a = sum(mw)											
	mcum = mcum/a											# Normalize MC estimates
	mcum[,2] = mcum[,2] - (mcum[,1]*mcum[,1])		# MC variance
	mn = sqrt(s.dScale*exp(mcum[,1]))					# MC volatility
	se1 = sqrt(s.dScale*exp(mcum[,1]-2*sqrt(mcum[,2])))
	se2 = sqrt(s.dScale*exp(mcum[,1]+2*sqrt(mcum[,2])))
	ans = list(sd.hat=mn,lower=se1,upper=se2,weights=mw)
	ans
}