#########################################################
##					HF/Fx Library		 						##
##			Created by: Bingcheng Yan						##
##			Last Updated on: 11/07/2003						##
#########################################################

#
# Corrected S-Plus functions
# aggregateSeries
#

# Visualization
# 1. plotByDays
#
#

#
#	DATA LOADING
#	1. TAQLoad
#	2. OlsenLoad
#

#	
# TIMESERIES AND DATA MANUPULATION
#	1. rbindtimeSeries
#	2. reorderTS
#	3. is.tsBW
#	4. tsBW
#	5. ExchangeHoursOnly
#	6. FxBizWeekOnly
#	7. diff.withinDay
#	8. diff.withinWeek
#	9. align.withinWeek
#	10. align.withinDay
#	11. aggregateSeriesHF
#	12. OvernightSmpl


#
# DESCRIPTIVE
#	1. SmoothAcrossIntervs
# 	2. tableSmoother

#
# DIAGNOSTIC
#	1. cusumLife
#	2. cusumPlot

#
# VARIABLE CONSTRUCTION
#	1. DurationInInterv
#	2. PriceChgInInterv
#	3. getSpread
#	4. getMidQuote
#	5. aggregateTradeTypes
#	6. DetermInterp
#	7. naDuration
#	8. numNAs
#  9. tradeDirec
#  10. Genr.RealVol

###############################################
# a function to generate realized volatility
# with specified span for realized volatiltiy,
# and span for returns over which realized volatility
# is computed
###############################################
Genr.RealVol = function(ts, interv.type = "daily", bound.hours = c("9:30", "16:00"),
							rv.span, rt.span, how.align = "before")
{
	# ts: one-column log price level timeSeries
	# interv.type: "daily" or "weekly" for exchange based trading data and Fx type data respectively.
	# bound.hours: a two-element character vector in the format of "HH:MM"
	#				the start and closing hours of the interval. In case of daily interval,
	#				they are the start and closing hours of the exchange. In case of weekly interval, they
	#				are the start hour of the biz week on Sunday and the closing hour of the biz week on 
	#				Friday.
	# rv.span: timeSpan object, the time span over which to compute
	#			 the realized volatility, e.g. timeSpan("5m")
	# rt.span: timeSpan object, the time span to define the returns
	#			 used to compute the realized volatility, e.g. timeSpan("1m")
	#			 rt.span must be smaller than or equal to rv.span
	# how.align: how to align, default "before", using the most recent observation,
	#				see help for align.


	# Output: realized volatility in the same unit as price changes

	# Validation
	if(!is(ts, "timeSeries"))
		stop(paste(deparse(substitute(ts)), "must be a timeSeries!"))	
	if(ncol(ts)>1)
		stop(paste(deparse(substitute(ts)), "can only be a one-column log price!"))
	
	if(!is(rv.span, "timeSpan"))
		stop("rv.span must be a timeSpan!")
	if(!is(rt.span, "timeSpan"))
		stop("rt.span must be a timeSpan!")
	
	if (rv.span < rt.span)
		stop("rv.span can not be smaller than rt.span!")
		
	# Check for NA in ts
	na.idx = is.na(ts@data)
	ts = timeSeries(sub(ts@data, !na.idx,), pos= ts@positions[!na.idx])
	
	
	if(interv.type == "daily"){
		ts.aligned = align.withinDay(ts, exch.hours = bound.hours,
								 by = rt.span, k.by = NULL, how = how.align)
		ts.diff = diff.withinDay(ts.aligned, bizhrs.cleaned = T)
		
	} else if(interv.type == "weekly"){
		ts.aligned = align.withinWeek(ts, bizweek.hours = bound.hours,
								 by = rt.span, k.by = NULL, how = how.align, fullweek = T)
		ts.diff = diff.withinWeek(ts.aligned, bizhrs.cleaned = T)
	} else 
 		stop ("interv.type must be either daily or weekly")
		
	rt = ts.diff@data
	pos.rt = ts.diff@positions
	
	# compute realized vol
	if(is(rt, "data.frame"))
		rt = unlist(rt)
	rt.mat = matrix(as.vector(rt), nrow = round(rv.span/rt.span))
	rv = sqrt(colSums(rt.mat^2))
	
	# prepare the pos for realized vol
	idx = matrix(1:nrow(ts.diff), nrow = round(rv.span/rt.span))[round(rv.span/rt.span),]
	pos.rv = pos.rt[idx]

	timeSeries(data = data.frame(RealizedVol = rv), pos = pos.rv)
} 

###############################################
# a function to excerpt data within an overnight interval
# Whether the boundary positions will be included is controlled by
# start.include and close.include 
###############################################
OvernightSmpl = function(ts, smpl.hours = c("22:00", "8:00"), start.include = T, close.include =T)
{
	# a function to excerpt data within an overnight interval
	# Whether the boundary positions will be included is controlled by
	# start.include and close.include 

	# smpl.hours: a two-element character vector in the format of "HH:MM",
	#				specifying the starting and closing hour and minute of an
	# 				overnight interval	
	if(!is(ts, "timeSeries"))
		stop("ts must be a timeSeries object")
	
	smpl.hrsSpan = timeSpan(smpl.hours, in.format = "%H:%M[:%S[:%N]]", format = "%02H%02M%02S%03N")
	if(any(is.na(smpl.hrsSpan)))
		stop("smpl.hours must be in the format of HH:MM:SS!")
	
	# boundary hours
	bound.times = as.numeric(as.character(smpl.hrsSpan))
	start.time = bound.times[1]
	close.time = bound.times[2]
	
	pos = positions(ts)
	hms.list = .Call("time_to_hour_min_sec", pos, timeZoneList(), COPY = c(F, F), CLASSES = c("timeDate", "list"))
	pos.time = hms.list[[1]]*10^7 + hms.list[[2]]*10^5 + hms.list[[3]]*10^3 + hms.list[[4]]
	
	
	if(start.include)
		if(close.include)
			idx = (pos.time >= start.time) | (pos.time <= close.time)
		else
			idx = (pos.time >= start.time) | (pos.time < close.time)
	else
		if(close.include)
			idx = (pos.time > start.time) | (pos.time <= close.time)
		else
			idx = (pos.timie > start.time) | (pos.time < close.time)
	
	timeSeries(sub(ts@data, idx,), pos = pos[idx])
}

###################################################################################
# function to assign buy/sell direction indicators to trades
###################################################################################
tradeDirec = function(trade, mq, timeLag = "5s")
{
	# trade: a one-column timeSeries for trading prices
	# mq: a one-column timeSeries for mid quotes
	# timeLag: either timeSpan or character specifying the time span, e.g. "5s",
	#			 
	
		if(!is(mq, "timeSeries") | !is(trade, "timeSeries") | ncol(mq) >
			1 | ncol(trade) > 1) stop(paste(deparse(substitute(mq)),
				"and", deparse(substitute(trade)), 
				"must be one-column timeSeries!"))
	
		if(!is(timeLag, "timeSpan")){
			timeLag = timeSpan(timeLag)
			if(is.na(timeLag))
				stop("timeLag must be either timeSpan or the character specifying the time span, e.g \"5s\"")
		}
	
		pos.mq = positions(mq)
		pos.trade = positions(trade)
	
		rng.mq = range(pos.mq)
		rng.trade = range(pos.trade)
	
		# separate data into daily intervals and assign trade direction
		days.mq = timeSeq(from=rng.mq[1], to=rng.mq[2], by = "days", align.by = T, extend = T)
		poscut.mq = as.integer(cut(pos.mq, days.mq, include.lowest = T))
		poscats.mq = unique(poscut.mq)
	
		days.trade = timeSeq(from=rng.trade[1], to=rng.trade[2], by="days", align.by = T, extend = T)
		poscut.trade = as.integer(cut(pos.trade, days.trade, include.lowest = T))
		poscats.trade = unique(poscut.trade)
	
		if(!identical(days.mq, days.trade) | !identical(poscats.mq, poscats.trade))
			stop("Mid quotes and trading prices do not correspond to the same trading days")
	
		poscats = poscats.trade
	
		# utility function to assign buy/sell direction each day
		direc.Day = function(mq, trade, timeLag)
		{
			pos.trade = positions(trade)
			posLag = pos.trade - timeLag

			pos.mq = positions(mq)
		
			mq.prevail = seriesData(align(mq, pos = posLag, how = "before",
			                        error.how = "nearest"))
			trade.price = seriesData(trade)

			ans.tmp = rep(0, nrow(trade))
			ans.tmp[trade.price > mq.prevail]= 1
			ans.tmp[trade.price < mq.prevail] = -1
			
			# idex for the trades whose prevailing quotes pos are
			# before the first quote observed in the day, they are
			# assigned "undetermined"
			ans.tmp[posLag < pos.mq[1]]= 0 
			
			timeSeries(data.frame(BuySellDirec = ans.tmp), pos = pos.trade)
		} 
		
		#find out the start and end index of each block in the original trade and quote series
		ini.idx.mq = match(poscats, poscut.mq)
		last.idx.mq = c(ini.idx.mq[-1], length(poscut.mq)+1)-1
		
		ini.idx.trade = match(poscats, poscut.trade)
		last.idx.trade = c(ini.idx.trade[-1], length(poscut.trade)+1)-1

		do.call("rbindtimeSeries", lapply(1:length(poscats), function(i, mq, trade, ini.idx.mq, last.idx.mq, ini.idx.trade, last.idx.trade, timeLag, func.direc.Day)
					{
						idx.mq = ini.idx.mq[i]:last.idx.mq[i]
						idx.trade = ini.idx.trade[i]:last.idx.trade[i]
					     func.direc.Day(mq=timeSeries(sub(mq@data, idx.mq,),pos=positions(mq)[idx.mq]),
						          trade=timeSeries(sub(trade@data, idx.trade,),pos=positions(trade)[idx.trade]),
						          timeLag = timeLag)

					}, mq, trade, ini.idx.mq, last.idx.mq, ini.idx.trade, last.idx.trade, timeLag, direc.Day))
}

###################################################################################
# Corrected and accelerated version of aggregateSeries
###################################################################################
aggregateSeries = function(x, pos, FUN, moving = F, together = F, drop.empty = T, include.ends = F, adj, offset, colnames, by, k.by = 1, week.align = 
								NULL, holidays = timeDate())
{
	# Aggregate series object to new positions
	origpos <- positions(x)
	rng <- range(origpos)
	is.cal <- is(origpos, "positionsCalendar")
	# construct positions if missing
	if(!moving){
	# take care of positions
		if(missing(pos)){
			userPos = F
 			if(is(by, "character")){
				if(by == "weeks" & is.null(week.align))
					stop("If by = \"weeks\" week.align must be supplied")
				pos <- timeSeq(from = rng[1], to = rng[2], by = by, k.by = k.by, align.by = T, extend = T, week.align = 
					week.align, holidays = holidays)
				extendPos = T
			}
			else{
				pos <- timeSeq(from = rng[1], to = rng[2], by = by)
				extendPos = F
			} 
		}
		else
			userPos = T
	}

	if(moving) {
		if(!missing(pos))
			warning("pos argument ignored for moving aggregation")
		len <- length(origpos)
		if(len < moving) {
			if(is.cal)
				return(timeSeries())
			else return(signalSeries())
		}
		# set up positions for returns and offsets
		poslen <- len - moving + 1
		pos <- origpos[1:poslen]
		if(moving > 1)
			diffpos <- diff(as(origpos, "numeric"), moving - 1)
		else diffpos <- rep(0, len)
		whichpos <- T
		# call aggregation function
		newdat <- do.call("rbind", lapply(1:poslen, function(rowstart, numrows, x, nc, func, together)
		{
			cols <- if(together) func(sub(x, rowstart:(rowstart + numrows - 1),  )) else if(nc > 0)
				sapply(1:nc, function(col, x, fun)
				{
					fun(sub(x,  , col))
				}
				, sub(x, rowstart:(rowstart + numrows - 1),  ), func)
			else numeric(0)
			cols <- as.rectangular(cols)
			if(is(cols, "matrix") && numRows(cols) != 1)
				cols <- matrix(cols, nrow = 1)
			cols
		}
		, moving, x@data, numCols(x), FUN, together))
	}
	else {
		# call cut to figure out which positions go in which bins
		posnums <- as(pos, "numeric")
		diffpos <- diff(posnums)
		if(any(is.na(diffpos)) || any(diffpos < 0))
			stop("Positions for aggregation must be monotonically increasing without NA values")
		
		# get right bin endpoints
		newpos <- pos
		bpnum = length(newpos)# number of breakpoints
		
		# adjust the breakpoints or data if necessary
		if(userPos || !extendPos){
			# whether include.ends is only a concern
			# for these two cases
			if(include.ends) {
			# modify the ends of breakpoints
			newpos[1] <- min(newpos[1], rng[1])
			newpos[bpnum] <- max(newpos[bpnum], rng[2])
			}
			else {
				# chop the data based on the range of newpos
				idx = origpos >= newpos[1] & origpos <= newpos[bpnum]
				if(!any(idx))
					stop("The range of supplied positions does
                       not cover any data at all!")
              x = x[idx,]
				origpos <- positions(x)
	           rng <- range(origpos)
			}
		}
		
		
		pos = newpos[1:(bpnum-1)] # the pos now serve the lower end of each bin
		poslen = length(pos)
		diffpos = diff(newpos)
		
		poscut <- as.integer(cut(origpos, newpos, include.lowest = T))
		if(!drop.empty) {
			poscats <- seq(along = pos)
			whichpos <- T
		}
		else {
			poscats <- unique(poscut)
			whichpos <- sort(match(poscats, seq(along = pos), nomatch = 0))
		}
		
		# find out the start and end index of each block in the orignal series
		ini.idx = match(poscats, poscut, nomatch = 0)
		last.idx = rep(0, length(ini.idx))
		last.idx[ini.idx!=0] = c((ini.idx[ini.idx!=0])[-1], length(poscut)+1)-1
		
		if (is.vector(x@data))
			x@data = as.matrix(x@data)
			
		# apply the function to get new series data
		newdat <- do.call("rbind", lapply(1:length(poscats), function(i, x, nc, ini.idx, last.idx, func, together)
		{
			cols <- if(together) func(sub(x, ini.idx[i]:last.idx[i],  )) else if(nc > 0)
				sapply(1:nc, function(col, x, fun)
				{
					fun(sub(x,  , col))
				}
				, sub(x, ini.idx[i]:last.idx[i],  ), func)
			else numeric(0)
			cols <- as.rectangular(cols)
			if(is(cols, "matrix") && numRows(cols) != 1)
				cols <- matrix(cols, nrow = 1)
			cols
		}
		, x@data, numCols(x), ini.idx, last.idx, FUN, together))
	}
	
	# add offset or adj to positions
	if(!missing(offset)) pos <- pos + offset else if(!missing(adj) && (poslen > 1)) {
		pos <- pos + adj * diffpos
	}
	
	# rbind these rows together and put into a series
	# newdat <- do.call("rbind", newdat)
	if(missing(colnames))
		colnames <- colIds(x@data)
	if(!is.null(colnames) && (numCols(newdat) == length(colnames)))
		colIds(newdat) <- colnames
	if(is.cal)
		timeSeries(pos = pos[whichpos], data = newdat)
	else signalSeries(pos = pos[whichpos], data = newdat)
}



#########################################################################################
# A function to smooth high-frequency data across trading intervals, i.e. daily or weekly
#########################################################################################
SmoothAcrossIntervs = function(ts, n.subinterv, n.interv, FUN)
{
	# ts: a one-column timeseries of high-frequency data with fixed number of subintervals in 
	#		each trading interval, daily or weekly.
	# n.subinterv: the fixed number of subintervals in each tradeing interval
	# n.interv: the number of tradeing intervals, days or weeks, involved in ts
	# FUN: the choice of smoothing function, valid choices are "mean", "median",
	#		"loess.smooth", "supsmu".
	# Note that the nrow of ts has to be the product of n.subinterv and n.interv
	
	if(!is(ts, "timeSeries"))
		stop(paste(deparse(substitute(ts)), "must be a timeSeries object!"))
	
	if(nrow(ts) != n.subinterv * n.interv)
		stop(paste("The length of", deparse(substitute(ts)),
				 "must be equal to the product of n.subinterv and n.interv")) 
			
	if(ncol(ts) > 1)
		stop(paste(deparse(substitute(ts)), "can only have one column of data to smooth!"))
	 
	pos = positions(ts)
	data = seriesData(ts)
	
	data.vec = as.vector(as.matrix(data))
	data.mat = matrix(data.vec, ncol = n.interv)
	
	if(!is(FUN, "character"))
		FUN = deparse(substitute(FUN))
		
	smuResult = tableSmoother(y = data.mat, FUN = FUN)

	timeSeries(data = data.frame(smuResult), pos = pos[1:n.subinterv])
}
######################################################################
# Function to generate bid-ask spread
######################################################################
getSpread = function(ask, bid, ticksize = NULL)
{
	# ask: a one-column timeSeries for ask quotes
	# bid: a one-column timeSeries for bid quotes
	# ask and bid must have the same length since they are paired
	
	if(!is(ask, "timeSeries") | !is(bid, "timeSeries") |
		ncol(ask) > 1 | ncol(bid) > 1)
		stop(paste(deparse(substitute(ask)), "and", deparse(substitute(bid)),
				"must be one-column timeSeries!"))
	
	if( nrow(ask) != nrow(bid))
		stop(paste("The length of", deparse(substitute(ask)), "must be equal to that of",
					  deparse(substitute(bid)))) 
	
	pos = positions(ask)
	ask.data = seriesData(ask)
	bid.data = seriesData(bid)
	
	spread.data = ask.data - bid.data
	ans = timeSeries(data = data.frame(Spread = spread.data), pos = pos)
	
	# compute spreads in ticks if required
	if(!is.null(ticksize))
		 ans = round(ans/ticksize)
	
	return(ans)
}


######################################################################
# Function to generate mid quote for bid and ask quotes
######################################################################
getMidQuote = function(ask, bid)
{
	# ask: a one-column timeSeries for ask quotes
	# bid: a one-column timeSeries for bid quotes
	# ask and bid must have the same length since they are paired

	if(!is(ask, "timeSeries") | !is(bid, "timeSeries") |
		ncol(ask) > 1 | ncol(bid) > 1)
		stop(paste(deparse(substitute(ask)), "and", deparse(substitute(bid)),
				"must be one-column timeSeries!"))
	
	if( nrow(ask) != nrow(bid))
		stop(paste("The length of", deparse(substitute(ask)), "must be equal to that of",
					  deparse(substitute(bid)))) 
	
	pos = positions(ask)
	ask.data = seriesData(ask)
	bid.data = seriesData(bid)
	
	midquote.data = (ask.data + bid.data)/2
	timeSeries(data = data.frame(MidQuote = midquote.data), pos = pos)
}
######################################################################
# Function to load TAQ trade or quote data sets from ASCII file
# it outputs a timeSeries object
######################################################################
TAQLoad = function(file="", type= "trade", fields.list=NULL, sep="|", skip=1, date.format = "%2d%m%Y")
{
	
	# file: path of the TAQ data file
	# type: has to be either "trade", or "quote"
	# fields.list: a list object supplying field names and data types in the 
	#					TAQ data file, in the format of the "what" list used in scan( ).
	#					 The default is to use the included "trade" or
	#					"quote" fields.list. If the fields.list is provided, the "type"
	#					argument is ignored. However, the supplied fields.list must contain
	#					the components of "Date" and "Time".
	#  sep: the separator used in the TAQ data file, to be passed to scan( )
	#  skip: the number of rows to skip when reading the data
	# The data set to be loaded must have the date format as "ddmmyyyy" and the time
	# format as the number of seconds since the midnight of the day.	
						
	if (is.null(fields.list)){
		if (type == "trade")
			fields.list = list(Cond="", Ex="", Symbol="", Corr=0,
								G127=0, Price=0, Size=0, Date="", Seq=0, Time=0)
		else if (type == "quote")
					fields.list = list(Ex="", MMID="", Symbol="", Bid=0, BidSize=0, Mode=0,
											Ask=0, AskSize=0, Date="", Seq=0, Time=0)
			  else
					stop("Type has to be either trade or quote!")
	}
	else {
		if ((!is.element("Date", names(fields.list))) |
				(!is.element("Time", names(fields.list))))
			stop("Fields of Date and Time must be in the supplided fields.list!") 
	}
		
	
	df = as.data.frame(scan(file = file, what = fields.list,
							 	sep = sep, skip = skip, multi.line=TRUE, strip.white=TRUE),
						  stringsAsFactors = F)
	
	dates.tmp = timeDate(charvec = df[, "Date"], in.format = date.format,
						 format = "%m/%d/%Y %H:%02M:%02S", zone = "Eastern")

	times.tmp = df[, "Time"]  / (60*60*24)
	td.tmp = dates.tmp + times.tmp
	
	ans = timeSeries(data = df[, setdiff(colIds(df), c("Date","Time"))],
						pos = td.tmp)
	ans
}

##############################################################################################
# for Olsen ASCI file stored at specified location, the function load those data into splus and 
# store them in time Series 
##############################################################################################
OlsenLoad = function(file, fields.list = list(Date = "", Time = "", Bid = 0, Ask = 0,
														Institution = ""))
{
	# for Olsen ASCI file stored at specified location, the function load those data into splus and 
	# store them in time Series 
	
	if ((!is.element("Date", names(fields.list))) |
				(!is.element("Time", names(fields.list))))
		stop("Fields of Date and Time must be in the supplided fields.list!") 

	df = as.data.frame(scan(file = file, what = fields.list,
									 multi.line=TRUE, strip.white=TRUE),
							stringsAsFactors = F)
	td.input = paste(df[, "Date"], df[, "Time"], sep = " ")
	td = timeDate(charvec = td.input, in.format = "%d.%m.%Y %H:%M:%S", format = 
						"%m/%d/%Y %H:%02M:%02S", zone = "GMT")
	ans = timeSeries(data = df[, setdiff(colIds(df), c("Date","Time"))],
						 pos = td)
	ans
}


################################################################################################
# a function to align high frequency data within each trading day, e.g. stock exchange price data.
# Aligning only uses data within each day and then concatenate algined series for each day together
# ####################################################################################################
align.withinDay = function(ts, exch.hours = c("9:30", "16:00"), start.include = T, close.include =T,
									by, k.by = 1, how)
{
	# exch.hours: a two-element character vector in the format of "HH:MM",
	#				specifying the starting hour and minute of an exhange
	#				trading day (the first element) and the closing hour and minute 
	#					of the	trading day (the second element)
	# by: "minutes", "days", etc. see help for align
	# k.by: the units of by, see help for align
	# how: how to align, see help for align
	
	# Whether the boundary positions will be included is controlled by
	# start.include and close.include 
	
	# VALUE: an aligned timeSeries within the defined trading hours each day

	if(!is(ts, "timeSeries"))
		stop("ts must be a timeSeries object")
		
	# utility functions
	day.align = function(ts.d, daystart.hour, dayclose.hour,
								by, k.by, how)
	{
		# ts.d: timeSeries data within one trading day

		pos.d = positions(ts.d)
		
		#the day align start.pos is the daystart.hour of the day
		start.pos = timeDate(paste(as.character(timeSeq(pos.d[1], length.out=1,
											format = "%m/%d/%Y")), daystart.hour), zone = pos.d@time.zone)
		start.pos@time.zone = "GMT"

		#the day align end.pos is the dayclose.hour of the day
		end.pos = timeDate(paste(as.character(timeSeq(pos.d[1], length.out =1,
											format = "%m/%d/%Y")), dayclose.hour), zone = pos.d@time.zone)
		end.pos@time.zone = "GMT"
										
		align.pos = timeSeq(from = start.pos, to = end.pos, by = by, k.by = k.by,
							format = pos.d@format, zone = "GMT")
		align.pos@time.zone = pos.d@time.zone
		align(ts.d, pos = align.pos, how = how, error.how = "nearest")
	} 

	# cleaning observations outside the boundary hours, e.g. exchange hours
	ts = ExchangeHoursOnly(ts, exch.hours = exch.hours, start.include = start.include,
								close.include = close.include)
	
	daystart.hour = exch.hours[1]
	dayclose.hour = exch.hours[2]
	
	origpos = positions(ts)
	rng = range(origpos)
	
	# call cut to slice each day data and apply the day.align()
	pos = timeSeq(from = rng[1], to = rng[2], by = "days", align.by = T, extend = T)
	poscut = as.integer(cut(origpos, pos, include.lowest=T))
	poscats = unique(poscut)
	
	#find out the start and end index of each block in the original series
	ini.idx = match(poscats, poscut)
	last.idx = c(ini.idx[-1], length(poscut)+1)-1

	do.call("rbindtimeSeries", lapply(1:length(poscats), function(i, ts, ini.idx, last.idx, func.dayalign, daystart.hour,
																			dayclose.hour, by, k.by, how)
								{
									idx = ini.idx[i]:last.idx[i]
									func.dayalign(timeSeries(sub(ts@data,idx,), pos=positions(ts)[idx]), daystart.hour=daystart.hour,
							 					dayclose.hour = dayclose.hour, by = by, k.by = k.by, how = how)
									
								}, ts, ini.idx, last.idx, day.align, daystart.hour, dayclose.hour, by, k.by, how))
	
}
##########################################################################################################
# a function to align high frequency data within each biz week, e.g. FX data. Alignment only uses data within
# each week and then concatenate algined series for each week together
# #######################################################################################################
align.withinWeek = function(ts, bizweek.hours = c("22:00", "22:00"), start.include = T, close.include =T,
									by, k.by = 1, how, fullweek = F)
{
	# bizweek.hours: a two-element character vector in the format of "HH:MM",
	#					specifying the starting hour and minute of a new
	#                biz week on Sunday (the first element) and the closing hour and minute 
	#					of the	biz week on Friday (the second element)
	# by: "minutes", "days", etc. see help for align
	# k.by: the units of by, see help for align
	# how: how to align, see help for align
	# fullweek: a logical, if T, the aligment positions for each week will run
	#			 from the biz week start hour on Sun to the biz week close hour
	#			 on Friday. Otherwise, the alignment positions for the first
	#			 week will begin from the biz week start hour on Sun if the first
	#			 data position is on Sun, or from the midnight 00:00 of the day
	#			 if the first data position is on non-Sun; the alignment positions 
	#			 for the last week will end at the biz week close hour on Fri
	#			 if the last data position is on Fri, or end at the midnight
	#			 00:00 of the next day if the last data position is on non-Fri
	

	# Whether the boundary positions will be included is controlled by
	# start.include and close.include 
	
	if(!is(ts, "timeSeries"))
		stop("ts must be a timeSeries object")
	
	
	# utility functions
	week.align = function(ts.w, find.start = T, find.end = T, weekstart.hour, weekclose.hour,
								by, k.by, how)
	{
		# ts.w: timeSeries data within one week (sun-fri)
		# find.start, find.end: logical, if this week is the 1st or last week
		#                        or both (one-week only) or neither (mid-weeks)
		findStartpos = function(firstpos, weekstart.hour)
		{
			# function takes a first timeDate position of one week data
			# returns the align start.pos of the week: the weekstart.hour on
			# Sun if the week data starts from Sun, or the 00:00 of the first 
			# day in the week data
			origformat = firstpos@format
			firstpos@format = "%m/%d/%Y"
			if(wdydy(firstpos)[,"weekday"] == 0)
				# the week starts from Sun			
				start.pos = timeDate(paste(as.character(firstpos), weekstart.hour),
				                    format = origformat, zone=firstpos@time.zone)
			else
				start.pos = timeDate(as.character(firstpos), format = origformat, zone=firstpos@time.zone)
			start.pos 
		}
	
		findEndpos = function(lastpos, weekclose.hour)
		{
			# function takes a last timeDate position of one week data
			# returns the align end.pos of the week: the weekclose.hour on
			# Fri if the week data ends on Fri, or the 00:00 of the day after 
			# the last day in the week data
			origformat = lastpos@format
			lastpos@format = "%m/%d/%Y"
			if (wdydy(lastpos)[,"weekday"] == 5)
				#the week ends on Fri
				end.pos = timeDate(paste(as.character(lastpos), weekclose.hour),
				                  format = origformat, zone = lastpos@time.zone)
			else
				end.pos = timeDate(as.character(lastpos), format = origformat, zone=lastpos@time.zone) +1
			end.pos
		}

		pos.w = positions(ts.w)
		rng.w = range(pos.w)
		if (find.start)
			start.pos = findStartpos(rng.w[1], weekstart.hour)
		else
			#the week align start.pos is the weekstart.hour on Sunday of the week
			start.pos = timeDate(paste(as.character(timeSeq(from = rng.w[1], to = rng.w[1], by = "weeks",
											align.by = T, extend = T, week.align = "Sun",
											format = "%m/%d/%Y")[1]), weekstart.hour), zone=pos.w@time.zone)
		start.pos@time.zone = "GMT"
		if (find.end)
			end.pos = findEndpos(rng.w[2], weekclose.hour)
		else
			#the week align end.pos is the weekclose.hour on Friday of the week
			end.pos = timeDate(paste(as.character(timeSeq(from = rng.w[2], to = rng.w[2], by = "weeks",
											align.by = T, extend = T, week.align = "Sat",
											format = "%m/%d/%Y")[2]-1), weekclose.hour), zone=pos.w@time.zone)
		end.pos@time.zone = "GMT"
												
		align.pos = timeSeq(from = start.pos, to = end.pos, by = by, k.by = k.by,
							format = pos.w@format, zone = "GMT")
		align.pos@time.zone = pos.w@time.zone
		return(align(ts.w, pos = align.pos, how = how, error.how = "nearest"))
	} 

	# cleaning observations outside the bizweek boundary hours, e.g. late Fri, Sat, and early Sun
	ts = FxBizWeekOnly(ts, bizweek.hours = bizweek.hours, start.include = start.include,
								close.include = close.include)
	
	weekstart.hour = bizweek.hours[1]
	weekclose.hour = bizweek.hours[2]
	
	origpos = positions(ts)
	rng = range(origpos)
	
	# call cut to slice each week data and apply the week.align()
	pos = timeSeq(from = rng[1], to = rng[2], by = "weeks", align.by = T, extend = T, week.align = "Sat")
	poscut = as.integer(cut(origpos, pos, include.lowest=T))
	poscats = unique(poscut)
	n.weeks = length(poscats)
	ini.idx = match(poscats, poscut)
	last.idx = c(ini.idx[-1], length(poscut)+1)-1

	if(fullweek)
			return(do.call("rbindtimeSeries", lapply(1:length(poscats), function(i, ts, ini.idx, last.idx, func.weekalign, weekstart.hour,
																			weekclose.hour, by, k.by, how)
								{
									idx = ini.idx[i]:last.idx[i]
									func.weekalign(timeSeries(sub(ts@data,idx,), pos=positions(ts)[idx]), find.start = F, find.end = F, weekstart.hour=weekstart.hour,
							 					weekclose.hour = weekclose.hour, by = by, k.by = k.by, how = how)

								}, ts, ini.idx, last.idx, week.align, weekstart.hour, weekclose.hour, by, k.by, how)))
		
	
	if(n.weeks < 2){
		# only 1 week of data
		# both align start and end positions to be determined
		return(week.align(ts, find.start = T, find.end = T, weekstart.hour=weekstart.hour,
							 weekclose.hour = weekclose.hour, by = by, k.by = k.by, how = how))		
	}
	else{
		# 2 or more weeks of data
		# the align start.pos of the 1st week and 
		# the align end.pos of the last week to be determined
		
		# the 1st week
		ans.1stw = week.align(ts[ini.idx[1]:last.idx[1],], find.start = T, find.end = F, weekstart.hour=weekstart.hour,
							 weekclose.hour = weekclose.hour, by = by, k.by = k.by, how = how)
		
		# the last week
		ans.lastw = week.align(ts[rev(ini.idx)[1]:rev(last.idx)[1], ], find.start = F, find.end = T, weekstart.hour=weekstart.hour,
							 weekclose.hour = weekclose.hour, by = by, k.by = k.by, how = how)
		
		if(n.weeks > 2){
			# there are mid weeks
			return(do.call("rbindtimeSeries", c(list(ans.1stw), lapply(2:(length(poscats)-1), function(i, ts, ini.idx, last.idx, func.weekalign, weekstart.hour,
																			weekclose.hour, by, k.by, how)
								{
									idx = ini.idx[i]:last.idx[i]
									func.weekalign(timeSeries(sub(ts@data,idx,), pos=positions(ts)[idx]), find.start = F, find.end = F, weekstart.hour=weekstart.hour,
							 					weekclose.hour = weekclose.hour, by = by, k.by = k.by, how = how)

								}, ts, ini.idx, last.idx, week.align, weekstart.hour, weekclose.hour, by, k.by, how), list(ans.lastw))))
		}
		else
			return(do.call("rbindtimeSeries", list(ans.1stw, ans.lastw)))
	}
	stop("Unlikely things happened! DEBUG!")
	
}
####################################################################################
# Taking difference for HF timeSeries within each day, e.g. stock exchange price data
# by removing the first observation of differenced data for each day
#############################################################################
diff.withinDay = function(ts, bizhrs.cleaned = F, exch.hours = c("9:30", "16:00"), start.include = T, close.include =T)
{
	# bizhrs.cleaned: a logical if the data has been cleaned for observations outside the daily trading hours
	# exch.hours: ignored if bizhrs.cleaned = T
	#				a two-element character vector in the format of "HH:MM",
	#				specifying the starting hour and minute of an exhange
	#				trading day (the first element) and the closing hour and minute 
	#					of the	trading day (the second element) 
	#			
	# Whether the boundary positions will be included is controlled by
	# start.include and close.include (ignored if bizhrs.cleaned = T)

	if(!is(ts, "timeSeries"))
		stop("ts must be a timeSeries object")
	
	# loading the FinMetrics module, since the diff() function only works 
	# properly under the FinMetrics module
	module(finmetrics, first = F)
	
	# cleaning observations outside the boundary hours, e.g. exchange hours
	if(!bizhrs.cleaned)
		ts = ExchangeHoursOnly(ts, exch.hours = exch.hours, start.include = start.include,
								close.include = close.include)
	# taking difference
	ts.diff = diff(ts)
	pos.diff = positions(ts.diff)
	
	# build an aux time series with row seq as data
	# then call aggregateSeries to find out the row num for the 1st obs each day
	ts.aux = timeSeries(data = seq(along = pos.diff), pos = pos.diff)
	idx.1st = seriesData(aggregateSeries(ts.aux, by = "days", FUN=function(x) {x[1]}, together = T))

	if (nrow(idx.1st) < 2)
		# only one day is involved
		# no need to proceed
		return(ts.diff)
	
	# the row num for the 1st obs each day since the 2nd day
	idx.1st = idx.1st[-1, ]
	
	timeSeries(sub(ts.diff@data, -idx.1st,), pos = pos.diff[-idx.1st])
}

#############################################################################
# Taking difference for HF timeSeries within each biz week, e.g. Fx price data
# by removing the first observation of differenced data for each week
###############################################################################
diff.withinWeek = function(ts, bizhrs.cleaned = F, bizweek.hours = c("22:00", "22:00"), start.include = T, close.include =T)
{
	# bizhrs.cleaned: a logical if the data has been cleaned for observations outside the biz week hours
	# bizweek.hours:  ignored if bizhrs.cleaned = T
	#					a two-element character vector in the format of "HH:MM",
	#					specifying the starting hour and minute of a new
	#                biz week on Sunday (the first element) and the closing hour and minute 
	#					of the	biz week on Friday (the second element)	#	
	# Whether the boundary positions will be included is controlled by
	# start.include and close.include (ignored if bizhrs.cleaned = T)

	if(!is(ts, "timeSeries"))
		stop("ts must be a timeSeries object")
		
	# loading the FinMetrics module, since the diff() function only works 
	# properly under the FinMetrics modlue
	module(finmetrics, first = F)
	
	# cleaning observations outside the bizweek boundary hours, e.g. late Fri, Sat, and early Sun
	if(!bizhrs.cleaned)
		ts = FxBizWeekOnly(ts, bizweek.hours = bizweek.hours, start.include = start.include,
								close.include = close.include)
	
	# taking difference
	ts.diff = diff(ts)
	pos.diff = positions(ts.diff)
	
	# build an aux time series with row seq as data
	# then call aggregateSeries to find out the row num for the 1st obs each week
	ts.aux = timeSeries(data = seq(along = pos.diff), pos = pos.diff)
	idx.1st = seriesData(aggregateSeries(ts.aux, by = "weeks", week.align = "Sat", FUN=function(x) {x[1]}, together = T))

	if (nrow(idx.1st) < 2)
		# only one week is involved
		# no need to proceed
		return(ts.diff)
	
	# the row num for the 1st obs each week since the 2nd week
	idx.1st = idx.1st[-1, ]
	
	timeSeries(sub(ts.diff@data, -idx.1st,), pos = pos.diff[-idx.1st])
}

###############################################
# a function to excerpt data within exchange trading hours
# Whether the boundary positions will be included is controlled by
# start.include and close.include 
###############################################
ExchangeHoursOnly = function(ts, exch.hours = c("9:30", "16:00"), start.include = T, close.include =T)
{
	# a function to excerpt data within exchange trading hours
	# Whether the boundary positions will be included is controlled by
	# start.include and close.include 

	# exch.hours: a two-element character vector in the format of "HH:MM",
	#				 specifying the starting hour and minute of an exhange
	#				trading day (the first element) and the closing hour and minute 
	#					of the	trading day (the second element)
	
	if(!is(ts, "timeSeries"))
		stop("ts must be a timeSeries object")
	
	exch.hrsSpan = timeSpan(exch.hours, in.format = "%H:%M[:%S[:%N]]", format = "%02H%02M%02S%03N")
	if(any(is.na(exch.hrsSpan)))
		stop("exch.hours must be in the format of HH:MM:SS!")
	
	# boundary hours
	bound.times = sort(as.numeric(as.character(exch.hrsSpan)))	
	start.time = bound.times[1]
	close.time = bound.times[2]
	
	pos = positions(ts)
	hms.list = .Call("time_to_hour_min_sec", pos, timeZoneList(), COPY = c(F, F), CLASSES = c("timeDate", "list"))
	pos.time = hms.list[[1]]*10^7 + hms.list[[2]]*10^5 + hms.list[[3]]*10^3 + hms.list[[4]]
	
	
	if(start.include)
		if(close.include)
			idx = (pos.time >= start.time) & (pos.time <= close.time)
		else
			idx = (pos.time >= start.time) & (pos.time < close.time)
	else
		if(close.include)
			idx = (pos.time > start.time) & (pos.time <= close.time)
		else
			idx = (pos.timie > start.time) & (pos.time < close.time)
	
	
	weekdays = .Call("time_to_weekday", pos, timeZoneList(), COPY = c(F, F), CLASSES = c("timeDate", "list"))
	idx = which(idx & !(weekdays == 0 | weekdays == 6))
	timeSeries(sub(ts@data, idx,), pos = pos[idx])
}

###############################################
# a function to excerpt data within the Fx bizweek 
# Whether the boundary positions will be included is controlled by
# start.include and close.include 
###############################################
FxBizWeekOnly = function(ts, bizweek.hours = c("22:00", "22:00"), start.include = T, close.include =T)
{

	# bizweek.hours: a two-element character vector in the format of "HH:MM",
	#					specifying the starting hour and minute of a new
	#                biz week on Sunday (the first element) and the closing hour and minute 
	#					of the	biz week on Friday (the second element) 
	
	if(!is(ts, "timeSeries"))
		stop("ts must be a timeSeries object")
	
	bizweek.hrsSpan = timeSpan(bizweek.hours, in.format = "%H:%M[:%S[:%N]]", format = "%02H%02M%02S%03N")
	if(any(is.na(bizweek.hrsSpan)))
		stop("bizweek.hours  must be in the format of HH:MM:SS!")

	# boundary hours
	bound.times = as.numeric(as.character(bizweek.hrsSpan))
	sundayStart.time = bound.times[1]
	fridayClose.time = bound.times[2]
	
	pos = positions(ts)
	hms.list = .Call("time_to_hour_min_sec", pos, timeZoneList(), COPY = c(F, F), CLASSES = c("timeDate", "list"))
	pos.time = hms.list[[1]]*10^7 + hms.list[[2]]*10^5 + hms.list[[3]]*10^3 + hms.list[[4]]
	

	# index for Fridays, Saturdays and Sundays	
	weekdays = .Call("time_to_weekday", pos, timeZoneList(), COPY = c(F, F), CLASSES = c("timeDate", "list"))
	Fri.idx = weekdays == 5
	Sat.idx = weekdays == 6 
	Sun.idx = weekdays == 0 
	
	
	if(start.include){
		earlySun.idx = (pos.time < sundayStart.time) & Sun.idx
		if(close.include)
			lateFri.idx = (pos.time > fridayClose.time) & Fri.idx
		else
			lateFri.idx = (pos.time >= fridayClose.time) & Fri.idx
	} 
	else {
		earlySun.idx = (pos.time <= sundayStart.time) & Sun.idx
		if(close.include)
			lateFri.idx = (pos.time > fridayClose.time) & Fri.idx
		else
			lateFri.idx = (pos.time >= fridayClose.time) & Fri.idx
	}

	idx = which(!(lateFri.idx | Sat.idx | earlySun.idx))
	timeSeries(sub(ts@data, idx,), pos = pos[idx])
}

###############################################
#  a function to give out a logical vector indicate
# if the positions (of timeSeries) are between given start and end positions
###############################################
is.tsBW = function(x, start.pos, close.pos, start.include = T, close.include = T)
{
	# x: timeDate or timeSeries
	# start.pos, and close.pos: each is a one-element timeDate object	

	if(!is(x, "timeSeries") & !is(x,"timeDate") ) 
		stop("x must be a timeSeries or timeDate object")
	
	if(!is(start.pos, "timeDate") | !is(close.pos, "timeDate") |
			length(start.pos) > 1 | length(close.pos) > 1)
		stop("start.pos and close.pos must be one-element timeDate object")
		
	if(is.na(start.pos) | is.na(close.pos))
		stop("start.pos and close.pos can not be NA")
		
	if(is(x, "timeSeries"))
		pos = x@positions
	else
		pos = x
	
	if(start.include)
		if(close.include)
			ans = (pos >= start.pos) & (pos <= close.pos)
		else
			ans = (pos >= start.pos) & (pos < close.pos)
	else
		if(close.include)
			ans = (pos > start.pos) & (pos <= close.pos)
		else
			ans = (pos > start.pos) & (pos < close.pos)
	
	ans
} 

###############################################
#  a function to subscribe the part of a timeSeries
# between the given start and end positions. If no observation 
# is within the specified interval, a NULL is returned and a warning
# is reported. 
###############################################

tsBW = function(x, start.pos, close.pos, start.include = T, close.include = T)
{
	# x: timeSeries
	# start.pos, and close.pos: each is a one-element timeDate object	

	if(is(x, "timeSeries"))
		pos = x@positions
	else
		stop("x must be a timeSeries object")
		
	foundIdx = is.tsBW(x, start.pos = start.pos, close.pos = close.pos,
			 			start.include = start.include, close.include = close.include)
	if(any(foundIdx))
		# there are some observations withing the interval
		return(timeSeries(sub(x@data, foundIdx,), pos = pos[foundIdx]))
	else{
		warning("No observation is within the interval.")
		return(NULL)
	}
}

###############################################
#  a function to plot intraday data by days.
# useful for exploratory analysis and data diagnosis
# 
###############################################
plotByDays = function(ts, coltoplot=1, daysToPlot = NULL, days.max = NULL, ...)
{
	# ts: timeSeries to plot
	# coltoplot: column(s) to plot (column number, name, or logical vector allowed)
	# daysToPlot: a character vector specifying which days to plot. The format of days is
	#				"m/d/Y". If daysToPlot is provided, days.max is ignored.
	# days.max: if daysToPlot is not given, the function is to plot data by days in order.
	#				if days.max is given, it plots the first days.max days. Otherwise, the function
	#				plots all days in data.
	# ...: other arguments passed to the plot function
	# If a day does not have a minimum of 2 observations to plot, a warning will be issued.
	
	if(is(ts, "timeSeries"))
		pos = ts@positions
	else
		stop("ts must be a timeSeries object")
		
	pos@format = "%02m/%02d/%Y"
	poschar = as.character(pos)
	days.data = unique(poschar)
	
	if(!is.null(daysToPlot)){
		# user specified the days to plot		
		n.daystoplot = length(daysToPlot)
		
		if(is(daysToPlot, "timeDate"))
			daysToPlot@format = pos@format
		else
			daysToPlot = timeDate(daysToPlot, zone = pos@time.zone, format = pos@format)
		
		daysToPlot = as.character(daysToPlot)			
	}
	else{
		# plot the days in data in order
		daysToPlot = days.data
		
		if(!is.null(days.max))
			# plot the first days.max days
			n.daystoplot = min(length(days.data), days.max)
		else
			# plot all days in order
			n.daystoplot = length(days.data)
	}
	
	tmp = lapply(1:n.daystoplot, function(i, daysToPlot, poschar, ts, coltoplot, ...)
	{
		idx = poschar == daysToPlot[i]
		if (sum(idx) < 2)
			warning(paste(daysToPlot[i], "does not have the minimum of 2 observations in data to plot"))
		else						
			plot(timeSeries(sub(ts@data,idx,coltoplot), pos=positions(ts)[idx]), plot.args=list(type="p"),
			 x.axis.args=list(time.of.day.style="24:00"), ...)
		return()		
	}, daysToPlot, poschar, ts, coltoplot, ...)
	return()
}

#############################################
## a function to rbind timeSeries 
############################################
rbindtimeSeries = function(...)
{
	#...: any number of timeSeries objects, should be in the same class, ncol, variabls, etc
	args = list(...)
	
	if(!all(sapply(args, is, "timeSeries")))
		stop("All arguments must be timeSeries")
	
	isDataVector = is.vector(seriesData(args[[1]]))
	
	if(isDataVector)
		newdata = do.call("c", lapply(args, FUN = seriesData))
	else
		newdata = do.call("rbind", lapply(args, function(ts)
									{
										tsData = seriesData(ts)
										if(is(tsData, "matrix")){
											rowIds(tsData) = character(0)
											return(tsData)
										} else {
											# data.frame
											attr(tsData, "dup.row.names") = T
											return(tsData)
										}
									}))
	newpos = do.call("concat", lapply(args, FUN = positions))
	
	ans = timeSeries(newdata, pos = newpos)
	ans
}
##################################################################################################
###  a function to reorder a timeSeries according to its positions
###################################################################################################

reorderTS = function(x)
{
		x.pos = positions(x)
		x.data = seriesData(x)
	
		pos.order = order(x.pos)
		x.pos = x.pos[pos.order]
		x.data = sub(x.data, pos.order,)
	
		if (!is.null(rowIds(x.data)))
		{
			x.len = nrow(x.data)
			rowIds(x.data) = as.character(1:x.len)
		}
		
		ans = timeSeries(x.data, pos=x.pos)
		ans
}

##################################################################################
# A function to smooth data in table form (data.frame or matrix) across columns
################################################################################
tableSmoother = function(y, FUN, ...)
{
	# the function is to smooth data in table form (data.frame or matrix) across columns
	# e.g. rows are timeblocks and cols are different days and the tableSmoother smooth
	# data across days. 
	
	# the table form data can be obtained by, e.g. aggregateSeries(..., drop.empty=F)
	# or by reshape long vector of muti-days data into table form
		
	# y: data.frame or matrix, NA allowed
	# FUN: smoothing function to use. choices are "mean", "median", "loess.smooth",
	#		or "supsmu"
	# ...: any other arguments to be passed to loess.smooth or supsmu
	
	# VALUE: a smoothed vector
	
	if( (!is.matrix(y))&(!is.data.frame(y)))
		stop (" Data has to be in table form, either data.frame or matrix!")
		
	if (is.null(switch(FUN, mean=0, median=0, loess.smooth=0, supsmu=0)))
		stop (" The smoothing function has to be one of mean, median, loess.smooth, or supsmu!")
	
	
	if (FUN=="mean")
		ans = as.vector(rowMeans(y, na.rm=T))
	
	if (FUN=="median")
		ans = as.vector(apply(as.matrix(y), 1, FUN=FUN, na.rm=T))
	
	if ( (FUN=="loess.smooth")|(FUN=="supsmu"))
	{
		n = nrow(y)
		y = as.vector(as.matrix(y))
		x = rep(1:n, length= length(y))
 		
		na.idx = is.na(y)
		y = y[!na.idx]
		x = x[!na.idx]
		
		if (FUN=="loess.smooth")
			ans = loess.smooth(x, y, evaluation = n, ...)$y
		else
			ans = supsmu(x, y, ...)$y		
	}
	
	ans
	
}


##################################################################################
## A function to compute volatility (std.dev) of a given timeSeries for a specified timeinterval using aggregaeSeries
## The function return a dataframe with dimensions of block times and days
#############################################################################
VTable = function(x.ts, by="minutes", k.by, overlapping=T)
{
	# x.ts: the one-variable timeSeries data
	# by: time interval units
	# k.by: time interval length in "by"
	# overlapping: logical flag, if true, compute half-interval overlapping volatility
	 	
	pos = positions(x.ts)
	x.timeZone = pos@time.zone
	
	# Prepare colIds for the Volatility table
	pos@format = "%b.%02d.%Y"
	table.colnames = unique(as.character(pos))
	
	# create an abbreviation of time units
	time.unit.abbrev = switch(by, minutes="m", seconds="s", hours="h")
	
	
	# create timeDate of days in data
	pos@format = "%02m/%02d/%Y"
	x.days=unique(as.character(pos))
	smpl.blocks = timeDate(x.days, zone=x.timeZone)
	numDays=length(smpl.blocks)
	
	
	# Safeguard feature: write each day results onto disk(working directory) with temp.names, then cbind, and finally remove those 
	# intermediate objects
	
	# create temp.names
	temp.names = paste("day", 1:numDays,"ans",sep="")
	
	
	for (i in 1:numDays)
	{
		# Extract the ith day data
		smpl = (pos >=smpl.blocks[i]) & (pos < (smpl.blocks[i]+1))
		y = x.ts[smpl,] 		
		# construct the ith day aggregation positions (starting from t0)
		aggPos.1 = timeSeq(from= smpl.blocks[i], to= (smpl.blocks[i]+1), by=by, k.by=k.by, zone=x.timeZone)
		aggPos.1 = aggPos.1[!(aggPos.1 >= (smpl.blocks[i]+1))] # make sure all aggregation positions are within the ith day
		
		# volitility computing
		aggY = aggregateSeries(y, pos=aggPos.1, FUN=colStdevs, drop.empty=F)
		
		
		# computing overlapping volatility if desired, 
		if (overlapping)
		{
			# construct the ith day aggregation positions (starting from t0+ half aggregation interval)
			aggPos.2 = aggPos.1 + timeSpan(paste(k.by, time.unit.abbrev, sep=""))/2
			aggPos.2 = aggPos.2[!(aggPos.2 >= (smpl.blocks[i]+1))] # make sure all aggregation positions are within the ith day
			
			# data adjustment
			idx = positions(y) >= aggPos.2[1]
			y = y[idx]
			
			
			# volitility computing
			aggY.2 = aggregateSeries(y, pos=aggPos.2, FUN=colStdevs, drop.empty=F)
			
			# concatenate two aggregated series and reorder
			aggY = adhereTS(aggY, aggY.2)
			aggY= reorderTS(aggY)
		}
		
		# Prepare the ith talbe column
		aggData = seriesData(aggY)
		colIds(aggData)= table.colnames[i]
		pos.Y = positions(aggY)
		pos.Y@format = "%02H:%02M:%02S"
		rowIds(aggData)= as.character(pos.Y)
		
		#write ith day results into working directory with temp.names[i]
		assign(temp.names[i], aggData, where=1)		
	}
	
	# Construct the table
	for (i in 1:numDays)
	{
		if (i==1)
			ans = get(temp.names[i], where=1)
		else
			ans = cbind(ans, get(temp.names[i], where=1))
	}
	ans = as.data.frame(ans)
	
	# finalize the operation and remove intermediate objects
	remove(temp.names, where=1)
	
	ans
}

############################################################################
#  A function to aggregate B/S trade type dummy variables
# over a certain interval/a certain number of trades proceeding the most recent
# quote
#
##################################################################################

aggregateTradeTypes=function(trdDummy.ts, quotes.ts, by="seconds",k.by=20)
{
	# trdDummy.ts: a timeSeries object with B/S dummy at the data slot
	# quotes.ts: positions of qutoes
	# by: "seconds", "minutes", "hours", or "trades"
	# k.by: number of time units or trades
	
	trades.pos = positions(trdDummy.ts)
	trdDummy = seriesData(trdDummy.ts)
	
	quotes.pos = positions(quotes.ts)
	quotes.len = length(quotes.pos)
	ans = matrix(NA, quotes.len, 1)
	
	if (by!="trades")	
	{		
		unitType = switch(by, seconds="s", minutes="m", hours="h")
		agg.span=timeSpan(paste(k.by, unitType, sep=""))
		quotes.pos.lag = quotes.pos - agg.span
		
	
		for ( i in 1:quotes.len)
		{
			smpl=(trades.pos >= quotes.pos.lag[i])&(trades.pos < quotes.pos[i])
			if (sum(smpl)==0)
				next
			ans[i,1]=sum(trdDummy[smpl,])
		
		}
	}
	else # aggregation by trades
	{
		for (i in 1:quotes.len)
		{
			smpl= trades.pos < pos[i]
			n= sum(smpl)
			ans[i,1]=sum(trdDummy[(n-k.by+1):n,])
		}
				
	}
	
	ans=timeSeries(as.data.frame(ans), pos=quotes.pos)
	ans
	
}

###################################################################################
# A function to plot the stats with 95% upper and lower bounds
#################################################################################

cusumPlot=function(x)
{
	# Function to plot the cusum stats with upper and lower bounds
	# x: one-column timeSeries object of cusum stats
	
	n=nrow(x)
	factor=0.948*sqrt(n)
	
	pos=positions(x)
	y=cbind(seriesData(x), seq(factor,3*factor,length=n),seq(-factor,-3*factor,length=n))
	colIds(y)=c("CUMSUM", "upper","lower")
	y=timeSeries(y,pos=pos)
	
	invisible(plot(y, plot.args=list(lty=c(1,2,2),col=c(2,1,1)),main=colIds(x),reference.grid=F))
	
}

##############################################################################################################
# A function to compute the duration (life) of cusum stats 
# i.e. the duration of cusum stats before first time hitting the boundary
##############################################################################################################

cusumLife=function(x)
{
	# Function to compute the duration (life) in minutes before the model parameters estimates
	# give ill behaved filtering and forecasting (cusum stats hitting the 95% bounds)
	
	# x: one-column timeSeries object of cusum stats
	
	n=nrow(x)
	pos=positions(x)
	factor=0.948*sqrt(n)
	  
	viol.idx=abs(seriesData(x)) > seq(factor,3*factor,length=n)
	
	if (sum(viol.idx)!=0) # violation occured
	{
		pos.viol=pos[viol.idx]
		life=pos.viol[1]-pos[1]
	}
	else	# violation never occured in the testing sample
		life=pos[n]-pos[1]
	
	# Transform "life" from timespan class to numeric with the unit of minutes
	life@format="%s"
	life.min=as.numeric(as.character(life))%/%60
	life.min	
	
}


######################################################################################3
# A function to compute time series of durations within specified intervals, daily or weekly. 
##################################################################################

DurationInInterv=function(x, units="seconds", interv.type = "daily",
							 bound.hours = c("9:30", "16:00"))
{
	# compute time series of durations within specified intervals, daily or weekly. 
	#
	# x: a timeseries object or a timeDate object
	# units: durations can be either expressed in units of seconds, milliseconds, or minutes, the default is by seconds.
	# interv.type: "daily" or "weekly" for exchange based trading data and Fx type data respectively.
	# bound.hours:  a two-element character vector in the format of "HH:MM"
	#				the start and closing hours of the interval. In case of daily interval,
	#				they are the start and closing hours of the exchange. In case of weekly interval, they
	#				are the start hour of the biz week on Sunday and the closing hour of the biz week on 
	#				Friday.
	
	if(!is(x, "timeSeries") & !is(x,"timeDate") ) 
		stop(paste(deparse(substitute(x)), "must be a timeSeries or timeDate object"))
	
	
	if(is(x, "timeSeries"))
		pos = positions(x)
	else 
		pos = x
	
	ts.tmp = timeSeries(data = data.frame(pos), pos = pos)
	
	if (units == "seconds")
		factor = 86400
	else if (units == "milliseconds")
		factor = 86400000
		else if (units == "minutes")
			factor = 1440
			else
				stop ("Duration units must be one of seconds, milliseconds, or minutes")
	
	if (interv.type == "daily")
		ans = diff.withinDay(ts = ts.tmp, exch.hours = bound.hours,
										 start.include = T, close.include = T)
	else if (interv.type == "weekly")
		ans = diff.withinWeek(ts = ts.tmp, bizweek.hours = bound.hours,
										 start.include = T, close.include = T)
		else
			stop ("interv.type must be one of daily or weekly")
										
	# the duration from differencing is in fraction of day,
	# adjust for desired units.
	ans = round(ans * factor)
	colIds(ans) = paste("Duration", "in", units, sep=".")
	
	return(ans)
}

######################################################################################
# A function to compute time series of price changes (in approriate ticks, if desired) within
# specified intervals, daily or weekly. 
##################################################################################

PriceChgInInterv=function(x, ticksize = NULL, interv.type = "daily",
							 bound.hours = c("9:30", "16:00"))
{
	# compute time series of price changes (in approriate tick sizes, if desired)
	# within specified intervals, daily or weekly. 
	#
	# x: a timeseries of price levels
	# ticksize: the default is NULL to simply compute price change in original price levels. 
	#			 If ticksize is supplied, the price changes will be divided by the ticksize to compute
	#			 price changes in ticks.
	# interv.type: "daily" or "weekly" for exchange based trading data and Fx type data respectively.
	# bound.hours: a two-element character vector in the format of "HH:MM"
	#				the start and closing hours of the interval. In case of daily interval,
	#				they are the start and closing hours of the exchange. In case of weekly interval, they
	#				are the start hour of the biz week on Sunday and the closing hour of the biz week on 
	#				Friday.
	
	if(!is(x, "timeSeries")) 
		stop(paste(deparse(substitute(x)), "must be a timeSeries object"))
	
	
	if (interv.type == "daily")
		ans = diff.withinDay(ts = x, exch.hours = bound.hours,
										 start.include = T, close.include = T)
	else if (interv.type == "weekly")
		ans = diff.withinWeek(ts = x, bizweek.hours = bound.hours,
										 start.include = T, close.include = T)
		else
			stop ("interv.type must be one of daily or weekly")
										
	# compute price changes in ticks if required
	if (!is.null(ticksize))
		ans = round(ans / ticksize)
	
	return(ans)
}

#########################################################################################
# The function extends the aggregatesSeries function in S-Plus for high-frequency data
# by fixing the start and end aggregation positions based on trading conventions:
# daily business hours for exchange trading, and bizweek for Fx trading.
##############################################################################################
aggregateSeriesHF = function(ts, interv.type = "daily", bound.hours = c("9:30", "16:00"), by, k.by,
									FUN, adj, drop.empty = F, ...)
{
	# The function extends the aggregatesSeries function in S-Plus for high-frequency data
	# by fixing the start and end aggregation positions based on trading conventions:
	# daily business hours for exchange trading, and bizweek for Fx trading.
	
	# interv.type: "daily" or "weekly" for exchange based trading data and Fx type data respectively.
	# bound.hours:  a two-element character vector in the format of "HH:MM"
	#				the start and closing hours of the interval. In case of daily interval,
	#				they are the start and closing hours of the exchange. In case of weekly interval, they
	#				are the start hour of the biz week on Sunday and the closing hour of the biz week on 
	#				Friday.
	
	# FUN: aggregation function to be applied to data in each block
	# adj: 0 or 1, adjusting the aggregated data toward the lower end of aggregation block or upper end.
	# ...: other parameters to be passed to aggregateSeries()
	
	if(!is(ts, "timeSeries")) 
		stop(paste(deparse(substitute(ts)), "must be a timeSeries object"))
	
	if((adj!=0) & (adj!=1))
		stop("adj must be either 0 or 1")
	
	# utility functions for Fx data
		findStartpos = function(firstpos, weekstart.hour)
		{
			# function takes a first timeDate position of data
			# returns the start.pos of the week: the weekstart.hour on
			# Sun 
			origformat = firstpos@format
			firstpos@format = "%m/%d/%Y"
			if(wdydy(firstpos)[,"weekday"] == 0)
				# the week starts from Sun			
				start.pos = timeDate(paste(as.character(firstpos), weekstart.hour),
				                    format = origformat, zone=firstpos@time.zone)
			else{
				pos.tmp = timeSeq(from = firstpos, to = firstpos, by = "weeks", 
									align.by = T, extend = T, week.align = "Sun", format = "%m/%d/%Y")[1]
				start.pos = timeDate(paste(as.character(pos.tmp), weekstart.hour),
				                    format = origformat, zone=firstpos@time.zone)				
			}
			start.pos 
		}
	
		findEndpos = function(lastpos, weekclose.hour)
		{
			# function takes a last timeDate position of data
			# returns the end.pos of the week: the weekclose.hour on
			# Fri
			origformat = lastpos@format
			lastpos@format = "%m/%d/%Y"
			if (wdydy(lastpos)[,"weekday"] == 5)
				#the week ends on Fri
				end.pos = timeDate(paste(as.character(lastpos), weekclose.hour),
				                  format = origformat, zone = lastpos@time.zone)
			else{
				pos.tmp = timeSeq(from = lastpos, to = lastpos, by = "weeks", 
									align.by = T, extend = T, week.align = "Fri", format = "%m/%d/%Y")[2]
				end.pos = timeDate(paste(as.character(pos.tmp), weekclose.hour),
				                  format = origformat, zone = lastpos@time.zone)				
			}
			end.pos
		}

	start.hour = bound.hours[1]
	close.hour = bound.hours[2]
	
	if (interv.type == "daily"){
		# exchange based data
		
		# clean data
		ts = ExchangeHoursOnly(ts, exch.hours = bound.hours)
		origpos = positions(ts)
		rng = range(origpos)
	
		
	   # Figure out the start and end positions for aggregation
		pos.tmp = rng[1]
		pos.tmp@format = "%m/%d/%Y"		
		start.pos = timeDate(paste(as.character(pos.tmp), start.hour),
					 format = origpos@format, zone= origpos@time.zone)
		start.pos@time.zone = "GMT"
		
		pos.tmp = rng[2]
		pos.tmp@format = "%m/%d/%Y"
		end.pos = timeDate(paste(as.character(pos.tmp), close.hour),
					 format = origpos@format, zone= origpos@time.zone)
		end.pos@time.zone = "GMT"
		newpos = timeSeq(from = start.pos, to = end.pos, by = by, k.by = k.by,
							 format = origpos@format, zone="GMT")
		newpos@time.zone = origpos@time.zone
		
		# eliminate those off hours and weekend aggregation blocks
		aux.ts = timeSeries(data = seq(along= newpos), pos = newpos)
		aux.ts = ExchangeHoursOnly(aux.ts, exch.hours = bound.hours)
		newpos = positions(aux.ts)
		
		ans = aggregateSeries(ts, pos = newpos, FUN=FUN, adj = adj, drop.empty = drop.empty,...)
		if(adj ==1)
			ans = ExchangeHoursOnly(ans, exch.hours = bound.hours, start.include = F)
		else
			ans = ExchangeHoursOnly(ans, exch.hours = bound.hours, close.include = F)
	}
	else if (interv.type == "weekly"){
		# Fx type data
		
		# clean data
		ts = FxBizWeekOnly(ts, bizweek.hours = bound.hours)
		origpos = positions(ts)
		rng = range(origpos)
		
		
		start.pos = findStartpos(rng[1], start.hour)
		start.pos@time.zone = "GMT"
		end.pos = findEndpos(rng[2], close.hour)
		end.pos@time.zone = "GMT"
		
		newpos = timeSeq(from = start.pos, to = end.pos, by = by, k.by = k.by,
		                  format = origpos@format, zone = "GMT")
		newpos@time.zone = origpos@time.zone
		
		# eliminate those weekend aggregation blocks
		aux.ts = timeSeries(data = seq(along= newpos), pos = newpos)
		aux.ts = FxBizWeekOnly(aux.ts, bizweek.hours = bound.hours)
		newpos = positions(aux.ts)
		
		ans = aggregateSeries(ts, pos = newpos, FUN = FUN, adj = adj, drop.empty = drop.empty,...)
		if (adj ==1)
			ans = FxBizWeekOnly(ans, bizweek.hours = bound.hours, start.include = F)
		else
			ans = FxBizWeekOnly(ans, bizweek.hours = bound.hours, close.include = F)		
	}
	else
		stop ("interv.type must be one of daily or weekly")
	ans	
} 
#########################################################################################
# A function to linearly interplate the values of daily deterministic components in paramters at the positions of 
# data obersvations. 
##############################################################################################

DetermInterp= function(data.pos, daily.determ)
{
	# data.pos: a timeDate object, from the positions of data time series
	# daily.determ: a timeSeries object of daily deterministic patterns of a set of parameters,
	#  				typically output from certain smoother across days. In fact, the date information of this time
	#  				series is ignored and the intra-day time positions are actually used. Each column of the timeSeries 	#  				corresponds to a particular parameter 
	# A critical assumption is that the daily deterministic pattern covers the positions range for each day data
	# output: a timeSeries object with data.pos as postitions and 
	#			linearly interplated daily deterministic values at those postions 
	
	
	# identify the days involved in data observations
	data.pos@format = "%02m/%02d/%Y"
	data.days = unique(as.character(data.pos))
	numDays = length(data.days)
	
	# identify the time blocks in daily deterministic components
	determ.pos = positions(daily.determ)
	determ.pos@format =  "%02H:%02M:%02S"
	determ.blocks = unique(as.character(determ.pos))
	numBlocks = length(determ.blocks)
	
	# record data and determ timezones info
	data.pos.timeZone = data.pos@time.zone
	determ.pos.timeZone = determ.pos@time.zone
	if (data.pos.timeZone != determ.pos.timeZone)
		stop ("Data observations and daily deterministic patterns must be in the same time zones!")
	
	
	# Create a new determ series to match days in data observations
	pos.strings = paste(rep(data.days, each=numBlocks), rep(determ.blocks, numDays), sep=" ")
	determ.newpos = timeDate(pos.strings, zone = data.pos.timeZone)
	
	para.names = colIds(daily.determ)
	determ.data = seriesData(daily.determ)
	determ.newdata = matrix(rep(as.vector(t(as.matrix(determ.data))), numDays), by.row=T, ncol=length(para.names)) 
	colIds(determ.newdata) = para.names
	determ.newdata = as.data.frame(determ.newdata)
	
	determ.newSeries = timeSeries(determ.newdata, pos= determ.newpos)
	
	# linearly interplate deterministic components values for data positions
	ans = align(determ.newSeries, pos= data.pos, how="interp", error.how="nearest")
	
	
	ans

}

###############################################################################
# A function to compute the number of NAs between two adjecent non-NA quotes
##########################################################################

numNAs = function(x)
{
	# x: a timeseries of quotes with NAs
	# output: a timeseries of the number of NAs between two adjecent
	#			non-NA quotes
	
	if(!is(x, "timeSeries")) 
		stop("x must be a timeSeries object")
	
 	numrow=nrow(x)
	pos=positions(x)
	temp=as.matrix(seq(1,numrow, by=1))
	
	na.idx=is.na(seriesData(x))
	temp.nonna=temp[!na.idx,1,drop=F]
	pos.nonna=pos[!na.idx]
	
	ans=diff(temp.nonna)-1
	colIds(ans)="NumNAs"
	ans=timeSeries(as.data.frame(ans), pos=pos.nonna[-1])
	ans

}
############################################################################
# A function to compute NA duration, i.e. how long observations have been in NA state
##############################################################################

naDuration = function (x.ts)
{
	# The function taks a time series of observations with NAs and return a time series
	# of equal length (and the same timedate position) of the input. The values of the resulting
	# timeseries indicate that at each time position, how long (duration) observations have been in 
	# NA state
	
	## E.x. 
	# Input	Positions:P1 		P2	P3		P4	P5	P6		P7	P8	P9	P10	P11		P12
	# 			Value:		nonNA	NA	nonNA	NA	NA	nonNA	NA	NA	NA	NA	nonNA	nonNA
	
	# Output	Positions:P1 		P2	P3	P4	P5		P6	P7	P8		P9		P10			P11		P12
	#         Value:		0		0	0	0	P5-P4	0	0	P8-P7	P9-P7	P10-P7		0		0
	
	
	# data info
	x.pos = positions(x.ts)
	x.data = seriesData(x.ts)
	x.row = nrow(x.ts)
	
	# Initialize results
	ans = matrix(0, x.row,1)
	colIds(ans) = "naDuration"
	
	# construct aux sequence number vector
	aux = seq(1, x.row, by=1)
	
	# identify non-na data sequency number
	nona.idx = !is.na(x.data) 
	nona.seqNum = aux[nona.idx]
	
	# finishing "bracketing"
	nona.seqNum = c(0, nona.seqNum, x.row+1)
	
	# NA duration is only defined for those blocks
	#(bracketed by two nonNAs) that have two or more
	# consecutive NAs, find them
	key.seqNum = nona.seqNum[c(F,diff(nona.seqNum)>2)] 
	key.seqNum.idx = match(key.seqNum, nona.seqNum)
	
	for (i in key.seqNum.idx) 	{
		dur.temp = duration(x.ts[(nona.seqNum[i-1]+1):(nona.seqNum[i]-1),])
		ans[(nona.seqNum[i-1]+2):(nona.seqNum[i]-1),] = cumsum(as.matrix(seriesData(dur.temp)))
		
	}
	
	ans = as.data.frame(ans)
	ans = timeSeries(ans, pos =x.pos)
	
}