# Created on 02/20/2003
# By Bingcheng Yan
# Last updated on 11/06/2003
#
# The utility functions were moved to the HF Library; that library has to be
# attached before this script file can be run.
#
# Library functions listed by the original header:
#   TAQLoad()            OlsenLoad()          reorderTS()
#   plotByDays()         ExchangeHoursOnly()  FxBizWeekOnly()
#   align.withinDay()    align.withinWeek()   diff.withinDay()
#   diff.withinWeek()    is.tsBW()            tsBW()
#   DurationInInterv()   PriceChgInInterv()   getSpread()
#   SmoothAcrossIntervs() tableSmoother()     rbindtimeSeries()
#   aggregateSeriesHF()  tradeDirec()         Genr.RealVol()
# Also called below, although the original list omitted it:
#   getMidQuote()

## Attach the FxLibrary
attach("D:\\Consulting\\FxLibrary", pos = 2, name = "FxLibrary")

## Load the FinMetrics module
module("finmetrics", first = FALSE)

#################### Section 2 Data Processing ####################

### Loading MSFT trade data; the bare expressions below print a preview ###
msftt.ts <- TAQLoad(
  file = "D:\\CompFinProg\\HFAnalysis\\trade_msft.txt",
  type = "trade"
)
msftt.ts[1:5, ]
slotNames(msftt.ts)
msftt.ts@data[1:5, ]
msftt.ts@positions[1:5]

### Loading MSFT quote data ###
msftq.ts <- TAQLoad(
  "D:\\CompFinProg\\HFAnalysis\\quote_msft.txt",
  type = "quote"
)
msftq.ts[1:5, ]

### Loading GE trade data ###
get.ts <- TAQLoad(
  "D:\\CompFinProg\\HFAnalysis\\trade_ge.txt",
  type = "trade"
)
get.ts[1:5, ]

### Loading GE quote data ###
geq.ts <- TAQLoad(
  "D:\\CompFinProg\\HFAnalysis\\quote_ge.txt",
  type = "quote"
)
geq.ts[1:5, ]

### Loading USD/EUR data ###
eurusd.ts <- OlsenLoad("D:\\CompFinProg\\HFAnalysis\\eur_usd.txt")
eurusd.ts[1:5, ]
eurusd.ts[1:5, "Bid"]

#### Data examination and cleaning ####

# Re-order every series
msftt.ts <- reorderTS(msftt.ts)
msftq.ts <- reorderTS(msftq.ts)
get.ts <- reorderTS(get.ts)
geq.ts <- reorderTS(geq.ts)
eurusd.ts <- reorderTS(eurusd.ts)

## Plot the raw data day by day (3 x 3 panel layout, at most 7 days)
par(mfrow = c(3, 3))
plotByDays(ts = msftt.ts, coltoplot = "Price", days.max = 7)
par(mfrow = c(3, 3))
plotByDays(ts = eurusd.ts, coltoplot = 1, days.max = 7)

## Time interval restrictions
# Equity series: restrict to 9:30-16:00, both end points included
msftt.ts <- ExchangeHoursOnly(
  ts = msftt.ts,
  exch.hours = c("9:30", "16:00"),
  start.include = TRUE,
  close.include = TRUE
)
msftq.ts <- ExchangeHoursOnly(
  ts = msftq.ts,
  exch.hours = c("9:30", "16:00"),
  start.include = TRUE,
  close.include = TRUE
)
get.ts <- ExchangeHoursOnly(
  ts = get.ts,
  exch.hours = c("9:30", "16:00"),
  start.include = TRUE,
  close.include = TRUE
)
geq.ts <- ExchangeHoursOnly(
  ts = geq.ts,
  exch.hours = c("9:30", "16:00"),
  start.include = TRUE,
  close.include = TRUE
)
par(mfrow = c(3, 3))
plotByDays(ts = msftt.ts, coltoplot = "Price", days.max = 7)

# FX series: restrict to the business week bounded by 22:00 and 22:00
eurusd.ts <- FxBizWeekOnly(
  eurusd.ts,
  bizweek.hours = c("22:00", "22:00"),
  start.include = TRUE,
  close.include = TRUE
)
par(mfrow = c(2, 3))
plotByDays(ts = eurusd.ts, coltoplot = 1)

########## Section 3 Data Manipulation and Characterization ##########

##### 3.1 Construction of Market Variables #####

## 3.1.1 Price changes of MSFT prices and USD/EUR bid quotes
args(PriceChgInInterv)
pcTicks.msft <- PriceChgInInterv(
  msftt.ts[, "Price"],
  ticksize = 1/8,
  interv.type = "daily",
  bound.hours = c("9:30", "16:00")
)
pcTicks.eurusd <- PriceChgInInterv(
  eurusd.ts[, "Bid"],
  ticksize = 0.0001,
  interv.type = "weekly",
  bound.hours = c("22:00", "22:00")
)

## 3.1.2 Durations of MSFT trades and USD/EUR quotes
args(DurationInInterv)
duration.msftt <- DurationInInterv(
  x = msftt.ts,
  units = "seconds",
  interv.type = "daily",
  bound.hours = c("9:30", "16:00")
)
duration.msftt[1:5, ]
duration.eurusd <- DurationInInterv(
  x = eurusd.ts,
  units = "seconds",
  interv.type = "weekly",
  bound.hours = c("22:00", "22:00")
)
duration.eurusd[1:5, ]

## 3.1.3 Spreads of MSFT quotes and USD/EUR quotes
args(getSpread)
spread.msft <- getSpread(
  ask = msftq.ts[, "Ask"],
  bid = msftq.ts[, "Bid"],
  ticksize = 1/8
)
# Drop the bad rows: a zero ask, a zero bid, or a spread below one tick
error.idx <- seriesData(msftq.ts)[, "Ask"] == 0 |
  seriesData(msftq.ts)[, "Bid"] == 0 |
  seriesData(spread.msft) < 1
spread.msft <- spread.msft[!error.idx, ]
spread.msft[1:5, ]
spread.eurusd <- getSpread(
  ask = eurusd.ts[, "Ask"],
  bid = eurusd.ts[, "Bid"],
  ticksize = 0.0001
)
spread.eurusd[1:5, ]

## 3.1.4 Trade direction of MSFT trades
args(tradeDirec)
mq.msft <- getMidQuote(ask = msftq.ts[, "Ask"], bid = msftq.ts[, "Bid"])
trade.direc.msft <- tradeDirec(trade = msftt.ts[, "Price"], mq = mq.msft)
trade.direc.msft[1:5, ]

## 3.1.5 Daily realized volatility for MSFT prices and USD/EUR mid quotes
args(Genr.RealVol)
rvDaily.msft <- Genr.RealVol(
  ts = log(msftt.ts[, "Price"]) * 100,
  interv.type = "daily",
  bound.hours = c("9:30", "16:00"),
  rv.span = timeSpan("6h30m"),
  rt.span = timeSpan("5m")
)
rvDaily.msft[1:5, ]

# Mid quote of the USD/EUR bid-ask quotes
mq.eurusd <- getMidQuote(ask = eurusd.ts[, "Ask"], bid = eurusd.ts[, "Bid"])
rvDaily.eurusd <- Genr.RealVol(
  ts = log(mq.eurusd) * 100,
  interv.type = "weekly",
  bound.hours = c("22:00", "22:00"),
  rv.span = timeSpan("24h"),
  rt.span = timeSpan("5m")
)
rvDaily.eurusd[1:5, ]

###### 3.2 Statistical Properties of Market Variables ######

## Summary stats of price changes
summaryStats(pcTicks.msft)
summaryStats(pcTicks.eurusd)

## Histograms of price changes
# The outermost bins are open-ended: they run from the sample minimum and to
# the sample maximum, hence the "<=" / ">=" labels.
pcFactor.msft <- cut(
  seriesData(pcTicks.msft),
  breaks = c(min(pcTicks.msft), -3:2, max(pcTicks.msft)),
  include.lowest = TRUE,
  labels = c("<= -3", paste(-2:2), ">= 3"),
  factor.result = TRUE
)
pcFactor.eurusd <- cut(
  seriesData(pcTicks.eurusd),
  breaks = c(min(pcTicks.eurusd), -6:5, max(pcTicks.eurusd)),
  include.lowest = TRUE,
  labels = c("<= -6", paste(-5:5), ">= 6"),
  factor.result = TRUE
)
par(mfrow = c(1, 2))
hist.factor(
  pcFactor.msft,
  prob = TRUE,
  xlab = "Price Changes.MSFT",
  main = "Histogram of MSFT\nTrading Price Changes in Ticks"
)
hist.factor(
  pcFactor.eurusd,
  prob = TRUE,
  xlab = "Price Changes.USD/EUR",
  main = "Histogram of USD/EUR\nBid Quote Changes in Ticks"
)

## Summary stats of spreads
summaryStats(spread.msft)
summaryStats(spread.eurusd)

## Histograms of spreads (MSFT panel; the USD/EUR panel follows)
par(mfrow = c(1, 2))
hist.factor(
  seriesData(spread.msft),
  prob = TRUE,
  xlab = "Spread.MSFT",
  main = "Histogram of MSFT\nSpread in Ticks"
)
# USD/EUR spread histogram
hist.factor(
  seriesData(spread.eurusd),
  prob = TRUE,
  xlab = "Spread.USD/EUR",
  main = "Histogram of USD/EUR\nSpread in Ticks"
)

## Summary stats of durations
summaryStats(duration.msftt)
summaryStats(duration.eurusd)

## Histograms of durations
# Unit-wide bins from 0 to 10 seconds, plus one last bin that runs up to the
# sample maximum and is labelled ">= 10".
durFactor.msftt <- cut(
  seriesData(duration.msftt),
  left.include = TRUE,
  breaks = c(0:10, max(duration.msftt)),
  include.lowest = TRUE,
  labels = c(paste(0:9), ">= 10"),
  factor.result = TRUE
)
durFactor.eurusd <- cut(
  seriesData(duration.eurusd),
  left.include = TRUE,
  breaks = c(0:10, max(duration.eurusd)),
  include.lowest = TRUE,
  labels = c(paste(0:9), ">= 10"),
  factor.result = TRUE
)
par(mfrow = c(1, 2))
hist.factor(
  durFactor.msftt,
  prob = TRUE,
  xlab = "Duration.MSFT",
  main = "Histogram of MSFT\nTransaction Durations in Seconds"
)
hist.factor(
  durFactor.eurusd,
  prob = TRUE,
  xlab = "Duration.USD/EUR",
  main = "Histogram of USD/EUR\nQuote Durations in Seconds"
)

### Temporal dependence in price changes and bid-ask bounce

## Two-way classification table of lead-lag price changes
# Lag series = all rows but the last; lead series = all rows but the first,
# so row i of one lines up with row i of the other.
pcLag.msft <- pcTicks.msft[1:(nrow(pcTicks.msft) - 1), ]
pcLead.msft <- pcTicks.msft[2:nrow(pcTicks.msft), ]
# Three bins per series, labelled "-", "0" and "+"
pcLagFactor.msft <- cut(
  seriesData(pcLag.msft),
  breaks = c(min(pcLag.msft), -1:0, max(pcLag.msft)),
  include.lowest = TRUE,
  labels = paste("Lag.PC", c("-", "0", "+")),
  factor.result = TRUE
)
pcLeadFactor.msft <- cut(
  seriesData(pcLead.msft),
  breaks = c(min(pcLead.msft), -1:0, max(pcLead.msft)),
  include.lowest = TRUE,
  labels = paste("Lead.PC", c("-", "0", "+")),
  factor.result = TRUE
)
table(pcLagFactor.msft, pcLeadFactor.msft)

#### 3.3 Calendar Patterns in Market Activities ####

### 3.3.1 Calendar patterns in volatility ###

## MSFT: 5-min realized volatility
rv5min.msft <- Genr.RealVol(
  ts = log(msftt.ts[, "Price"]) * 100,
  interv.type = "daily",
  bound.hours = c("9:30", "16:00"),
  rv.span = timeSpan("5m"),
  rt.span = timeSpan("1m")
)
rv5min.msft[1:5, ]

# Average the 5-min realized volatility across days
# (78 five-minute sub-intervals between 9:30 and 16:00, 11 intervals)
args(SmoothAcrossIntervs)
rv5min.ave.msft <- SmoothAcrossIntervs(
  ts = rv5min.msft,
  n.subinterv = 78,
  n.interv = 11,
  FUN = mean
)
rv5min.ave.msft@title <-
  "5-min Realized Volatility: MSFT\n(averaging across 11 trading days)"

par(mfrow = c(1, 2))
# ACF of the 5-min realized volatility for MSFT (234 lags = 3 x 78)
acf.plot(
  acf(rv5min.msft, lag.max = 234, plot = FALSE),
  main = "ACF of 5-min Realized Volatility:\nMSFT (lags up to 3 days)"
)
# Calendar pattern in realized volatility for MSFT
plot(
  rv5min.ave.msft,
  reference.grid = FALSE,
  x.axis.args = list(
    format.label = c("%H:%02M", "%Z"),
    time.of.day.style = "24:00"
  )
)

## USD/EUR: 5-min realized volatility
rv5min.eurusd <- Genr.RealVol(
  ts = log(mq.eurusd) * 100,
  interv.type = "weekly",
  bound.hours = c("22:00", "22:00"),
  rv.span = timeSpan("5m"),
  rt.span = timeSpan("1m")
)
rv5min.eurusd[1:5, ]

# Average the 5-min realized volatility across days
# (288 five-minute sub-intervals per 24 hours, 5 intervals)
rv5min.ave.eurusd <- SmoothAcrossIntervs(
  ts = rv5min.eurusd,
  n.subinterv = 288,
  n.interv = 5,
  FUN = mean
)
rv5min.ave.eurusd@title <-
  "5-min Realized Volatility: USD/EUR\n(averaging across 5 trading days)"

par(mfrow = c(1, 2))
# ACF of the 5-min realized volatility for USD/EUR (864 lags = 3 x 288)
acf.plot(
  acf(rv5min.eurusd, lag.max = 864, plot = FALSE),
  main = "ACF of 5-min Realized Volatility:\nUSD/EUR (lags up to 3 days)"
)
# Calendar pattern in realized volatility for USD/EUR
plot(
  rv5min.ave.eurusd,
  reference.grid = FALSE,
  x.axis.args = list(
    format.label = c("%H:%02M", "%Z"),
    time.of.day.style = "24:00"
  )
)

### 3.3.2 Calendar patterns in duration ###

## MSFT: mean durations in 5-min intervals
args(aggregateSeriesHF)
meanDur5min.msft <- aggregateSeriesHF(
  duration.msftt,
  interv.type = "daily",
  bound.hours = c("9:30", "16:00"),
  FUN = mean,
  by = "minutes",
  k.by = 5,
  adj = 1,
  drop.empty = FALSE,
  together = TRUE
)
meanDur5min.msft[1:5, ]

# Average the 5-min mean durations across trading days
meanDur5min.ave.msft <- SmoothAcrossIntervs(
  ts = meanDur5min.msft,
  n.subinterv = 78,
  n.interv = 11,
  FUN = mean
)
meanDur5min.ave.msft@title <-
  "5-min Mean Durations: MSFT\n(averaging across 11 trading days)"

# Two-panel layout for the ACF and calendar-pattern plots of the durations
par(mfrow = c(1, 2))
# MSFT mean durations: ACF (234 lags = 3 x 78) and calendar pattern
acf.plot(
  acf(meanDur5min.msft, lag.max = 234, plot = FALSE),
  main = "ACF of 5-min Mean Durations:\nMSFT (lags up to 3 days)"
)
plot(
  meanDur5min.ave.msft,
  reference.grid = FALSE,
  x.axis.args = list(
    format.label = c("%H:%02M", "%Z"),
    time.of.day.style = "24:00"
  )
)

## USD/EUR: mean durations in 5-min intervals
meanDur5min.eurusd <- aggregateSeriesHF(
  duration.eurusd,
  interv.type = "weekly",
  bound.hours = c("22:00", "22:00"),
  FUN = mean,
  by = "minutes",
  k.by = 5,
  adj = 1,
  drop.empty = FALSE,
  together = TRUE
)
meanDur5min.eurusd[1:5, ]

# Average the 5-min mean durations across trading days.
# The title must agree with n.interv = 5; it previously read "11 trading
# days", copied from the MSFT block.
meanDur5min.ave.eurusd <- SmoothAcrossIntervs(
  ts = meanDur5min.eurusd,
  n.subinterv = 288,
  n.interv = 5,
  FUN = mean
)
meanDur5min.ave.eurusd@title <-
  "5-min Mean Durations: USD/EUR\n(averaging across 5 trading days)"

# ACF and calendar pattern in durations.
# Rows whose 5-min mean duration is NA are dropped before computing the ACF.
na.idx <- is.na(seriesData(meanDur5min.eurusd))
par(mfrow = c(1, 2))
acf.plot(
  acf(meanDur5min.eurusd[!na.idx, ], lag.max = 864, plot = FALSE),
  main = "ACF of 5-min Mean Durations:\nUSD/EUR (lags up to 3 days)"
)
plot(
  meanDur5min.ave.eurusd,
  reference.grid = FALSE,
  x.axis.args = list(
    format.label = c("%H:%02M", "%Z"),
    time.of.day.style = "24:00"
  )
)

### 3.3.3 Calendar patterns in the frequency of trades or quotes ###

## MSFT
# Auxiliary series holding a 1 at every trade time, so that summing it over
# an interval counts the trades in that interval.
aux.msft <- timeSeries(
  data.frame(Ticks = rep(1, nrow(msftt.ts))),
  pos = msftt.ts@positions
)
# Number of trades in 5-min intervals
trades5min.msft <- aggregateSeriesHF(
  aux.msft,
  interv.type = "daily",
  bound.hours = c("9:30", "16:00"),
  FUN = sum,
  by = "minutes",
  k.by = 5,
  adj = 1,
  drop.empty = FALSE,
  together = TRUE
)
trades5min.msft[1:5, ]

# Average the number of trades in 5-min intervals across trading days
trades5min.ave.msft <- SmoothAcrossIntervs(
  ts = trades5min.msft,
  n.subinterv = 78,
  n.interv = 11,
  FUN = mean
)
trades5min.ave.msft@title <-
  "Number of Trades in 5-min Intervs: MSFT\n(averaging across 11 trading days)"

# ACF and calendar pattern in the number of trades
par(mfrow = c(1, 2))
acf.plot(
  acf(trades5min.msft, lag.max = 234, plot = FALSE),
  main = "ACF of Number of Trades in 5-min Intervs:\nMSFT (lags up to 3 days)"
)
plot(
  trades5min.ave.msft,
  reference.grid = FALSE,
  x.axis.args = list(
    format.label = c("%H:%02M", "%Z"),
    time.of.day.style = "24:00"
  )
)

## USD/EUR
# Auxiliary series holding a 1 at every quote time, used to count quotes
aux.eurusd <- timeSeries(
  data.frame(Ticks = rep(1, nrow(eurusd.ts))),
  pos = eurusd.ts@positions
)
# Number of quotes in 5-min intervals
quotes5min.eurusd <- aggregateSeriesHF(
  aux.eurusd,
  interv.type = "weekly",
  bound.hours = c("22:00", "22:00"),
  FUN = sum,
  by = "minutes",
  k.by = 5,
  adj = 1,
  drop.empty = FALSE,
  together = TRUE
)
quotes5min.eurusd[1:5, ]

# Average the number of quotes in 5-min intervals across trading days.
# As above, the title is corrected from "11" to "5" to match n.interv = 5.
quotes5min.ave.eurusd <- SmoothAcrossIntervs(
  ts = quotes5min.eurusd,
  n.subinterv = 288,
  n.interv = 5,
  FUN = mean
)
quotes5min.ave.eurusd@title <-
  "Number of Quotes in 5-min Intervs: USD/EUR\n(averaging across 5 trading days)"

# ACF and calendar pattern in the number of quotes
par(mfrow = c(1, 2))
acf.plot(
  acf(quotes5min.eurusd, lag.max = 864, plot = FALSE),
  main = "ACF of Number of Quotes in 5-min Intervs:\nUSD/EUR (lags up to 3 days)"
)
plot(
  quotes5min.ave.eurusd,
  reference.grid = FALSE,
  x.axis.args = list(
    format.label = c("%H:%02M", "%Z"),
    time.of.day.style = "24:00"
  )
)