# timeSeriesManipulation.ssc scripts for time series manipulation chapter # author: Eric Zivot # created: February 4, 2002 # updated: May 10, 2002 # # Notes: # 1. Be specific about time zone specification used in this # chapter. All examples are created on a windows computer with # Pacific coast as the default time zone. For simplicity, the # default time zone is set to GMT using # options(time.zone="GMT") #################################################################### # # illustrate timeSeries objects # class(singleIndex.dat) slotNames(singleIndex.dat) singleIndex.dat@title singleIndex.dat@documentation singleIndex.dat@units singleIndex.dat@positions[1:5] positions(singleIndex.dat)[1:5] start(singleIndex.dat) end(singleIndex.dat) class(positions(singleIndex.dat)) singleIndex.dat@data[1:5,] seriesData(singleIndex.dat)[1:5,] class(seriesData(singleIndex.dat)) # # basic manipulations # msft.p = singleIndex.dat[,"MSFT"] msft.p = singleIndex.dat[,1] msft.p@title = "Monthly closing price on Microsoft" msft.p@documentation = c("Monthly closing price adjusted for stock", "splits and dividends.") msft.p@units = "US dollar price" class(msft.p) smpl = (positions(singleIndex.dat) >= timeDate("3/1/1992") & positions(singleIndex.dat) <= timeDate("1/31/1993")) singleIndex.dat[smpl,] colMeans(singleIndex.dat) colVars(singleIndex.dat) colStdevs(singleIndex.dat) colMeans(seriesData(singleIndex.dat)) # # illustrate timeDate objects # td = timeDate("1/1/2002",in.format="%m/%d/%Y",zone="Pacific") td options("time.in.format") names(format.timeDate) format.timeDate[[1]]$input slotNames(td) ?class.timeDate td@.Data td@format names(format.timeDate)[18] format.timeDate[[18]]$output td@time.zone td@format = paste(td@format,"%z") options("time.zone") td2 = timeDate("Mar 02, 1963 08:00 PM", in.format="%m %d, %Y %H:%M %p", format="%b %02d, %Y %02I:%02M %p %z") td2 td@time.zone = "Eastern" td td@.Data td@time.zone = "GMT" td td@.Data tdGMT = timeDate("1/1/2002",zone="GMT", format="%m/%d/%02y %H:%02M:%02S %p %z") tdGMT tdGMT@.Data tdPST = timeZoneConvert(tdGMT,"PST") tdPST tdPST@.Data tdEST = timeDate("1/1/2002",zone="Eastern") tdEST tdEST@.Data td = timeZoneConvert(td,"Eastern") td td@.Data # # mathematical operations on dates # td1 = timeDate("1/1/2002",in.format="%m/%d/%Y", zone="GMT",format="%m/%d/%04Y %H:%02M:%02S %p %z") td2 = timeDate("2/1/2002",in.format="%m/%d/%Y", zone="GMT",format="%m/%d/%04Y %H:%02M:%02S %p %z") td1 td2 as.numeric(td1) td1 + 1 td1 + 0.5 td1 - 1 2*td1 td1+td2 # timeSpan objects td.diff = td2 - td1 td.diff class(td.diff) slotNames(td.diff) # # MISC date functions # as.character(weekdays(td)) as.character(months(td)) mdy(td) hms(td) wdydy(td) as.numeric(days(td)) hours(td) minutes(td) seconds(td) yeardays(td) # day in the year (of of 365) # # set default time zone to GMT # time.zone.old = options(time.zone="GMT") # # Annual sequences # td = timeCalendar(y=1900:1910,format="%Y") class(td) td td@.Data td@format = "%C" td td = timeSeq(from="1/1/1900", to="1/1/1910", by="years", format="%Y") td tds = timeSequence("1/1/1900","1/1/1910",by="years",format="%Y") class(tds) tds td = as(tds,"timeDate") td # quaterly sequence td = timeSeq(from="1/1/1900", to="10/1/1902", by="quarters", format="%Y:%Q") # monthly sequences # January 1, 1900 to March 1, 1901 timeCalendar(m=rep(1:12,length=15),y=rep(1900:1901,each=12,length=15), format="%b %Y") timeCalendar(m=rep(1:12,length=15),y=rep(1900:1901,each=12,length=15), format="%m/%d/%Y") # December 31, 1899 to February 28, 1901 td = timeSeq(from="1/1/1900",to="3/1/1901",by="months", format="%b %Y") timeSeq(from="1/1/1900",to="3/1/1901",by="months", format="%b %Y") - 1 # weekly sequences timeSeq(from="1/1/1990",to="3/1/1990",by="weeks", format="%a %b %d, %Y") timeSeq(from="1/3/1990",to="3/1/1990",by="weeks", format="%a %b %d, %Y") # 1st business day of January 2000 to last business day of January 2000 holiday.NYSE(2000) as.character(weekdays("1/17/2000")) timeSeq(from="1/3/2000",to="1/31/2000",by="bizdays", holidays=holiday.NYSE(2000),format="%a %b %d, %Y") td1 = timeSeq(from="1/3/2000",to="1/31/2000",by="bizdays", format="%a %b %d, %Y") # # do example with timeEvent to deal with 9/11 closing of markets # # hourly data # deal with time zones hrs = 9:15 hrs.total = 7*2 timeCalendar(h=rep(9:15,2),d=rep(3:4,each=7), y=2000,format="%a %b %d, %Y %02I:%02M %p") # minute data # 390 trading minutes in a day: 6.5 hours mts = c(rep(0:59,6)) hrs = 9:15 mts.total = 420*2 timeCalendar(min=rep(mts,2,length=mts.total), h=rep(hrs,each=60,length=mts.total), m=rep(1,mts.total), d=rep(3:4,each=420,length=mts.total), y=rep(2000,mts.total), format="%a %b %d, %Y %02I:%02M %p") timeCalendar(min=rep(rep(0:59,6),2), h=rep(9:14,each=60,length=360*2), d=rep(3:4,each=360,length=360*2), y=2000,format="%a %b %d, %Y %02I:%02M %p") # # transaction time examples # use Steve McKinney's examples # # creating timeSeries data my.df = data.frame(x=abs(rnorm(10,mean=5)), y=abs(rnorm(10,mean=10))) my.td = timeCalendar(y=1990:1999,format="%Y") my.ts = timeSeries(data=my.df,pos=my.td) my.ts my.ts@title = "My timeSeries" my.ts@documentation = c("Simulated annual price data using the", "S-PLUS function rnorm") my.ts@units = c("US dollars","US dollars") # # convert data frame to time series # yhoo.df[1:2,] td = timeDate(yhoo.df[,1],in.format="%d-%m-%y", format="%a %b %d, %Y") td[1:2] yhoo.ts = timeSeries(pos=td,data=yhoo.df[,-1]) yhoo.ts[1:2,] # Example using Tsay's data for 3M # date information is expressed as the day of the month # and the number of seconds from midnight # Data is for December 1999 # # do example with high frequency tick data in data frame # highFreq3M.df # highFreq3M.df[1:2,] # tmp = data.frame(trade.day=c(1,1),trade.time=c(34412,34414)) td = timeDate(julian=(highFreq3M.df$trade.day-1), ms=highFreq3M.df$trade.time*1000, in.origin=c(month=12,day=1,year=1999),zone="GMT") hf3M.ts = timeSeries(pos=td,data=highFreq3M.df) smpl = hf3M.ts[,"trade.day"] == 1 # # merging timeSeries # IP.dat CPI.dat IP.CPI.dat = seriesMerge(IP.dat,CPI.dat, pos=positions(IP.dat)) IP.CPI.dat[1:2,] # # aggregating timeSeries # dec.vals = "Dec"==months(positions(singleIndex.dat)) annual.p = singleIndex.dat[dec.vals,] annual.p pickDecember = function(x) { monthNum = as.numeric(row.names(x)) idx = (monthNum %% 12) == 0 if(sum(idx)){ return(x[idx,,drop=F]) }else{ return(NA) } } annual.p = aggregateSeries(singleIndex.dat, FUN=pickDecember,by="years") positions(annual.p)@format = "%Y" annual.p # function to pick off closing values. Based on S-PLUS function hloc pickClose = function(x) { # return closing values of a vector if(length(dim(x))) x = as.vector(as.matrix(x)) len = length(x) if(!len) as(NA, class(x)) else x[len] } annual.p = aggregateSeries(singleIndex.dat, FUN=pickClose,by="years") positions(annual.p)@format = "%Y" annual.p msft.daily.p = DowJones30[,"MSFT"] msft.daily.p@title = "Daily closing price on Microsoft" msft.daily.p@units = "Dollar price" msft.monthly.p = aggregateSeries(msft.daily.p,FUN=pickClose, by="months",adj=0.99) msft.monthly.p[1:12] # use steve mckinney's subscripting trick end.month.idx = which(diff(as.numeric(months(positions(msft.daily.p)))) != 0) msft.monthly.p = msft.daily.p[end.month.idx] msft.monthly.p[1:12] plot(msft.daily.p[end.month.idx]) plot(msft.daily.p,msft.daily.p[end.month.idx]) ### Set up a function to use with aggregateSeries ### to calculate volume weighted average open and close ### monthly prices. vol.wtd.avg.price = function(x){ VolumeSum = as.double(sum(x[, "Volume"])) nrowx = numRows(x) return(data.frame(Open = x[1, "Open"], High = max(x[, "High"]), Low = min(x[, "Low"]), Close = x[nrowx, "Close"], vwap.Open = sum(x[, "Open"] * x[, "Volume"])/VolumeSum, vwap.Close = sum(x[, "Close"] * x[, "Volume"])/VolumeSum, Volume = VolumeSum)) } ### Use the vwap function to calculate monthly ### high, low, open, close, volume weighted open and close, ### and total volume. smpl = (positions(msft.dat) >= timeDate("10/1/2000") & positions(msft.dat) <= timeDate("8/31/2001")) msft.dat[smpl,] msft.vwap.dat = aggregateSeries(x = msft.dat[smpl,], by = "months",FUN = vol.wtd.avg.price, together = T) positions(msft.vwap.dat)@format="%b %Y" msft.vwap.dat[,-7] # # disaggregation # # interpolate monthly CPI to daily CPI start(msft.daily.p) end(msft.daily.p) start(CPI.dat) end(CPI.dat) smpl = (positions(CPI.dat) >= timeDate("12/1/1990") & positions(CPI.dat) <= timeDate("2/1/2001")) cpi = CPI.dat[smpl,] cpi[1:3] # create daily cpi cpi.daily.before = align(cpi,positions(msft.daily.p),how="before") cpi.daily.before[c(1:3,21:23)] cpi.daily.after = align(cpi,positions(msft.daily.p),how="after") cpi.daily.after[c(1:3,21:23)] cpi.daily.interp = align(cpi,positions(msft.daily.p),how="interp") cpi.daily.interp[c(1:3,21:23)] # create real prices and plot msft.daily.rp = (msft.daily.p/cpi.daily.interp)*100 plot(msft.daily.rp,msft.daily.p, plot.args=list(lty=c(1,3))) legend(0,100,legend=c("Real price","Nominal price"), lty=c(1,3)) # # S+FinMetrics disaggregate function # args(disaggregate) start(shiller.annual) end(shiller.annual) monthly.dates = timeSeq(from="1/1/1871",to="12/31/2000", by="months",format="%b %Y") div.monthly = disaggregate(shiller.annual[,"dividend"],12, out.positions=monthly.dates) div.monthly[1:12] sum(div.monthly[1:12]) shiller.annual[1,"dividend"] smpl = positions(shiller.dat) <= timeDate("12/31/2000") price.monthly = as.matrix(seriesData(shiller.dat[smpl,"price"])) div2.monthly = disaggregate(shiller.annual[,"dividend"],12, method="gls",x=price.monthly, out.positions=monthly.dates) div2.monthly[1:12] sum(div2.monthly[1:12]) shiller.annual[1,"dividend"] # # note: disaggregate doesn't work with Jeff's examples # ip.quarter = disaggregate(nelson.dat[,"IP"],4) # # S+FinMetrics interpNA function # djia.close = djia[positions(djia) >= timeDate("1/1/1990"), "close"] djia.close[10:12,] djia.close = interpNA(djia.close) djia.close[10:12,] # # Illustrate return calculations using getReturns # # # creating lags and differences # args(tslag) tslag(singleIndex.dat[1:5,]) tslag(singleIndex.dat[1:5,],trim=T) tslag(singleIndex.dat[1:5,],k=2) tslag(singleIndex.dat[1:5,],k=-1) tslag(singleIndex.dat[1:5,],k=c(1,3)) tslag(singleIndex.dat[1:5,],k=-1:1) args(diff.timeSeries) diff(singleIndex.dat[1:5,],lag=1,trim=F) diff(singleIndex.dat[1:5,],lag=2,trim=F,pad=0) diff(singleIndex.dat[1:5,],lag=1,diff=2,trim=F) colIds(singleIndex.dat) singleIndex.dat[1:3,] ret.d = getReturns(singleIndex.dat,type="discrete", percentage=T) ret.d[1:3,] ret.d = getReturns(singleIndex.dat,type="discrete",trim=F) ret.d[1:3,] ret.cc = getReturns(singleIndex.dat,type="continuous") ret.cc[1:3,] # annual cc returns using aggregate ret12.cc = aggregate(ret.cc,moving=12,FUN=sum) ret12.cc[1:3,] colSums(seriesData(ret.cc[1:12,])) ret12.cc = aggregate(ret.cc,by="years",FUN=sum) ret12.cc[1:3,] colSums(seriesData(ret.cc[1:11,])) # annual discrete returns using aggregate ret12.d = aggregate((1+ret.d),moving=12,FUN=prod)-1 ret12.d[1:3,] prod(seriesData(1+ret.d[1:12,1]))-1 ret12.d = aggregate((1+ret.d),by="years",FUN=prod)-1 ret12.d[1:3,] # # converting daily timeSeries to monthly timeSeries # # pick off end of month values: use the hloc option to aggregateSeries p.daily = DowJones30[,1] p.hloc = aggregate(p.daily, by="months", FUN=hloc, colnames=c("high", "low", "open", "close")) p.close = p.hloc[,"close"] # problem: how do I get the correct date for the end of month observations? end.month.idx = which(diff(as.numeric(months(positions(p.daily)))) != 0) p.daily[end.month.idx] plot(p.daily[end.month.idx]) plot(p.daily,p.daily[end.month.idx]) # # converting monthly timeSeries to annual timeSeries # # pick off December value for data series start(singleIndex.dat) end(singleIndex.dat) p.annual = singleIndex.dat["Dec"==months(positions(singleIndex.dat)),] p.annual # # plotting timeSeries # # plotting a single series msft.p@title msft.p@units plot(msft.p) # eliminate reference grids set reference.grid=F plot(msft.p,reference.grid=F) # to do a log plot set log.axes=T plot(msft.p,log.axes="y") # illustrate hloc and stackbar plots colIds(djia) smpl = (positions(djia) >= timeDate("9/1/1987") & positions(djia) <= timeDate("11/30/1987")) par(mfrow=c(2,1)) plot(djia[smpl,1:4],plot.type="hloc") plot(djia[smpl,5],plot.type="stackbar") par(mfrow=c(1,1)) # plotting multiple series on the same graph singleIndex.dat = singleIndex.dat singleIndex.dat@units = c("US dollar price","US dollar price") plot(singleIndex.dat, plot.args=list(lty=c(1,3))) legend(0.1,1400,legend=colIds(singleIndex.dat),lty=c(1,3)) # creating a scatterplot of two time series plot(as.matrix(seriesData(singleIndex.dat[,"SP500"])), as.matrix(seriesData(singleIndex.dat[,"MSFT"]))) par(mfrow=c(2,1)) plot(singleIndex.dat[,"MSFT"],main="Monthly price on Microsoft") plot(singleIndex.dat[,"SP500"],main="Monthly price on S&P 500 index") # split plot with prices and volume par(mfrow=c(2,1)) plot(msft.vwap.dat[, "vwap.Close"], msft.vwap.dat[, "Close"], main = "MSFT vwap.Close",plot.args=list(lty=c(1,3))) legend(0,70,legend=c("VWAP Close","Close"),lty=c(1,3)) plot.out = plot(msft.vwap.dat[, "Volume"]/1000000.0, plot.args = list(type = "n"), main = "MSFT Volume (Millions)") stackbar.render(positions(msft.vwap.dat[, "Volume"]), seriesData(msft.vwap.dat[, "Volume"]/1000000.0), x.scale = plot.out$scale) par(mfrow=c(1,1)) # # S+FinMetrics plotting functions # # rvfPlot rvfPlot(ret.cc[,"MSFT"],ret.cc[,"SP500"], strip.text="MSFT vs. SP500",xlab="S&P 500", ylab="MSFT") plot(seriesData(ret.cc[,"SP500"]), seriesData(ret.cc[,"MSFT"]), main="MSFT vs. SP500",xlab="S&P 500", ylab="MSFT") args(seriesPlot) DJ.ret = getReturns(DowJones30[,1:6], percentage=T) colIds(DJ.ret) # seriesPlot seriesPlot(DJ.ret,one.plot=F,strip.text=colIds(DJ.ret), main="Monthly returns on six Dow Jones 30 stocks") # note: all series are on different scales # histPlot histPlot(DJ.ret,strip.text=colIds(DJ.ret), main="Histograms of returns on six Dow Jones 30 stocks") # note: histograms use the same bin widths # qqPlot s.text = paste(colIds(DJ.ret),5:10,sep=" ","df") qqPlot(DJ.ret,strip.text=s.text, distribution="t",dof=c(5,6,7,8,9,10), id.n=FALSE, main="Student-t QQ-plots for returns on six Dow Jones 30 stocks") mk.maturity yield.curve = mk.zero1[,c(1,12,20,24,29)] colIds(yield.curve) = c("1 month","1 year","2 year","5 year", "10 year") seriesPlot(yield.curve, main="Zero coupon yield curve", ylab="Percent per month") histPlot(yield.curve,strip.text=colIds(yield.curve), main="Histograms of zero coupon yield curve") qqPlot(yield.curve,strip.text=colIds(yield.curve), main="Normal qq plots of zero coupon yield curve")