User:Phil Hurvitz/Projects/In process/MSB/Scripts/conflate msb sms gps.R

From phurvitz
< User:Phil Hurvitz‎ | Projects‎ | In process‎ | MSB/Scripts
Revision as of 04:05, 15 January 2009 by Phil Hurvitz (talk | contribs)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to: navigation, search
# match the Myexp+SMS record with the MSB(GPS) record most closely matching in time
# assumes GPS timestamps on SMS and MSB data (pre subject 11?)

# needs: msb/gps data (from msb_parse_gps_files.R)
#        Myexp+SMS data (from read_sms.R)

# usage:
# source("/home/phurvitz/public_html/msb/tools/conflate_msb_sms_gps.R"); msb.sms <- conflate.msb.sms(sms, msb, sid)

# define the function that matches each SMS record to its nearest-in-time MSB (GPS) record
# Conflate Myexp+SMS records with MSB (GPS) records by nearest timestamp.
#
# For every row of `sms`, the MSB record whose `gps.loctim` is closest to the
# row's `sms.gps.localtime` is looked up.  When that record lies within five
# minutes, its timestamp is stored in `sms$msb.gps.loctim`; otherwise the row
# is left unmatched (NA).  Matched SMS rows are then merged with `msb` on that
# timestamp and the result is written to "<basedir>/<sid>/msb_sms.csv".
#
# Args:
#   sms: data frame with a date-time column `sms.gps.localtime`.
#   msb: data frame with a date-time column `gps.loctim`; its `sid` column,
#        if any, is dropped before merging to avoid duplicate fields.
#   sid: subject identifier, used only to build the output directory name.
#
# Returns:
#   The merged data frame, or NULL when no SMS record has an MSB record
#   within the five-minute tolerance (nothing is written in that case).
#
# NOTE(review): unfix.colnames() and fix.colnames() are not defined in this
# file; presumably they translate column names for the CSV and back again --
# confirm against their definitions.
conflate.msb.sms <- function(sms, msb, sid) {

    # file locations
    basedir <- "/home/phurvitz/public_html/msb/processed_data/downloaded_data"
    subject.dir <- paste(basedir, "/", sid, "/", sep="")

    # largest SMS-to-MSB time gap (seconds) still accepted as a match
    max.gap.secs <- 5 * 60

    n.sms <- nrow(sms)

    # add fields to SMS for the matched msb gps time and the time difference;
    # the placeholder values are overwritten for every row in the loop below,
    # and rep() keeps the assignment valid for a zero-row SMS table
    sms$msb.gps.loctim <- rep(as.POSIXct("1900-01-01 GMT"), n.sms)
    sms$sms.msb.timediff <- rep(-9, n.sms)

    # an informative message about progress
    cat(paste("Processing ", n.sms, "...", sep=""))

    # loop through each Myexp+SMS record (seq_len() is empty for zero rows)
    for (i in seq_len(n.sms)) {

        # an informative message about progress
        cat(paste(i, ",", sep=""))

        # absolute time difference between this SMS and every MSB record
        td <- abs(as.numeric(difftime(sms$sms.gps.localtime[i], msb$gps.loctim,
                                      units="secs")))

        # which.min() skips NA values and returns only the first of any ties,
        # so exactly zero or one index comes back
        nearest <- which.min(td)

        if (length(nearest) == 0) {
            # no usable MSB timestamp (empty MSB table or all values missing)
            sms$msb.gps.loctim[i] <- NA
            sms$sms.msb.timediff[i] <- NA
        } else {
            min.td <- td[nearest]
            # set the GPS timestamp of this SMS to the closest matching
            # record from MSB data, but only if it is close enough
            if (min.td < max.gap.secs) {
                sms$msb.gps.loctim[i] <- msb$gps.loctim[nearest]
            } else {
                sms$msb.gps.loctim[i] <- NA
            }
            sms$sms.msb.timediff[i] <- round(min.td, 3)
        }
    }
    # an informative message
    cat("\n")

    # to avoid duplicate fields
    msb$sid <- NULL

    # merge() pairs NA keys with NA keys, so unmatched SMS records are removed
    # first; otherwise they would join to MSB rows that lack a timestamp
    matched <- sms[!is.na(sms$msb.gps.loctim), , drop=FALSE]

    # merge SMS and MSB data to a new data frame where each SMS has MSB data
    msb.sms <- merge(matched, msb, by.x="msb.gps.loctim", by.y="gps.loctim")

    # any records?
    if (nrow(msb.sms) == 0) {
        return(NULL)
    }

    # reset row names
    rownames(msb.sms) <- NULL

    # write out the file, then restore the in-memory column names
    colnames(msb.sms) <- unfix.colnames(msb.sms)
    outFN <- paste(subject.dir, "msb_sms.csv", sep="")
    write.table(msb.sms, file=outFN, row.names=FALSE, col.names=TRUE, sep=",",
                quote=TRUE)
    colnames(msb.sms) <- fix.colnames(msb.sms)
    cat(paste("wrote", outFN, "\n"))

    msb.sms
}