# User:Phil Hurvitz/Projects/In process/MSB/Scripts/read sms 8.R
#
# From phurvitz
# < User:Phil Hurvitz | Projects | In process | MSB/Scripts
# Revision as of 02:51, 14 January 2009 by Phil Hurvitz (talk | contribs)
# (diff) <- Older revision | Latest revision (diff) | Newer revision -> (diff)
# Jump to: navigation, search
# Builds "sms.csv", a formatted per-subject file, from the raw SMS records.

# Load shared helper functions from a remote script.
# NOTE(review): presumably this is where fix.colnames() / unfix.colnames()
# come from -- process.sms() calls them but this file does not define them;
# confirm against functions.R.
# NOTE(review): sourcing over plain HTTP executes whatever the server returns
# and requires network access at run time; consider a local, versioned copy.
source("http://gis.washington.edu/phurvitz/R/functions.R")

# a function to do the process
# Build the per-subject "sms.csv" from the combined SMS log.
#
# Steps: run the external msb_parse_sms.pl parser, read the resulting
# sms8.csv, keep the rows whose first column equals `sid`, convert the
# network / phone / GPS timestamps to POSIXct, apply per-subject clock
# fixes, drop duplicate rows, and write <subject.dir>/sms.csv.
#
# Args:
#   sid:      subject identifier (e.g. "s11"); also the name of the subject's
#             directory under `basedir`. Required.
#   timediff: accepted for backward compatibility; not used by this function.
#
# Returns:
#   the processed data frame on success;
#   the string "no records" if the log has no rows for `sid`;
#   invisible NULL if the subject directory is missing or is not a directory.
#
# Side effects: runs an external command, writes sms.csv, prints diagnostics
# with cat(). The working directory is changed to the subject directory while
# the function runs and is restored on exit (including on error).
process.sms <- function(sid, timediff) {
    # sid is needed to build every path below, so fail early and clearly
    if (missing(sid)) {
        stop("sid is required", call.=FALSE)
    }

    # where are files?
    basedir <- "/home/phurvitz/public_html/msb/processed_data/downloaded_data"
    subject.dir <- paste(basedir, "/", sid, "/", sep="")

    # Validate the directory BEFORE changing into it; setwd() on a missing
    # path raises an error, which would make these checks unreachable.
    if (!file.exists(subject.dir)) {
        err.txt <- paste(subject.dir, "does not exist!", "\n", sep=" ")
        cat(err.txt)
        return(invisible())
    }
    if (!file.info(subject.dir)$isdir) {
        err.txt <- paste(subject.dir, "is not a dir!", "\n", sep=" ")
        cat(err.txt)
        return(invisible())
    }

    # work inside the subject directory; always restore the caller's directory
    current.dir <- setwd(subject.dir)
    on.exit(setwd(current.dir), add=TRUE)

    # parse the sms log file with the external perl script
    if (.Platform$OS.type == "windows") {
        home <- "P:/"
        cmd.parse.sms <- paste(Sys.getenv("COMSPEC"), "/c", "p:\\public_html\\msb\\tools\\msb_parse_sms.pl")
    } else {
        home <- "/home/phurvitz/"
        cmd.parse.sms <- paste(home, "public_html/msb/tools/msb_parse_sms.pl", sep="")
    }
    parse.status <- system(cmd.parse.sms)
    if (!identical(as.integer(parse.status), 0L)) {
        # the CSV read below may be stale or absent if the parser failed
        warning("SMS parser exited with status ", parse.status, ": ", cmd.parse.sms,
                call.=FALSE)
    }

    # read in the parsed log
    csv.file <- "sms8.csv"
    csv.file.in <- paste(home, "public_html/msb/processed_data/", csv.file, sep="")
    sms <- read.csv(csv.file.in, header=TRUE, as.is=TRUE)

    # leap-seconds offset for GPS
    gps.offset <- -14

    # start of the GPS epoch
    start.epoch <- strptime("1980-01-06", "%Y-%m-%d", "GMT")

    # only take records for this subject (subject id is the first column)
    sms <- sms[sms[,1]==sid,]

    # any records?
    if (nrow(sms)==0) {
        cat(paste("Error, no SMS records for", sid, "\n"))
        return("no records")
    }

    # convert the sms network time to POSIXct
    sms$sms.net.localtime <- as.POSIXct(strptime(sms$sms.net.localtime, "%Y-%b-%d %H:%M:%S"))

    # drop any existing date.sms column
    sms$date.sms <- NULL

    # The phone timestamp comes with either a 2-digit or a 4-digit year;
    # look at the text after the last "/" to decide which format to parse with.
    year2char <- nchar(sub(".*/", "", sms$sms.phone.localtime))==2
    sms$x <- as.POSIXct("2000-01-01")
    sms[year2char,]$x <- as.POSIXct(strptime(sms[year2char,]$sms.phone.localtime, "%I:%M:%S %p %m/%d/%y"))
    sms[!year2char,]$x <- as.POSIXct(strptime(sms[!year2char,]$sms.phone.localtime, "%I:%M:%S %p %m/%d/%Y"))
    # remove, then re-add, so the converted column lands at the end of the
    # data frame (output column order is unchanged from earlier versions)
    sms$sms.phone.localtime <- NULL
    sms$sms.phone.localtime <- sms$x
    sms$x <- NULL

    # fix specific subject data
    # Exact matches only: a regex match (grep) would also hit shorter ids such
    # as "s1" or "s4".
    # timestamps off by 12 hours
    if (sid %in% c("s11", "s16")) {
        sms$sms.phone.localtime <- sms$sms.phone.localtime - 12 * 3600
    }
    # timestamps off by 1 hour
    if (sid %in% c("s45")) {
        sms$sms.phone.localtime <- sms$sms.phone.localtime - 1 * 3600
    }
    # some very strange timestamps: drop rows whose phone and network times
    # differ by between 40000 and 50000 seconds
    if (sid=="s61") {
        td <- abs(as.numeric(difftime(sms$sms.phone.localtime, sms$sms.net.localtime, units="s")))
        sms <- sms[td < 40000 | td > 50000,]
    }

    # calculate the GPS time from week number + milliseconds into the week
    # (placeholder first, so sms.gps.localtime precedes sms.gps.gmt in the output)
    sms$sms.gps.localtime <- ""
    sms$sms.gps.gmt <- start.epoch + (sms$gps.week * 7 * 24 * 60 * 60 + sms$gps.ms / 1000 + gps.offset)
    sms$sms.gps.localtime <- sms$sms.gps.gmt
    # same instants as sms.gps.gmt, but without the GMT time-zone attribute
    attr(sms$sms.gps.localtime, "tzone") <- NULL

    # sort by phone time
    o <- order(sms$sms.phone.localtime)
    sms <- sms[o,]

    # date (phone time truncated to the day)
    sms$sms.localdate <- as.POSIXct(strptime(sms$sms.phone.localtime, format="%F"))

    # change the "never" string to "-9"
    sms$gps.lock.last <- ifelse(sms$gps.lock.last=="never", "-9", sms$gps.lock.last)

    # "unique" rows
    # NOTE(review): no column named "localtime" is created in this function;
    # unless the CSV supplies one, sms$localtime is NULL and rows are
    # de-duplicated on msb.secs alone. Possibly sms.phone.localtime was
    # intended -- confirm before changing, as it alters which rows are kept.
    sms <- sms[!duplicated(cbind(sms$msb.secs, format(sms$localtime, "%Y-%m-%d %H"))),]

    # add a field that adjusts timestamps for those records that have bunk GPS times
    # rows whose GPS-derived time falls before 2006-01-01
    unadj <- sms$sms.gps.localtime<as.POSIXct("2006-01-01")
    # rows whose GPS-derived time falls after 2006-01-01
    adj <- sms$sms.gps.localtime>as.POSIXct("2006-01-01")
    # phone-minus-GPS time difference, in seconds, for the post-2006 rows
    vec.dt <- difftime(sms[adj,]$sms.phone.localtime, sms[adj,]$sms.gps.localtime, units="s")
    # the mean time difference
    mean.dt <- as.numeric(mean(vec.dt))

    # NOTE(review): `unadj` has one element per row, so length(unadj) always
    # equals nrow(sms) and this branch never runs; sms.adj.localtime is
    # therefore never added. The original comment ("only if there are records
    # from 2005") suggests any(unadj) was meant. Left as-is because enabling it
    # adds a column to the returned data frame and to sms.csv -- confirm with
    # downstream consumers first.
    if (length(unadj)!=nrow(sms)) {
        if (mean.dt > 10) {
            cat(paste("problem; mean time difference is", round(mean.dt, 3), "s", "\n"))
        }
        else {
            start.epoch <- as.POSIXct(strptime("1980-01-06", "%Y-%m-%d"))
            sms$sms.adj.localtime <- start.epoch
            # pre-2006 rows: phone time shifted by the mean phone/GPS offset
            sms[unadj,]$sms.adj.localtime <- as.POSIXct(sms[unadj,]$sms.phone.localtime) + mean.dt
            # post-2006 rows: use the GPS-derived time directly
            sms[adj,]$sms.adj.localtime <- sms[adj,]$sms.gps.localtime
        }
    }

    # reinitialize row numbers
    rownames(sms) <- NULL

    # write out the table; column names are passed through unfix.colnames()
    # for the file and fix.colnames() afterwards (helpers defined outside this
    # file -- presumably they convert between on-disk and in-R naming)
    colnames(sms) <- unfix.colnames(sms)
    outFN <- paste(subject.dir, "sms.csv", sep="")
    write.table(sms, file=outFN, col.names=TRUE, row.names=FALSE, quote=TRUE, sep=",")
    colnames(sms) <- fix.colnames(sms)

    return(sms)

}