User:Phil Hurvitz/Projects/In process/MSB/Scripts/read sms 8.R
From phurvitz
< User:Phil Hurvitz | Projects | In process | MSB/Scripts
Revision as of 02:51, 14 January 2009 by Phil Hurvitz (talk | contribs)
# makes "sms.csv" a formatted file from the SMS records

# basic functions
source("http://gis.washington.edu/phurvitz/R/functions.R")

# Process the SMS log for one subject and write "sms.csv" into that
# subject's download directory.
#
# Args:
#   sid:      subject id (e.g. "s11"). It names the subject directory under
#             the downloaded_data base directory and is matched against the
#             first column of the parsed SMS csv.
#   timediff: time difference; set to 0 when not supplied. It is not used
#             anywhere else in this function.
#
# Returns:
#   invisible NULL if the subject directory is missing or is not a directory,
#   the string "no records" if the csv has no rows for this subject,
#   otherwise the processed data frame (which is also written to sms.csv).
#
# Side effects: runs the msb_parse_sms.pl script, writes sms.csv, and changes
# the working directory to the subject directory for the duration of the call
# (restored on exit, including on error).
process.sms <- function(sid, timediff) {
  # every path and filter below is derived from sid, so it cannot be defaulted
  if (missing(sid)) {
    stop("sid must be supplied", call. = FALSE)
  }

  # where are files?
  basedir <- "/home/phurvitz/public_html/msb/processed_data/downloaded_data"
  subject.dir <- paste0(basedir, "/", sid, "/")

  # validate the directory BEFORE changing into it; setwd() itself raises an
  # error on a missing directory, which would make these checks unreachable
  if (!file.exists(subject.dir)) {
    err.txt <- paste(subject.dir, "does not exist!", "\n", sep = " ")
    cat(err.txt)
    return(invisible())
  }
  if (!file.info(subject.dir)$isdir) {
    err.txt <- paste(subject.dir, "is not a dir!", "\n", sep = " ")
    cat(err.txt)
    return(invisible())
  }

  # setwd() returns the previous directory; restore it however we leave
  current.dir <- setwd(subject.dir)
  on.exit(setwd(current.dir), add = TRUE)

  # time difference, if not specified, set to 0
  if (missing(timediff)) {
    timediff <- 0
  }

  # parse the sms log file
  if (.Platform$OS.type == "windows") {
    home <- "P:/"
    cmd.parse.sms <- paste(
      Sys.getenv("COMSPEC"), "/c",
      "p:\\public_html\\msb\\tools\\msb_parse_sms.pl"
    )
  } else {
    home <- "/home/phurvitz/"
    cmd.parse.sms <- paste0(home, "public_html/msb/tools/msb_parse_sms.pl")
  }
  system(cmd.parse.sms)

  # read sms log
  csv.file <- "sms8.csv"
  csv.file.in <- paste0(home, "public_html/msb/processed_data/", csv.file)
  sms <- read.csv(csv.file.in, header = TRUE, as.is = TRUE)

  # leap-seconds offset for GPS
  gps.offset <- -14
  # start of the GPS epoch
  start.epoch <- strptime("1980-01-06", "%Y-%m-%d", "GMT")

  # only take records for this subject (subject id is the first column)
  sms <- sms[sms[, 1] == sid, ]

  # any records?
  if (nrow(sms) == 0) {
    cat(paste("Error, no SMS records for", sid, "\n"))
    return("no records")
  }

  # convert the sms network time to a POSIXct value
  sms$sms.net.localtime <- as.POSIXct(
    strptime(sms$sms.net.localtime, "%Y-%b-%d %H:%M:%S")
  )

  # drop the date.sms column, if present
  sms$date.sms <- NULL

  # the phone timestamp comes with either a 2-digit or a 4-digit year;
  # look at what follows the last "/" to decide which format to parse with
  phone.raw <- sms$sms.phone.localtime
  year2char <- nchar(sub(".*/", "", phone.raw)) == 2
  phone.time <- rep(as.POSIXct("2000-01-01"), nrow(sms))
  phone.time[year2char] <- as.POSIXct(
    strptime(phone.raw[year2char], "%I:%M:%S %p %m/%d/%y")
  )
  phone.time[!year2char] <- as.POSIXct(
    strptime(phone.raw[!year2char], "%I:%M:%S %p %m/%d/%Y")
  )
  # remove and re-add so the parsed column ends up as the last column
  sms$sms.phone.localtime <- NULL
  sms$sms.phone.localtime <- phone.time

  # fix specific subject data
  # exact id match: a regex match would also shift e.g. "s1" or "s4"
  # timestamps off by 12 hours
  if (sid %in% c("s11", "s16")) {
    sms$sms.phone.localtime <- sms$sms.phone.localtime - 12 * 3600
  }
  # timestamps off by 1 hour
  if (sid %in% c("s45")) {
    sms$sms.phone.localtime <- sms$sms.phone.localtime - 1 * 3600
  }
  # some very strange timestamps: drop records whose phone and network times
  # differ by between 40000 and 50000 seconds
  if (sid == "s61") {
    td <- abs(as.numeric(difftime(
      sms$sms.phone.localtime, sms$sms.net.localtime,
      units = "s"
    )))
    sms <- sms[td < 40000 | td > 50000, ]
  }

  # calculate the GPS time
  # the placeholder fixes the column position of sms.gps.localtime ahead of
  # sms.gps.gmt; it is overwritten just below
  sms$sms.gps.localtime <- ""
  sms$sms.gps.gmt <- start.epoch +
    (sms$gps.week * 7 * 24 * 60 * 60 + sms$gps.ms / 1000 + gps.offset)
  sms$sms.gps.localtime <- sms$sms.gps.gmt
  attr(sms$sms.gps.localtime, "tzone") <- NULL

  # sort by phone time
  o <- order(sms$sms.phone.localtime)
  sms <- sms[o, ]

  # date
  sms$sms.localdate <- as.POSIXct(
    strptime(sms$sms.phone.localtime, format = "%F")
  )

  # change the "never" string to "-9"
  sms$gps.lock.last <- ifelse(
    sms$gps.lock.last == "never", "-9", sms$gps.lock.last
  )

  # "unique" rows
  # NOTE(review): nothing in this function creates a `localtime` column, so
  # unless the csv supplies one, sms$localtime is NULL here -- confirm which
  # timestamp column the hour key was meant to use
  sms <- sms[!duplicated(cbind(sms$msb.secs, format(sms$localtime, "%Y-%m-%d %H"))), ]

  # add a field that adjusts timestamps for those records that have bunk GPS
  # times
  # records with a GPS time before 2006 (phone time + mean offset is used)
  unadj <- sms$sms.gps.localtime < as.POSIXct("2006-01-01")
  # records with a GPS time after 2006 (GPS time is used as is)
  adj <- sms$sms.gps.localtime > as.POSIXct("2006-01-01")
  # the time difference between phone and GPS time for the `adj` records
  vec.dt <- difftime(
    sms[adj, ]$sms.phone.localtime, sms[adj, ]$sms.gps.localtime,
    units = "s"
  )
  # the mean time difference
  mean.dt <- as.numeric(mean(vec.dt))

  # NOTE(review): `unadj` has one element per row, so length(unadj) always
  # equals nrow(sms) and this branch never runs; sms.adj.localtime is
  # therefore never added. The condition is left unchanged because the
  # intended test (some vs. not all records before 2006) needs confirming and
  # enabling it would add a column to the output.
  if (length(unadj) != nrow(sms)) {
    if (mean.dt > 10) {
      cat(paste("problem; mean time difference is", round(mean.dt, 3), "s", "\n"))
    } else {
      start.epoch <- as.POSIXct(strptime("1980-01-06", "%Y-%m-%d"))
      sms$sms.adj.localtime <- start.epoch
      sms[unadj, ]$sms.adj.localtime <-
        as.POSIXct(sms[unadj, ]$sms.phone.localtime) + mean.dt
      sms[adj, ]$sms.adj.localtime <- sms[adj, ]$sms.gps.localtime
    }
  }

  # reinitialize row numbers
  rownames(sms) <- NULL

  # write out the table; unfix.colnames/fix.colnames come from the sourced
  # functions.R and switch the column names for the file and back again
  colnames(sms) <- unfix.colnames(sms)
  outFN <- paste0(subject.dir, "sms.csv")
  write.table(
    sms,
    file = outFN, col.names = TRUE, row.names = FALSE, quote = TRUE, sep = ","
  )
  colnames(sms) <- fix.colnames(sms)

  sms
}