Difference between revisions of "Main Page/Research/MSB/Scripts/read.msb.files.R"

From phurvitz
< Main Page‎ | Research‎ | MSB‎ | Scripts
Jump to: navigation, search
(No difference)

Revision as of 20:33, 11 October 2007

# libraries
library(gmt)

# functions
source ("http://gis.washington.edu/phurvitz/R/functions.R") 

# Directory holding the data to process; it is also passed to setwd() below.
# The first assignment is an alternative path left commented out.
#indir <- "C:/users/phurvitz/htdocs/phurvitz/msb/processed_data/research/projects/ubicomp3/phurvitz/gis.washington.edu/phurvitz/msb/data/pmh04_20070921/"
indir <- "C:/users/phurvitz/htdocs/phurvitz/msb/processed_data/research/projects/ubicomp3/phurvitz/gis.washington.edu/phurvitz/msb/data/pmh04_20070921a"
# process.msb writes its output tables using relative file names, so make the
# data directory the working directory before calling it.
setwd(indir)

# Column names for each recognised MSB file type. The list names are fixed
# substrings looked for in the CSV file name; they are tried in this order and
# the first one found wins.
.msb.schemas <- list(
    "class"      = c("longitude", "latitude", "unixtime.gps", "updown", "moving"),
    "Output.csv" = c("longitude", "latitude", "elevation", "unixtime.gps"),
    "timestamp"  = c("milliseconds.msb", "unixtime.gps")
)

# Return the column names for the CSV called `file.name`, or NULL when the
# name matches none of the known MSB file types.
.msb.columns.for <- function(file.name) {
    for (key in names(.msb.schemas)) {
        if (grepl(key, file.name, fixed=TRUE)) {
            return(.msb.schemas[[key]])
        }
    }
    NULL
}

# Read one headerless MSB CSV, name its columns and add the derived columns
# (sid, date.gps.adj, reftime.gps and, where applicable, seconds.msb).
.msb.read.table <- function(path, col.names, sid, time.adj) {
    tab <- read.csv(path, header=FALSE)
    if (ncol(tab) != length(col.names)) {
        stop(sprintf("%s has %d columns, expected %d (%s)",
                     path, ncol(tab), length(col.names),
                     paste(col.names, collapse=", ")), call.=FALSE)
    }
    colnames(tab) <- col.names
    tab$sid <- sid

    # NOTE(review): ISOdatetime() is called without tz, so this epoch is
    # midnight 1 Jan 1970 in the session time zone, not UTC -- confirm that
    # this (together with time.adj) is the intended conversion.
    epoch <- ISOdatetime(1970, 1, 1, 0, 0, 0)
    tab$date.gps.adj <- epoch + tab$unixtime.gps + time.adj
    tab$reftime.gps <- epoch + tab$unixtime.gps

    # MSB clock in whole seconds (timestamp files only)
    if ("milliseconds.msb" %in% col.names) {
        tab$seconds.msb <- round(tab$milliseconds.msb / 1000, 0)
    }

    # strip every space from the updown and moving fields (class files only)
    if ("updown" %in% col.names) {
        tab$updown <- gsub("  *", "", tab$updown)
        tab$moving <- gsub("  *", "", tab$moving)
    }
    tab
}

# Write `tab` as comma-separated text without row names and report the file.
.msb.write.csv <- function(tab, file, header=TRUE, quote=TRUE) {
    write.table(tab, file=file, col.names=header, row.names=FALSE,
                quote=quote, sep=",")
    message("wrote ", file)
}

# Process all MSB CSV files found in the subdirectories of `indir`.
#
# For every subdirectory, each recognised CSV (file name containing "class",
# "Output.csv" or "timestamp") is read, given column names, tagged with `sid`
# and given date columns derived from unixtime.gps. The function then writes,
# relative to the current working directory:
#   <dir>.<file>       one table per subdirectory and file
#   <file>             all subdirectories concatenated, per file name
#   class.csv, timestmp.csv, gps.csv   unquoted copies of the three
#                      concatenated tables
#   phone_log.csv      the phone log with parsed time columns
# and, into `dir.gismo`, headerless unquoted copies named
# <sid>_class.csv, <sid>_timestmp.csv, <sid>_gps.csv and <sid>_phone_log.csv.
#
# Args:
#   indir:         directory whose subdirectories hold the CSV files.
#   sid:           identifier stored in the sid column and used as the
#                  prefix of the files written to dir.gismo.
#   dir.gismo:     directory receiving the headerless copies.
#   phonelog.file: phone log CSV; must have a time.phone column formatted
#                  like "10:15:30 AM 09/21/2007".
#   time.adj:      seconds added to unixtime.gps to obtain date.gps.adj.
#                  NOTE(review): the reason for the -16 h default is not
#                  visible in this file -- confirm before changing it.
#
# Returns:
#   NULL, invisibly. With indir or sid missing, only a usage line is printed.
#
# CSV files whose name matches no known type are skipped with a warning.
# It is an error if no subdirectory is found or if any of gps_class.csv,
# sirfGPSOutput_timestamp.csv or sirfGPSOutput.csv was not read.
process.msb <- function(indir, sid,
                        dir.gismo="P:/public_html/msb/processed_data/processed/subjects",
                        phonelog.file="P:/public_html/msb/processed_data/phonelog.csv",
                        time.adj=-16 * 3600) {

    function.name <- "process.msb"
    if (missing(indir) || missing(sid)) {
        cat(paste("Usage: ", function.name, " (indir, sid)\n", sep=""))
        # nothing can be done without both arguments
        return(invisible(NULL))
    }

    # subdirectories of indir; tested by full path so the result does not
    # depend on the current working directory
    entries <- list.files(indir)
    is.dir <- file.info(file.path(indir, entries))$isdir
    session.dirs <- entries[!is.na(is.dir) & is.dir]
    if (length(session.dirs) == 0) {
        stop(sprintf("no subdirectories found in %s", indir), call.=FALSE)
    }

    # read every CSV of every directory, collecting the tables by file name
    pieces <- list()
    for (d in session.dirs) {
        for (f in list.files(file.path(indir, d), pattern="\\.csv$")) {
            path <- file.path(indir, d, f)
            col.names <- .msb.columns.for(f)
            if (is.null(col.names)) {
                warning(sprintf("skipping %s: unrecognised file type", path),
                        call.=FALSE)
                next
            }
            tab <- .msb.read.table(path, col.names, sid, time.adj)
            pieces[[f]] <- c(pieces[[f]], list(tab))

            # per-directory copy, named <dir>.<file>
            .msb.write.csv(tab, paste(d, f, sep="."))
        }
    }

    # dump out concatenated tables, one per file name
    masters <- lapply(pieces, function(p) do.call(rbind, p))
    for (f in names(masters)) {
        .msb.write.csv(masters[[f]], f)
    }

    # the three tables that are exported under short names
    exports <- c("gps_class.csv"="class.csv",
                 "sirfGPSOutput_timestamp.csv"="timestmp.csv",
                 "sirfGPSOutput.csv"="gps.csv")
    absent <- setdiff(names(exports), names(masters))
    if (length(absent) > 0) {
        stop(sprintf("required file(s) not found in any subdirectory of %s: %s",
                     indir, paste(absent, collapse=", ")), call.=FALSE)
    }
    for (src in names(exports)) {
        .msb.write.csv(masters[[src]], exports[[src]], quote=FALSE)
    }

    # dump to gismo (no header line)
    for (src in names(exports)) {
        .msb.write.csv(masters[[src]],
                       sprintf("%s/%s_%s", dir.gismo, sid, exports[[src]]),
                       header=FALSE, quote=FALSE)
    }

    # read phone log, keeping only rows that carry a time
    phone.log <- read.csv(phonelog.file, as.is=TRUE)
    if (!("time.phone" %in% colnames(phone.log))) {
        stop(sprintf("%s has no time.phone column", phonelog.file), call.=FALSE)
    }
    has.time <- !is.na(phone.log$time.phone) & phone.log$time.phone != ""
    phone.log <- phone.log[has.time, ]
    phone.log$time.phone <- as.character(phone.log$time.phone)
    parsed <- strptime(phone.log$time.phone, "%I:%M:%S %p %m/%d/%Y")
    phone.log$date.phone <- as.character(parsed)
    # convert the parsed times directly rather than re-parsing their text form
    phone.log$unixtime.phone <- as.numeric(as.POSIXct(parsed))
    # NOTE(review): +8 h is presumably a local-to-UTC shift -- confirm
    phone.log$unixtime.phone.corrected <- phone.log$unixtime.phone + (8 * 60 * 60)

    .msb.write.csv(phone.log, "phone_log.csv")
    # the format options belong to write.table, not to sprintf, so that this
    # copy is headerless, unquoted and comma-separated like the other dumps
    .msb.write.csv(phone.log,
                   sprintf("%s/%s_%s", dir.gismo, sid, "phone_log.csv"),
                   header=FALSE, quote=FALSE)

    invisible(NULL)
}