Difference between revisions of "Main Page/Research/MSB/Scripts/read.msb.files.R"

From phurvitz
< Main Page‎ | Research‎ | MSB‎ | Scripts
Jump to: navigation, search
 
Line 1: Line 1:
 
 
<pre>
 
<pre>
 
# libraries
 
# libraries
Line 11: Line 10:
 
setwd(indir)
 
setwd(indir)
  
# get a list of directories
+
process.msb <- function(indir, sid) {
list.allfiles <- list.files(indir)
+
   
list.dirs <- NULL
+
    function.name <- "process.msb"
for (f in list.allfiles) {
+
    if ((missing(indir)) || (missing(sid))) {
    if (file.info(f)$isdir) {
+
        cat(paste("Usage: ", function.name, " (indir, sid)\n", sep=""))
         list.dirs <- c(list.dirs, f)
+
    }
 +
   
 +
    # get a list of directories
 +
    list.allfiles <- list.files(indir)
 +
    list.dirs <- NULL
 +
    for (f in list.allfiles) {
 +
        if (file.info(f)$isdir) {
 +
            list.dirs <- c(list.dirs, f)
 +
         }
 +
    }
 +
    count.list.dirs <- length(list.dirs)
 +
   
 +
    # file list from one of the dirs
 +
    filelist0 <- list.files(list.dirs[1], pattern=".*csv")
 +
   
 +
    # initialize master lists
 +
    for (f in filelist0) {
 +
        command.init.csv <- paste(f, "<- NULL")
 +
        eval(parse(text=command.init.csv))
 
     }
 
     }
}
+
   
count.list.dirs <- length(list.dirs)
+
    # timestamp adjuster
 
+
    time.adj <- -16 * 3600
# file list from one of the dirs
+
   
filelist0 <- list.files(list.dirs[1], pattern=".*csv")
+
    # for each directory
 
+
    for (d in list.dirs) {
# initialize master lists
+
        # a list of CSV files in the directory
for (f in filelist0) {
+
        filelist <- list.files(d, pattern=".*csv")
    command.init.csv <- paste(f, "<- NULL")
+
        # for each CSV file in the directory
    eval(parse(text=command.init.csv))
+
        for (f in filelist) {
}
+
            # get the relative path to the CSV
 +
            fn <- (paste(d, f, sep="/"))
 +
            # formulate this as a data frame name in the R session
 +
            tn <- (paste(d, f, sep="."))
 +
            # create a command to read the filename into the data frame
 +
            command.read.csv <- paste(tn, "<- read.csv(", quote(fn), ", head=F)", sep="")
 +
            # print the command (for testing)
 +
            #print(command.read.csv)
 +
            # evaluate the command
 +
            as.data.frame(eval(parse(text=command.read.csv)))
 +
            # fix the colnames and timestamps on these
 +
            if (attr(regexpr("class", fn), "match.length") > 0 ) {
 +
                command.set.colnames <- paste("colnames(", tn, ") <- c(\"longitude\", \"latitude\", \"unixtime.gps\", \"updown\", \"moving\")", sep="")
 +
            }
 +
            else if (attr(regexpr("Output.csv", fn), "match.length") > 0 ) {
 +
                command.set.colnames <- paste("colnames(", tn, ") <- c(\"longitude\", \"latitude\", \"elevation\", \"unixtime.gps\")", sep="")
 +
            }
 +
            else if (attr(regexpr("timestamp", fn), "match.length") > 0 ) {
 +
                command.set.colnames <- paste("colnames(", tn, ") <- c(\"milliseconds.msb\", \"unixtime.gps\")", sep="")
 +
            }
 +
            # set the colnames
 +
            eval(parse(text=command.set.colnames))
 +
           
 +
            # set the sid
 +
            commad.set.sid <- paste(tn, "$sid <- sid", sep="")
 +
            eval(parse(text=commad.set.sid))
  
# timestamp adjuster
+
            # set the timestamp
time.adj <- -16 * 3600
+
            command.set.timestamp <- paste(tn, "$date.gps.adj <- ISOdatetime(1970,1,1,0,0,0) + ", tn, "$unixtime.gps + time.adj", sep="")
 +
            eval(parse(text=command.set.timestamp))
 +
            command.set.timestamp <- paste(tn, "$reftime.gps <- ISOdatetime(1970,1,1,0,0,0) + ", tn, "$unixtime.gps", sep="")
 +
            eval(parse(text=command.set.timestamp))
 +
           
 +
            # set the MSB seconds
 +
            if (attr(regexpr("timestamp", fn), "match.length") > 0 ) {
 +
                command.set.msbtime <- paste(tn, "$seconds.msb <- round(", tn, "$milliseconds.msb / 1000, 0)", sep="")
 +
                #print(command.set.msbtime)
 +
                eval(parse(text=command.set.msbtime))
 +
            }
 +
           
 +
            # fix extra spaces in moving and updown fields
 +
            if (attr(regexpr("class", fn), "match.length") > 0 ) {
 +
                command.remove.spaces <- sprintf("%s%s%s%s", tn, "$updown <- gsub('  *', '', ", tn, "$updown)")
 +
                eval(parse(text=command.remove.spaces))
 +
                command.remove.spaces <- sprintf("%s%s%s%s", tn, "$moving <- gsub('  *', '', ", tn, "$moving)")
 +
                eval(parse(text=command.remove.spaces))
 +
            }
  
# for each directory
+
           
for (d in list.dirs) {
+
            # concatenate with the master list
    # a list of CSV files in the directory
+
            command.concatenate.table <- paste(f, "<- rbind (", f, ",", tn, ")", sep="")
    filelist <- list.files(d, pattern=".*csv")
+
            print(command.concatenate.table)
    # for each CSV file in the directory
+
            eval(parse(text=command.concatenate.table))
    for (f in filelist) {
+
           
        # get the relative path to the CSV
+
            # write out the files
        fn <- (paste(d, f, sep="/"))
+
             command.write.table <- paste("write.table (",  
        # formulate this as a data frame name in the R session
+
                tn, ", file=\"", tn, "\", col.names=T, row.names=F, sep=\",\")", sep="")
        tn <- (paste(d, f, sep="."))
+
            print(command.write.table)
        # create a command to read the filename into the data frame
+
            eval(parse(text=command.write.table))  
        command.read.csv <- paste(tn, "<- read.csv(", quote(fn), ", head=F)", sep="")
 
        # print the command (for testing)
 
        #print(command.read.csv)
 
        # evaluate the command
 
        as.data.frame(eval(parse(text=command.read.csv)))
 
        # fix the colnames and timestamps on these
 
        if (attr(regexpr("class", fn), "match.length") > 0 ) {
 
             command.set.colnames <- paste("colnames(", tn, ") <- c(\"longitude\", \"latitude\", \"unixtime\", \"updown\", \"moving\")", sep="")
 
 
         }
 
         }
        else if (attr(regexpr("Output.csv", fn), "match.length") > 0 ) {
+
    }
            command.set.colnames <- paste("colnames(", tn, ") <- c(\"longitude\", \"latitude\", \"elevation\", \"unixtime\")", sep="")
+
   
        }
+
    # dump out concatenated tables
        else if (attr(regexpr("timestamp", fn), "match.length") > 0 ) {
+
    for (f in filelist0) {
            command.set.colnames <- paste("colnames(", tn, ") <- c(\"msbtime\", \"unixtime\")", sep="")
+
         command.write.table <- paste("write.table (", f, ", file=\"", f, "\", col.names=T, row.names=F, sep=\",\")", sep="")
        }
 
        # set the colnames
 
        eval(parse(text=command.set.colnames))
 
       
 
        # set the timestamp
 
        command.set.timestamp <- paste(tn, "$date <- ISOdatetime(1970,1,1,0,0,0) + ", tn, "$unixtime + time.adj", sep="")
 
        eval(parse(text=command.set.timestamp))
 
        command.set.timestamp <- paste(tn, "$reftime <- ISOdatetime(1970,1,1,0,0,0) + ", tn, "$unixtime", sep="")
 
        eval(parse(text=command.set.timestamp))
 
       
 
        # set the MSB seconds
 
        if (attr(regexpr("timestamp", fn), "match.length") > 0 ) {
 
            command.set.msbtime <- paste(tn, "$msbseconds <- round(", tn, "$msbtime / 1000, 0)", sep="")
 
            #print(command.set.msbtime)
 
            eval(parse(text=command.set.msbtime))
 
        }
 
       
 
        # concatenate with the master list
 
        command.concatenate.table <- paste(f, "<- rbind (", f, ",", tn, ")", sep="")
 
        print(command.concatenate.table)
 
        eval(parse(text=command.concatenate.table))
 
       
 
        # write out the files
 
         command.write.table <- paste("write.table  
 
          (", tn, ", file=\"", tn, "\", col.names=T, row.names=F, sep=\",\")", sep="")
 
 
         print(command.write.table)
 
         print(command.write.table)
         eval(parse(text=command.write.table))  
+
         eval(parse(text=command.write.table))
 
     }
 
     }
}
+
    write.table (gps_class.csv, file="class.csv", col.names=T, row.names=F, quote=F, sep=",")
 +
    write.table (sirfGPSOutput_timestamp.csv, file="timestmp.csv", col.names=T, row.names=F, quote=F, sep=",")
 +
    write.table (sirfGPSOutput.csv, file="gps.csv", col.names=T, row.names=F, quote=F, sep=",")
 +
 +
    # dump to gismo
 +
    dir.gismo <- "P:/public_html/msb/processed_data/processed/subjects"
 +
    write.table (gps_class.csv, file=sprintf("%s/%s_%s", dir.gismo, sid, "class.csv"), col.names=F, row.names=F, quote=F, sep=",")
 +
    write.table (sirfGPSOutput_timestamp.csv, file=sprintf("%s/%s_%s", dir.gismo, sid, "timestmp.csv"), col.names=F, quote=F, row.names=F, sep=",")
 +
    write.table (sirfGPSOutput.csv, file=sprintf("%s/%s_%s", dir.gismo, sid, "gps.csv"), col.names=F, row.names=F, quote=F, sep=",") 
  
# dump out concatenated tables
+
    # read phone log
for (f in filelist0) {
+
    phone.log <- read.csv("P:/public_html/msb/processed_data/phonelog.csv", as.is=T)
     command.write.table <- paste("write.table (", f, ", file=\"", f, "\", col.names=T, row.names=F, sep=\",\")", sep="")
+
    phone.log <- phone.log[phone.log$time.phone!="",]
     print(command.write.table)
+
    phone.log$time.phone <- as.character(phone.log$time.phone)
    eval(parse(text=command.write.table))
+
     phone.log$date.phone <- as.character(strptime(phone.log$time.phone, "%I:%M:%S %p %m/%d/%Y"))
 +
    phone.log$unixtime.phone <- as.numeric(as.POSIXct(phone.log$date.phone))
 +
    phone.log$unixtime.phone.corrected <- as.numeric(as.POSIXct(phone.log$date.phone)) + (8 * 60 * 60)
 +
    write.table(phone.log, file="phone_log.csv", col.names=T, row.names=F, sep=",")
 +
     write.table(phone.log, file=sprintf("%s/%s_%s", dir.gismo, sid, "phone_log.csv", col.names=F, row.names=F, quote=F, sep=","))
 
}
 
}
write.table (gps_class.csv, file="class.csv", col.names=T, row.names=F, sep=",")
 
write.table (sirfGPSOutput_timestamp.csv, file="timestmp.csv", col.names=T, row.names=F, sep=",")
 
write.table (sirfGPSOutput.csv, file="gps.csv", col.names=T, row.names=F, sep=",")
 
 
# read phone log
 
phone.log <- read.csv("P:/public_html/msb/processed_data/phonelog.csv", as.is=T)
 
phone.log <- phone.log[phone.log$time.phone!="",]
 
phone.log$time.phone <- as.character(phone.log$time.phone)
 
phone.log$date <- as.character(strptime(phone.log$time.phone, "%I:%M:%S %p %m/%d/%Y"))
 
write.table(phone.log, file="phone_log.csv", col.names=T, row.names=F, sep=",")
 
 
 
</pre>
 
</pre>

Revision as of 22:18, 10 October 2007

# libraries
library(gmt)

# functions
source ("http://gis.washington.edu/phurvitz/R/functions.R") 

#indir <- "C:/users/phurvitz/htdocs/phurvitz/msb/processed_data/research/projects/ubicomp3/phurvitz/gis.washington.edu/phurvitz/msb/data/pmh04_20070921/"
# Input directory for this run; it is also made the working directory.
# process.msb() writes its output files using relative paths, so they are
# created in the working directory selected here.
indir <- "C:/users/phurvitz/htdocs/phurvitz/msb/processed_data/research/projects/ubicomp3/phurvitz/gis.washington.edu/phurvitz/msb/data/pmh04_20070921a"
setwd(indir)

# Read the MSB CSV files found in every subdirectory of indir, label and
# timestamp them, concatenate them per file name, and write the results out.
#
# Arguments:
#   indir          directory whose immediate subdirectories hold the CSV files
#   sid            subject id; stored in a "sid" column of every table and
#                  used as prefix of the files written to dir.gismo
#   dir.gismo      directory receiving the header-less "<sid>_*.csv" copies
#   phone.log.file CSV file holding the phone log (needs a time.phone column)
#   time.adj       offset in seconds added to unixtime.gps for date.gps.adj
#
# Files recognised (by pattern on "<subdir>/<file>"):
#   "class"      -> longitude, latitude, unixtime.gps, updown, moving
#   "Output.csv" -> longitude, latitude, elevation, unixtime.gps
#   "timestamp"  -> milliseconds.msb, unixtime.gps
#
# Output (relative names are written to the current working directory):
#   <subdir>.<file>      one annotated table per input file
#   <file>               concatenation over all subdirectories
#   class.csv, timestmp.csv, gps.csv, phone_log.csv   (with header)
#   <dir.gismo>/<sid>_{class,timestmp,gps,phone_log}.csv   (without header)
#
# Returns NULL invisibly. Called without indir or sid it only prints a usage
# line. Stops with an error when indir has no subdirectories, when a CSV file
# matches none of the known patterns, or when one of gps_class.csv,
# sirfGPSOutput_timestamp.csv, sirfGPSOutput.csv was never read.
process.msb <- function(indir, sid,
                        dir.gismo = "P:/public_html/msb/processed_data/processed/subjects",
                        phone.log.file = "P:/public_html/msb/processed_data/phonelog.csv",
                        time.adj = -16 * 3600) {

    function.name <- "process.msb"
    if ((missing(indir)) || (missing(sid))) {
        cat(paste("Usage: ", function.name, " (indir, sid)\n", sep=""))
        # nothing can be done without both arguments
        return(invisible(NULL))
    }

    # get a list of directories (resolved against indir, not the working dir)
    list.allfiles <- list.files(indir)
    is.dir <- file.info(file.path(indir, list.allfiles))$isdir
    dir.names <- list.allfiles[!is.na(is.dir) & is.dir]
    if (length(dir.names) == 0) {
        stop("no subdirectories found in ", indir, call. = FALSE)
    }

    # file list from one of the dirs; these names get a concatenated dump
    filelist0 <- list.files(file.path(indir, dir.names[1]), pattern=".*csv")

    # master tables, one element per CSV file name
    master <- list()

    # origin for converting unix seconds to date-times
    epoch <- ISOdatetime(1970, 1, 1, 0, 0, 0)

    # for each directory
    for (d in dir.names) {
        # a list of CSV files in the directory
        filelist <- list.files(file.path(indir, d), pattern=".*csv")
        # for each CSV file in the directory
        for (f in filelist) {
            # relative path to the CSV (also used to recognise the file type)
            fn <- paste(d, f, sep="/")
            # name of the per-file output table
            tn <- paste(d, f, sep=".")

            tbl <- read.csv(file.path(indir, fn), header=FALSE)

            is.class <- grepl("class", fn)
            is.timestamp <- grepl("timestamp", fn)

            # fix the colnames on these
            if (is.class) {
                colnames(tbl) <- c("longitude", "latitude", "unixtime.gps", "updown", "moving")
            }
            else if (grepl("Output.csv", fn)) {
                colnames(tbl) <- c("longitude", "latitude", "elevation", "unixtime.gps")
            }
            else if (is.timestamp) {
                colnames(tbl) <- c("milliseconds.msb", "unixtime.gps")
            }
            else {
                stop("unrecognised CSV file: ", fn, call. = FALSE)
            }

            # set the sid
            tbl$sid <- sid

            # set the timestamps (adjusted and unadjusted)
            tbl$date.gps.adj <- epoch + tbl$unixtime.gps + time.adj
            tbl$reftime.gps <- epoch + tbl$unixtime.gps

            # set the MSB seconds
            if (is.timestamp) {
                tbl$seconds.msb <- round(tbl$milliseconds.msb / 1000, 0)
            }

            # fix extra spaces in moving and updown fields
            if (is.class) {
                tbl$updown <- gsub('  *', '', tbl$updown)
                tbl$moving <- gsub('  *', '', tbl$moving)
            }

            # concatenate with the master list
            cat(sprintf("appending %s to %s\n", tn, f))
            master[[f]] <- rbind(master[[f]], tbl)

            # write out the files
            cat(sprintf("writing %s\n", tn))
            write.table(tbl, file=tn, col.names=TRUE, row.names=FALSE, sep=",")
        }
    }

    # dump out concatenated tables
    for (f in filelist0) {
        cat(sprintf("writing %s\n", f))
        write.table(master[[f]], file=f, col.names=TRUE, row.names=FALSE, sep=",")
    }

    # the fixed-name exports below need these three tables
    required <- c("gps_class.csv", "sirfGPSOutput_timestamp.csv", "sirfGPSOutput.csv")
    absent <- setdiff(required, names(master))
    if (length(absent) > 0) {
        stop("missing input table(s): ", paste(absent, collapse=", "), call. = FALSE)
    }

    write.table(master[["gps_class.csv"]], file="class.csv", col.names=TRUE, row.names=FALSE, quote=FALSE, sep=",")
    write.table(master[["sirfGPSOutput_timestamp.csv"]], file="timestmp.csv", col.names=TRUE, row.names=FALSE, quote=FALSE, sep=",")
    write.table(master[["sirfGPSOutput.csv"]], file="gps.csv", col.names=TRUE, row.names=FALSE, quote=FALSE, sep=",")

    # dump to gismo
    gismo.file <- function(suffix) {
        sprintf("%s/%s_%s", dir.gismo, sid, suffix)
    }
    write.table(master[["gps_class.csv"]], file=gismo.file("class.csv"), col.names=FALSE, row.names=FALSE, quote=FALSE, sep=",")
    write.table(master[["sirfGPSOutput_timestamp.csv"]], file=gismo.file("timestmp.csv"), col.names=FALSE, row.names=FALSE, quote=FALSE, sep=",")
    write.table(master[["sirfGPSOutput.csv"]], file=gismo.file("gps.csv"), col.names=FALSE, row.names=FALSE, quote=FALSE, sep=",")

    # read phone log
    phone.log <- read.csv(phone.log.file, as.is=TRUE)
    phone.log <- phone.log[phone.log$time.phone!="",]
    phone.log$time.phone <- as.character(phone.log$time.phone)
    phone.log$date.phone <- as.character(strptime(phone.log$time.phone, "%I:%M:%S %p %m/%d/%Y"))
    phone.log$unixtime.phone <- as.numeric(as.POSIXct(phone.log$date.phone))
    phone.log$unixtime.phone.corrected <- as.numeric(as.POSIXct(phone.log$date.phone)) + (8 * 60 * 60)
    write.table(phone.log, file="phone_log.csv", col.names=TRUE, row.names=FALSE, sep=",")
    # formatting options belong to write.table, not to sprintf
    write.table(phone.log, file=gismo.file("phone_log.csv"), col.names=FALSE, row.names=FALSE, quote=FALSE, sep=",")

    invisible(NULL)
}