# Main Page/Research/MSB/Scripts/process sdf.R
#
# From phurvitz
# (wiki page header "Jump to: navigation, search" kept as a comment so the file parses as R)
source ("http://gis.washington.edu/phurvitz/R/functions.R") 

# for processing MyExperience.sdf files
# requires the sdf data be saved as a CSV file

# source("/home/phurvitz/public_html/msb/tools/process_sdf.R"); 

# process.sdf: build a one-row-per-activity table from a subject's
# MyExperience responses (the .sdf data exported as "Get All Responses.csv").
#
# Args:
#   sid: subject id, a single value such as "s01". It names the subdirectory
#        of the download area that holds the subject's CSV export.
#
# Returns:
#   A data frame with one row per "WhatActivity" record and the columns
#   sid, myexp.phone.localtime (POSIXct), whatactivity, howlong, locationtype,
#   currentactivity, travelingpurpose, travelingstart, travelingend, chaining.
#   The same table is also written to "myexper.csv" in the subject directory.
#   When sid is missing/not a single value, or the subject directory is not
#   usable, a message is printed and NULL is returned invisibly.
#
# Notes:
#   - fix.colnames() and unfix.colnames() are not defined in this file;
#     presumably they come from the sourced functions.R -- TODO confirm.
#   - The answers belonging to one activity are read positionally from the rows
#     following its "WhatActivity" row, so the export must keep that order.
#   - The working directory is switched to the subject directory while the
#     function runs and is always restored on exit, including on error.
process.sdf <- function(sid) {
    function.name <- "process.sdf"

    # nothing to restore here: the working directory has not been touched yet
    if (missing(sid) || length(sid) != 1) {
        cat(paste("Usage: ", function.name, " (sid)\n", sep=""))
        return(invisible())
    }

    # where are files?
    basedir <- "/home/phurvitz/public_html/msb/processed_data/downloaded_data"
    subject.dir <- paste(basedir, "/", sid, "/", sep="")

    # validate the directory BEFORE changing into it, so a bad sid produces
    # the message below rather than a setwd() error
    # does the dir exist?
    if (!file.exists(subject.dir)) {
        err.txt <- paste(subject.dir, "does not exist!", "\n", sep=" ")
        cat(err.txt)
        return(invisible())
    }

    # is the arg a dir?
    if (!isTRUE(file.info(subject.dir)$isdir)) {
        err.txt <- paste(subject.dir, "is not a dir!", "\n", sep=" ")
        cat(err.txt)
        return(invisible())
    }

    # setwd() returns the previous directory; restore it however we leave
    current.dir <- setwd(subject.dir)
    on.exit(setwd(current.dir), add = TRUE)

    # input data
    infile <- paste(subject.dir, "Get All Responses.csv", sep="")
    myexper <- read.csv(infile)
    # standardize field names
    colnames(myexper) <- fix.colnames(myexper)

    # the three input fields used below (names as they are after fix.colnames)
    question.id <- myexper$questionid
    responses <- myexper$textresponse
    stamps <- myexper$timestamp
    if (is.null(question.id) || is.null(responses) || is.null(stamps)) {
        stop(infile, " lacks a questionid, textresponse or timestamp column",
            call. = FALSE)
    }
    # as.character() so factor columns (older read.csv defaults) index safely
    question.id <- as.character(question.id)
    responses <- as.character(responses)
    stamps <- as.character(stamps)

    # output fields
    vector.questions <- c("sid", "myexp.phone.localtime", "whatactivity", "howlong", "locationtype", "currentactivity",
        "travelingpurpose", "travelingstart", "travelingend", "chaining")

    # standardize the output field names on an empty frame, then reuse them
    d1 <- as.data.frame(matrix(data=NA, nrow=0, ncol=length(vector.questions)))
    colnames(d1) <- vector.questions
    colnames(d1) <- fix.colnames(d1)
    out.names <- colnames(d1)

    # each "WhatActivity" row starts a new activity; which() drops NA ids and
    # yields an empty vector (no iterations) for an empty input table
    starts <- which(question.id == "WhatActivity")

    stamp.in <- "%m/%d/%Y %I:%M:%S %p"
    stamp.iso <- "%Y-%m-%d %H:%M:%S"

    rows <- lapply(starts, function(i) {
        # whatactivity, howlong, locationtype, currentactivity; indexing past
        # the end of the table (truncated export) gives NA instead of an error
        core <- responses[i:(i + 3)]
        # isTRUE() treats an NA currentactivity as "not traveling"
        traveling <- isTRUE(core[4] == "traveling from one place to another")
        if (traveling) {
            # travelingpurpose, travelingstart, travelingend, chaining
            travel <- responses[(i + 4):(i + 7)]
            last <- i + 7
        } else {
            # not traveling: blank values for the traveling fields
            travel <- rep("", 4)
            last <- i + 3
        }
        # phone time of the activity's last answer, standardized to ISO;
        # an explicit output format keeps the time part even at midnight
        localtime <- format(strptime(stamps[last], stamp.in), stamp.iso)

        # handle the "skip this question" cases, convert to "X"
        gsub(".*Skip this Question.*", "X", c(sid, localtime, core, travel))
    })

    # assemble all records at once instead of growing the frame row by row
    if (length(rows) > 0) {
        values <- do.call(rbind, rows)
    } else {
        values <- matrix(character(0), nrow=0, ncol=length(out.names))
    }
    d1 <- as.data.frame(values, stringsAsFactors=FALSE)
    colnames(d1) <- out.names

    # localtime as a time field
    d1$myexp.phone.localtime <- as.POSIXct(d1$myexp.phone.localtime, format=stamp.iso)

    # fix for specific subjects (note the relationship with "read_sms_*.R" as well);
    # values are seconds added to the phone time:
    #   s01 and s45 were off by 1 hour
    #   s11 and s16 were off by 12 hours
    #   s03 and s06 were adjusted by the offset between phone time (set
    #   incorrectly) and SMS network receipt time. not optimal, but workable
    clock.offsets <- c(
        s01 = -1 * 3600,
        s11 = -12 * 3600,
        s16 = -12 * 3600,
        s45 = -1 * 3600,
        s03 = 55124083.5147059,
        s06 = 55134785.6808511)
    sid.key <- as.character(sid)
    if (sid.key %in% names(clock.offsets)) {
        d1$myexp.phone.localtime <- d1$myexp.phone.localtime + clock.offsets[[sid.key]]
    }

    # write out the table (with the original-style field names), then switch
    # back to the standardized names for the returned value
    colnames(d1) <- unfix.colnames(d1)
    outputFN <- paste(subject.dir, "myexper.csv", sep="")
    write.table(d1, file=outputFN, sep=",", col.names=TRUE, row.names=FALSE, quote=TRUE)
    colnames(d1) <- fix.colnames(d1)

    return(d1)
}