Main Page/Research/MSB/Scripts/process sdf.R
From phurvitz
< Main Page | Research | MSB | Scripts
Revision as of 16:32, 27 January 2009 by Phil Hurvitz (talk | contribs) (User:Phil Hurvitz/Projects/In process/MSB/Scripts/process sdf.R moved to Main Page/Research/MSB/Scripts/process sdf.R)
source("http://gis.washington.edu/phurvitz/R/functions.R")

# for processing MyExperience.sdf files
# requires the sdf data be saved as a CSV file
# source("/home/phurvitz/public_html/msb/tools/process_sdf.R");

# process.sdf: turn one subject's "Get All Responses.csv" export into a table
# with one row per reported activity.
#
# Args:
#   sid: subject id; also the name of the subject's directory under the
#        downloaded-data base directory (e.g. "s01").
#
# Returns:
#   A data frame with one row per "WhatActivity" question found in the input
#   and the columns sid, myexp.phone.localtime (POSIXct), whatactivity,
#   howlong, locationtype, currentactivity, travelingpurpose, travelingstart,
#   travelingend and chaining (names as returned by fix.colnames()).
#   Returns invisible NULL after printing a message when sid is missing or
#   the subject directory does not exist / is not a directory.
#
# Side effects:
#   Writes "myexper.csv" into the subject directory. The working directory is
#   switched to the subject directory while the function runs and is always
#   restored on exit, including when an error is raised.
#
# Relies on fix.colnames() and unfix.colnames() from the functions.R file
# sourced above.
process.sdf <- function(sid) {
  function.name <- "process.sdf"

  # Nothing has been changed yet at this point, so there is no working
  # directory to restore before returning.
  if (missing(sid)) {
    cat(paste("Usage: ", function.name, " (sid)\n", sep = ""))
    return(invisible())
  }

  # where are files?
  basedir <- "/home/phurvitz/public_html/msb/processed_data/downloaded_data"
  subject.dir <- paste(basedir, "/", sid, "/", sep = "")

  # Validate the directory BEFORE calling setwd(): setwd() raises an error on
  # a missing directory, which would make these messages unreachable.
  # does the dir exist?
  if (!file.exists(subject.dir)) {
    err.txt <- paste(subject.dir, "does not exist!", "\n", sep = " ")
    cat(err.txt)
    return(invisible())
  }
  # is the arg a dir?
  if (!isTRUE(file.info(subject.dir)$isdir)) {
    err.txt <- paste(subject.dir, "is not a dir!", "\n", sep = " ")
    cat(err.txt)
    return(invisible())
  }

  # setwd() returns the previous directory; restore it however we leave.
  current.dir <- setwd(subject.dir)
  on.exit(setwd(current.dir), add = TRUE)

  # input data
  infile <- paste(subject.dir, "Get All Responses.csv", sep = "")
  myexper <- read.csv(infile)
  # standardize field names
  colnames(myexper) <- fix.colnames(myexper)

  # Fail loudly if the expected fields are absent instead of silently
  # producing an empty table. `$` is used (as in the record loop below) so
  # the lookup rules are the same as for the actual data access.
  if (is.null(myexper$questionid) || is.null(myexper$textresponse) ||
      is.null(myexper$timestamp)) {
    stop("'", infile, "' lacks a questionid, textresponse or timestamp column",
         call. = FALSE)
  }

  # output fields
  vector.questions <- c("sid", "myexp.phone.localtime", "whatactivity",
                        "howlong", "locationtype", "currentactivity",
                        "travelingpurpose", "travelingstart", "travelingend",
                        "chaining")

  # Text response / timestamp of input row k. Indexing past the last row
  # yields NA, which is how a record truncated at the end of the file shows
  # up below.
  iso.format <- "%Y-%m-%d %H:%M:%S"
  text.at <- function(k) {
    as.character(myexper$textresponse[k])
  }
  # Phone timestamps look like "01/27/2009 04:32:10 PM"; convert to ISO text.
  # An explicit output format is used so that a time of exactly midnight
  # keeps its "00:00:00" part (the default rendering drops it, which would
  # make the later as.POSIXct() parse the whole column as dates only).
  # NOTE(review): "%p" parsing depends on the locale's AM/PM strings.
  time.at <- function(k) {
    parsed <- strptime(as.character(myexper$timestamp[k]),
                       "%m/%d/%Y %I:%M:%S %p")
    format(parsed, iso.format)
  }

  # Each activity record starts at a "WhatActivity" question; which() drops
  # NA question ids and copes with an empty input table.
  starts <- which(as.character(myexper$questionid) == "WhatActivity")

  # Preallocate one output row per activity instead of growing a data frame.
  records <- matrix(NA_character_, nrow = length(starts),
                    ncol = length(vector.questions))

  # process each record
  for (r in seq_along(starts)) {
    i <- starts[r]

    # The answers to the follow-up questions sit on the rows directly after
    # the "WhatActivity" row.
    whatactivity <- text.at(i)
    howlong <- text.at(i + 1)
    locationtype <- text.at(i + 2)
    currentactivity <- text.at(i + 3)

    if (is.na(currentactivity)) {
      warning("incomplete activity record starting at input row ", i,
              "; traveling fields left blank", call. = FALSE)
    }

    # isTRUE() treats a missing answer as "not traveling" instead of making
    # if() fail on NA.
    if (isTRUE(currentactivity == "traveling from one place to another")) {
      # traveling: four more answers follow, the record ends at row i + 7
      travelingpurpose <- text.at(i + 4)
      travelingstart <- text.at(i + 5)
      travelingend <- text.at(i + 6)
      chaining <- text.at(i + 7)
      myexp.phone.localtime <- time.at(i + 7)
    } else {
      # not traveling: blank traveling fields, the record ends at row i + 3
      travelingpurpose <- ""
      travelingstart <- ""
      travelingend <- ""
      chaining <- ""
      myexp.phone.localtime <- time.at(i + 3)
    }

    # concatenate values for this record
    out.values <- c(sid, myexp.phone.localtime, whatactivity, howlong,
                    locationtype, currentactivity, travelingpurpose,
                    travelingstart, travelingend, chaining)
    # handle the "skip this question" cases, convert to "X"
    records[r, ] <- gsub(".*Skip this Question.*", "X", out.values)
  }

  d1 <- as.data.frame(records, stringsAsFactors = FALSE)
  colnames(d1) <- vector.questions
  # standardize field names
  colnames(d1) <- fix.colnames(d1)

  # localtime as a time field
  d1$myexp.phone.localtime <- as.POSIXct(d1$myexp.phone.localtime,
                                         format = iso.format)

  # fix for specific subjects (note relationship between "read_sms_*.R" as
  # well). Offsets are in seconds:
  #   s01, s45: off by 1 hour
  #   s11, s16: off by 12 hours
  #   s03, s06: adjusted by the offset between phone time (set incorrectly)
  #             and SMS network receipt time. not optimal, but workable
  time.offsets <- c(s01 = -1 * 3600,
                    s11 = -12 * 3600,
                    s16 = -12 * 3600,
                    s45 = -1 * 3600,
                    s03 = 55124083.5147059,
                    s06 = 55134785.6808511)
  sid.key <- as.character(sid)
  if (sid.key %in% names(time.offsets)) {
    d1$myexp.phone.localtime <-
      d1$myexp.phone.localtime + time.offsets[[sid.key]]
  }

  # write out the table using the original (un-standardized) field names,
  # then switch back to the standardized names for the returned value
  colnames(d1) <- unfix.colnames(d1)
  outputFN <- paste(subject.dir, "myexper.csv", sep = "")
  write.table(d1, file = outputFN, sep = ",", col.names = TRUE,
              row.names = FALSE, quote = TRUE)
  colnames(d1) <- fix.colnames(d1)

  return(d1)
}