Main Page/Research/MSB/Scripts/join data.R

From phurvitz
< Main Page‎ | Research‎ | MSB‎ | Scripts
Revision as of 02:55, 14 January 2009 by Phil Hurvitz (talk | contribs)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to: navigation, search
# generates the "msbmaster.csv" file for a subject
library(rgdal)
msb.join <- function(sid, overwrite=F, filter=F){    
    if (missing(sid)) {
        cat("Usage: msb.join(sid, overwrite=F, filter=F)\n")
        return(invisible())
    }
    current.dir <- getwd()
    basedir <- "/home/phurvitz/public_html/msb/processed_data/downloaded_data"
    subject.dir <- paste(basedir, sid, sep="/")
    setwd(subject.dir)
    if ((file.exists("msbmaster.csv") && overwrite) | !file.exists("msbmaster.csv")) {
        # refresh import & join of data sets?
        # helper perl script for concatenating data, adds headers etc.
        concatenate.command <- paste("~/public_html/msb/tools/msb_concatenate.pl", sid)
        system(concatenate.command)
    
        # open the msb data
        cat("Reading MSB data....\n")
        msb <- read.csv("sirfGPS_output.csv", as.is=T)
        colnames(msb) <- fix.colnames(msb)
        # drop records with duplicate timestamps
        msb <- msb[!duplicated(msb$msb.gps.unixtime),]
        rownames(msb) <- NULL
    
        # date
        cat ("Formatting local time....\n")
        gps.loctim <- strptime("1jan1970", "%d%b%Y", "GMT") + msb$msb.gps.unixtime
        attr(gps.loctim, "tzone") <- NULL
        msb$gps.loctim <- as.character(gps.loctim)

        # open the class data
        cat("Reading class data....\n")
        gpsclass <- read.csv("gps_class.csv", as.is=T)
        colnames(gpsclass) <- fix.colnames(gpsclass)
        gpsclass <- gpsclass[!duplicated(gpsclass$msb.gps.unixtime),]
        gpsclass$latitude <- gpsclass$longitude <- NULL
        rownames(gpsclass) <- NULL
    
        # join the class onto the msb
        cat("Joining MSB and class data...\n")
        msbjoin <- merge(msb, gpsclass, by.x="msb.gps.unixtime", by.y="msb.gps.unixtime")

        # handle errors for various data sets
        if(sid=="s13") {
            cat ("dropping first bad record(s)\n")
            msbjoin <- msbjoin[3:nrow(msbjoin),]
        }
        dropfirstrec <- c("s15", "s16", "s18", "s23")
        if(!is.na(match(sid, dropfirstrec))) {
            cat ("dropping first bad record(s)\n")
            msbjoin <- msbjoin[2:nrow(msbjoin),]
        }
    
        # construct new fields that store previous and next activity state
        cat("Adding fields to joined table and calculating movement activity....\n")
        msbjoin$allmove <- ifelse(msbjoin$moving=="s" & msbjoin$updown=="s", "s", "m")
        msbjoin$allmove.num <- ifelse(msbjoin$allmove=="s",0,1)
    
        # projection
        cat("Projecting coordinates....\n")
        xy.waspn <- project(cbind(msbjoin$longitude, msbjoin$latitude), "+init=epsg:2926")
        msbjoin$x.waspn <- round(xy.waspn[,1],3)
        msbjoin$y.waspn <- round(xy.waspn[,2],3)
    
        # calculate the distance to the next point, there will be lots of points at the start of the bout with 0 to the next point.
        source("~/public_html/msb/tools/calc.distance.R")
        msbjoin <- calc.distance(msbjoin, msbjoin$x.waspn, msbjoin$y.waspn, T)

        if(filter) {
            reps.dropzero <- 2
            for (i in 1:reps.dropzero) {
                cat (paste ("Dropping first positions with no fix, run ", i, " of ", reps.dropzero, "....\n", sep=""))
                # get the first record number where the distance to the next point is greater than 0
                (num.first.good.record <- as.numeric(rownames(head(msbjoin[msbjoin$dist.next > 0,],1))) + 1)
                # drop the first set of records before the first GPS fix
                msbjoin <- msbjoin[num.first.good.record:nrow(msbjoin),]
                # renumber rows
                rownames(msbjoin) <- NULL
            }
        }

        # write a processed file
        cat("Writing processed file (msbmaster.csv)....\n")
        write.csv(msbjoin, file="msbmaster.csv", row.names=F)
    } else {
        cat ("Reading in msb data....\n")
        msbjoin <- read.csv(file="msbmaster.csv", as.is=T)
    }

    msbjoin$gps.loctim <- as.POSIXct(msbjoin$gps.loctim)
    setwd(current.dir)
    return(msbjoin)
}