Main Page/Research/MSB/Scripts/join data.R
From phurvitz
< Main Page | Research | MSB | Scripts
Revision as of 16:32, 27 January 2009 by Phil Hurvitz (talk | contribs) (User:Phil Hurvitz/Projects/In process/MSB/Scripts/join data.R moved to Main Page/Research/MSB/Scripts/join data.R)
# generates the "msbmaster.csv" file for a subject library(rgdal) msb.join <- function(sid, overwrite=F, filter=F){ if (missing(sid)) { cat("Usage: msb.join(sid, overwrite=F, filter=F)\n") return(invisible()) } current.dir <- getwd() basedir <- "/home/phurvitz/public_html/msb/processed_data/downloaded_data" subject.dir <- paste(basedir, sid, sep="/") setwd(subject.dir) if ((file.exists("msbmaster.csv") && overwrite) | !file.exists("msbmaster.csv")) { # refresh import & join of data sets? # helper perl script for concatenating data, adds headers etc. concatenate.command <- paste("~/public_html/msb/tools/msb_concatenate.pl", sid) system(concatenate.command) # open the msb data cat("Reading MSB data....\n") msb <- read.csv("sirfGPS_output.csv", as.is=T) colnames(msb) <- fix.colnames(msb) # drop records with duplicate timestamps msb <- msb[!duplicated(msb$msb.gps.unixtime),] rownames(msb) <- NULL # date cat ("Formatting local time....\n") gps.loctim <- strptime("1jan1970", "%d%b%Y", "GMT") + msb$msb.gps.unixtime attr(gps.loctim, "tzone") <- NULL msb$gps.loctim <- as.character(gps.loctim) # open the class data cat("Reading class data....\n") gpsclass <- read.csv("gps_class.csv", as.is=T) colnames(gpsclass) <- fix.colnames(gpsclass) gpsclass <- gpsclass[!duplicated(gpsclass$msb.gps.unixtime),] gpsclass$latitude <- gpsclass$longitude <- NULL rownames(gpsclass) <- NULL # join the class onto the msb cat("Joining MSB and class data...\n") msbjoin <- merge(msb, gpsclass, by.x="msb.gps.unixtime", by.y="msb.gps.unixtime") # handle errors for various data sets if(sid=="s13") { cat ("dropping first bad record(s)\n") msbjoin <- msbjoin[3:nrow(msbjoin),] } dropfirstrec <- c("s15", "s16", "s18", "s23") if(!is.na(match(sid, dropfirstrec))) { cat ("dropping first bad record(s)\n") msbjoin <- msbjoin[2:nrow(msbjoin),] } # construct new fields that store previous and next activity state cat("Adding fields to joined table and calculating movement activity....\n") msbjoin$allmove <- ifelse(msbjoin$moving=="s" & msbjoin$updown=="s", "s", "m") msbjoin$allmove.num <- ifelse(msbjoin$allmove=="s",0,1) # projection cat("Projecting coordinates....\n") xy.waspn <- project(cbind(msbjoin$longitude, msbjoin$latitude), "+init=epsg:2926") msbjoin$x.waspn <- round(xy.waspn[,1],3) msbjoin$y.waspn <- round(xy.waspn[,2],3) # calculate the distance to the next point, there will be lots of points at the start of the bout with 0 to the next point. source("~/public_html/msb/tools/calc.distance.R") msbjoin <- calc.distance(msbjoin, msbjoin$x.waspn, msbjoin$y.waspn, T) if(filter) { reps.dropzero <- 2 for (i in 1:reps.dropzero) { cat (paste ("Dropping first positions with no fix, run ", i, " of ", reps.dropzero, "....\n", sep="")) # get the first record number where the distance to the next point is greater than 0 (num.first.good.record <- as.numeric(rownames(head(msbjoin[msbjoin$dist.next > 0,],1))) + 1) # drop the first set of records before the first GPS fix msbjoin <- msbjoin[num.first.good.record:nrow(msbjoin),] # renumber rows rownames(msbjoin) <- NULL } } # write a processed file cat("Writing processed file (msbmaster.csv)....\n") write.csv(msbjoin, file="msbmaster.csv", row.names=F) } else { cat ("Reading in msb data....\n") msbjoin <- read.csv(file="msbmaster.csv", as.is=T) } msbjoin$gps.loctim <- as.POSIXct(msbjoin$gps.loctim) setwd(current.dir) return(msbjoin) }