User:Phil Hurvitz/Projects/In process/MSB/Scripts/msb concatenate.pl

From phurvitz
< User:Phil Hurvitz‎ | Projects‎ | In process‎ | MSB/Scripts
Revision as of 00:50, 27 January 2009 by Phil Hurvitz (talk | contribs) (New page: <pre> #! /usr/bin/perl -w # concatenate downloaded data use strict; use File::Basename; use File::Find; use Time::Local; # the subject to process if ($#ARGV == -1) { print "Usage; $0...)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to: navigation, search
#! /usr/bin/perl -w
# concatenate downloaded data

use strict;
use File::Basename;
use File::Find;
use Time::Local;

# ----------------------------------------------------------------------
# Main script body.
#
# Takes one argument, a subject directory name (e.g. "s03") located under
# $basedir, removes any master csv left in that directory by a previous
# run, and then walks the directory tree so that wanted() can rebuild the
# master csv from the per-bout files it finds.
# ----------------------------------------------------------------------

# the subject to process; a missing argument is a usage error, so the
# message goes to STDERR and the exit status is non-zero
if (@ARGV == 0) {
    print STDERR "Usage: $0 <subject_dir>\nE.g. $0 s03\n";
    exit 1;
}
my $subject_num = $ARGV[0];

# data locations
my $basedir = "/home/phurvitz/public_html/msb/processed_data/downloaded_data/";
my $subject_dir = $basedir . $subject_num;

# the subject location must be a directory, not merely an existing path
if (!-d $subject_dir) {
    die "$subject_dir does not exist or is not a directory.\n";
}
#print "$subject_dir\n";

# everything below uses paths relative to the subject directory, so a
# failed chdir must stop the run rather than act on the wrong directory
chdir($subject_dir)
    or die "Cannot change directory to $subject_dir: $!\n";

# delete the master file left by a previous run; if it cannot be removed
# the tree walk below would pick it up as if it were a bout file
my $stale_master = "gps_class_moveNotmove_output.csv";
if (-e $stale_master) {
    unlink($stale_master)
        or die "Cannot remove $subject_dir/$stale_master: $!\n";
}

# count variables for prepending the bout number onto each record
my $classcount = 1;

print "Concatenating csv files....\n";

# run a recursive find process: preprocess() sorts each directory's
# entries and wanted() does the concatenation
find( {
    preprocess => \&preprocess,
    wanted => \&wanted,
}, $subject_dir);

print "Concatenation complete.\n";

#------------
sub preprocess {
    # File::Find "preprocess" hook: receives the names found in the
    # directory being visited and hands them back in string (cmp) order.
    my @entries = @_;
    return sort { $a cmp $b } @entries;
}

sub wanted {
    #------------
    # File::Find "wanted" callback, run once per entry found under
    # $subject_dir with $_ holding the entry's name.
    #
    # Every file called "gps_class_moveNotmove_output.csv" is treated as
    # one bout and appended to the master csv of the same name directly
    # inside $subject_dir:
    #   - the header line (first line) is written only for the first
    #     bout, prefixed with "sid,bout,";
    #   - every later line is prefixed with "<subject>,<bout number>,";
    #   - all space characters are removed from each written record.
    #
    # Uses the file-level variables $subject_num, $subject_dir and
    # $classcount, and increments $classcount after each bout file.
    #
    # The work is done in Perl rather than through head/sed pipelines so
    # that odd characters in paths or in the subject name cannot be
    # interpreted by the shell or by sed, and so that read/write failures
    # stop the run instead of silently dropping a bout.
    #------------

    # basename of the file
    my $fn = basename($_);

    # anything that is not a per-bout csv is ignored
    return unless $fn eq "gps_class_moveNotmove_output.csv";

    print "PROCESSING $subject_num, bout $classcount, $File::Find::name\n";

    # read the whole bout file before the master is opened for writing, so
    # the master is never read and appended to at the same time
    open(my $in, '<', $_)
        or die "Cannot read $File::Find::name: $!\n";
    my @lines = <$in>;
    close($in);
    chomp @lines;

    # the first bout (re)creates the master file, later bouts append to it
    my $master = "$subject_dir/gps_class_moveNotmove_output.csv";
    my $mode = ($classcount == 1) ? '>' : '>>';
    open(my $out, $mode, $master)
        or die "Cannot write $master: $!\n";

    # the first line is the header; it is undef when the bout file is empty
    my $header = shift @lines;
    if ($classcount == 1 && defined $header) {
        # if this is the first, write out the header line
        my $record = "sid,bout," . $header;
        $record =~ tr/ //d;
        print {$out} "$record\n";
    }

    # but for all files write out everything after the 1st line; each
    # record is explicitly newline-terminated so that a bout file lacking a
    # final newline cannot run into the next bout's first record
    for my $line (@lines) {
        my $record = "$subject_num,$classcount,$line";
        $record =~ tr/ //d;
        print {$out} "$record\n";
    }

    # buffered write errors only surface at close
    close($out)
        or die "Cannot finish writing $master: $!\n";

    $classcount++;
}