Main Page/Research/MSB/Scripts/msb parse sms.pl

From phurvitz
< Main Page‎ | Research‎ | MSB‎ | Scripts
Jump to: navigation, search
#! /usr/bin/perl -w
use strict;
use warnings;

# Reads and parses the phone log file.
#
# Input is a mailman archive (2007.txt); output is three CSV files, one per
# record format found in the archive:
#
#   sms4.csv  records with 2-4 comma-separated fields; the sender's phone
#             number (taken from the preceding "From " line) is prepended
#   sms6.csv  records with exactly 5 fields
#   sms8.csv  records with exactly 7 fields
#
# Every output row gets the date of the enclosing message appended as its
# last column.  Records with any other field count are dropped.

# Data directory for each supported operating system.
my %data_dir_for = (
    linux   => '/home/phurvitz/public_html/msb/processed_data',
    MSWin32 => 'P:/public_html/msb/processed_data',
);
my $source_url = 'http://gis.washington.edu/phurvitz/msb/processed_data/2007.txt';

my $data_dir = $data_dir_for{$^O};
defined $data_dir
    or die "$0: unsupported operating system '$^O'\n";

my $infile   = "$data_dir/2007.txt";
my $outfile4 = "$data_dir/sms4.csv";
my $outfile6 = "$data_dir/sms6.csv";
my $outfile8 = "$data_dir/sms8.csv";

# On linux only, fetch the archive when there is no local copy yet.
if ($^O eq 'linux' && !-e $infile) {
    # List form of system() so no shell is involved.
    my @wget = ('wget', '-nv', '-nd', '-np', $source_url, '-O', $infile);
    if (system(@wget) != 0) {
        my $status = $?;
        # The file did not exist before this attempt; remove whatever wget
        # may have left behind so that the next run retries the download.
        unlink $infile;
        die "$0: download of $source_url failed (wait status $status)\n";
    }
}

# read the input file
open(my $in, '<', $infile)
    or die "$0: cannot read $infile: $!\n";

# make the output files
open(my $out4, '>', $outfile4)
    or die "$0: cannot write $outfile4: $!\n";
open(my $out6, '>', $outfile6)
    or die "$0: cannot write $outfile6: $!\n";
open(my $out8, '>', $outfile8)
    or die "$0: cannot write $outfile8: $!\n";

# write the header, different for each format
print {$out4} "phone.num,kb.logged,msb.secs,gps.lock.last,sms.phone.localtime,sms.net.localtime\n";
print {$out6} "sid,kb.logged,msb.secs,gps.lock.last,sms.phone.localtime,sms.net.localtime\n";
print {$out8} "sid,kb.logged,msb.secs,gps.lock.last,gps.week,gps.ms,sms.phone.localtime,sms.net.localtime\n";

# State carried over from the most recent "From " line.
my $number;      # sender's phone number with a trailing comma
my $sms_date;    # message date, formatted "year-month-day time"

# cycle through each line
LINE:
while (my $line = <$in>) {
    chomp $line;

    # skip empty lines
    next LINE if $line eq '';

    # parse the line into pieces split by spaces
    my @words = split(/  */, $line);

    # if the line starts "From " get the phone number and the date
    if (defined $words[0] && $words[0] eq 'From') {
        if (@words < 9) {
            warn "$0: line $.: short 'From ' line, date will be incomplete\n";
        }
        # NOTE(review): the indices presumably follow the archive layout
        # "From <number> at <host> <weekday> <month> <day> <time> <year>"
        # -- confirm against the input file.
        my ($sender, $month, $day, $time, $year)
            = map { defined $_ ? $_ : '' } @words[1, 5, 6, 7, 8];
        $number   = "$sender,";
        $sms_date = "$year-$month-$day $time";
        next LINE;
    }

    # only lines starting with a space carry a record
    next LINE unless $line =~ /^ /;
    next LINE if $line =~ m/connect failed/;

    # drop the leading space and count the comma-separated fields
    (my $record = $line) =~ s/ //;
    my @fields      = split(/,/, $record);
    my $field_count = @fields;

    # Fewer than two fields is not a record.  This also rejects lines that
    # hold nothing but whitespace, which split to zero or one field.
    next LINE if $field_count < 2;

    if (!defined $number) {
        warn "$0: line $.: record found before any 'From ' line, skipped\n";
        next LINE;
    }

    # Field counts are numbers, so compare them numerically; string
    # comparison would sort 10 before 4.
    # if the XML file has included no subject number then prepend the phone number
    if ($field_count <= 4) {
        print {$out4} "$number$record,$sms_date\n";
    }
    elsif ($field_count == 5) {
        print {$out6} "$record,$sms_date\n";
    }
    elsif ($field_count == 7) {
        print {$out8} "$record,$sms_date\n";
    }
    # any other field count matches no known format and is dropped
}

close($in)
    or die "$0: error closing $infile: $!\n";

# Buffered write errors only surface at close, so check every output file.
close($out4)
    or die "$0: error closing $outfile4: $!\n";
close($out6)
    or die "$0: error closing $outfile6: $!\n";
close($out8)
    or die "$0: error closing $outfile8: $!\n";
#print "$0: Created\n$outfile4\n$outfile6\n$outfile8\n";