Main Page/Research/MSB/Scripts/msb parse sms.pl
From phurvitz
#! /usr/bin/perl -w
use strict;
use warnings;

# Reads and parses the phone log file.
# Input is a mailman archive (2007.txt); output is three CSV files, one per
# record layout found in the message body lines:
#   sms4.csv - lines with up to 4 comma-separated fields (no subject id, so
#              the sender's phone number is prepended)
#   sms6.csv - lines with 5 fields
#   sms8.csv - lines with 7 fields
# Every output row gets the date taken from the preceding "From " separator
# line appended as its last column.

# Pick the data directory for the platform we are running on.
my $data_dir;
if ($^O eq 'linux') {
    $data_dir = '/home/phurvitz/public_html/msb/processed_data';
}
elsif ($^O eq 'MSWin32') {
    $data_dir = 'P:/public_html/msb/processed_data';
}
else {
    # Without a known data directory there is nothing to read or write.
    die "$0: unsupported platform '$^O'\n";
}

my $infile   = "$data_dir/2007.txt";
my $outfile4 = "$data_dir/sms4.csv";
my $outfile6 = "$data_dir/sms6.csv";
my $outfile8 = "$data_dir/sms8.csv";

# On linux only, fetch the archive if it is not already present.
# List-form system() avoids the shell; a failed download is fatal because
# there would be nothing meaningful to parse.
if ($^O eq 'linux' && !-e $infile) {
    system('wget', '-nv', '-nd', '-np',
        'http://gis.washington.edu/phurvitz/msb/processed_data/2007.txt',
        '-O', $infile) == 0
        or die "$0: wget could not fetch $infile (status $?)\n";
}

open my $in,   '<', $infile   or die "$0: cannot read $infile: $!\n";
open my $out4, '>', $outfile4 or die "$0: cannot write $outfile4: $!\n";
open my $out6, '>', $outfile6 or die "$0: cannot write $outfile6: $!\n";
open my $out8, '>', $outfile8 or die "$0: cannot write $outfile8: $!\n";

# write the header, different for each format
print {$out4} "phone.num,kb.logged,msb.secs,gps.lock.last,sms.phone.localtime,sms.net.localtime\n";
print {$out6} "sid,kb.logged,msb.secs,gps.lock.last,sms.phone.localtime,sms.net.localtime\n";
print {$out8} "sid,kb.logged,msb.secs,gps.lock.last,gps.week,gps.ms,sms.phone.localtime,sms.net.localtime\n";

# State carried from the most recent "From " separator line.  Start empty so
# a data line that appears before any separator does not trigger
# uninitialized-value warnings.
my $number   = '';
my $sms_date = '';

# cycle through each line
LINE:
while (my $record = <$in>) {
    chomp $record;

    # skip empty lines
    next LINE if $record eq '';

    # Parse the line into pieces split by runs of spaces.  The pattern must
    # be / +/ (one or more): / */ can match the empty string, which makes
    # split break the line into single characters so "From" is never seen.
    my @words = split / +/, $record;

    # if the line starts "From " get the phone number and the message date
    if (@words && $words[0] eq 'From') {
        $number   = "$words[1],";
        $sms_date = "$words[8]-$words[5]-$words[6] $words[7]";
    }

    # Only lines that start with a space carry data.
    next LINE unless $record =~ /\A /;

    # Drop the leading space and keep the rest of the line.
    (my $line = $record) =~ s/ //;

    next LINE if $line =~ m/connect failed/;

    my @fields      = split /,/, $line;
    my $field_count = @fields;

    # Nothing usable: an empty remainder or a single field.
    next LINE if $field_count < 2;

    # Route by field count.  These are numeric comparisons; string operators
    # (le/eq) would mis-sort counts of 10 or more into the 4-field file.
    # Counts other than <=4, 5 and 7 are dropped.
    if ($field_count <= 4) {
        # the line has no subject number, so prepend the phone number
        print {$out4} "$number$line,$sms_date\n";
    }
    elsif ($field_count == 5) {
        print {$out6} "$line,$sms_date\n";
    }
    elsif ($field_count == 7) {
        print {$out8} "$line,$sms_date\n";
    }
}

close $in;

# Buffered write errors only surface at close, so check the output handles.
close $out4 or die "$0: error closing $outfile4: $!\n";
close $out6 or die "$0: error closing $outfile6: $!\n";
close $out8 or die "$0: error closing $outfile8: $!\n";

#print "$0: Created\n$outfile4\n$outfile6\n$outfile8\n";