Difference between revisions of "Main Page/Research/MSB/Scripts/msb remdupes.pl"

From phurvitz
< Main Page‎ | Research‎ | MSB‎ | Scripts
Jump to: navigation, search
 
(No difference)

Revision as of 20:40, 11 October 2007

#! /usr/bin/perl -w
# remove records with duplicate timestamps
#
# usage: msb_remdupes.pl <infile>
#
# Reads <infile> as comma-separated text, one record per line, and compares
# the third field (index 2, the timestamp) of every record with the third
# field of the record immediately before it.
#
#   * The first record, and every record whose timestamp differs from its
#     predecessor's, is copied to the output file.
#   * For every record whose timestamp equals its predecessor's, the input
#     line number is written to the error file and the record is dropped.
#
# Output file names are derived from the input name: a trailing ".csv" is
# replaced by ".fixed.csv" / ".error.csv" (so "run.csv" gives "run.fixed.csv"
# and "run.error.csv"); a name without that suffix gets the new suffix
# appended.  The derived names therefore never coincide with the input file.

use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

# handle args
if (@ARGV < 1) {
    print "usage: $0 <infile>\n";
    # Stop here: without an input file there is nothing sensible to do.
    exit 1;
}

# open the input file
my $infile = $ARGV[0];
if (!-e $infile) {
    die "$infile does not exist!\n";
}
open(my $in_fh, '<', $infile)
    or die "cannot open $infile for reading: $!\n";

# open the output file
my $outfile = derived_name($infile, 'fixed');
open(my $out_fh, '>', $outfile)
    or die "cannot open $outfile for writing: $!\n";

# open the error file
my $errfile = derived_name($infile, 'error');
open(my $err_fh, '>', $errfile)
    or die "cannot open $errfile for writing: $!\n";

# start reading the file
my $prev_timestamp;
while (my $record = <$in_fh>) {
    chomp $record;

    # parse out the line into elements and get the timestamp; a record with
    # fewer than three fields is given an empty timestamp
    my @elements = split(/,/, $record);
    my $this_timestamp = defined $elements[2] ? $elements[2] : '';

    # The first line has no predecessor, so it is always kept.  Any later
    # line is kept only when its timestamp differs from the previous line's.
    if ($. == 1 || !same_timestamp($prev_timestamp, $this_timestamp)) {
        print {$out_fh} "$record\n";
    }
    else {
        # duplicate of the previous line: log its line number and skip it
        print {$err_fh} "$.\n";
    }

    $prev_timestamp = $this_timestamp;
}

# close the input file
close($in_fh);

# close the output file (buffered write errors surface here)
close($out_fh) or die "error closing $outfile: $!\n";

# close the error file
close($err_fh) or die "error closing $errfile: $!\n";

# derived_name($path, $tag)
# Returns $path with a trailing ".csv" replaced by ".$tag.csv".  When $path
# does not end in ".csv", ".$tag.csv" is appended instead, so the result is
# always different from $path.
sub derived_name {
    my ($path, $tag) = @_;

    my $derived = $path;
    # Anchor at the end so a "csv" elsewhere in the path is left alone.
    if ($derived !~ s/\.csv\z/.$tag.csv/) {
        $derived .= ".$tag.csv";
    }
    return $derived;
}

# same_timestamp($left, $right)
# Returns true when the two timestamps are equal.  Values that both look
# like numbers are compared numerically (so "100" equals "100.0"); anything
# else is compared as a string, so non-numeric stamps are not all treated
# as equal.
sub same_timestamp {
    my ($left, $right) = @_;

    if (looks_like_number($left) && looks_like_number($right)) {
        return $left == $right;
    }
    return $left eq $right;
}