Main Page/Research/MSB/Scripts/msb remdupes.pl
From phurvitz
#! /usr/bin/perl -w
#
# Remove records with duplicate timestamps from a comma-separated file.
#
# Usage: <script> <infile>
#
# Input:
#   <infile>  Comma-separated text file. The timestamp is the third field
#             (index 2) of each line.
#
# Output (written next to the input file):
#   *.fixed.csv  The first line of the input (always copied) followed by
#                every line whose timestamp differs from the timestamp of
#                the line immediately before it.
#   *.error.csv  The 1-based input line number of each dropped record, one
#                per line.
#
# Only *consecutive* duplicates are removed; the same timestamp reappearing
# later in the file, after a different one, is kept.
#
# NOTE(review): the first line is presumably a header row -- it is copied
# unconditionally, exactly as the original script did.

use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

# Zero-based index of the timestamp column.
my $TIMESTAMP_FIELD = 2;

# handle args: without an input file there is nothing to do, so stop here
# instead of carrying on with an undefined file name.
if (!@ARGV) {
    print STDERR "usage: $0 <infile>\n";
    exit 1;
}

my $infile = $ARGV[0];
if (!-e $infile) {
    die "$infile does not exist!\n";
}

# Derive the output names. They can never equal $infile, so opening them
# for writing cannot truncate the input.
my $outfile = derived_name($infile, 'fixed');
my $errfile = derived_name($infile, 'error');

# Three-argument open with lexical handles; every open is checked so a
# permissions or path problem is reported instead of silently ignored.
open(my $in,  '<', $infile)  or die "cannot read $infile: $!\n";
open(my $out, '>', $outfile) or die "cannot write $outfile: $!\n";
open(my $err, '>', $errfile) or die "cannot write $errfile: $!\n";

my $prev_timestamp;
while (my $record = <$in>) {
    chomp $record;

    # parse out the line into elements and get the timestamp
    # (undef when the line has fewer than three fields)
    my $this_timestamp = (split /,/, $record)[$TIMESTAMP_FIELD];

    if ($. == 1 || !same_timestamp($prev_timestamp, $this_timestamp)) {
        # first line, or a timestamp that differs from the previous line
        print {$out} "$record\n";
    }
    else {
        # duplicate of the previous line: record its line number only
        print {$err} "$.\n";
    }

    # the current record becomes the reference for the next one, whether
    # or not it was written out
    $prev_timestamp = $this_timestamp;
}

close($in) or die "error closing $infile: $!\n";
# Buffered write errors only surface at close, so check the write handles.
close($out) or die "error closing $outfile: $!\n";
close($err) or die "error closing $errfile: $!\n";

# Build "<stem>.<tag>.csv" from $path. A trailing ".csv" is replaced by
# ".<tag>.csv"; any other name gets ".<tag>.csv" appended. Anchoring at the
# end of the string keeps a "csv" elsewhere in the path (for example in a
# directory name) from being rewritten.
sub derived_name {
    my ($path, $tag) = @_;
    my $name = $path;
    $name .= ".${tag}.csv" unless $name =~ s/\.csv\z/.${tag}.csv/;
    return $name;
}

# True when two timestamp fields denote the same instant. Numeric values
# are compared numerically (so "10" and "10.0" match); anything else is
# compared as a string, which avoids treating all non-numeric fields as
# equal to zero. Two missing fields are considered equal; a missing field
# never equals a present one.
sub same_timestamp {
    my ($left, $right) = @_;
    return !defined $right if !defined $left;
    return 0               if !defined $right;
    if (looks_like_number($left) && looks_like_number($right)) {
        return $left == $right;
    }
    return $left eq $right;
}