User:Phil Hurvitz/Projects/In process/MSB/Scripts/msb get data.pl

From phurvitz
Jump to: navigation, search
#! /usr/bin/perl -w
use strict;
use warnings;
use File::Basename;
use File::Find;
use Time::Local;
use Cwd;

print "\n===========running $0==================\n";
print "===========downloading data============\n";

# Downloads the MSB data of one subject.
# The data live under a URL of the form
# http://www.cs.washington.edu/research/projects/ubicomp3/phurvitz/gis.washington.edu/phurvitz/msb/data/pmh07/part_00


#---------------------------
# exactly one argument is required: the subject identifier (e.g. pmh07)
if (@ARGV != 1) {
    print STDERR "Usage: $0 <subject to download>\ne.g., $0 pmh07\n";
    exit 1;
}

# The subject is interpolated into a URL, into file system paths and into the
# wget command line, so accept nothing but a plain identifier: no dots,
# slashes, white space or shell metacharacters.
my $subject = $ARGV[0];
if ($subject !~ /\A[A-Za-z0-9_-]+\z/) {
    die "Invalid subject '$subject': only letters, digits, '_' and '-' are allowed\n";
}

# OS
my $os = "$^O";

#---------------------------
# change to the correct dir
#   $maindir : directory the download is run from
#   $basedir : per-subject directory that receives the "kml" links
my $maindir;
my $basedir;
my $pwd;
if ($os eq "linux") {
    $maindir = "/home/phurvitz/public_html/msb/processed_data/processed";
    $basedir = "/home/phurvitz/public_html/msb/processed_data/downloaded_data/$subject";
}
elsif ($os eq "MSWin32") {
    $maindir = "/users/phurvitz/htdocs/phurvitz/msb/processed_data";
    # NOTE(review): no $basedir has ever been defined for Windows, so the
    # KML linking step later in the script has no valid directory to work
    # on -- confirm the intended layout and set $basedir here.
}
else {
    warn "Unsupported OS '$os': staying in the current directory\n";
}

# Stop rather than let the recursive download land in an arbitrary directory.
if (defined $maindir) {
    chdir $maindir or die "Cannot chdir to $maindir: $!\n";
}

# getcwd comes from Cwd (already loaded above); unlike `pwd` it needs no
# external program and returns the path without a trailing newline.
$pwd = getcwd();

#---------------------------
# download
my $durl = "http://www.cs.washington.edu/research/projects/ubicomp3/phurvitz/gis.washington.edu/phurvitz/msb/data/$subject";

# reject [0-9]* dirs from the relative root of the subject (because of the "clean" format due to pre-parsing of
#  uwar files into subdirs by bouts, from "parse_file_size.pl"
my $dexcl = "/research/projects/ubicomp3/phurvitz/gis.washington.edu/phurvitz/msb/data/$subject/[0-9]*";

# The command is built as a list so that it is executed without a shell:
# the "[0-9]*" wildcard reaches wget untouched (no shell globbing) and the
# subject name cannot be interpreted as shell syntax.
# NOTE(review): the HTTP credentials are hard-coded and visible in the
# process list; consider moving them to a ~/.wgetrc or ~/.netrc file.
my @wget = (
    "wget", "-np", "-r", "-nH", "-N",
    "--accept=png,csv,kml",
    "--reject=html,htm,txt",
    "--exclude-directories=$dexcl",
    "--http-user=phurvitz",
    "--http-passwd=phurvitz",
    $durl,
);
#print "@wget\n";
#exit;

# A failed or partial download is reported but does not stop the script, so
# data fetched by an earlier run can still be processed.
system(@wget);
if ($? == -1) {
    warn "Could not run wget: $!\n";
}
elsif ($? & 127) {
    warn "wget was terminated by signal " . ($? & 127) . "\n";
}
elsif ($? != 0) {
    warn "wget exited with status " . ($? >> 8) . "; the download may be incomplete\n";
}

#system "wget -np -r -nH -nc --accept=png,csv,kml --reject=html,htm,txt --http-user=phurvitz --http-passwd=phurvitz $durl";
#system "wget -np -r -nH -N --accept=png,csv,kml --reject=html,htm,txt \
#    --http-user=phurvitz --http-passwd=phurvitz \
#    http://www.cs.washington.edu/research/projects/ubicomp3/phurvitz/gis.washington.edu/phurvitz/msb/data/$url/part_00";

#---------------------------
# go into each "part_*" dir and make a symlink to the name of the kml file
#chdir $basedir;
#print "$basedir\n";
#my @dirs = `ls -d part*`;
#foreach my $dir (@dirs) {
    #chomp $dir;
    #chdir "$dir";
    #if (-e "sirfGPSOutput.kml") {
        #if (! -e "$subject.$dir.kml") {
            #system "ln -s sirfGPSOutput.kml $subject.$dir.kml";
        #}
    #}
    ##chdir "$basedir";
#}
#---------------------------
# Gather links to every downloaded sirfGPSOutput.kml in one "kml" directory
# below $basedir.  The tree below "$basedir/clean" is walked with File::Find;
# preprocess() orders the directory entries and wanted() creates the links.
if (!defined $basedir) {
    # $basedir is only assigned for some operating systems
    warn "No base directory is defined for this platform; skipping KML file linking.\n";
}
else {
    my $kmldir   = "$basedir/kml";
    my $cleandir = "$basedir/clean";

    # make a "kml" dir
    if (!-d $kmldir) {
        mkdir $kmldir or die "Cannot create directory $kmldir: $!\n";
    }

    if (-d $cleandir) {
        # run a recursive find process
        find( {
            preprocess => \&preprocess,
            wanted => \&wanted,
        }, $cleandir);

        print "KML file linking complete.\n";
    }
    else {
        warn "Directory $cleandir does not exist; no KML files were linked.\n";
    }
}

#------------
sub preprocess {
    # Passed to find() as its "preprocess" callback: receives the names
    # found in a directory and returns them in ascending string order.
    my @names = @_;
    return sort @names;
}

# Running number that gives every KML link a unique name.
# NOTE: find() is called earlier in the file, i.e. before this assignment is
# executed at run time, so wanted() first sees $count undefined; "++" treats
# undef as 0, which is why the numbering nevertheless starts at 1.
my $count = 0;

# "wanted" callback for File::Find.
# For every file named sirfGPSOutput.kml a symbolic link
#     $basedir/kml/<count>_sirfGPSOutput.kml
# is created that points, relative to the kml directory, at the file found.
# Uses the file-level $basedir and $count.  Failures are reported with warn
# so that the directory walk carries on with the remaining files.
sub wanted {
    # basename of the file
    my $fn = basename($_);
    return if $fn ne "sirfGPSOutput.kml";

    $count++;
    my $lnfn = join("_", $count, $fn);

    # Directory of the KML file relative to $basedir.  \Q..\E keeps regex
    # metacharacters in the path literal, \A pins the match to the start.
    my $wd = $File::Find::dir;
    (my $pd = $wd) =~ s/\A\Q$basedir\E\///;

    # The link location is addressed through $basedir instead of
    # "../../../kml", so it is right however deep the KML file is nested.
    my $link   = "$basedir/kml/$lnfn";
    my $target = "../$pd/$fn";

    # replace a link left over from a previous run
    if (-l $link) {
        unlink $link or warn "Cannot remove old link $link: $!\n";
    }

    # symlink() raises an exception on platforms without symbolic links,
    # hence the eval.
    my $linked = eval { symlink($target, $link) };
    if (!$linked) {
        my $why = $@ ? $@ : "$!\n";
        warn "Cannot link $link -> $target: $why";
    }
    return;
}