User:Phil Hurvitz/Projects/In process/MSB/Scripts/msb get data.pl
From phurvitz
< User:Phil Hurvitz | Projects | In process | MSB/Scripts
#! /usr/bin/perl -w
#
# Downloads the MSB data for one subject with wget, then collects symlinks to
# every "sirfGPSOutput.kml" found under <basedir>/clean into <basedir>/kml.
#
# Usage: <script> <subject>      e.g. <script> pmh07
#
# The data is fetched from a URL of the form
# http://www.cs.washington.edu/research/projects/ubicomp3/phurvitz/gis.washington.edu/phurvitz/msb/data/pmh07/part_00

use strict;
use warnings;
use File::Basename;
use File::Find;
use Time::Local;
use Cwd;

print "\n===========running $0==================\n";
print "===========downloading data============\n";

#---------------------------
# exactly one argument is expected: the subject ID (not a full URL)
if (@ARGV != 1) {
    print "Usage: $0 <subject to download>\ne.g., $0 pmh07\n";
    exit;
}

# strip out dots and slashes so the subject cannot escape the data
# directory ("../") or smuggle extra path components into the URL
my $subject = $ARGV[0];
$subject =~ tr{./\\}{}d;
die "Subject name is empty after removing dots and slashes\n"
    if $subject eq '';

# OS
my $os = $^O;

#---------------------------
# change to the correct dir
# $maindir: where wget is run from
# $basedir: per-subject directory holding "clean" and "kml"; only known on linux
my $maindir;
my $basedir;
if ($os eq "linux") {
    $maindir = "/home/phurvitz/public_html/msb/processed_data/processed";
    $basedir = "/home/phurvitz/public_html/msb/processed_data/downloaded_data/$subject";
}
elsif ($os eq "MSWin32") {
    $maindir = "/users/phurvitz/htdocs/phurvitz/msb/processed_data";
}

# on any other OS the download runs in the current directory, as before
if (defined $maindir) {
    chdir $maindir
        or die "Cannot change directory to $maindir: $!\n";
}

#---------------------------
# download
my $durl = "http://www.cs.washington.edu/research/projects/ubicomp3/phurvitz/gis.washington.edu/phurvitz/msb/data/$subject";

# reject [0-9]* dirs from the relative root of the subject (because of the
# "clean" format due to pre-parsing of uwar files into subdirs by bouts,
# from "parse_file_size.pl")
my $dexcl = "/research/projects/ubicomp3/phurvitz/gis.washington.edu/phurvitz/msb/data/$subject/[0-9]*";

# NOTE(review): the HTTP credentials are hard-coded here; consider moving
# them to a ~/.wgetrc or ~/.netrc file.
my @wget = (
    'wget', '-np', '-r', '-nH', '-N',
    '--accept=png,csv,kml',
    '--reject=html,htm,txt',
    "--exclude-directories=$dexcl",
    '--http-user=phurvitz',
    '--http-passwd=phurvitz',
    $durl,
);

# list form: no shell is involved, so neither the subject nor the "[0-9]*"
# pattern can be expanded or interpreted by a shell
my $status = system @wget;
if ($status == -1) {
    warn "Could not run wget: $!\n";
}
elsif ($status != 0) {
    # best effort: wget reports non-zero even for partial failures, so keep
    # going with whatever was downloaded
    warn "wget exited with status " . ($status >> 8) . "\n";
}

# earlier variants of the download command, kept for reference:
#system "wget -np -r -nH -nc --accept=png,csv,kml --reject=html,htm,txt --http-user=phurvitz --http-passwd=phurvitz $durl";
#system "wget -np -r -nH -N --accept=png,csv,kml --reject=html,htm,txt \
#    --http-user=phurvitz --http-passwd=phurvitz \
#    http://www.cs.washington.edu/research/projects/ubicomp3/phurvitz/gis.washington.edu/phurvitz/msb/data/$url/part_00";

#---------------------------
# go into each "part_*" dir and make a symlink to the name of the kml file
# (superseded by the File::Find pass below; kept for reference)
#chdir $basedir;
#print "$basedir\n";
#my @dirs = `ls -d part*`;
#foreach my $dir (@dirs) {
    #chomp $dir;
    #chdir "$dir";
    #if (-e "sirfGPSOutput.kml") {
        #if (! -e "$subject.$dir.kml") {
            #system "ln -s sirfGPSOutput.kml $subject.$dir.kml";
        #}
    #}
    ##chdir "$basedir";
#}

#---------------------------
# the linking step needs $basedir, which is only configured for linux
if (!defined $basedir) {
    print "No data directory is configured for OS '$os'; skipping KML file linking.\n";
    exit;
}

# make a "kml" dir
my $kmldir = "$basedir/kml";
if (!-d $kmldir) {
    mkdir $kmldir
        or die "Cannot create $kmldir: $!\n";
}

# running number prefixed to each link name; must be initialized before
# find() runs because wanted() increments it
my $count = 0;

# run a recursive find process
find(
    {
        preprocess => \&preprocess,
        wanted     => \&wanted,
    },
    "$basedir/clean"
);

print "KML file linking complete.\n";

#------------
# File::Find "preprocess" hook: returns the directory entries sorted
# alphabetically, which fixes the order in which wanted() numbers the files.
sub preprocess {
    return sort { $a cmp $b } @_;
}

#------------
# File::Find "wanted" hook: for every "sirfGPSOutput.kml" found, creates
# (or replaces) the symlink <basedir>/kml/<count>_sirfGPSOutput.kml.
# The link target is relative to the kml dir ("../clean/.../sirfGPSOutput.kml").
sub wanted {
    # basename of the file
    my $fn = basename($_);
    return if $fn ne "sirfGPSOutput.kml";

    $count++;
    my $lnfn = join("_", $count, $fn);

    # directory of the file relative to $basedir; \Q..\E keeps regex
    # metacharacters in the path from being interpreted
    (my $pd = $File::Find::dir) =~ s{\A\Q$basedir\E/}{};

    # absolute link location, so the nesting depth below "clean" does not matter
    my $link   = "$kmldir/$lnfn";
    my $target = "../$pd/$fn";

    # replace a stale link left over from a previous run
    if (-l $link) {
        unlink $link
            or warn "Cannot remove old link $link: $!\n";
    }

    symlink($target, $link)
        or warn "Cannot create link $link -> $target: $!\n";

    return;
}