#!/usr/local/bin/perl # PERL program to remove duplicate entries in the tropical storm database. # Only the last entry for a particular time/storm is saved. Also removes # outdated forecasts. # # If -SaveOldForecasts is present on the command line, old forecasts are # deleted from the input but saved in a file. # Copyright (C) 1996 Thomas R. Metcalf # # This software is provided "as is" and is subject to change without # notice. No warranty of any kind is made with regard to this software, # including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose. The author shall # not be liable for any errors or for direct, indirect, special, # incidental or consequential damages in connection with the furnishing, # performance, or use of this software: use it at your own risk. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free # Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # #setpriority(0,0,getpriority(0,0)+4); # nice the process $SaveForecasts = 0; for ($i=0; $i<=$#ARGV; ++$i) { if ($ARGV[$i] =~ /^-SaveOldForecasts$/) { splice(@ARGV,$i,1); $SaveForecasts = 1; } } while (<>) { # Read input file into an array s/\n//; # Get rid of carriage return. while (s/\s\s/ /g) {}; # Remove multiple white spaces. $input[$.-1] = $_; } $OldForecastDirectory = "/data1/WWW/Tropical/Data"; $oneminute = 0.0000018974; # Times in years $tenminutes = 0.000018974; $onehour = 0.000113843; $threehours = 0.000341530; $sixhours = 0.000683061; $oneday = 0.00273225; $oneweek = 0.0191257; # Find the time of the last actual observation which was generated # at least one day before $now. This is a guess at the time remdup was # last run. # Only observations after one week before this time will be checked. # For safety, take the minimum of $lasttime and now. ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime; if ($year>=94) {$year+=1900;} else {$year+=2000;} $now = $year + ($yday+1 + ($hour/24.0))/366.0; $lasttime = $now; tsearch: for ($i=$#input;$i>=0;--$i) { @sline = split(/[ ]/,$input[$i]); # split fields if ($#sline >= 23) { if ($sline[20] eq 'ACT' && (($sline[23]+$oneday)<$now)) { $lasttime = $sline[21]; last tsearch; } } } if ($now < $lasttime) {$lasttime = $now;} for ($i=0;$i<=$#input;++$i) { $line = $input[$i]; $snum = @sline = split(/[ ]/,$line); # split fields # Old forecasts (>1 week old) are deleted $notoldfor = !((($sline[20]) eq "FOR") && (($sline[21]+$oneweek)<$lasttime)); # just print if not final week if ($notoldfor && (($snum < 22) || (($sline[21]+$oneweek) < $lasttime)) ) { print $input[$i],"\n"; } elsif ($notoldfor) { $check = $sline[9].$sline[20]; # Name and type (ACT or FOR) $cfdate = $sline[21]; $plusweek = $cfdate + $oneweek; # One week ahead of current line if ($sline[20] eq "FOR") { # To get rid of old forecasts $checkfor = $sline[9]."ACT"; # to delete forecasts before actuals $checkforfor = $sline[9]."FOR"; # to delete forecasts before updated forecast $checkforact = "NEVER-MATCH"; $fordate = $sline[23]; } else { $checkfor = "NEVER-MATCH"; # actuals are never outdated $checkforfor = "NEVER-MATCH"; $checkforact = $sline[9]."FOR"; # Look for old forecasts ahead of an actual $fordate = 0; } $matches = 0; loop: for ($j=$i+1;$j<=$#input;++$j) { @sline = split(/[ ]/,$input[$j]); $fcheck = $sline[9].$sline[20]; $fcfdate = $sline[21]; $ffordate = $sline[23]; $cdiff = $cfdate-$fcfdate; # Look for duplicate times if ($cdiff < 0) {$cdiff=-$cdiff;} # abs if ($fcfdate > $plusweek) { last loop;} # No need to check more than a week # get rid of forecasts less than 3 hours after an actual observation # There are no 3 hr forecasts so anything within 3 hours of an actual # observation must be old: elsif (($checkfor eq $fcheck) && (($fcfdate+$threehours)>=$cfdate)) {$matches++;} elsif (($checkforact eq $fcheck) && (($cfdate+$threehours)>=$fcfdate)) { # Old forecast found ahead of actual observation. # Swap them and delete the forecast. &SaveOldForecast($sline[9],$input[$j]); $input[$j]=$input[$i]; # swap so that forecast is not printed $matches++; } # get rid of forecasts before updated forecast: elsif ($checkforfor eq $fcheck) { # $i and $j point to forecasts for same storm. $ddiff = $fordate-$ffordate; # Take the one with the latest creation date. if ($ddiff < 0) {$ddiff=-$ddiff;} # abs if ($ddiff > $onehour) { if ($fordate > $ffordate) { &SaveOldForecast($sline[9],$input[$j]); $input[$j]=$input[$i]; # swap } else {&SaveOldForecast($sline[9],$input[$i]);} ++$matches; } elsif ($cdiff < $onehour) {$matches++;} # Delete duplicate forecast } elsif (($sline[20] ne "FOR") && ($check eq $fcheck) && ($cdiff<$onehour)) { # Duplicate actual observation (name and date) found. # Save the duplicate with fewer question marks (unknown values). $first = $input[$i]; # Don't use array context so that $second = $input[$j]; # split gives a number. if (split(/\?/,$first) < split(/\?/,$second)) { # if equal, save later $best=$first; } else { $best=$second; } # Combine data in case diffent data is available if ($cdiff<$tenminutes) { $nbsplit = @bsplit = split(/\s+/,$best); $nfsplit = @fsplit = split(/\s+/,$first); $nssplit = @ssplit = split(/\s+/,$second); if (($nbsplit == $nfsplit) && ($nbsplit == $nssplit)) { for ($k=0;$k<$nbsplit;++$k) { if ($bsplit[$k] eq "???") { # $bsplit[$k] will equal $fsplit[$k] or $ssplit[$k] # so one of them will be "???". Is the other? if ($fsplit[$k] ne "???") {$bsplit[$k]=$fsplit[$k];} if ($ssplit[$k] ne "???") {$bsplit[$k]=$ssplit[$k];} } } $best = join(' ',@bsplit); } } # Save the best entry $input[$j]=$best; $matches++; # get rid of duplicate } if ($matches != 0) { last loop;} # when deletion found, we're done } if ($matches == 0) { print $input[$i],"\n"; } } } sub SaveOldForecast { local($name,$line) = @_; if ($SaveForecasts) { if (open(OldForecast,">>$OldForecastDirectory/$name.forecast")) { print OldForecast "$line\n"; close OldForecast; } } } exit;