I hope someone can help with this...

I have a directory full of files that each contain some data saved with a CGI.pm "save" routine like so...

        $Q->save(FILE);

Here's an example file:

=================================
save_type=save
c_name=Archway%20Auto%20Salvage
c_contact=
customer_vat=
c_phone=636-671-1120
c_email=
c_address1=4140%20Gravois%20Rd.
c_address2=
c_city=House%20Springs
c_state=MO
c_zip=63051
c_country=USA
customer_file_name=1113864636-158.cst
=
=================================

Some of these files are exactly alike except the "customer_file_name" field. What is a good way take the first file, compare it to each file in the directory and remove the duplicates, and then move on to the next file and compare it for duplicates and remove them until all files are unique?

I've succeeded somewhat in doing it with the code below, but this cannot be efficient and I know there must be a better approach.

Kindest Regards,

--
Bill Stephenson

############################### start code

#!/usr/bin/perl

use CGI;
use Benchmark;

my $t0 = new Benchmark;
my $SAVE;
my $COMPARE;
my @record_list;
my @record_list2;

my $user="test"; #put the users username here
my $dir="./users/$user/customer";

#my $dir="/Library/WebServer/CGI-Executables/test/remove_dups/customer";

opendir(DIR, "$dir") or die ("Can not open the directory: ");
@record_list=(sort readdir (DIR));
close(DIR);
shift (@record_list);
shift (@record_list);

foreach my $record(@record_list){

        unless ($record=~/^[\w\._\-]+$/) {
        die ("Bad Record Name::");
        }

if (open(FILE, "$dir/$record")) {
no strict "subs";
$SAVE = new CGI(FILE); # Throw out the old $SAVE, replace it with a new one
close FILE;



opendir(DIR, "$dir") or die ("Can not open the directory: ");
@record_list2=(sort readdir (DIR));
close(DIR);
shift (@record_list2);
shift (@record_list2);

foreach my $record_to_test(@record_list2){

if ($record_to_test eq ".") {next;}
if ($record_to_test eq "..") {next;}
if ($record_to_test eq ".DS_STORE") {next;}

if (open(FILE, "$dir/$record_to_test")) {
no strict "subs";
$COMPARE = new CGI(FILE); # Throw out the old $INVOICE, replace it with a new one
close FILE;

if ($SAVE->param('customer_file_name') eq $COMPARE->param('customer_file_name')) {
next;
}


if ($SAVE->param('c_name') eq $COMPARE->param('c_name')) {

if ($SAVE->param('c_contact') eq $COMPARE->param('c_contact')) {

if ($SAVE->param('customer_vat') eq $COMPARE->param('customer_vat')) {

if ($SAVE->param('c_phone') eq $COMPARE->param('c_phone')) {

if ($SAVE->param('c_email') eq $COMPARE->param('c_email')) {

if ($SAVE->param('c_address1') eq $COMPARE->param('c_address1')) {

if ($SAVE->param('c_address2') eq $COMPARE->param('c_address2')) {

if ($SAVE->param('c_city') eq $COMPARE->param('c_city')) {

if ($SAVE->param('c_state') eq $COMPARE->param('c_state')) {

if ($SAVE->param('c_zip') eq $COMPARE->param('c_zip')) {

if ($SAVE->param('c_country') eq $COMPARE->param('c_country')) {

print "Deleting $record_to_test\n";
unlink ("$dir/$record_to_test");

}
}
}
}
}
}
}
}
}
}
}
}
}


# The next 3 lines fix a bug in the data format
                my $c_phone1= $SAVE->param('c_phone');
                $SAVE->delete('c_phone');
                $SAVE->param('c_phone1', "$c_phone1");

                no strict "subs";
            open(DATA, "> $dir/$record")
            or die(" Couldn't open $dir/$record for writing: \n");
                $SAVE->save(DATA);
                close DATA;             

                }
        }

print "\n\nAll Done\n";
my $t1 = new Benchmark;
my $td = timediff($t1, $t0);
print my $benchmark_time= "the code took: ". timestr($td);
exit;

############################### end code





Kindest Regards,

--
Bill Stephenson
417-527-3647



Reply via email to