I hope someone can help with this...
I have a directory full of files that each contain some data saved with a CGI.pm "save" routine like so...
$Q->save(FILE);
Here's an example file:
================================= save_type=save c_name=Archway%20Auto%20Salvage c_contact= customer_vat= c_phone=636-671-1120 c_email= c_address1=4140%20Gravois%20Rd. c_address2= c_city=House%20Springs c_state=MO c_zip=63051 c_country=USA customer_file_name=1113864636-158.cst = =================================
Some of these files are exactly alike except the "customer_file_name" field. What is a good way take the first file, compare it to each file in the directory and remove the duplicates, and then move on to the next file and compare it for duplicates and remove them until all files are unique?
I've succeeded somewhat in doing it with the code below, but this cannot be efficient and I know there must be a better approach.
Kindest Regards,
-- Bill Stephenson
############################### start code
#!/usr/bin/perl
use CGI; use Benchmark;
my $t0 = new Benchmark; my $SAVE; my $COMPARE; my @record_list; my @record_list2;
my $user="test"; #put the users username here my $dir="./users/$user/customer";
#my $dir="/Library/WebServer/CGI-Executables/test/remove_dups/customer";
opendir(DIR, "$dir") or die ("Can not open the directory: "); @record_list=(sort readdir (DIR)); close(DIR); shift (@record_list); shift (@record_list);
foreach my $record(@record_list){
unless ($record=~/^[\w\._\-]+$/) { die ("Bad Record Name::"); }
if (open(FILE, "$dir/$record")) {
no strict "subs";
$SAVE = new CGI(FILE); # Throw out the old $SAVE, replace it with a new one
close FILE;
opendir(DIR, "$dir") or die ("Can not open the directory: ");
@record_list2=(sort readdir (DIR));
close(DIR);
shift (@record_list2);
shift (@record_list2);
foreach my $record_to_test(@record_list2){
if ($record_to_test eq ".") {next;}
if ($record_to_test eq "..") {next;}
if ($record_to_test eq ".DS_STORE") {next;}
if (open(FILE, "$dir/$record_to_test")) {
no strict "subs";
$COMPARE = new CGI(FILE); # Throw out the old $INVOICE, replace it with a new one
close FILE;
if ($SAVE->param('customer_file_name') eq $COMPARE->param('customer_file_name')) {
next;
}
if ($SAVE->param('c_name') eq $COMPARE->param('c_name')) {
if ($SAVE->param('c_contact') eq $COMPARE->param('c_contact')) {
if ($SAVE->param('customer_vat') eq $COMPARE->param('customer_vat')) {
if ($SAVE->param('c_phone') eq $COMPARE->param('c_phone')) {
if ($SAVE->param('c_email') eq $COMPARE->param('c_email')) {
if ($SAVE->param('c_address1') eq $COMPARE->param('c_address1')) {
if ($SAVE->param('c_address2') eq $COMPARE->param('c_address2')) {
if ($SAVE->param('c_city') eq $COMPARE->param('c_city')) {
if ($SAVE->param('c_state') eq $COMPARE->param('c_state')) {
if ($SAVE->param('c_zip') eq $COMPARE->param('c_zip')) {
if ($SAVE->param('c_country') eq $COMPARE->param('c_country')) {
print "Deleting $record_to_test\n";
unlink ("$dir/$record_to_test");
}
}
}
}
}
}
}
}
}
}
}
}
}
# The next 3 lines fix a bug in the data format my $c_phone1= $SAVE->param('c_phone'); $SAVE->delete('c_phone'); $SAVE->param('c_phone1', "$c_phone1");
no strict "subs"; open(DATA, "> $dir/$record") or die(" Couldn't open $dir/$record for writing: \n"); $SAVE->save(DATA); close DATA;
} }
print "\n\nAll Done\n"; my $t1 = new Benchmark; my $td = timediff($t1, $t0); print my $benchmark_time= "the code took: ". timestr($td); exit;
############################### end code
Kindest Regards,
-- Bill Stephenson 417-527-3647