Grmpf. New Soft might have bugs. ;-)

This fix it.

Gruß
   Klaus
-- 
Klaus Ethgen                            http://www.ethgen.de/
pub  2048R/D1A4EDE5 2000-02-26 Klaus Ethgen <[EMAIL PROTECTED]>
Fingerprint: D7 67 71 C4 99 A6 D4 FE  EA 40 30 57 3C 88 26 2B
#! /usr/bin/perl
#
# finddup 2.0 - find identical files and do somethink with it.
#

use strict;
use warnings;

use File::Find ();
use Digest::MD5;
use Getopt::Long;
use Pod::Usage;

# for the convenience of &wanted calls, including -eval statements:
use vars qw(*name *dir *prune);
*name   = *File::Find::name;
*dir    = *File::Find::dir;
*prune  = *File::Find::prune;

use vars qw($RCS_VERSION $VERSION $opt %filelist %md5list);

sub wanted;

$RCS_VERSION = '$Id: finddup,v 2.2 2005/02/06 12:21:02 klaus Exp $';
($VERSION = '$Revision: 2.2 $') =~ s/^\D*([\d.]*)\D*$/$1/;

GetOptions($opt = {}, qw(help|h man version noaction|n verbose|v quiet|q link|l 
oldresult|o)) || pod2usage 2;
pod2usage(1) if $opt->{help};
pod2usage(-exitstatus => 0, -verbose => 2) if $opt->{man};
if ($opt->{version}) { print "Version: $VERSION\n"; exit 0; }
# Force some options
$opt->{verbose} = 1 if not exists $opt->{verbose} and $opt->{noaction};
$opt->{link} = 1 if not exists $opt->{link} and $0 =~ /^(.*\/)?nodup(.pl)?$/;
$opt->{oldresult} = 1 if not exists $opt->{oldresult} and $0 =~ 
/^(.*\/)?nodup(.pl)?$/;

if ($opt->{oldresult})
{
   while (<>)
   {
      chomp;
      s/^(\d+) '//;
      my $size = $1;
      s/'$//;
      my @files = split(/' '/);
      open IN, "<", $files[0];
      my $md5 = Digest::MD5->new->addfile(*IN)->hexdigest;
      close IN;
      $md5list{$md5} = [[$size, [EMAIL PROTECTED];
   } # while (<>)
} # if ($opt->{oldresult})
else
{
   # Traverse desired filesystems
   File::Find::find({wanted => \&wanted}, '.');

   # Now calculate all md5sums. Afterwards %filelist can be freed.
   foreach (sort {$a->[1]->[0] cmp $b->[1]->[0]} values(%filelist))
   {
      open IN, "<", $_->[1]->[0];
      my $md5 = Digest::MD5->new->addfile(*IN)->hexdigest;
      close IN;
      $md5list{$md5} = [] unless exists $md5list{$md5};
      push @{$md5list{$md5}}, $_;
   }
   %filelist = ();
} # if ($opt->{oldresult}) { ... }...

# Now we can output doubles sorted by size
foreach (sort {$md5list{$b}->[0]->[0] <=> $md5list{$a}->[0]->[0]} 
keys(%md5list))
{
   next unless @{$md5list{$_}} > 1; # This file is single
   my $size = $md5list{$_}->[0]->[0];
   if ($size) # Do not output empty files
   {
      if ($opt->{link})
      {
         my $reffile = shift @{$md5list{$_}->[0]->[1]}; # Remove the first file 
to not unlink them
         print "Länge: $size Files:\t$reffile\n" if $opt->{verbose};
         foreach (@{$md5list{$_}})
         {
            foreach (@{$_->[1]})
            {
               print "\t\t\t$_\n" if $opt->{verbose};
               unless ($opt->{noaction})
               {
                  unlink $_ || die "Fehler beim Löschen von '$_'";
                  link $reffile, $_ || die "Fehler beim ln '$reffile' '$_'";
               }
            }
         }
         print "\n" if $opt->{verbose};
      } # if ($opt->{link})
      else
      {
         print "$size" unless $opt->{quiet};
         foreach (@{$md5list{$_}})
         {
            foreach (@{$_->[1]})
            {
               print " '$_'" unless $opt->{quiet};
            }
         }
         print "\n" unless $opt->{quiet};
      } # if ($opt->{link}) { ... } else
   } # if ($size) # Do not output emp...
} # foreach (keys(%md5list))

exit 0;


sub wanted
{
   my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size);

   if ((($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size) = lstat($_)) && 
!($File::Find::prune |= ($dev != $File::Find::topdev)) && -f _)
   {
      $filelist{$ino} = [$size, []] unless exists $filelist{$ino};
      push @{$filelist{$ino}->[1]}, $name;
   }
}

__END__

=head1 NAME

finddup - Find identical files and do somethink with it

=head1 SYNOPSIS

B<finddup> [I<options>...]

     --man              the manpage
 -h, --help             a short help
     --version          the version (CVS) of the program
 -n, --noaction         do just nothing, just print out (implies -v)
 -v, --verbose          just what the name says
 -q, --quiet            be quiet
 -l, --link             link the identical files together
 -o, --oldresult        Use the old output of this script

=head1 DESCRIPTION

finddup search the working directory and all files below on the same partition
for duplicate files.

finddup can optional hardlink such files to save space.

Files size 0 will not be reported or hardlinked as this might give problemes
later.

This is a complete rewrite of the finddup in perl to handle several issues:

=over

=item

Allow spaces and other characters in filenames

=item

be faster

=item

include nodup in same script

=item

Handle if the files allready have other hardlinks to it in the same tree

=item

Several improbvements

=back

If started as nodup or nodup.pl the script will act like started with optiones 
--link and
--oldresult

=head1 COPYRIGHT

Copyright (c) 2005 by Klaus Ethgen. All rights reserved.

=head1 LICENSE

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

=head1 AUTHOR

S<Klaus Ethgen E<lt>[EMAIL PROTECTED]<gt>>

=head1 HISTORY

 $Log: finddup,v $
 Revision 2.2  2005/02/06 12:21:02  klaus
 Little but important bug in link routine.

 Revision 2.1  2005/02/05 18:43:11  klaus
 Just cosmetic

 Revision 2.0  2005/02/05 18:41:20  klaus
 Completely new version

=cut

Attachment: signature.asc
Description: Digital signature

Reply via email to