Robin Lee Powell wrote at about 10:37:52 -0800 on Monday, December 6, 2010:
 > So I'm writing a script to transfer a client from one host to
 > another, using tarPCCopy, and I'm getting messages like this:
 >   Can't find 
 > foo--tm50-e00145--tm50-s00339---shared/47/f%2f/fshared/ffoo/fpurchase_order_assets/fbatch_7813/f7105620_done.txt
 >  in pool, will copy file
 > which is fascinating because the first column in ls -l is *3*. -_-
 > The tarPCCopy tar file therefore ends up becoming really large
 > (hundreds of gibibytes) with files that already exist in the pool,
 > presumably.
 > I've tried running md5sum on that file; can't find that in the pool.
 > I've tried BackupPC_zcat | md5sum; can't find that in the pool.

Well the 'md5sum' used in pool naming is only a partial file md5sum.
I wrote (and posted) a routine to calculate and optionally test for
existence of the md5sum pool name corresponding to any pc tree
file. I will attach a copy to the end of this post.

 > BackupPC_fixLinks, from the wiki, doesn't see the problem at all,
 > which I'd *very* much like to fix.

First check to make sure there really is a problem with the pool...
Then, we need to figure out whether there is a problem with tarcopy or
with my program BackupPC_fixLinks etc.


#============================================================= -*-perl-*-
# calculate and optionally verify the BackupPC-style
#                        partial md5sum of any file compressed by BackupPC

#   This program allows you to calculate the partial md5sum
#   corresponding to the cpool path for any file that uses
#   BackupPC-style compression whether or not that file is actually
#   stored or linked to the cpool. Optionally, if the file is a cpool
#   entry or is linked to the cpool, you can add the '-k' flag to
#   verify whether the corresponding cpool path is consistent with the
#   actual md5sum of the file.
#   Multiple files or directories can be given on the command line,
#   allowing you to calculate (and optionally verify) the md5sum for
#   multiple files or multiple trees of files. The script also does
#   path globbing using standard shell globbing conventions.
#   Paths are assumed to be either absolute or relative to the current
#   directory unless one of the options -C, -c, or -p is given, in which
#   case the paths are understood to be a cpool file name (without
#   path), a path relative to the cpool, or a path relative to the
#   pc directory, respectively.

#   Jeff Kosowsky
#   Copyright (C) 2009  Jeff Kosowsky
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
# Version 1.0, released Dec 2009
use strict;
use warnings;
use Getopt::Std;

use lib "/usr/share/BackupPC/lib";
use BackupPC::FileZIO;
use BackupPC::Lib;
use BackupPC::jLib;
use Cwd 'abs_path';
use File::Find;
use File::Glob ':glob';

# Create the BackupPC library handle, aborting if construction fails.
# The trailing 1 is the "no user check" flag.
my $bpc = BackupPC::Lib->new("", "", "", 1)
    or die("BackupPC::Lib->new failed\n");

# %Conf is a package global and must NOT be declared with 'my' here.
# NOTE(review): presumably it is exported by BackupPC::jLib -- confirm.
%Conf = $bpc->Conf();

# Parse the command-line switches into %opts.
#
# The usage text is printed to STDERR and the script exits when:
#   * an unknown switch is given (getopts returns false),
#   * no path argument remains after the switches, or
#   * more than one of the mutually exclusive -C / -c / -p is set.
#
# The here-document must be closed by a line containing only "EOF";
# without it Perl treats the rest of the file as part of the string.
my %opts;
my $path_modes = defined($opts{C}) + defined($opts{c}) + defined($opts{p});
if ( !getopts("Ccpka", \%opts) || @ARGV < 1
         || (defined($opts{C}) + defined($opts{c}) + defined($opts{p}) > 1)) {
    print STDERR <<EOF;
usage: $0 [options] path1 [path2] [path3]....
  Find BackupPC-style md5sum of compressed file
    -C   Entry is a cpool file name (no path)
    -c   Consider path relative to cpool directory
    -p   Consider path relative to pc directory
    -k   Compare to md5sum embedded in file name (for cpool entries)
         or to the inode number of the corresponding pool file (otherwise)
    -a   Use size from attrib file if available (for backup files)

EOF
    exit(1);    # usage error: do not fall through into the main logic
}

# ---------------------------------------------------------------------
# Main driver: expand the command-line arguments into a list of paths,
# then walk each path and report the digest of every regular file found.
# ---------------------------------------------------------------------

# Passed straight through to zFile2MD5: 0 when -a is given, -1 otherwise.
# NOTE(review): the exact meaning of 0 / -1 is defined by zFile2MD5
# (BackupPC::jLib) -- confirm there.
my $useattribsize = $opts{a} ? 0 : -1;
my $TopDir = $Conf{TopDir};
my $compress = $Conf{CompressLevel};
my $pool = $compress > 0 ? "cpool" : "pool";

my $md5 = Digest::MD5->new;
my @zpathlist;
foreach my $arg (@ARGV) {
    if ($opts{C}) {
        # Bare pool file name: map it to its full pool path.
        push @zpathlist, bsd_glob($bpc->MD52Path($arg, $compress));
    } elsif ($opts{c}) {
        # Path relative to the cpool directory.
        # NOTE(review): "cpool/" is hard-coded here while check_md5 strips
        # "$pool/"; the two differ when compression is disabled -- confirm.
        push @zpathlist, bsd_glob($bpc->TopDir() . "cpool/" . $arg);
    } elsif ($opts{p}) {
        # Path relative to the pc directory.
        push @zpathlist, bsd_glob($bpc->TopDir() . "pc/" . $arg);
    } else {
        # Absolute path, or path relative to the current directory.
        push @zpathlist, bsd_glob(abs_path($arg));
    }
}
die "No valid paths...\n" unless @zpathlist;

foreach my $zpath (@zpathlist) {
    $zpath =~ s#/+#/#g; #Remove extra slashes
    $zpath =~ s#/\.(/|$)#/#g; #Remove extra /.

    unless (-e $zpath) {
        warn "'$zpath' is not an existing file or directory path...\n";
        next;    # nothing to walk; move on to the next path
    }

    # check_md5 is invoked once for every entry below $zpath.
    find(\&check_md5, $zpath);
}

# check_md5 - File::Find callback: print the BackupPC-style digest of one file.
#
# Called by find() with $_ set to the current entry and $File::Find::name
# set to its full path.  Entries that are not regular files are skipped,
# as are files for which zFile2MD5 returns "-1".
#
# Output (one line per file on STDOUT):
#     <digest>  <filename>[ <status>]
# <filename> has the pc/ or pool prefix stripped when -p / -c was given and
# is reduced to its base name for -C.  <status> is printed only with -k:
#     MATCH   digest agrees with the pool file name (-c / -C), or the file
#             shares an inode with a pool file in the digest's chain
#     NOLINK  the file has fewer than 2 hard links (not linked to the pool)
#     ERROR2  no pool file exists at the path derived from the digest
#     ERROR   pool file(s) exist but none is linked to this file, or the
#             digest differs from the pool file name
#
# Reads the file-level variables $bpc, $md5, $useattribsize, $TopDir,
# $pool, $compress and %opts.
sub check_md5
{
    return unless -f;
    my $filename = $File::Find::name;
    my $digest = zFile2MD5($bpc, $md5, $File::Find::name, $useattribsize);
    return if ($digest eq "-1");

    # \Q..\E: the directory names are literal text, not regex syntax.
    $filename =~ s#^\Q${TopDir}\Epc/## if $opts{p};
    $filename =~ s#^\Q${TopDir}\E\Q$pool\E/## if $opts{c};
    $filename =~ s#.*/## if $opts{C};
    print "$digest  $filename";
    if ($opts{k}) {
        my $match = "ERROR";
        if ($opts{c} || $opts{C}) {
            # Pool entry: the file name is the digest, optionally followed
            # by a _N chain suffix.  Only trust $2 if the match succeeded.
            if ($File::Find::name =~ m#(.*/)?([[:xdigit:]]+)(_\d+)?#
                && $digest eq $2) {
                $match = "MATCH";
            }
        } else {
            # NOTE(review): the stat target was lost in SOURCE; the path
            # of the current file is assumed here -- confirm.
            my ($dev, $ino, $mode, $nlink, @therestD)
                = stat($File::Find::name);
            if (!defined($nlink) || $nlink < 2) {
                $match = "NOLINK";
            } else {
                my $poolpath = $bpc->MD52Path($digest, $compress);
                my $poolpath_ = $poolpath;
                my $i = 0;
                #Note: ERROR2 if no cpool file exists with md5sum path
                #ERROR if cpool file exists but files not linked
                $match = "ERROR2" unless -e $poolpath_;
                # Iterate through pool chain with same md5sum
                # (<path>, <path>_0, <path>_1, ...) looking for our inode.
                while (-e $poolpath_) {
                    if ((stat($poolpath_))[1] == $ino) {
                        $match = "MATCH";
                        last;
                    }
                    $poolpath_ = $poolpath . "_" . $i++;
                }
            }
        }
        print " $match";
    }
    print "\n";
}

