Op 20110401 om 04:30 schreef John Stoffel:
>
> John> I've been happily running bacula at home, and usually it's pretty
> John> predictable about the size of data backed up each night, but
> John> last night I had an incremental run for a specific client which
> John> used 8GB of data, when I normally expect around 500MB or so.
>
> John> Is there an easy mysql query I can use to:
>
> John> a) find the largest file(s) backed up for a particular jobid?
>
> Ok, I've googled, read back in the archives about the File.LStat field
> and how the file size is in a strange Base64 (almost) encoding which
> needs to be hacked on to get out the sizes.
>
> Looks like I'll just need to:
>
> 1. write a perl script to read in a jobid, then pull out the files
> from that job.
> 2. For each file, pull out the File.LStat field and decode the info.
> 3. And finally then I can search for the largest file(s).
>
> I'll post something when I'm done. I'd prefer to do it in mysql
> directly, but it looks too funky to parse the string properly without
> writing really crazy (to me!) SQL procedures or cursors. Not worth
> it.
On March 15th, Kjetil Torgrim Homme posted his "bacula-du" script
to this mailing list. The 'du' in the name stands for "disk usage",
same as the UNIX command `du`. The subject line was "file listing".
Please find attached my modified version plus a diff.
Hope this helps.
Geert Stappers
Sysadmin
#! /usr/bin/perl -w
# bacula-du 1.0
# Written by Kjetil Torgrim Homme <kjetil.ho...@redpill-linpro.com>
# Released under GPLv3 or the same terms as Bacula itself
# Print the command-line help text to the currently selected output handle
# and terminate the program with exit status 64.
sub usage {
    # $0 is interpolated so the help shows the name the script was run as.
    my $help_text = <<"_END_";
Usage: $0 [OPTIONS] -j JOBID
Summarize disk usage of directories included in the backup JOBID
Options are:
-a, --all write counts for all files, not just directories
-b, --bytes use size in octets rather than number of blocks
-B, --block-size=SIZE report SIZE-byte blocks (default 1Ki)
-m like --block-size=1Mi
-S, --separate-dirs do not include size of subdirectories
-t, --threshold=SIZE skip output for files or directories with usage
below SIZE
-L, --largest=NUM only print NUM largest directories/files
SIZE may be (or may be an integer optionally followed by) one of following:
k (1000), Ki (1024), M (1000*1000), Mi (1024*1024), G, Gi, T, Ti, P, Pi.
_END_
    print($help_text);
    exit(64);
}
use strict;
use DBD::mysql;
use DBI;
use MIME::Base64;
use Getopt::Long qw(:config bundling no_ignore_case);
use Data::Dumper;
# Catalog connection settings.
# $dbhost does not appear in the DSN below; within the visible script it is
# only used in the connection-failure message.
my $dbhost = 'localhost';
my $db     = 'bacula';
# The DSN names the database and sets mysql_read_default_group=clientp
# (presumably so DBD::mysql picks up the remaining connection options from
# that group of the MySQL option files -- confirm against the local my.cnf).
my $dsn    = 'DBI:mysql:database=' . $db . ';mysql_read_default_group=clientp';
my $dbuser = $db;    # the user name is the same string as the database name
my $dbpass;          # intentionally left undefined
#######################
# Lookup table for the base64 alphabet used below: every character maps to
# its 6-bit digit value ('A' => 0 ... '/' => 63).
my $i = 0;
my %base64 = map { $_ => $i++ } split("",
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");

# Decode one number written in Bacula's base64 notation.
#
# Unlike MIME base64 the string is simply a big-endian sequence of 6-bit
# digits: the value is accumulated by shifting left six bits per character.
#
# Parameters:
#   $_[0] - the encoded field (no surrounding whitespace).  An undefined
#           value (e.g. a missing LStat field) decodes to 0.
# Returns the decoded integer.  A leading '-' marks a negative value, which
# is how Bacula's own decoder (from_base64) interprets it; previously the
# sign was treated as an unknown digit and silently dropped.
# Characters outside the alphabet contribute 0 and trigger a warning.
sub decode_bacula_base64 {
    my ($encoded) = @_;
    return 0 unless defined $encoded;

    my $negative = 0;
    if (substr($encoded, 0, 1) eq '-') {
        $negative = 1;
        $encoded  = substr($encoded, 1);
    }

    my $acc = 0;
    for my $char (split("", $encoded)) {
        my $digit = $base64{$char};
        unless (defined $digit) {
            # Keep the historical best-effort result (digit value 0), but say
            # clearly what went wrong instead of an "uninitialized" warning.
            warn "Invalid character '$char' in Bacula base64 value\n";
            $digit = 0;
        }
        $acc <<= 6;
        $acc += $digit;
    }
    return $negative ? -$acc : $acc;
}
# Return the decoded value of field 7 (zero-based) of a whitespace-separated
# LStat string; the script uses this as the file size in octets (--bytes).
sub extract_size_from_lstat {
    my ($lstat) = @_;
    my @fields = split(" ", $lstat);
    return decode_bacula_base64($fields[7]);
}
# Return the decoded value of field 9 (zero-based) of a whitespace-separated
# LStat string, multiplied by 512; the script uses this as the allocated
# size when --bytes is not given.
sub extract_blocks_from_lstat {
    my ($lstat) = @_;
    my @fields = split(" ", $lstat);
    my $blocks = decode_bacula_base64($fields[9]);
    return 512 * $blocks;
}
# Convert a human-readable size such as "512", "10k", "4Mi" or "2GB" into a
# plain number.
#
# Parameters:
#   $num - a non-negative integer, a bare unit, or an integer immediately
#          followed by a unit.  Units: k/K (1000), Ki/ki (1024), M, Mi, G,
#          Gi, T, Ti, P, Pi; an optional trailing "B" is ignored.
# Returns the count multiplied by the unit's factor; a bare unit counts as
# one unit; a plain integer is returned unchanged.
# Dies with "Can't parse: ..." for anything else.
sub convert_units {
    my $num = shift;
    # "K" is accepted by the pattern below, so it needs an entry here;
    # without one "10K" silently evaluated to 0.
    my %units = ("k" => 1000**1, "K" => 1000**1,
                 "Ki" => 1024**1, "ki" => 1024**1,
                 "M" => 1000**2, "Mi" => 1024**2,
                 "G" => 1000**3, "Gi" => 1024**3,
                 "T" => 1000**4, "Ti" => 1024**4,
                 "P" => 1000**5, "Pi" => 1024**5);
    if ($num =~ /^(\d*)([kKMGTP]i?)B?$/) {
        my ($count, $unit) = ($1, $2);
        # Only an *omitted* count means "one unit".  Testing the length
        # rather than the truth value keeps an explicit "0k" at zero.
        $num = (length($count) ? $count : 1) * $units{$unit};
    } elsif ($num !~ /^\d+$/) {
        die "Can't parse: $num\n";
    }
    return $num;
}
### main program resumes
# Command-line handling.  Defaults first, then GetOptions overrides them.
my $threshold = 1; # omit 0 octet sized files/directories by default
my $blocksize = 1024;
my ($jobid, $all, $bytes, $separate_dirs, $largest);
GetOptions("jobid|j=i" => \$jobid,
           "threshold|t=s" => \$threshold,
           "separate-dirs|S" => \$separate_dirs,
           "all|a" => \$all,
           "bytes|b" => \$bytes,
           "block-size|B=s" => \$blocksize,
           "largest|L=i" => \$largest,
           "m" => sub { $blocksize = "1Mi" },
    ) || usage();
# A job id is mandatory; show the help text when it is missing.
usage() unless $jobid;
# --largest is used as a count from the top of the sorted sizes, so a
# negative value makes no sense and would select an arbitrary cutoff.
usage() if defined $largest && $largest < 0;
# Sizes may carry unit suffixes ("10k", "1Mi", ...); normalise to numbers.
$threshold = convert_units($threshold);
$blocksize = convert_units($blocksize);
# Every reported total is divided by the block size, so a zero block size
# would abort the report with a division-by-zero error.
die "Block size must be greater than zero\n" unless $blocksize > 0;
# Suffixes indexed by (length of the encoded field) % 4; the matching one is
# appended to the field before it is handed to decode_base64.
my @padding = ("", "A==", "==", "=");

# Alternative size extractor that runs field 7 (zero-based) of the LStat
# string through MIME::Base64's decode_base64 and reads the resulting bytes
# as one big-endian integer.
# Nothing in the visible script calls this function; the extractor actually
# used is chosen from extract_size_from_lstat / extract_blocks_from_lstat.
sub extract_size_from_lstat_foo {
    my ($b64) = (split(" ", $_[0]))[7];
    my $raw = decode_base64($b64 . $padding[length($b64) % 4]);
    my $acc = 0;
    # Fold the decoded bytes, most significant first.
    $acc = ($acc << 8) + $_ for unpack("C*", $raw);
    return $acc;
}
# Choose the per-row size extractor once: with --bytes the size field is
# used, otherwise the block-count based extractor.
my $extract_size;
if ($bytes) {
    $extract_size = \&extract_size_from_lstat;
}
else {
    $extract_size = \&extract_blocks_from_lstat;
}
# Connect to the catalog database; give up with exit status 2 on failure.
my $dbh;
unless ($dbh = DBI->connect($dsn, $dbuser, $dbpass, {AutoCommit => 0})) {
    print STDERR "Could not connect to database $db on host $dbhost\n";
    exit 2;
}
print STDERR "DB connect \n";
# List path, file name and LStat for every file recorded for the job.
# The job id is bound through a placeholder instead of being interpolated
# into the SQL text.
my $sth = $dbh->prepare("
SELECT p.Path, fn.Name, LStat
FROM Path p
JOIN File f ON f.PathId = p.PathId
JOIN Filename fn ON f.FilenameId = fn.FilenameId
WHERE f.JobId = ?");
# A failed prepare/execute must not fall through to the fetch loop, where
# it would look like a job without files and produce an empty report.
unless ($sth) {
    print STDERR "Could not prepare file query: " . $dbh->errstr . "\n";
    exit 2;
}
unless ($sth->execute($jobid)) {
    print STDERR "Could not run file query for job $jobid: " . $sth->errstr . "\n";
    exit 2;
}
print STDERR "DB prepare \n";
# Accumulate usage per path.  %du maps a directory path (and, with --all,
# a full file path) to the summed size of everything counted beneath it.
my %du;
my $rowcount = 0;
while (my ($path, $fname, $lstat) = $sth->fetchrow_array) {
    my $size = $extract_size->($lstat);
    print STDERR "Got '$path' size $size\n";
    # With --all each file gets its own entry.  A row with an empty name
    # would address the same key as $path and be counted twice, so only
    # rows that actually carry a name get the extra entry.
    $du{"$path$fname"} += $size if $all && defined($fname) && $fname ne '';
    $du{$path} += $size;
    unless ($separate_dirs) {
        # Credit every ancestor directory by stripping one trailing
        # "component/" at a time.  Looping on the success of the
        # substitution (instead of comparing against '/') guarantees
        # termination for paths that never reduce to '/', e.g. relative,
        # empty or drive-letter style paths.
        while ($path =~ s,[^/]+/$,,) {
            last if $path eq '';
            $du{$path} += $size;
        }
    }
    # Progress indicator; counted for every row, including --separate-dirs.
    if ((++$rowcount % 1000) == 0) {
        print STDERR "got $rowcount rows\r";
    }
}
$dbh->disconnect();
print STDERR "done reading database.\n";
# --largest: raise the threshold to the size of the NUM-th largest entry,
# unless the threshold already set is higher than that.
if ($largest) {
    my @descending = sort { $b <=> $a } values %du;
    my $cutoff = 0;
    $cutoff = $descending[$largest - 1] if $largest < @descending;
    if (!$threshold || $threshold <= $cutoff) {
        $threshold = $cutoff;
    }
}
# We add ~ to the filename so that the parent directory is printed
# below the children. ('~' could be any character which sorts after
# '/')
my @ordered = sort { "$a~" cmp "$b~" } keys %du;
foreach my $entry (@ordered) {
    my $usage = $du{$entry};
    if ($usage >= $threshold) {
        # Adding (blocksize - 1) before dividing rounds up to whole blocks
        # once %d truncates the quotient.
        printf("%9d %s\n", ($usage + $blocksize - 1) / $blocksize, $entry);
    }
}
diff --git a/bacula/scripts/bacula-du b/bacula/scripts/bacula-du
old mode 100644
new mode 100755
index e32b191..0e165ce
--- a/bacula/scripts/bacula-du
+++ b/bacula/scripts/bacula-du
@@ -34,13 +34,9 @@ use Data::Dumper;
my $dbhost = "localhost";
my $db = "bacula";
-my $dsn = "DBI:Pg:dbname=$db;host=$dbhost";
-my $dbuser = "postgres";
-my $dbpass = "";
-# Suggestion for MySQL:
-# my $dsn = "DBI:mysql:database=mysql;mysql_read_default_group=clientp";
-# my $dbuser = "mysql";
-# my $dbpass = undef;
+my $dsn = "DBI:mysql:database=$db;mysql_read_default_group=clientp";
+my $dbuser = $db;
+my $dbpass = undef;
#######################
@@ -126,6 +122,7 @@ unless ($dbh = DBI->connect($dsn, $dbuser, $dbpass, {AutoCommit => 0})) {
exit 2;
}
+ print STDERR "DB connect \n";
my $sth = $dbh->prepare("
SELECT p.Path, fn.Name, LStat
FROM Path p
@@ -134,11 +131,12 @@ my $sth = $dbh->prepare("
WHERE f.JobId = $jobid");
$sth->execute();
+ print STDERR "DB prepare \n";
my %du;
my $rowcount = 0;
while (my ($path, $fname, $lstat) = $sth->fetchrow_array) {
my $size = $extract_size->($lstat);
- # print STDERR "Got '$path' size $size\n";
+ print STDERR "Got '$path' size $size\n";
$du{"$path$fname"} += $size if $all;
$du{$path} += $size;
next if $separate_dirs;
------------------------------------------------------------------------------
Xperia(TM) PLAY
It's a major breakthrough. An authentic gaming
smartphone on the nation's most reliable network.
And it wants your games.
http://p.sf.net/sfu/verizon-sfdev
_______________________________________________
Bacula-users mailing list
Bacula-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/bacula-users