Neil Roeth wrote: > Thanks for the hints. I should have been more clear - I have no problem > getting the main page, i.e., [EMAIL PROTECTED] There are > links in that page to bugs.debian.org/cgi-bin/bugreport.cgi?bug=<num> for each > bug, and I want to get each of those as a local web page, too. That is the > part that seems to require more than a simple wget command.
You can use the following patch to the bts command from devscripts. After patching bts, run 'bts cache <package>' or 'bts cache <email>', then use bts -o when offline to view cached bugs. It also progressively caches any bugs you look at while online. --- /usr/bin/bts 2003-01-01 05:37:21.000000000 -0500 +++ bin/bts 2003-04-08 13:42:58.000000000 -0400 @@ -18,11 +18,18 @@ my $browser; # Will set if necessary my $btsurl='http://bugs.debian.org/'; +my $btscgiurl='http://bugs.debian.org/cgi-bin/'; my $btsemail='[EMAIL PROTECTED]'; +my $cachedir=$ENV{HOME}."/.bts_cache/"; + +# Add any temporary files to this array, to ensure they are cleaned up +# properly on program exit. +my @tmpfiles; +$SIG{INT}=sub { unlink @tmpfiles if @tmpfiles }; =head1 SYNOPSIS -B<bts> command [args] [#comment] [.|, command [args] [#comment]] ... +B<bts> [options] command [args] [#comment] [.|, command [args] [#comment]] ... =head1 DESCRIPTION @@ -66,6 +73,41 @@ Please use this program responsibly, and do take our users into consideration. +=head1 OPTIONS + +=over 4 + +=item -o, --offline + +Make bts use cached bugs for the 'show' and 'bugs' commands, if a cache +is available for the requested data. See the cache command, below for +information on setting up a cache. Setting the BUGSOFFLINE environment +variable has the same effect. + +=back + +=cut + +# For now, a very simple parser, instead of Getopt::Long since there are +# so few options. +my $offlinemode=(exists $ENV{BUGSOFFLINE}); +foreach (@ARGV) { + if (/^--(.*)/ || /^-(.*)/) { + my $option=$1; + shift @ARGV; + if ($option eq 'offline' || $option eq 'o') { + $offlinemode=1; + } + else { + print STDERR "Unknown option, \"$option\"\n"; + bts_help(1); + } + } + else { + last; # end of options + } +} + =head1 COMMANDS For full details about the commands, see the BTS documentation. 
@@ -104,7 +146,7 @@ sub bts_show { my $thing=shift or die "display what bug?\n"; - execbrowser($btsurl.$thing); + browse($thing); } =item bugs <package> @@ -128,7 +170,7 @@ } $email=$ENV{DEBEMAIL}; } - execbrowser($btsurl.$email); + browse($email); } =item clone <bug> [new IDs] @@ -314,6 +356,70 @@ mailbts("bug $bug is not forwarded", "notforwarded $bug"); } +=item cache [email address | package] + +Generate or update a cache of bug reports for the given email address +or package. By default it downloads all bugs belonging to the email address +in the DEBEMAIL environment variable. This command may be repeated to +cache bugs belonging to several people or packages. The cached bugs are +stored in ~/.bts_cache/ + +Note that each update of the cache can be rather slow, as it currently +downloads all bugs again. A better interface for programs is needed +than the web pages. + +Once you have set up a cache, you can ask for it to be used with the -o +switch. For example: + + bts -o bugs + bts -o show 12345 + +The BUGSOFFLINE variable can also be set to do the same thing. + +Also, once the cache is set up, bts will update the files in it piecemeal +as it downloads information from the bts. You might thus set up the cache, +and update the whole thing once a week, while letting the automatic cache +updates update the bugs you frequently refer to during the week. + +=cut + +sub bts_cache { + my $tocache=shift; + if (! length $tocache) { + $tocache=$ENV{DEBEMAIL}; + } + if (! length $tocache) { + die "cache what?\n"; + } + + if (! -d $cachedir) { + mkdir($cachedir) || die "mkdir $cachedir: $!"; + } + + my $cachefile=cachefile($tocache); + + my @oldbugs = bugs_from_file($cachefile) if -e $cachefile; + + # download index + my $data=download($tocache); + cache($tocache, $data); + + my %bugs = map { $_ => 1 } bugs_from_file($cachefile); + + # remove old bugs from cache + foreach my $bug (@oldbugs) { + if (! 
$bugs{$bug}) { + unlink cachefile($bug); + } + } + + # download bugs + foreach my $bug (keys %bugs) { + cache($bug, download($bug)); + } + +} + # Add any new commands here. =item version @@ -325,7 +431,7 @@ sub bts_version { (my $progname = $0) =~ s%.*/%%; print STDOUT "$progname version $version\n"; - print STDOUT "Copyright (C) 2001 by Joey Hess <[EMAIL PROTECTED]>.\n"; + print STDOUT "Copyright (C) 2001-2003 by Joey Hess <[EMAIL PROTECTED]>.\n"; print STDOUT "It is licensed under the terms of the GPL.\n"; exit(0); } @@ -340,7 +446,7 @@ sub bts_help { my $exit = $_[0] || 0; (my $progname = $0) =~ s%.*/%%; - print STDERR "Usage: $progname command [args] [#comment] [.|, command [args] [#comment]] ...\n"; + print STDERR "Usage: $progname [options] command [args] [#comment] [.|, command [args] [#comment]] ...\n"; seek DATA, 0, 0; while (<DATA>) { print STDERR "\t$1\n" if /^=item\s(.*)/; @@ -476,9 +582,97 @@ } } +# Downloads a given thing and returns it in one large string. +sub download { + my $thing=shift; + + local $/; + open (DL, "wget -O - $btsurl$thing |") || die "wget failed"; + my $ret=<DL> || die "wget failed"; + close DL || die "wget failed"; + die "wget returned nothing" unless length $ret; + return $ret; +} + +# Given a thing, returns the filename for it in the cache. +sub cachefile { + my $thing=shift; + return $cachedir.$thing.".html"; +} + +# Given a thing and the data downloaded for it, munges and caches it. +sub cache { + my $thing=shift; + my $data=shift; + + my $cachefile=cachefile($thing); + open (OUT_CACHE, ">$cachefile") || die "$cachefile: $!"; + my $time=localtime; + $data =~ s!(<BODY.*>)!$1<p><em>[Locally cached on $time]</em></p>!i; + $data =~ s!"[^"]*bugreport\.cgi\?bug=(\d+)[^"]*"!"$cachedir$1.html"!g; + $data =~ s!"[^"]*pkgreport\.cgi\?(?:pkg|maint|src)=([^"]*)"!"$cachedir$1.html"!g; + print OUT_CACHE $data; + close OUT_CACHE; +} + +# Given a file in the cache, reads all links to bugs from it and returns a +# list of them. 
+sub bugs_from_file { + my $file=shift; + + local $/; + open (IN, $file) || die "$file: $!"; + my $data=<IN>; + close IN; + + return $data =~ m!"$cachedir(\d+)\.html"!g; +} + +# Browses a given thing, with possible caching. +sub browse { + my $thing=shift; + + my $hascache=-d $cachedir; + my $livefile; + my $cachefile=cachefile($thing); + + if ($offlinemode) { + if (! $hascache) { + die "Sorry, you are in offline mode and have no cache. Run \"bts cache\" to create one.\n"; + } + elsif (! -e $cachefile) { + die "Sorry, you are in offline mode and that is not cached. Use \"bts cache\" to update the cache.\n"; + } + } + if (! $offlinemode && $hascache) { + my $data=download($thing); + + cache($thing, $data); + + $livefile=cachefile("live-$thing"); + push @tmpfiles, $livefile; + open (OUT_LIVE, ">$livefile") || die "$livefile: $!"; + # Correct relative urls to point to the bts. + $data =~ s/(?!\/)(\w+\.cgi)/$btscgiurl$1/g; + print OUT_LIVE $data; + close OUT_LIVE; + } + if ($hascache) { + if ($livefile) { + execbrowser($livefile); + } + else { + execbrowser($cachefile); + } + } + else { + execbrowser($btsurl.$thing); + } +} + # Determines which browser to use sub execbrowser { - my $URL = $_[0]; + my $URL = shift; my $browserlist = $ENV{'BROWSER'} || 'w3m:links:lynx:mozilla -raise -remote "openURL(%s,new-window)":netscape -raise -remote "openURL(%s,new-window)"'; @@ -509,9 +703,15 @@ $browser = join('', @split_command); $browser .= " $URL" unless $substs; - exec $browser or die "Couldn't exec $browser: $!"; + # Not really exec, because it needs to clean up after. + system($browser) == 0 || die "Couldn't exec $browser: $!"; + unlink @tmpfiles if @tmpfiles; + exit(0); } +# Clean up +unlink @tmpfiles if @tmpfiles; + =back =head1 ENVIRONMENT VARIABLES @@ -543,12 +743,14 @@ If set, it specifies the browser to use for the 'show' and 'bugs' options. See the description above. 
-=back +=over 4 + +=item BUGSOFFLINE -=head1 BUGS +If set and if cached data exists, it will be used for the 'show' and 'bugs' +options. Equivalent to the -o switch. -No caching is done of requested data from the BTS, unless the web browser -you use is configured to use a proxy. +=back =head1 COPYRIGHT -- see shy jo
pgp00000.pgp
Description: PGP signature