Decrement regen_down when visiting messages that appear in %D that we know will later be deleted. This ensures consistent message numbers are generated no matter which commit number is on top, allowing deletes to propagate separately from the messages they delete without causing problems.
The v2 trees already do this, and when the indexes are deleted and rebuilt they maintain their commit numbers. Add a v1 version of the v2reindex test to verify that reindexing is working properly on v1 as well as v2. Signed-off-by: "Eric W. Biederman" <[email protected]> --- lib/PublicInbox/SearchIdx.pm | 7 ++- t/v1reindex.t | 109 +++++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 1 deletion(-) create mode 100644 t/v1reindex.t diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 107cd3457133..0e0796c12c12 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -561,7 +561,12 @@ sub read_log { while (defined($line = <$log>)) { if ($line =~ /$addmsg/o) { my $blob = $1; - delete $D{$blob} and next; + if (delete $D{$blob}) { + if (defined $self->{regen_down}) { + $self->{regen_down}--; + } + next; + } my $mime = do_cat_mail($git, $blob, \$bytes) or next; batch_adjust(\$max, $bytes, $batch_cb, $latest); $add_cb->($self, $mime, $bytes, $blob); diff --git a/t/v1reindex.t b/t/v1reindex.t new file mode 100644 index 000000000000..7b8d883753ee --- /dev/null +++ b/t/v1reindex.t @@ -0,0 +1,109 @@ +# Copyright (C) 2018 all contributors <[email protected]> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use strict; +use warnings; +use Test::More; +use PublicInbox::MIME; +use PublicInbox::ContentId qw(content_digest); +use File::Temp qw/tempdir/; +use File::Path qw(remove_tree); + +foreach my $mod (qw(DBD::SQLite Search::Xapian)) { + eval "require $mod"; + plan skip_all => "$mod missing for v1reindex.t" if $@; +} +use_ok 'PublicInbox::SearchIdx'; +use_ok 'PublicInbox::Import'; +my $mainrepo = tempdir('pi-v1reindex-XXXXXX', TMPDIR => 1, CLEANUP => 1); +is(system(qw(git init --bare), $mainrepo), 0); +my $ibx_config = { + mainrepo => $mainrepo, + name => 'test-v1reindex', + -primary_address => '[email protected]', +}; +my $ibx = PublicInbox::Inbox->new($ibx_config); +my $mime = 
PublicInbox::MIME->create( + header => [ + From => '[email protected]', + To => '[email protected]', + Subject => 'this is a subject', + Date => 'Fri, 02 Oct 1993 00:00:00 +0000', + ], + body => "hello world\n", +); +my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); +foreach my $i (1..10) { + $mime->header_set('Message-Id', "<$i\@example.com>"); + ok($im->add($mime), "message $i added"); + if ($i == 4) { + $im->remove($mime); + } +} + +if ('test remove later') { + $mime->header_set('Message-Id', "<5\@example.com>"); + $im->remove($mime); +} + +$im->done; +my $rw = PublicInbox::SearchIdx->new($ibx, 1); +eval { $rw->index_sync() }; +is($@, '', 'no error from indexing'); + +my $minmax = [ $ibx->mm->minmax ]; +ok(defined $minmax->[0] && defined $minmax->[1], 'minmax defined'); +is_deeply($minmax, [ 1, 10 ], 'minmax as expected'); + +$rw = PublicInbox::SearchIdx->new($ibx, 1); +eval { $rw->index_sync({reindex => 1}) }; +is($@, '', 'no error from reindexing'); +$im->done; + +my $xap = "$mainrepo/public-inbox/xapian".PublicInbox::Search::SCHEMA_VERSION(); +remove_tree($xap); +ok(!-d $xap, 'Xapian directories removed'); +$rw = PublicInbox::SearchIdx->new($ibx, 1); + +eval { $rw->index_sync({reindex => 1}) }; +is($@, '', 'no error from reindexing'); +$im->done; +ok(-d $xap, 'Xapian directories recreated'); + +delete $ibx->{mm}; +is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); + +ok(unlink "$mainrepo/public-inbox/msgmap.sqlite3", 'remove msgmap'); +remove_tree($xap); +$rw = PublicInbox::SearchIdx->new($ibx, 1); + +ok(!-d $xap, 'Xapian directories removed again'); +{ + my @warn; + #local $SIG{__WARN__} = sub { push @warn, @_ }; + eval { $rw->index_sync({reindex => 1}) }; + is($@, '', 'no error from reindexing without msgmap'); + is(scalar(@warn), 0, 'no warnings from reindexing'); + $im->done; + ok(-d $xap, 'Xapian directories recreated'); + delete $ibx->{mm}; + is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); +} + +ok(unlink 
"$mainrepo/public-inbox/msgmap.sqlite3", 'remove msgmap'); +remove_tree($xap); +$rw = PublicInbox::SearchIdx->new($ibx, 1); + +ok(!-d $xap, 'Xapian directories removed again'); +{ + my @warn; + local $SIG{__WARN__} = sub { push @warn, @_ }; + eval { $rw->index_sync({reindex => 1}) }; + is($@, '', 'no error from reindexing without msgmap'); + is_deeply(\@warn, [], 'no warnings'); + $im->done; + ok(-d $xap, 'Xapian directories recreated'); + delete $ibx->{mm}; + is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); +} + +done_testing(); -- 2.17.1 -- unsubscribe: [email protected] archive: https://public-inbox.org/meta/
