Add a basic public-inbox-eindex script for creating and updating external search indices. It is not documented yet, but it runs... --- MANIFEST | 1 + script/public-inbox-eindex | 43 ++++++++++++++++++++++++++++++++++++++ t/extsearch.t | 26 +++++++++++++++++++++++ 3 files changed, 70 insertions(+) create mode 100644 script/public-inbox-eindex
diff --git a/MANIFEST b/MANIFEST index 418a2f17..10561cd2 100644 --- a/MANIFEST +++ b/MANIFEST @@ -225,6 +225,7 @@ sa_config/user/.spamassassin/user_prefs script/public-inbox-compact script/public-inbox-convert script/public-inbox-edit +script/public-inbox-eindex script/public-inbox-httpd script/public-inbox-imapd script/public-inbox-index diff --git a/script/public-inbox-eindex b/script/public-inbox-eindex new file mode 100644 index 00000000..c26edb93 --- /dev/null +++ b/script/public-inbox-eindex @@ -0,0 +1,43 @@ +#!perl -w +# Copyright (C) 2020 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +# Basic tool to create and update an external (detached) Xapian search index covering one or more public-inboxes. +use strict; +use v5.10.1; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: +usage: public-inbox-eindex [options] EINDEX_DIR [INBOX_DIR] + + Create and update external (detached) search indices + + --no-fsync speed up indexing, risk corruption on power outage + -L LEVEL `medium', or `full' (default: full) + --all index all configured inboxes + --jobs=NUM set or disable parallelization (NUM=0) + --batch-size=BYTES flush changes to OS after a given number of bytes + --max-size=BYTES do not index messages larger than the given size + --verbose | -v increase verbosity (may be repeated) + +BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes) +See public-inbox-eindex(1) man page for full documentation. +EOF +my $opt = { quiet => -1, compact => 0, max_size => undef, fsync => 1 }; +GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i + fsync|sync! 
+ indexlevel|index-level|L=s max_size|max-size=s + batch_size|batch-size=s + skip-docdata all help|h)) + or die $help; +if ($opt->{help}) { print $help; exit 0 }; +die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; + +# require lazily to speed up --help +my $eidx_dir = shift(@ARGV) // die "E: $help"; +require PublicInbox::Admin; +my $cfg = PublicInbox::Config->new; +my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); +PublicInbox::Admin::require_or_die(qw(-search)); +require PublicInbox::ExtSearchIdx; +my $eidx = PublicInbox::ExtSearchIdx->new($eidx_dir, $opt); +$eidx->attach_inbox($_) for @ibxs; +$eidx->eidx_sync($opt); diff --git a/t/extsearch.t b/t/extsearch.t index 54927c50..dfec6b6f 100644 --- a/t/extsearch.t +++ b/t/extsearch.t @@ -4,9 +4,35 @@ use strict; use Test::More; use PublicInbox::TestCommon; +use Fcntl qw(:seek); require_git(2.6); require_mods(qw(DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::ExtSearch'; use_ok 'PublicInbox::ExtSearchIdx'; +my ($home, $for_destroy) = tmpdir(); +local $ENV{HOME} = $home; +mkdir "$home/.public-inbox" or BAIL_OUT $!; +open my $fh, '>', "$home/.public-inbox/config" or BAIL_OUT $!; +print $fh <<EOF or BAIL_OUT $!; +[publicinboxMda] + spamcheck = none +EOF +close $fh or BAIL_OUT $!; +my $v2addr = 'v2test@example.com'; +my $v1addr = 'v1test@example.com'; +ok(run_script([qw(-init -V2 v2test), "$home/v2test", + 'http://example.com/v2test', $v2addr ]), 'v2test init'); +my $env = { ORIGINAL_RECIPIENT => $v2addr }; +open($fh, '<', 't/utf8.eml') or BAIL_OUT("open t/utf8.eml: $!"); +run_script(['-mda', '--no-precheck'], $env, { 0 => $fh }) or BAIL_OUT '-mda'; + +ok(run_script([qw(-init -V1 v1test), "$home/v1test", + 'http://example.com/v1test', $v1addr ]), 'v1test init'); +$env = { ORIGINAL_RECIPIENT => $v1addr }; +seek($fh, 0, SEEK_SET) or BAIL_OUT $!; +run_script(['-mda', '--no-precheck'], $env, { 0 => $fh }) or BAIL_OUT '-mda'; +run_script(['-index', "$home/v1test"]) or BAIL_OUT "index 
$?"; + +ok(run_script([qw(-eindex --all), "$home/eindex"]), 'eindex init'); done_testing; -- unsubscribe: one-click, see List-Unsubscribe header archive: https://public-inbox.org/meta/