Avoid slurping gigantic (e.g. 100000) result sets into a single response if a giant limit is specified, and instead use 10000 as a window for the mset with a given offset. We'll also warn and hint about the --limit= switch when the estimated result set is larger than the default limit. --- Documentation/lei-q.pod | 6 ++++-- lib/PublicInbox/LeiLcat.pm | 3 +-- lib/PublicInbox/LeiQuery.pm | 9 +++++++-- lib/PublicInbox/LeiSavedSearch.pm | 18 +++++++++++------- lib/PublicInbox/LeiUp.pm | 25 +++++++++---------------- lib/PublicInbox/LeiXSearch.pm | 18 +++++++++++++++--- 6 files changed, 47 insertions(+), 32 deletions(-)
diff --git a/Documentation/lei-q.pod b/Documentation/lei-q.pod index 2823ced8..e1e3666d 100644 --- a/Documentation/lei-q.pod +++ b/Documentation/lei-q.pod @@ -10,7 +10,7 @@ lei q [OPTIONS] (--stdin|-) =head1 DESCRIPTION -Search for messages across the lei store and externals. +Search for messages across the lei/store and externals. =for comment TODO: Give common prefixes, or at least a description/reference. @@ -192,7 +192,9 @@ Default: fcntl,dotlock =item -n NUMBER -Limit the number of matches. +Fuzzy limit the number of matches per-local external and lei/store. +Messages added by the L<--threads> switch do not count towards this +limit, and there is no limit on remote externals. Default: 10000 diff --git a/lib/PublicInbox/LeiLcat.pm b/lib/PublicInbox/LeiLcat.pm index c13e2153..d553b187 100644 --- a/lib/PublicInbox/LeiLcat.pm +++ b/lib/PublicInbox/LeiLcat.pm @@ -144,9 +144,8 @@ sub lei_lcat { $lei->ale->refresh_externals($lxs, $lei); $lei->_lei_store(1); my $opt = $lei->{opt}; - my %mset_opt = map { $_ => $opt->{$_} } qw(threads limit offset); + my %mset_opt; $mset_opt{asc} = $opt->{'reverse'} ? 1 : 0; - $mset_opt{limit} //= 10000; $opt->{sort} //= 'relevance'; $mset_opt{relevance} = 1; $lei->{mset_opt} = \%mset_opt; diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm index d5f132f1..cb5ac8fb 100644 --- a/lib/PublicInbox/LeiQuery.pm +++ b/lib/PublicInbox/LeiQuery.pm @@ -41,6 +41,12 @@ sub _start_query { # used by "lei q" and "lei up" # descending docid order is cheapest, MUA controls sorting order $self->{mset_opt}->{relevance} //= -2 if $l2m || $opt->{threads}; + + my $tot = $self->{mset_opt}->{total} //= $self->{opt}->{limit} // 10000; + $self->{mset_opt}->{limit} = $tot > 10000 ? 
10000 : $tot; + $self->{mset_opt}->{offset} //= 0; + $self->{mset_opt}->{threads} //= $opt->{threads}; + if ($self->{net}) { require PublicInbox::LeiAuth; $self->{auth} = PublicInbox::LeiAuth->new @@ -118,9 +124,8 @@ sub lei_q { my $lxs = lxs_prepare($self) or return; $self->ale->refresh_externals($lxs, $self); my $opt = $self->{opt}; - my %mset_opt = map { $_ => $opt->{$_} } qw(threads limit offset); + my %mset_opt; $mset_opt{asc} = $opt->{'reverse'} ? 1 : 0; - $mset_opt{limit} //= 10000; if (defined(my $sort = $opt->{'sort'})) { if ($sort eq 'relevance') { $mset_opt{relevance} = 1; diff --git a/lib/PublicInbox/LeiSavedSearch.pm b/lib/PublicInbox/LeiSavedSearch.pm index 637456e4..3e10f780 100644 --- a/lib/PublicInbox/LeiSavedSearch.pm +++ b/lib/PublicInbox/LeiSavedSearch.pm @@ -29,6 +29,8 @@ sub BOOL_FIELDS () { qw(external local remote import-remote import-before threads) } +sub SINGLE_FIELDS () { qw(limit dedupe output) } + sub lss_dir_for ($$;$) { my ($lei, $dstref, $on_fs) = @_; my $pfx; @@ -89,9 +91,9 @@ sub list { } @$out } -sub translate_dedupe ($$$) { - my ($self, $lei, $dd) = @_; - $dd //= 'content'; +sub translate_dedupe ($$) { + my ($self, $lei) = @_; + my $dd = $lei->{opt}->{dedupe} // 'content'; return 1 if $dd eq 'content'; # the default return $self->{"-dedupe_$dd"} = 1 if ($dd eq 'oid' || $dd eq 'mid'); $lei->fail("--dedupe=$dd requires --no-save"); @@ -128,8 +130,7 @@ sub new { # new saved search "lei q --save" File::Path::make_path($dir); # raises on error $self->{-cfg} = {}; my $f = $self->{'-f'} = "$dir/lei.saved-search"; - my $dd = $lei->{opt}->{dedupe}; - translate_dedupe($self, $lei, $dd) or return; + translate_dedupe($self, $lei) or return; open my $fh, '>', $f or return $lei->fail("open $f: $!"); my $sq_dst = PublicInbox::Config::squote_maybe($dst); my $q = $lei->{mset_opt}->{q_raw} // die 'BUG: {q_raw} missing'; @@ -139,15 +140,14 @@ sub new { # new saved search "lei q --save" $q = "\tq = ".cquote_val($q); } $dst = 
"$lei->{ovv}->{fmt}:$dst" if $dst !~ m!\Aimaps?://!i; + $lei->{opt}->{output} = $dst; print $fh <<EOM; ; to refresh with new results, run: lei up $sq_dst ; `maxuid' and `lastresult' lines are maintained by "lei up" for optimization [lei] $q [lei "q"] - output = $dst EOM - print $fh "\tdedupe = $dd\n" if $dd; for my $k (ARRAY_FIELDS) { my $ary = $lei->{opt}->{$k} // next; for my $x (@$ary) { @@ -158,6 +158,10 @@ EOM my $val = $lei->{opt}->{$k} // next; print $fh "\t$k = ".($val ? 1 : 0)."\n"; } + for my $k (SINGLE_FIELDS) { + my $val = $lei->{opt}->{$k} // next; + print $fh "\t$k = $val\n"; + } close($fh) or return $lei->fail("close $f: $!"); $self->{lock_path} = "$self->{-f}.flock"; $self->{-ovf} = "$dir/over.sqlite3"; diff --git a/lib/PublicInbox/LeiUp.pm b/lib/PublicInbox/LeiUp.pm index abb05d46..89cf0112 100644 --- a/lib/PublicInbox/LeiUp.pm +++ b/lib/PublicInbox/LeiUp.pm @@ -18,7 +18,6 @@ sub up1 ($$) { my $lss = PublicInbox::LeiSavedSearch->up($lei, $out) or return; my $f = $lss->{'-f'}; my $mset_opt = $lei->{mset_opt} = { relevance => -2 }; - $mset_opt->{limit} = $lei->{opt}->{limit} // 10000; my $q = $mset_opt->{q_raw} = $lss->{-cfg}->{'lei.q'} // return $lei->fail("lei.q unset in $f"); my $lse = $lei->{lse} // die 'BUG: {lse} missing'; @@ -27,24 +26,18 @@ sub up1 ($$) { } else { $lse->query_approxidate($lse->git, $mset_opt->{qstr} = $q); } - my $o = $lei->{opt}->{output} = $lss->{-cfg}->{'lei.q.output'} // - return $lei->fail("lei.q.output unset in $f"); - ref($o) and return $lei->fail("multiple values of lei.q.output in $f"); - if (defined(my $dd = $lss->{-cfg}->{'lei.q.dedupe'})) { - $lss->translate_dedupe($lei, $dd) or return; - $lei->{opt}->{dedupe} = $dd; - } - for my $k (qw(only include exclude)) { + # n.b. 
only a few CLI args are accepted for "up", so //= usually sets + for my $k ($lss->ARRAY_FIELDS) { my $v = $lss->{-cfg}->get_all("lei.q.$k") // next; - $lei->{opt}->{$k} = $v; + $lei->{opt}->{$k} //= $v; } - for my $k (qw(external local remote - import-remote import-before threads)) { - my $c = "lei.q.$k"; - my $v = $lss->{-cfg}->{$c} // next; - ref($v) and return $lei->fail("multiple values of $c in $f"); - $lei->{opt}->{$k} = $v; + for my $k ($lss->BOOL_FIELDS, $lss->SINGLE_FIELDS) { + my $v = $lss->{-cfg}->get_1('lei.q', $k) // next; + $lei->{opt}->{$k} //= $v; } + my $o = $lei->{opt}->{output} // ''; + return $lei->fail("lei.q.output unset in $f") if $o eq ''; + $lss->translate_dedupe($lei) or return; $lei->{lss} = $lss; # for LeiOverview->new and query_remote_mboxrd my $lxs = $lei->lxs_prepare or return; $lei->ale->refresh_externals($lxs, $lei); diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index 2227c2ac..584ffde9 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -110,10 +110,20 @@ sub recent { sub over {} +sub _check_mset_limit ($$$) { + my ($lei, $desc, $mset) = @_; + return if defined($lei->{opt}->{limit}); # user requested limit + my $est = $mset->get_matches_estimated; + my $tot = $lei->{mset_opt}->{total}; + $est > $tot and $lei->qerr(<<""); +# $desc estimated matches ($est) exceeds default --limit=$tot + +} + sub _mset_more ($$) { my ($mset, $mo) = @_; my $size = $mset->size; - $size >= $mo->{limit} && (($mo->{offset} += $size) < $mo->{limit}); + $size >= $mo->{limit} && (($mo->{offset} += $size) < $mo->{total}); } # $startq will EOF when do_augment is done augmenting and allow @@ -182,7 +192,7 @@ sub query_one_mset { # for --threads and l2m w/o sort my $first_ids; do { $mset = $srch->mset($mo->{qstr}, $mo); - mset_progress($lei, $dir, $mset->size, + mset_progress($lei, $dir, $mo->{offset} + $mset->size, $mset->get_matches_estimated); wait_startq($lei); # wait for keyword updates my $ids = 
$srch->mset_to_artnums($mset, $mo); @@ -222,6 +232,7 @@ sub query_one_mset { # for --threads and l2m w/o sort } } } while (_mset_more($mset, $mo)); + _check_mset_limit($lei, $dir, $mset); if ($lss && scalar(@$first_ids)) { undef $stop_at; my $max = $first_ids->[0]; @@ -244,7 +255,7 @@ sub query_combined_mset { # non-parallel for non-"--threads" users my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei); do { $mset = $self->mset($mo->{qstr}, $mo); - mset_progress($lei, 'xsearch', $mset->size, + mset_progress($lei, 'xsearch', $mo->{offset} + $mset->size, $mset->get_matches_estimated); wait_startq($lei); # wait for keyword updates for my $mitem ($mset->items) { @@ -252,6 +263,7 @@ sub query_combined_mset { # non-parallel for non-"--threads" users $each_smsg->($smsg, $mitem); } } while (_mset_more($mset, $mo)); + _check_mset_limit($lei, 'xsearch', $mset); undef $each_smsg; # may commit $lei->{ovv}->ovv_atexit_child($lei); } -- unsubscribe: one-click, see List-Unsubscribe header archive: https://public-inbox.org/meta/