On Tue, Jun 16, 2009 at 01:23:24PM -0700, [email protected] wrote:
>
> On Jun 14, 2009, at 8:25 PM, Marvin Humphrey wrote:
>
> >On Sun, Jun 14, 2009 at 06:42:20PM -0700, [email protected]
> >wrote:
> >
> >>Strangely, make_termquery still works (rev. 4798) but make_term_query
> >>causes a bus error. Having both present also causes the crash. (Try
> >>the same script with that change.)
> >
> >I presume that this is without applying the patch to RegexpTermQuery? If
> >so, then I get the behavior you describe.
>
> I have applied the patch. But, since it was inline and not attached to
> your message, it got a bit scrambled and I had to apply it by hand.
> Maybe I made a mistake. The new version is on CPAN already. Could you
> test it? I still get the bus error both with 5.8.8 and 5.10.0.
This turns out to have been an error in ORMatcher.c. It was 100% reproducible
once I got everything squared away, so I can only conclude that I botched the
last go-round.
The bug, which was a failed loop termination resulting in a NULL-pointer
dereference, has been fixed by r4813. WildCardQuery 0.04 works with that
revision.
Nevertheless, I had already concluded that deep-sixing Tally was the best
course of action. I've left some compatibility code in there for now, but
when you have the opportunity, I would appreciate it if you would apply the
patch below (also attached, and should survive via the CC) and release a new
version to CPAN. Once that's done, I can remove the compatibility code.
Marvin Humphrey
diff -ur KSx-Search-WildCardQuery-0.04/lib/KSx/Search/RegexpTermQuery.pm KSx-Search-WildCardQuery-0.04-revised/lib/KSx/Search/RegexpTermQuery.pm
--- KSx-Search-WildCardQuery-0.04/lib/KSx/Search/RegexpTermQuery.pm 2009-06-14 18:39:35.000000000 -0700
+++ KSx-Search-WildCardQuery-0.04-revised/lib/KSx/Search/RegexpTermQuery.pm 2009-06-16 19:52:52.000000000 -0700
@@ -65,6 +65,9 @@
my($pack, %args) = @_;
my $searcher = $args{searchable};
+ my $reader = $searcher->get_reader;
+ my $lex_reader = $reader->fetch("KinoSearch::Index::LexiconReader");
+ my $post_reader = $reader->fetch("KinoSearch::Index::PostingsReader");
# Retrieve the correct Similarity for the Query's field.
my $sim = $args{similarity} =
@@ -77,8 +80,7 @@
# Get a lexicon and find our place therein
my( $re, $prefix ) = ($re{$parent}, $prefix{$parent});
ref $re eq 'Regexp' or $re = qr/$re/; # avoid repetitive recompilation
- my $reader = $searcher->get_reader;
- my $lexcn = $reader->lexicon( field => $field{$parent} );
+ my $lexcn = $lex_reader->lexicon( field => $field{$parent} );
$lexcn->seek(defined $prefix ? $prefix : '');
# iterate through it, stopping at terms that match
@@ -103,8 +105,7 @@
# the doc freq has to be 2, since the re matches two docs. The doc
# freqs of the individual terms are 1 and 2, so we can’t add or
# average them.
-# push @plists, my $plist = $reader->posting_list(
- my $plist = $reader->posting_list(
+ my $plist = $post_reader->posting_list(
term => $term,
field => $field{$parent},
);
@@ -214,10 +215,8 @@
package KSx::Search::RegexpTermScorer;
use base 'KinoSearch::Search::Matcher';
-use KinoSearch::Search::Tally;
-
use Hash::Util::FieldHash::Compat 'fieldhashes';
-fieldhashes\my( %doc_nums, %pos, %wv, %sim, %compiler, %tally );
+fieldhashes\my( %doc_nums, %pos, %wv, %sim, %compiler );
sub new {
my ($class, %args) = @_;
@@ -234,7 +233,6 @@
$pos{$self} = -1;
$wv {$self} = $compiler->get_value;
$compiler{$self} = $compiler;
- $tally{$self} = KinoSearch::Search::Tally->new;
$self
}
@@ -253,20 +251,13 @@
return $pos < scalar @$doc_nums ? $$doc_nums[$pos] : 0;
}
-sub tally {
+sub score {
my $self = shift;
my $pos = $pos{$self};
my $doc_nums = $doc_nums{$self};
- return unless $pos < scalar @$doc_nums;
-
- (my $tally = $tally{$self})
- ->set_score(
- $wv{$self} * $sim{$self}->tf(
- $tfs{$compiler{$self}}{$$doc_nums[$pos]}
- )
- );
-
- $tally;
+ return $wv{$self} * $sim{$self}->tf(
+ $tfs{$compiler{$self}}{$$doc_nums[$pos]}
+ );
}
Only in KSx-Search-WildCardQuery-0.04-revised/: blib
diff -ur KSx-Search-WildCardQuery-0.04/lib/KSx/Search/RegexpTermQuery.pm KSx-Search-WildCardQuery-0.04-revised/lib/KSx/Search/RegexpTermQuery.pm
--- KSx-Search-WildCardQuery-0.04/lib/KSx/Search/RegexpTermQuery.pm 2009-06-14 18:39:35.000000000 -0700
+++ KSx-Search-WildCardQuery-0.04-revised/lib/KSx/Search/RegexpTermQuery.pm 2009-06-16 19:52:52.000000000 -0700
@@ -65,6 +65,9 @@
my($pack, %args) = @_;
my $searcher = $args{searchable};
+ my $reader = $searcher->get_reader;
+ my $lex_reader = $reader->fetch("KinoSearch::Index::LexiconReader");
+ my $post_reader = $reader->fetch("KinoSearch::Index::PostingsReader");
# Retrieve the correct Similarity for the Query's field.
my $sim = $args{similarity} =
@@ -77,8 +80,7 @@
# Get a lexicon and find our place therein
my( $re, $prefix ) = ($re{$parent}, $prefix{$parent});
ref $re eq 'Regexp' or $re = qr/$re/; # avoid repetitive recompilation
- my $reader = $searcher->get_reader;
- my $lexcn = $reader->lexicon( field => $field{$parent} );
+ my $lexcn = $lex_reader->lexicon( field => $field{$parent} );
$lexcn->seek(defined $prefix ? $prefix : '');
# iterate through it, stopping at terms that match
@@ -103,8 +105,7 @@
# the doc freq has to be 2, since the re matches two docs. The doc
# freqs of the individual terms are 1 and 2, so we can’t add or
# average them.
-# push @plists, my $plist = $reader->posting_list(
- my $plist = $reader->posting_list(
+ my $plist = $post_reader->posting_list(
term => $term,
field => $field{$parent},
);
@@ -214,10 +215,8 @@
package KSx::Search::RegexpTermScorer;
use base 'KinoSearch::Search::Matcher';
-use KinoSearch::Search::Tally;
-
use Hash::Util::FieldHash::Compat 'fieldhashes';
-fieldhashes\my( %doc_nums, %pos, %wv, %sim, %compiler, %tally );
+fieldhashes\my( %doc_nums, %pos, %wv, %sim, %compiler );
sub new {
my ($class, %args) = @_;
@@ -234,7 +233,6 @@
$pos{$self} = -1;
$wv {$self} = $compiler->get_value;
$compiler{$self} = $compiler;
- $tally{$self} = KinoSearch::Search::Tally->new;
$self
}
@@ -253,20 +251,13 @@
return $pos < scalar @$doc_nums ? $$doc_nums[$pos] : 0;
}
-sub tally {
+sub score {
my $self = shift;
my $pos = $pos{$self};
my $doc_nums = $doc_nums{$self};
- return unless $pos < scalar @$doc_nums;
-
- (my $tally = $tally{$self})
- ->set_score(
- $wv{$self} * $sim{$self}->tf(
- $tfs{$compiler{$self}}{$$doc_nums[$pos]}
- )
- );
-
- $tally;
+ return $wv{$self} * $sim{$self}->tf(
+ $tfs{$compiler{$self}}{$$doc_nums[$pos]}
+ );
}
Only in KSx-Search-WildCardQuery-0.04-revised/lib/KSx/Search: .RegexpTermQuery.pm.swp
Only in KSx-Search-WildCardQuery-0.04-revised/: Makefile
Only in KSx-Search-WildCardQuery-0.04-revised/: pm_to_blib