On 09/02/2012 02:49, Marvin Humphrey wrote:
> After reviewing the Lucy::Simple code, I realized that we can avoid breaking
> compat with only a few extra lines.
> * If the index exists during new(), extract the schema and type from what's
> on disk.
> * Otherwise, create a new EasyAnalyzer for the type.
> That way, we avoid a schema conflict crash when indexes built by Lucy::Simple
> prior to 0.4.0 are read by 0.4.0 or above.
I tried to implement this and ran into two little problems:
1. If the index doesn't exist and a schema isn't supplied, a "no schema"
exception is thrown, but the write lock isn't released.
2. I didn't find a way to get the schema of an indexer from Perl.
See the attached patch for my attempt to fix this.
Nick
diff --git a/core/Lucy/Index/Indexer.c b/core/Lucy/Index/Indexer.c
index 9139b2f..b3f74b9 100644
--- a/core/Lucy/Index/Indexer.c
+++ b/core/Lucy/Index/Indexer.c
@@ -116,6 +116,7 @@ Indexer_init(Indexer *self, Schema *schema, Obj *index,
}
else {
if (!latest_snapfile) {
+ S_release_write_lock(self);
THROW(ERR, "No Schema supplied, and can't find one in the index");
}
else {
@@ -554,6 +555,11 @@ Indexer_commit(Indexer *self) {
S_release_write_lock(self);
}
+Schema*
+Indexer_get_schema(Indexer *self) {
+ return self->schema;
+}
+
SegWriter*
Indexer_get_seg_writer(Indexer *self) {
return self->seg_writer;
diff --git a/core/Lucy/Index/Indexer.cfh b/core/Lucy/Index/Indexer.cfh
index ad30281..adfea0c 100644
--- a/core/Lucy/Index/Indexer.cfh
+++ b/core/Lucy/Index/Indexer.cfh
@@ -134,6 +134,11 @@ class Lucy::Index::Indexer inherits Lucy::Object::Obj {
public void
Prepare_Commit(Indexer *self);
+ /** Accessor for schema.
+ */
+ public Schema*
+ Get_Schema(Indexer *self);
+
/** Accessor for seg_writer member var.
*/
public SegWriter*
diff --git a/perl/buildlib/Lucy/Build/Binding/Index.pm b/perl/buildlib/Lucy/Build/Binding/Index.pm
index edfc878..406d5f9 100644
--- a/perl/buildlib/Lucy/Build/Binding/Index.pm
+++ b/perl/buildlib/Lucy/Build/Binding/Index.pm
@@ -508,6 +508,7 @@ sub bind_indexer {
Prepare_Commit
Delete_By_Term
Delete_By_Query
+ Get_Schema
);
my @bound = @exposed;
diff --git a/perl/lib/Lucy/Simple.pm b/perl/lib/Lucy/Simple.pm
index aeb92c4..8941a74 100644
--- a/perl/lib/Lucy/Simple.pm
+++ b/perl/lib/Lucy/Simple.pm
@@ -50,7 +50,6 @@ sub new {
# Get type and schema.
my $analyzer = Lucy::Analysis::EasyAnalyzer->new( language => $language );
$self->{type} = Lucy::Plan::FullTextType->new( analyzer => $analyzer, );
- my $schema = $self->{schema} = Lucy::Plan::Schema->new;
# Cache the object for later clean-up.
weaken( $obj_cache{ refaddr $self } = $self );
@@ -61,20 +60,32 @@ sub new {
sub _lazily_create_indexer {
my $self = shift;
if ( !defined $self->{indexer} ) {
- $self->{indexer} = Lucy::Index::Indexer->new(
- schema => $self->{schema},
- index => $self->{path},
- );
+ eval {
+ $self->{indexer} = Lucy::Index::Indexer->new(
+ index => $self->{path},
+ );
+ };
+ if ($@) {
+ die($@) unless $@ =~ /no schema/i;
+ $self->{schema} = Lucy::Plan::Schema->new;
+ $self->{indexer} = Lucy::Index::Indexer->new(
+ schema => $self->{schema},
+ index => $self->{path},
+ );
+ }
+ else {
+ $self->{schema} = $self->{indexer}->get_schema;
+ }
}
}
sub add_doc {
my ( $self, $hashref ) = @_;
- my $schema = $self->{schema};
my $type = $self->{type};
croak("add_doc requires exactly one argument: a hashref")
unless ( @_ == 2 and reftype($hashref) eq 'HASH' );
$self->_lazily_create_indexer;
+ my $schema = $self->{schema};
$schema->spec_field( name => $_, type => $type ) for keys %$hashref;
$self->{indexer}->add_doc($hashref);
}