Author: rooneg
Date: Sun Mar  6 18:56:12 2005
New Revision: 156376

URL: http://svn.apache.org/viewcvs?view=rev&rev=156376
Log:
Fix the recently found problem with unoptimized indices.  It turns out we
were not incrementing the document id by the appropriate amount for segments
past the first one.  Document ids within each segment are numbered starting
at 0, so when results span multiple segments, the ids from each later segment
need to be incremented by the number of documents in the segments before it.

If I'd been paying attention when writing the index tests I would have seen
this problem then, but I was lazy.  Oops.

* include/lcn_segment.h
  (lcn_segment_open): add an offset parameter, update docs.

* src/index/segment.c
  (lcn_segment_t): store the starting offset for the segment.
  (lcn_segment_open): take offset parameter, store it in the segment.
  (lcn_segment_term_docs): increment docnums by offset.

* src/index/segments.c
  (lcn_segments_read): move calculation of starting offsets into the
   first loop, so we can use them when opening the segments.

* test/index/segment_test.c
  (test_segment_term_docs): update call to lcn_segment_open.

* test/index/index_test.c
  (test_index_term_docs_unopt): correct docnums we expect to see.

Modified:
    incubator/lucene4c/trunk/include/lcn_segment.h
    incubator/lucene4c/trunk/src/index/segment.c
    incubator/lucene4c/trunk/src/index/segments.c
    incubator/lucene4c/trunk/test/index/index_test.c
    incubator/lucene4c/trunk/test/index/segment_test.c

Modified: incubator/lucene4c/trunk/include/lcn_segment.h
URL: 
http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/include/lcn_segment.h?view=diff&r1=156375&r2=156376
==============================================================================
--- incubator/lucene4c/trunk/include/lcn_segment.h (original)
+++ incubator/lucene4c/trunk/include/lcn_segment.h Sun Mar  6 18:56:12 2005
@@ -34,13 +34,15 @@
 /** An opaque structure representing a segment of a lucene index. */
 typedef struct lcn_segment_t lcn_segment_t;
 
-/** Open the segment @a segname, of size @a size, within directory @a d and
- * return it in @a segment, allocated from @a pool.
+/** Open the segment @a segname, of size @a size containing documents
+ * starting at @a offset, within directory @a d and return it in @a segment,
+ * allocated from @a pool.
  */
 lcn_error_t *
 lcn_segment_open (lcn_segment_t **segment,
                   const lcn_char_t *segname,
                   apr_uint32_t size,
+                  apr_uint32_t offset,
                   const lcn_directory_t *d,
                   apr_pool_t *pool);
 

Modified: incubator/lucene4c/trunk/src/index/segment.c
URL: 
http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/src/index/segment.c?view=diff&r1=156375&r2=156376
==============================================================================
--- incubator/lucene4c/trunk/src/index/segment.c (original)
+++ incubator/lucene4c/trunk/src/index/segment.c Sun Mar  6 18:56:12 2005
@@ -29,6 +29,8 @@
 
   apr_uint32_t size;
 
+  apr_uint32_t offset;
+
   const lcn_directory_t *directory;
 
   lcn_terminfos_t *term_infos;
@@ -42,6 +44,7 @@
 lcn_segment_open (lcn_segment_t **segment,
                   const lcn_char_t *segname,
                   apr_uint32_t size,
+                  apr_uint32_t offset,
                   const lcn_directory_t *directory,
                   apr_pool_t *pool)
 {
@@ -51,6 +54,7 @@
 
   s->name = lcn_strcpy (segname, pool);
   s->size = size;
+  s->offset = offset;
 
   LCN_ERR (lcn_str_to_cstring (&cfsname, segname, pool));
 
@@ -132,6 +136,16 @@
                                               pool));
 
   *doc_freq = lcn_terminfo_doc_freq (ti);
+
+  {
+    apr_uint32_t *docarray = *docs;
+    int i;
+
+    for (i = 0; i < *doc_freq; ++i)
+      {
+        docarray[i] += segment->offset;
+      }
+  }
 
   return LCN_NO_ERROR;
 }

Modified: incubator/lucene4c/trunk/src/index/segments.c
URL: 
http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/src/index/segments.c?view=diff&r1=156375&r2=156376
==============================================================================
--- incubator/lucene4c/trunk/src/index/segments.c (original)
+++ incubator/lucene4c/trunk/src/index/segments.c Sun Mar  6 18:56:12 2005
@@ -69,18 +69,27 @@
                                                 segcount,
                                                 sizeof (lcn_segment_t *));
 
+  (*segments)->starts = apr_pcalloc (pool, sizeof (apr_uint32_t) * segcount);
+
   for (i = 0; i < segcount; ++i)
     {
       lcn_segment_t *segment;
       apr_uint32_t size;
       lcn_char_t *name;
 
+      (*segments)->starts[i] = (*segments)->max_docs;
+
       /* the string is dynamically allocated, so we use pool so it persists */
       LCN_ERR (lcn_istream_read_string (istream, &name, pool));
 
       LCN_ERR (lcn_istream_read_int (istream, &size, subpool));
 
-      LCN_ERR (lcn_segment_open (&segment, name, size, directory, pool));
+      LCN_ERR (lcn_segment_open (&segment,
+                                 name,
+                                 size,
+                                 (*segments)->max_docs,
+                                 directory,
+                                 pool));
 
       apr_hash_set ((*segments)->segments,
                     name,
@@ -88,18 +97,8 @@
                     segment);
 
       APR_ARRAY_PUSH ((*segments)->segments_bynum, lcn_segment_t *) = segment;
-    }
-
-  (*segments)->starts = apr_pcalloc (pool, sizeof (apr_uint32_t) * segcount);
-
-  for (i = 0; i < segcount; ++i)
-    {
-      (*segments)->starts[i] = (*segments)->max_docs;
 
-      (*segments)->max_docs
-        += lcn_segment_size (APR_ARRAY_IDX ((*segments)->segments_bynum,
-                                            i,
-                                            lcn_segment_t *));
+      (*segments)->max_docs += lcn_segment_size (segment);
     }
 
   lcn_pool_destroy (subpool);

Modified: incubator/lucene4c/trunk/test/index/index_test.c
URL: 
http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/test/index/index_test.c?view=diff&r1=156375&r2=156376
==============================================================================
--- incubator/lucene4c/trunk/test/index/index_test.c (original)
+++ incubator/lucene4c/trunk/test/index/index_test.c Sun Mar  6 18:56:12 2005
@@ -157,16 +157,16 @@
             ABTS_INT_EQUAL (tc, 274, lcn_doc_iter_doc (itr));
             break;
           case 4:
-            ABTS_INT_EQUAL (tc, 31, lcn_doc_iter_doc (itr));
+            ABTS_INT_EQUAL (tc, 1731, lcn_doc_iter_doc (itr));
             break;
           case 5:
-            ABTS_INT_EQUAL (tc, 39, lcn_doc_iter_doc (itr));
+            ABTS_INT_EQUAL (tc, 1739, lcn_doc_iter_doc (itr));
             break;
           case 6:
-            ABTS_INT_EQUAL (tc, 40, lcn_doc_iter_doc (itr));
+            ABTS_INT_EQUAL (tc, 1840, lcn_doc_iter_doc (itr));
             break;
           case 7:
-            ABTS_INT_EQUAL (tc, 56, lcn_doc_iter_doc (itr));
+            ABTS_INT_EQUAL (tc, 1856, lcn_doc_iter_doc (itr));
             break;
           default:
             ABTS_TRUE (tc, FALSE);

Modified: incubator/lucene4c/trunk/test/index/segment_test.c
URL: 
http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/test/index/segment_test.c?view=diff&r1=156375&r2=156376
==============================================================================
--- incubator/lucene4c/trunk/test/index/segment_test.c (original)
+++ incubator/lucene4c/trunk/test/index/segment_test.c Sun Mar  6 18:56:12 2005
@@ -30,7 +30,7 @@
 
   CHK_ERR (lcn_fs_directory_open (&dir, "test/data/index", p));
 
-  CHK_ERR (lcn_segment_open (&seg, segname, 395, dir, p));
+  CHK_ERR (lcn_segment_open (&seg, segname, 395, 0, dir, p));
 
   CHK_ERR (lcn_segment_term_docs (&doc_freq,
                                   &docs,


Reply via email to