Separate purging of snapshots and aborted merges

Purge an aborted merge when starting an Indexer/BGMerger session and
purge obsolete snapshots at the end of Commit.
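
A minimal sketch of the reworked call sequence, condensed from the diff
below (locking and error handling omitted):

    // At session start (Indexer_init / BGMerger_init): the FilePurger no
    // longer takes a Snapshot and only cleans up an aborted merge.
    ivars->file_purger = FilePurger_new(folder, ivars->manager);
    FilePurger_Purge_Aborted_Merge(ivars->file_purger);

    // At the end of Commit: purge files made obsolete by the snapshot
    // that was just written.
    if (ivars->needs_commit) {
        FilePurger_Purge_Snapshots(ivars->file_purger, ivars->snapshot);
    }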

Rework FilePurger to use separate hashes for purged and spared files
when purging snapshots. Optimize Folder handling by assuming that
Snapshots only contain entries local to the index folder.
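
The new bookkeeping, roughly (a sketch assembled from the FilePurger
changes below; failure tracking and the deletion lock are left out):

    Hash   *spared    = Hash_new(32);   // entries referenced by live snapshots
    Hash   *purged    = Hash_new(32);   // entries possibly safe to delete
    Vector *snapshots = Vec_new(16);    // obsolete snapshots found along the way

    Hash_Store_Utf8(spared, "locks", 5, (Obj*)CFISH_TRUE);  // never zap locks dir
    S_discover_unused(self, current, spared, purged, snapshots);

    HashIterator *iter = HashIter_new(purged);
    while (HashIter_Next(iter)) {
        String *entry = HashIter_Get_Key(iter);
        if (Hash_Fetch(spared, entry)) { continue; }
        S_delete_entry(folder, entry);
    }
    DECREF(iter);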

Make sure to list the contents of the real directory, not the
virtual compound file directory. CFReaderDirHandle lists both real
and virtual files, which might be a bug in itself.
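
As a sketch, the unwrapping step now performed by S_delete_entry in the
diff below (NULL checks omitted):

    Folder *inner = Folder_Local_Find_Folder(folder, entry);
    if (Folder_is_a(inner, COMPOUNDFILEREADER)) {
        // Drop down to the real folder so that only on-disk files are
        // listed, not the virtual files inside the compound file.
        inner = CFReader_Get_Real_Folder((CompoundFileReader*)inner);
    }
    Vector *entries = Folder_List(inner, NULL);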

Port BGMerger tests to C.

TODO: Move Perl BGMerge test to t/binding.


Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/2051451d
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/2051451d
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/2051451d

Branch: refs/heads/master
Commit: 2051451de456f51e35345f12206ca4cb0040dd7c
Parents: 5f15a92
Author: Nick Wellnhofer <wellnho...@aevum.de>
Authored: Wed Feb 15 05:10:07 2017 +0100
Committer: Nick Wellnhofer <wellnho...@aevum.de>
Committed: Mon Feb 20 16:26:21 2017 +0100

----------------------------------------------------------------------
 core/Lucy/Index/BackgroundMerger.c            |   6 +-
 core/Lucy/Index/FilePurger.c                  | 210 ++++++++++-----------
 core/Lucy/Index/FilePurger.cfh                |  15 +-
 core/Lucy/Index/Indexer.c                     |  17 +-
 go/lucy/index_test.go                         |   5 +-
 perl/t/core/233-background_merger.t           |  23 +++
 test/Lucy/Test.c                              |   2 +
 test/Lucy/Test/Index/NoMergeManager.c         |  42 +++++
 test/Lucy/Test/Index/NoMergeManager.cfh       |  30 +++
 test/Lucy/Test/Index/TestBackgroundMerger.c   | 208 ++++++++++++++++++++
 test/Lucy/Test/Index/TestBackgroundMerger.cfh |  29 +++
 test/Lucy/Test/Index/TestSortWriter.c         |  28 +--
 test/Lucy/Test/Index/TestSortWriter.cfh       |  15 --
 test/Lucy/Test/TestUtils.c                    |  17 +-
 14 files changed, 462 insertions(+), 185 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy/blob/2051451d/core/Lucy/Index/BackgroundMerger.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/BackgroundMerger.c b/core/Lucy/Index/BackgroundMerger.c
index c729bda..8f36050 100644
--- a/core/Lucy/Index/BackgroundMerger.c
+++ b/core/Lucy/Index/BackgroundMerger.c
@@ -109,8 +109,8 @@ BGMerger_init(BackgroundMerger *self, Obj *index, IndexManager *manager) {
     }
 
     // Create FilePurger. Zap detritus from previous sessions.
-    ivars->file_purger = FilePurger_new(folder, ivars->snapshot, ivars->manager);
-    FilePurger_Purge(ivars->file_purger);
+    ivars->file_purger = FilePurger_new(folder, ivars->manager);
+    FilePurger_Purge_Aborted_Merge(ivars->file_purger);
 
     // Open a PolyReader, passing in the IndexManager so we get a read lock on
     // the Snapshot's files -- so that Indexers don't zap our files while
@@ -518,7 +518,7 @@ BGMerger_Commit_IMP(BackgroundMerger *self) {
 
     if (ivars->needs_commit) {
         // Purge obsolete files.
-        FilePurger_Purge(ivars->file_purger);
+        FilePurger_Purge_Snapshots(ivars->file_purger, ivars->snapshot);
     }
 
     // Release the write lock.

http://git-wip-us.apache.org/repos/asf/lucy/blob/2051451d/core/Lucy/Index/FilePurger.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/FilePurger.c b/core/Lucy/Index/FilePurger.c
index 8c71678..55ade17 100644
--- a/core/Lucy/Index/FilePurger.c
+++ b/core/Lucy/Index/FilePurger.c
@@ -15,55 +15,52 @@
  */
 
 #define C_LUCY_FILEPURGER
-#include <ctype.h>
 #include "Lucy/Util/ToolSet.h"
 
 #include "Lucy/Index/FilePurger.h"
 #include "Clownfish/Boolean.h"
+#include "Clownfish/HashIterator.h"
 #include "Lucy/Index/IndexManager.h"
 #include "Lucy/Index/Segment.h"
 #include "Lucy/Index/Snapshot.h"
-#include "Lucy/Plan/Schema.h"
+#include "Lucy/Store/CompoundFileReader.h"
 #include "Lucy/Store/DirHandle.h"
 #include "Lucy/Store/Folder.h"
 #include "Lucy/Store/Lock.h"
 #include "Lucy/Util/Json.h"
 
-// Place unused files into purgables array and obsolete Snapshots into
-// snapshots array.
+// Add unused files to purged hash, used files to spared hash, and
+// obsolete snapshots to snapshots array.
 static void
-S_discover_unused(FilePurger *self, Vector **purgables, Vector **snapshots);
+S_discover_unused(FilePurger *self, Snapshot *current, Hash *spared,
+                  Hash *purged, Vector *snapshots);
 
-// Clean up after a failed background merge session, adding all dead files to
-// the list of candidates to be zapped.
+// Add filepath entries referenced by a snapshot to a Hash. Note that
+// it's assumed that snapshots only list entries local to the index
+// folder.
 static void
-S_zap_dead_merge(FilePurger *self, Hash *candidates);
+S_find_all_referenced(Snapshot *snapshot, Hash *set);
 
-// Return an array of recursively expanded filepath entries.
-static Vector*
-S_find_all_referenced(Folder *folder, Vector *entries);
+// Delete local entry from a folder. Handles CompoundFileReaders efficiently
+// but doesn't support subdirectories in 'entry'.
+static bool
+S_delete_entry(Folder *folder, String *entry);
 
 FilePurger*
-FilePurger_new(Folder *folder, Snapshot *snapshot, IndexManager *manager) {
+FilePurger_new(Folder *folder, IndexManager *manager) {
     FilePurger *self = (FilePurger*)Class_Make_Obj(FILEPURGER);
-    return FilePurger_init(self, folder, snapshot, manager);
+    return FilePurger_init(self, folder, manager);
 }
 
 FilePurger*
-FilePurger_init(FilePurger *self, Folder *folder, Snapshot *snapshot,
-                IndexManager *manager) {
+FilePurger_init(FilePurger *self, Folder *folder, IndexManager *manager) {
     FilePurgerIVARS *const ivars = FilePurger_IVARS(self);
     ivars->folder       = (Folder*)INCREF(folder);
-    ivars->snapshot     = (Snapshot*)INCREF(snapshot);
     ivars->manager      = manager
                          ? (IndexManager*)INCREF(manager)
                          : IxManager_new(NULL, NULL);
     IxManager_Set_Folder(ivars->manager, folder);
 
-    // Don't allow the locks directory to be zapped.
-    ivars->disallowed = Hash_new(0);
-    Hash_Store_Utf8(ivars->disallowed, "locks", 5, (Obj*)CFISH_TRUE);
-
     return self;
 }
 
@@ -71,35 +68,36 @@ void
 FilePurger_Destroy_IMP(FilePurger *self) {
     FilePurgerIVARS *const ivars = FilePurger_IVARS(self);
     DECREF(ivars->folder);
-    DECREF(ivars->snapshot);
     DECREF(ivars->manager);
-    DECREF(ivars->disallowed);
     SUPER_DESTROY(self, FILEPURGER);
 }
 
 void
-FilePurger_Purge_IMP(FilePurger *self) {
+FilePurger_Purge_Snapshots_IMP(FilePurger *self, Snapshot *current) {
     FilePurgerIVARS *const ivars = FilePurger_IVARS(self);
     Lock *deletion_lock = IxManager_Make_Deletion_Lock(ivars->manager);
 
     // Obtain deletion lock, purge files, release deletion lock.
     Lock_Clear_Stale(deletion_lock);
     if (Lock_Obtain(deletion_lock)) {
-        Folder *folder   = ivars->folder;
-        Hash   *failures = Hash_new(0);
-        Vector *purgables;
-        Vector *snapshots;
+        Folder *folder    = ivars->folder;
+        Hash   *failures  = Hash_new(16);
+        Hash   *spared    = Hash_new(32);
+        Hash   *purged    = Hash_new(32);
+        Vector *snapshots = Vec_new(16);
+
+        // Don't allow the locks directory to be zapped.
+        Hash_Store_Utf8(spared, "locks", 5, (Obj*)CFISH_TRUE);
 
-        S_discover_unused(self, &purgables, &snapshots);
+        S_discover_unused(self, current, spared, purged, snapshots);
 
         // Attempt to delete entries -- if failure, no big deal, just try
-        // again later.  Proceed in reverse lexical order so that directories
-        // get deleted after they've been emptied.
-        Vec_Sort(purgables);
-        for (size_t i = Vec_Get_Size(purgables); i--;) {
-            String *entry = (String*)Vec_Fetch(purgables, i);
-            if (Hash_Fetch(ivars->disallowed, entry)) { continue; }
-            if (!Folder_Delete(folder, entry)) {
+        // again later.
+        HashIterator *iter = HashIter_new(purged);
+        while (HashIter_Next(iter)) {
+            String *entry = HashIter_Get_Key(iter);
+            if (Hash_Fetch(spared, entry)) { continue; }
+            if (!S_delete_entry(folder, entry)) {
                 if (Folder_Exists(folder, entry)) {
                     Hash_Store(failures, entry, (Obj*)CFISH_TRUE);
                 }
@@ -124,12 +122,14 @@ FilePurger_Purge_IMP(FilePurger *self) {
             }
             if (!snapshot_has_failures) {
                 String *snapfile = Snapshot_Get_Path(snapshot);
-                Folder_Delete(folder, snapfile);
+                Folder_Local_Delete(folder, snapfile);
             }
         }
 
+        DECREF(iter);
         DECREF(failures);
-        DECREF(purgables);
+        DECREF(purged);
+        DECREF(spared);
         DECREF(snapshots);
         Lock_Release(deletion_lock);
     }
@@ -141,8 +141,8 @@ FilePurger_Purge_IMP(FilePurger *self) {
     DECREF(deletion_lock);
 }
 
-static void
-S_zap_dead_merge(FilePurger *self, Hash *candidates) {
+void
+FilePurger_Purge_Aborted_Merge_IMP(FilePurger *self) {
     FilePurgerIVARS *const ivars = FilePurger_IVARS(self);
     IndexManager *manager    = ivars->manager;
     Lock         *merge_lock = IxManager_Make_Merge_Lock(manager);
@@ -155,27 +155,26 @@ S_zap_dead_merge(FilePurger *self, Hash *candidates) {
                        : NULL;
 
         if (cutoff) {
-            String *cutoff_seg = Seg_num_to_name(Json_obj_to_i64(cutoff));
-            if (Folder_Exists(ivars->folder, cutoff_seg)) {
-                String *merge_json = SSTR_WRAP_C("merge.json");
-                DirHandle *dh = Folder_Open_Dir(ivars->folder, cutoff_seg);
+            Folder *folder = ivars->folder;
 
-                if (!dh) {
-                    THROW(ERR, "Can't open segment dir '%o'", cutoff_seg);
+            String *cutoff_seg = Seg_num_to_name(Json_obj_to_i64(cutoff));
+            if (Folder_Local_Exists(folder, cutoff_seg)) {
+                if (!S_delete_entry(folder, cutoff_seg)) {
+                    if (Folder_Local_Exists(folder, cutoff_seg)) {
+                        WARN("Couldn't delete '%o' from aborted merge",
+                             cutoff_seg);
+                    }
                 }
+            }
 
-                Hash_Store(candidates, cutoff_seg, (Obj*)CFISH_TRUE);
-                Hash_Store(candidates, merge_json, (Obj*)CFISH_TRUE);
-                while (DH_Next(dh)) {
-                    // TODO: recursively delete subdirs within seg dir.
-                    String *entry = DH_Get_Entry(dh);
-                    String *filepath = Str_newf("%o/%o", cutoff_seg, entry);
-                    Hash_Store(candidates, filepath, (Obj*)CFISH_TRUE);
-                    DECREF(filepath);
-                    DECREF(entry);
+            String *merge_json = SSTR_WRAP_C("merge.json");
+            if (!Folder_Local_Delete(folder, merge_json)) {
+                if (Folder_Local_Exists(folder, merge_json)) {
+                    WARN("Couldn't delete '%o' from aborted merge",
+                         merge_json);
                 }
-                DECREF(dh);
             }
+
             DECREF(cutoff_seg);
         }
 
@@ -187,28 +186,20 @@ S_zap_dead_merge(FilePurger *self, Hash *candidates) {
 }
 
 static void
-S_discover_unused(FilePurger *self, Vector **purgables_ptr,
-                  Vector **snapshots_ptr) {
+S_discover_unused(FilePurger *self, Snapshot *current, Hash *spared,
+                  Hash *purged, Vector *snapshots) {
     FilePurgerIVARS *const ivars = FilePurger_IVARS(self);
     Folder      *folder       = ivars->folder;
     DirHandle   *dh           = Folder_Open_Dir(folder, NULL);
     if (!dh) { RETHROW(INCREF(Err_get_error())); }
-    Vector      *spared       = Vec_new(1);
-    Vector      *snapshots    = Vec_new(1);
-    String      *snapfile     = NULL;
-
-    // Start off with the list of files in the current snapshot.
-    if (ivars->snapshot) {
-        Vector *entries    = Snapshot_List(ivars->snapshot);
-        Vector *referenced = S_find_all_referenced(folder, entries);
-        Vec_Push_All(spared, referenced);
-        DECREF(entries);
-        DECREF(referenced);
-        snapfile = Snapshot_Get_Path(ivars->snapshot);
-        if (snapfile) { Vec_Push(spared, INCREF(snapfile)); }
+    String      *snapfile     = Snapshot_Get_Path(current);
+
+    if (snapfile) {
+        Hash_Store(spared, snapfile, (Obj*)CFISH_TRUE);
     }
+    S_find_all_referenced(current, spared);
 
-    Hash *candidates = Hash_new(64);
     while (DH_Next(dh)) {
         String *entry = DH_Get_Entry(dh);
         if (Str_Starts_With_Utf8(entry, "snapshot_", 9)
@@ -219,8 +210,6 @@ S_discover_unused(FilePurger *self, Vector **purgables_ptr,
                 = Snapshot_Read_File(Snapshot_new(), folder, entry);
             Lock *lock
                 = IxManager_Make_Snapshot_Read_Lock(ivars->manager, entry);
-            Vector *snap_list  = Snapshot_List(snapshot);
-            Vector *referenced = S_find_all_referenced(folder, snap_list);
 
             // DON'T obtain the lock -- only see whether another
             // entity holds a lock on the snapshot file.
@@ -230,67 +219,60 @@ S_discover_unused(FilePurger *self, Vector **purgables_ptr,
             if (lock && Lock_Is_Locked(lock)) {
                 // The snapshot file is locked, which means someone's using
                 // that version of the index -- protect all of its entries.
-                size_t new_size = Vec_Get_Size(spared)
-                                  + Vec_Get_Size(referenced)
-                                  + 1;
-                Vec_Grow(spared, new_size);
-                Vec_Push(spared, (Obj*)Str_Clone(entry));
-                Vec_Push_All(spared, referenced);
+                Hash_Store(spared, entry, (Obj*)CFISH_TRUE);
+                S_find_all_referenced(snapshot, spared);
             }
             else {
                 // No one's using this snapshot, so all of its entries are
                 // candidates for deletion.
-                for (size_t i = 0, max = Vec_Get_Size(referenced); i < max; i++) {
-                    String *file = (String*)Vec_Fetch(referenced, i);
-                    Hash_Store(candidates, file, (Obj*)CFISH_TRUE);
-                }
                 Vec_Push(snapshots, INCREF(snapshot));
+                S_find_all_referenced(snapshot, purged);
             }
 
-            DECREF(referenced);
-            DECREF(snap_list);
             DECREF(snapshot);
             DECREF(lock);
         }
         DECREF(entry);
     }
+
     DECREF(dh);
+}
 
-    // Clean up after a dead segment consolidation.
-    S_zap_dead_merge(self, candidates);
+static void
+S_find_all_referenced(Snapshot *snapshot, Hash *set) {
+    Vector *snap_list = Snapshot_List(snapshot);
 
-    // Eliminate any current files from the list of files to be purged.
-    for (size_t i = 0, max = Vec_Get_Size(spared); i < max; i++) {
-        String *filename = (String*)Vec_Fetch(spared, i);
-        DECREF(Hash_Delete(candidates, filename));
+    for (size_t i = 0, max = Vec_Get_Size(snap_list); i < max; i++) {
+        String *entry = (String*)Vec_Fetch(snap_list, i);
+        Hash_Store(set, entry, (Obj*)CFISH_TRUE);
     }
 
-    // Pass back purgables and Snapshots.
-    *purgables_ptr = Hash_Keys(candidates);
-    *snapshots_ptr = snapshots;
-
-    DECREF(candidates);
-    DECREF(spared);
+    DECREF(snap_list);
 }
 
-static Vector*
-S_find_all_referenced(Folder *folder, Vector *entries) {
-    Hash *uniqued = Hash_new(Vec_Get_Size(entries));
-    for (size_t i = 0, max = Vec_Get_Size(entries); i < max; i++) {
-        String *entry = (String*)Vec_Fetch(entries, i);
-        Hash_Store(uniqued, entry, (Obj*)CFISH_TRUE);
-        if (Folder_Is_Directory(folder, entry)) {
-            Vector *contents = Folder_List_R(folder, entry);
-            for (size_t j = Vec_Get_Size(contents); j--;) {
-                String *sub_entry = (String*)Vec_Fetch(contents, j);
-                Hash_Store(uniqued, sub_entry, (Obj*)CFISH_TRUE);
-            }
-            DECREF(contents);
+static bool
+S_delete_entry(Folder *folder, String *folder_entry) {
+    if (Folder_Local_Is_Directory(folder, folder_entry)) {
+        // CFReader has the nasty habit of listing both real and virtual
+        // files. Get the real folder.
+        Folder *inner = Folder_Local_Find_Folder(folder, folder_entry);
+        if (inner == NULL) { return false; }
+        if (Folder_is_a(inner, COMPOUNDFILEREADER)) {
+            inner = CFReader_Get_Real_Folder((CompoundFileReader*)inner);
+        }
+
+        Vector *entries = Folder_List(inner, NULL);
+        if (entries == NULL) { return false; }
+
+        for (size_t i = 0, max = Vec_Get_Size(entries); i < max; i++) {
+            // TODO: recursively delete subdirs within seg dir.
+            String *entry = (String*)Vec_Fetch(entries, i);
+            Folder_Local_Delete(inner, entry);
         }
+
+        DECREF(entries);
     }
-    Vector *referenced = Hash_Keys(uniqued);
-    DECREF(uniqued);
-    return referenced;
-}
 
+    return Folder_Local_Delete(folder, folder_entry);
+}
 

http://git-wip-us.apache.org/repos/asf/lucy/blob/2051451d/core/Lucy/Index/FilePurger.cfh
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/FilePurger.cfh b/core/Lucy/Index/FilePurger.cfh
index c03ca18..4f3923d 100644
--- a/core/Lucy/Index/FilePurger.cfh
+++ b/core/Lucy/Index/FilePurger.cfh
@@ -22,22 +22,23 @@ parcel Lucy;
 class Lucy::Index::FilePurger inherits Clownfish::Obj {
 
     Folder       *folder;
-    Snapshot     *snapshot;
     IndexManager *manager;
-    Hash         *disallowed;
 
     inert incremented FilePurger*
-    new(Folder *folder, Snapshot *snapshot = NULL,
-        IndexManager *manager = NULL);
+    new(Folder *folder, IndexManager *manager = NULL);
 
     inert FilePurger*
-    init(FilePurger *self, Folder *folder, Snapshot *snapshot = NULL,
-         IndexManager *manager = NULL);
+    init(FilePurger *self, Folder *folder, IndexManager *manager = NULL);
 
     /** Purge obsolete files from the index.
      */
     void
-    Purge(FilePurger *self);
+    Purge_Snapshots(FilePurger *self, Snapshot *current);
+
+    /** Purge files left behind by an aborted background merge.
+     */
+    void
+    Purge_Aborted_Merge(FilePurger *self);
 
     public void
     Destroy(FilePurger *self);

http://git-wip-us.apache.org/repos/asf/lucy/blob/2051451d/core/Lucy/Index/Indexer.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/Indexer.c b/core/Lucy/Index/Indexer.c
index 16b915b..031237f 100644
--- a/core/Lucy/Index/Indexer.c
+++ b/core/Lucy/Index/Indexer.c
@@ -161,13 +161,10 @@ Indexer_init(Indexer *self, Schema *schema, Obj *index,
         }
     }
 
-    // Zap detritus from previous sessions.
-    // Note: we have to feed FilePurger with the most recent snapshot file
-    // now, but with the Indexer's snapshot later.
-    FilePurger *file_purger
-        = FilePurger_new(folder, latest_snapshot, ivars->manager);
-    FilePurger_Purge(file_purger);
-    DECREF(file_purger);
+    // Create new FilePurger and zap detritus from an aborted background
+    // merge.
+    ivars->file_purger = FilePurger_new(folder, ivars->manager);
+    FilePurger_Purge_Aborted_Merge(ivars->file_purger);
 
     // Create a new segment.
     int64_t new_seg_num
@@ -204,9 +201,7 @@ Indexer_init(Indexer *self, Schema *schema, Obj *index,
 
     DECREF(merge_lock);
 
-    // Create new SegWriter and FilePurger.
-    ivars->file_purger
-        = FilePurger_new(folder, ivars->snapshot, ivars->manager);
+    // Create new SegWriter.
     ivars->seg_writer = SegWriter_new(ivars->schema, ivars->snapshot,
                                      ivars->segment, ivars->polyreader);
     SegWriter_Prep_Seg_Dir(ivars->seg_writer);
@@ -571,7 +566,7 @@ Indexer_Commit_IMP(Indexer *self) {
         if (!success) { RETHROW(INCREF(Err_get_error())); }
 
         // Purge obsolete files.
-        FilePurger_Purge(ivars->file_purger);
+        FilePurger_Purge_Snapshots(ivars->file_purger, ivars->snapshot);
     }
 
     // Release locks, invalidating the Indexer.

http://git-wip-us.apache.org/repos/asf/lucy/blob/2051451d/go/lucy/index_test.go
----------------------------------------------------------------------
diff --git a/go/lucy/index_test.go b/go/lucy/index_test.go
index b8823e8..30f7c4a 100644
--- a/go/lucy/index_test.go
+++ b/go/lucy/index_test.go
@@ -541,8 +541,9 @@ func TestFilePurgerMisc(t *testing.T) {
 
        snapshot := NewSnapshot()
        snapshot.WriteFile(folder, "")
-       purger := NewFilePurger(folder, snapshot, nil)
-       purger.purge()
+       purger := NewFilePurger(folder, nil)
+       purger.purgeSnapshots(snapshot)
+       purger.purgeAbortedMerge()
        if folder.exists("foo") {
                t.Errorf("Failed to purge file")
        }

http://git-wip-us.apache.org/repos/asf/lucy/blob/2051451d/perl/t/core/233-background_merger.t
----------------------------------------------------------------------
diff --git a/perl/t/core/233-background_merger.t b/perl/t/core/233-background_merger.t
new file mode 100644
index 0000000..b5cd424
--- /dev/null
+++ b/perl/t/core/233-background_merger.t
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+use strict;
+use warnings;
+
+use Lucy::Test;
+my $success = Lucy::Test::run_tests("Lucy::Test::Index::TestBackgroundMerger");
+
+exit($success ? 0 : 1);
+

http://git-wip-us.apache.org/repos/asf/lucy/blob/2051451d/test/Lucy/Test.c
----------------------------------------------------------------------
diff --git a/test/Lucy/Test.c b/test/Lucy/Test.c
index 97d1854..22223e1 100644
--- a/test/Lucy/Test.c
+++ b/test/Lucy/Test.c
@@ -32,6 +32,7 @@
 #include "Lucy/Test/Analysis/TestStandardTokenizer.h"
 #include "Lucy/Test/Highlight/TestHeatMap.h"
 #include "Lucy/Test/Highlight/TestHighlighter.h"
+#include "Lucy/Test/Index/TestBackgroundMerger.h"
 #include "Lucy/Test/Index/TestDocWriter.h"
 #include "Lucy/Test/Index/TestHighlightWriter.h"
 #include "Lucy/Test/Index/TestIndexManager.h"
@@ -138,6 +139,7 @@ Test_create_test_suite() {
     TestSuite_Add_Batch(suite, (TestBatch*)TestBlobType_new());
     TestSuite_Add_Batch(suite, (TestBatch*)TestNumericType_new());
     TestSuite_Add_Batch(suite, (TestBatch*)TestFType_new());
+    TestSuite_Add_Batch(suite, (TestBatch*)TestBGMerger_new());
     TestSuite_Add_Batch(suite, (TestBatch*)TestSeg_new());
     TestSuite_Add_Batch(suite, (TestBatch*)TestHighlighter_new());
     TestSuite_Add_Batch(suite, (TestBatch*)TestSimple_new());

http://git-wip-us.apache.org/repos/asf/lucy/blob/2051451d/test/Lucy/Test/Index/NoMergeManager.c
----------------------------------------------------------------------
diff --git a/test/Lucy/Test/Index/NoMergeManager.c b/test/Lucy/Test/Index/NoMergeManager.c
new file mode 100644
index 0000000..7c0804b
--- /dev/null
+++ b/test/Lucy/Test/Index/NoMergeManager.c
@@ -0,0 +1,42 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define TESTLUCY_USE_SHORT_NAMES
+#include "Lucy/Util/ToolSet.h"
+
+#include "Lucy/Test/Index/NoMergeManager.h"
+#include "Lucy/Index/DeletionsWriter.h"
+#include "Lucy/Index/IndexManager.h"
+#include "Lucy/Index/PolyReader.h"
+
+NoMergeManager*
+NoMergeManager_new() {
+    NoMergeManager *self = (NoMergeManager*)Class_Make_Obj(NOMERGEMANAGER);
+    return (NoMergeManager*)IxManager_init((IndexManager*)self, NULL, NULL);
+}
+
+Vector*
+NoMergeManager_Recycle_IMP(NoMergeManager *self, PolyReader *reader,
+                           DeletionsWriter *del_writer, int64_t cutoff,
+                           bool optimize) {
+    UNUSED_VAR(self);
+    UNUSED_VAR(reader);
+    UNUSED_VAR(del_writer);
+    UNUSED_VAR(cutoff);
+    UNUSED_VAR(optimize);
+    return Vec_new(0);
+}
+

http://git-wip-us.apache.org/repos/asf/lucy/blob/2051451d/test/Lucy/Test/Index/NoMergeManager.cfh
----------------------------------------------------------------------
diff --git a/test/Lucy/Test/Index/NoMergeManager.cfh b/test/Lucy/Test/Index/NoMergeManager.cfh
new file mode 100644
index 0000000..0b667ab
--- /dev/null
+++ b/test/Lucy/Test/Index/NoMergeManager.cfh
@@ -0,0 +1,30 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+parcel TestLucy;
+
+class Lucy::Test::Index::NoMergeManager inherits Lucy::Index::IndexManager {
+
+    inert incremented NoMergeManager*
+    new();
+
+    public incremented Vector*
+    Recycle(NoMergeManager *self, PolyReader *reader,
+            DeletionsWriter *del_writer, int64_t cutoff,
+            bool optimize = false);
+}
+
+

http://git-wip-us.apache.org/repos/asf/lucy/blob/2051451d/test/Lucy/Test/Index/TestBackgroundMerger.c
----------------------------------------------------------------------
diff --git a/test/Lucy/Test/Index/TestBackgroundMerger.c b/test/Lucy/Test/Index/TestBackgroundMerger.c
new file mode 100644
index 0000000..084c49d
--- /dev/null
+++ b/test/Lucy/Test/Index/TestBackgroundMerger.c
@@ -0,0 +1,208 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define TESTLUCY_USE_SHORT_NAMES
+#include "Lucy/Util/ToolSet.h"
+
+#include "Lucy/Test/Index/TestBackgroundMerger.h"
+#include "Clownfish/TestHarness/TestBatchRunner.h"
+#include "Lucy/Document/Doc.h"
+#include "Lucy/Index/BackgroundMerger.h"
+#include "Lucy/Index/Indexer.h"
+#include "Lucy/Index/Segment.h"
+#include "Lucy/Search/Hits.h"
+#include "Lucy/Search/IndexSearcher.h"
+#include "Lucy/Store/FSFolder.h"
+#include "Lucy/Store/RAMFolder.h"
+#include "Lucy/Test/Index/NoMergeManager.h"
+#include "Lucy/Test/TestSchema.h"
+#include "Lucy/Test/TestUtils.h"
+#include "Lucy/Util/Json.h"
+
+TestBackgroundMerger*
+TestBGMerger_new() {
+    return (TestBackgroundMerger*)Class_Make_Obj(TESTBACKGROUNDMERGER);
+}
+
+static void
+S_add_doc(Schema *schema, Folder *folder, int code_point,
+          IndexManager *manager) {
+    Indexer      *indexer = Indexer_new(schema, (Obj*)folder, manager, 0);
+    Doc          *doc     = Doc_new(NULL, 0);
+    String       *field   = SSTR_WRAP_C("content");
+    String       *content = Str_new_from_char(code_point);
+
+    Doc_Store(doc, field, (Obj*)content);
+    Indexer_Add_Doc(indexer, doc, 1.0f);
+    Indexer_Commit(indexer);
+
+    DECREF(content);
+    DECREF(doc);
+    DECREF(indexer);
+}
+
+static int
+S_count_segs(Folder *folder) {
+    Vector *entries = Folder_List_R(folder, NULL);
+    int count = 0;
+
+    for (size_t i = 0, max = Vec_Get_Size(entries); i < max; i++) {
+        String *entry = (String*)Vec_Fetch(entries, i);
+        if (Str_Ends_With_Utf8(entry, "segmeta.json", 12)) { count++; }
+    }
+
+    DECREF(entries);
+    return count;
+}
+
+static void
+S_test_terms(TestBatchRunner *runner, Folder *folder, const char *msg) {
+    IndexSearcher *searcher = IxSearcher_new((Obj*)folder);
+
+    for (int i = 'a'; i <= 'e'; i++) {
+        const char *status;
+        uint32_t expected;
+
+        if (i == 'b') {
+            expected = 0;
+            status   = "deleted";
+        }
+        else {
+            expected = 1;
+            status   = "present";
+        }
+
+        String *query = Str_new_from_char(i);
+        Hits *hits = IxSearcher_Hits(searcher, (Obj*)query, 0, 10, NULL);
+        TEST_UINT_EQ(runner, Hits_Total_Hits(hits), expected,
+                     "term %c still %s%s", i, status, msg);
+        DECREF(hits);
+        DECREF(query);
+    }
+
+    DECREF(searcher);
+}
+
+static void
+test_bg_merger(TestBatchRunner *runner) {
+    Schema  *schema        = (Schema*)TestSchema_new(false);
+#if 1
+    Folder  *folder        = (Folder*)RAMFolder_new(NULL);
+#else
+    Folder  *folder        = (Folder*)FSFolder_new(SSTR_WRAP_C("_test_index"));
+#endif
+    IndexManager *no_merge = (IndexManager*)NoMergeManager_new();
+
+    Folder_Initialize(folder);
+
+    for (int i = 'a'; i <= 'c'; i++) {
+        S_add_doc(schema, folder, i, no_merge);
+    }
+
+    BackgroundMerger *bg_merger = BGMerger_new((Obj*)folder, NULL);
+    S_add_doc(schema, folder, 'd', NULL);
+    TEST_INT_EQ(runner, S_count_segs(folder), 4,
+                "BackgroundMerger prevents Indexer from merging claimed"
+                " segments");
+
+    {
+        Indexer      *indexer = Indexer_new(NULL, (Obj*)folder, NULL, 0);
+        Doc          *doc     = Doc_new(NULL, 0);
+        String       *field   = SSTR_WRAP_C("content");
+        String       *content = Str_new_from_char('e');
+
+        Doc_Store(doc, field, (Obj*)content);
+        Indexer_Add_Doc(indexer, doc, 1.0f);
+        Indexer_Delete_By_Term(indexer, field, (Obj*)SSTR_WRAP_C("b"));
+        Indexer_Commit(indexer);
+        TEST_INT_EQ(runner, S_count_segs(folder), 4,
+                    "Indexer may still merge unclaimed segments");
+
+        DECREF(content);
+        DECREF(doc);
+        DECREF(indexer);
+    }
+
+    BGMerger_Commit(bg_merger);
+    TEST_INT_EQ(runner, S_count_segs(folder), 3, "Background merge completes");
+    String *del_file = SSTR_WRAP_C("seg_7/deletions-seg_4.bv");
+    TEST_TRUE(runner, Folder_Exists(folder, del_file),
+              "deletions carried forward");
+
+    S_test_terms(runner, folder, "");
+
+    String *merge_json = SSTR_WRAP_C("merge.json");
+    String *cutoff_seg = NULL;
+
+    {
+        // Simulate failed background merge.
+        DECREF(bg_merger);
+        IndexManager *manager = IxManager_new(NULL, NULL);
+        bg_merger = BGMerger_new((Obj*)folder, manager);
+        BGMerger_Prepare_Commit(bg_merger);
+        DECREF(bg_merger);
+
+        TEST_TRUE(runner, Folder_Local_Exists(folder, merge_json),
+                  "merge.json exists");
+        Hash *merge_data = IxManager_Read_Merge_Data(manager);
+        Obj  *cutoff     = Hash_Fetch_Utf8(merge_data, "cutoff", 6);
+
+        cutoff_seg = Seg_num_to_name(Json_obj_to_i64(cutoff));
+        TEST_TRUE(runner, Folder_Local_Exists(folder, cutoff_seg),
+                  "cutoff segment exists");
+
+        Indexer *indexer = Indexer_new(NULL, (Obj*)folder, no_merge, 0);
+        Indexer_Commit(indexer);
+
+        DECREF(indexer);
+        DECREF(cutoff_seg);
+        DECREF(merge_data);
+        DECREF(manager);
+    }
+
+    TEST_FALSE(runner, Folder_Local_Exists(folder, merge_json),
+               "merge.json deleted after failed bg merge");
+    // Doesn't work because an indexing session always creates an empty
+    // segment directory even if no documents were added and nothing
+    // was merged.
+#if 0
+    TEST_FALSE(runner, Folder_Local_Exists(folder, cutoff_seg),
+               "cutoff segment deleted after failed bg merge");
+#endif
+
+    {
+        Indexer *indexer = Indexer_new(NULL, (Obj*)folder, NULL, 0);
+        Indexer_Optimize(indexer);
+        Indexer_Commit(indexer);
+        DECREF(indexer);
+    }
+
+    TEST_INT_EQ(runner, S_count_segs(folder), 1,
+                "Only a single segment remains after full optimize");
+    S_test_terms(runner, folder, " after full optimize");
+
+    DECREF(no_merge);
+    DECREF(folder);
+    DECREF(schema);
+}
+
+void
+TestBGMerger_Run_IMP(TestBackgroundMerger *self, TestBatchRunner *runner) {
+    TestBatchRunner_Plan(runner, (TestBatch*)self, 18);
+    test_bg_merger(runner);
+}
+
+

http://git-wip-us.apache.org/repos/asf/lucy/blob/2051451d/test/Lucy/Test/Index/TestBackgroundMerger.cfh
----------------------------------------------------------------------
diff --git a/test/Lucy/Test/Index/TestBackgroundMerger.cfh b/test/Lucy/Test/Index/TestBackgroundMerger.cfh
new file mode 100644
index 0000000..273e68d
--- /dev/null
+++ b/test/Lucy/Test/Index/TestBackgroundMerger.cfh
@@ -0,0 +1,29 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+parcel TestLucy;
+
+class Lucy::Test::Index::TestBackgroundMerger nickname TestBGMerger
+    inherits Clownfish::TestHarness::TestBatch {
+
+    inert incremented TestBackgroundMerger*
+    new();
+
+    void
+    Run(TestBackgroundMerger *self, TestBatchRunner *runner);
+}
+
+

http://git-wip-us.apache.org/repos/asf/lucy/blob/2051451d/test/Lucy/Test/Index/TestSortWriter.c
----------------------------------------------------------------------
diff --git a/test/Lucy/Test/Index/TestSortWriter.c b/test/Lucy/Test/Index/TestSortWriter.c
index 539cc9e..0d876a0 100644
--- a/test/Lucy/Test/Index/TestSortWriter.c
+++ b/test/Lucy/Test/Index/TestSortWriter.c
@@ -38,6 +38,7 @@
 #include "Lucy/Plan/Schema.h"
 #include "Lucy/Plan/StringType.h"
 #include "Lucy/Store/RAMFolder.h"
+#include "Lucy/Test/Index/NoMergeManager.h"
 
 static String *name_str;
 static String *speed_str;
@@ -226,7 +227,7 @@ test_sort_writer(TestBatchRunner *runner) {
 
     {
         // Add a second segment.
-        NonMergingIndexManager *manager = NMIxManager_new();
+        NoMergeManager *manager = NoMergeManager_new();
         Indexer *indexer
             = Indexer_new(schema, (Obj*)folder, (IndexManager*)manager, 0);
         // no "wheels" field -- test NULL/undef
@@ -295,29 +296,4 @@ TestSortWriter_Run_IMP(TestSortWriter *self, TestBatchRunner *runner) {
     S_destroy_strings();
 }
 
-NonMergingIndexManager*
-NMIxManager_new() {
-    NonMergingIndexManager *self
-        = (NonMergingIndexManager*)Class_Make_Obj(NONMERGINGINDEXMANAGER);
-    return NMIxManager_init(self);
-}
-
-NonMergingIndexManager*
-NMIxManager_init(NonMergingIndexManager *self) {
-    IxManager_init((IndexManager*)self, NULL, NULL);
-    return self;
-}
-
-Vector*
-NMIxManager_Recycle_IMP(NonMergingIndexManager *self, PolyReader *reader,
-                        lucy_DeletionsWriter *del_writer, int64_t cutoff,
-                        bool optimize) {
-    UNUSED_VAR(self);
-    UNUSED_VAR(reader);
-    UNUSED_VAR(del_writer);
-    UNUSED_VAR(cutoff);
-    UNUSED_VAR(optimize);
-    return Vec_new(0);
-}
-
 

http://git-wip-us.apache.org/repos/asf/lucy/blob/2051451d/test/Lucy/Test/Index/TestSortWriter.cfh
----------------------------------------------------------------------
diff --git a/test/Lucy/Test/Index/TestSortWriter.cfh b/test/Lucy/Test/Index/TestSortWriter.cfh
index 5aa63ab..3855cc2 100644
--- a/test/Lucy/Test/Index/TestSortWriter.cfh
+++ b/test/Lucy/Test/Index/TestSortWriter.cfh
@@ -26,18 +26,3 @@ class Lucy::Test::Index::TestSortWriter
     Run(TestSortWriter *self, TestBatchRunner *runner);
 }
 
-class Lucy::Test::Index::NonMergingIndexManager nickname NMIxManager
-    inherits Lucy::Index::IndexManager {
-
-    public inert incremented NonMergingIndexManager*
-    new();
-
-    public inert NonMergingIndexManager*
-    init(NonMergingIndexManager *self);
-
-    public incremented Vector*
-    Recycle(NonMergingIndexManager *self, PolyReader *reader,
-            DeletionsWriter *del_writer, int64_t cutoff,
-            bool optimize = false);
-}
-

http://git-wip-us.apache.org/repos/asf/lucy/blob/2051451d/test/Lucy/Test/TestUtils.c
----------------------------------------------------------------------
diff --git a/test/Lucy/Test/TestUtils.c b/test/Lucy/Test/TestUtils.c
index 0f579b0..614e84c 100644
--- a/test/Lucy/Test/TestUtils.c
+++ b/test/Lucy/Test/TestUtils.c
@@ -83,15 +83,18 @@ TestUtils_create_index(Vector *doc_set) {
 Folder*
 TestUtils_create_index_c(const char *first, ...) {
     Vector *doc_set = Vec_new(1);
-    Vec_Push(doc_set, (Obj*)Str_new_from_utf8(first, strlen(first)));
 
-    va_list ap;
-    va_start(ap, first);
-    const char *next;
-    while ((next = va_arg(ap, const char*))) {
-        Vec_Push(doc_set, (Obj*)Str_new_from_utf8(next, strlen(next)));
+    if (first != NULL) {
+        Vec_Push(doc_set, (Obj*)Str_new_from_utf8(first, strlen(first)));
+
+        va_list ap;
+        va_start(ap, first);
+        const char *next;
+        while ((next = va_arg(ap, const char*))) {
+            Vec_Push(doc_set, (Obj*)Str_new_from_utf8(next, strlen(next)));
+        }
+        va_end(ap);
     }
-    va_end(ap);
 
     Folder *folder = TestUtils_create_index(doc_set);
 
