This is an automated email from the ASF dual-hosted git repository.
jianliangqi pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/clucene by this push:
new d05cb815 [fix](index writer)fix max buffered docs not working (#167)
d05cb815 is described below
commit d05cb8154ef4368bd40c43c94e8e3c679e13c490
Author: qiye <[email protected]>
AuthorDate: Wed Dec 27 18:54:39 2023 +0800
[fix](index writer)fix max buffered docs not working (#167)
---
src/core/CLucene/index/SDocumentWriter.cpp | 7 +++++++
src/core/CLucene/index/SDocumentWriter.h | 11 +++++++++--
2 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/src/core/CLucene/index/SDocumentWriter.cpp b/src/core/CLucene/index/SDocumentWriter.cpp
index 3c33a30b..d11e39bf 100644
--- a/src/core/CLucene/index/SDocumentWriter.cpp
+++ b/src/core/CLucene/index/SDocumentWriter.cpp
@@ -142,6 +142,13 @@ typename SDocumentsWriter<T>::ThreadState *SDocumentsWriter<T>::getThreadState(D
// Only increment nextDocID & numDocsInRAM on successful init
nextDocID++;
numDocsInRAM++;
+ // We must at this point commit to flushing to ensure we
+ // always get N docs when we flush by doc count, even if
+ // > 1 thread is adding documents:
+ if (!flushPending && maxBufferedDocs != IndexWriter::DISABLE_AUTO_FLUSH && numDocsInRAM >= maxBufferedDocs) {
+ flushPending = true;
+ threadState->doFlushAfter = true;
+ }
return threadState;
}
diff --git a/src/core/CLucene/index/SDocumentWriter.h b/src/core/CLucene/index/SDocumentWriter.h
index 342089a7..b1217ba1 100644
--- a/src/core/CLucene/index/SDocumentWriter.h
+++ b/src/core/CLucene/index/SDocumentWriter.h
@@ -6,6 +6,7 @@
#define _lucene_index_SDocumentsWriter_
#include "CLucene/_ApiHeader.h"
+#include "CLucene/index/IndexWriter.h"
#include "CLucene/store/Directory.h"
#include "CLucene/store/_RAMDirectory.h"
#include "CLucene/util/Array.h"
@@ -53,6 +54,9 @@ private:
std::vector<uint32_t> freqBuffer;
std::ostream* infoStream{};
int64_t ramBufferSize;
+ // Flush @ this number of docs. If ramBufferSize is
+ // non-zero we will flush by RAM usage instead.
+ int32_t maxBufferedDocs;
bool hasProx_ = false;
IndexVersion indexVersion_ = IndexVersion::kV1;
@@ -679,6 +683,7 @@ public:
postingsFreeCountDW = postingsAllocCountDW = 0;
docStoreOffset = nextDocID = numDocsInRAM = numDocsInStore =
nextWriteDocID = 0;
ramBufferSize = (int64_t) (256 * 1024 * 1024);
+ maxBufferedDocs = IndexWriter::DEFAULT_MAX_BUFFERED_DOCS;
}
virtual ~SDocumentsWriter();
int32_t flush(bool closeDocStore) override ;
@@ -730,7 +735,7 @@ public:
return segmentFileName(string(extension));
}
int32_t getMaxBufferedDocs() override {
- return 0;
+ return maxBufferedDocs;
}
int32_t getNumDocsInRAM() override {
@@ -748,7 +753,9 @@ public:
return 0;
}
void abort(AbortException *ae) override {}
- void setMaxBufferedDocs(int32_t count) override {}
+ void setMaxBufferedDocs(int32_t count) override {
+ maxBufferedDocs = count;
+ }
void setInfoStream(std::ostream *is) override {
this->infoStream = is;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]