This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/clucene by this push:
new fd453665 [feature](analysis) add tokenizer CharFilter preprocessing interface (#118)
fd453665 is described below
commit fd453665055c65b94892d13a93ac47180afd72bb
Author: zzzxl <[email protected]>
AuthorDate: Fri Sep 8 16:14:54 2023 +0800
[feature](analysis) add tokenizer CharFilter preprocessing interface (#118)
---
src/core/CLucene/analysis/CharFilter.h | 37 +++++++++++++++++++++++++++
src/core/CLucene/index/MultiSegmentReader.cpp | 10 +++++---
src/core/CLucene/util/CLStreams.h | 8 ++++--
3 files changed, 49 insertions(+), 6 deletions(-)
diff --git a/src/core/CLucene/analysis/CharFilter.h b/src/core/CLucene/analysis/CharFilter.h
new file mode 100644
index 00000000..b0238893
--- /dev/null
+++ b/src/core/CLucene/analysis/CharFilter.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include <memory>
+
+#include "CLucene/util/CLStreams.h"
+
+namespace lucene::analysis {
+
+class CharFilter : public lucene::util::Reader {
+public:
+ CharFilter(lucene::util::Reader* input) : input_(input) {}
+
+ virtual ~CharFilter() {
+ if (input_) {
+ delete input_;
+ input_ = nullptr;
+ }
+ }
+
+ int64_t position() override {
+ _CLTHROWA(CL_ERR_UnsupportedOperation,
+ "UnsupportedOperationException CharFilter::position");
+ }
+
+ int64_t skip(int64_t ntoskip) override {
+ _CLTHROWA(CL_ERR_UnsupportedOperation, "UnsupportedOperationException CharFilter::skip");
+ }
+
+ size_t size() override {
+ _CLTHROWA(CL_ERR_UnsupportedOperation, "UnsupportedOperationException CharFilter::size");
+ }
+
+protected:
+ lucene::util::Reader* input_ = nullptr;
+};
+
+} // namespace lucene::analysis
\ No newline at end of file
diff --git a/src/core/CLucene/index/MultiSegmentReader.cpp b/src/core/CLucene/index/MultiSegmentReader.cpp
index d4e8c8ea..e093cd61 100644
--- a/src/core/CLucene/index/MultiSegmentReader.cpp
+++ b/src/core/CLucene/index/MultiSegmentReader.cpp
@@ -592,11 +592,13 @@ void MultiTermDocs::seek( Term* tterm) {
pointer = 0;
current = NULL;
- for (int32_t i = 0; i < readerTermDocs->length; i++) {
- termDocs(i);
+ if (readerTermDocs) {
+ for (int32_t i = 0; i < readerTermDocs->length; i++) {
+ termDocs(i);
+ }
+ base = starts[pointer];
+ current = termDocs(pointer++);
}
- base = starts[pointer];
- current = termDocs(pointer++);
}
bool MultiTermDocs::next() {
diff --git a/src/core/CLucene/util/CLStreams.h b/src/core/CLucene/util/CLStreams.h
index 4fb00eee..82bb1274 100644
--- a/src/core/CLucene/util/CLStreams.h
+++ b/src/core/CLucene/util/CLStreams.h
@@ -27,6 +27,10 @@ class CLUCENE_EXPORT CLStream: public IReader{
public:
virtual ~CLStream(){}
+ virtual void init(const void *_value, int32_t _length, bool copyData) {
+ _CLTHROWA(CL_ERR_UnsupportedOperation, "UnsupportedOperationException: CLStream::init");
+ }
+
inline int read(){
const T* buffer;
const int32_t nread = read((const void**)&buffer,1, 1);
@@ -191,7 +195,7 @@ public:
this->buffer_size = 0;
this->init(_value, _length, copyData);
}
- void init(const T *_value, int32_t _length, bool copyData = true){
+ void init(const void *_value, int32_t _length, bool copyData = true) override {
const size_t length = _length;
this->pos = 0;
if (copyData) {
@@ -209,7 +213,7 @@ public:
if (ownValue && this->value != NULL) {
_CLDELETE_LARRAY((T *) this->value);
}
- this->value = _value;
+ this->value = (T *)_value;
this->buffer_size = 0;
}
this->m_size = length;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]