This is an automated email from the ASF dual-hosted git repository.
jianliangqi pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/clucene by this push:
new d75e5a15 [Update](unitest) make unitest work for clucene (#160)
d75e5a15 is described below
commit d75e5a152aeb9154ba64848867b01baeeb535257
Author: airborne12 <[email protected]>
AuthorDate: Fri Dec 22 18:50:37 2023 +0800
[Update](unitest) make unitest work for clucene (#160)
---
src/core/CLucene/util/stringUtil.h | 1 +
src/test/CMakeLists.txt | 14 +
src/test/analysis/TestAnalysis.cpp | 3 +-
src/test/contribs-lib/analysis/testChinese.cpp | 23 +-
src/test/document/TestDocument.cpp | 172 +++++----
src/test/search/TestSearchRange.cpp | 2 +-
src/test/test.h | 2 +-
src/test/tests.cpp | 34 +-
src/test/util/TestBKD.cpp | 486 +++++++++++++++++--------
src/test/util/TestBKD.h | 21 +-
src/test/util/TestMSBRadixSorter.cpp | 12 +-
11 files changed, 478 insertions(+), 292 deletions(-)
diff --git a/src/core/CLucene/util/stringUtil.h
b/src/core/CLucene/util/stringUtil.h
index 7d97e735..1616737d 100644
--- a/src/core/CLucene/util/stringUtil.h
+++ b/src/core/CLucene/util/stringUtil.h
@@ -12,6 +12,7 @@
#endif
#include <cstring>
+#include <assert.h>
#include "SSEUtil.h"
template <typename T>
diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt
index a487f343..20c722f8 100644
--- a/src/test/CMakeLists.txt
+++ b/src/test/CMakeLists.txt
@@ -272,6 +272,20 @@ IF (BUILD_STATIC_LIBRARIES)
TARGET_LINK_LIBRARIES(cl_test clucene-core-static
clucene-shared-static ic gtest ${EXTRA_LIBS} ${Roaring_LIBRARY})
ENDIF (UNIX)
+ SET(DATA_SOURCE_DIR ${clucene_SOURCE_DIR}/src/test/data)
+ SET(DATA_TARGET_DIR "${EXECUTABLE_OUTPUT_PATH}/data")
+
+ ADD_CUSTOM_COMMAND(TARGET cl_test POST_BUILD
+ COMMAND ${CMAKE_COMMAND} -E copy_directory ${DATA_SOURCE_DIR}
${DATA_TARGET_DIR}
+ COMMENT "Copying ${DATA_SOURCE_DIR} to ${DATA_TARGET_DIR}/data")
+
+ SET(DICT_SOURCE_DIR
${clucene_SOURCE_DIR}/src/contribs-lib/CLucene/analysis/jieba/dict)
+ SET(DICT_TARGET_DIR "${EXECUTABLE_OUTPUT_PATH}/dict")
+
+ ADD_CUSTOM_COMMAND(TARGET cl_test POST_BUILD
+ COMMAND ${CMAKE_COMMAND} -E copy_directory ${DICT_SOURCE_DIR}
${DICT_TARGET_DIR}
+ COMMENT "Copying ${DATA_SOURCE_DIR} to ${DATA_TARGET_DIR}/dict")
+
ENDIF (BUILD_STATIC_LIBRARIES)
############################
diff --git a/src/test/analysis/TestAnalysis.cpp
b/src/test/analysis/TestAnalysis.cpp
index da51aa06..a29b65ed 100644
--- a/src/test/analysis/TestAnalysis.cpp
+++ b/src/test/analysis/TestAnalysis.cpp
@@ -5,6 +5,7 @@
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "test.h"
+#include <memory>
void test(CuTest *tc, Reader *reader, bool verbose, int64_t bytes) {
StandardAnalyzer analyzer;
@@ -63,7 +64,7 @@ void testTokenStreamField(CuTest *tc) {
TokenStream *stream = analyzer.reusableTokenStream(L"field1", &reader);
int field_config = lucene::document::Field::STORE_NO |
lucene::document::Field::INDEX_TOKENIZED;
- auto field = _CLNEW Field(L"field1", field_config);
+ auto field = std::make_unique<Field>(L"field1", field_config);
field->setValue(stream);
auto s = field->tokenStreamValue();
int32_t count = 0;
diff --git a/src/test/contribs-lib/analysis/testChinese.cpp
b/src/test/contribs-lib/analysis/testChinese.cpp
index 7e47aa2d..2aeb0367 100644
--- a/src/test/contribs-lib/analysis/testChinese.cpp
+++ b/src/test/contribs-lib/analysis/testChinese.cpp
@@ -142,13 +142,6 @@ void testCJK(CuTest *tc) {
_testCJK(tc, "a\xe5\x95\xa4\xe9\x85\x92\xe5\x95\xa4x", exp2);
}
-std::string get_dict_path() {
- if(const char* env_p = std::getenv("DICT_PATH")) {
- return env_p;
- }
- return "";
-}
-
void testSimpleJiebaSearchModeTokenizer2(CuTest* tc) {
LanguageBasedAnalyzer a;
const char* field_value_data = "冰咒龙";
@@ -161,7 +154,7 @@ void testSimpleJiebaSearchModeTokenizer2(CuTest* tc) {
a.setLanguage(_T("chinese"));
a.setStem(false);
a.setMode(lucene::analysis::AnalyzerMode::Search);
- a.initDict(get_dict_path());
+ a.initDict("./dict");
ts = a.tokenStream(_T("contents"), stringReader);
CLUCENE_ASSERT(ts->next(&t) != NULL);
@@ -184,7 +177,7 @@ void testSimpleJiebaAllModeTokenizer2(CuTest* tc) {
a.setLanguage(_T("chinese"));
a.setStem(false);
a.setMode(lucene::analysis::AnalyzerMode::All);
- a.initDict(get_dict_path());
+ a.initDict("./dict");
ts = a.tokenStream(_T("contents"), stringReader);
CLUCENE_ASSERT(ts->next(&t) != NULL);
@@ -209,7 +202,7 @@ void testSimpleJiebaAllModeTokenizer(CuTest* tc) {
a.setLanguage(_T("chinese"));
a.setStem(false);
a.setMode(lucene::analysis::AnalyzerMode::All);
- a.initDict(get_dict_path());
+ a.initDict("./dict");
ts = a.tokenStream(_T("contents"), stringReader);
CLUCENE_ASSERT(ts->next(&t) != NULL);
@@ -240,7 +233,7 @@ void testSimpleJiebaDefaultModeTokenizer2(CuTest* tc) {
a.setLanguage(_T("chinese"));
a.setStem(false);
a.setMode(lucene::analysis::AnalyzerMode::Default);
- a.initDict(get_dict_path());
+ a.initDict("./dict");
ts = a.tokenStream(_T("contents"), stringReader);
/*char tmp[255] = {};
@@ -277,7 +270,7 @@ void testSimpleJiebaDefaultModeTokenizer(CuTest* tc) {
a.setLanguage(_T("chinese"));
a.setStem(false);
a.setMode(lucene::analysis::AnalyzerMode::Default);
- a.initDict(get_dict_path());
+ a.initDict("./dict");
ts = a.tokenStream(_T("contents"), stringReader);
CLUCENE_ASSERT(ts->next(&t) != NULL);
@@ -302,7 +295,7 @@ void testSimpleJiebaSearchModeTokenizer(CuTest* tc) {
a.setLanguage(_T("chinese"));
a.setStem(false);
a.setMode(lucene::analysis::AnalyzerMode::Search);
- a.initDict(get_dict_path());
+ a.initDict("./dict");
ts = a.tokenStream(_T("contents"), stringReader);
CLUCENE_ASSERT(ts->next(&t) != NULL);
@@ -335,7 +328,7 @@ void testSimpleJiebaTokenizer(CuTest* tc) {
a.setLanguage(_T("chinese"));
a.setStem(false);
a.setMode(lucene::analysis::AnalyzerMode::Default);
- a.initDict(get_dict_path());
+ a.initDict("./dict");
ts = a.tokenStream(_T("contents"), stringReader);
CLUCENE_ASSERT(ts->next(&t) != NULL);
@@ -652,7 +645,7 @@ void testJiebaMatchHuge(CuTest* tc) {
auto analyzer = _CLNEW lucene::analysis::LanguageBasedAnalyzer();
analyzer->setLanguage(L"chinese");
analyzer->setMode(lucene::analysis::AnalyzerMode::Default);
- analyzer->initDict(get_dict_path());
+ analyzer->initDict("./dict");
IndexWriter w(&dir, analyzer, true);
auto field_name = lucene::util::Misc::_charToWide("chinese");
diff --git a/src/test/document/TestDocument.cpp
b/src/test/document/TestDocument.cpp
index 93d52e8e..7dc44a68 100644
--- a/src/test/document/TestDocument.cpp
+++ b/src/test/document/TestDocument.cpp
@@ -58,95 +58,123 @@ public:
void TestReaderValueField(CuTest *tc) {
RAMDirectory dir;
- SimpleAnalyzer<TCHAR> analyzer;
+ SimpleAnalyzer<char> analyzer;
IndexWriter w(&dir, &analyzer, true);
+ w.setUseCompoundFile(false);
auto field_name = lucene::util::Misc::_charToWide("f3");
Document doc;
auto field = _CLNEW Field(field_name, Field::INDEX_TOKENIZED |
Field::STORE_NO);
- auto value1 = lucene::util::Misc::_charToWide("value1");
- auto stringReader = _CLNEW StringReader(value1, wcslen(value1), false);
- field->setValue(stringReader);
+ auto char_string_reader =
std::make_unique<lucene::util::SStringReader<char>>();
+ char_string_reader->init("value1", 6, true);
+ auto stream = analyzer.tokenStream(field->name(),
char_string_reader.get());
+ field->setValue(stream);
doc.add(*field);
w.addDocument(&doc);
w.close();
IndexSearcher searcher(&dir);
- Term *t1 = _CLNEW Term(_T("f3"), _T("value1"));
- auto *query1 = _CLNEW TermQuery(t1);
- Hits *hits1 = searcher.search(query1);
+ auto t1 = std::make_unique<Term>(_T("f3"), _T("value1"));
+ auto query1 =std::make_unique<TermQuery>(t1.get());
+ Hits *hits1 = searcher.search(query1.get());
CLUCENE_ASSERT(1 == hits1->length());
+ _CLDELETE(stream)
+ _CLDELETE(hits1)
+ _CLDELETE_ARRAY(field_name)
}
void TestMultiSetValueField(CuTest *tc) {
RAMDirectory dir;
- SimpleAnalyzer<TCHAR> analyzer;
+ SimpleAnalyzer<char> analyzer;
IndexWriter w(&dir, &analyzer, true);
+ w.setUseCompoundFile(false);
auto field_name = lucene::util::Misc::_charToWide("f3");
Document doc;
auto field = _CLNEW Field(field_name, Field::INDEX_TOKENIZED |
Field::STORE_NO);
+ auto char_string_reader =
std::make_unique<lucene::util::SStringReader<char>>();
+ char_string_reader->init("value1", 6, false);
+ auto stream = analyzer.tokenStream(field->name(),
char_string_reader.get());
+ field->setValue(stream);
+ char_string_reader->init("value2", 6, false);
+ auto stream2 = analyzer.tokenStream(field->name(),
char_string_reader.get());
+ field->setValue(stream2);
- auto value1 = lucene::util::Misc::_charToWide("value1");
- field->setValue(value1, false);
- auto value2 = lucene::util::Misc::_charToWide("value2");
- field->setValue(value2, false);
doc.add(*field);
w.addDocument(&doc);
w.close();
IndexSearcher searcher(&dir);
- Term *t1 = _CLNEW Term(_T("f3"), _T("value1"));
- auto *query1 = _CLNEW TermQuery(t1);
- Hits *hits1 = searcher.search(query1);
+ auto t1 = std::make_unique<Term>(_T("f3"), _T("value1"));
+ auto query1 = std::make_unique<TermQuery>(t1.get());
+ Hits *hits1 = searcher.search(query1.get());
CLUCENE_ASSERT(0 == hits1->length());
- Term *t2 = _CLNEW Term(_T("f3"), _T("value2"));
- auto *query2 = _CLNEW TermQuery(t2);
- Hits *hits2 = searcher.search(query2);
+ auto t2 = std::make_unique<Term>(_T("f3"), _T("value2"));
+ auto query2 = std::make_unique<TermQuery>(t2.get());
+ Hits *hits2 = searcher.search(query2.get());
CLUCENE_ASSERT(1 == hits2->length());
doc.clear();
- //_CLDELETE(field)
+ _CLDELETE(stream)
+ _CLDELETE(stream2)
+ _CLDELETE(hits1)
+ _CLDELETE(hits2)
+ _CLDELETE_ARRAY(field_name)
}
void TestMultiAddValueField(CuTest *tc) {
RAMDirectory dir;
auto field_name = lucene::util::Misc::_charToWide("f3");
- SimpleAnalyzer<TCHAR> analyzer;
+ SimpleAnalyzer<char> analyzer;
IndexWriter w(&dir, &analyzer, true);
+ w.setUseCompoundFile(false);
Document doc;
- doc.add(*_CLNEW Field(field_name, _T("value1"), Field::INDEX_TOKENIZED |
Field::STORE_NO));
- doc.add(*_CLNEW Field(field_name, _T("value2"), Field::INDEX_TOKENIZED |
Field::STORE_NO));
-
- w.addDocument(&doc);
- w.close();
+ auto field1 = _CLNEW Field(field_name, Field::INDEX_TOKENIZED |
Field::STORE_NO);
+ auto char_string_reader =
std::make_unique<lucene::util::SStringReader<char>>();
+ char_string_reader->init("value1", 6, false);
+ auto stream = analyzer.tokenStream(field1->name(),
char_string_reader.get());
+ field1->setValue(stream);
+ doc.add(*field1);
+
+ auto field2 = _CLNEW Field(field_name, Field::INDEX_TOKENIZED |
Field::STORE_NO);
+ auto char_string_reader2 =
std::make_unique<lucene::util::SStringReader<char>>();
+ char_string_reader2->init("value2", 6, false);
+ auto stream2 = analyzer.tokenStream(field2->name(),
char_string_reader2.get());
+ field2->setValue(stream2);
+ doc.add(*field2);
+
+ try {
+ w.addDocument(&doc);
+ w.close();
+ } catch (CLuceneError& ae) {
+ std::cout <<ae.what() << std::endl;
+ throw ae;
+ }
- Term *t1 = _CLNEW Term(_T("f3"), _T("value1"));
- auto *query1 = _CLNEW TermQuery(t1);
+ auto t1 = std::make_unique<Term>(_T("f3"), _T("value1"));
+ auto query1 = std::make_unique<TermQuery>(t1.get());
IndexSearcher searcher(&dir);
- Hits *hits1 = searcher.search(query1);
+ Hits *hits1 = searcher.search(query1.get());
CLUCENE_ASSERT(1 == hits1->length());
- Term *t2 = _CLNEW Term(_T("f3"), _T("value2"));
- auto *query2 = _CLNEW TermQuery(t2);
- Hits *hits2 = searcher.search(query2);
+ auto t2 = std::make_unique<Term>(_T("f3"), _T("value2"));
+ auto query2 = std::make_unique<TermQuery>(t2.get());
+ Hits *hits2 = searcher.search(query2.get());
CLUCENE_ASSERT(1 == hits2->length());
doc.removeFields(_T("f3"));
CLUCENE_ASSERT(doc.getFields()->size() == 0);
-
- _CLDELETE(query1);
- _CLDELETE(query2);
- _CLDELETE(t1);
- _CLDELETE(t2);
_CLDELETE(hits1);
_CLDELETE(hits2);
+ _CLDELETE(stream);
+ _CLDELETE(stream2);
+ _CLDELETE_ARRAY(field_name)
}
void TestFields(CuTest *tc) {
@@ -426,38 +454,42 @@ const int32_t MAX_FIELD_LEN = 0x7FFFFFFFL;
const int32_t MAX_BUFFER_DOCS = 100000000;
const int32_t MERGE_FACTOR = 100000000;
void TestAddDocument(CuTest *tc) {
- RAMDirectory dir;
- SimpleAnalyzer<char> sanalyzer;
- IndexWriter w(&dir, NULL, true);
- w.setUseCompoundFile(false);
- w.setMaxBufferedDocs(MAX_BUFFER_DOCS);
- w.setRAMBufferSizeMB(256);
- w.setMaxFieldLength(MAX_FIELD_LEN);
- w.setMergeFactor(MERGE_FACTOR);
- w.setDocumentWriter(_CLNEW SDocumentsWriter<char>(w.getDirectory(), &w));
- Document doc;
- auto field_name = lucene::util::Misc::_charToWide("f3");
- auto field = _CLNEW Field(field_name, Field::INDEX_TOKENIZED |
Field::STORE_NO);
- doc.add(*field);
+ try {
+ RAMDirectory dir;
+ SimpleAnalyzer<char> sanalyzer;
+ IndexWriter w(&dir, &sanalyzer, true);
+ w.setUseCompoundFile(false);
+ w.setMaxBufferedDocs(MAX_BUFFER_DOCS);
+ w.setRAMBufferSizeMB(256);
+ w.setMaxFieldLength(MAX_FIELD_LEN);
+ w.setMergeFactor(MERGE_FACTOR);
+ w.setDocumentWriter(_CLNEW SDocumentsWriter<char>(w.getDirectory(),
&w));
+ Document doc;
+ auto field_name = lucene::util::Misc::_charToWide("f3");
+ auto field = _CLNEW Field(field_name, Field::INDEX_TOKENIZED |
Field::STORE_NO);
+ doc.add(*field);
- for (int i = 0; i <= 2000000; i++) {
- std::string value1 = "value1";
- if (i > 0)
- value1 = generateRandomString(2000);
- auto stringReader = _CLNEW lucene::util::SStringReader<char>(
- value1.c_str(), strlen(value1.c_str()), false);
- auto stream = sanalyzer.reusableTokenStream(field_name, stringReader);
+ for (int i = 0; i <= 2000000; i++) {
+ std::string value1 = "value1";
+ if (i > 0) value1 = generateRandomString(2000);
+ auto stringReader = _CLNEW lucene::util::SStringReader<char>(
+ value1.c_str(), strlen(value1.c_str()), false);
+ auto stream = sanalyzer.reusableTokenStream(field_name,
stringReader);
- field->setValue(stream);
- w.addDocument(&doc, &sanalyzer);
+ field->setValue(stream);
+ w.addDocument(&doc, &sanalyzer);
+ }
+ IndexSearcher searcher(&dir);
+ Term* t2 = _CLNEW Term(_T("f3"), _T("value1"));
+ auto* query2 = _CLNEW TermQuery(t2);
+ Hits* hits2 = searcher.search(query2);
+ CLUCENE_ASSERT(1 == hits2->length());
+ doc.clear();
+ w.close();
+ } catch (CLuceneError& ae) {
+ std::cout <<ae.what() << std::endl;
+ throw ae;
}
- IndexSearcher searcher(&dir);
- Term *t2 = _CLNEW Term(_T("f3"), _T("value1"));
- auto *query2 = _CLNEW TermQuery(t2);
- Hits *hits2 = searcher.search(query2);
- CLUCENE_ASSERT(1 == hits2->length());
- doc.clear();
- w.close();
}
void TestNewFieldBench(CuTest *tc) {
@@ -495,18 +527,10 @@ void TestNewFieldBench(CuTest *tc) {
CuSuite *testdocument(void) {
CuSuite *suite = CuSuiteNew(_T("CLucene Document Test"));
- //SUITE_ADD_TEST(suite, TestCompressedDocument);
- //SUITE_ADD_TEST(suite, TestBinaryDocument);
- //SUITE_ADD_TEST(suite, TestLazyCompressedDocument);
- //SUITE_ADD_TEST(suite, TestLazyBinaryDocument);
- // SUITE_ADD_TEST(suite, TestFieldSelectors);
SUITE_ADD_TEST(suite, TestFields);
SUITE_ADD_TEST(suite, TestMultiSetValueField);
SUITE_ADD_TEST(suite, TestMultiAddValueField);
- //SUITE_ADD_TEST(suite, TestSetFieldBench);
- //SUITE_ADD_TEST(suite, TestNewFieldBench);
SUITE_ADD_TEST(suite, TestReaderValueField);
- SUITE_ADD_TEST(suite, TestAddDocument);
- //SUITE_ADD_TEST(suite, TestDateTools);
+ //SUITE_ADD_TEST(suite, TestAddDocument);
return suite;
}
diff --git a/src/test/search/TestSearchRange.cpp
b/src/test/search/TestSearchRange.cpp
index 82a4e170..a94fb386 100644
--- a/src/test/search/TestSearchRange.cpp
+++ b/src/test/search/TestSearchRange.cpp
@@ -1087,7 +1087,7 @@ static void testSearchEqual(CuTest* tc) {
searcher._search(query, [&result2](DocRange* docRange) {
if (docRange->type_ == DocRangeType::kMany) {
result2.addMany(docRange->doc_many_size_,
- docRange->doc_many.data());
+ docRange->doc_many->data());
} else if (docRange->type_ == DocRangeType::kRange) {
result2.addRange(docRange->doc_range.first,
docRange->doc_range.second);
diff --git a/src/test/test.h b/src/test/test.h
index cbf08803..08c168cb 100644
--- a/src/test/test.h
+++ b/src/test/test.h
@@ -153,7 +153,7 @@ void TestAssertIndexReaderEquals(CuTest *tc, IndexReader*
reader1, IndexReader*
extern unittest tests[];
-#define CLUCENE_DATA_LOCATION1 "../../src/test/data/"
+#define CLUCENE_DATA_LOCATION1 "./data/"
#define CLUCENE_DATA_LOCATION2 "../src/test/data/"
#define CLUCENE_DATA_LOCATION3 "../../../src/test/data/"
#define CLUCENE_DATA_LOCATIONENV "srcdir"
diff --git a/src/test/tests.cpp b/src/test/tests.cpp
index 5d5421cb..372a4a28 100644
--- a/src/test/tests.cpp
+++ b/src/test/tests.cpp
@@ -7,48 +7,16 @@
#include "test.h"
unittest tests[] = {
-// {"threads", testatomicupdates},
-// {"indexreader", testindexreader},
-// {"indexsearcher", testIndexSearcher},
-// {"reuters", testreuters},
{"analysis", testanalysis},
{"analyzers", testanalyzers},
{"analysis", teststandard95},
{"document", testdocument},
{"field", testField},
-// {"numbertools", testNumberTools},
-// {"debug", testdebug},
-// {"ramdirectory", testRAMDirectory},
-// {"indexwriter", testindexwriter},
-// {"indexmodifier", testIndexModifier},
-// {"addIndexesNoOptimize", testAddIndexesNoOptimize},
-// {"highfreq", testhighfreq},
-// {"priorityqueue", testpriorityqueue},
-// {"datetools", testDateTools},
-// {"queryparser", testQueryParser},
-// {"mfqueryparser", testMultiFieldQueryParser},
-// {"boolean", testBoolean},
-// {"search", testsearch},
-// {"rangefilter", testRangeFilter},
-// {"queries", testqueries},
-// {"csrqueries", testConstantScoreQueries},
-// {"termvector", testtermvector},
-// {"sort", testsort},
-// {"duplicates", testduplicates},
-// {"datefilter", testdatefilter},
-// {"wildcard", testwildcard},
-// {"store", teststore},
-// {"utf8", testutf8},
-// {"bitset", testBitSet},
{"bkd", testBKD},
{"MSBRadixSorter",testMSBRadixSorter},
-// {"extractterms", testExtractTerms},
-// {"spanqueries", testSpanQueries},
-// {"stringbuffer", testStringBuffer},
-// {"termvectorsreader", testTermVectorsReader},
{"strconvert", testStrConvert},
{"searchRange", testSearchRange},
#ifdef TEST_CONTRIB_LIBS
- {"chinese", testchinese},
+ //{"chinese", testchinese},
#endif
{"LastTest", NULL}};
diff --git a/src/test/util/TestBKD.cpp b/src/test/util/TestBKD.cpp
index ec6c8f99..330f72f7 100644
--- a/src/test/util/TestBKD.cpp
+++ b/src/test/util/TestBKD.cpp
@@ -36,25 +36,30 @@ void TestVisitor1::visit(int docID) {
}
}
-bool TestVisitor1::matches(uint8_t *packedValue) {
+int TestVisitor1::matches(uint8_t* packedValue) {
std::vector<uint8_t> result(4);
std::copy(packedValue, packedValue + 4, result.begin());
int x = NumericUtils::sortableBytesToInt(result, 0);
if (x >= queryMin && x <= queryMax) {
- return true;
+ return 0;
+ }
+ if (x < queryMin) {
+ return -1;
+ }
+ if (x > queryMax) {
+ return 1;
}
- return false;
}
void TestVisitor1::visit(roaring::Roaring *docID, std::vector<uint8_t>
&packedValue) {
- if (!matches(packedValue.data())) {
+ if (matches(packedValue.data()) != 0) {
return;
}
visit(*docID);
}
void TestVisitor1::visit(bkd::bkd_docid_set_iterator *iter,
std::vector<uint8_t> &packedValue) {
- if (!matches(packedValue.data())) {
+ if (matches(packedValue.data()) != 0) {
return;
}
int32_t docID = iter->docid_set->nextDoc();
@@ -64,8 +69,7 @@ void TestVisitor1::visit(bkd::bkd_docid_set_iterator *iter,
std::vector<uint8_t>
}
}
-void TestVisitor1::visit(
- int docID, std::vector<uint8_t> &packedValue) {
+int TestVisitor1::visit(int docID, std::vector<uint8_t>& packedValue) {
int x = NumericUtils::sortableBytesToInt(packedValue, 0);
if (0) {
wcout << L"visit docID=" << docID << L" x=" << x << endl;
@@ -73,17 +77,29 @@ void TestVisitor1::visit(
if (x >= queryMin && x <= queryMax) {
//wcout << L"visit docID=" << docID << L" x=" << x << endl;
hits->set(docID);
+ return 0;
+ }
+ if (x < queryMin) {
+ return -1;
}
+ if (x > queryMax) {
+ return 1;
+ }
+ return 0;
}
-lucene::util::bkd::relation TestVisitor1::compare(
- std::vector<uint8_t> &minPacked, std::vector<uint8_t> &maxPacked) {
+lucene::util::bkd::relation TestVisitor1::compare_prefix(std::vector<uint8_t>&
prefix) {
+ return lucene::util::bkd::relation::CELL_CROSSES_QUERY;
+}
+
+lucene::util::bkd::relation TestVisitor1::compare(std::vector<uint8_t>&
minPacked,
+ std::vector<uint8_t>&
maxPacked) {
int min = NumericUtils::sortableBytesToInt(minPacked, 0);
int max = NumericUtils::sortableBytesToInt(maxPacked, 0);
assert(max >= min);
if (0) {
- wcout << L"compare: min=" << min << L" max=" << max << L" vs queryMin="
- << queryMin << L" queryMax=" << queryMax << endl;
+ wcout << L"compare: min=" << min << L" max=" << max << L" vs
queryMin=" << queryMin
+ << L" queryMax=" << queryMax << endl;
}
if (max < queryMin || min > queryMax) {
@@ -95,104 +111,262 @@ lucene::util::bkd::relation TestVisitor1::compare(
}
}
-TestVisitor::TestVisitor(const uint8_t *qMin, const uint8_t *qMax,
- BitSet *h, predicate p) {
+template <predicate QT>
+TestVisitor<QT>::TestVisitor(const uint8_t* qMin, const uint8_t* qMax, BitSet*
h) {
queryMin = qMin;
queryMax = qMax;
hits = h;
- pred = p;
}
-bool TestVisitor::matches(uint8_t *packedValue) {
+template <predicate QT>
+int TestVisitor<QT>::matches(uint8_t* packed_value) {
+ bool all_greater_than_max = true;
+ bool all_within_range = true;
+
for (int dim = 0; dim < reader->num_data_dims_; dim++) {
int offset = dim * reader->bytes_per_dim_;
- if (pred == L) {
- if (lucene::util::FutureArrays::CompareUnsigned(
- packedValue, offset, offset + reader->bytes_per_dim_,
queryMax, offset,
- offset + reader->bytes_per_dim_) >= 0) {
- // Doc's value is too high, in this dimension
- return false;
- }
- } else if (pred == G) {
- if (lucene::util::FutureArrays::CompareUnsigned(
- packedValue, offset, offset + reader->bytes_per_dim_,
queryMin, offset,
- offset + reader->bytes_per_dim_) <= 0) {
- // Doc's value is too high, in this dimension
- return false;
+
+ auto result_max = lucene::util::FutureArrays::CompareUnsigned(
+ packed_value, offset, offset + reader->bytes_per_dim_,
queryMax, offset,
+ offset + reader->bytes_per_dim_);
+
+ auto result_min = lucene::util::FutureArrays::CompareUnsigned(
+ packed_value, offset, offset + reader->bytes_per_dim_,
queryMin, offset,
+ offset + reader->bytes_per_dim_);
+
+ all_greater_than_max &= (result_max > 0);
+ all_within_range &= (result_min > 0 && result_max < 0);
+
+ if (!all_greater_than_max && !all_within_range) {
+ return -1;
+ }
+ }
+
+ if (all_greater_than_max) {
+ return 1;
+ } else if (all_within_range) {
+ return 0;
+ } else {
+ return -1;
+ }
+}
+
+template <>
+int TestVisitor<predicate::EQ>::matches(uint8_t* packed_value) {
+ // if query type is equal, query_min == query_max
+ if (reader->num_data_dims_ == 1) {
+ return std::memcmp(packed_value, queryMin, reader->bytes_per_dim_);
+ } else {
+ // return -1 if any dim < matched value; else 1 if any dim > matched value,
or 0 when all dims are equal
+ int return_result = 0;
+ for (int dim = 0; dim < reader->num_data_dims_; dim++) {
+ int offset = dim * reader->bytes_per_dim_;
+ auto result = lucene::util::FutureArrays::CompareUnsigned(
+ packed_value, offset, offset + reader->bytes_per_dim_,
queryMin, offset,
+ offset + reader->bytes_per_dim_);
+ if (result < 0) {
+ return -1;
+ } else if (result > 0) {
+ return_result = 1;
}
- } else {
- if (lucene::util::FutureArrays::CompareUnsigned(
- packedValue, offset, offset + reader->bytes_per_dim_,
queryMin, offset,
- offset + reader->bytes_per_dim_) < 0) {
- // Doc's value is too low, in this dimension
- return false;
+ }
+ return return_result;
+ }
+}
+
+template <>
+int TestVisitor<predicate::L>::matches(uint8_t* packed_value) {
+ if (reader->num_data_dims_ == 1) {
+ auto result = std::memcmp(packed_value, queryMax,
reader->bytes_per_dim_);
+ if (result >= 0) {
+ return 1;
+ }
+ return 0;
+ } else {
+ bool all_greater_or_equal = true;
+ bool all_lesser = true;
+
+ for (int dim = 0; dim < reader->num_data_dims_; dim++) {
+ int offset = dim * reader->bytes_per_dim_;
+ auto result = lucene::util::FutureArrays::CompareUnsigned(
+ packed_value, offset, offset + reader->bytes_per_dim_,
queryMax, offset,
+ offset + reader->bytes_per_dim_);
+
+ all_greater_or_equal &=
+ (result >= 0); // Remains true only if all results
are greater or equal
+ all_lesser &= (result < 0); // Remains true only if all results
are lesser
+ }
+
+ // Return 1 if all values are greater or equal, 0 if all are lesser,
otherwise -1
+ return all_greater_or_equal ? 1 : (all_lesser ? 0 : -1);
+ }
+}
+
+template <>
+int TestVisitor<predicate::LE>::matches(uint8_t* packed_value) {
+ if (reader->num_data_dims_ == 1) {
+ auto result = std::memcmp(packed_value, queryMax,
reader->bytes_per_dim_);
+ if (result > 0) {
+ return 1;
+ }
+ return 0;
+ } else {
+ bool all_greater = true;
+ bool all_lesser_or_equal = true;
+
+ for (int dim = 0; dim < reader->num_data_dims_; dim++) {
+ int offset = dim * reader->bytes_per_dim_;
+ auto result = lucene::util::FutureArrays::CompareUnsigned(
+ packed_value, offset, offset + reader->bytes_per_dim_,
queryMax, offset,
+ offset + reader->bytes_per_dim_);
+
+ all_greater &= (result > 0); // Remains true only if all results
are greater
+ all_lesser_or_equal &=
+ (result <= 0); // Remains true only if all results are
lesser or equal
+ }
+
+ // Return 1 if all values are greater, 0 if all are lesser or equal,
otherwise -1
+ return all_greater ? 1 : (all_lesser_or_equal ? 0 : -1);
+ }
+}
+
+template <>
+int TestVisitor<predicate::G>::matches(uint8_t* packed_value) {
+ if (reader->num_data_dims_ == 1) {
+ auto result = std::memcmp(packed_value, queryMin,
reader->bytes_per_dim_);
+ if (result <= 0) {
+ return -1;
+ }
+ return 0;
+ } else {
+ for (int dim = 0; dim < reader->num_data_dims_; dim++) {
+ int offset = dim * reader->bytes_per_dim_;
+ auto result = lucene::util::FutureArrays::CompareUnsigned(
+ packed_value, offset, offset + reader->bytes_per_dim_,
queryMin, offset,
+ offset + reader->bytes_per_dim_);
+ if (result <= 0) {
+ return -1;
}
- if (lucene::util::FutureArrays::CompareUnsigned(
- packedValue, offset, offset + reader->bytes_per_dim_,
queryMax, offset,
- offset + reader->bytes_per_dim_) > 0) {
- // Doc's value is too high, in this dimension
- return false;
+ }
+ return 0;
+ }
+}
+
+template <>
+int TestVisitor<predicate::GE>::matches(uint8_t* packed_value) {
+ if (reader->num_data_dims_ == 1) {
+ auto result = std::memcmp(packed_value, queryMin,
reader->bytes_per_dim_);
+ if (result < 0) {
+ return -1;
+ }
+ return 0;
+ } else {
+ for (int dim = 0; dim < reader->num_data_dims_; dim++) {
+ int offset = dim * reader->bytes_per_dim_;
+ auto result = lucene::util::FutureArrays::CompareUnsigned(
+ packed_value, offset, offset + reader->bytes_per_dim_,
queryMin, offset,
+ offset + reader->bytes_per_dim_);
+ if (result < 0) {
+ return -1;
}
}
+ return 0;
}
- return true;
}
-void TestVisitor::visit(int rowID) {
+template <predicate QT>
+void TestVisitor<QT>::visit(int rowID) {
hits->set(rowID);
if (0) {
std::wcout << L"visit docID=" << rowID << std::endl;
}
}
-void TestVisitor::visit(int rowID, std::vector<uint8_t> &packedValue) {
+template <predicate QT>
+int TestVisitor<QT>::visit(int rowID, std::vector<uint8_t>& packedValue) {
if (0) {
int x = lucene::util::NumericUtils::sortableBytesToLong(packedValue,
0);
std::wcout << L"visit docID=" << rowID << L" x=" << x << std::endl;
}
- if (matches(packedValue.data())) {
- hits->set(rowID);
+ auto result = matches(packedValue.data());
+ if (result != 0) {
+ return result;
}
+ hits->set(rowID);
+ return 0;
}
-lucene::util::bkd::relation TestVisitor::compare(std::vector<uint8_t>
&minPacked,
- std::vector<uint8_t>
&maxPacked) {
+template <>
+lucene::util::bkd::relation
TestVisitor<predicate::L>::compare(std::vector<uint8_t>& min_packed,
+
std::vector<uint8_t>& max_packed) {
bool crosses = false;
-
for (int dim = 0; dim < reader->num_data_dims_; dim++) {
int offset = dim * reader->bytes_per_dim_;
-
- if (pred == L) {
- if (lucene::util::FutureArrays::CompareUnsigned(
- minPacked.data(), offset, offset +
reader->bytes_per_dim_, queryMax, offset,
- offset + reader->bytes_per_dim_) >= 0) {
- return lucene::util::bkd::relation::CELL_OUTSIDE_QUERY;
- }
- } else if (pred == G) {
- if (lucene::util::FutureArrays::CompareUnsigned(
- maxPacked.data(), offset, offset +
reader->bytes_per_dim_, queryMin, offset,
- offset + reader->bytes_per_dim_) <= 0) {
- return lucene::util::bkd::relation::CELL_OUTSIDE_QUERY;
- }
- } else {
- if (lucene::util::FutureArrays::CompareUnsigned(
- minPacked.data(), offset, offset +
reader->bytes_per_dim_, queryMax, offset,
- offset + reader->bytes_per_dim_) > 0 ||
- lucene::util::FutureArrays::CompareUnsigned(
- maxPacked.data(), offset, offset +
reader->bytes_per_dim_, queryMin, offset,
- offset + reader->bytes_per_dim_) < 0) {
- return lucene::util::bkd::relation::CELL_OUTSIDE_QUERY;
- }
+ if (lucene::util::FutureArrays::CompareUnsigned(
+ min_packed.data(), offset, offset +
reader->bytes_per_dim_, queryMax, offset,
+ offset + reader->bytes_per_dim_) >= 0) {
+ return lucene::util::bkd::relation::CELL_OUTSIDE_QUERY;
}
+ crosses |= lucene::util::FutureArrays::CompareUnsigned(
+ min_packed.data(), offset, offset +
reader->bytes_per_dim_, queryMin,
+ offset, offset + reader->bytes_per_dim_) <= 0 ||
+ lucene::util::FutureArrays::CompareUnsigned(
+ max_packed.data(), offset, offset +
reader->bytes_per_dim_, queryMax,
+ offset, offset + reader->bytes_per_dim_) >= 0;
+ }
+ if (crosses) {
+ return lucene::util::bkd::relation::CELL_CROSSES_QUERY;
+ } else {
+ return lucene::util::bkd::relation::CELL_INSIDE_QUERY;
+ }
+}
+template <>
+lucene::util::bkd::relation
TestVisitor<predicate::G>::compare(std::vector<uint8_t>& min_packed,
+
std::vector<uint8_t>& max_packed) {
+ bool crosses = false;
+ for (int dim = 0; dim < reader->num_data_dims_; dim++) {
+ int offset = dim * reader->bytes_per_dim_;
+ if (lucene::util::FutureArrays::CompareUnsigned(
+ max_packed.data(), offset, offset +
reader->bytes_per_dim_, queryMin, offset,
+ offset + reader->bytes_per_dim_) <= 0) {
+ return lucene::util::bkd::relation::CELL_OUTSIDE_QUERY;
+ }
crosses |= lucene::util::FutureArrays::CompareUnsigned(
- minPacked.data(), offset, offset +
reader->bytes_per_dim_, queryMin,
+ min_packed.data(), offset, offset +
reader->bytes_per_dim_, queryMin,
offset, offset + reader->bytes_per_dim_) <= 0 ||
lucene::util::FutureArrays::CompareUnsigned(
- maxPacked.data(), offset, offset +
reader->bytes_per_dim_, queryMax,
+ max_packed.data(), offset, offset +
reader->bytes_per_dim_, queryMax,
offset, offset + reader->bytes_per_dim_) >= 0;
}
+ if (crosses) {
+ return lucene::util::bkd::relation::CELL_CROSSES_QUERY;
+ } else {
+ return lucene::util::bkd::relation::CELL_INSIDE_QUERY;
+ }
+}
+template <predicate QT>
+lucene::util::bkd::relation TestVisitor<QT>::compare(std::vector<uint8_t>&
min_packed,
+ std::vector<uint8_t>&
max_packed) {
+ bool crosses = false;
+ for (int dim = 0; dim < reader->num_data_dims_; dim++) {
+ int offset = dim * reader->bytes_per_dim_;
+ if (lucene::util::FutureArrays::CompareUnsigned(
+ min_packed.data(), offset, offset +
reader->bytes_per_dim_, queryMax, offset,
+ offset + reader->bytes_per_dim_) > 0 ||
+ lucene::util::FutureArrays::CompareUnsigned(
+ max_packed.data(), offset, offset +
reader->bytes_per_dim_, queryMin, offset,
+ offset + reader->bytes_per_dim_) < 0) {
+ return lucene::util::bkd::relation::CELL_OUTSIDE_QUERY;
+ }
+ crosses |= lucene::util::FutureArrays::CompareUnsigned(
+ min_packed.data(), offset, offset +
reader->bytes_per_dim_, queryMin,
+ offset, offset + reader->bytes_per_dim_) < 0 ||
+ lucene::util::FutureArrays::CompareUnsigned(
+ max_packed.data(), offset, offset +
reader->bytes_per_dim_, queryMax,
+ offset, offset + reader->bytes_per_dim_) > 0;
+ }
if (crosses) {
return lucene::util::bkd::relation::CELL_CROSSES_QUERY;
} else {
@@ -200,6 +374,23 @@ lucene::util::bkd::relation
TestVisitor::compare(std::vector<uint8_t> &minPacked
}
}
+template <predicate QT>
+lucene::util::bkd::relation
TestVisitor<QT>::compare_prefix(std::vector<uint8_t>& prefix) {
+ if (lucene::util::FutureArrays::CompareUnsigned(prefix.data(), 0,
prefix.size(), queryMax, 0,
+ prefix.size()) > 0 ||
+ lucene::util::FutureArrays::CompareUnsigned(prefix.data(), 0,
prefix.size(), queryMin, 0,
+ prefix.size()) < 0) {
+ return lucene::util::bkd::relation::CELL_OUTSIDE_QUERY;
+ }
+ if (lucene::util::FutureArrays::CompareUnsigned(prefix.data(), 0,
prefix.size(), queryMin, 0,
+ prefix.size()) > 0 &&
+ lucene::util::FutureArrays::CompareUnsigned(prefix.data(), 0,
prefix.size(), queryMax, 0,
+ prefix.size()) < 0) {
+ return lucene::util::bkd::relation::CELL_INSIDE_QUERY;
+ }
+ return lucene::util::bkd::relation::CELL_CROSSES_QUERY;
+}
+
Directory *getDirectory(int numPoints) {
Directory *dir;
if (numPoints > 100000) {
@@ -279,7 +470,7 @@ void testSameInts1DRead(CuTest *tc) {
r->intersect(v.get());
} catch (CLuceneError &r) {
//printf("something wrong in read\n");
- printf("clucene error: %s\n", r.what());
+ printf("clucene error in testSameInts1DRead: %s\n", r.what());
}
for (int docID = 0; docID < N; docID++) {
bool expected = docID >= queryMin && docID <= queryMax;
@@ -311,7 +502,7 @@ void testSameInts1DRead(CuTest *tc) {
void testBug1Write(CuTest *tc) {
const int N = 8;
- Directory *dir(FSDirectory::getDirectory("TestBKDTree"));
+ Directory *dir(FSDirectory::getDirectory("testBug1"));
shared_ptr<bkd::bkd_writer> w =
make_shared<bkd::bkd_writer>(N, 1, 1, 4, 4, 100.0f, N, true);
w->docs_seen_ = N;
@@ -331,9 +522,9 @@ void testBug1Write(CuTest *tc) {
int64_t indexFP;
{
- std::unique_ptr<IndexOutput> out(dir->createOutput("bkd3"));
- std::unique_ptr<IndexOutput> meta_out(dir->createOutput("bkd3_meta"));
- std::unique_ptr<IndexOutput>
index_out(dir->createOutput("bkd3_index"));
+ std::unique_ptr<IndexOutput> out(dir->createOutput("bkd"));
+ std::unique_ptr<IndexOutput> meta_out(dir->createOutput("bkd_meta"));
+ std::unique_ptr<IndexOutput> index_out(dir->createOutput("bkd_index"));
try {
indexFP = w->finish(out.get(), index_out.get());
w->meta_finish(meta_out.get(), indexFP, 0);
@@ -348,13 +539,15 @@ void testBug1Write(CuTest *tc) {
void testBug1Read(CuTest *tc) {
uint64_t str = Misc::currentTimeMillis();
- Directory *dir(FSDirectory::getDirectory("TestBKDTree"));
+ auto *dir = FSDirectory::getDirectory("testBug1");
{
- IndexInput *in_(dir->openInput("bkd3"));
- IndexInput *meta_in_(dir->openInput("bkd3_meta"));
- IndexInput *index_in_(dir->openInput("bkd3_index"));
-
- shared_ptr<bkd::bkd_reader> r = make_shared<bkd::bkd_reader>(in_);
+ auto closeDirectory = true;
+ auto bkd_reader =
+ std::make_shared<lucene::util::bkd::bkd_reader>(dir,
closeDirectory);
+ if (!bkd_reader->open()) {
+ printf("can not open bkd file\n");
+ exit(1);
+ }
// Simple 1D range query:
int value = 0;
auto result = std::make_unique<BitSet>(10);
@@ -364,27 +557,24 @@ void testBug1Read(CuTest *tc) {
const auto *max = reinterpret_cast<const uint8_t
*>(value_bytes.data());
const auto *min = reinterpret_cast<const uint8_t
*>(value_bytes.data());
- auto v = std::make_unique<TestVisitor>(min, max, result.get(), EQ);
+ auto v = std::make_unique<TestVisitor<EQ>>(min, max, result.get());
try {
- v->setReader(r);
- r->read_meta(meta_in_);
- //auto type = r->read_type();
- CuAssertEquals(tc, 0, r->type);
- r->read_index(index_in_);
- r->intersect(v.get());
+ v->setReader(bkd_reader);
+ bkd_reader->intersect(v.get());
} catch (CLuceneError &r) {
//printf("something wrong in read\n");
- printf("clucene error: %s\n", r.what());
+ printf("clucene error in testBug1Read: %s\n", r.what());
}
//printf("hits count=%d\n", result->count());
CuAssertEquals(tc, result->count(), 6);
//printf("\nFirst search time taken: %d ms\n\n", (int32_t)
(Misc::currentTimeMillis() - str));
}
+ _CLLDECDELETE(dir)
}
void testLowCardinalInts1DWrite(CuTest *tc) {
const int N = 1024 * 1024;
- Directory *dir(FSDirectory::getDirectory("TestBKDTree"));
+ Directory *dir(FSDirectory::getDirectory("testLowCardinalInts1D"));
shared_ptr<bkd::bkd_writer> w =
make_shared<bkd::bkd_writer>(N, 1, 1, 4, 512, 100.0f, N, true);
w->docs_seen_ = N;
@@ -402,9 +592,9 @@ void testLowCardinalInts1DWrite(CuTest *tc) {
// equivalent: ORIGINAL LINE: try (org.apache.lucene.store.IndexOutput out
=
// dir.createOutput("bkd", org.apache.lucene.store.IOContext.DEFAULT))
{
- std::unique_ptr<IndexOutput> out(dir->createOutput("bkd2"));
- std::unique_ptr<IndexOutput> meta_out(dir->createOutput("bkd2_meta"));
- std::unique_ptr<IndexOutput>
index_out(dir->createOutput("bkd2_index"));
+ std::unique_ptr<IndexOutput> out(dir->createOutput("bkd"));
+ std::unique_ptr<IndexOutput> meta_out(dir->createOutput("bkd_meta"));
+ std::unique_ptr<IndexOutput> index_out(dir->createOutput("bkd_index"));
//auto metaOffset = w->MetaInit(out.get());
try {
@@ -422,68 +612,68 @@ void testLowCardinalInts1DWrite(CuTest *tc) {
void testLowCardinalInts1DRead2(CuTest *tc) {
uint64_t str = Misc::currentTimeMillis();
const int N = 1024 * 1024;
- Directory *dir(FSDirectory::getDirectory("TestBKDTree"));
+ Directory *dir = FSDirectory::getDirectory("testLowCardinalInts1D");
{
- IndexInput *in_(dir->openInput("bkd2"));
- IndexInput *meta_in_(dir->openInput("bkd2_meta"));
- IndexInput *index_in_(dir->openInput("bkd2_index"));
- shared_ptr<bkd::bkd_reader> r = make_shared<bkd::bkd_reader>(in_);
+ auto closeDirectory = true;
+ auto bkd_reader =
+ std::make_shared<lucene::util::bkd::bkd_reader>(dir,
closeDirectory);
+ if (!bkd_reader->open()) {
+ printf("can not open bkd file\n");
+ exit(1);
+ }
// Simple 1D range query:
constexpr int queryMin = 0; //std::numeric_limits<int>::min();
constexpr int queryMax = 100;//std::numeric_limits<int>::max();
auto hits = std::make_shared<BitSet>(N);
auto v = std::make_unique<TestVisitor1>(queryMin, queryMax, hits);
try {
- r->read_meta(meta_in_);
- //auto type = r->read_type();
- CuAssertEquals(tc, 0, r->type);
- r->read_index(index_in_);
- r->intersect(v.get());
+ bkd_reader->intersect(v.get());
} catch (CLuceneError &r) {
//printf("something wrong in read\n");
- printf("clucene error: %s\n", r.what());
+ printf("clucene error in testLowCardinalInts1DRead2: %s\n",
r.what());
}
//printf("hits count=%d\n", hits->count());
CuAssertEquals(tc, hits->count(), 12928);
//printf("\nFirst search time taken: %d ms\n\n", (int32_t)
(Misc::currentTimeMillis() - str));
+ _CLLDECDELETE(dir)
}
}
void testLowCardinalInts1DRead(CuTest *tc) {
uint64_t str = Misc::currentTimeMillis();
const int N = 1024 * 1024;
- Directory *dir(FSDirectory::getDirectory("TestBKDTree"));
+ Directory *dir = FSDirectory::getDirectory("testLowCardinalInts1D");
{
- IndexInput *in_(dir->openInput("bkd2"));
- IndexInput *meta_in_(dir->openInput("bkd2_meta"));
- IndexInput *index_in_(dir->openInput("bkd2_index"));
+ auto closeDirectory = true;
+ auto bkd_reader =
+ std::make_shared<lucene::util::bkd::bkd_reader>(dir,
closeDirectory);
+ if (!bkd_reader->open()) {
+ printf("can not open bkd file\n");
+ exit(1);
+ }
- shared_ptr<bkd::bkd_reader> r = make_shared<bkd::bkd_reader>(in_);
// Simple 1D range query:
constexpr int queryMin = 0;//std::numeric_limits<int>::min();
constexpr int queryMax = 1;//std::numeric_limits<int>::max();
auto hits = std::make_shared<BitSet>(N);
auto v = std::make_unique<TestVisitor1>(queryMin, queryMax, hits);
try {
- r->read_meta(meta_in_);
- //auto type = r->read_type();
- CuAssertEquals(tc, 0, r->type);
- r->read_index(index_in_);
- r->intersect(v.get());
+ bkd_reader->intersect(v.get());
} catch (CLuceneError &r) {
//printf("something wrong in read\n");
- printf("clucene error: %s\n", r.what());
+ printf("clucene error in testLowCardinalInts1DRead: %s\n",
r.what());
}
//printf("hits count=%d\n", hits->count());
CuAssertEquals(tc, hits->count(), 256);
//printf("\nFirst search time taken: %d ms\n\n", (int32_t)
(Misc::currentTimeMillis() - str));
+ _CLLDECDELETE(dir)
}
}
void testBasicsInts1DWrite(CuTest *tc) {
const int N = 1024 * 1024;
- Directory *dir(FSDirectory::getDirectory("TestBKDTree"));
+ Directory *dir(FSDirectory::getDirectory("testBasicsInts1D"));
shared_ptr<bkd::bkd_writer> w =
make_shared<bkd::bkd_writer>(N, 1, 1, 4, 512, 100.0f, N, true);
w->docs_seen_ = N;
@@ -520,26 +710,26 @@ void testBasicsInts1DWrite(CuTest *tc) {
void testBasicsInts1DRead(CuTest *tc) {
uint64_t str = Misc::currentTimeMillis();
const int N = 1024 * 1024;
- Directory *dir(FSDirectory::getDirectory("TestBKDTree"));
+ Directory *dir = FSDirectory::getDirectory("testBasicsInts1D");
{
- IndexInput *in_(dir->openInput("bkd"));
- IndexInput *meta_in_(dir->openInput("bkd_meta"));
- IndexInput *index_in_(dir->openInput("bkd_index"));
- shared_ptr<bkd::bkd_reader> r = make_shared<bkd::bkd_reader>(in_);
+ auto closeDirectory = true;
+ auto bkd_reader =
+ std::make_shared<lucene::util::bkd::bkd_reader>(dir,
closeDirectory);
+ if (!bkd_reader->open()) {
+ printf("can not open bkd file\n");
+ exit(1);
+ }
+
// Simple 1D range query:
constexpr int queryMin = 1024;
constexpr int queryMax = std::numeric_limits<int>::max();
auto hits = std::make_shared<BitSet>(N);
auto v = std::make_unique<TestVisitor1>(queryMin, queryMax, hits);
try {
- r->read_meta(meta_in_);
- //auto type = r->read_type();
- CuAssertEquals(tc, 0, r->type);
- r->read_index(index_in_);
- r->intersect(v.get());
+ bkd_reader->intersect(v.get());
} catch (CLuceneError &r) {
//printf("something wrong in read\n");
- printf("clucene error: %s\n", r.what());
+ printf("clucene error in testBasicsInts1DRead: %s\n", r.what());
}
for (int docID = 0; docID < N; docID++) {
bool expected = docID >= queryMin && docID <= queryMax;
@@ -556,7 +746,7 @@ void testBasicsInts1DRead(CuTest *tc) {
auto v1 = std::make_unique<TestVisitor1>(queryMin, queryMax, hits1);
str = Misc::currentTimeMillis();
- r->intersect(v1.get());
+ bkd_reader->intersect(v1.get());
for (int docID = 0; docID < N; docID++) {
bool expected = docID >= queryMin && docID <= queryMax;
bool actual = hits1->get(N - docID - 1);
@@ -568,7 +758,7 @@ void testBasicsInts1DRead(CuTest *tc) {
}
//printf("\nSecond search time taken: %d ms\n\n", (int32_t)
(Misc::currentTimeMillis() - str));
}
- dir->close();
+ //dir->close();
_CLDECDELETE(dir);
}
@@ -595,7 +785,7 @@ void testHttplogsRead(CuTest *tc) {
const auto *max = reinterpret_cast<const uint8_t *>(scratch2.data());
const auto *min = reinterpret_cast<const uint8_t *>(scratch.data());
- auto v = std::make_unique<TestVisitor>(min, max, result.get(), G);
+ auto v = std::make_unique<TestVisitor<G>>(min, max, result.get());
v->setReader(r);
try {
str = Misc::currentTimeMillis();
@@ -608,7 +798,7 @@ void testHttplogsRead(CuTest *tc) {
//printf("\nsearch time taken: %d ms\n\n", (int32_t)
(Misc::currentTimeMillis() - str));
} catch (CLuceneError &r) {
//printf("something wrong in read\n");
- printf("clucene error: %s\n", r.what());
+ printf("clucene error in testHttplogsRead: %s\n", r.what());
}
//printf("result size = %d\n", result->count());
CuAssertEquals(tc, result->count(), 8445);
@@ -696,7 +886,7 @@ void testSame(CuTest *tc) {
{
//std::shared_ptr<Directory> dir{getDirectory(10001)};
const int N = 1024 * 1024;
- Directory *dir(FSDirectory::getDirectory("TestBKDTree"));
+ Directory *dir(FSDirectory::getDirectory("testSame"));
shared_ptr<bkd::bkd_writer> w =
make_shared<bkd::bkd_writer>(N, 1, 1, 4, 512, 100.0f, N, true);
@@ -724,11 +914,13 @@ void testSame(CuTest *tc) {
// equivalent: ORIGINAL LINE: try (org.apache.lucene.store.IndexInput
in =
// dir.openInput("bkd", org.apache.lucene.store.IOContext.DEFAULT))
{
- IndexInput *in_(dir->openInput("bkd"));
- IndexInput *meta_in_(dir->openInput("bkd_meta"));
- IndexInput *index_in_(dir->openInput("bkd_index"));
- //in_->seek(indexFP);
- shared_ptr<bkd::bkd_reader> r = make_shared<bkd::bkd_reader>(in_);
+ auto closeDirectory = true;
+ auto bkd_reader =
+ std::make_shared<lucene::util::bkd::bkd_reader>(dir,
closeDirectory);
+ if (!bkd_reader->open()) {
+ printf("can not open bkd file\n");
+ exit(1);
+ }
// Simple 1D range query:
constexpr int queryMin = 100;
@@ -737,11 +929,7 @@ void testSame(CuTest *tc) {
//std::shared_ptr<BitSet> hits;
auto hits = std::make_shared<BitSet>(N);
auto v = std::make_unique<TestVisitor1>(queryMin, queryMax, hits);
- r->read_meta(meta_in_);
- //auto type = r->read_type();
- CuAssertEquals(tc, 0, r->type);
- r->read_index(index_in_);
- r->intersect(v.get());
+ bkd_reader->intersect(v.get());
for (int docID = 0; docID < N; docID++) {
bool expected = (100 >= queryMin && 100 <= queryMax);
@@ -753,7 +941,7 @@ void testSame(CuTest *tc) {
//assertEquals(L"docID=" + to_wstring(docID), expected,
actual);
}
}
- dir->close();
+ //dir->close();
_CLDECDELETE(dir);
}
}
@@ -765,7 +953,7 @@ void
equal_predicate(std::shared_ptr<lucene::util::bkd::bkd_reader> r) {
const auto *max = reinterpret_cast<const uint8_t *>(&value);
const auto *min = reinterpret_cast<const uint8_t *>(&value);
- auto v = std::make_unique<TestVisitor>(min, max, result.get(), EQ);
+ auto v = std::make_unique<TestVisitor<EQ>>(min, max, result.get());
v->setReader(r);
r->intersect(v.get());
printf("count: %d\n", result->count());
@@ -788,7 +976,7 @@ void
less_equal_predicate(std::shared_ptr<lucene::util::bkd::bkd_reader> r) {
}
const auto *max = reinterpret_cast<const uint8_t *>(&value);
- auto v = std::make_unique<TestVisitor>(min.data(), max, result.get(),
LE);
+ auto v = std::make_unique<TestVisitor<LE>>(min.data(), max,
result.get());
v->setReader(r);
r->intersect(v.get());
printf("\ncount: %d\n", result->count());
@@ -814,7 +1002,7 @@ void
less_predicate(std::shared_ptr<lucene::util::bkd::bkd_reader> r) {
}
const auto *max = reinterpret_cast<const uint8_t *>(&value);
- auto v = std::make_unique<TestVisitor>(min.data(), max, result.get(), L);
+ auto v = std::make_unique<TestVisitor<L>>(min.data(), max, result.get());
v->setReader(r);
r->intersect(v.get());
printf("count: %d\n", result->count());
@@ -836,7 +1024,7 @@ void
greater_equal_predicate(std::shared_ptr<lucene::util::bkd::bkd_reader> r) {
}
const auto *min = reinterpret_cast<const uint8_t *>(&value);
- auto v = std::make_unique<TestVisitor>(min, max.data(), result.get(), GE);
+ auto v = std::make_unique<TestVisitor<GE>>(min, max.data(), result.get());
v->setReader(r);
r->intersect(v.get());
printf("count: %d\n", result->count());
@@ -858,7 +1046,7 @@ void
greater_predicate(std::shared_ptr<lucene::util::bkd::bkd_reader> r) {
}
const auto *min = reinterpret_cast<const uint8_t *>(&value);
- auto v = std::make_unique<TestVisitor>(min, max.data(), result.get(), G);
+ auto v = std::make_unique<TestVisitor<G>>(min, max.data(), result.get());
v->setReader(r);
r->intersect(v.get());
printf("count: %d\n", result->count());
diff --git a/src/test/util/TestBKD.h b/src/test/util/TestBKD.h
index a66f252a..327c70fe 100644
--- a/src/test/util/TestBKD.h
+++ b/src/test/util/TestBKD.h
@@ -26,19 +26,20 @@ public:
}
}
void visit(std::vector<char>& docID, std::vector<uint8_t> &packedValue)
override {
- if (!matches(packedValue.data())) {
+ if (matches(packedValue.data()) != 0) {
return;
}
visit(roaring::Roaring::read(docID.data(), false));
}
void visit(roaring::Roaring *docID, std::vector<uint8_t> &packedValue)
override;
- void visit(int docID, std::vector<uint8_t> &packedValue) override;
void visit(lucene::util::bkd::bkd_docid_set_iterator *iter,
std::vector<uint8_t> &packedValue) override;
+ int visit(int docid, std::vector<uint8_t> &packedValue) override;
- bool matches(uint8_t *packedValue);
+ int matches(uint8_t *packedValue);
lucene::util::bkd::relation compare(std::vector<uint8_t> &minPacked,
std::vector<uint8_t> &maxPacked)
override;
+ lucene::util::bkd::relation compare_prefix(std::vector<uint8_t> &prefix)
override;
};
enum predicate {
@@ -49,20 +50,17 @@ enum predicate {
EQ
};
+template <predicate QT>
class TestVisitor : public lucene::util::bkd::bkd_reader::intersect_visitor {
private:
const uint8_t *queryMin;
const uint8_t *queryMax;
- //int queryMin = 0;
- //int queryMax = 0;
lucene::util::BitSet *hits;
- //std::shared_ptr<lucene::util::BitSet> hits;
std::shared_ptr<lucene::util::bkd::bkd_reader> reader;
- predicate pred;
public:
- TestVisitor(const uint8_t *queryMin, const uint8_t *queryMax,
lucene::util::BitSet *hits, predicate p);
- virtual ~TestVisitor() = default;
+ TestVisitor(const uint8_t *queryMin, const uint8_t *queryMax,
lucene::util::BitSet *hits);
+ ~TestVisitor() override = default;
void setReader(std::shared_ptr<lucene::util::bkd::bkd_reader> &r) { reader
= r; };
@@ -101,9 +99,10 @@ public:
docID = iter->docid_set->nextDoc();
}
};
- bool matches(uint8_t *packedValue);
+ int matches(uint8_t *packedValue);
+ lucene::util::bkd::relation compare_prefix(std::vector<uint8_t> &prefix)
override;
- void visit(int rowID, std::vector<uint8_t> &packedValue) override;
+ int visit(int rowID, std::vector<uint8_t> &packedValue) override;
lucene::util::bkd::relation compare(std::vector<uint8_t> &minPacked,
std::vector<uint8_t> &maxPacked)
override;
diff --git a/src/test/util/TestMSBRadixSorter.cpp
b/src/test/util/TestMSBRadixSorter.cpp
index d817e118..347ee43e 100644
--- a/src/test/util/TestMSBRadixSorter.cpp
+++ b/src/test/util/TestMSBRadixSorter.cpp
@@ -89,23 +89,21 @@ void TestMSBRadixSorter::testOneValue()
void TestMSBRadixSorter::testNValues()
{
const int n = 1000;
- std::vector<uint8_t> scratch(4);
auto y = std::vector<BytesRef>();
auto z = std::vector<BytesRef>();
for (int docID = 0; docID < n; docID++) {
+ std::vector<uint8_t> scratch(4);
NumericUtils::intToSortableBytes(docID, scratch, 0);
- BytesRef x1(scratch);
- y.emplace_back(x1);
+ y.emplace_back(scratch);
}
- //for (int docID = 0; docID <n; docID++) {
for (int docID = n-1; docID >= 0; docID--) {
+ std::vector<uint8_t> scratch(4);
NumericUtils::intToSortableBytes(docID, scratch, 0);
- BytesRef x1(scratch);
- z.emplace_back(x1);
+ z.emplace_back(scratch);
}
- test(y,z, n);
+ test(y, z, n);
}
void testSorter(CuTest *tc) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]