This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 6495eb28d81 [fix](index compaction)support compact multi segments in
one index #28889 (#29276)
6495eb28d81 is described below
commit 6495eb28d8118f8266a5212ca31e1af80e51e320
Author: qiye <[email protected]>
AuthorDate: Fri Dec 29 16:10:26 2023 +0800
[fix](index compaction)support compact multi segments in one index #28889
(#29276)
---
.gitmodules | 2 +-
be/src/clucene | 2 +-
be/src/common/config.cpp | 5 +-
be/src/common/config.h | 3 +-
be/src/olap/compaction.cpp | 38 ++-
.../segment_v2/inverted_index_compaction.cpp | 1 +
.../rowset/segment_v2/inverted_index_writer.cpp | 3 +-
build.sh | 2 +-
..._index_compaction_with_multi_index_segments.out | 214 +++++++++++++++
...dex_compaction_with_multi_index_segments.groovy | 286 +++++++++++++++++++++
10 files changed, 544 insertions(+), 12 deletions(-)
diff --git a/.gitmodules b/.gitmodules
index 9fe51bfd1d0..729354ec9c7 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -32,4 +32,4 @@
[submodule "be/src/clucene"]
path = be/src/clucene
url = https://github.com/apache/doris-thirdparty.git
- branch = clucene
+ branch = clucene-2.0
diff --git a/be/src/clucene b/be/src/clucene
index ed92e181310..c9030853082 160000
--- a/be/src/clucene
+++ b/be/src/clucene
@@ -1 +1 @@
-Subproject commit ed92e1813103a513aa0ee16730b94cc840daec73
+Subproject commit c90308530828a24fe421a9e19bc1e5e06f1460cd
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index ee7df5d96db..af08d2deaab 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -996,6 +996,9 @@ DEFINE_String(inverted_index_query_cache_limit, "10%");
// inverted index
DEFINE_mDouble(inverted_index_ram_buffer_size, "512");
+// -1 means this limit is not in effect.
+// Normally this should not be changed; it is useful for testing.
+DEFINE_mInt32(inverted_index_max_buffered_docs, "-1");
DEFINE_Int32(query_bkd_inverted_index_limit_percent, "5"); // 5%
// dict path for chinese analyzer
DEFINE_String(inverted_index_dict_path, "${DORIS_HOME}/dict");
@@ -1003,7 +1006,7 @@ DEFINE_Int32(inverted_index_read_buffer_size, "4096");
// tree depth for bkd index
DEFINE_Int32(max_depth_in_bkd_tree, "32");
// index compaction
-DEFINE_Bool(inverted_index_compaction_enable, "false");
+DEFINE_mBool(inverted_index_compaction_enable, "false");
// use num_broadcast_buffer blocks as buffer to do broadcast
DEFINE_Int32(num_broadcast_buffer, "32");
// semi-structure configs
diff --git a/be/src/common/config.h b/be/src/common/config.h
index c865c3b72b2..4fc3bc8dbfa 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1037,10 +1037,11 @@ DECLARE_Int32(query_bkd_inverted_index_limit_percent);
// 5%
// dict path for chinese analyzer
DECLARE_String(inverted_index_dict_path);
DECLARE_Int32(inverted_index_read_buffer_size);
+DECLARE_mInt32(inverted_index_max_buffered_docs);
// tree depth for bkd index
DECLARE_Int32(max_depth_in_bkd_tree);
// index compaction
-DECLARE_Bool(inverted_index_compaction_enable);
+DECLARE_mBool(inverted_index_compaction_enable);
// use num_broadcast_buffer blocks as buffer to do broadcast
DECLARE_Int32(num_broadcast_buffer);
// semi-structure configs
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index 761e50db73e..f6c8b3bb5bf 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -403,6 +403,34 @@ Status Compaction::do_compaction_impl(int64_t permits) {
if (_input_row_num > 0 && stats.rowid_conversion &&
config::inverted_index_compaction_enable) {
OlapStopWatch inverted_watch;
+
+ // check rowid_conversion correctness
+ Version version = _tablet->max_version();
+ DeleteBitmap output_rowset_delete_bitmap(_tablet->tablet_id());
+ std::set<RowLocation> missed_rows;
+ std::map<RowsetSharedPtr, std::list<std::pair<RowLocation,
RowLocation>>> location_map;
+ // Convert the delete bitmap of the input rowsets to the output rowset.
+ std::size_t missed_rows_size = 0;
+ _tablet->calc_compaction_output_rowset_delete_bitmap(
+ _input_rowsets, _rowid_conversion, 0, version.second + 1,
&missed_rows,
+ &location_map, _tablet->tablet_meta()->delete_bitmap(),
+ &output_rowset_delete_bitmap);
+ if (!allow_delete_in_cumu_compaction()) {
+ missed_rows_size = missed_rows.size();
+ if (compaction_type() == ReaderType::READER_CUMULATIVE_COMPACTION
&&
+ stats.merged_rows != missed_rows_size) {
+ std::string err_msg = fmt::format(
+ "cumulative compaction: the merged rows({}) is not
equal to missed "
+ "rows({}) in rowid conversion, tablet_id: {},
table_id:{}",
+ stats.merged_rows, missed_rows_size,
_tablet->tablet_id(),
+ _tablet->table_id());
+ DCHECK(false) << err_msg;
+ LOG(WARNING) << err_msg;
+ }
+ }
+
+ RETURN_IF_ERROR(_tablet->check_rowid_conversion(_output_rowset,
location_map));
+
// translation vec
// <<dest_idx_num, dest_docId>>
// the first level vector: index indicates src segment.
@@ -428,7 +456,7 @@ Status Compaction::do_compaction_impl(int64_t permits) {
// src index files
// format: rowsetId_segmentId
std::vector<std::string> src_index_files(src_segment_num);
- for (auto m : src_seg_to_id_map) {
+ for (const auto& m : src_seg_to_id_map) {
std::pair<RowsetId, uint32_t> p = m.first;
src_index_files[m.second] = p.first.to_string() + "_" +
std::to_string(p.second);
}
@@ -677,11 +705,11 @@ Status Compaction::modify_rowsets(const
Merger::Statistics* stats) {
// of incremental data later.
// TODO(LiaoXin): check if there are duplicate keys
std::size_t missed_rows_size = 0;
+ _tablet->calc_compaction_output_rowset_delete_bitmap(
+ _input_rowsets, _rowid_conversion, 0, version.second + 1,
&missed_rows,
+ &location_map, _tablet->tablet_meta()->delete_bitmap(),
+ &output_rowset_delete_bitmap);
if (!allow_delete_in_cumu_compaction()) {
- _tablet->calc_compaction_output_rowset_delete_bitmap(
- _input_rowsets, _rowid_conversion, 0, version.second + 1,
&missed_rows,
- &location_map, _tablet->tablet_meta()->delete_bitmap(),
- &output_rowset_delete_bitmap);
missed_rows_size = missed_rows.size();
if (compaction_type() == ReaderType::READER_CUMULATIVE_COMPACTION
&& stats != nullptr &&
stats->merged_rows != missed_rows_size) {
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
b/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
index 7f653a93591..b3a28c6ebfc 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
@@ -56,6 +56,7 @@ Status compact_column(int32_t index_id, int src_segment_num,
int dest_segment_nu
dest_index_dirs[i] = DorisCompoundDirectory::getDirectory(fs,
path.c_str(), true);
}
+ DCHECK_EQ(src_index_dirs.size(), trans_vec.size());
index_writer->indexCompaction(src_index_dirs, dest_index_dirs, trans_vec,
dest_segment_num_rows);
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
index 0949d708742..744710d9082 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
@@ -60,7 +60,6 @@
namespace doris::segment_v2 {
const int32_t MAX_FIELD_LEN = 0x7FFFFFFFL;
-const int32_t MAX_BUFFER_DOCS = 100000000;
const int32_t MERGE_FACTOR = 100000000;
const int32_t MAX_LEAF_COUNT = 1024;
const float MAXMBSortInHeap = 512.0 * 8;
@@ -193,8 +192,8 @@ public:
}
_index_writer =
std::make_unique<lucene::index::IndexWriter>(_dir.get(), _analyzer.get(),
create,
true);
- _index_writer->setMaxBufferedDocs(MAX_BUFFER_DOCS);
_index_writer->setRAMBufferSizeMB(config::inverted_index_ram_buffer_size);
+
_index_writer->setMaxBufferedDocs(config::inverted_index_max_buffered_docs);
_index_writer->setMaxFieldLength(MAX_FIELD_LEN);
_index_writer->setMergeFactor(MERGE_FACTOR);
_index_writer->setUseCompoundFile(false);
diff --git a/build.sh b/build.sh
index a7e31fa9b3b..15f22b03a72 100755
--- a/build.sh
+++ b/build.sh
@@ -302,7 +302,7 @@ update_submodule() {
}
update_submodule "be/src/apache-orc" "apache-orc"
"https://github.com/apache/doris-thirdparty/archive/refs/heads/orc.tar.gz"
-update_submodule "be/src/clucene" "clucene"
"https://github.com/apache/doris-thirdparty/archive/refs/heads/clucene.tar.gz"
+update_submodule "be/src/clucene" "clucene"
"https://github.com/apache/doris-thirdparty/archive/refs/heads/clucene-2.0.tar.gz"
if [[ "${CLEAN}" -eq 1 && "${BUILD_BE}" -eq 0 && "${BUILD_FE}" -eq 0 &&
"${BUILD_SPARK_DPP}" -eq 0 ]]; then
clean_gensrc
diff --git
a/regression-test/data/inverted_index_p0/index_compaction/test_index_compaction_with_multi_index_segments.out
b/regression-test/data/inverted_index_p0/index_compaction/test_index_compaction_with_multi_index_segments.out
new file mode 100644
index 00000000000..57ad3c1080c
--- /dev/null
+++
b/regression-test/data/inverted_index_p0/index_compaction/test_index_compaction_with_multi_index_segments.out
@@ -0,0 +1,214 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+2018-02-21T12:00 1 I'm using the builds
+2018-02-21T12:00 1 I'm using the builds
+2018-02-21T12:00 2 I'm using the builds
+2018-02-21T12:00 2 I'm using the builds
+2018-02-21T12:00 3 I'm using the builds
+2018-02-21T12:00 3 I'm using the builds
+2018-02-21T12:00 4 I'm using the builds
+2018-02-21T12:00 4 I'm using the builds
+2018-02-21T12:00 5 I'm using the builds
+2018-02-21T12:00 5 I'm using the builds
+2018-02-21T12:00 6 I'm using the builds
+2018-02-21T12:00 6 I'm using the builds
+2018-02-21T12:00 7 I'm using the builds
+2018-02-21T12:00 7 I'm using the builds
+2018-02-21T12:00 8 I'm using the builds
+2018-02-21T12:00 8 I'm using the builds
+2018-02-21T12:00 9 I'm using the builds
+2018-02-21T12:00 9 I'm using the builds
+2018-02-21T12:00 10 I'm using the builds
+2018-02-21T12:00 10 I'm using the builds
+
+-- !sql --
+2018-02-21T12:00 1 I'm using the builds
+2018-02-21T12:00 1 I'm using the builds
+2018-02-21T12:00 2 I'm using the builds
+2018-02-21T12:00 2 I'm using the builds
+2018-02-21T12:00 3 I'm using the builds
+2018-02-21T12:00 3 I'm using the builds
+2018-02-21T12:00 4 I'm using the builds
+2018-02-21T12:00 4 I'm using the builds
+2018-02-21T12:00 5 I'm using the builds
+2018-02-21T12:00 5 I'm using the builds
+2018-02-21T12:00 6 I'm using the builds
+2018-02-21T12:00 6 I'm using the builds
+2018-02-21T12:00 7 I'm using the builds
+2018-02-21T12:00 7 I'm using the builds
+2018-02-21T12:00 8 I'm using the builds
+2018-02-21T12:00 8 I'm using the builds
+2018-02-21T12:00 9 I'm using the builds
+2018-02-21T12:00 9 I'm using the builds
+2018-02-21T12:00 10 I'm using the builds
+2018-02-21T12:00 10 I'm using the builds
+
+-- !sql --
+
+-- !sql --
+2018-02-21T12:00 1 I'm using the builds
+2018-02-21T12:00 1 I'm using the builds
+2018-02-21T12:00 2 I'm using the builds
+2018-02-21T12:00 2 I'm using the builds
+2018-02-21T12:00 3 I'm using the builds
+2018-02-21T12:00 3 I'm using the builds
+2018-02-21T12:00 4 I'm using the builds
+2018-02-21T12:00 4 I'm using the builds
+2018-02-21T12:00 5 I'm using the builds
+2018-02-21T12:00 5 I'm using the builds
+2018-02-21T12:00 6 I'm using the builds
+2018-02-21T12:00 6 I'm using the builds
+2018-02-21T12:00 7 I'm using the builds
+2018-02-21T12:00 7 I'm using the builds
+
+-- !sql --
+2018-02-21T12:00 1 I'm using the builds
+2018-02-21T12:00 1 I'm using the builds
+2018-02-21T12:00 2 I'm using the builds
+2018-02-21T12:00 2 I'm using the builds
+2018-02-21T12:00 3 I'm using the builds
+2018-02-21T12:00 3 I'm using the builds
+2018-02-21T12:00 4 I'm using the builds
+2018-02-21T12:00 4 I'm using the builds
+2018-02-21T12:00 5 I'm using the builds
+2018-02-21T12:00 5 I'm using the builds
+2018-02-21T12:00 6 I'm using the builds
+2018-02-21T12:00 6 I'm using the builds
+2018-02-21T12:00 7 I'm using the builds
+2018-02-21T12:00 7 I'm using the builds
+2018-02-21T12:00 8 I'm using the builds
+2018-02-21T12:00 8 I'm using the builds
+2018-02-21T12:00 9 I'm using the builds
+2018-02-21T12:00 9 I'm using the builds
+2018-02-21T12:00 10 I'm using the builds
+2018-02-21T12:00 10 I'm using the builds
+
+-- !sql --
+2018-02-21T12:00 1 I'm using the builds
+2018-02-21T12:00 1 I'm using the builds
+2018-02-21T12:00 2 I'm using the builds
+2018-02-21T12:00 2 I'm using the builds
+2018-02-21T12:00 3 I'm using the builds
+2018-02-21T12:00 3 I'm using the builds
+2018-02-21T12:00 4 I'm using the builds
+2018-02-21T12:00 4 I'm using the builds
+2018-02-21T12:00 5 I'm using the builds
+2018-02-21T12:00 5 I'm using the builds
+2018-02-21T12:00 6 I'm using the builds
+2018-02-21T12:00 6 I'm using the builds
+2018-02-21T12:00 7 I'm using the builds
+2018-02-21T12:00 7 I'm using the builds
+2018-02-21T12:00 8 I'm using the builds
+2018-02-21T12:00 8 I'm using the builds
+2018-02-21T12:00 9 I'm using the builds
+2018-02-21T12:00 9 I'm using the builds
+2018-02-21T12:00 10 I'm using the builds
+2018-02-21T12:00 10 I'm using the builds
+
+-- !sql --
+
+-- !sql --
+2018-02-21T12:00 1 I'm using the builds
+2018-02-21T12:00 1 I'm using the builds
+2018-02-21T12:00 2 I'm using the builds
+2018-02-21T12:00 2 I'm using the builds
+2018-02-21T12:00 3 I'm using the builds
+2018-02-21T12:00 3 I'm using the builds
+2018-02-21T12:00 4 I'm using the builds
+2018-02-21T12:00 4 I'm using the builds
+2018-02-21T12:00 5 I'm using the builds
+2018-02-21T12:00 5 I'm using the builds
+2018-02-21T12:00 6 I'm using the builds
+2018-02-21T12:00 6 I'm using the builds
+2018-02-21T12:00 7 I'm using the builds
+2018-02-21T12:00 7 I'm using the builds
+
+-- !sql --
+2018-02-21T12:00 1 I'm using the builds
+2018-02-21T12:00 1 I'm using the builds
+2018-02-21T12:00 1 I'm using the builds
+2018-02-21T12:00 2 I'm using the builds
+2018-02-21T12:00 2 I'm using the builds
+2018-02-21T12:00 2 I'm using the builds
+2018-02-21T12:00 3 I'm using the builds
+2018-02-21T12:00 3 I'm using the builds
+2018-02-21T12:00 3 I'm using the builds
+2018-02-21T12:00 4 I'm using the builds
+2018-02-21T12:00 4 I'm using the builds
+2018-02-21T12:00 4 I'm using the builds
+2018-02-21T12:00 5 I'm using the builds
+2018-02-21T12:00 5 I'm using the builds
+2018-02-21T12:00 5 I'm using the builds
+2018-02-21T12:00 6 I'm using the builds
+2018-02-21T12:00 6 I'm using the builds
+2018-02-21T12:00 6 I'm using the builds
+2018-02-21T12:00 7 I'm using the builds
+2018-02-21T12:00 7 I'm using the builds
+2018-02-21T12:00 7 I'm using the builds
+2018-02-21T12:00 8 I'm using the builds
+2018-02-21T12:00 8 I'm using the builds
+2018-02-21T12:00 8 I'm using the builds
+2018-02-21T12:00 9 I'm using the builds
+2018-02-21T12:00 9 I'm using the builds
+2018-02-21T12:00 9 I'm using the builds
+2018-02-21T12:00 10 I'm using the builds
+2018-02-21T12:00 10 I'm using the builds
+2018-02-21T12:00 10 I'm using the builds
+
+-- !sql --
+2018-02-21T12:00 1 I'm using the builds
+2018-02-21T12:00 1 I'm using the builds
+2018-02-21T12:00 1 I'm using the builds
+2018-02-21T12:00 2 I'm using the builds
+2018-02-21T12:00 2 I'm using the builds
+2018-02-21T12:00 2 I'm using the builds
+2018-02-21T12:00 3 I'm using the builds
+2018-02-21T12:00 3 I'm using the builds
+2018-02-21T12:00 3 I'm using the builds
+2018-02-21T12:00 4 I'm using the builds
+2018-02-21T12:00 4 I'm using the builds
+2018-02-21T12:00 4 I'm using the builds
+2018-02-21T12:00 5 I'm using the builds
+2018-02-21T12:00 5 I'm using the builds
+2018-02-21T12:00 5 I'm using the builds
+2018-02-21T12:00 6 I'm using the builds
+2018-02-21T12:00 6 I'm using the builds
+2018-02-21T12:00 6 I'm using the builds
+2018-02-21T12:00 7 I'm using the builds
+2018-02-21T12:00 7 I'm using the builds
+2018-02-21T12:00 7 I'm using the builds
+2018-02-21T12:00 8 I'm using the builds
+2018-02-21T12:00 8 I'm using the builds
+2018-02-21T12:00 8 I'm using the builds
+2018-02-21T12:00 9 I'm using the builds
+2018-02-21T12:00 9 I'm using the builds
+2018-02-21T12:00 9 I'm using the builds
+2018-02-21T12:00 10 I'm using the builds
+2018-02-21T12:00 10 I'm using the builds
+2018-02-21T12:00 10 I'm using the builds
+
+-- !sql --
+
+-- !sql --
+2018-02-21T12:00 1 I'm using the builds
+2018-02-21T12:00 1 I'm using the builds
+2018-02-21T12:00 1 I'm using the builds
+2018-02-21T12:00 2 I'm using the builds
+2018-02-21T12:00 2 I'm using the builds
+2018-02-21T12:00 2 I'm using the builds
+2018-02-21T12:00 3 I'm using the builds
+2018-02-21T12:00 3 I'm using the builds
+2018-02-21T12:00 3 I'm using the builds
+2018-02-21T12:00 4 I'm using the builds
+2018-02-21T12:00 4 I'm using the builds
+2018-02-21T12:00 4 I'm using the builds
+2018-02-21T12:00 5 I'm using the builds
+2018-02-21T12:00 5 I'm using the builds
+2018-02-21T12:00 5 I'm using the builds
+2018-02-21T12:00 6 I'm using the builds
+2018-02-21T12:00 6 I'm using the builds
+2018-02-21T12:00 6 I'm using the builds
+2018-02-21T12:00 7 I'm using the builds
+2018-02-21T12:00 7 I'm using the builds
+2018-02-21T12:00 7 I'm using the builds
+
diff --git
a/regression-test/suites/inverted_index_p0/index_compaction/test_index_compaction_with_multi_index_segments.groovy
b/regression-test/suites/inverted_index_p0/index_compaction/test_index_compaction_with_multi_index_segments.groovy
new file mode 100644
index 00000000000..03ab16af5b5
--- /dev/null
+++
b/regression-test/suites/inverted_index_p0/index_compaction/test_index_compaction_with_multi_index_segments.groovy
@@ -0,0 +1,286 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+suite("test_index_compaction_with_multi_index_segments", "p0") {
+ def tableName = "test_index_compaction_with_multi_index_segments"
+
+ def set_be_config = { key, value ->
+ def backendId_to_backendIP = [:]
+ def backendId_to_backendHttpPort = [:]
+ getBackendIpHttpPort(backendId_to_backendIP,
backendId_to_backendHttpPort);
+
+ for (String backend_id: backendId_to_backendIP.keySet()) {
+ def (code, out, err) =
update_be_config(backendId_to_backendIP.get(backend_id),
backendId_to_backendHttpPort.get(backend_id), key, value)
+ logger.info("update config: code=" + code + ", out=" + out + ",
err=" + err)
+ }
+ }
+
+ boolean disableAutoCompaction = true
+ boolean invertedIndexCompactionEnable = false
+ int invertedIndexMaxBufferedDocs = -1;
+ boolean has_update_be_config = false
+
+ try {
+ String backend_id;
+ def backendId_to_backendIP = [:]
+ def backendId_to_backendHttpPort = [:]
+ getBackendIpHttpPort(backendId_to_backendIP,
backendId_to_backendHttpPort);
+
+ backend_id = backendId_to_backendIP.keySet()[0]
+ def (code, out, err) =
show_be_config(backendId_to_backendIP.get(backend_id),
backendId_to_backendHttpPort.get(backend_id))
+
+ logger.info("Show config: code=" + code + ", out=" + out + ", err=" +
err)
+ assertEquals(code, 0)
+ def configList = parseJson(out.trim())
+ assert configList instanceof List
+
+ for (Object ele in (List) configList) {
+ assert ele instanceof List<String>
+ if (((List<String>) ele)[0] == "inverted_index_compaction_enable")
{
+ invertedIndexCompactionEnable =
Boolean.parseBoolean(((List<String>) ele)[2])
+ logger.info("inverted_index_compaction_enable:
${((List<String>) ele)[2]}")
+ }
+ if (((List<String>) ele)[0] == "inverted_index_max_buffered_docs")
{
+ invertedIndexMaxBufferedDocs =
Integer.parseInt(((List<String>) ele)[2])
+ logger.info("inverted_index_max_buffered_docs:
${((List<String>) ele)[2]}")
+ }
+ }
+ set_be_config.call("inverted_index_compaction_enable", "true")
+ set_be_config.call("inverted_index_max_buffered_docs", "5")
+ has_update_be_config = true
+
+ sql """ DROP TABLE IF EXISTS ${tableName}; """
+ sql """
+ CREATE TABLE ${tableName} (
+ `file_time` DATETIME NOT NULL,
+ `comment_id` int(11) NULL,
+ `body` TEXT NULL DEFAULT "",
+ INDEX idx_comment_id (`comment_id`) USING INVERTED COMMENT
'''',
+ INDEX idx_body (`body`) USING INVERTED PROPERTIES("parser" =
"unicode") COMMENT ''''
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`file_time`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY RANDOM BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "disable_auto_compaction" = "true"
+ );
+ """
+
+ // insert 10 rows
+ sql """ INSERT INTO ${tableName} VALUES ("2018-02-21 12:00:00", 1,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 2,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 3,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 4,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 5,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 6,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 7,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 8,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 9,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 10,
"I\'m using the builds"); """
+ // insert another 10 rows
+ sql """ INSERT INTO ${tableName} VALUES ("2018-02-21 12:00:00", 1,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 2,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 3,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 4,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 5,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 6,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 7,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 8,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 9,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 10,
"I\'m using the builds"); """
+
+ qt_sql """ select * from ${tableName} order by file_time, comment_id,
body """
+ qt_sql """ select * from ${tableName} where body match "using" order
by file_time, comment_id, body """
+ qt_sql """ select * from ${tableName} where body match "the" order by
file_time, comment_id, body """
+ qt_sql """ select * from ${tableName} where comment_id < 8 order by
file_time, comment_id, body """
+
+ //TabletId,ReplicaId,BackendId,SchemaHash,Version,LstSuccessVersion,LstFailedVersion,LstFailedTime,LocalDataSize,RemoteDataSize,RowCount,State,LstConsistencyCheckTime,CheckVersion,VersionCount,PathHash,MetaUrl,CompactionStatus
+ String[][] tablets = sql """ show tablets from ${tableName}; """
+
+ def replicaNum = 1
+ logger.info("get table replica num: " + replicaNum)
+ // before full compaction, there are 3 rowsets.
+ int rowsetCount = 0
+ for (String[] tablet in tablets) {
+ String tablet_id = tablet[0]
+ def compactionStatusUrlIndex = 18
+ (code, out, err) = curl("GET", tablet[compactionStatusUrlIndex])
+ logger.info("Show tablets status: code=" + code + ", out=" + out +
", err=" + err)
+ assertEquals(code, 0)
+ def tabletJson = parseJson(out.trim())
+ assert tabletJson.rowsets instanceof List
+ rowsetCount +=((List<String>) tabletJson.rowsets).size()
+ }
+ assert (rowsetCount == 3 * replicaNum)
+
+ // trigger full compactions for all tablets in ${tableName}
+ for (String[] tablet in tablets) {
+ String tablet_id = tablet[0]
+ backend_id = tablet[2]
+ times = 1
+
+ do{
+ (code, out, err) =
be_run_full_compaction(backendId_to_backendIP.get(backend_id),
backendId_to_backendHttpPort.get(backend_id), tablet_id)
+ logger.info("Run compaction: code=" + code + ", out=" + out +
", err=" + err)
+ ++times
+ sleep(2000)
+ } while (parseJson(out.trim()).status.toLowerCase()!="success" &&
times<=10)
+
+ def compactJson = parseJson(out.trim())
+ if (compactJson.status.toLowerCase() == "fail") {
+ assertEquals(disableAutoCompaction, false)
+ logger.info("Compaction was done automatically!")
+ }
+ if (disableAutoCompaction) {
+ assertEquals("success", compactJson.status.toLowerCase())
+ }
+ }
+
+ // wait for full compaction to finish
+ for (String[] tablet in tablets) {
+ boolean running = true
+ do {
+ Thread.sleep(1000)
+ String tablet_id = tablet[0]
+ backend_id = tablet[2]
+ (code, out, err) =
be_get_compaction_status(backendId_to_backendIP.get(backend_id),
backendId_to_backendHttpPort.get(backend_id), tablet_id)
+ logger.info("Get compaction status: code=" + code + ", out=" +
out + ", err=" + err)
+ assertEquals(code, 0)
+ def compactionStatus = parseJson(out.trim())
+ assertEquals("success", compactionStatus.status.toLowerCase())
+ running = compactionStatus.run_status
+ } while (running)
+ }
+
+ // after full compaction, there is only 1 rowset.
+
+ rowsetCount = 0
+ for (String[] tablet in tablets) {
+ String tablet_id = tablet[0]
+ def compactionStatusUrlIndex = 18
+ (code, out, err) = curl("GET", tablet[compactionStatusUrlIndex])
+ logger.info("Show tablets status: code=" + code + ", out=" + out +
", err=" + err)
+ assertEquals(code, 0)
+ def tabletJson = parseJson(out.trim())
+ assert tabletJson.rowsets instanceof List
+ rowsetCount +=((List<String>) tabletJson.rowsets).size()
+ }
+ assert (rowsetCount == 1 * replicaNum)
+
+ qt_sql """ select * from ${tableName} order by file_time, comment_id,
body """
+ qt_sql """ select * from ${tableName} where body match "using" order
by file_time, comment_id, body """
+ qt_sql """ select * from ${tableName} where body match "the" order by
file_time, comment_id, body """
+ qt_sql """ select * from ${tableName} where comment_id < 8 order by
file_time, comment_id, body """
+
+ // insert 10 rows, again
+ sql """ INSERT INTO ${tableName} VALUES ("2018-02-21 12:00:00", 1,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 2,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 3,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 4,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 5,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 6,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 7,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 8,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 9,
"I\'m using the builds"),
+ ("2018-02-21 12:00:00", 10,
"I\'m using the builds"); """
+
+ tablets = sql """ show tablets from ${tableName}; """
+
+ logger.info("get table replica num: " + replicaNum)
+ // before full compaction, there are 2 rowsets.
+ rowsetCount = 0
+ for (String[] tablet in tablets) {
+ String tablet_id = tablet[0]
+ def compactionStatusUrlIndex = 18
+ (code, out, err) = curl("GET", tablet[compactionStatusUrlIndex])
+ logger.info("Show tablets status: code=" + code + ", out=" + out +
", err=" + err)
+ assertEquals(code, 0)
+ def tabletJson = parseJson(out.trim())
+ assert tabletJson.rowsets instanceof List
+ rowsetCount +=((List<String>) tabletJson.rowsets).size()
+ }
+ assert (rowsetCount == 2 * replicaNum)
+
+ // trigger full compactions for all tablets in ${tableName}
+ for (String[] tablet in tablets) {
+ String tablet_id = tablet[0]
+ backend_id = tablet[2]
+ times = 1
+
+ do{
+ (code, out, err) =
be_run_full_compaction(backendId_to_backendIP.get(backend_id),
backendId_to_backendHttpPort.get(backend_id), tablet_id)
+ logger.info("Run compaction: code=" + code + ", out=" + out +
", err=" + err)
+ ++times
+ sleep(2000)
+ } while (parseJson(out.trim()).status.toLowerCase()!="success" &&
times<=10)
+
+ def compactJson = parseJson(out.trim())
+ if (compactJson.status.toLowerCase() == "fail") {
+ assertEquals(disableAutoCompaction, false)
+ logger.info("Compaction was done automatically!")
+ }
+ if (disableAutoCompaction) {
+ assertEquals("success", compactJson.status.toLowerCase())
+ }
+ }
+
+ // wait for full compaction to finish
+ for (String[] tablet in tablets) {
+ boolean running = true
+ do {
+ Thread.sleep(1000)
+ String tablet_id = tablet[0]
+ backend_id = tablet[2]
+ (code, out, err) =
be_get_compaction_status(backendId_to_backendIP.get(backend_id),
backendId_to_backendHttpPort.get(backend_id), tablet_id)
+ logger.info("Get compaction status: code=" + code + ", out=" +
out + ", err=" + err)
+ assertEquals(code, 0)
+ def compactionStatus = parseJson(out.trim())
+ assertEquals("success", compactionStatus.status.toLowerCase())
+ running = compactionStatus.run_status
+ } while (running)
+ }
+
+ // after full compaction, there is only 1 rowset.
+
+ rowsetCount = 0
+ for (String[] tablet in tablets) {
+ String tablet_id = tablet[0]
+ def compactionStatusUrlIndex = 18
+ (code, out, err) = curl("GET", tablet[compactionStatusUrlIndex])
+ logger.info("Show tablets status: code=" + code + ", out=" + out +
", err=" + err)
+ assertEquals(code, 0)
+ def tabletJson = parseJson(out.trim())
+ assert tabletJson.rowsets instanceof List
+ rowsetCount +=((List<String>) tabletJson.rowsets).size()
+ }
+ assert (rowsetCount == 1 * replicaNum)
+
+ qt_sql """ select * from ${tableName} order by file_time, comment_id,
body """
+ qt_sql """ select * from ${tableName} where body match "using" order
by file_time, comment_id, body """
+ qt_sql """ select * from ${tableName} where body match "the" order by
file_time, comment_id, body """
+ qt_sql """ select * from ${tableName} where comment_id < 8 order by
file_time, comment_id, body """
+
+ } finally {
+ if (has_update_be_config) {
+ set_be_config.call("inverted_index_compaction_enable",
invertedIndexCompactionEnable.toString())
+ set_be_config.call("inverted_index_max_buffered_docs",
invertedIndexMaxBufferedDocs.toString())
+ }
+ }
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]