This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new cd7e03f59b2 branch-3.0: [fix](index build) Correct inverted index
behavior after dynamically adding a column #48389 (#48547)
cd7e03f59b2 is described below
commit cd7e03f59b221d259a78dd5ca8aee0533eda4590
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Sat Mar 8 12:12:05 2025 +0800
branch-3.0: [fix](index build) Correct inverted index behavior after
dynamically adding a column #48389 (#48547)
Cherry-picked from #48389
Co-authored-by: airborne12 <[email protected]>
---
be/src/olap/task/index_builder.cpp | 11 +-
be/src/olap/task/index_builder.h | 18 +-
be/test/olap/index_builder_test.cpp | 2422 ++++++++++++++++++++
.../test_index_change_on_new_column.out | Bin 245 -> 611 bytes
.../test_index_change_on_new_column.groovy | 90 +-
5 files changed, 2526 insertions(+), 15 deletions(-)
diff --git a/be/src/olap/task/index_builder.cpp
b/be/src/olap/task/index_builder.cpp
index 85f76651046..792812200bb 100644
--- a/be/src/olap/task/index_builder.cpp
+++ b/be/src/olap/task/index_builder.cpp
@@ -448,14 +448,13 @@ Status
IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta
}
}
+ // Always record the inverted_index_file_writer for the segment;
otherwise the original inverted index will be deleted.
+ _inverted_index_file_writers.emplace(seg_ptr->id(),
+
std::move(inverted_index_file_writer));
if (return_columns.empty()) {
// no columns to read
- break;
+ continue;
}
-
- _inverted_index_file_writers.emplace(seg_ptr->id(),
-
std::move(inverted_index_file_writer));
-
// create iterator for each segment
StorageReadOptions read_options;
OlapReaderStatistics stats;
@@ -840,10 +839,12 @@ Status IndexBuilder::modify_rowsets(const
Merger::Statistics* stats) {
RETURN_IF_ERROR(_tablet->modify_rowsets(_output_rowsets,
_input_rowsets, true));
}
+#ifndef BE_TEST
{
std::shared_lock rlock(_tablet->get_header_lock());
_tablet->save_meta();
}
+#endif
return Status::OK();
}
diff --git a/be/src/olap/task/index_builder.h b/be/src/olap/task/index_builder.h
index 8c996bb400d..69ca5f39c4d 100644
--- a/be/src/olap/task/index_builder.h
+++ b/be/src/olap/task/index_builder.h
@@ -47,16 +47,16 @@ public:
IndexBuilder(StorageEngine& engine, TabletSharedPtr tablet, const
std::vector<TColumn>& columns,
const std::vector<doris::TOlapTableIndex>&
alter_inverted_indexes,
bool is_drop_op = false);
- ~IndexBuilder();
+ virtual ~IndexBuilder();
- Status init();
- Status do_build_inverted_index();
- Status update_inverted_index_info();
- Status handle_inverted_index_data();
- Status handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta,
- std::vector<segment_v2::SegmentSharedPtr>&
segments);
- Status modify_rowsets(const Merger::Statistics* stats = nullptr);
- void gc_output_rowset();
+ virtual Status init();
+ virtual Status do_build_inverted_index();
+ virtual Status update_inverted_index_info();
+ virtual Status handle_inverted_index_data();
+ virtual Status handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta,
+
std::vector<segment_v2::SegmentSharedPtr>& segments);
+ virtual Status modify_rowsets(const Merger::Statistics* stats = nullptr);
+ virtual void gc_output_rowset();
private:
Status _write_inverted_index_data(TabletSchemaSPtr tablet_schema, int32_t
segment_idx,
diff --git a/be/test/olap/index_builder_test.cpp
b/be/test/olap/index_builder_test.cpp
new file mode 100644
index 00000000000..c2483e95cad
--- /dev/null
+++ b/be/test/olap/index_builder_test.cpp
@@ -0,0 +1,2422 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/task/index_builder.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include "olap/olap_common.h"
+#include "olap/rowset/beta_rowset.h"
+#include "olap/rowset/rowset_factory.h"
+#include "olap/rowset/rowset_writer_context.h"
+#include "olap/storage_engine.h"
+#include "olap/tablet_fwd.h"
+#include "olap/tablet_schema.h"
+
+namespace doris {
+using namespace testing;
+
+class IndexBuilderTest : public ::testing::Test { // Fixture: per-test StorageEngine, DataDir and Tablet under a scratch directory
+protected:
+ void SetUp() override {
+ char buffer[MAX_PATH_LEN];
+ ASSERT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr);
+ _current_dir = std::string(buffer);
+ _absolute_dir = _current_dir + "/" + std::string(dest_dir);
+
ASSERT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok());
+
ASSERT_TRUE(io::global_local_filesystem()->create_directory(_absolute_dir).ok());
+ // Store paths shared by TmpFileDirs and the StorageEngine options below.
+ std::vector<StorePath> paths;
+ paths.emplace_back(config::storage_root_path, -1);
+
+ // tmp dir (NOTE(review): tmp_dir has the same value as dest_dir, so the directory created above is deleted and re-created)
+
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok());
+
EXPECT_TRUE(io::global_local_filesystem()->create_directory(tmp_dir).ok());
+ paths.emplace_back(std::string(tmp_dir), 1024000000);
+ auto tmp_file_dirs = std::make_unique<segment_v2::TmpFileDirs>(paths);
+ EXPECT_TRUE(tmp_file_dirs->init().ok());
+ ExecEnv::GetInstance()->set_tmp_file_dir(std::move(tmp_file_dirs));
+
+ // Create the inverted index searcher cache with a limit of 0 and register it with ExecEnv
+ int64_t inverted_index_cache_limit = 0;
+ _inverted_index_searcher_cache =
std::unique_ptr<segment_v2::InvertedIndexSearcherCache>(
+
InvertedIndexSearcherCache::create_global_instance(inverted_index_cache_limit,
+ 256));
+
+ ExecEnv::GetInstance()->set_inverted_index_searcher_cache(
+ _inverted_index_searcher_cache.get());
+ doris::EngineOptions options;
+ options.store_paths = paths;
+ // Keep a raw pointer to the engine before ownership moves to ExecEnv.
+ auto engine = std::make_unique<StorageEngine>(options);
+ _engine_ref = engine.get();
+ _data_dir = std::make_unique<DataDir>(*_engine_ref, _absolute_dir);
+ ASSERT_TRUE(_data_dir->update_capacity().ok());
+ ExecEnv::GetInstance()->set_storage_engine(std::move(engine));
+
+ _tablet_meta = create_tablet_meta();
+
+ // Create the tablet schema (DUP_KEYS, columns k1 and k2)
+ // auto* tablet_schema = _tablet_meta->mutable_tablet_schema();
+ _tablet_schema = std::make_shared<TabletSchema>();
+ create_tablet_schema(_tablet_schema, KeysType::DUP_KEYS);
+ // Initialize tablet
+ _tablet = std::make_shared<Tablet>(*_engine_ref, _tablet_meta,
_data_dir.get());
+ ASSERT_TRUE(_tablet->init().ok());
+ }
+ // Deletes the scratch directory, clears the storage engine held by ExecEnv, then releases the tablet.
+ void TearDown() override {
+
ASSERT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok());
+ ExecEnv::GetInstance()->set_storage_engine(nullptr);
+ _tablet.reset();
+ }
+ // Fills tablet_schema with the given keys type, V2 inverted index format and INT columns k1/k2; num_value_col is not used.
+ void create_tablet_schema(TabletSchemaSPtr tablet_schema, KeysType
keystype,
+ int num_value_col = 1) {
+ // Set basic properties of TabletSchema directly
+ tablet_schema->_keys_type = keystype;
+ tablet_schema->_inverted_index_storage_format =
InvertedIndexStorageFormatPB::V2;
+
+ // Create the first column k1 (key column, unique id 1)
+ TabletColumn column_1;
+ column_1.set_type(FieldType::OLAP_FIELD_TYPE_INT);
+ column_1.set_unique_id(1);
+ column_1.set_name("k1");
+ column_1.set_is_key(true);
+ tablet_schema->append_column(column_1);
+
+ // Create the second column k2 (unique id 2; not a key, is_key is false)
+ TabletColumn column_2;
+ column_2.set_type(FieldType::OLAP_FIELD_TYPE_INT);
+ column_2.set_unique_id(2);
+ column_2.set_name("k2");
+ column_2.set_is_key(false);
+ tablet_schema->append_column(column_2);
+ }
+ // Builds a TabletMeta from a TabletMetaPB with fixed ids (tablet 15673, schema hash 567997577).
+ TabletMetaSharedPtr create_tablet_meta() {
+ TabletMetaPB tablet_meta_pb;
+ tablet_meta_pb.set_table_id(1);
+ tablet_meta_pb.set_tablet_id(15673);
+ tablet_meta_pb.set_schema_hash(567997577);
+ tablet_meta_pb.set_shard_id(0);
+ tablet_meta_pb.set_creation_time(1575351212);
+
+ TabletMetaSharedPtr tablet_meta(new TabletMeta());
+ tablet_meta->init_from_pb(tablet_meta_pb);
+ return tablet_meta;
+ }
+
+ // Initializes rs_meta from a RowsetMetaPB with fixed ids/sizes and versions start..end, then attaches tablet_schema
+ void init_rs_meta(RowsetMetaSharedPtr& rs_meta, TabletSchemaSPtr
tablet_schema, int64_t start,
+ int64_t end) {
+ RowsetMetaPB rowset_meta_pb;
+ rowset_meta_pb.set_rowset_id(540081);
+ rowset_meta_pb.set_tablet_id(15673);
+ rowset_meta_pb.set_tablet_schema_hash(567997577);
+ rowset_meta_pb.set_rowset_type(RowsetTypePB::BETA_ROWSET);
+ rowset_meta_pb.set_rowset_state(RowsetStatePB::VISIBLE);
+ rowset_meta_pb.set_start_version(start);
+ rowset_meta_pb.set_end_version(end);
+ rowset_meta_pb.set_num_rows(3929);
+ rowset_meta_pb.set_total_disk_size(84699);
+ rowset_meta_pb.set_data_disk_size(84464);
+ rowset_meta_pb.set_index_disk_size(235);
+ rowset_meta_pb.set_num_segments(2);
+
+ rs_meta->init_from_pb(rowset_meta_pb);
+ rs_meta->set_tablet_schema(tablet_schema);
+ }
+
+ StorageEngine* _engine_ref = nullptr; // non-owning; the engine is handed to ExecEnv in SetUp()
+ TabletSharedPtr _tablet;
+ TabletMetaSharedPtr _tablet_meta;
+ TabletSchemaSPtr _tablet_schema;
+ std::vector<TColumn> _columns;
+ std::vector<doris::TOlapTableIndex> _alter_indexes;
+ std::unique_ptr<DataDir> _data_dir = nullptr;
+ std::string _current_dir;
+ std::string _absolute_dir;
+ std::unique_ptr<InvertedIndexSearcherCache> _inverted_index_searcher_cache;
+
+ constexpr static uint32_t MAX_PATH_LEN = 1024;
+ constexpr static std::string_view dest_dir = "./ut_dir/index_builder_test";
+ constexpr static std::string_view tmp_dir = "./ut_dir/index_builder_test"; // NOTE(review): same value as dest_dir - confirm intended
+};
+
+TEST_F(IndexBuilderTest, BasicBuildTest) { // init() succeeds and records one alter index id
+ // 1. Prepare test data
+ TOlapTableIndex index;
+ index.index_id = 1;
+ index.columns.emplace_back("col1"); // NOTE(review): "col1" is not one of the fixture columns (k1/k2) - confirm intended
+ _alter_indexes.push_back(index);
+
+ // 2. Create IndexBuilder
+ IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(),
_tablet, _columns,
+ _alter_indexes, false);
+
+ // 3. Verify initialization
+ auto status = builder.init();
+ EXPECT_TRUE(status.ok());
+ EXPECT_EQ(builder._alter_index_ids.size(), 1);
+}
+
+TEST_F(IndexBuilderTest, DropIndexTest) { // writes a rowset with an index on k1, drops that index, then checks the files on disk
+ // 0. prepare tablet path
+ auto tablet_path = _absolute_dir + "/" + std::to_string(15676);
+ _tablet->_tablet_path = tablet_path;
+
ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok());
+
ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok());
+
+ // 1. Prepare data for writing
+ RowsetSharedPtr rowset;
+ const int num_rows = 1000;
+
+ // 2. First add an initial index to the schema (for k1 column)
+ TabletIndex initial_index;
+ initial_index._index_id = 1;
+ initial_index._index_name = "k1_index";
+ initial_index._index_type = IndexType::INVERTED;
+ initial_index._col_unique_ids.push_back(1); // unique_id for k1
+ _tablet_schema->append_index(std::move(initial_index));
+
+ // 3. Create a rowset writer context (the rowset is written into the tablet path itself)
+ RowsetWriterContext writer_context;
+ writer_context.rowset_id.init(15676);
+ writer_context.tablet_id = 15676;
+ writer_context.tablet_schema_hash = 567997577;
+ writer_context.partition_id = 10;
+ writer_context.rowset_type = BETA_ROWSET;
+ writer_context.tablet_path = tablet_path;
+ writer_context.rowset_state = VISIBLE;
+ writer_context.tablet_schema = _tablet_schema;
+ writer_context.version.first = 10;
+ writer_context.version.second = 10;
+
+
ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok());
+
+ // 4. Create a rowset writer
+ auto res = RowsetFactory::create_rowset_writer(*_engine_ref,
writer_context, false);
+ ASSERT_TRUE(res.has_value()) << res.error();
+ auto rowset_writer = std::move(res).value();
+
+ // 5. Write data to the rowset
+ {
+ vectorized::Block block = _tablet_schema->create_block();
+ auto columns = block.mutate_columns();
+
+ // Add data for k1 and k2 columns
+ for (int i = 0; i < num_rows; ++i) {
+ // k1 column (int)
+ int32_t k1 = i * 10;
+ columns[0]->insert_data((const char*)&k1, sizeof(k1));
+
+ // k2 column (int)
+ int32_t k2 = i % 100;
+ columns[1]->insert_data((const char*)&k2, sizeof(k2));
+ }
+
+ // Add the block to the rowset
+ Status s = rowset_writer->add_block(&block);
+ ASSERT_TRUE(s.ok()) << s.to_string();
+
+ // Flush the writer
+ s = rowset_writer->flush();
+ ASSERT_TRUE(s.ok()) << s.to_string();
+
+ // Build the rowset
+ ASSERT_TRUE(rowset_writer->build(rowset).ok());
+
+ // Add the rowset to the tablet
+ ASSERT_TRUE(_tablet->add_rowset(rowset).ok());
+ }
+
+ // 6. Verify index exists before dropping
+ EXPECT_TRUE(_tablet_schema->has_inverted_index());
+ EXPECT_TRUE(_tablet_schema->has_inverted_index_with_index_id(1));
+
+ // 7. Prepare index for dropping
+ TOlapTableIndex drop_index;
+ drop_index.index_id = 1;
+ drop_index.columns.emplace_back("k1");
+ _alter_indexes.push_back(drop_index);
+
+ // 8. Create IndexBuilder with drop operation
+ IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(),
_tablet, _columns,
+ _alter_indexes, true);
+
+ // 9. Initialize and verify
+ auto status = builder.init();
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ EXPECT_EQ(builder._alter_index_ids.size(), 1);
+
+ // 10. Execute drop operation
+ status = builder.do_build_inverted_index();
+ EXPECT_TRUE(status.ok()) << status.to_string();
+
+ // 11. Verify the index has been removed
+ // check that the tablet path still exists (old and new files share this one directory)
+ bool exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()->exists(tablet_path,
&exists).ok());
+ EXPECT_TRUE(exists);
+
+ // Count old ("15676_0.*") and new ("0200..._0.*") .idx/.dat files under the tablet path
+ std::vector<io::FileInfo> files;
+ bool dir_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()->list(tablet_path, true, &files,
&dir_exists).ok());
+ EXPECT_TRUE(dir_exists);
+ int new_idx_file_count = 0;
+ int new_dat_file_count = 0;
+ int old_idx_file_count = 0;
+ int old_dat_file_count = 0;
+ for (const auto& file : files) {
+ std::string filename = file.file_name;
+ if (filename.find("15676_0.idx") != std::string::npos) {
+ old_idx_file_count++;
+ }
+ if (filename.find("15676_0.dat") != std::string::npos) {
+ old_dat_file_count++;
+ }
+ if
(filename.find("020000000000000100000000000000000000000000000000_0.idx") !=
+ std::string::npos) {
+ new_idx_file_count++;
+ }
+ if
(filename.find("020000000000000100000000000000000000000000000000_0.dat") !=
+ std::string::npos) {
+ new_dat_file_count++;
+ }
+ }
+ // Old files keep one .idx and one .dat; the new files have one .dat and no .idx
+ EXPECT_EQ(old_idx_file_count, 1) << "Tablet path should have 1 .idx file
before drop";
+ EXPECT_EQ(old_dat_file_count, 1) << "Tablet path should have 1 .dat file
before drop";
+ EXPECT_EQ(new_idx_file_count, 0) << "Tablet path should have no .idx file
after drop";
+ EXPECT_EQ(new_dat_file_count, 1) << "Tablet path should have 1 .dat file
after drop";
+
+ //auto tablet_schema = _tablet->tablet_schema();
+ //EXPECT_FALSE(tablet_schema->has_inverted_index_with_index_id(1));
+}
+
+TEST_F(IndexBuilderTest, BuildIndexAfterWritingDataTest) { // writes a rowset without indexes, then builds k1_index and k2_index
+ // 0. prepare tablet path
+ auto tablet_path = _absolute_dir + "/" + std::to_string(14673);
+ _tablet->_tablet_path = tablet_path;
+
ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok());
+
ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok());
+
+ // 1. Prepare data for writing
+ RowsetSharedPtr rowset;
+ const int num_rows = 1000;
+
+ // 2. Create a rowset writer context (the rowset is written under .../15673, not the tablet path above)
+ RowsetWriterContext writer_context;
+ writer_context.rowset_id.init(15673);
+ writer_context.tablet_id = 15673;
+ writer_context.tablet_schema_hash = 567997577;
+ writer_context.partition_id = 10;
+ writer_context.rowset_type = BETA_ROWSET;
+ writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15673);
+ writer_context.rowset_state = VISIBLE;
+ writer_context.tablet_schema = _tablet_schema;
+ writer_context.version.first = 10;
+ writer_context.version.second = 10;
+
+
ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok());
+
+ // 3. Create a rowset writer
+ auto res = RowsetFactory::create_rowset_writer(*_engine_ref,
writer_context, false);
+ ASSERT_TRUE(res.has_value()) << res.error();
+ auto rowset_writer = std::move(res).value();
+
+ // 4. Write data to the rowset
+ {
+ vectorized::Block block = _tablet_schema->create_block();
+ auto columns = block.mutate_columns();
+
+ // Add data for k1 and k2 columns according to the schema
+ for (int i = 0; i < num_rows; ++i) {
+ // k1 column (int)
+ int32_t k1 = i * 10;
+ columns[0]->insert_data((const char*)&k1, sizeof(k1));
+
+ // k2 column (int)
+ int32_t k2 = i % 100;
+ columns[1]->insert_data((const char*)&k2, sizeof(k2));
+ }
+
+ // Add the block to the rowset
+ Status s = rowset_writer->add_block(&block);
+ ASSERT_TRUE(s.ok()) << s.to_string();
+
+ // Flush the writer
+ s = rowset_writer->flush();
+ ASSERT_TRUE(s.ok()) << s.to_string();
+
+ // Build the rowset
+ ASSERT_TRUE(rowset_writer->build(rowset).ok());
+
+ // Add the rowset to the tablet
+ ASSERT_TRUE(_tablet->add_rowset(rowset).ok());
+ }
+
+ // 5. Prepare index for building
+ TOlapTableIndex index1;
+ index1.index_id = 1;
+ index1.columns.emplace_back("k1");
+ index1.index_name = "k1_index";
+ index1.index_type = TIndexType::INVERTED;
+ _alter_indexes.push_back(index1);
+
+ TOlapTableIndex index2;
+ index2.index_id = 2;
+ index2.columns.emplace_back("k2");
+ index2.index_name = "k2_index";
+ index2.index_type = TIndexType::INVERTED;
+ _alter_indexes.push_back(index2);
+
+ // 6. Create IndexBuilder
+ IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(),
_tablet, _columns,
+ _alter_indexes, false);
+
+ // 7. Initialize and verify
+ auto status = builder.init();
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ EXPECT_EQ(builder._alter_index_ids.size(), 2);
+
+ // 8. Build index
+ status = builder.do_build_inverted_index();
+ EXPECT_TRUE(status.ok()) << status.to_string();
+
+ // old path = where the rowset was written (15673); new path = _tablet->_tablet_path (14673)
+ auto old_tablet_path = _absolute_dir + "/" + std::to_string(15673);
+ auto new_tablet_path = _absolute_dir + "/" + std::to_string(14673);
+ bool old_exists = false;
+ bool new_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path,
&old_exists).ok());
+ EXPECT_TRUE(old_exists);
+ EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path,
&new_exists).ok());
+ EXPECT_TRUE(new_exists);
+
+ // Check files in old and new directories
+ std::vector<io::FileInfo> old_files;
+ bool old_dir_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()
+ ->list(old_tablet_path, true, &old_files,
&old_dir_exists)
+ .ok());
+ EXPECT_TRUE(old_dir_exists);
+ int idx_file_count = 0;
+ int dat_file_count = 0;
+ for (const auto& file : old_files) {
+ std::string filename = file.file_name;
+ if (filename.find(".idx") != std::string::npos) {
+ idx_file_count++;
+ }
+ if (filename.find(".dat") != std::string::npos) {
+ dat_file_count++;
+ }
+ }
+ EXPECT_EQ(idx_file_count, 0) << "Old directory should contain exactly 0
.idx file";
+ EXPECT_EQ(dat_file_count, 1) << "Old directory should contain exactly 1
.dat file";
+
+ std::vector<io::FileInfo> new_files;
+ bool new_dir_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()
+ ->list(new_tablet_path, true, &new_files,
&new_dir_exists)
+ .ok());
+ EXPECT_TRUE(new_dir_exists);
+ int new_idx_file_count = 0;
+ int new_dat_file_count = 0;
+ for (const auto& file : new_files) {
+ std::string filename = file.file_name;
+ if (filename.find(".idx") != std::string::npos) {
+ new_idx_file_count++;
+ }
+ if (filename.find(".dat") != std::string::npos) {
+ new_dat_file_count++;
+ }
+ }
+ EXPECT_EQ(new_idx_file_count, 1) << "New directory should contain exactly
1 .idx files";
+ EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly
1 .dat file";
+
+ // 9. Schema-level verification of the built indexes is currently commented out
+ //auto tablet_schema = _tablet->tablet_schema();
+ //EXPECT_TRUE(tablet_schema->has_inverted_index_with_index_id(1));
+ //EXPECT_TRUE(tablet_schema->has_inverted_index_with_index_id(2));
+}
+
+TEST_F(IndexBuilderTest, AddIndexWhenOneExistsTest) { // rowset already has an index on k1; builds an additional index on k2
+ // 0. prepare tablet path
+ auto tablet_path = _absolute_dir + "/" + std::to_string(14675);
+ _tablet->_tablet_path = tablet_path;
+
ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok());
+
ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok());
+
+ // 1. Prepare data for writing
+ RowsetSharedPtr rowset;
+ const int num_rows = 1000;
+
+ // 2. First add an initial index to the schema (for k1 column)
+ TabletIndex initial_index;
+ initial_index._index_id = 1;
+ initial_index._index_name = "k1_index";
+ initial_index._index_type = IndexType::INVERTED;
+ initial_index._col_unique_ids.push_back(1); // unique_id for k1
+ _tablet_schema->append_index(std::move(initial_index));
+
+ // 3. Create rowset writer context (the rowset is written under .../15675, not the tablet path above)
+ RowsetWriterContext writer_context;
+ writer_context.rowset_id.init(15675);
+ writer_context.tablet_id = 15675;
+ writer_context.tablet_schema_hash = 567997577;
+ writer_context.partition_id = 10;
+ writer_context.rowset_type = BETA_ROWSET;
+ writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15675);
+ writer_context.rowset_state = VISIBLE;
+ writer_context.tablet_schema = _tablet_schema;
+ writer_context.version.first = 10;
+ writer_context.version.second = 10;
+
+
ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok());
+
+ // 4. Create rowset writer
+ auto res = RowsetFactory::create_rowset_writer(*_engine_ref,
writer_context, false);
+ ASSERT_TRUE(res.has_value()) << res.error();
+ auto rowset_writer = std::move(res).value();
+
+ // 5. Write data to rowset
+ {
+ vectorized::Block block = _tablet_schema->create_block();
+ auto columns = block.mutate_columns();
+
+ // Add data for k1 and k2 columns
+ for (int i = 0; i < num_rows; ++i) {
+ // k1 column (int)
+ int32_t k1 = i * 10;
+ columns[0]->insert_data((const char*)&k1, sizeof(k1));
+
+ // k2 column (int)
+ int32_t k2 = i % 100;
+ columns[1]->insert_data((const char*)&k2, sizeof(k2));
+ }
+
+ // Add block to rowset
+ Status s = rowset_writer->add_block(&block);
+ ASSERT_TRUE(s.ok()) << s.to_string();
+
+ // Flush writer
+ s = rowset_writer->flush();
+ ASSERT_TRUE(s.ok()) << s.to_string();
+
+ // Build rowset
+ ASSERT_TRUE(rowset_writer->build(rowset).ok());
+
+ // Add rowset to tablet
+ ASSERT_TRUE(_tablet->add_rowset(rowset).ok());
+ }
+
+ // 6. Prepare new index information (only add for k2 column)
+ TOlapTableIndex new_index;
+ new_index.index_id = 2; // New index ID is 2
+ new_index.columns.emplace_back("k2");
+ new_index.index_name = "k2_index";
+ new_index.index_type = TIndexType::INVERTED;
+ _alter_indexes.push_back(new_index);
+
+ // 7. Create IndexBuilder
+ IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(),
_tablet, _columns,
+ _alter_indexes, false);
+
+ // 8. Initialize and verify
+ auto status = builder.init();
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ EXPECT_EQ(builder._alter_index_ids.size(), 1); // Only one new index needs
to be built
+
+ // 9. Build index
+ status = builder.do_build_inverted_index();
+ EXPECT_TRUE(status.ok()) << status.to_string();
+
+ // old path = where the rowset was written (15675); new path = _tablet->_tablet_path (14675)
+ auto old_tablet_path = _absolute_dir + "/" + std::to_string(15675);
+ auto new_tablet_path = _absolute_dir + "/" + std::to_string(14675);
+ bool old_exists = false;
+ bool new_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path,
&old_exists).ok());
+ EXPECT_TRUE(old_exists);
+ EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path,
&new_exists).ok());
+ EXPECT_TRUE(new_exists);
+
+ // Check files in old and new directories
+ std::vector<io::FileInfo> old_files;
+ bool old_dir_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()
+ ->list(old_tablet_path, true, &old_files,
&old_dir_exists)
+ .ok());
+ EXPECT_TRUE(old_dir_exists);
+ int idx_file_count = 0;
+ int dat_file_count = 0;
+ for (const auto& file : old_files) {
+ std::string filename = file.file_name;
+ if (filename.find(".idx") != std::string::npos) {
+ idx_file_count++;
+ }
+ if (filename.find(".dat") != std::string::npos) {
+ dat_file_count++;
+ }
+ }
+ EXPECT_EQ(idx_file_count, 1) << "Old directory should contain exactly 1
.idx file";
+ EXPECT_EQ(dat_file_count, 1) << "Old directory should contain exactly 1
.dat file";
+
+ std::vector<io::FileInfo> new_files;
+ bool new_dir_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()
+ ->list(new_tablet_path, true, &new_files,
&new_dir_exists)
+ .ok());
+ EXPECT_TRUE(new_dir_exists);
+ int new_idx_file_count = 0;
+ int new_dat_file_count = 0;
+ for (const auto& file : new_files) {
+ std::string filename = file.file_name;
+ if (filename.find(".idx") != std::string::npos) {
+ new_idx_file_count++;
+ }
+ if (filename.find(".dat") != std::string::npos) {
+ new_dat_file_count++;
+ }
+ }
+ EXPECT_EQ(new_idx_file_count, 1) << "New directory should contain exactly
1 .idx files";
+ EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly
1 .dat file";
+
+ // 10. Schema-level verification (both indexes should exist) is currently commented out
+ // Verify initial index (k1) still exists
+ //EXPECT_TRUE(_tablet_schema->has_inverted_index_with_index_id(1));
+ // Verify newly added index (k2) is successfully built
+ //EXPECT_TRUE(_tablet_schema->has_inverted_index_with_index_id(2));
+}
+
+TEST_F(IndexBuilderTest, AddIndexWhenOneExistsTestV1) { // same scenario as AddIndexWhenOneExistsTest, with the V1 index storage format
+ // 1. Create a new schema (create_tablet_schema sets V2; switched to V1 in step 2)
+ auto v1_schema = std::make_shared<TabletSchema>();
+ create_tablet_schema(v1_schema, KeysType::DUP_KEYS);
+
+ // 2. Modify to V1 format
+ v1_schema->_inverted_index_storage_format =
InvertedIndexStorageFormatPB::V1;
+
+ // 3. First add an initial index to the schema (for k1 column)
+ TabletIndex initial_index;
+ initial_index._index_id = 1;
+ initial_index._index_name = "k1_index";
+ initial_index._index_type = IndexType::INVERTED;
+ initial_index._col_unique_ids.push_back(1); // unique_id for k1
+ v1_schema->append_index(std::move(initial_index));
+
+ // 4. Update schema in tablet meta by round-tripping through TabletMetaPB
+ TabletMetaPB tablet_meta_pb;
+ _tablet_meta->to_meta_pb(&tablet_meta_pb);
+
+ TabletSchemaPB v1_schema_pb;
+ v1_schema->to_schema_pb(&v1_schema_pb);
+ tablet_meta_pb.mutable_schema()->CopyFrom(v1_schema_pb);
+
+ _tablet_meta->init_from_pb(tablet_meta_pb);
+
+ // Reinitialize tablet to use new schema
+ _tablet = std::make_shared<Tablet>(*_engine_ref, _tablet_meta,
_data_dir.get());
+ ASSERT_TRUE(_tablet->init().ok());
+ auto tablet_path = _absolute_dir + "/" + std::to_string(14674);
+ _tablet->_tablet_path = tablet_path;
+
ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok());
+
ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok());
+
+ // 5. Prepare data
+ RowsetSharedPtr rowset;
+ const int num_rows = 1000;
+
+ // 6. Create rowset writer context (the rowset is written under .../15674, not the tablet path above)
+ RowsetWriterContext writer_context;
+ writer_context.rowset_id.init(15674);
+ writer_context.tablet_id = 15674;
+ writer_context.tablet_schema_hash = 567997577;
+ writer_context.partition_id = 10;
+ writer_context.rowset_type = BETA_ROWSET;
+ writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15674);
+ writer_context.rowset_state = VISIBLE;
+ writer_context.tablet_schema = v1_schema;
+ writer_context.version.first = 10;
+ writer_context.version.second = 10;
+
+
ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok());
+
+ // 7. Create rowset writer
+ auto res = RowsetFactory::create_rowset_writer(*_engine_ref,
writer_context, false);
+ ASSERT_TRUE(res.has_value()) << res.error();
+ auto rowset_writer = std::move(res).value();
+
+ // 8. Write data to rowset
+ {
+ vectorized::Block block = v1_schema->create_block();
+ auto columns = block.mutate_columns();
+
+ // Add data for k1 and k2 columns
+ for (int i = 0; i < num_rows; ++i) {
+ // k1 column (int)
+ int32_t k1 = i * 10;
+ columns[0]->insert_data((const char*)&k1, sizeof(k1));
+
+ // k2 column (int)
+ int32_t k2 = i % 100;
+ columns[1]->insert_data((const char*)&k2, sizeof(k2));
+ }
+
+ // Add block to rowset
+ Status s = rowset_writer->add_block(&block);
+ ASSERT_TRUE(s.ok()) << s.to_string();
+
+ // Flush writer
+ s = rowset_writer->flush();
+ ASSERT_TRUE(s.ok()) << s.to_string();
+
+ // Build rowset
+ ASSERT_TRUE(rowset_writer->build(rowset).ok());
+
+ // Add rowset to tablet
+ ASSERT_TRUE(_tablet->add_rowset(rowset).ok());
+ }
+
+ // 9. Clear existing index list, prepare new index
+ _alter_indexes.clear();
+
+ // 10. Prepare new index information (only add for k2 column)
+ TOlapTableIndex new_index;
+ new_index.index_id = 2; // New index ID is 2
+ new_index.columns.emplace_back("k2");
+ new_index.index_name = "k2_index";
+ new_index.index_type = TIndexType::INVERTED;
+ _alter_indexes.push_back(new_index);
+
+ // 11. Create IndexBuilder
+ IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(),
_tablet, _columns,
+ _alter_indexes, false);
+
+ // 12. Initialize and verify
+ auto status = builder.init();
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ EXPECT_EQ(builder._alter_index_ids.size(), 1); // Only one new index needs
to be built
+
+ // 13. Build index
+ status = builder.do_build_inverted_index();
+ EXPECT_TRUE(status.ok()) << status.to_string();
+
+ // old path = where the rowset was written (15674); new path = _tablet->_tablet_path (14674)
+ auto old_tablet_path = _absolute_dir + "/" + std::to_string(15674);
+ auto new_tablet_path = _absolute_dir + "/" + std::to_string(14674);
+ bool old_exists = false;
+ bool new_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path,
&old_exists).ok());
+ EXPECT_TRUE(old_exists);
+ EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path,
&new_exists).ok());
+ EXPECT_TRUE(new_exists);
+
+ // Check files in old and new directories
+ std::vector<io::FileInfo> old_files;
+ bool old_dir_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()
+ ->list(old_tablet_path, true, &old_files,
&old_dir_exists)
+ .ok());
+ EXPECT_TRUE(old_dir_exists);
+ int idx_file_count = 0;
+ int dat_file_count = 0;
+ for (const auto& file : old_files) {
+ std::string filename = file.file_name;
+ if (filename.find(".idx") != std::string::npos) {
+ idx_file_count++;
+ }
+ if (filename.find(".dat") != std::string::npos) {
+ dat_file_count++;
+ }
+ }
+ EXPECT_EQ(idx_file_count, 1) << "Old directory should contain exactly 1
.idx file";
+ EXPECT_EQ(dat_file_count, 1) << "Old directory should contain exactly 1
.dat file";
+
+ std::vector<io::FileInfo> new_files;
+ bool new_dir_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()
+ ->list(new_tablet_path, true, &new_files,
&new_dir_exists)
+ .ok());
+ EXPECT_TRUE(new_dir_exists);
+ int new_idx_file_count = 0;
+ int new_dat_file_count = 0;
+ for (const auto& file : new_files) {
+ std::string filename = file.file_name;
+ if (filename.find(".idx") != std::string::npos) {
+ new_idx_file_count++;
+ }
+ if (filename.find(".dat") != std::string::npos) {
+ new_dat_file_count++;
+ }
+ }
+ EXPECT_EQ(new_idx_file_count, 2) << "New directory should contain exactly
2 .idx files";
+ EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly
1 .dat file";
+ // 14. Schema-level verification (both indexes should exist) is currently commented out
+ // Verify initial index (k1) still exists
+ //EXPECT_TRUE(v1_schema->has_inverted_index_with_index_id(1));
+ // Verify newly added index (k2) is successfully built
+
//EXPECT_TRUE(_tablet->tablet_schema()->has_inverted_index_with_index_id(2));
+
+ // 15. Storage-format check (still V1) is also commented out
+ //EXPECT_EQ(v1_schema->_inverted_index_storage_format,
InvertedIndexStorageFormatPB::V1);
+}
+
+TEST_F(IndexBuilderTest, MultiSegmentBuildIndexTest) { // Builds k1/k2 inverted indexes over a 3-segment rowset and checks per-directory file counts
+ // 0. Point the tablet at a freshly recreated directory (<_absolute_dir>/14677)
+ auto tablet_path = _absolute_dir + "/" + std::to_string(14677);
+ _tablet->_tablet_path = tablet_path;
+
ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok());
+
ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok());
+
+ // 1. Prepare data for writing: 3 batches of 500 rows each
+ RowsetSharedPtr rowset;
+ const int rows_per_segment = 500;
+ const int num_segments = 3;
+
+ // 2. Create a rowset writer context with segment size set to trigger
multiple segments
+ RowsetWriterContext writer_context;
+ writer_context.rowset_id.init(15677);
+ writer_context.tablet_id = 15677;
+ writer_context.tablet_schema_hash = 567997577;
+ writer_context.partition_id = 10;
+ writer_context.rowset_type = BETA_ROWSET;
+ writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15677);
+ writer_context.rowset_state = VISIBLE;
+ writer_context.tablet_schema = _tablet_schema;
+ writer_context.version.first = 10;
+ writer_context.version.second = 10;
+ // Cap rows per segment at the batch size; together with the per-batch flush below this is meant to yield multiple segments
+ writer_context.max_rows_per_segment = rows_per_segment;
+
+
ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok());
+
+ // 3. Create a rowset writer
+ auto res = RowsetFactory::create_rowset_writer(*_engine_ref,
writer_context, false);
+ ASSERT_TRUE(res.has_value()) << res.error();
+ auto rowset_writer = std::move(res).value();
+
+ // 4. Write data to the rowset in multiple batches to ensure we get
multiple segments
+ for (int segment = 0; segment < num_segments; segment++) {
+ vectorized::Block block = _tablet_schema->create_block();
+ auto columns = block.mutate_columns();
+
+ // Add data for k1 and k2 columns
+ for (int i = 0; i < rows_per_segment; ++i) {
+ // k1 column (int) - multiples of 10, distinct across all batches
+ int32_t k1 = (segment * rows_per_segment + i) * 10;
+ columns[0]->insert_data((const char*)&k1, sizeof(k1));
+
+ // k2 column (int) - cycles through 0..99
+ int32_t k2 = (segment * rows_per_segment + i) % 100;
+ columns[1]->insert_data((const char*)&k2, sizeof(k2));
+ }
+
+ // Add the block to the rowset
+ Status s = rowset_writer->add_block(&block);
+ ASSERT_TRUE(s.ok()) << s.to_string();
+
+ // Flush to ensure we create a new segment
+ s = rowset_writer->flush();
+ ASSERT_TRUE(s.ok()) << s.to_string();
+ }
+
+ // 5. Build the rowset
+ ASSERT_TRUE(rowset_writer->build(rowset).ok());
+
+ // Verify we have the expected number of segments (hard precondition for the file counts below)
+ ASSERT_EQ(rowset->num_segments(), num_segments)
+ << "Rowset should have " << num_segments << " segments but has "
+ << rowset->num_segments();
+
+ // 6. Add the rowset to the tablet
+ ASSERT_TRUE(_tablet->add_rowset(rowset).ok());
+
+ // 7. Request two inverted indexes: k1 (index_id 1) and k2 (index_id 2); _alter_indexes is appended to without being cleared here
+ TOlapTableIndex index1;
+ index1.index_id = 1;
+ index1.columns.emplace_back("k1");
+ index1.index_name = "k1_index";
+ index1.index_type = TIndexType::INVERTED;
+ _alter_indexes.push_back(index1);
+
+ TOlapTableIndex index2;
+ index2.index_id = 2;
+ index2.columns.emplace_back("k2");
+ index2.index_name = "k2_index";
+ index2.index_type = TIndexType::INVERTED;
+ _alter_indexes.push_back(index2);
+
+ // 8. Create IndexBuilder (last argument false; presumably build rather than drop mode - TODO confirm against the constructor)
+ IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(),
_tablet, _columns,
+ _alter_indexes, false);
+
+ // 9. Initialize and verify that both requested index ids were registered
+ auto status = builder.init();
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ EXPECT_EQ(builder._alter_index_ids.size(), 2);
+
+ // 10. Build indexes
+ status = builder.do_build_inverted_index();
+ EXPECT_TRUE(status.ok()) << status.to_string();
+
+ // 11. Check paths: old = rowset writer directory (15677), new = tablet path (14677)
+ auto old_tablet_path = _absolute_dir + "/" + std::to_string(15677);
+ auto new_tablet_path = _absolute_dir + "/" + std::to_string(14677);
+ bool old_exists = false;
+ bool new_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path,
&old_exists).ok());
+ EXPECT_TRUE(old_exists);
+ EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path,
&new_exists).ok());
+ EXPECT_TRUE(new_exists);
+
+ // 12. Count .idx/.dat files in each directory by substring match on the file name
+ std::vector<io::FileInfo> old_files;
+ bool old_dir_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()
+ ->list(old_tablet_path, true, &old_files,
&old_dir_exists)
+ .ok());
+ EXPECT_TRUE(old_dir_exists);
+ int old_idx_file_count = 0;
+ int old_dat_file_count = 0;
+ for (const auto& file : old_files) {
+ std::string filename = file.file_name;
+ if (filename.find(".idx") != std::string::npos) {
+ old_idx_file_count++;
+ }
+ if (filename.find(".dat") != std::string::npos) {
+ old_dat_file_count++;
+ }
+ }
+ EXPECT_EQ(old_idx_file_count, 0) << "Old directory should contain exactly
0 .idx files";
+ EXPECT_EQ(old_dat_file_count, num_segments)
+ << "Old directory should contain exactly " << num_segments << "
.dat files";
+
+ std::vector<io::FileInfo> new_files;
+ bool new_dir_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()
+ ->list(new_tablet_path, true, &new_files,
&new_dir_exists)
+ .ok());
+ EXPECT_TRUE(new_dir_exists);
+ int new_idx_file_count = 0;
+ int new_dat_file_count = 0;
+ for (const auto& file : new_files) {
+ std::string filename = file.file_name;
+ if (filename.find(".idx") != std::string::npos) {
+ new_idx_file_count++;
+ }
+ if (filename.find(".dat") != std::string::npos) {
+ new_dat_file_count++;
+ }
+ }
+ EXPECT_EQ(new_idx_file_count, num_segments)
+ << "New directory should contain exactly " << num_segments << "
.idx files";
+ EXPECT_EQ(new_dat_file_count, num_segments)
+ << "New directory should contain exactly " << num_segments << "
.dat files";
+}
+
+TEST_F(IndexBuilderTest, NonExistentColumnIndexTest) { // Requests a single index on column k3 (treated as missing); expects success and no .idx output
+ // 0. Point the tablet at a freshly recreated directory (<_absolute_dir>/14678)
+ auto tablet_path = _absolute_dir + "/" + std::to_string(14678);
+ _tablet->_tablet_path = tablet_path;
+
ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok());
+
ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok());
+
+ // 1. Prepare data for writing: a single batch of 1000 rows
+ RowsetSharedPtr rowset;
+ const int num_rows = 1000;
+
+ // 2. Create a rowset writer context (rowset files go to <_absolute_dir>/15678)
+ RowsetWriterContext writer_context;
+ writer_context.rowset_id.init(15678);
+ writer_context.tablet_id = 15678;
+ writer_context.tablet_schema_hash = 567997577;
+ writer_context.partition_id = 10;
+ writer_context.rowset_type = BETA_ROWSET;
+ writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15678);
+ writer_context.rowset_state = VISIBLE;
+ writer_context.tablet_schema = _tablet_schema;
+ writer_context.version.first = 10;
+ writer_context.version.second = 10;
+
+
ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok());
+
+ // 3. Create a rowset writer
+ auto res = RowsetFactory::create_rowset_writer(*_engine_ref,
writer_context, false);
+ ASSERT_TRUE(res.has_value()) << res.error();
+ auto rowset_writer = std::move(res).value();
+
+ // 4. Write data to the rowset
+ {
+ vectorized::Block block = _tablet_schema->create_block();
+ auto columns = block.mutate_columns();
+
+ // Add data for k1 and k2 columns
+ for (int i = 0; i < num_rows; ++i) {
+ // k1 column (int) - multiples of 10
+ int32_t k1 = i * 10;
+ columns[0]->insert_data((const char*)&k1, sizeof(k1));
+
+ // k2 column (int) - cycles through 0..99
+ int32_t k2 = i % 100;
+ columns[1]->insert_data((const char*)&k2, sizeof(k2));
+ }
+
+ // Add the block to the rowset
+ Status s = rowset_writer->add_block(&block);
+ ASSERT_TRUE(s.ok()) << s.to_string();
+
+ // Flush the writer
+ s = rowset_writer->flush();
+ ASSERT_TRUE(s.ok()) << s.to_string();
+
+ // Build the rowset
+ ASSERT_TRUE(rowset_writer->build(rowset).ok());
+
+ // Add the rowset to the tablet
+ ASSERT_TRUE(_tablet->add_rowset(rowset).ok());
+ }
+
+ // 5. Prepare indexes for building - only one is requested, on a non-existent
column
+ _alter_indexes.clear();
+
+ // Index for non-existent column "k3"
+ TOlapTableIndex index2;
+ index2.index_id = 2;
+ index2.columns.emplace_back("k3"); // This column doesn't exist in the
schema
+ index2.index_name = "k3_index";
+ index2.index_type = TIndexType::INVERTED;
+ _alter_indexes.push_back(index2);
+
+ // 6. Create IndexBuilder (last argument false; presumably build rather than drop mode - TODO confirm against the constructor)
+ IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(),
_tablet, _columns,
+ _alter_indexes, false);
+
+ // 7. Initialize and verify
+ auto status = builder.init();
+ // The init should succeed, as we'll skip non-existent columns later
+ EXPECT_TRUE(status.ok()) << status.to_string();
+
+ // 8. Build indexes - expected to succeed; the file counts below expect no index output
+ status = builder.do_build_inverted_index();
+ EXPECT_TRUE(status.ok()) << status.to_string();
+
+ // 9. Check paths: old = rowset writer directory (15678), new = tablet path (14678)
+ auto old_tablet_path = _absolute_dir + "/" + std::to_string(15678);
+ auto new_tablet_path = _absolute_dir + "/" + std::to_string(14678);
+ bool old_exists = false;
+ bool new_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path,
&old_exists).ok());
+ EXPECT_TRUE(old_exists);
+ EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path,
&new_exists).ok());
+ EXPECT_TRUE(new_exists);
+
+ // 10. Count .idx/.dat files in each directory by substring match on the file name
+ std::vector<io::FileInfo> old_files;
+ bool old_dir_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()
+ ->list(old_tablet_path, true, &old_files,
&old_dir_exists)
+ .ok());
+ EXPECT_TRUE(old_dir_exists);
+ int old_idx_file_count = 0;
+ int old_dat_file_count = 0;
+ for (const auto& file : old_files) {
+ std::string filename = file.file_name;
+ if (filename.find(".idx") != std::string::npos) {
+ old_idx_file_count++;
+ }
+ if (filename.find(".dat") != std::string::npos) {
+ old_dat_file_count++;
+ }
+ }
+ EXPECT_EQ(old_idx_file_count, 0) << "Old directory should contain exactly
0 .idx files";
+ EXPECT_EQ(old_dat_file_count, 1) << "Old directory should contain exactly
1 .dat file";
+
+ std::vector<io::FileInfo> new_files;
+ bool new_dir_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()
+ ->list(new_tablet_path, true, &new_files,
&new_dir_exists)
+ .ok());
+ EXPECT_TRUE(new_dir_exists);
+ int new_idx_file_count = 0;
+ int new_dat_file_count = 0;
+ for (const auto& file : new_files) {
+ std::string filename = file.file_name;
+ if (filename.find(".idx") != std::string::npos) {
+ new_idx_file_count++;
+ }
+ if (filename.find(".dat") != std::string::npos) {
+ new_dat_file_count++;
+ }
+ }
+ // No index file is expected: the only requested index targets non-existent k3
+ EXPECT_EQ(new_idx_file_count, 0)
+ << "New directory should contain exactly 0 .idx file for the
existing column";
+ EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly
1 .dat file";
+
+ // 11. Verify logs in the output to confirm k3 index was skipped
+ // This would require examining the log output which isn't easily done in
unit tests,
+ // but the file count verification above should be sufficient to confirm
behavior
+}
+
+TEST_F(IndexBuilderTest, AddNonExistentColumnIndexWhenOneExistsTest) { // Schema already has a k1 index; requests an index on missing k3 and expects exactly one .idx file afterwards
+ // 0. Point the tablet at a freshly recreated directory (<_absolute_dir>/14679)
+ auto tablet_path = _absolute_dir + "/" + std::to_string(14679);
+ _tablet->_tablet_path = tablet_path;
+
ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok());
+
ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok());
+
+ // 1. Prepare data for writing: a single batch of 1000 rows
+ RowsetSharedPtr rowset;
+ const int num_rows = 1000;
+
+ // 2. Append an initial inverted index (index_id 1, column unique id 1) to the schema before the rowset is written
+ TabletIndex initial_index;
+ initial_index._index_id = 1;
+ initial_index._index_name = "k1_index";
+ initial_index._index_type = IndexType::INVERTED;
+ initial_index._col_unique_ids.push_back(1); // unique_id for k1
+ _tablet_schema->append_index(std::move(initial_index));
+
+ // 3. Create a rowset writer context (rowset files go to <_absolute_dir>/15679)
+ RowsetWriterContext writer_context;
+ writer_context.rowset_id.init(15679);
+ writer_context.tablet_id = 15679;
+ writer_context.tablet_schema_hash = 567997577;
+ writer_context.partition_id = 10;
+ writer_context.rowset_type = BETA_ROWSET;
+ writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15679);
+ writer_context.rowset_state = VISIBLE;
+ writer_context.tablet_schema = _tablet_schema;
+ writer_context.version.first = 10;
+ writer_context.version.second = 10;
+
+
ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok());
+
+ // 4. Create a rowset writer
+ auto res = RowsetFactory::create_rowset_writer(*_engine_ref,
writer_context, false);
+ ASSERT_TRUE(res.has_value()) << res.error();
+ auto rowset_writer = std::move(res).value();
+
+ // 5. Write data to the rowset
+ {
+ vectorized::Block block = _tablet_schema->create_block();
+ auto columns = block.mutate_columns();
+
+ // Add data for k1 and k2 columns
+ for (int i = 0; i < num_rows; ++i) {
+ // k1 column (int) - multiples of 10
+ int32_t k1 = i * 10;
+ columns[0]->insert_data((const char*)&k1, sizeof(k1));
+
+ // k2 column (int) - cycles through 0..99
+ int32_t k2 = i % 100;
+ columns[1]->insert_data((const char*)&k2, sizeof(k2));
+ }
+
+ // Add the block to the rowset
+ Status s = rowset_writer->add_block(&block);
+ ASSERT_TRUE(s.ok()) << s.to_string();
+
+ // Flush the writer
+ s = rowset_writer->flush();
+ ASSERT_TRUE(s.ok()) << s.to_string();
+
+ // Build the rowset
+ ASSERT_TRUE(rowset_writer->build(rowset).ok());
+
+ // Add the rowset to the tablet
+ ASSERT_TRUE(_tablet->add_rowset(rowset).ok());
+ }
+
+ // 6. Prepare indexes for building - only an index on non-existent k3 is requested (no k2 index)
+ _alter_indexes.clear();
+
+ // Index for non-existent column "k3"
+ TOlapTableIndex index2;
+ index2.index_id = 3;
+ index2.columns.emplace_back("k3"); // This column doesn't exist in the
schema
+ index2.index_name = "k3_index";
+ index2.index_type = TIndexType::INVERTED;
+ _alter_indexes.push_back(index2);
+
+ // 7. Create IndexBuilder (last argument false; presumably build rather than drop mode - TODO confirm against the constructor)
+ IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(),
_tablet, _columns,
+ _alter_indexes, false);
+
+ // 8. Initialize and verify
+ auto status = builder.init();
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ EXPECT_EQ(builder._alter_index_ids.size(), 1); // One id expected, presumably the requested k3 index (id 3), registered
for building
+
+ // 9. Build indexes - should only build for existing columns
+ status = builder.do_build_inverted_index();
+ EXPECT_TRUE(status.ok()) << status.to_string();
+
+ // 10. Check paths: old = rowset writer directory (15679), new = tablet path (14679)
+ auto old_tablet_path = _absolute_dir + "/" + std::to_string(15679);
+ auto new_tablet_path = _absolute_dir + "/" + std::to_string(14679);
+ bool old_exists = false;
+ bool new_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path,
&old_exists).ok());
+ EXPECT_TRUE(old_exists);
+ EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path,
&new_exists).ok());
+ EXPECT_TRUE(new_exists);
+
+ // 11. Count .idx/.dat files in each directory by substring match on the file name
+ std::vector<io::FileInfo> old_files;
+ bool old_dir_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()
+ ->list(old_tablet_path, true, &old_files,
&old_dir_exists)
+ .ok());
+ EXPECT_TRUE(old_dir_exists);
+ int old_idx_file_count = 0;
+ int old_dat_file_count = 0;
+ for (const auto& file : old_files) {
+ std::string filename = file.file_name;
+ if (filename.find(".idx") != std::string::npos) {
+ old_idx_file_count++;
+ }
+ if (filename.find(".dat") != std::string::npos) {
+ old_dat_file_count++;
+ }
+ }
+ EXPECT_EQ(old_idx_file_count, 1)
+ << "Old directory should contain exactly 1 .idx file for the
original k1 index";
+ EXPECT_EQ(old_dat_file_count, 1) << "Old directory should contain exactly
1 .dat file";
+
+ std::vector<io::FileInfo> new_files;
+ bool new_dir_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()
+ ->list(new_tablet_path, true, &new_files,
&new_dir_exists)
+ .ok());
+ EXPECT_TRUE(new_dir_exists);
+ int new_idx_file_count = 0;
+ int new_dat_file_count = 0;
+ for (const auto& file : new_files) {
+ std::string filename = file.file_name;
+ if (filename.find(".idx") != std::string::npos) {
+ new_idx_file_count++;
+ }
+ if (filename.find(".dat") != std::string::npos) {
+ new_dat_file_count++;
+ }
+ }
+ // Expect exactly 1 index file, presumably the pre-existing k1 index; no k2 index is requested here (k3
should be skipped)
+ EXPECT_EQ(new_idx_file_count, 1)
+ << "New directory should contain exactly 1 .idx files (for k1 and
k2, not k3)";
+ EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly
1 .dat file";
+
+ // 12. Verify the tablet schema - would need to examine tablet_schema here
+ // k1 index should exist, k3 index should not (neither is asserted by this test)
+ // Note: In production code, additional verification of schema would be
done here
+}
+
+TEST_F(IndexBuilderTest, AddNonExistentColumnIndexWhenOneExistsTestV1) { // V1-storage-format variant: existing k1 index, index requested on missing k3, expects exactly one .idx file afterwards
+ // 1. Create new schema using V1 format
+ auto v1_schema = std::make_shared<TabletSchema>();
+ create_tablet_schema(v1_schema, KeysType::DUP_KEYS);
+
+ // 2. Modify to V1 format
+ v1_schema->_inverted_index_storage_format =
InvertedIndexStorageFormatPB::V1;
+
+ // 3. Append an initial inverted index (index_id 1, column unique id 1) to the V1 schema
+ TabletIndex initial_index;
+ initial_index._index_id = 1;
+ initial_index._index_name = "k1_index";
+ initial_index._index_type = IndexType::INVERTED;
+ initial_index._col_unique_ids.push_back(1); // unique_id for k1
+ v1_schema->append_index(std::move(initial_index));
+
+ // 4. Swap the tablet meta's schema for v1_schema via a protobuf round trip
+ TabletMetaPB tablet_meta_pb;
+ _tablet_meta->to_meta_pb(&tablet_meta_pb);
+
+ TabletSchemaPB v1_schema_pb;
+ v1_schema->to_schema_pb(&v1_schema_pb);
+ tablet_meta_pb.mutable_schema()->CopyFrom(v1_schema_pb);
+
+ _tablet_meta->init_from_pb(tablet_meta_pb);
+
+ // 5. Recreate the tablet from the updated meta, then point it at a freshly recreated directory (<_absolute_dir>/14680)
+ _tablet = std::make_shared<Tablet>(*_engine_ref, _tablet_meta,
_data_dir.get());
+ ASSERT_TRUE(_tablet->init().ok());
+ auto tablet_path = _absolute_dir + "/" + std::to_string(14680);
+ _tablet->_tablet_path = tablet_path;
+
ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok());
+
ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok());
+
+ // 6. Prepare data for writing: a single batch of 1000 rows
+ RowsetSharedPtr rowset;
+ const int num_rows = 1000;
+
+ // 7. Create rowset writer context (rowset files go to <_absolute_dir>/15680, written with v1_schema)
+ RowsetWriterContext writer_context;
+ writer_context.rowset_id.init(15680);
+ writer_context.tablet_id = 15680;
+ writer_context.tablet_schema_hash = 567997577;
+ writer_context.partition_id = 10;
+ writer_context.rowset_type = BETA_ROWSET;
+ writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15680);
+ writer_context.rowset_state = VISIBLE;
+ writer_context.tablet_schema = v1_schema;
+ writer_context.version.first = 10;
+ writer_context.version.second = 10;
+
+
ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok());
+
+ // 8. Create rowset writer
+ auto res = RowsetFactory::create_rowset_writer(*_engine_ref,
writer_context, false);
+ ASSERT_TRUE(res.has_value()) << res.error();
+ auto rowset_writer = std::move(res).value();
+
+ // 9. Write data to rowset
+ {
+ vectorized::Block block = v1_schema->create_block();
+ auto columns = block.mutate_columns();
+
+ // Add data for k1 and k2 columns
+ for (int i = 0; i < num_rows; ++i) {
+ // k1 column (int) - multiples of 10
+ int32_t k1 = i * 10;
+ columns[0]->insert_data((const char*)&k1, sizeof(k1));
+
+ // k2 column (int) - cycles through 0..99
+ int32_t k2 = i % 100;
+ columns[1]->insert_data((const char*)&k2, sizeof(k2));
+ }
+
+ // Add block to rowset
+ Status s = rowset_writer->add_block(&block);
+ ASSERT_TRUE(s.ok()) << s.to_string();
+
+ // Flush writer
+ s = rowset_writer->flush();
+ ASSERT_TRUE(s.ok()) << s.to_string();
+
+ // Build rowset
+ ASSERT_TRUE(rowset_writer->build(rowset).ok());
+
+ // Add rowset to tablet
+ ASSERT_TRUE(_tablet->add_rowset(rowset).ok());
+ }
+
+ // 10. Prepare indexes for building - only an index on non-existent k3 is requested (no k2 index)
+ _alter_indexes.clear();
+
+ // Index for non-existent column "k3"
+ TOlapTableIndex index2;
+ index2.index_id = 3;
+ index2.columns.emplace_back("k3"); // This column doesn't exist in the
schema
+ index2.index_name = "k3_index";
+ index2.index_type = TIndexType::INVERTED;
+ _alter_indexes.push_back(index2);
+
+ // Replace the column info handed to the builder with a single INT column named k3
+ _columns.clear(); // Clear previous column info
+ TColumn non_existent_column;
+ non_existent_column.column_name = "k3";
+ non_existent_column.column_type.type = TPrimitiveType::INT;
+ _columns.push_back(non_existent_column);
+
+ // 11. Create IndexBuilder (last argument false; presumably build rather than drop mode - TODO confirm against the constructor)
+ IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(),
_tablet, _columns,
+ _alter_indexes, false);
+
+ // 12. Initialize and verify that one index id (presumably the requested id 3) was registered
+ auto status = builder.init();
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ EXPECT_EQ(builder._alter_index_ids.size(), 1);
+ // 13. Build indexes - should only build for existing columns
+ status = builder.do_build_inverted_index();
+ EXPECT_TRUE(status.ok()) << status.to_string();
+
+ // 14. Check paths: old = rowset writer directory (15680), new = tablet path (14680)
+ auto old_tablet_path = _absolute_dir + "/" + std::to_string(15680);
+ auto new_tablet_path = _absolute_dir + "/" + std::to_string(14680);
+ bool old_exists = false;
+ bool new_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path,
&old_exists).ok());
+ EXPECT_TRUE(old_exists);
+ EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path,
&new_exists).ok());
+ EXPECT_TRUE(new_exists);
+
+ // 15. Count .idx/.dat files in each directory by substring match on the file name
+ std::vector<io::FileInfo> old_files;
+ bool old_dir_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()
+ ->list(old_tablet_path, true, &old_files,
&old_dir_exists)
+ .ok());
+ EXPECT_TRUE(old_dir_exists);
+ int old_idx_file_count = 0;
+ int old_dat_file_count = 0;
+ for (const auto& file : old_files) {
+ std::string filename = file.file_name;
+ if (filename.find(".idx") != std::string::npos) {
+ old_idx_file_count++;
+ }
+ if (filename.find(".dat") != std::string::npos) {
+ old_dat_file_count++;
+ }
+ }
+ EXPECT_EQ(old_idx_file_count, 1)
+ << "Old directory should contain exactly 1 .idx file for the
original k1 index";
+ EXPECT_EQ(old_dat_file_count, 1) << "Old directory should contain exactly
1 .dat file";
+
+ std::vector<io::FileInfo> new_files;
+ bool new_dir_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()
+ ->list(new_tablet_path, true, &new_files,
&new_dir_exists)
+ .ok());
+ EXPECT_TRUE(new_dir_exists);
+ int new_idx_file_count = 0;
+ int new_dat_file_count = 0;
+ for (const auto& file : new_files) {
+ std::string filename = file.file_name;
+ if (filename.find(".idx") != std::string::npos) {
+ new_idx_file_count++;
+ }
+ if (filename.find(".dat") != std::string::npos) {
+ new_dat_file_count++;
+ }
+ }
+ // Expect exactly 1 index file, presumably the pre-existing k1 index; no k2 index is requested here (k3
should be skipped)
+ EXPECT_EQ(new_idx_file_count, 1)
+ << "New directory should contain exactly 1 .idx files (for k1 and
k2, not k3)";
+ EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly
1 .dat file";
+
+ // 16. Confirm storage format is still V1 (NOTE(review): this inspects the local v1_schema object configured in step 2)
+ EXPECT_EQ(v1_schema->_inverted_index_storage_format,
InvertedIndexStorageFormatPB::V1);
+}
+
+TEST_F(IndexBuilderTest, NonNullIndexDataTest) { // Builds a k1 inverted index over 1000 rows and expects exactly one .idx file in the tablet directory
+ // 0. Point the tablet at a freshly recreated directory (<_absolute_dir>/14681)
+ auto tablet_path = _absolute_dir + "/" + std::to_string(14681);
+ _tablet->_tablet_path = tablet_path;
+
ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok());
+
ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok());
+
+ // 1. Prepare data for writing: a single batch of 1000 rows
+ RowsetSharedPtr rowset;
+ const int num_rows = 1000;
+
+ // 2. Create a rowset writer context (rowset files go to <_absolute_dir>/15681)
+ RowsetWriterContext writer_context;
+ writer_context.rowset_id.init(15681);
+ writer_context.tablet_id = 15681;
+ writer_context.tablet_schema_hash = 567997577;
+ writer_context.partition_id = 10;
+ writer_context.rowset_type = BETA_ROWSET;
+ writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15681);
+ writer_context.rowset_state = VISIBLE;
+ writer_context.tablet_schema = _tablet_schema;
+ writer_context.version.first = 10;
+ writer_context.version.second = 10;
+
+
ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok());
+
+ // 3. Create a rowset writer with non-null values
+ auto res = RowsetFactory::create_rowset_writer(*_engine_ref,
writer_context, false);
+ ASSERT_TRUE(res.has_value()) << res.error();
+ auto rowset_writer = std::move(res).value();
+
+ // 4. Write non-null data to the rowset
+ {
+ vectorized::Block block = _tablet_schema->create_block();
+ auto columns = block.mutate_columns();
+
+ // Add data for k1 and k2 columns with no null values
+ for (int i = 0; i < num_rows; ++i) {
+ // k1 column (int) - multiples of 10
+ int32_t k1 = i * 10;
+ columns[0]->insert_data((const char*)&k1, sizeof(k1));
+
+ // k2 column (int) - cycles through 0..99
+ int32_t k2 = i % 100;
+ columns[1]->insert_data((const char*)&k2, sizeof(k2));
+ }
+
+ // Add the block to the rowset
+ Status s = rowset_writer->add_block(&block);
+ ASSERT_TRUE(s.ok()) << s.to_string();
+
+ // Flush the writer
+ s = rowset_writer->flush();
+ ASSERT_TRUE(s.ok()) << s.to_string();
+
+ // Build the rowset
+ ASSERT_TRUE(rowset_writer->build(rowset).ok());
+
+ // Add the rowset to the tablet
+ ASSERT_TRUE(_tablet->add_rowset(rowset).ok());
+ }
+
+ // 5. Request one inverted index on k1 (index_id 1); _alter_indexes is appended to without being cleared here
+ TOlapTableIndex index1;
+ index1.index_id = 1;
+ index1.columns.emplace_back("k1");
+ index1.index_name = "k1_index";
+ index1.index_type = TIndexType::INVERTED;
+ _alter_indexes.push_back(index1);
+
+ // 6. Build a separate schema object and mark its column at position 1 non-nullable
+ TabletSchemaSPtr non_null_schema = std::make_shared<TabletSchema>();
+ create_tablet_schema(non_null_schema, KeysType::DUP_KEYS);
+ // NOTE(review): non_null_schema is not referenced again after the next two lines, so it does not appear to reach the builder
+ TabletColumn& k2_column = non_null_schema->mutable_column(1);
+ k2_column.set_is_nullable(false);
+
+ // 7. Create IndexBuilder from _tablet, _columns and _alter_indexes (non_null_schema is not passed in)
+ IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(),
_tablet, _columns,
+ _alter_indexes, false);
+
+ // 8. Initialize and verify that one index id was registered
+ auto status = builder.init();
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ EXPECT_EQ(builder._alter_index_ids.size(), 1);
+
+ // 9. Build index - intended to hit _add_data rather than _add_nullable (NOTE(review): not verified by any assertion here)
+ status = builder.do_build_inverted_index();
+ EXPECT_TRUE(status.ok()) << status.to_string();
+
+ // 10. Verify results: old = rowset writer directory (15681), new = tablet path (14681)
+ auto old_tablet_path = _absolute_dir + "/" + std::to_string(15681);
+ auto new_tablet_path = _absolute_dir + "/" + std::to_string(14681);
+ bool old_exists = false;
+ bool new_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path,
&old_exists).ok());
+ EXPECT_TRUE(old_exists);
+ EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path,
&new_exists).ok());
+ EXPECT_TRUE(new_exists);
+
+ // 11. List both directories; only the new directory has its .idx files counted
+ std::vector<io::FileInfo> old_files;
+ bool old_dir_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()
+ ->list(old_tablet_path, true, &old_files,
&old_dir_exists)
+ .ok());
+ EXPECT_TRUE(old_dir_exists);
+
+ std::vector<io::FileInfo> new_files;
+ bool new_dir_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()
+ ->list(new_tablet_path, true, &new_files,
&new_dir_exists)
+ .ok());
+ EXPECT_TRUE(new_dir_exists);
+ int new_idx_file_count = 0;
+ for (const auto& file : new_files) {
+ std::string filename = file.file_name;
+ if (filename.find(".idx") != std::string::npos) {
+ new_idx_file_count++;
+ }
+ }
+ EXPECT_EQ(new_idx_file_count, 1) << "Should have created 1 index file";
+}
+
+TEST_F(IndexBuilderTest, NonExistentColumnUniqueIdTest) { // Runs the builder in drop mode against index id 1 with an unknown column unique id (999); expects init and build to succeed
+ // 0. Point the tablet at a freshly recreated directory (<_absolute_dir>/14682)
+ auto tablet_path = _absolute_dir + "/" + std::to_string(14682);
+ _tablet->_tablet_path = tablet_path;
+
ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok());
+
ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok());
+
+ // 1. Prepare data for writing: a single batch of 1000 rows
+ RowsetSharedPtr rowset;
+ const int num_rows = 1000;
+
+ // 2. Create a rowset writer context (rowset files go to <_absolute_dir>/15682)
+ RowsetWriterContext writer_context;
+ writer_context.rowset_id.init(15682);
+ writer_context.tablet_id = 15682;
+ writer_context.tablet_schema_hash = 567997577;
+ writer_context.partition_id = 10;
+ writer_context.rowset_type = BETA_ROWSET;
+ writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15682);
+ writer_context.rowset_state = VISIBLE;
+ writer_context.tablet_schema = _tablet_schema;
+ writer_context.version.first = 10;
+ writer_context.version.second = 10;
+
+
ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok());
+
+ // 3. Create a rowset writer
+ auto res = RowsetFactory::create_rowset_writer(*_engine_ref,
writer_context, false);
+ ASSERT_TRUE(res.has_value()) << res.error();
+ auto rowset_writer = std::move(res).value();
+
+ // 4. Write data to the rowset
+ {
+ vectorized::Block block = _tablet_schema->create_block();
+ auto columns = block.mutate_columns();
+
+ // Add data for k1 and k2 columns
+ for (int i = 0; i < num_rows; ++i) {
+ // k1 column (int) - multiples of 10
+ int32_t k1 = i * 10;
+ columns[0]->insert_data((const char*)&k1, sizeof(k1));
+
+ // k2 column (int) - cycles through 0..99
+ int32_t k2 = i % 100;
+ columns[1]->insert_data((const char*)&k2, sizeof(k2));
+ }
+
+ // Add the block to the rowset
+ Status s = rowset_writer->add_block(&block);
+ ASSERT_TRUE(s.ok()) << s.to_string();
+
+ // Flush the writer
+ s = rowset_writer->flush();
+ ASSERT_TRUE(s.ok()) << s.to_string();
+
+ // Build the rowset
+ ASSERT_TRUE(rowset_writer->build(rowset).ok());
+
+ // Add the rowset to the tablet
+ ASSERT_TRUE(_tablet->add_rowset(rowset).ok());
+ }
+
+ // 5. Append an index (index_id 1, column unique id 1) to the schema - note this happens after the rowset was written
+ TabletIndex initial_index;
+ initial_index._index_id = 1;
+ initial_index._index_name = "k1_index";
+ initial_index._index_type = IndexType::INVERTED;
+ initial_index._col_unique_ids.push_back(1); // unique_id for k1
+ _tablet_schema->append_index(std::move(initial_index));
+
+ // 6. Prepare indexes for building - specifying column by unique_id that
doesn't exist
+ _alter_indexes.clear();
+
+ // Use drop operation to test column_unique_ids path
+ TOlapTableIndex drop_index;
+ drop_index.index_id = 1;
+ drop_index.columns.emplace_back("non_existent_column");
+ drop_index.column_unique_ids.push_back(999); // This unique ID doesn't
exist
+ _alter_indexes.push_back(drop_index);
+
+ // 7. Create IndexBuilder with drop operation (last constructor argument is true)
+ IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(),
_tablet, _columns,
+ _alter_indexes, true);
+
+ // 8. Initialize and verify that the single requested index id was registered
+ auto status = builder.init();
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ EXPECT_EQ(builder._alter_index_ids.size(), 1);
+
+ // 9. Execute drop operation - should handle non-existent column gracefully
+ status = builder.do_build_inverted_index();
+ EXPECT_TRUE(status.ok()) << status.to_string();
+
+ // 10. Verify both paths exist (15682 and 14682); file contents are not inspected in this test
+ auto old_tablet_path = _absolute_dir + "/" + std::to_string(15682);
+ auto new_tablet_path = _absolute_dir + "/" + std::to_string(14682);
+ bool old_exists = false;
+ bool new_exists = false;
+ EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path,
&old_exists).ok());
+ EXPECT_TRUE(old_exists);
+ EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path,
&new_exists).ok());
+ EXPECT_TRUE(new_exists);
+}
+
+TEST_F(IndexBuilderTest, DropIndexV1FormatTest) {
+    // Scenario: write one rowset under a schema that stores inverted indexes in the V1
+    // format and already carries an index on k1, drop that index through IndexBuilder,
+    // then inspect which segment/index files are left in the tablet directory.
+
+    // Schema: DUP_KEYS with the inverted index storage format forced to V1.
+    auto schema_v1 = std::make_shared<TabletSchema>();
+    create_tablet_schema(schema_v1, KeysType::DUP_KEYS);
+    schema_v1->_inverted_index_storage_format = InvertedIndexStorageFormatPB::V1;
+
+    // Pre-existing inverted index (id 1) bound to column unique id 1, i.e. k1.
+    TabletIndex k1_index;
+    k1_index._index_id = 1;
+    k1_index._index_name = "k1_index";
+    k1_index._index_type = IndexType::INVERTED;
+    k1_index._col_unique_ids.push_back(1);
+    schema_v1->append_index(std::move(k1_index));
+
+    // Round-trip the fixture's tablet meta through protobuf so it carries the V1 schema.
+    TabletSchemaPB schema_pb;
+    schema_v1->to_schema_pb(&schema_pb);
+    TabletMetaPB meta_pb;
+    _tablet_meta->to_meta_pb(&meta_pb);
+    meta_pb.mutable_schema()->CopyFrom(schema_pb);
+    _tablet_meta->init_from_pb(meta_pb);
+
+    // Recreate the fixture tablet on top of the updated meta and give it a clean directory.
+    _tablet = std::make_shared<Tablet>(*_engine_ref, _tablet_meta, _data_dir.get());
+    ASSERT_TRUE(_tablet->init().ok());
+    const auto tablet_dir = _absolute_dir + "/" + std::to_string(15683);
+    _tablet->_tablet_path = tablet_dir;
+    const auto& local_fs = io::global_local_filesystem();
+    ASSERT_TRUE(local_fs->delete_directory(tablet_dir).ok());
+    ASSERT_TRUE(local_fs->create_directory(tablet_dir).ok());
+
+    // Writer context for one visible beta rowset covering version [10, 10].
+    RowsetWriterContext ctx;
+    ctx.rowset_id.init(15683);
+    ctx.tablet_id = 15683;
+    ctx.tablet_schema_hash = 567997577;
+    ctx.partition_id = 10;
+    ctx.rowset_type = BETA_ROWSET;
+    ctx.tablet_path = tablet_dir;
+    ctx.rowset_state = VISIBLE;
+    ctx.tablet_schema = schema_v1;
+    ctx.version.first = 10;
+    ctx.version.second = 10;
+
+    ASSERT_TRUE(local_fs->create_directory(ctx.tablet_path).ok());
+
+    auto writer_result = RowsetFactory::create_rowset_writer(*_engine_ref, ctx, false);
+    ASSERT_TRUE(writer_result.has_value()) << writer_result.error();
+    auto writer = std::move(writer_result).value();
+
+    // Fill 1000 rows (k1 = i * 10, k2 = i % 100), then flush, build and publish the rowset.
+    RowsetSharedPtr rowset;
+    {
+        const int num_rows = 1000;
+        vectorized::Block block = schema_v1->create_block();
+        auto cols = block.mutate_columns();
+
+        for (int row = 0; row < num_rows; ++row) {
+            const int32_t k1 = row * 10;
+            cols[0]->insert_data(reinterpret_cast<const char*>(&k1), sizeof(k1));
+
+            const int32_t k2 = row % 100;
+            cols[1]->insert_data(reinterpret_cast<const char*>(&k2), sizeof(k2));
+        }
+
+        Status st = writer->add_block(&block);
+        ASSERT_TRUE(st.ok()) << st.to_string();
+
+        st = writer->flush();
+        ASSERT_TRUE(st.ok()) << st.to_string();
+
+        ASSERT_TRUE(writer->build(rowset).ok());
+        ASSERT_TRUE(_tablet->add_rowset(rowset).ok());
+    }
+
+    // Describe the drop request: index id 1, named k1_index, on column k1.
+    _alter_indexes.clear();
+    TOlapTableIndex drop_request;
+    drop_request.index_id = 1;
+    drop_request.columns.emplace_back("k1");
+    drop_request.index_name = "k1_index";
+    drop_request.index_type = TIndexType::INVERTED;
+    _alter_indexes.push_back(drop_request);
+
+    // The trailing `true` selects the drop operation.
+    IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns,
+                         _alter_indexes, true);
+
+    auto status = builder.init();
+    EXPECT_TRUE(status.ok()) << status.to_string();
+    EXPECT_EQ(builder._alter_index_ids.size(), 1);
+
+    status = builder.do_build_inverted_index();
+    EXPECT_TRUE(status.ok()) << status.to_string();
+
+    // The tablet directory must still be present.
+    bool exists = false;
+    EXPECT_TRUE(local_fs->exists(tablet_dir, &exists).ok());
+    EXPECT_TRUE(exists);
+
+    // List the directory and count files by name. "15683_*" is the rowset written
+    // above (rowset id initialised to 15683); the long hexadecimal prefix is the
+    // other rowset name this test expects to find after the drop.
+    std::vector<io::FileInfo> files;
+    bool dir_exists = false;
+    EXPECT_TRUE(local_fs->list(tablet_dir, true, &files, &dir_exists).ok());
+    EXPECT_TRUE(dir_exists);
+
+    const auto name_has = [](const std::string& name, const char* needle) {
+        return name.find(needle) != std::string::npos;
+    };
+    int old_idx_file_count = 0;
+    int old_dat_file_count = 0;
+    int new_idx_file_count = 0;
+    int new_dat_file_count = 0;
+    for (const auto& file : files) {
+        const std::string& name = file.file_name;
+        old_idx_file_count += name_has(name, "15683_0_1.idx") ? 1 : 0;
+        old_dat_file_count += name_has(name, "15683_0.dat") ? 1 : 0;
+        new_idx_file_count +=
+                name_has(name, "020000000000000100000000000000000000000000000000_0_1.idx") ? 1 : 0;
+        new_dat_file_count +=
+                name_has(name, "020000000000000100000000000000000000000000000000_0.dat") ? 1 : 0;
+    }
+
+    // Expected: the 15683 rowset still has its .idx and .dat, while the other rowset
+    // has a .dat but no .idx.
+    EXPECT_EQ(old_idx_file_count, 1) << "Tablet path should have 1 .idx file before drop";
+    EXPECT_EQ(old_dat_file_count, 1) << "Tablet path should have 1 .dat file before drop";
+    EXPECT_EQ(new_idx_file_count, 0) << "Tablet path should have no .idx file after drop";
+    EXPECT_EQ(new_dat_file_count, 1) << "Tablet path should have 1 .dat file after drop";
+}
+
+TEST_F(IndexBuilderTest, ResourceCleanupTest) {
+    // Scenario: the index build itself runs, but update_inverted_index_info() is forced to
+    // report an error afterwards. The test checks the error that surfaces and that the
+    // expected rowset id is registered among the engine's unused rowsets.
+
+    // Point the fixture tablet at a fresh, empty directory.
+    const auto tablet_dir = _absolute_dir + "/" + std::to_string(15684);
+    _tablet->_tablet_path = tablet_dir;
+    const auto& local_fs = io::global_local_filesystem();
+    ASSERT_TRUE(local_fs->delete_directory(tablet_dir).ok());
+    ASSERT_TRUE(local_fs->create_directory(tablet_dir).ok());
+
+    // Writer context for one visible beta rowset covering version [10, 10].
+    RowsetWriterContext ctx;
+    ctx.rowset_id.init(15684);
+    ctx.tablet_id = 15684;
+    ctx.tablet_schema_hash = 567997577;
+    ctx.partition_id = 10;
+    ctx.rowset_type = BETA_ROWSET;
+    ctx.tablet_path = tablet_dir;
+    ctx.rowset_state = VISIBLE;
+    ctx.tablet_schema = _tablet_schema;
+    ctx.version.first = 10;
+    ctx.version.second = 10;
+
+    ASSERT_TRUE(local_fs->create_directory(ctx.tablet_path).ok());
+
+    auto writer_result = RowsetFactory::create_rowset_writer(*_engine_ref, ctx, false);
+    ASSERT_TRUE(writer_result.has_value()) << writer_result.error();
+    auto writer = std::move(writer_result).value();
+
+    // Fill 1000 rows (k1 = i * 10, k2 = i % 100), then flush, build and publish the rowset.
+    RowsetSharedPtr rowset;
+    {
+        const int num_rows = 1000;
+        vectorized::Block block = _tablet_schema->create_block();
+        auto cols = block.mutate_columns();
+
+        for (int row = 0; row < num_rows; ++row) {
+            const int32_t k1 = row * 10;
+            cols[0]->insert_data(reinterpret_cast<const char*>(&k1), sizeof(k1));
+
+            const int32_t k2 = row % 100;
+            cols[1]->insert_data(reinterpret_cast<const char*>(&k2), sizeof(k2));
+        }
+
+        Status st = writer->add_block(&block);
+        ASSERT_TRUE(st.ok()) << st.to_string();
+
+        st = writer->flush();
+        ASSERT_TRUE(st.ok()) << st.to_string();
+
+        ASSERT_TRUE(writer->build(rowset).ok());
+        ASSERT_TRUE(_tablet->add_rowset(rowset).ok());
+    }
+
+    // Request an inverted index (id 1, "k1_index") on column k1.
+    TOlapTableIndex k1_request;
+    k1_request.index_id = 1;
+    k1_request.columns.emplace_back("k1");
+    k1_request.index_name = "k1_index";
+    k1_request.index_type = TIndexType::INVERTED;
+    _alter_indexes.push_back(k1_request);
+
+    // Builder that lets the real update_inverted_index_info() run and then replaces a
+    // successful result with a synthetic INTERNAL_ERROR.
+    class FailingIndexBuilder : public IndexBuilder {
+    public:
+        FailingIndexBuilder(StorageEngine& engine, TabletSharedPtr tablet,
+                            const std::vector<TColumn>& columns,
+                            const std::vector<doris::TOlapTableIndex>& alter_inverted_indexes,
+                            bool is_drop_op)
+                : IndexBuilder(engine, tablet, columns, alter_inverted_indexes, is_drop_op) {}
+
+        ~FailingIndexBuilder() override = default;
+
+        Status update_inverted_index_info() override {
+            // A genuine failure from the base class is propagated unchanged.
+            RETURN_IF_ERROR(IndexBuilder::update_inverted_index_info());
+            return Status::Error<ErrorCode::INTERNAL_ERROR>("Simulated error for testing cleanup");
+        }
+    };
+
+    // The trailing `false` selects the build (not drop) operation.
+    FailingIndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet,
+                                _columns, _alter_indexes, false);
+
+    auto status = builder.init();
+    EXPECT_TRUE(status.ok()) << status.to_string();
+    EXPECT_EQ(builder._alter_index_ids.size(), 1);
+
+    // The build must surface exactly the injected error.
+    status = builder.do_build_inverted_index();
+    EXPECT_FALSE(status.ok()) << "Expected failure, but got success";
+    EXPECT_TRUE(status.is<ErrorCode::INTERNAL_ERROR>()) << "Expected internal error";
+    EXPECT_EQ(status.to_string(), "[INTERNAL_ERROR]Simulated error for testing cleanup")
+            << "Error message doesn't match expected";
+
+    // The tablet directory is still there after the failed operation.
+    bool exists = false;
+    EXPECT_TRUE(local_fs->exists(tablet_dir, &exists).ok());
+    EXPECT_TRUE(exists);
+
+    // The rowset id parsed from this file name must be tracked as unused by the engine.
+    const auto unused_rowset_id =
+            extract_rowset_id("020000000000000100000000000000000000000000000000_0.dat");
+    EXPECT_TRUE(_engine_ref->check_rowset_id_in_unused_rowsets(unused_rowset_id))
+            << "Rowset id should be in unused rowsets";
+}
+
+TEST_F(IndexBuilderTest, ArrayTypeIndexTest) {
+    // Scenario: build an inverted index on an ARRAY<VARCHAR> column and check that the
+    // index file for the expected segment exists afterwards.
+
+    // Start from an empty tablet directory.
+    const auto tablet_dir = _absolute_dir + "/" + std::to_string(14685);
+    const auto& local_fs = io::global_local_filesystem();
+    ASSERT_TRUE(local_fs->delete_directory(tablet_dir).ok());
+    ASSERT_TRUE(local_fs->create_directory(tablet_dir).ok());
+
+    // Schema: INT key column k1 (unique id 1) plus a non-nullable array column
+    // array_col (unique id 2) whose items are VARCHAR with length 64.
+    auto array_schema = std::make_shared<TabletSchema>();
+
+    TabletColumn key_column(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE,
+                            FieldType::OLAP_FIELD_TYPE_INT, true);
+    key_column.set_unique_id(1);
+    key_column.set_is_key(true);
+    key_column.set_name("k1");
+    array_schema->append_column(key_column);
+
+    TabletColumn array_column;
+    array_column.set_unique_id(2);
+    array_column.set_is_key(false);
+    array_column.set_name("array_col");
+    array_column.set_type(FieldType::OLAP_FIELD_TYPE_ARRAY);
+    array_column.set_is_nullable(false);
+    TabletColumn item_column(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE,
+                             FieldType::OLAP_FIELD_TYPE_VARCHAR, true);
+    item_column.set_is_key(false);
+    item_column.set_length(64);
+    array_column.add_sub_column(item_column);
+    array_schema->append_column(array_column);
+
+    // Dedicated tablet for this test, rooted at the directory prepared above.
+    auto tablet_meta = create_tablet_meta();
+    auto tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta, _data_dir.get());
+    tablet->_tablet_path = tablet_dir;
+    ASSERT_TRUE(tablet->init().ok());
+
+    // Column list and index request handed to the builder: index id 1 on array_col.
+    _columns.clear();
+    TColumn array_tcolumn;
+    array_tcolumn.column_name = "array_col";
+    _columns.push_back(array_tcolumn);
+
+    _alter_indexes.clear();
+    TOlapTableIndex array_index;
+    array_index.index_id = 1;
+    array_index.index_name = "array_index";
+    array_index.columns.emplace_back("array_col");
+    array_index.column_unique_ids.push_back(2);
+    array_index.index_type = TIndexType::type::INVERTED;
+    _alter_indexes.push_back(array_index);
+
+    // Writer context for one visible beta rowset covering version [10, 10].
+    RowsetWriterContext ctx;
+    ctx.rowset_id.init(14685);
+    ctx.tablet_id = 14685;
+    ctx.tablet_schema_hash = 567997577;
+    ctx.partition_id = 10;
+    ctx.rowset_type = BETA_ROWSET;
+    ctx.tablet_path = tablet_dir;
+    ctx.rowset_state = VISIBLE;
+    ctx.tablet_schema = array_schema;
+    ctx.version.first = 10;
+    ctx.version.second = 10;
+
+    ASSERT_TRUE(local_fs->create_directory(ctx.tablet_path).ok());
+
+    auto writer_result = RowsetFactory::create_rowset_writer(*_engine_ref, ctx, false);
+    ASSERT_TRUE(writer_result.has_value()) << writer_result.error();
+    auto writer = std::move(writer_result).value();
+
+    // Write 1000 rows: k1 = i, array_col = ["item_<i>_0", ..., "item_<i>_<n-1>"] where
+    // n = i % 5 + 1 (one to five elements).
+    {
+        vectorized::Block block = array_schema->create_block();
+        auto cols = block.mutate_columns();
+        // The second column is cast to ColumnArray so whole arrays can be inserted.
+        auto& array_data = static_cast<vectorized::ColumnArray&>(*cols[1]);
+
+        for (int row = 0; row < 1000; row++) {
+            const int32_t k1 = row;
+            cols[0]->insert_data(reinterpret_cast<const char*>(&k1), sizeof(k1));
+
+            const int item_count = row % 5 + 1;
+            vectorized::Array items;
+            for (int item = 0; item < item_count; item++) {
+                std::string text = "item_" + std::to_string(row) + "_" + std::to_string(item);
+                items.push_back(vectorized::Field(text));
+            }
+            array_data.insert(items);
+        }
+
+        Status st = writer->add_block(&block);
+        ASSERT_TRUE(st.ok()) << st.to_string();
+
+        st = writer->flush();
+        ASSERT_TRUE(st.ok()) << st.to_string();
+    }
+
+    // Build the rowset and attach it to the tablet.
+    RowsetSharedPtr rowset;
+    ASSERT_TRUE(writer->build(rowset).ok());
+    ASSERT_TRUE(rowset != nullptr);
+    ASSERT_TRUE(tablet->add_rowset(rowset).ok());
+
+    // Run the index build (the trailing `false` selects build rather than drop).
+    IndexBuilder builder(*_engine_ref, tablet, _columns, _alter_indexes, false);
+    auto status = builder.init();
+    EXPECT_TRUE(status.ok()) << status.to_string();
+
+    status = builder.do_build_inverted_index();
+    EXPECT_TRUE(status.ok()) << status.to_string();
+
+    // Locate segment 0 of the expected rowset and derive the index file path that
+    // matches the schema's inverted index storage format.
+    std::string segment_path = local_segment_path(
+            tablet->tablet_path(),
+            extract_rowset_id("020000000000000100000000000000000000000000000000_0.dat").to_string(),
+            0);
+    const auto index_prefix = InvertedIndexDescriptor::get_index_file_path_prefix(segment_path);
+    const bool uses_v1 =
+            array_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1;
+    // V1 addresses the index file by index id (1) and an empty suffix; other formats
+    // need only the prefix.
+    const auto index_path = uses_v1
+                                    ? InvertedIndexDescriptor::get_index_file_path_v1(index_prefix, 1, "")
+                                    : InvertedIndexDescriptor::get_index_file_path_v2(index_prefix);
+
+    bool exists = false;
+    EXPECT_TRUE(local_fs->exists(index_path, &exists).ok());
+    EXPECT_TRUE(exists) << "Index file not found: " << index_path;
+}
+
+TEST_F(IndexBuilderTest, UniqueKeysTableIndexTest) {
+    // Scenario: switch the fixture tablet to UNIQUE_KEYS with merge-on-write enabled,
+    // build an inverted index on k1 and check that the index file exists afterwards.
+
+    // Reconfigure the fixture tablet and give it a clean directory.
+    const auto tablet_dir = _absolute_dir + "/" + std::to_string(14688);
+    _tablet->_tablet_path = tablet_dir;
+    _tablet->_tablet_meta->_schema = _tablet_schema;
+    _tablet->_tablet_meta->_schema->_keys_type = KeysType::UNIQUE_KEYS;
+    _tablet->_tablet_meta->_enable_unique_key_merge_on_write = true;
+    const auto& local_fs = io::global_local_filesystem();
+    ASSERT_TRUE(local_fs->delete_directory(tablet_dir).ok());
+    ASSERT_TRUE(local_fs->create_directory(tablet_dir).ok());
+
+    // Writer context for one visible beta rowset covering version [10, 10]. The
+    // per-segment row limit is set to 500 while 1000 rows are written below.
+    const int rows_per_segment = 500;
+    RowsetWriterContext ctx;
+    ctx.rowset_id.init(15677);
+    ctx.tablet_id = 15677;
+    ctx.tablet_schema_hash = 567997577;
+    ctx.partition_id = 10;
+    ctx.rowset_type = BETA_ROWSET;
+    ctx.tablet_path = tablet_dir;
+    ctx.rowset_state = VISIBLE;
+    ctx.tablet_schema = _tablet_schema;
+    ctx.version.first = 10;
+    ctx.version.second = 10;
+    ctx.max_rows_per_segment = rows_per_segment;
+
+    ASSERT_TRUE(local_fs->create_directory(ctx.tablet_path).ok());
+
+    auto writer_result = RowsetFactory::create_rowset_writer(*_engine_ref, ctx, false);
+    ASSERT_TRUE(writer_result.has_value()) << writer_result.error();
+    auto writer = std::move(writer_result).value();
+
+    // Fill 1000 rows (k1 = i * 10, k2 = i % 100), then flush, build and publish the rowset.
+    RowsetSharedPtr rowset;
+    {
+        vectorized::Block block = _tablet_schema->create_block();
+        auto cols = block.mutate_columns();
+
+        for (int row = 0; row < 1000; ++row) {
+            const int32_t k1 = row * 10;
+            cols[0]->insert_data(reinterpret_cast<const char*>(&k1), sizeof(k1));
+
+            const int32_t k2 = row % 100;
+            cols[1]->insert_data(reinterpret_cast<const char*>(&k2), sizeof(k2));
+        }
+
+        Status st = writer->add_block(&block);
+        ASSERT_TRUE(st.ok()) << st.to_string();
+
+        st = writer->flush();
+        ASSERT_TRUE(st.ok()) << st.to_string();
+
+        ASSERT_TRUE(writer->build(rowset).ok());
+        ASSERT_TRUE(_tablet->add_rowset(rowset).ok());
+    }
+
+    // Builder subclass whose modify_rowsets() simply forwards to the base class; it
+    // exists so the call goes through an overridden entry point.
+    class ForwardingIndexBuilder : public IndexBuilder {
+    public:
+        ForwardingIndexBuilder(StorageEngine& engine, TabletSharedPtr tablet,
+                               const std::vector<TColumn>& columns,
+                               const std::vector<doris::TOlapTableIndex>& alter_inverted_indexes,
+                               bool is_drop_op)
+                : IndexBuilder(engine, tablet, columns, alter_inverted_indexes, is_drop_op) {}
+
+        ~ForwardingIndexBuilder() override = default;
+
+        Status modify_rowsets(const Merger::Statistics* stats = nullptr) override {
+            return IndexBuilder::modify_rowsets(stats);
+        }
+    };
+
+    // Request an inverted index (id 1, "k1_index") on column k1 (unique id 1).
+    _alter_indexes.clear();
+    TOlapTableIndex k1_request;
+    k1_request.index_id = 1;
+    k1_request.index_name = "k1_index";
+    k1_request.columns.emplace_back("k1");
+    k1_request.column_unique_ids.push_back(1);
+    k1_request.index_type = TIndexType::type::INVERTED;
+    _alter_indexes.push_back(k1_request);
+
+    // Run the index build (the trailing `false` selects build rather than drop).
+    ForwardingIndexBuilder builder(*_engine_ref, _tablet, _columns, _alter_indexes, false);
+    auto status = builder.init();
+    EXPECT_TRUE(status.ok()) << status.to_string();
+
+    status = builder.do_build_inverted_index();
+    EXPECT_TRUE(status.ok()) << status.to_string();
+
+    // Locate segment 0 of the expected rowset and derive the index file path that
+    // matches the schema's inverted index storage format.
+    std::string segment_path = local_segment_path(
+            _tablet->tablet_path(),
+            extract_rowset_id("020000000000000100000000000000000000000000000000_0.dat").to_string(),
+            0);
+    const auto index_prefix = InvertedIndexDescriptor::get_index_file_path_prefix(segment_path);
+    const bool uses_v1 =
+            _tablet_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1;
+    const auto index_path = uses_v1
+                                    ? InvertedIndexDescriptor::get_index_file_path_v1(index_prefix, 1, "")
+                                    : InvertedIndexDescriptor::get_index_file_path_v2(index_prefix);
+
+    bool exists = false;
+    EXPECT_TRUE(local_fs->exists(index_path, &exists).ok());
+    EXPECT_TRUE(exists) << "Index file not found: " << index_path;
+}
+
+TEST_F(IndexBuilderTest, HandleSingleRowsetErrorTest) {
+    // Scenario: handle_single_rowset() is overridden to fail with the "should be local
+    // rowset" internal error, and the test checks that do_build_inverted_index()
+    // propagates that error to the caller.
+
+    // 1. Builder whose handle_single_rowset() can be switched to return the simulated error
+    class TestIndexBuilder : public IndexBuilder {
+    public:
+        TestIndexBuilder(StorageEngine& engine, TabletSharedPtr tablet,
+                         const std::vector<TColumn>& columns,
+                         const std::vector<doris::TOlapTableIndex>& alter_inverted_indexes,
+                         bool is_drop_op, bool simulate_non_local_rowset_error = false)
+                : IndexBuilder(engine, tablet, columns, alter_inverted_indexes, is_drop_op),
+                  _simulate_non_local_rowset_error(simulate_non_local_rowset_error) {}
+
+        ~TestIndexBuilder() override = default;
+
+        // Returns the simulated error when enabled, otherwise defers to the base class.
+        Status handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta,
+                                    std::vector<segment_v2::SegmentSharedPtr>& segments) override {
+            if (_simulate_non_local_rowset_error) {
+                // Simulate the condition where is_local_rowset is false
+                return Status::InternalError("should be local rowset. tablet_id={} rowset_id={}",
+                                             123, "test_rowset_id");
+            }
+
+            return IndexBuilder::handle_single_rowset(output_rowset_meta, segments);
+        }
+
+    private:
+        bool _simulate_non_local_rowset_error;
+    };
+
+    // 2. Prepare tablet path. These are preconditions for everything below, so a failure
+    //    aborts the test (ASSERT_) instead of letting it continue on a broken directory.
+    std::string tablet_path = _absolute_dir + "/" + std::to_string(14687);
+    ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok());
+    ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok());
+
+    // 3. Set up tablet schema and tablet
+    TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
+    create_tablet_schema(tablet_schema, KeysType::DUP_KEYS, 2);
+
+    auto tablet_meta = create_tablet_meta();
+    tablet_meta->_schema = tablet_schema;
+    auto tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta, _data_dir.get());
+    tablet->_tablet_path = tablet_path;
+    ASSERT_TRUE(tablet->init().ok());
+
+    // 4. Create inverted index definition: index id 1 on k1 (unique id 1)
+    _alter_indexes.clear();
+    TOlapTableIndex tt_index;
+    tt_index.index_id = 1;
+    tt_index.index_name = "k1_index";
+    tt_index.columns.emplace_back("k1");
+    tt_index.column_unique_ids.push_back(1);
+    tt_index.index_type = TIndexType::type::INVERTED;
+    _alter_indexes.push_back(tt_index);
+
+    // 5. Create a rowset
+    RowsetWriterContext writer_context;
+    writer_context.rowset_id = _engine_ref->next_rowset_id();
+    writer_context.tablet_id = 14687;
+    writer_context.tablet_path = tablet_path;
+    writer_context.tablet_schema_hash = 1111;
+    writer_context.partition_id = 10;
+    writer_context.rowset_type = BETA_ROWSET;
+    writer_context.segments_overlap = NONOVERLAPPING;
+    writer_context.tablet_schema = tablet_schema;
+    writer_context.version.first = 10;
+    writer_context.version.second = 10;
+
+    auto result = tablet->create_rowset_writer(writer_context, false);
+    // Must be fatal: the next statement takes the value out of `result` unconditionally.
+    ASSERT_TRUE(result.has_value()) << result.error();
+    auto rowset_writer = std::move(result).value();
+
+    {
+        // Build the block from the same schema the writer and tablet were configured
+        // with, rather than from the fixture-wide _tablet_schema.
+        vectorized::Block block = tablet_schema->create_block();
+        auto columns = block.mutate_columns();
+
+        // Add data for k1 and k2 columns
+        for (int i = 0; i < 1000; ++i) {
+            // k1 column (int)
+            int32_t k1 = i * 10;
+            columns[0]->insert_data((const char*)&k1, sizeof(k1));
+
+            // k2 column (int)
+            int32_t k2 = i % 100;
+            columns[1]->insert_data((const char*)&k2, sizeof(k2));
+        }
+
+        // Add the block to the rowset
+        Status s = rowset_writer->add_block(&block);
+        ASSERT_TRUE(s.ok()) << s.to_string();
+
+        // Flush the writer
+        s = rowset_writer->flush();
+        ASSERT_TRUE(s.ok()) << s.to_string();
+
+        // Build the rowset
+        RowsetSharedPtr rowset;
+        ASSERT_TRUE(rowset_writer->build(rowset).ok());
+
+        // Add the rowset to the tablet
+        ASSERT_TRUE(tablet->add_rowset(rowset).ok());
+    }
+
+    // 6. Run the build with the simulated error enabled (last constructor argument)
+    TestIndexBuilder builder(*_engine_ref, tablet, _columns, _alter_indexes, false, true);
+    auto status = builder.init();
+    EXPECT_TRUE(status.ok()) << status.to_string();
+
+    // Execute build_index, which should fail due to simulated error
+    status = builder.do_build_inverted_index();
+    EXPECT_FALSE(status.ok()) << "Expected failure but got success";
+    EXPECT_TRUE(status.is<ErrorCode::INTERNAL_ERROR>())
+            << "Expected internal error but got: " << status.to_string();
+    EXPECT_TRUE(status.to_string().find("should be local rowset") != std::string::npos)
+            << "Error message doesn't match expected: " << status.to_string();
+}
+
+TEST_F(IndexBuilderTest, UpdateInvertedIndexInfoErrorTest) {
+    // Scenario: update_inverted_index_info() is overridden to fail in two different ways,
+    // and the test checks that do_build_inverted_index() propagates each error with its
+    // original code and message.
+
+    // 1. Builder whose update_inverted_index_info() returns an error chosen by error_type
+    class TestIndexBuilder : public IndexBuilder {
+    public:
+        TestIndexBuilder(StorageEngine& engine, TabletSharedPtr tablet,
+                         const std::vector<TColumn>& columns,
+                         const std::vector<doris::TOlapTableIndex>& alter_inverted_indexes,
+                         bool is_drop_op, int error_type = 0)
+                : IndexBuilder(engine, tablet, columns, alter_inverted_indexes, is_drop_op),
+                  _error_type(error_type) {}
+
+        ~TestIndexBuilder() override = default;
+
+        // Returns the selected simulated error, or defers to the base class for type 0.
+        Status update_inverted_index_info() override {
+            if (_error_type == 1) {
+                // Simulate non-local rowset error in update_inverted_index_info
+                return Status::InternalError("should be local rowset. tablet_id={} rowset_id={}",
+                                             123, "test_rowset_id");
+            } else if (_error_type == 2) {
+                // Simulate size retrieval error
+                return Status::Error<ErrorCode::INIT_FAILED>("debug point: get fs failed");
+            }
+
+            return IndexBuilder::update_inverted_index_info();
+        }
+
+    private:
+        int _error_type; // 0: no error, 1: non-local rowset error, 2: size retrieval error
+    };
+
+    // 2. Prepare tablet path. These are preconditions for everything below, so a failure
+    //    aborts the test (ASSERT_) instead of letting it continue on a broken directory.
+    std::string tablet_path = _absolute_dir + "/" + std::to_string(14688);
+    ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok());
+    ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok());
+
+    // 3. Set up tablet schema and tablet
+    TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
+    create_tablet_schema(tablet_schema, KeysType::DUP_KEYS, 2);
+
+    auto tablet_meta = create_tablet_meta();
+    tablet_meta->_schema = tablet_schema;
+    auto tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta, _data_dir.get());
+    tablet->_tablet_path = tablet_path;
+    ASSERT_TRUE(tablet->init().ok());
+
+    // 4. Create inverted index definition: index id 1 on k1 (unique id 1)
+    _alter_indexes.clear();
+    TOlapTableIndex tt_index;
+    tt_index.index_id = 1;
+    tt_index.index_name = "k1_index";
+    tt_index.columns.emplace_back("k1");
+    tt_index.column_unique_ids.push_back(1);
+    tt_index.index_type = TIndexType::type::INVERTED;
+    _alter_indexes.push_back(tt_index);
+
+    // 5. Create a rowset
+    RowsetWriterContext writer_context;
+    writer_context.rowset_id = _engine_ref->next_rowset_id();
+    writer_context.tablet_id = 14688;
+    writer_context.tablet_path = tablet_path;
+    writer_context.tablet_schema_hash = 1111;
+    writer_context.partition_id = 10;
+    writer_context.rowset_type = BETA_ROWSET;
+    writer_context.segments_overlap = NONOVERLAPPING;
+    writer_context.tablet_schema = tablet_schema;
+    writer_context.version.first = 10;
+    writer_context.version.second = 10;
+
+    auto result = tablet->create_rowset_writer(writer_context, false);
+    // Must be fatal: the next statement takes the value out of `result` unconditionally.
+    ASSERT_TRUE(result.has_value()) << result.error();
+    auto rowset_writer = std::move(result).value();
+
+    // Write data
+    {
+        vectorized::Block block = tablet_schema->create_block();
+        auto columns = block.mutate_columns();
+
+        // Add data for k1 and k2 columns
+        for (int i = 0; i < 1000; ++i) {
+            // k1 column (int)
+            int32_t k1 = i * 10;
+            columns[0]->insert_data((const char*)&k1, sizeof(k1));
+
+            // k2 column (int)
+            int32_t k2 = i % 100;
+            columns[1]->insert_data((const char*)&k2, sizeof(k2));
+        }
+
+        // Add the block to the rowset
+        Status s = rowset_writer->add_block(&block);
+        ASSERT_TRUE(s.ok()) << s.to_string();
+
+        // Flush the writer
+        s = rowset_writer->flush();
+        ASSERT_TRUE(s.ok()) << s.to_string();
+
+        // Build the rowset
+        RowsetSharedPtr rowset;
+        ASSERT_TRUE(rowset_writer->build(rowset).ok());
+
+        // Add the rowset to the tablet
+        ASSERT_TRUE(tablet->add_rowset(rowset).ok());
+    }
+
+    // 6. Test error scenarios; each uses its own builder instance
+
+    // 6.1 error_type 1: "should be local rowset" internal error
+    {
+        TestIndexBuilder builder(*_engine_ref, tablet, _columns, _alter_indexes, false, 1);
+        auto status = builder.init();
+        EXPECT_TRUE(status.ok()) << status.to_string();
+
+        // Execute build_index, which should fail due to simulated error
+        status = builder.do_build_inverted_index();
+        EXPECT_FALSE(status.ok()) << "Expected failure but got success";
+        EXPECT_TRUE(status.is<ErrorCode::INTERNAL_ERROR>())
+                << "Expected internal error but got: " << status.to_string();
+        EXPECT_TRUE(status.to_string().find("should be local rowset") != std::string::npos)
+                << "Error message doesn't match expected: " << status.to_string();
+    }
+
+    // 6.2 error_type 2: INIT_FAILED "get fs failed" error
+    {
+        TestIndexBuilder builder(*_engine_ref, tablet, _columns, _alter_indexes, false, 2);
+        auto status = builder.init();
+        EXPECT_TRUE(status.ok()) << status.to_string();
+
+        // Execute build_index, which should fail due to simulated error
+        status = builder.do_build_inverted_index();
+        EXPECT_FALSE(status.ok()) << "Expected failure but got success";
+        EXPECT_TRUE(status.is<ErrorCode::INIT_FAILED>())
+                << "Expected INIT_FAILED but got: " << status.to_string();
+        EXPECT_TRUE(status.to_string().find("debug point: get fs failed") != std::string::npos)
+                << "Error message doesn't match expected: " << status.to_string();
+    }
+}
+
+} // namespace doris
diff --git
a/regression-test/data/inverted_index_p0/index_change/test_index_change_on_new_column.out
b/regression-test/data/inverted_index_p0/index_change/test_index_change_on_new_column.out
index 78a797ead64..d538508a204 100644
Binary files
a/regression-test/data/inverted_index_p0/index_change/test_index_change_on_new_column.out
and
b/regression-test/data/inverted_index_p0/index_change/test_index_change_on_new_column.out
differ
diff --git
a/regression-test/suites/inverted_index_p0/index_change/test_index_change_on_new_column.groovy
b/regression-test/suites/inverted_index_p0/index_change/test_index_change_on_new_column.groovy
index c6b3ec8df2d..170a7e0afaf 100644
---
a/regression-test/suites/inverted_index_p0/index_change/test_index_change_on_new_column.groovy
+++
b/regression-test/suites/inverted_index_p0/index_change/test_index_change_on_new_column.groovy
@@ -99,5 +99,93 @@ suite("test_index_change_on_new_column") {
assertEquals(show_result[0][2], "idx_s1")
qt_select2 """ SELECT * FROM ${tableName} order by id; """
- qt_select3 """ SELECT * FROM ${tableName} where s1 match 'welcome'; """
+ qt_select3 """ SELECT /*+SET_VAR(enable_fallback_on_missing_inverted_index=false) */ * FROM ${tableName} where s1 match 'welcome'; """
+
+ tableName = "test_index_change_on_new_column1"
+
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """
+ CREATE TABLE IF NOT EXISTS ${tableName} (
+ `id` INT COMMENT "",
+ `s` STRING COMMENT "",
+ INDEX idx_s(s) USING INVERTED
+ )
+ DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`)
+ PROPERTIES ( "replication_num" = "1" );
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (1, 'hello world')
+ """
+
+ // add new column
+ sql """ alter table ${tableName} add column s1 varchar(50) default null after s; """
+
+ qt_select1 """ SELECT * FROM ${tableName}; """
+
+ // create inverted index on new column
+ sql """ alter table ${tableName} add index idx_s1(s1) USING INVERTED PROPERTIES('parser' = 'english')"""
+ wait_for_latest_op_on_table_finish(tableName, timeout)
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (2, 'hello wold', 'welcome to the world')
+ """
+ // build inverted index on new column
+ if (!isCloudMode()) {
+ sql """ build index idx_s1 on ${tableName} """
+ wait_for_build_index_on_partition_finish(tableName, timeout)
+ }
+
+ show_result = sql "show index from ${tableName}"
+ logger.info("show index from " + tableName + " result: " + show_result)
+ assertEquals(show_result.size(), 2)
+ assertEquals(show_result[0][2], "idx_s")
+ assertEquals(show_result[1][2], "idx_s1")
+ qt_select2 """ SELECT * FROM ${tableName} order by id; """
+ qt_select3 """ SELECT /*+SET_VAR(enable_fallback_on_missing_inverted_index=false) */ * FROM ${tableName} where s1 match 'welcome'; """
+ qt_select4 """ SELECT /*+SET_VAR(enable_fallback_on_missing_inverted_index=false) */ * FROM ${tableName} where s match 'hello world'; """
+
+ tableName = "test_index_change_on_new_column1_index_v1"
+
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """
+ CREATE TABLE IF NOT EXISTS ${tableName} (
+ `id` INT COMMENT "",
+ `s` STRING COMMENT "",
+ INDEX idx_s(s) USING INVERTED
+ )
+ DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`)
+ PROPERTIES ( "inverted_index_storage_format" = "v1", "replication_num" = "1" );
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (1, 'hello world')
+ """
+
+ // add new column
+ sql """ alter table ${tableName} add column s1 varchar(50) default null after s; """
+
+ qt_select1 """ SELECT * FROM ${tableName}; """
+
+ // create inverted index on new column
+ sql """ alter table ${tableName} add index idx_s1(s1) USING INVERTED PROPERTIES('parser' = 'english')"""
+ wait_for_latest_op_on_table_finish(tableName, timeout)
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (2, 'hello wold', 'welcome to the world')
+ """
+ // build inverted index on new column
+ if (!isCloudMode()) {
+ sql """ build index idx_s1 on ${tableName} """
+ wait_for_build_index_on_partition_finish(tableName, timeout)
+ }
+
+ show_result = sql "show index from ${tableName}"
+ logger.info("show index from " + tableName + " result: " + show_result)
+ assertEquals(show_result.size(), 2)
+ assertEquals(show_result[0][2], "idx_s")
+ assertEquals(show_result[1][2], "idx_s1")
+ qt_select2 """ SELECT * FROM ${tableName} order by id; """
+ qt_select3 """ SELECT /*+SET_VAR(enable_fallback_on_missing_inverted_index=false) */ * FROM ${tableName} where s1 match 'welcome'; """
+ qt_select4 """ SELECT /*+SET_VAR(enable_fallback_on_missing_inverted_index=false) */ * FROM ${tableName} where s match 'hello world'; """
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]