Copilot commented on code in PR #60668:
URL: https://github.com/apache/doris/pull/60668#discussion_r2791641575


##########
be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.cpp:
##########
@@ -280,6 +280,143 @@ SubcolumnWritePlan build_subcolumn_write_plan(const 
vectorized::ColumnVariant& v
     return plan;
 }
 
+template <typename WriteMaterializedFn, typename WriteDocValueFn>
+Status execute_doc_write_pipeline(const vectorized::ColumnVariant& variant, 
size_t num_rows,
+                                  int64_t 
variant_doc_materialization_min_rows, int& column_id,
+                                  WriteMaterializedFn&& write_materialized_fn,
+                                  WriteDocValueFn&& write_doc_value_fn,
+                                  DocValuePathStats* out_column_stats) {
+    SubcolumnWritePlan plan =
+            build_subcolumn_write_plan(variant, num_rows, 
variant_doc_materialization_min_rows);
+    *out_column_stats = std::move(plan.stats);
+    if (out_column_stats->empty()) {
+        build_doc_value_stats(variant, out_column_stats);
+    }
+
+    for (auto& entry : plan.entries) {
+        RETURN_IF_ERROR(write_materialized_fn(entry, column_id));
+    }
+    RETURN_IF_ERROR(write_doc_value_fn(column_id));
+    return Status::OK();
+}
+
+bool is_invalid_materialized_subcolumn_type(const vectorized::DataTypePtr& 
type) {
+    return 
vectorized::variant_util::get_base_type_of_array(type)->get_primitive_type() ==
+           PrimitiveType::INVALID_TYPE;
+}
+
+Status prepare_materialized_subcolumn_writer(
+        const TabletColumn& parent_column, std::string_view path,
+        const vectorized::ColumnVariant::Subcolumn& subcolumn,
+        vectorized::ColumnPtr& current_column, vectorized::DataTypePtr& 
current_type,
+        int64_t none_null_value_size, int current_column_id, size_t num_rows,
+        const ColumnWriterOptions& base_opts, std::vector<TabletIndexes>* 
subcolumns_indexes,
+        std::vector<ColumnWriterOptions>* subcolumn_opts,
+        std::vector<std::unique_ptr<ColumnWriter>>* subcolumn_writers,
+        TabletColumn* out_tablet_column) {
+    TabletColumn tablet_column;
+    TabletIndexes subcolumn_indexes;
+    TabletSchema::SubColumnInfo sub_column_info;
+    if 
(vectorized::variant_util::generate_sub_column_info(*base_opts.rowset_ctx->tablet_schema,
+                                                           
parent_column.unique_id(),
+                                                           std::string(path), 
&sub_column_info)) {
+        tablet_column = std::move(sub_column_info.column);
+        subcolumn_indexes = std::move(sub_column_info.indexes);
+        vectorized::DataTypePtr storage_type =
+                
vectorized::DataTypeFactory::instance().create_data_type(tablet_column);
+        if (!storage_type->equals(*current_type)) {
+            RETURN_IF_ERROR(vectorized::variant_util::cast_column(
+                    {current_column, current_type, ""}, storage_type, 
&current_column));
+        }
+        current_type = std::move(storage_type);
+    } else {
+        const std::string column_name = parent_column.name_lower_case() + "." 
+ std::string(path);
+        const vectorized::DataTypePtr& final_data_type_from_object =
+                subcolumn.get_least_common_type();
+        vectorized::PathInData full_path = vectorized::PathInData(column_name);
+        tablet_column = vectorized::variant_util::get_column_by_type(
+                final_data_type_from_object, column_name,
+                vectorized::variant_util::ExtraInfo {.unique_id = -1,
+                                                     .parent_unique_id = 
parent_column.unique_id(),
+                                                     .path_info = full_path});
+        const auto& indexes =
+                
base_opts.rowset_ctx->tablet_schema->inverted_indexs(parent_column.unique_id());
+        vectorized::variant_util::inherit_index(indexes, subcolumn_indexes, 
tablet_column);
+    }
+
+    ColumnWriterOptions opts;
+    opts.meta = base_opts.footer->add_columns();
+    opts.index_file_writer = base_opts.index_file_writer;
+    opts.compression_type = base_opts.compression_type;

Review Comment:
   `prepare_materialized_subcolumn_writer()` builds a fresh 
`ColumnWriterOptions opts` but doesn’t propagate `encoding_preference` from 
`base_opts`. This can change encodings chosen for materialized subcolumns vs 
the rest of the segment. Consider copying `base_opts.encoding_preference` into 
`opts` (or initializing `opts` from `base_opts` and overriding only the fields 
that must differ).
   ```suggestion
       opts.compression_type = base_opts.compression_type;
       opts.encoding_preference = base_opts.encoding_preference;
   ```



##########
be/test/olap/rowset/segment_v2/variant_util_test.cpp:
##########
@@ -278,7 +286,7 @@ TEST(VariantUtilTest, 
ParseVariantColumns_DocModeRejectOnlySubcolumnsConfig) {
 
     vectorized::ParseConfig parse_cfg;
     parse_cfg.enable_flatten_nested = false;
-    parse_cfg.parse_to = 
vectorized::ParseConfig::ParseTo::BothSubcolumnsAndDocValueColumn;
+    parse_cfg.parse_to = vectorized::ParseConfig::ParseTo::OnlyDocValueColumn;
     Status st =
             parse_and_materialize_variant_columns(block, std::vector<uint32_t> 
{0}, {parse_cfg});
     EXPECT_TRUE(st.ok()) << st.to_string();

Review Comment:
   The test name `ParseVariantColumns_DocModeRejectOnlySubcolumnsConfig` no 
longer matches what the test does: it sets `parse_to = OnlyDocValueColumn` and 
expects success. Please rename the test (or restore the intended rejection 
assertion if the goal was to verify `OnlySubcolumns` is rejected in doc mode) 
so the test’s intent is clear.



##########
be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.cpp:
##########
@@ -280,6 +280,143 @@ SubcolumnWritePlan build_subcolumn_write_plan(const 
vectorized::ColumnVariant& v
     return plan;
 }
 
+template <typename WriteMaterializedFn, typename WriteDocValueFn>
+Status execute_doc_write_pipeline(const vectorized::ColumnVariant& variant, 
size_t num_rows,
+                                  int64_t 
variant_doc_materialization_min_rows, int& column_id,
+                                  WriteMaterializedFn&& write_materialized_fn,
+                                  WriteDocValueFn&& write_doc_value_fn,
+                                  DocValuePathStats* out_column_stats) {
+    SubcolumnWritePlan plan =
+            build_subcolumn_write_plan(variant, num_rows, 
variant_doc_materialization_min_rows);
+    *out_column_stats = std::move(plan.stats);
+    if (out_column_stats->empty()) {
+        build_doc_value_stats(variant, out_column_stats);
+    }

Review Comment:
   `execute_doc_write_pipeline()` can call `build_doc_value_stats()` when 
`out_column_stats` is empty, but `build_doc_value_stats()` (and 
`build_sparse_subcolumns_and_stats()`) currently compute `start` as 
`offsets[row - 1]`, which underflows for `row == 0` and is undefined behavior. 
Please fix the offset iteration to use `start = (row == 0 ? 0 : offsets[row - 
1])` (and apply the same pattern to the sparse-path builder) so doc 
stats/materialization are memory-safe for the first row.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to