Gabriel39 commented on code in PR #8438:
URL: https://github.com/apache/incubator-doris/pull/8438#discussion_r849533945


##########
be/src/olap/rowset/segment_v2/segment_writer.cpp:
##########
@@ -96,6 +102,78 @@ Status SegmentWriter::init(uint32_t write_mbytes_per_sec 
__attribute__((unused))
     return Status::OK();
 }
 
+Status SegmentWriter::append_block(const vectorized::Block* block, size_t 
row_pos,
+                                   size_t num_rows) {
+    assert(block && num_rows > 0 && row_pos + num_rows <= block->rows() &&
+           block->columns() == _column_writers.size());
+    _olap_data_convertor.set_source_content(block, row_pos, num_rows);
+
+    // find all row pos for short key indexes
+    std::vector<size_t> short_key_pos;
+    if (UNLIKELY(_short_key_row_pos == 0)) {
+        short_key_pos.push_back(0);
+    }
+    while (_short_key_row_pos + _opts.num_rows_per_block < _row_count + 
num_rows) {
+        _short_key_row_pos += _opts.num_rows_per_block;
+        short_key_pos.push_back(_short_key_row_pos - _row_count);
+    }
+
+    // convert column data from engine format to storage layer format
+    std::vector<vectorized::IOlapColumnDataAccessorSPtr> short_key_columns;
+    size_t num_key_columns = _tablet_schema->num_short_key_columns();
+    for (size_t cid = 0; cid < _column_writers.size(); ++cid) {
+        auto converted_result = _olap_data_convertor.convert_column_data(cid);
+        if (converted_result.first != Status::OK()) {
+            return converted_result.first;
+        }
+        if (cid < num_key_columns) {
+            short_key_columns.push_back(converted_result.second);
+        }
+        _column_writers[cid]->append(converted_result.second->get_nullmap(),
+                                     converted_result.second->get_data(), 
num_rows);
+    }
+
+    // create short key indexes
+    std::vector<const void*> key_column_fields;
+    for (const auto pos : short_key_pos) {
+        for (const auto& column : short_key_columns) {
+            key_column_fields.push_back(column->get_data_at(pos));
+        }
+        std::string encoded_key = encode_short_keys(key_column_fields);
+        RETURN_IF_ERROR(_index_builder->add_item(encoded_key));
+        key_column_fields.clear();
+    }
+    
+    _row_count += num_rows;
+    _olap_data_convertor.clear_source_content();
+    return Status::OK();
+}
+
+std::string SegmentWriter::encode_short_keys(
+        const std::vector<const void*> key_column_fields, bool null_first) {
+    size_t num_key_columns = _tablet_schema->num_short_key_columns();

Review Comment:
   How about removing `num_key_columns` and just using
`key_column_fields.size()` instead?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to