github-actions[bot] commented on code in PR #64262:
URL: https://github.com/apache/doris/pull/64262#discussion_r3407652188


##########
be/src/storage/segment/segment_iterator.cpp:
##########
@@ -800,20 +805,35 @@ Status SegmentIterator::_get_row_ranges_by_keys() {
         return Status::OK();
     }
 
-    // Read & seek key columns is a waste of time when no key column in _schema
-    if (std::none_of(_schema->columns().begin(), _schema->columns().end(),
-                     [&](const TabletColumnPtr& col) {
-                         return col &&
-                                
_opts.tablet_schema->column_by_uid(col->unique_id()).is_key();
-                     })) {
-        return Status::OK();
+    bool use_pk_index_for_row_store_only = false;
+    if (_is_row_store_only_segment()) {
+        const auto& tablet_schema = _segment->tablet_schema();
+        if (tablet_schema->keys_type() != UNIQUE_KEYS ||
+            _segment->get_primary_key_index() == nullptr) {
+            return Status::NotSupported(
+                    "row_store_only key range pruning requires primary key 
index, tablet_id={}, "
+                    "rowset_id={}, segment_id={}",
+                    _opts.tablet_id, _segment->rowset_id().to_string(), 
_segment->id());
+        }
+        use_pk_index_for_row_store_only = true;
+    } else {
+        // Read & seek key columns is a waste of time when no key column in 
_schema
+        if (std::none_of(_schema->columns().begin(), _schema->columns().end(),
+                         [&](const TabletColumnPtr& col) {
+                             return col &&
+                                    
_opts.tablet_schema->column_by_uid(col->unique_id()).is_key();
+                         })) {
+            return Status::OK();
+        }
     }
 
     RowRanges result_ranges;
     for (auto& key_range : _opts.key_ranges) {
         rowid_t lower_rowid = 0;
         rowid_t upper_rowid = num_rows();
-        RETURN_IF_ERROR(_prepare_seek(key_range));
+        if (!use_pk_index_for_row_store_only) {
+            RETURN_IF_ERROR(_prepare_seek(key_range));
+        }
         if (key_range.upper_key != nullptr) {
             // If client want to read upper_bound, the include_upper is true. 
So we
             // should get the first ordinal at which key is larger than 
upper_bound.

Review Comment:
   For row-store-only segments this skips 
`_get_row_ranges_by_column_conditions()`, but `_lazy_init()` still calls 
`_get_row_ranges_from_conditions()` a few lines later whenever 
`_opts.col_id_to_predicates` is non-empty. That later helper calls 
`_column_iterators[cid]->get_row_ranges_by_dict/bloom_filter/zone_map`, while 
the row-store-only `_init_return_column_iterators()` only creates iterators for 
rowid/virtual columns and the hidden row-store column, not ordinary logical 
columns. A query like `SELECT * FROM t WHERE v1 = 10` will therefore 
dereference a null iterator before the JSONB predicate path gets a chance to 
evaluate the filter. Please also bypass `_get_row_ranges_from_conditions()` for 
row-store-only scans (or make it skip non-persisted logical columns) and add a 
non-key predicate regression case.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to