avantgardnerio commented on code in PR #19285:
URL: https://github.com/apache/datafusion/pull/19285#discussion_r2615257686
##########
datafusion/physical-plan/src/aggregates/topk/heap.rs:
##########
@@ -161,6 +180,108 @@ where
}
}
+/// An implementation of `ArrowHeap` that deals with string values.
+///
+/// Supports all three UTF-8 string types: `Utf8`, `LargeUtf8`, and `Utf8View`.
+/// String values are compared lexicographically. Null values are not explicitly handled
+/// and should not appear in the input; the aggregation layer ensures nulls are managed
+/// appropriately before calling this heap.
+pub struct StringHeap {
+ batch: ArrayRef,
+ heap: TopKHeap<String>,
+ desc: bool,
+ data_type: DataType,
+}
+
+impl StringHeap {
+ pub fn new(limit: usize, desc: bool, data_type: DataType) -> Self {
+ let batch: ArrayRef = Arc::new(StringArray::from(Vec::<&str>::new()));
+ Self {
+ batch,
+ heap: TopKHeap::new(limit, desc),
+ desc,
+ data_type,
+ }
+ }
+
+ /// Extracts a string value from the current batch at the given row index.
+ ///
+ /// Panics if the row index is out of bounds or if the data type is not one of
+ /// the supported UTF-8 string types.
+ ///
+ /// Note: Null values should not appear in the input; the aggregation layer
+ /// ensures nulls are filtered before reaching this code.
+ fn value(&self, row_idx: usize) -> String {
+ extract_string_value(&self.batch, &self.data_type, row_idx)
+ }
+}
+
+/// Helper to extract a string value from an ArrayRef at a given index.
+///
+/// Supports `Utf8`, `LargeUtf8`, and `Utf8View` data types. This helper reduces
+/// duplication between `StringHeap::value()` and `StringHeap::drain()`.
+///
+/// # Panics
+/// Panics if the index is out of bounds or if the data type is unsupported.
+fn extract_string_value(batch: &ArrayRef, data_type: &DataType, idx: usize) ->
String {
Review Comment:
I think a bench would be important for a change like this.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]