This is an automated email from the ASF dual-hosted git repository.
yangjiang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new eb05741e21 Minor: Clarify documentation on PruningStatistics and make
test match (#10004)
eb05741e21 is described below
commit eb05741e2167dec5df10a10c8435ebedbea6787d
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Apr 9 03:08:45 2024 -0400
Minor: Clarify documentation on PruningStatistics and make test match
(#10004)
---
datafusion/core/src/physical_optimizer/pruning.rs | 27 ++++++++++++++---------
1 file changed, 16 insertions(+), 11 deletions(-)
diff --git a/datafusion/core/src/physical_optimizer/pruning.rs
b/datafusion/core/src/physical_optimizer/pruning.rs
index 19e71a92a7..dc7e0529de 100644
--- a/datafusion/core/src/physical_optimizer/pruning.rs
+++ b/datafusion/core/src/physical_optimizer/pruning.rs
@@ -105,19 +105,23 @@ pub trait PruningStatistics {
fn num_containers(&self) -> usize;
/// Return the number of null values for the named column as an
- /// `Option<UInt64Array>`.
+ /// [`UInt64Array`].
///
/// See [`Self::min_values`] for when to return `None` and null values.
///
/// Note: the returned array must contain [`Self::num_containers`] rows
+ ///
+ /// [`UInt64Array`]: arrow::array::UInt64Array
fn null_counts(&self, column: &Column) -> Option<ArrayRef>;
/// Return the number of rows for the named column in each container
- /// as an `Option<UInt64Array>`.
+ /// as an [`UInt64Array`].
///
/// See [`Self::min_values`] for when to return `None` and null values.
///
/// Note: the returned array must contain [`Self::num_containers`] rows
+ ///
+ /// [`UInt64Array`]: arrow::array::UInt64Array
fn row_counts(&self, column: &Column) -> Option<ArrayRef>;
/// Returns [`BooleanArray`] where each row represents information known
@@ -1519,6 +1523,7 @@ mod tests {
array::{BinaryArray, Int32Array, Int64Array, StringArray},
datatypes::{DataType, TimeUnit},
};
+ use arrow_array::UInt64Array;
use datafusion_common::{ScalarValue, ToDFSchema};
use datafusion_expr::execution_props::ExecutionProps;
use datafusion_expr::expr::InList;
@@ -1684,10 +1689,10 @@ mod tests {
/// there are containers
fn with_null_counts(
mut self,
- counts: impl IntoIterator<Item = Option<i64>>,
+ counts: impl IntoIterator<Item = Option<u64>>,
) -> Self {
let null_counts: ArrayRef =
- Arc::new(counts.into_iter().collect::<Int64Array>());
+ Arc::new(counts.into_iter().collect::<UInt64Array>());
self.assert_invariants();
self.null_counts = Some(null_counts);
@@ -1698,10 +1703,10 @@ mod tests {
/// there are containers
fn with_row_counts(
mut self,
- counts: impl IntoIterator<Item = Option<i64>>,
+ counts: impl IntoIterator<Item = Option<u64>>,
) -> Self {
let row_counts: ArrayRef =
- Arc::new(counts.into_iter().collect::<Int64Array>());
+ Arc::new(counts.into_iter().collect::<UInt64Array>());
self.assert_invariants();
self.row_counts = Some(row_counts);
@@ -1753,13 +1758,13 @@ mod tests {
self
}
- /// Add null counts for the specified columm.
+ /// Add null counts for the specified column.
/// There must be the same number of null counts as
/// there are containers
fn with_null_counts(
mut self,
name: impl Into<String>,
- counts: impl IntoIterator<Item = Option<i64>>,
+ counts: impl IntoIterator<Item = Option<u64>>,
) -> Self {
let col = Column::from_name(name.into());
@@ -1775,13 +1780,13 @@ mod tests {
self
}
- /// Add row counts for the specified columm.
+ /// Add row counts for the specified column.
/// There must be the same number of row counts as
/// there are containers
fn with_row_counts(
mut self,
name: impl Into<String>,
- counts: impl IntoIterator<Item = Option<i64>>,
+ counts: impl IntoIterator<Item = Option<u64>>,
) -> Self {
let col = Column::from_name(name.into());
@@ -1797,7 +1802,7 @@ mod tests {
self
}
- /// Add contained information for the specified columm.
+ /// Add contained information for the specified column.
fn with_contained(
mut self,
name: impl Into<String>,