pitrou commented on code in PR #45550:
URL: https://github.com/apache/arrow/pull/45550#discussion_r1967707923
##########
python/pyarrow/array.pxi:
##########
@@ -704,6 +707,95 @@ def _restore_array(data):
return pyarrow_wrap_array(MakeArray(ad))
+cdef class ArrayStatistics(_Weakrefable):
+ """
+ The class for statistics of an array.
+ """
+
+ def __init__(self):
+ raise TypeError("Do not call {}'s constructor directly"
+ .format(self.__class__.__name__))
+
+ cdef void init(self, const shared_ptr[CArrayStatistics]& sp_statistics) except *:
+ self.sp_statistics = sp_statistics
+
+ def __repr__(self):
+ return ("arrow.ArrayStatistics<null_count={}, distinct_count={}, "
+ "min={}, is_min_exact={}, max={}, is_max_exact={}>"
+ .format(self.null_count, self.distinct_count, self.min,
+ self.is_min_exact, self.max, self.is_max_exact))
+
+ @property
+ def null_count(self):
+ """
+ The number of nulls.
+ """
+ null_count = self.sp_statistics.get().null_count
+ if null_count.has_value():
+ return null_count.value()
+ else:
+ return None
Review Comment:
For the record, I've opened a [Cython feature
request](https://github.com/cython/cython/issues/6692) to make this more
automatic.
##########
python/pyarrow/array.pxi:
##########
@@ -704,6 +707,95 @@ def _restore_array(data):
return pyarrow_wrap_array(MakeArray(ad))
+cdef class ArrayStatistics(_Weakrefable):
+ """
+ The class for statistics of an array.
+ """
+
+ def __init__(self):
+ raise TypeError("Do not call {}'s constructor directly"
+ .format(self.__class__.__name__))
+
+ cdef void init(self, const shared_ptr[CArrayStatistics]& sp_statistics) except *:
+ self.sp_statistics = sp_statistics
+
+ def __repr__(self):
+ return ("arrow.ArrayStatistics<null_count={}, distinct_count={}, "
+ "min={}, is_min_exact={}, max={}, is_max_exact={}>"
+ .format(self.null_count, self.distinct_count, self.min,
+ self.is_min_exact, self.max, self.is_max_exact))
+
+ @property
+ def null_count(self):
+ """
+ The number of nulls.
+ """
+ null_count = self.sp_statistics.get().null_count
+ if null_count.has_value():
+ return null_count.value()
+ else:
+ return None
+
+ @property
+ def distinct_count(self):
+ """
+ The number of distinct values.
+ """
+ distinct_count = self.sp_statistics.get().distinct_count
+ if distinct_count.has_value():
+ return distinct_count.value()
+ else:
+ return None
+
+ @property
+ def min(self):
+ """
+ The minimum value.
+ """
+ return self._get_value(self.sp_statistics.get().min)
+
+ @property
+ def is_min_exact(self):
+ """
+ Whether the minimum value is an exact value or not.
+ """
+ return self.sp_statistics.get().is_min_exact
+
+ @property
+ def max(self):
+ """
+ The maximum value.
+ """
+ return self._get_value(self.sp_statistics.get().max)
+
+ @property
+ def is_max_exact(self):
+ """
+ Whether the maximum value is an exact value or not.
+ """
+ return self.sp_statistics.get().is_max_exact
+
+ cdef _get_value(self, const optional[CArrayStatisticsValueType]& optional_value):
+ """
+ Get a raw value from
+ std::optional<arrow::ArrayStatistics::ValueType>> data.
+
+ arrow::ArrayStatistics::ValueType is
+ std::variant<bool, int64_t, uint64_t, double, std::string>.
Review Comment:
`uint64_t` isn't handled below; should the docstring or the code be fixed?
##########
python/pyarrow/array.pxi:
##########
@@ -704,6 +707,95 @@ def _restore_array(data):
return pyarrow_wrap_array(MakeArray(ad))
+cdef class ArrayStatistics(_Weakrefable):
+ """
+ The class for statistics of an array.
+ """
+
+ def __init__(self):
+ raise TypeError("Do not call {}'s constructor directly"
+ .format(self.__class__.__name__))
+
+ cdef void init(self, const shared_ptr[CArrayStatistics]& sp_statistics) except *:
+ self.sp_statistics = sp_statistics
+
+ def __repr__(self):
+ return ("arrow.ArrayStatistics<null_count={}, distinct_count={}, "
+ "min={}, is_min_exact={}, max={}, is_max_exact={}>"
+ .format(self.null_count, self.distinct_count, self.min,
+ self.is_min_exact, self.max, self.is_max_exact))
Review Comment:
You can use an f-string, something like (probably needs reformatting):
```suggestion
return (f"arrow.ArrayStatistics<null_count={self.null_count}, distinct_count={self.distinct_count}, "
f"min={self.min}, is_min_exact={self.is_min_exact}, max={self.max}, is_max_exact={self.is_max_exact}>")
```
##########
python/pyarrow/array.pxi:
##########
@@ -704,6 +707,95 @@ def _restore_array(data):
return pyarrow_wrap_array(MakeArray(ad))
+cdef class ArrayStatistics(_Weakrefable):
+ """
+ The class for statistics of an array.
+ """
+
+ def __init__(self):
+ raise TypeError("Do not call {}'s constructor directly"
+ .format(self.__class__.__name__))
Review Comment:
You can also use an f-string here.
```suggestion
raise TypeError(f"Do not call {self.__class__.__name__}'s constructor directly")
```
##########
python/pyarrow/array.pxi:
##########
@@ -704,6 +707,95 @@ def _restore_array(data):
return pyarrow_wrap_array(MakeArray(ad))
+cdef class ArrayStatistics(_Weakrefable):
+ """
+ The class for statistics of an array.
+ """
+
+ def __init__(self):
+ raise TypeError("Do not call {}'s constructor directly"
+ .format(self.__class__.__name__))
+
+ cdef void init(self, const shared_ptr[CArrayStatistics]& sp_statistics) except *:
Review Comment:
`except *` means it could raise Python exceptions, but it doesn't here, so
perhaps you can remove that annotation (though it's not really a problem
either).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]