kou commented on code in PR #46422:
URL: https://github.com/apache/arrow/pull/46422#discussion_r2119857675
##########
cpp/src/arrow/array/statistics.h:
##########
@@ -127,18 +128,24 @@ struct ARROW_EXPORT ArrayStatistics {
/// \brief Whether the maximum value is exact or not
bool is_max_exact = false;
- /// \brief Check two statistics for equality
- bool Equals(const ArrayStatistics& other) const {
- return null_count == other.null_count && distinct_count ==
other.distinct_count &&
- min == other.min && is_min_exact == other.is_min_exact && max ==
other.max &&
- is_max_exact == other.is_max_exact;
- }
+ /// \brief Checks whether this ArrayStatistics instance is equal to another.
+ ///
+ /// \param other The \ref ArrayStatistics instance to compare against.
+ ///
+ /// \param equal_options Options used to compare double values for equality.
+ ///
+ /// \return True if the two \ref arrow::ArrayStatistics instances are equal;
otherwise,
+ /// false.
+ bool Equals(const ArrayStatistics& other,
+ const EqualOptions& equal_options = EqualOptions::Defaults())
const;
- /// \brief Check two statistics for equality
- bool operator==(const ArrayStatistics& other) const { return Equals(other); }
+ bool operator==(const ArrayStatistics& other) const {
+ return Equals(other, EqualOptions::Defaults());
+ }
- /// \brief Check two statistics for not equality
- bool operator!=(const ArrayStatistics& other) const { return !Equals(other);
}
+ bool operator!=(const ArrayStatistics& other) const {
+ return !Equals(other, EqualOptions::Defaults());
Review Comment:
ditto.
##########
cpp/src/arrow/array/statistics.cc:
##########
@@ -15,7 +15,63 @@
// specific language governing permissions and limitations
// under the License.
-// This empty .cc file is for embedding not inlined symbols in
-// arrow::ArrayStatistics into libarrow.
-
#include "arrow/array/statistics.h"
+
+#include <cmath>
+#include <type_traits>
+
+#include "arrow/compare.h"
+#include "arrow/util/logging_internal.h"
+namespace arrow {
+using ValueType = ArrayStatistics::ValueType;
Review Comment:
Can we do this in an anonymous namespace not in `::arrow`?
##########
cpp/src/arrow/array/statistics_test.cc:
##########
@@ -96,6 +99,72 @@ TEST(ArrayStatisticsTest, TestEquality) {
ASSERT_NE(statistics1, statistics2);
statistics2.is_max_exact = true;
ASSERT_EQ(statistics1, statistics2);
+
+ // Test different index
+ statistics1.max = static_cast<uint64_t>(29);
+ ASSERT_NE(statistics1, statistics2);
+}
+
+TEST(ArrayStatisticsTest, TestEqualityDoubleValue) {
+ ArrayStatistics statistics1;
+ ArrayStatistics statistics2;
+ EqualOptions options = EqualOptions::Defaults();
+ auto Reset = [&]() {
+ statistics1.min = std::nullopt;
+ statistics2.min = std::nullopt;
+ };
+
+ ASSERT_EQ(statistics1, statistics2);
+ statistics2.min = 29.0;
+ ASSERT_NE(statistics1, statistics2);
+ statistics1.min = 29.0;
+ ASSERT_EQ(statistics1, statistics2);
+ statistics2.min = 30.0;
+ ASSERT_NE(statistics1, statistics2);
+
+ // Check Signed Zeros
+ Reset();
Review Comment:
Could you split tests instead of using `Reset()`?
```cpp
TEST(ArrayStatisticsTest, TestEqualityDoubleValue) {
ASSERT_EQ(statistics1, statistics2);
statistics2.min = 29.0;
ASSERT_NE(statistics1, statistics2);
statistics1.min = 29.0;
ASSERT_EQ(statistics1, statistics2);
statistics2.min = 30.0;
ASSERT_NE(statistics1, statistics2);
}
TEST(ArrayStatisticsTest, TestEqualitySignedZeros) {
}
TEST(ArrayStatisticsTest, TestEqualityInfinity) {
}
...
```
##########
cpp/src/arrow/compare.h:
##########
@@ -57,8 +57,18 @@ class EqualOptions {
res.signed_zeros_equal_ = v;
return res;
}
+ /// Whether the "atol" property is used in the comparison.
+ bool allow_atol() const { return allow_atol_; }
Review Comment:
How about `use_atol()`?
##########
cpp/src/arrow/array/statistics.h:
##########
@@ -127,18 +128,24 @@ struct ARROW_EXPORT ArrayStatistics {
/// \brief Whether the maximum value is exact or not
bool is_max_exact = false;
- /// \brief Check two statistics for equality
- bool Equals(const ArrayStatistics& other) const {
- return null_count == other.null_count && distinct_count ==
other.distinct_count &&
- min == other.min && is_min_exact == other.is_min_exact && max ==
other.max &&
- is_max_exact == other.is_max_exact;
- }
+ /// \brief Checks whether this ArrayStatistics instance is equal to another.
+ ///
+ /// \param other The \ref ArrayStatistics instance to compare against.
+ ///
+ /// \param equal_options Options used to compare double values for equality.
+ ///
+ /// \return True if the two \ref arrow::ArrayStatistics instances are equal;
otherwise,
+ /// false.
+ bool Equals(const ArrayStatistics& other,
+ const EqualOptions& equal_options = EqualOptions::Defaults())
const;
- /// \brief Check two statistics for equality
- bool operator==(const ArrayStatistics& other) const { return Equals(other); }
+ bool operator==(const ArrayStatistics& other) const {
+ return Equals(other, EqualOptions::Defaults());
Review Comment:
Can we omit `EqualOptions::Defaults()` because it's the default argument?
```suggestion
return Equals(other);
```
##########
cpp/src/arrow/array/statistics.h:
##########
@@ -127,18 +128,24 @@ struct ARROW_EXPORT ArrayStatistics {
/// \brief Whether the maximum value is exact or not
bool is_max_exact = false;
- /// \brief Check two statistics for equality
- bool Equals(const ArrayStatistics& other) const {
- return null_count == other.null_count && distinct_count ==
other.distinct_count &&
- min == other.min && is_min_exact == other.is_min_exact && max ==
other.max &&
- is_max_exact == other.is_max_exact;
- }
+ /// \brief Checks whether this ArrayStatistics instance is equal to another.
+ ///
+ /// \param other The \ref ArrayStatistics instance to compare against.
+ ///
+ /// \param equal_options Options used to compare double values for equality.
+ ///
+ /// \return True if the two \ref arrow::ArrayStatistics instances are equal;
otherwise,
+ /// false.
+ bool Equals(const ArrayStatistics& other,
+ const EqualOptions& equal_options = EqualOptions::Defaults())
const;
- /// \brief Check two statistics for equality
- bool operator==(const ArrayStatistics& other) const { return Equals(other); }
+ bool operator==(const ArrayStatistics& other) const {
+ return Equals(other, EqualOptions::Defaults());
+ }
- /// \brief Check two statistics for not equality
Review Comment:
ditto.
##########
cpp/src/arrow/array/statistics_test.cc:
##########
@@ -96,6 +99,72 @@ TEST(ArrayStatisticsTest, TestEquality) {
ASSERT_NE(statistics1, statistics2);
statistics2.is_max_exact = true;
ASSERT_EQ(statistics1, statistics2);
+
+ // Test different index
+ statistics1.max = static_cast<uint64_t>(29);
+ ASSERT_NE(statistics1, statistics2);
Review Comment:
Do we need this?
I think that existing assertion covers this case.
##########
cpp/src/arrow/array/statistics.h:
##########
@@ -127,18 +128,24 @@ struct ARROW_EXPORT ArrayStatistics {
/// \brief Whether the maximum value is exact or not
bool is_max_exact = false;
- /// \brief Check two statistics for equality
- bool Equals(const ArrayStatistics& other) const {
- return null_count == other.null_count && distinct_count ==
other.distinct_count &&
- min == other.min && is_min_exact == other.is_min_exact && max ==
other.max &&
- is_max_exact == other.is_max_exact;
- }
+ /// \brief Checks whether this ArrayStatistics instance is equal to another.
+ ///
+ /// \param other The \ref ArrayStatistics instance to compare against.
+ ///
+ /// \param equal_options Options used to compare double values for equality.
+ ///
+ /// \return True if the two \ref arrow::ArrayStatistics instances are equal;
otherwise,
+ /// false.
+ bool Equals(const ArrayStatistics& other,
+ const EqualOptions& equal_options = EqualOptions::Defaults())
const;
- /// \brief Check two statistics for equality
Review Comment:
Can we write a docstring instead of removing this?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]