pitrou commented on code in PR #13567: URL: https://github.com/apache/arrow/pull/13567#discussion_r918111292
########## cpp/src/arrow/dataset/partition.cc: ########## @@ -115,6 +117,21 @@ static Result<RecordBatchVector> ApplyGroupings( return out; } +bool KeyValuePartitioning::Equals(const Partitioning& other) const { + if (this == &other) { + return true; + } + const auto& kv_partion = checked_cast<const KeyValuePartitioning&>(other); + int64_t idx = 0; + for (const auto& array : dictionaries_) { + if (!array->Equals(kv_partion.dictionaries()[idx++])) { Review Comment: Hmm, what if `kv_partion.dictionaries()` does not have the same size as `this->dictionaries_`. Is it possible? ########## cpp/src/arrow/dataset/partition.h: ########## @@ -63,13 +64,18 @@ struct ARROW_DS_EXPORT PartitionPathFormat { /// Paths are consumed from left to right. Paths must be relative to /// the root of a partition; path prefixes must be removed before passing /// the path to a partitioning for parsing. -class ARROW_DS_EXPORT Partitioning { +class ARROW_DS_EXPORT Partitioning : public util::EqualityComparable<Partitioning> { public: virtual ~Partitioning() = default; /// \brief The name identifying the kind of partitioning virtual std::string type_name() const = 0; + /// \brief determines if partiioning is exactly equal Review Comment: ```suggestion /// \brief Return whether the partitionings are equal ``` ########## cpp/src/arrow/dataset/partition_test.cc: ########## @@ -206,6 +206,15 @@ TEST_F(TestPartitioning, DirectoryPartitioning) { equal(field_ref("beta"), literal("foo")))); } +TEST_F(TestPartitioning, DirectoryPartitioningEquals) { + auto part = std::make_shared<DirectoryPartitioning>( + schema({field("alpha", int32()), field("beta", utf8())})); + auto other = std::make_shared<DirectoryPartitioning>( + schema({field("alpha", int32()), field("beta", utf8())})); + EXPECT_TRUE(part->Equals(*part)); + EXPECT_FALSE(part->Equals(*other)); Review Comment: (same remark for the below tests) ########## cpp/src/arrow/dataset/partition_test.cc: ########## @@ -206,6 +206,15 @@ TEST_F(TestPartitioning, DirectoryPartitioning) { equal(field_ref("beta"), literal("foo")))); } +TEST_F(TestPartitioning, DirectoryPartitioningEquals) { + auto part = std::make_shared<DirectoryPartitioning>( + schema({field("alpha", int32()), field("beta", utf8())})); + auto other = std::make_shared<DirectoryPartitioning>( + schema({field("alpha", int32()), field("beta", utf8())})); + EXPECT_TRUE(part->Equals(*part)); + EXPECT_FALSE(part->Equals(*other)); Review Comment: Given that `Equals` generally starts by comparing pointer equality, these tests are not sufficient. You should also test a third `DirectoryPartitioning` that has exactly the same attributes as `part`. ########## cpp/src/arrow/dataset/partition.cc: ########## @@ -82,6 +82,8 @@ std::shared_ptr<Partitioning> Partitioning::Default() { std::string type_name() const override { return "default"; } + bool Equals(const Partitioning& other) const override { return false; } Review Comment: Are you planning to actually implement this? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org