jorisvandenbossche commented on a change in pull request #7608:
URL: https://github.com/apache/arrow/pull/7608#discussion_r451801680
##########
File path: cpp/src/arrow/dataset/partition.cc
##########
@@ -646,15 +657,26 @@ class HivePartitioningFactory : public
PartitioningFactory {
}
}
- return impl.Finish(&dictionaries_);
+ auto schema_result = impl.Finish(&dictionaries_);
+ field_names_ = impl.FieldNames();
+ return schema_result;
}
Result<std::shared_ptr<Partitioning>> Finish(
const std::shared_ptr<Schema>& schema) const override {
- return std::shared_ptr<Partitioning>(new HivePartitioning(schema,
dictionaries_));
+ for (FieldRef ref : field_names_) {
+ // ensure all of field_names_ are present in schema
+ RETURN_NOT_OK(ref.FindOne(*schema).status());
+ }
+
+ // drop fields which aren't in field_names_
+ auto out_schema = SchemaFromColumnNames(schema, field_names_);
+
+ return std::make_shared<HivePartitioning>(std::move(out_schema),
dictionaries_);
Review comment:
Thanks, checking `dictionaries_` is a nice way to tell whether
`field_names_` has been set.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]