westonpace commented on a change in pull request #11991:
URL: https://github.com/apache/arrow/pull/11991#discussion_r780473574
##########
File path: cpp/src/arrow/dataset/scanner.cc
##########
@@ -823,10 +584,77 @@ Result<int64_t> AsyncScanner::CountRows() {
return total.load();
}
+Result<std::shared_ptr<RecordBatchReader>> AsyncScanner::ToRecordBatchReader()
{
+ ARROW_ASSIGN_OR_RAISE(auto it, ScanBatches());
+ return
std::make_shared<ScannerRecordBatchReader>(options()->projected_schema,
+ std::move(it));
+}
+
const std::shared_ptr<Dataset>& AsyncScanner::dataset() const { return
dataset_; }
+Status NestedFieldRefsNotImplemented() {
+ // TODO(ARROW-11259) Several functions (for example, IpcScanTask::Make)
assume that
+ // only top level fields will be materialized.
+ return Status::NotImplemented("Nested field references in scans.");
+}
+
} // namespace
+Result<ProjectionDescr> ProjectionDescr::FromStructExpression(
+ const compute::Expression& projection, const Schema& dataset_schema) {
+ ARROW_ASSIGN_OR_RAISE(compute::Expression bound_expression,
+ projection.Bind(dataset_schema));
+
+ if (bound_expression.type()->id() != Type::STRUCT) {
+ return Status::Invalid("Projection ", projection.ToString(),
+ " cannot yield record batches");
+ }
+ std::shared_ptr<Schema> projection_schema =
+ ::arrow::schema(checked_cast<const
StructType&>(*bound_expression.type()).fields(),
+ dataset_schema.metadata());
+
+ return ProjectionDescr{std::move(bound_expression),
std::move(projection_schema)};
+}
+
+Result<ProjectionDescr> ProjectionDescr::FromExpressions(
+ const std::vector<compute::Expression>& exprs, std::vector<std::string>
names,
+ const Schema& dataset_schema) {
+ compute::MakeStructOptions project_options{std::move(names)};
+
+ for (size_t i = 0; i < exprs.size(); ++i) {
+ if (auto ref = exprs[i].field_ref()) {
+ if (!ref->name()) return NestedFieldRefsNotImplemented();
+
+ // set metadata and nullability for plain field references
+ ARROW_ASSIGN_OR_RAISE(auto field, ref->GetOne(dataset_schema));
+ project_options.field_nullability[i] = field->nullable();
+ project_options.field_metadata[i] = field->metadata();
+ }
+ }
+
+ return ProjectionDescr::FromStructExpression(
+ call("make_struct", std::move(exprs), std::move(project_options)),
dataset_schema);
Review comment:
Good catch. Fixed. All the callers were moving into it anyway.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]