westonpace commented on a change in pull request #11991: URL: https://github.com/apache/arrow/pull/11991#discussion_r779905690
##########
File path: cpp/src/arrow/dataset/scanner.h
##########
@@ -138,41 +133,40 @@ struct ARROW_DS_EXPORT ScanOptions {
   // This is used by Fragment implementations to apply the column
   // sub-selection optimization.
   std::vector<std::string> MaterializedFields() const;
+};
-  // Return a threaded or serial TaskGroup according to use_threads.
-  std::shared_ptr<::arrow::internal::TaskGroup> TaskGroup() const;
+struct ARROW_DS_EXPORT ProjectionDescr {
+  compute::Expression expression;
+  std::shared_ptr<Schema> schema;
 };
-/// \brief Read record batches from a range of a single data fragment. A
-/// ScanTask is meant to be a unit of work to be dispatched. The implementation
-/// must be thread and concurrent safe.
-class ARROW_DS_EXPORT ScanTask {
- public:
-  /// \brief Iterate through sequence of materialized record batches
-  /// resulting from the Scan. Execution semantics are encapsulated in the
-  /// particular ScanTask implementation
-  virtual Result<RecordBatchIterator> Execute() = 0;
-  virtual Future<RecordBatchVector> SafeExecute(::arrow::internal::Executor* executor);
-  virtual Future<> SafeVisit(::arrow::internal::Executor* executor,
-                             std::function<Status(std::shared_ptr<RecordBatch>)> visitor);
+/// \brief Create a ProjectionDescr by binding an expression to the dataset schema
+///
+/// expression must return a struct
+ARROW_DS_EXPORT Result<ProjectionDescr> MakeProjectionFromStructExpression(
+    const compute::Expression& expression, const Schema& dataset_schema);
-  virtual ~ScanTask() = default;
+/// \brief Create a ProjectionDescr from expressions/names for each field
+ARROW_DS_EXPORT Result<ProjectionDescr> MakeProjectionFromFieldExpressions(
+    const std::vector<compute::Expression>& exprs, std::vector<std::string> names,
+    const Schema& dataset_schema);
-  const std::shared_ptr<ScanOptions>& options() const { return options_; }
-  const std::shared_ptr<Fragment>& fragment() const { return fragment_; }
+/// \brief Create a default projection referencing fields in the dataset schema
+ARROW_DS_EXPORT Result<ProjectionDescr> MakeProjectionFromNames(
+    std::vector<std::string> names, const Schema& dataset_schema);

Review comment:
Good idea. I did both of these things.

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org