bkietz commented on a change in pull request #10070:
URL: https://github.com/apache/arrow/pull/10070#discussion_r625999228



##########
File path: cpp/src/arrow/dataset/dataset.cc
##########
@@ -244,6 +222,81 @@ Result<FragmentIterator> 
InMemoryDataset::GetFragmentsImpl(compute::Expression)
   return MakeMaybeMapIterator(std::move(create_fragment), 
std::move(batches_it));
 }
 
+struct ReaderRecordBatchGenerator : InMemoryDataset::RecordBatchGenerator {
+  explicit ReaderRecordBatchGenerator(std::shared_ptr<RecordBatchReader> 
reader)
+      : reader_(std::move(reader)), consumed_(false) {}
+
+  RecordBatchIterator Get() const final {
+    if (consumed_) {
+      return MakeErrorIterator<std::shared_ptr<RecordBatch>>(
+          Status::Invalid("OneShotDataset was already consumed"));
+    }
+    consumed_ = true;
+    auto reader = reader_;
+    return MakeFunctionIterator([reader] { return reader->Next(); });

Review comment:
   ```suggestion
       return MakeIteratorFromReader(reader_);
   ```

##########
File path: cpp/src/arrow/dataset/dataset.h
##########
@@ -219,6 +218,22 @@ class ARROW_DS_EXPORT InMemoryDataset : public Dataset {
   std::shared_ptr<RecordBatchGenerator> get_batches_;
 };
 
+/// \brief A Source which yields fragments wrapping a one-shot stream
+/// of record batches.
+///
+/// Unlike other datasets, this can be scanned only once. This is
+/// intended to support writing data from streaming sources or other
+/// sources that can be iterated only once.
+class ARROW_DS_EXPORT OneShotDataset : public InMemoryDataset {

Review comment:
       Nit: this is no longer necessarily in memory, so it should derive from `Dataset` directly rather than from `InMemoryDataset`:
   ```suggestion
   class ARROW_DS_EXPORT OneShotDataset : public Dataset {
   ```

##########
File path: cpp/src/arrow/dataset/scanner.cc
##########
@@ -673,6 +673,62 @@ ScannerBuilder::ScannerBuilder(std::shared_ptr<Schema> 
schema,
   DCHECK_OK(Filter(scan_options_->filter));
 }
 
+class ARROW_DS_EXPORT OneShotScanTask : public ScanTask {

Review comment:
       Instead of exporting these, I think we can keep them in an anonymous namespace.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to