pitrou commented on code in PR #14257:
URL: https://github.com/apache/arrow/pull/14257#discussion_r988621032
##########
cpp/src/arrow/dataset/dataset_writer_test.cc:
##########
@@ -234,6 +234,18 @@ TEST_F(DatasetWriterTestFixture, BasicFileDirectoryPrefix)
{
AssertFilesCreated({"testdir/a/1_chunk-0.arrow"});
}
+TEST_F(DatasetWriterTestFixture, DirectoryCreateFails) {
+ // This should fail to be created
+ write_options_.base_dir = "///doesnotexist";
+ EXPECT_OK_AND_ASSIGN(auto dataset_writer,
+ DatasetWriter::Make(write_options_, scheduler_.get()));
+ Future<> queue_fut = dataset_writer->WriteRecordBatch(MakeBatch(100), "a",
"1_");
+ AssertFinished(queue_fut);
+ ASSERT_OK(dataset_writer->Finish());
+ scheduler_->End();
+ ASSERT_FINISHES_AND_RAISES(Invalid, scheduler_->OnFinished());
Review Comment:
Hmm... so the only way to know that writing failed is to end the scheduler?
Why isn't it popped up earlier?
##########
cpp/src/arrow/util/async_util.h:
##########
@@ -311,6 +306,8 @@ class ARROW_EXPORT AsyncTaskScheduler {
/// The default (nullptr) will use a FIFO queue if there is a
throttle.
static std::unique_ptr<AsyncTaskScheduler> Make(Throttle* throttle = NULLPTR,
std::unique_ptr<Queue> queue
= NULLPTR);
+
+ virtual bool IsEnded() = 0;
Review Comment:
+1 for const and a docstring (also stating the potential race condition in
usage)
##########
cpp/src/arrow/util/async_util.cc:
##########
@@ -98,13 +98,53 @@ class FifoQueue : public AsyncTaskScheduler::Queue {
std::list<std::unique_ptr<Task>> tasks_;
};
+class AlreadyFailedScheduler : public AsyncTaskScheduler {
+ public:
+ explicit AlreadyFailedScheduler(Status failure_reason,
+ FnOnce<Status(Status)> finish_callback)
+ : failure_reason_(std::move(failure_reason)),
+ finish_callback_(std::move(finish_callback)) {}
+ bool AddTask(std::unique_ptr<Task> task) override { return false; }
+ void End() override {
+ std::ignore = std::move(finish_callback_)(failure_reason_);
+ self.reset();
+ }
+ Future<> OnFinished() const override {
+ DCHECK(false) << "You should not rely on sub-scheduler's OnFinished. Use
a "
+ "finished callback when creating the sub-scheduler
instead";
+ return Future<>::MakeFinished(Status::UnknownError("Unreachable code
encountered"));
Review Comment:
Can use `Status::Abort` perhaps instead.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]