fsaintjacques commented on a change in pull request #7156:
URL: https://github.com/apache/arrow/pull/7156#discussion_r426085245
##
File path: python/pyarrow/_dataset.pyx
##
@@ -42,6 +43,51 @@ def _forbid_instantiation(klass, subclasses_instead=True):
raise TypeError(msg)
+ctypedef CResult[shared_ptr[CRandomAccessFile]] CCustomOpen()
+
+cdef class FileSource:
+
+cdef:
+# XXX why is shared_ptr necessary here? CFileSource shouldn't need it
Review comment:
This comment is dead.
##
File path: cpp/src/arrow/dataset/test_util.h
##
@@ -251,6 +251,15 @@ class JSONRecordBatchFileFormat : public FileFormat {
SchemaResolver resolver_;
};
+inline static std::vector SourcesFromPaths(
Review comment:
Seems like a useful static method `FileSource::FromPaths`.
##
File path: cpp/src/arrow/result.h
##
@@ -298,7 +298,7 @@ class ARROW_MUST_USE_TYPE Result : public
util::EqualityComparable> {
///
/// \return The stored non-OK status object, or an OK status if this object
/// has a value.
- Status status() const { return status_; }
+ const Status& status() const { return status_; }
Review comment:
Good catch.
##
File path: cpp/src/arrow/flight/client.cc
##
@@ -108,7 +108,7 @@ class FinishableStream {
std::shared_ptr stream() const { return stream_; }
/// \brief Finish the call, adding server context to the given status.
- virtual Status Finish(Status&& st) {
+ virtual Status Finish(Status st) {
Review comment:
This PR shouldn't touch this code unless it's failing at building?
##
File path: cpp/src/arrow/util/checked_cast.h
##
@@ -39,11 +40,11 @@ inline OutputType checked_cast(InputType&& value) {
}
template
-std::shared_ptr checked_pointer_cast(const std::shared_ptr& r) noexcept {
+std::shared_ptr checked_pointer_cast(std::shared_ptr r) noexcept {
Review comment:
Shouldn't we stick with the signature from the standard?
https://en.cppreference.com/w/cpp/memory/shared_ptr/pointer_cast
##
File path: cpp/src/arrow/status.h
##
@@ -306,8 +306,12 @@ class ARROW_MUST_USE_TYPE ARROW_EXPORT Status : public
util::EqualityComparable<
std::string message() const { return ok() ? "" : state_->msg; }
/// \brief Return the status detail attached to this message.
- std::shared_ptr detail() const {
-return state_ == NULLPTR ? NULLPTR : state_->detail;
+ const std::shared_ptr& detail() const {
+if (state_ == NULLPTR) {
Review comment:
```suggestion
static std::shared_ptr no_detail = NULLPTR;
return state_ ? state_->detail : no_detail;
```
##
File path: cpp/src/arrow/dataset/file_base.h
##
@@ -45,82 +46,150 @@ namespace dataset {
/// be read like a file
class ARROW_DS_EXPORT FileSource {
public:
- // NOTE(kszucs): it'd be better to separate the BufferSource from FileSource
- enum SourceKind { PATH, BUFFER };
-
FileSource(std::string path, std::shared_ptr filesystem,
- Compression::type compression = Compression::UNCOMPRESSED,
- bool writable = true)
+ Compression::type compression = Compression::UNCOMPRESSED)
: impl_(PathAndFileSystem{std::move(path), std::move(filesystem)}),
-compression_(compression),
-writable_(writable) {}
+compression_(compression) {}
explicit FileSource(std::shared_ptr buffer,
Compression::type compression =
Compression::UNCOMPRESSED)
: impl_(std::move(buffer)), compression_(compression) {}
- explicit FileSource(std::shared_ptr buffer,
- Compression::type compression =
Compression::UNCOMPRESSED)
- : impl_(std::move(buffer)), compression_(compression), writable_(true) {}
-
- bool operator==(const FileSource& other) const {
-if (id() != other.id()) {
- return false;
-}
+ using CustomOpen =
std::function>()>;
+ explicit FileSource(CustomOpen open) : impl_(std::move(open)) {}
-if (id() == PATH) {
- return path() == other.path() && filesystem() == other.filesystem();
-}
+ using CustomOpenWithCompression =
+
std::function>(Compression::type)>;
+ explicit FileSource(CustomOpenWithCompression open_with_compression,
+ Compression::type compression =
Compression::UNCOMPRESSED)
+ : impl_(std::bind(std::move(open_with_compression), compression)),
+compression_(compression) {}
-return buffer()->Equals(*other.buffer());
- }
+ explicit FileSource(std::shared_ptr file,
+ Compression::type compression =
Compression::UNCOMPRESSED)
+ : impl_([=] { return ToResult(file); }), compression_(compression) {}
- /// \brief The kind of file, whether stored in a filesystem, memory
- /// resident, or other
- SourceKind id() const { return static_cast(impl_.index()); }
+ FileSource() : impl_(CustomOpen{}) {}
/// \brief Return