pitrou commented on code in PR #12763:
URL: https://github.com/apache/arrow/pull/12763#discussion_r855118409
##########
ci/scripts/python_wheel_unix_test.sh:
##########
@@ -78,6 +83,7 @@ fi
if [ "${CHECK_UNITTESTS}" == "ON" ]; then
# Install testing dependencies
pip install -U -r ${source_dir}/python/requirements-wheel-test.txt
+ PYTHON=python ${source_dir}/ci/scripts/install_gcs_testbench.sh default
Review Comment:
Hmm, what is `PYTHON=python` supposed to solve here?
##########
cpp/src/arrow/filesystem/gcsfs.h:
##########
@@ -27,11 +27,38 @@
namespace arrow {
namespace fs {
-struct GcsCredentials;
+// Opaque wrapper for GCS's library credentials to avoid exposing in Arrow
headers.
+struct GcsCredentialsHolder;
+class GcsFileSystem;
+
+/// \brief Container for GCS Credentials and information necessary to recreate
them.
+class ARROW_EXPORT GcsCredentials {
+ public:
+ bool Equals(const GcsCredentials& other) const;
+ bool anonymous() const { return anonymous_; }
+ const std::string& access_token() const { return access_token_; }
+ TimePoint expiration() const { return expiration_; }
+ const std::string& target_service_account() const { return
target_service_account_; }
+ const std::string& json_credentials() const { return json_credentials_; }
+ const std::shared_ptr<GcsCredentialsHolder>& holder() const { return
holder_; }
+
+ private:
+ GcsCredentials() = default;
+ bool anonymous_ = false;
+ std::string access_token_;
+ TimePoint expiration_;
+ std::string target_service_account_;
+ std::string json_credentials_;
+ std::shared_ptr<GcsCredentialsHolder> holder_;
+ friend class GcsFileSystem;
+ friend struct GcsOptions;
+};
/// Options for the GcsFileSystem implementation.
struct ARROW_EXPORT GcsOptions {
- std::shared_ptr<GcsCredentials> credentials;
+ /// \brief Equivelant to the GcsOptions::Defaults() that returns.
Review Comment:
May Defaults() engage non-trivial operations such as file or network
lookups?
##########
cpp/src/arrow/filesystem/gcsfs_test.cc:
##########
@@ -425,13 +431,13 @@ TEST(GcsFileSystem, ToArrowStatus) {
}
TEST(GcsFileSystem, FileSystemCompare) {
- GcsOptions a_options;
+ GcsOptions a_options = GcsOptions::Defaults();
Review Comment:
Revert these changes now?
##########
cpp/src/arrow/filesystem/gcsfs_test.cc:
##########
@@ -1257,6 +1314,14 @@ TEST_F(GcsIntegrationTest, OpenInputFileClosed) {
ASSERT_RAISES(Invalid, stream->Seek(2));
}
+TEST_F(GcsIntegrationTest, TestFileSystemFromUri) {
+ // Smoke test for FileSystemFromUri
+ ASSERT_OK_AND_ASSIGN(auto fs,
FileSystemFromUri(std::string("gs://anonymous@") +
+ PreexistingBucketPath()));
+ ASSERT_OK_AND_ASSIGN(auto fs2,
FileSystemFromUri(std::string("gcs://anonymous@") +
+ PreexistingBucketPath()));
Review Comment:
Also call `type_name()` to ensure that it's a GCS filesystem?
##########
cpp/src/arrow/filesystem/gcsfs.cc:
##########
@@ -156,17 +166,24 @@ class GcsInputStream : public arrow::io::InputStream {
mutable gcs::ObjectReadStream stream_;
GcsPath path_;
gcs::Generation generation_;
- std::int64_t offset_;
gcs::Client client_;
bool closed_ = false;
};
class GcsOutputStream : public arrow::io::OutputStream {
public:
explicit GcsOutputStream(gcs::ObjectWriteStream stream) :
stream_(std::move(stream)) {}
- ~GcsOutputStream() override = default;
+ ~GcsOutputStream() {
+ if (!closed_) {
+ // The common pattern is to close OutputStreams from destructor in arrow.
+ io::internal::CloseFromDestructor(this);
+ }
+ }
Status Close() override {
+ if (closed_) {
+ return Status::Invalid("Already closed stream.");
Review Comment:
Close() should be idempotent so just return Ok here.
##########
cpp/src/arrow/filesystem/gcsfs.cc:
##########
@@ -596,9 +639,15 @@ class GcsFileSystem::Impl {
}
static FileInfo ToFileInfo(const std::string& full_path,
- const gcs::ObjectMetadata& meta) {
- if (IsDirectory(meta)) {
- return FileInfo(full_path, FileType::Directory);
+ const gcs::ObjectMetadata& meta,
+ bool normalize_directories = false) {
Review Comment:
Hmm, I think we may want to make the behavior consistent. Perhaps open a
separate JIRA?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]