coryan commented on a change in pull request #11842:
URL: https://github.com/apache/arrow/pull/11842#discussion_r764196833



##########
File path: cpp/src/arrow/filesystem/gcsfs.cc
##########
@@ -275,12 +273,67 @@ class GcsFileSystem::Impl {
   const GcsOptions& options() const { return options_; }
 
   Result<FileInfo> GetFileInfo(const GcsPath& path) {
-    if (!path.object.empty()) {
-      auto meta = client_.GetObjectMetadata(path.bucket, path.object);
-      return GetFileInfoImpl(path, std::move(meta).status(), FileType::File);
+    if (path.object.empty()) {
+      auto meta = client_.GetBucketMetadata(path.bucket);
+      return GetFileInfoImpl(path, std::move(meta).status(), FileType::Directory);
     }
-    auto meta = client_.GetBucketMetadata(path.bucket);
-    return GetFileInfoImpl(path, std::move(meta).status(), FileType::Directory);
+    auto meta = client_.GetObjectMetadata(path.bucket, path.object);
+    return GetFileInfoImpl(
+        path, std::move(meta).status(),
+        path.object.back() == '/' ? FileType::Directory : FileType::File);
+  }
+
+  // GCS does not have directories or folders. But folders can be emulated (with some
+  // limitations) using marker objects.  That and listing with prefixes creates the
+  // illusion of folders.
+  google::cloud::Status CreateDirMarker(const std::string& bucket,
+                                        util::string_view name) {
+    // Make the name canonical.
+    const auto canonical = internal::EnsureTrailingSlash(name);
+    return client_
+        .InsertObject(bucket, canonical, "arrow gcsfs directory " + canonical,
+                      gcs::WithObjectMetadata(gcs::ObjectMetadata().upsert_metadata(
+                          "arrow/gcsfs", "directory")))
+        .status();
+  }
+
+  google::cloud::Status CreateDirMarkerRecursive(const std::string& bucket,
+                                                 const std::string& object) {
+    auto get_parent = [](std::string const& path) {
+      return std::move(internal::GetAbstractPathParent(path).first);
+    };
+    // Maybe counterintuitively we create the markers from the most nested and up. Because
+    // GCS does not have directories creating `a/b/c` will succeed, even if `a/` or `a/b/`
+    // does not exist.  In the common case, where `a/b/` may already exist, it is more
+    // efficient to just create `a/b/c/` and then find out that `a/b/` was already there.
+    // In the case where none exists, it does not matter which order we follow.
+    for (auto parent = object; !parent.empty(); parent = get_parent(parent)) {
+      auto status = CreateDirMarker(bucket, parent);
+      if (status.code() == google::cloud::StatusCode::kAlreadyExists) {
+        break;
+      }
+      if (!status.ok()) {
+        return status;
+      }
+    }
+    return {};
+  }
+
+  Status CreateDir(const GcsPath& p) {
+    if (p.object.empty()) {
+      return internal::ToArrowStatus(
+          client_.CreateBucket(p.bucket, gcs::BucketMetadata()).status());
+    }
+    return internal::ToArrowStatus(CreateDirMarker(p.bucket, p.object));
+  }
+
+  Status CreateDirRecursive(const GcsPath& p) {
+    auto status = client_.CreateBucket(p.bucket, gcs::BucketMetadata()).status();

Review comment:
       Good point, fixed.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to