Tom-Newton commented on code in PR #38780:
URL: https://github.com/apache/arrow/pull/38780#discussion_r1398395091


##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -461,6 +463,224 @@ class ObjectInputFile final : public io::RandomAccessFile {
   int64_t content_length_ = kNoSize;
   std::shared_ptr<const KeyValueMetadata> metadata_;
 };
+
+Status CreateEmptyBlockBlob(
+    std::shared_ptr<Azure::Storage::Blobs::BlockBlobClient> block_blob_client) {
+  std::string s = "";
+  try {
+    block_blob_client->UploadFrom(
+        const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(s.data())), s.size());
+  } catch (const Azure::Storage::StorageException& exception) {
+    return internal::ExceptionToStatus(
+        "UploadFrom failed for '" + block_blob_client->GetUrl() +
+            "' with an unexpected Azure error. There is no existing blob at 
this "
+            "location or the existing blob must be replaced so 
ObjectAppendStream must "
+            "create a new empty block blob.",
+        exception);
+  }
+  return Status::OK();
+}
+
+Result<Azure::Storage::Blobs::Models::GetBlockListResult> GetBlockList(
+    std::shared_ptr<Azure::Storage::Blobs::BlockBlobClient> block_blob_client) {
+  try {
+    return block_blob_client->GetBlockList().Value;
+  } catch (const Azure::Storage::StorageException& exception) {
+    return internal::ExceptionToStatus(
+        "GetBlockList failed for '" + block_blob_client->GetUrl() +
+            "' with an unexpected Azure error. Cannot write to a file without 
first "
+            "fetching the existing block list.",
+        exception);
+  }
+}
+
+Azure::Storage::Metadata ArrowMetadataToAzureMetadata(
+    const std::shared_ptr<const KeyValueMetadata>& arrow_metadata) {
+  Azure::Storage::Metadata azure_metadata;
+  for (const auto& key_value : arrow_metadata->sorted_pairs()) {
+    azure_metadata[key_value.first] = key_value.second;
+  }
+  return azure_metadata;
+}
+
+Status CommitBlockList(
+    std::shared_ptr<Azure::Storage::Blobs::BlockBlobClient> block_blob_client,
+    const std::vector<std::string>& block_ids, const Azure::Storage::Metadata& metadata) {
+  Azure::Storage::Blobs::CommitBlockListOptions options;
+  options.Metadata = metadata;
+  try {
+    // CommitBlockList puts all block_ids in the latest element. That means in the case
+    // of
+    // overlapping block_ids the newly staged block ids will always replace the
+    // previously committed blocks.
+    // https://learn.microsoft.com/en-us/rest/api/storageservices/put-block-list?tabs=microsoft-entra-id#request-body
+    block_blob_client->CommitBlockList(block_ids, options);
+  } catch (const Azure::Storage::StorageException& exception) {
+    return internal::ExceptionToStatus(
+        "CommitBlockList failed for '" + block_blob_client->GetUrl() +
+            "' with an unexpected Azure error. Committing is required to flush 
an "
+            "output/append stream.",
+        exception);
+  }
+  return Status::OK();
+}
+
+class ObjectAppendStream final : public io::OutputStream {
+ public:
+  ObjectAppendStream(
+      std::shared_ptr<Azure::Storage::Blobs::BlockBlobClient> block_blob_client,
+      const io::IOContext& io_context, const AzureLocation& location,
+      const std::shared_ptr<const KeyValueMetadata>& metadata,
+      const AzureOptions& options, int64_t size = kNoSize)
+      : block_blob_client_(std::move(block_blob_client)),
+        io_context_(io_context),
+        location_(location),
+        content_length_(size) {
+    if (metadata && metadata->size() != 0) {
+      metadata_ = ArrowMetadataToAzureMetadata(metadata);
+    } else if (options.default_metadata && options.default_metadata->size() != 0) {
+      metadata_ = ArrowMetadataToAzureMetadata(options.default_metadata);
+    }

Review Comment:
   Closing/flushing an append stream always completely replaces the old metadata. This is covered by the `AzuriteFileSystemTest, TestWriteMetadata` test, which I largely copied from `gcsfs_test.cc`.
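
   For illustration only, here is a hypothetical snippet (not the actual test; `fs` stands in for an `AzureFileSystem` instance) showing what the replace semantics means for a caller:

```cpp
// Hypothetical illustration: after the second stream is closed, the blob's
// metadata is exactly {"b": "2"}. The "a" key from the first write is gone
// because flushing replaces the metadata rather than merging it.
ARROW_ASSIGN_OR_RAISE(
    auto stream,
    fs->OpenOutputStream("container/blob",
                         arrow::KeyValueMetadata::Make({"a"}, {"1"})));
RETURN_NOT_OK(stream->Close());
ARROW_ASSIGN_OR_RAISE(
    stream,
    fs->OpenOutputStream("container/blob",
                         arrow::KeyValueMetadata::Make({"b"}, {"2"})));
RETURN_NOT_OK(stream->Close());
```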
   
   I don't feel strongly about this, but I think replacing is a reasonable choice. If we merged instead, there would be no way to remove metadata keys through the Arrow filesystem, and replacing is also simpler to implement than merging.
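
   To make the trade-off concrete, a merge policy would look roughly like this hypothetical helper (`MergeMetadata` is not part of this PR), which shows why keys could never be removed:

```cpp
// Hypothetical sketch of the rejected merge policy (not part of this PR).
// Azure::Storage::Metadata is a case-insensitive map<std::string, std::string>.
Azure::Storage::Metadata MergeMetadata(Azure::Storage::Metadata existing,
                                       const Azure::Storage::Metadata& updates) {
  for (const auto& [key, value] : updates) {
    existing[key] = value;  // overwrite matching keys, keep everything else
  }
  // Any key present in `existing` but absent from `updates` survives, so a
  // caller could never delete a metadata key through the filesystem API.
  return existing;
}
```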



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]