This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new c24bc29100 GH-49576: [Ruby] Add support for custom metadata in Footer (#49577)
c24bc29100 is described below

commit c24bc29100a141ecc7372e1f786d6505c6337360
Author: Sutou Kouhei <[email protected]>
AuthorDate: Mon Mar 23 06:32:49 2026 +0900

    GH-49576: [Ruby] Add support for custom metadata in Footer (#49577)
    
    ### Rationale for this change
    
    In the Arrow IPC file format, the Footer can have custom metadata.
    
    ### What changes are included in this PR?
    
    * Add `garrow_record_batch_file_reader_get_metadata()`
    * Add `garrow_record_batch_file_writer_new_full()`
    * Add `ArrowFormat::FileReader#metadata`
    * Add `metadata` to `ArrowFormat::FileWriter#finish`
    * Add `metadata:` to `Arrow::Table#save`
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    Yes.
    
    * GitHub Issue: #49576
    
    Authored-by: Sutou Kouhei <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 c_glib/arrow-glib/reader.cpp                       | 29 +++++++++++++++
 c_glib/arrow-glib/reader.h                         |  4 +++
 c_glib/arrow-glib/writer.cpp                       | 42 ++++++++++++++++++++--
 c_glib/arrow-glib/writer.h                         |  9 +++++
 c_glib/test/test-file-writer.rb                    | 32 +++++++++++++++++
 ruby/red-arrow-format/Gemfile                      |  1 +
 .../lib/arrow-format/file-reader.rb                |  2 ++
 .../lib/arrow-format/file-writer.rb                | 21 +++++++----
 ruby/red-arrow-format/test/helper.rb               |  1 +
 ruby/red-arrow-format/test/test-reader.rb          | 32 +++++++++++++----
 ruby/red-arrow-format/test/test-writer.rb          | 21 +++++++++++
 ruby/red-arrow/lib/arrow/table-saver.rb            |  6 ++--
 12 files changed, 181 insertions(+), 19 deletions(-)

diff --git a/c_glib/arrow-glib/reader.cpp b/c_glib/arrow-glib/reader.cpp
index 9fe9d9d1b3..f6e0d3064d 100644
--- a/c_glib/arrow-glib/reader.cpp
+++ b/c_glib/arrow-glib/reader.cpp
@@ -668,6 +668,35 @@ 
garrow_record_batch_file_reader_read_record_batch(GArrowRecordBatchFileReader *r
   }
 }
 
+/**
+ * garrow_record_batch_file_reader_get_metadata:
+ * @reader: A #GArrowRecordBatchFileReader.
+ *
+ * Returns: (nullable) (element-type utf8 utf8) (transfer full):
+ *   The metadata in the footer.
+ *
+ * Since: 24.0.0
+ */
+GHashTable *
+garrow_record_batch_file_reader_get_metadata(GArrowRecordBatchFileReader 
*reader)
+{
+  auto arrow_reader = garrow_record_batch_file_reader_get_raw(reader);
+  auto arrow_metadata = arrow_reader->metadata();
+
+  if (!arrow_metadata) {
+    return nullptr;
+  }
+
+  auto metadata = g_hash_table_new(g_str_hash, g_str_equal);
+  const auto n = arrow_metadata->size();
+  for (int64_t i = 0; i < n; ++i) {
+    g_hash_table_insert(metadata,
+                        const_cast<gchar *>(arrow_metadata->key(i).c_str()),
+                        const_cast<gchar *>(arrow_metadata->value(i).c_str()));
+  }
+  return metadata;
+}
+
 struct GArrowFeatherFileReaderPrivate
 {
   std::shared_ptr<arrow::ipc::feather::Reader> feather_reader;
diff --git a/c_glib/arrow-glib/reader.h b/c_glib/arrow-glib/reader.h
index 5401aa3bb1..1e896fd09f 100644
--- a/c_glib/arrow-glib/reader.h
+++ b/c_glib/arrow-glib/reader.h
@@ -166,6 +166,10 @@ 
garrow_record_batch_file_reader_read_record_batch(GArrowRecordBatchFileReader *r
                                                   guint i,
                                                   GError **error);
 
+GARROW_AVAILABLE_IN_24_0
+GHashTable *
+garrow_record_batch_file_reader_get_metadata(GArrowRecordBatchFileReader 
*reader);
+
 #define GARROW_TYPE_FEATHER_FILE_READER (garrow_feather_file_reader_get_type())
 GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowFeatherFileReader,
diff --git a/c_glib/arrow-glib/writer.cpp b/c_glib/arrow-glib/writer.cpp
index 4228b60910..0cbd88a769 100644
--- a/c_glib/arrow-glib/writer.cpp
+++ b/c_glib/arrow-glib/writer.cpp
@@ -20,6 +20,8 @@
 #include <arrow-glib/array.hpp>
 #include <arrow-glib/enums.h>
 #include <arrow-glib/error.hpp>
+#include <arrow-glib/internal-hash-table.hpp>
+#include <arrow-glib/ipc-options.hpp>
 #include <arrow-glib/record-batch.hpp>
 #include <arrow-glib/schema.hpp>
 #include <arrow-glib/table.hpp>
@@ -288,16 +290,50 @@ GArrowRecordBatchFileWriter *
 garrow_record_batch_file_writer_new(GArrowOutputStream *sink,
                                     GArrowSchema *schema,
                                     GError **error)
+{
+  return garrow_record_batch_file_writer_new_full(sink, schema, nullptr, 
nullptr, error);
+}
+
+/**
+ * garrow_record_batch_file_writer_new_full:
+ * @sink: The output of the writer.
+ * @schema: The schema of the writer.
+ * @options: (nullable): The options for serialization.
+ * @metadata: (nullable) (element-type utf8 utf8): The custom metadata in
+ *   the footer.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): A newly created #GArrowRecordBatchFileWriter
+ *   or %NULL on error.
+ *
+ * Since: 24.0.0
+ */
+GArrowRecordBatchFileWriter *
+garrow_record_batch_file_writer_new_full(GArrowOutputStream *sink,
+                                         GArrowSchema *schema,
+                                         GArrowWriteOptions *options,
+                                         GHashTable *metadata,
+                                         GError **error)
 {
   auto arrow_sink = garrow_output_stream_get_raw(sink);
   auto arrow_schema = garrow_schema_get_raw(schema);
+  arrow::ipc::IpcWriteOptions arrow_options = 
arrow::ipc::IpcWriteOptions::Defaults();
+  if (options) {
+    arrow_options = *garrow_write_options_get_raw(options);
+  }
+  std::shared_ptr<arrow::KeyValueMetadata> arrow_metadata;
+  if (metadata) {
+    arrow_metadata = garrow_internal_hash_table_to_metadata(metadata);
+  }
+
   std::shared_ptr<arrow::ipc::RecordBatchWriter> arrow_writer;
-  auto arrow_writer_result = arrow::ipc::MakeFileWriter(arrow_sink, 
arrow_schema);
-  if (garrow::check(error, arrow_writer_result, 
"[record-batch-file-writer][open]")) {
+  auto arrow_writer_result =
+    arrow::ipc::MakeFileWriter(arrow_sink, arrow_schema, arrow_options, 
arrow_metadata);
+  if (garrow::check(error, arrow_writer_result, 
"[record-batch-file-writer][new]")) {
     auto arrow_writer = *arrow_writer_result;
     return garrow_record_batch_file_writer_new_raw(&arrow_writer);
   } else {
-    return NULL;
+    return nullptr;
   }
 }
 
diff --git a/c_glib/arrow-glib/writer.h b/c_glib/arrow-glib/writer.h
index fc5fe0c2c7..e02da0e30d 100644
--- a/c_glib/arrow-glib/writer.h
+++ b/c_glib/arrow-glib/writer.h
@@ -20,6 +20,7 @@
 #pragma once
 
 #include <arrow-glib/array.h>
+#include <arrow-glib/ipc-options.h>
 #include <arrow-glib/record-batch.h>
 #include <arrow-glib/schema.h>
 
@@ -94,6 +95,14 @@ garrow_record_batch_file_writer_new(GArrowOutputStream *sink,
                                     GArrowSchema *schema,
                                     GError **error);
 
+GARROW_AVAILABLE_IN_24_0
+GArrowRecordBatchFileWriter *
+garrow_record_batch_file_writer_new_full(GArrowOutputStream *sink,
+                                         GArrowSchema *schema,
+                                         GArrowWriteOptions *options,
+                                         GHashTable *metadata,
+                                         GError **error);
+
 /**
  * GArrowCSVQuotingStyle:
  * @GARROW_CSV_QUOTING_STYLE_NEEDED: Only enclose values in quotes which need 
them.
diff --git a/c_glib/test/test-file-writer.rb b/c_glib/test/test-file-writer.rb
index 06c9dfa25c..41fd00cee4 100644
--- a/c_glib/test/test-file-writer.rb
+++ b/c_glib/test/test-file-writer.rb
@@ -88,4 +88,36 @@ class TestFileWriter < Test::Unit::TestCase
       input.close
     end
   end
+
+  def test_footer_custom_metadata
+    tempfile = Tempfile.open("arrow-ipc-file-writer")
+    output = Arrow::FileOutputStream.new(tempfile.path, false)
+
+    array = build_boolean_array([true, false, true])
+    field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new)
+    schema = Arrow::Schema.new([field])
+
+    options = Arrow::WriteOptions.new
+    metadata = {"key1" => "value1", "key2" => "value2"}
+    begin
+      file_writer = Arrow::RecordBatchFileWriter.new(output,
+                                                     schema,
+                                                     options,
+                                                     metadata)
+      file_writer.close
+      assert do
+        file_writer.closed?
+      end
+    ensure
+      output.close
+    end
+
+    input = Arrow::MemoryMappedInputStream.new(tempfile.path)
+    begin
+      file_reader = Arrow::RecordBatchFileReader.new(input)
+      assert_equal(metadata, file_reader.metadata)
+    ensure
+      input.close
+    end
+  end
 end
diff --git a/ruby/red-arrow-format/Gemfile b/ruby/red-arrow-format/Gemfile
index 296a7b4435..34c981237c 100644
--- a/ruby/red-arrow-format/Gemfile
+++ b/ruby/red-arrow-format/Gemfile
@@ -26,5 +26,6 @@ gem "red-arrow", path: "../red-arrow"
 group :development do
   gem "benchmark-driver"
   gem "rake"
+  gem "stringio"
   gem "test-unit"
 end
diff --git a/ruby/red-arrow-format/lib/arrow-format/file-reader.rb 
b/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
index 7c749e5fbf..cec3711096 100644
--- a/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
@@ -35,6 +35,7 @@ module ArrowFormat
     FOOTER_SIZE_SIZE = IO::Buffer.size_of(FOOTER_SIZE_FORMAT)
 
     attr_reader :schema
+    attr_reader :metadata
     def initialize(input)
       case input
       when IO
@@ -47,6 +48,7 @@ module ArrowFormat
 
       validate
       @footer = read_footer
+      @metadata = read_custom_metadata(@footer.custom_metadata)
       @record_batch_blocks = @footer.record_batches || []
       @schema = read_schema(@footer.schema)
       @dictionaries = read_dictionaries
diff --git a/ruby/red-arrow-format/lib/arrow-format/file-writer.rb 
b/ruby/red-arrow-format/lib/arrow-format/file-writer.rb
index 27b6b55bbf..2ac4695180 100644
--- a/ruby/red-arrow-format/lib/arrow-format/file-writer.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/file-writer.rb
@@ -29,26 +29,33 @@ module ArrowFormat
       super
     end
 
-    def finish
-      super
-      write_footer
+    def finish(metadata=nil)
+      super()
+      write_footer(metadata)
       write_data(MAGIC)
       @output
     end
 
     private
-    def build_footer
+    def build_footer(metadata)
       fb_footer = FB::Footer::Data.new
       fb_footer.version = FB::MetadataVersion::V5
       fb_footer.schema = @fb_schema
       fb_footer.dictionaries = @fb_dictionary_blocks
       fb_footer.record_batches = @fb_record_batch_blocks
-      # fb_footer.custom_metadata = ... # TODO
+      if metadata
+        fb_footer.custom_metadata = metadata.collect do |key, value|
+          fb_key_value = FB::KeyValue::Data.new
+          fb_key_value.key = key
+          fb_key_value.value = value
+          fb_key_value
+        end
+      end
       FB::Footer.serialize(fb_footer)
     end
 
-    def write_footer
-      footer = build_footer
+    def write_footer(metadata)
+      footer = build_footer(metadata)
       write_data(footer)
       write_data([footer.bytesize].pack("l<"))
     end
diff --git a/ruby/red-arrow-format/test/helper.rb 
b/ruby/red-arrow-format/test/helper.rb
index 394d92d0dd..29fbfaec4c 100644
--- a/ruby/red-arrow-format/test/helper.rb
+++ b/ruby/red-arrow-format/test/helper.rb
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+require "stringio"
 require "tmpdir"
 
 require "test-unit"
diff --git a/ruby/red-arrow-format/test/test-reader.rb 
b/ruby/red-arrow-format/test/test-reader.rb
index c1c6b26288..1d3202f4f7 100644
--- a/ruby/red-arrow-format/test/test-reader.rb
+++ b/ruby/red-arrow-format/test/test-reader.rb
@@ -675,18 +675,36 @@ module ReaderTests
   end
 end
 
+module FileReaderTests
+  def test_custom_metadata_footer
+    Dir.mktmpdir do |tmp_dir|
+      table = Arrow::Table.new(value: Arrow::Int8Array.new([1, 2, 3]))
+      metadata = {
+        "key1" => "value1",
+        "key2" => "value2",
+      }
+      open_input(table, tmp_dir, metadata: metadata) do |input|
+        reader = reader_class.new(input)
+        assert_equal(metadata, reader.metadata)
+      end
+    ensure
+      GC.start
+    end
+  end
+end
+
 module FileInput
-  def open_input(table, tmp_dir, &block)
+  def open_input(table, tmp_dir, **options, &block)
     path = File.join(tmp_dir, "data.#{file_extension}")
-    table.save(path)
+    table.save(path, **options)
     File.open(path, "rb", &block)
   end
 end
 
 module PipeInput
-  def open_input(table, tmp_dir, &block)
+  def open_input(table, tmp_dir, **options)
     buffer = Arrow::ResizableBuffer.new(4096)
-    table.save(buffer, format: format)
+    table.save(buffer, format: format, **options)
     IO.pipe do |input, output|
       write_thread = Thread.new do
         output.write(buffer.data.to_s)
@@ -701,15 +719,16 @@ module PipeInput
 end
 
 module StringInput
-  def open_input(table, tmp_dir)
+  def open_input(table, tmp_dir, **options)
     buffer = Arrow::ResizableBuffer.new(4096)
-    table.save(buffer, format: format)
+    table.save(buffer, format: format, **options)
     yield(buffer.data.to_s)
   end
 end
 
 class TestFileReaderFileInput < Test::Unit::TestCase
   include ReaderTests
+  include FileReaderTests
   include FileInput
 
   def file_extension
@@ -723,6 +742,7 @@ end
 
 class TestFileReaderStringInput < Test::Unit::TestCase
   include ReaderTests
+  include FileReaderTests
   include StringInput
 
   def format
diff --git a/ruby/red-arrow-format/test/test-writer.rb 
b/ruby/red-arrow-format/test/test-writer.rb
index 72776f01ab..55b3c22b7a 100644
--- a/ruby/red-arrow-format/test/test-writer.rb
+++ b/ruby/red-arrow-format/test/test-writer.rb
@@ -924,6 +924,26 @@ module WriterTests
   end
 end
 
+module FileWriterTests
+  def test_custom_metadata_footer
+    output = StringIO.new(+"".b)
+    writer = writer_class.new(output)
+    field = ArrowFormat::Field.new("value", ArrowFormat::BooleanType.new)
+    schema = ArrowFormat::Schema.new([field])
+    writer.start(schema)
+    metadata = {
+      "key1" => "value1",
+      "key2" => "value2",
+    }
+    writer.finish(metadata)
+    buffer = Arrow::Buffer.new(output.string)
+    Arrow::BufferInputStream.open(buffer) do |input|
+      reader = Arrow::RecordBatchFileReader.new(input)
+      assert_equal(metadata, reader.metadata)
+    end
+  end
+end
+
 module WriterDictionaryDeltaTests
   def build_schema(value_type)
     index_type = ArrowFormat::Int32Type.singleton
@@ -1513,6 +1533,7 @@ class TestFileWriter < Test::Unit::TestCase
 
   sub_test_case("Basic") do
     include WriterTests
+    include FileWriterTests
   end
 
   sub_test_case("Dictionary: delta") do
diff --git a/ruby/red-arrow/lib/arrow/table-saver.rb 
b/ruby/red-arrow/lib/arrow/table-saver.rb
index c33e641438..d456f235e5 100644
--- a/ruby/red-arrow/lib/arrow/table-saver.rb
+++ b/ruby/red-arrow/lib/arrow/table-saver.rb
@@ -130,9 +130,9 @@ module Arrow
       end
     end
 
-    def save_raw(writer_class)
+    def save_raw(writer_class, *args)
       open_output_stream do |output|
-        writer_class.open(output, @table.schema) do |writer|
+        writer_class.open(output, @table.schema, *args) do |writer|
           writer.write_table(@table)
         end
       end
@@ -144,7 +144,7 @@ module Arrow
 
     # @since 1.0.0
     def save_as_arrow_file
-      save_raw(RecordBatchFileWriter)
+      save_raw(RecordBatchFileWriter, nil, @options[:metadata])
     end
 
     # @deprecated Use `format: :arrow_batch` instead.

Reply via email to