This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new e6146ee654 GH-48294: [Ruby] Add RecordBatch#merge (#50175)
e6146ee654 is described below

commit e6146ee654dcf3640442352598e62a981c6f0798
Author: Aaditya Srinivasan <[email protected]>
AuthorDate: Wed Jun 17 07:39:38 2026 +0530

    GH-48294: [Ruby] Add RecordBatch#merge (#50175)
    
    ### Rationale for this change
    
    This adds `Arrow::RecordBatch#merge` with behavior analogous to
    `Arrow::Table#merge`.
    
    ### What changes are included in this PR?
    
    - Add `RecordBatch#merge`
    - Support merging with a `Hash`
    - Support merging with another `RecordBatch`
    - Support column addition, replacement, and removal (via `Hash` values of `nil`)
    - Add tests covering the new behavior
    
    ### Are these changes tested?
    
    Yes. Added unit tests covering:
    - add column
    - remove column
    - replace column
    - merge with another RecordBatch
    - invalid merge target
    * GitHub Issue: #48294
    
    Authored-by: Aaditya Srinivasan <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 ruby/red-arrow/lib/arrow/record-batch.rb | 78 +++++++++++++++++++++++++++++
 ruby/red-arrow/test/test-record-batch.rb | 84 ++++++++++++++++++++++++++++++++
 2 files changed, 162 insertions(+)

diff --git a/ruby/red-arrow/lib/arrow/record-batch.rb b/ruby/red-arrow/lib/arrow/record-batch.rb
index 7a9ea296ea..0a2ba359b3 100644
--- a/ruby/red-arrow/lib/arrow/record-batch.rb
+++ b/ruby/red-arrow/lib/arrow/record-batch.rb
@@ -63,6 +63,63 @@ module Arrow
       table
     end
 
+    def merge(other)
+      added_columns = {}
+      removed_columns = {}
+
+      case other
+      when Hash
+        other.each do |name, value|
+          name = name.to_s
+          if value
+            added_columns[name] = ensure_raw_column(name, value)
+          else
+            removed_columns[name] = true
+          end
+        end
+      when RecordBatch
+        other.columns.each do |column|
+          name = column.name
+          added_columns[name] = ensure_raw_column(name, column)
+        end
+      else
+        message = "merge target must be Hash or Arrow::RecordBatch: " +
+          "<#{other.inspect}>: #{inspect}"
+        raise ArgumentError, message
+      end
+
+      new_columns = []
+      columns.each do |column|
+        column_name = column.name
+        new_column = added_columns.delete(column_name)
+        if new_column
+          new_columns << new_column
+          next
+        end
+        next if removed_columns.key?(column_name)
+        new_columns << ensure_raw_column(column_name, column)
+      end
+
+      added_columns.each_value do |new_column|
+        new_columns << new_column
+      end
+
+      new_fields = []
+      new_arrays = []
+      new_columns.each do |new_column|
+        new_fields << new_column[:field]
+        new_arrays << new_column[:data]
+      end
+
+      record_batch = self.class.new(
+        Schema.new(new_fields),
+        n_rows,
+        new_arrays,
+      )
+      share_input(record_batch)
+      record_batch
+    end
+
     def respond_to_missing?(name, include_private)
       return true if find_column(name)
       super
@@ -75,5 +132,26 @@ module Arrow
       end
       super
     end
+
+    private
+
+    def ensure_raw_column(name, data)
+      case data
+      when Array
+        {
+          field: Field.new(name, data.value_data_type),
+          data: data,
+        }
+      when Column
+        {
+          field: data.field,
+          data: data.data,
+        }
+      else
+        message = "column must be Arrow::Array or Arrow::Column: " +
+          "<#{name}>: <#{data.inspect}>: #{inspect}"
+        raise ArgumentError, message
+      end
+    end
   end
 end
diff --git a/ruby/red-arrow/test/test-record-batch.rb b/ruby/red-arrow/test/test-record-batch.rb
index fec640343c..5789eb9da4 100644
--- a/ruby/red-arrow/test/test-record-batch.rb
+++ b/ruby/red-arrow/test/test-record-batch.rb
@@ -184,5 +184,89 @@ class RecordBatchTest < Test::Unit::TestCase
         assert_equal(@counts.size, @record_batch[:count].size)
       end
     end
+
+    sub_test_case("#merge") do
+      def setup
+        @count_array = Arrow::UInt8Array.new([1, 2, 4, 8])
+        @visible_array = Arrow::BooleanArray.new([true, false, nil, true])
+
+        @record_batch = Arrow::RecordBatch.new(
+          count: @count_array,
+          visible: @visible_array,
+        )
+      end
+
+      test("add column") do
+        name_array = Arrow::StringArray.new(["a", "b", "c", "d"])
+
+        expected = Arrow::RecordBatch.new(
+          count: @count_array,
+          visible: @visible_array,
+          name: name_array,
+        )
+
+        assert_equal(expected,
+                     @record_batch.merge(name: name_array))
+      end
+      test("remove column") do
+        expected = Arrow::RecordBatch.new(
+          count: @count_array,
+        )
+
+        assert_equal(expected,
+                     @record_batch.merge(visible: nil))
+      end
+
+      test("replace column") do
+        visible_array = Arrow::Int32Array.new([1, 1, 1, 1])
+
+        expected = Arrow::RecordBatch.new(
+          count: @count_array,
+          visible: visible_array,
+        )
+
+        assert_equal(expected,
+                     @record_batch.merge(visible: visible_array))
+      end
+
+      test("merge record batch add") do
+        name_array = Arrow::StringArray.new(["a", "b", "c", "d"])
+
+        other = Arrow::RecordBatch.new(
+          name: name_array,
+        )
+
+        expected = Arrow::RecordBatch.new(
+          count: @count_array,
+          visible: @visible_array,
+          name: name_array,
+        )
+
+        assert_equal(expected,
+                     @record_batch.merge(other))
+      end
+
+      test("merge record batch replace") do
+        visible_array = Arrow::Int32Array.new([1, 1, 1, 1])
+
+        other = Arrow::RecordBatch.new(
+          visible: visible_array,
+        )
+
+        expected = Arrow::RecordBatch.new(
+          count: @count_array,
+          visible: visible_array,
+        )
+
+        assert_equal(expected,
+                     @record_batch.merge(other))
+      end
+
+      test("invalid target") do
+        assert_raise(ArgumentError) do
+          @record_batch.merge(29)
+        end
+      end
+    end
   end
 end

Reply via email to