This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new e6146ee654 GH-48294: [Ruby] Add RecordBatch#merge (#50175)
e6146ee654 is described below
commit e6146ee654dcf3640442352598e62a981c6f0798
Author: Aaditya Srinivasan <[email protected]>
AuthorDate: Wed Jun 17 07:39:38 2026 +0530
GH-48294: [Ruby] Add RecordBatch#merge (#50175)
### Rationale for this change
This adds `Arrow::RecordBatch#merge` with behavior analogous to
`Arrow::Table#merge`.
### What changes are included in this PR?
- Add `RecordBatch#merge`
- Support merging with a `Hash`
- Support merging with another `RecordBatch`
- Support column addition, replacement, and removal (via `Hash` values of
`nil`)
- Add tests covering the new behavior
### Are these changes tested?
Yes. Added unit tests covering:
- add column
- remove column
- replace column
- merge with another RecordBatch
- invalid merge target
* GitHub Issue: #48294
Authored-by: Aaditya Srinivasan <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
ruby/red-arrow/lib/arrow/record-batch.rb | 78 +++++++++++++++++++++++++++++
ruby/red-arrow/test/test-record-batch.rb | 84 ++++++++++++++++++++++++++++++++
2 files changed, 162 insertions(+)
diff --git a/ruby/red-arrow/lib/arrow/record-batch.rb b/ruby/red-arrow/lib/arrow/record-batch.rb
index 7a9ea296ea..0a2ba359b3 100644
--- a/ruby/red-arrow/lib/arrow/record-batch.rb
+++ b/ruby/red-arrow/lib/arrow/record-batch.rb
@@ -63,6 +63,63 @@ module Arrow
table
end
+ def merge(other) # Returns a new record batch built from self's columns plus the changes described by other (a Hash or a RecordBatch); raises ArgumentError otherwise.
+ added_columns = {} # column name => {field:, data:} entry to add or substitute
+ removed_columns = {} # column name => true for columns to drop
+
+ case other
+ when Hash
+ other.each do |name, value|
+ name = name.to_s # normalize the key with to_s before using it as a lookup name
+ if value
+ added_columns[name] = ensure_raw_column(name, value)
+ else # a nil (or false) value marks the column for removal
+ removed_columns[name] = true
+ end
+ end
+ when RecordBatch
+ other.columns.each do |column| # every column of other is an addition or a replacement; this branch never removes
+ name = column.name
+ added_columns[name] = ensure_raw_column(name, column)
+ end
+ else
+ message = "merge target must be Hash or Arrow::RecordBatch: " +
+ "<#{other.inspect}>: #{inspect}"
+ raise ArgumentError, message
+ end
+
+ new_columns = []
+ columns.each do |column| # walk self's columns in their current order
+ column_name = column.name
+ new_column = added_columns.delete(column_name) # delete so that only not-yet-present names remain afterwards
+ if new_column # a same-named entry takes the existing column's position
+ new_columns << new_column
+ next
+ end
+ next if removed_columns.key?(column_name) # columns marked for removal are skipped
+ new_columns << ensure_raw_column(column_name, column) # untouched columns are carried over
+ end
+
+ added_columns.each_value do |new_column| # leftovers are new names; they are appended after the existing columns
+ new_columns << new_column
+ end
+
+ new_fields = []
+ new_arrays = []
+ new_columns.each do |new_column| # split each entry into its schema field and its data
+ new_fields << new_column[:field]
+ new_arrays << new_column[:data]
+ end
+
+ record_batch = self.class.new( # the row count is taken from self unchanged
+ Schema.new(new_fields),
+ n_rows,
+ new_arrays,
+ )
+ share_input(record_batch) # NOTE(review): share_input is defined outside this hunk; presumably lets the result keep self's input referenced -- confirm
+ record_batch
+ end
+
def respond_to_missing?(name, include_private)
return true if find_column(name)
super
@@ -75,5 +132,26 @@ module Arrow
end
super
end
+
+ private
+
+ def ensure_raw_column(name, data) # Normalizes data into a {field:, data:} Hash; raises ArgumentError for any other kind of data.
+ case data
+ when Array # NOTE(review): presumably resolves to Arrow::Array inside module Arrow, as the error message below states -- confirm
+ {
+ field: Field.new(name, data.value_data_type), # a field is created from the given name and the array's value data type
+ data: data,
+ }
+ when Column # the column's own field is reused, so name is not consulted in this branch
+ {
+ field: data.field,
+ data: data.data,
+ }
+ else
+ message = "column must be Arrow::Array or Arrow::Column: " +
+ "<#{name}>: <#{data.inspect}>: #{inspect}"
+ raise ArgumentError, message
+ end
+ end
end
end
diff --git a/ruby/red-arrow/test/test-record-batch.rb b/ruby/red-arrow/test/test-record-batch.rb
index fec640343c..5789eb9da4 100644
--- a/ruby/red-arrow/test/test-record-batch.rb
+++ b/ruby/red-arrow/test/test-record-batch.rb
@@ -184,5 +184,89 @@ class RecordBatchTest < Test::Unit::TestCase
assert_equal(@counts.size, @record_batch[:count].size)
end
end
+
+ sub_test_case("#merge") do
+ def setup # Fixture shared by the merge tests: a 4-row batch with a UInt8 count column and a Boolean visible column.
+ @count_array = Arrow::UInt8Array.new([1, 2, 4, 8])
+ @visible_array = Arrow::BooleanArray.new([true, false, nil, true])
+
+ @record_batch = Arrow::RecordBatch.new(
+ count: @count_array,
+ visible: @visible_array,
+ )
+ end
+
+ test("add column") do # a Hash key that is not an existing column is expected after the existing columns
+ name_array = Arrow::StringArray.new(["a", "b", "c", "d"])
+
+ expected = Arrow::RecordBatch.new(
+ count: @count_array,
+ visible: @visible_array,
+ name: name_array,
+ )
+
+ assert_equal(expected,
+ @record_batch.merge(name: name_array))
+ end
+ test("remove column") do # a nil Hash value is expected to drop the named column
+ expected = Arrow::RecordBatch.new(
+ count: @count_array,
+ )
+
+ assert_equal(expected,
+ @record_batch.merge(visible: nil))
+ end
+
+ test("replace column") do # an existing name is replaced in position, here with a different array type (Boolean -> Int32)
+ visible_array = Arrow::Int32Array.new([1, 1, 1, 1])
+
+ expected = Arrow::RecordBatch.new(
+ count: @count_array,
+ visible: visible_array,
+ )
+
+ assert_equal(expected,
+ @record_batch.merge(visible: visible_array))
+ end
+
+ test("merge record batch add") do # same expectation as "add column", but the new column arrives in another RecordBatch
+ name_array = Arrow::StringArray.new(["a", "b", "c", "d"])
+
+ other = Arrow::RecordBatch.new(
+ name: name_array,
+ )
+
+ expected = Arrow::RecordBatch.new(
+ count: @count_array,
+ visible: @visible_array,
+ name: name_array,
+ )
+
+ assert_equal(expected,
+ @record_batch.merge(other))
+ end
+
+ test("merge record batch replace") do # same expectation as "replace column", but the replacement arrives in another RecordBatch
+ visible_array = Arrow::Int32Array.new([1, 1, 1, 1])
+
+ other = Arrow::RecordBatch.new(
+ visible: visible_array,
+ )
+
+ expected = Arrow::RecordBatch.new(
+ count: @count_array,
+ visible: visible_array,
+ )
+
+ assert_equal(expected,
+ @record_batch.merge(other))
+ end
+
+ test("invalid target") do # an Integer is neither a Hash nor a RecordBatch, so ArgumentError is expected
+ assert_raise(ArgumentError) do
+ @record_batch.merge(29)
+ end
+ end
+ end
end
end