This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 84a57018b6 GH-49364: [Ruby] Simplify reader tests (#49365)
84a57018b6 is described below
commit 84a57018b68ddba9f71f95c4a16b31a2c05f2bcf
Author: Sutou Kouhei <[email protected]>
AuthorDate: Sun Feb 22 23:25:46 2026 +0900
GH-49364: [Ruby] Simplify reader tests (#49365)
### Rationale for this change
The current reader tests use a sub test case per type, but we can use a single
test per type, as the current writer tests do.
### What changes are included in this PR?
* Create test data inside each test instead of in setup, so that we can use a
test, not a sub test case, per type
* Add `ArrowFormat::Type#to_s`
* Add `ArrowFormat::FileReader#schema`
### Are these changes tested?
Yes.
### Are there any user-facing changes?
No.
* GitHub Issue: #49364
Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
.../lib/arrow-format/file-reader.rb | 1 +
ruby/red-arrow-format/lib/arrow-format/type.rb | 53 +
ruby/red-arrow-format/test/test-reader.rb | 1275 ++++++++------------
3 files changed, 536 insertions(+), 793 deletions(-)
diff --git a/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
b/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
index 03514a3cc2..05399180be 100644
--- a/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
@@ -34,6 +34,7 @@ module ArrowFormat
FOOTER_SIZE_FORMAT = :s32
FOOTER_SIZE_SIZE = IO::Buffer.size_of(FOOTER_SIZE_FORMAT)
+ attr_reader :schema
def initialize(input)
case input
when IO
diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb
b/ruby/red-arrow-format/lib/arrow-format/type.rb
index bc2b313285..3c2d5f3ac9 100644
--- a/ruby/red-arrow-format/lib/arrow-format/type.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/type.rb
@@ -16,6 +16,9 @@
module ArrowFormat
class Type
+ def to_s
+ name
+ end
end
class NullType < Type
@@ -412,6 +415,10 @@ module ArrowFormat
@unit = unit
end
+ def to_s
+ "#{super}(#{unit})"
+ end
+
def to_flatbuffers
fb_type = FB::Time::Data.new
fb_type.bit_width = @bit_width
@@ -477,6 +484,12 @@ module ArrowFormat
TimestampArray.new(self, size, validity_buffer, values_buffer)
end
+ def to_s
+ options = [@unit]
+ options << @time_zone if @time_zone
+ "#{super}(#{options.join(", ")})"
+ end
+
def to_flatbuffers
fb_type = FB::Timestamp::Data.new
fb_type.unit = FB::TimeUnit.try_convert(@unit.to_s.upcase)
@@ -581,6 +594,10 @@ module ArrowFormat
DurationArray.new(self, size, validity_buffer, values_buffer)
end
+ def to_s
+ "#{super}(#{@unit})"
+ end
+
def to_flatbuffers
fb_type = FB::Duration::Data.new
fb_type.unit = FB::TimeUnit.try_convert(@unit.to_s.upcase)
@@ -730,6 +747,10 @@ module ArrowFormat
FixedSizeBinaryArray.new(self, size, validity_buffer, values_buffer)
end
+ def to_s
+ "#{super}(#{@byte_width})"
+ end
+
def to_flatbuffers
fb_type = FB::FixedSizeBinary::Data.new
fb_type.byte_width = @byte_width
@@ -746,6 +767,10 @@ module ArrowFormat
@scale = scale
end
+ def to_s
+ "#{name}(#{@precision}, #{@scale})"
+ end
+
def to_flatbuffers
fb_type = FB::Decimal::Data.new
fb_type.bit_width = @byte_width * 8
@@ -789,6 +814,10 @@ module ArrowFormat
super()
@child = child
end
+
+ def to_s
+ "#{super}<#{child.name}: #{child.type}>"
+ end
end
class ListType < VariableSizeListType
@@ -842,6 +871,13 @@ module ArrowFormat
StructArray.new(self, size, validity_buffer, children)
end
+ def to_s
+ fields = children.collect do |child|
+ "#{child.name}: #{child.type}"
+ end
+ "#{super}<#{fields.join(", ")}>"
+ end
+
def to_flatbuffers
FB::Struct::Data.new
end
@@ -880,6 +916,11 @@ module ArrowFormat
MapArray.new(self, size, validity_buffer, offsets_buffer, child)
end
+ def to_s
+ key, value, = child.type.children
+ "#{name}<#{key.type}, #{value.type}>"
+ end
+
def to_flatbuffers
FB::Map::Data.new
end
@@ -900,6 +941,13 @@ module ArrowFormat
@type_indexes[type] ||= @type_ids.index(type)
end
+ def to_s
+ children = @children.collect.with_index do |child, i|
+ "#{child.name}: #{child.type}=#{@type_ids[i]}"
+ end
+ "#{super}<#{children.join(", ")}>"
+ end
+
def to_flatbuffers
fb_type = FB::Union::Data.new
fb_type.mode = FB::UnionMode.try_convert(@mode.to_s.capitalize)
@@ -975,5 +1023,10 @@ module ArrowFormat
fb_field.type = @value_type.to_flatbuffers
fb_field.dictionary = fb_dictionary_encoding
end
+
+ def to_s
+ "#{super}<index=#{@index_type}, value=#{@value_type}, " +
+ "ordered=#{@ordered}>"
+ end
end
end
diff --git a/ruby/red-arrow-format/test/test-reader.rb
b/ruby/red-arrow-format/test/test-reader.rb
index 10a2597f4a..763e8737b5 100644
--- a/ruby/red-arrow-format/test/test-reader.rb
+++ b/ruby/red-arrow-format/test/test-reader.rb
@@ -16,534 +16,327 @@
# under the License.
module ReaderTests
- def read
- @reader.collect do |record_batch|
- record_batch.to_h.tap do |hash|
- hash.each do |key, value|
- hash[key] = value.to_a
- end
+ def roundtrip(array)
+ Dir.mktmpdir do |tmp_dir|
+ table = Arrow::Table.new(value: array)
+ path = File.join(tmp_dir, "data.#{file_extension}")
+ table.save(path)
+ File.open(path, "rb") do |input|
+ reader = reader_class.new(input)
+ values = []
+ reader.each do |record_batch|
+ values.concat(record_batch.columns[0].to_a)
+ end
+ [reader.schema.fields[0].type, values]
end
+ ensure
+ GC.start
end
end
- def type
- @type ||= @reader.first.schema.fields[0].type
+ def test_null
+ type, values = roundtrip(Arrow::NullArray.new(3))
+ assert_equal(["Null", [nil, nil, nil]],
+ [type.to_s, values])
end
- class << self
- def included(base)
- base.class_eval do
- sub_test_case("Null") do
- def build_array
- Arrow::NullArray.new(3)
- end
-
- def test_read
- assert_equal([{"value" => [nil, nil, nil]}],
- read)
- end
- end
-
- sub_test_case("Boolean") do
- def build_array
- Arrow::BooleanArray.new([true, nil, false])
- end
-
- def test_read
- assert_equal([{"value" => [true, nil, false]}],
- read)
- end
- end
-
- sub_test_case("Int8") do
- def build_array
- Arrow::Int8Array.new([-128, nil, 127])
- end
-
- def test_read
- assert_equal([{"value" => [-128, nil, 127]}],
- read)
- end
- end
-
- sub_test_case("UInt8") do
- def build_array
- Arrow::UInt8Array.new([0, nil, 255])
- end
-
- def test_read
- assert_equal([{"value" => [0, nil, 255]}],
- read)
- end
- end
-
- sub_test_case("Int16") do
- def build_array
- Arrow::Int16Array.new([-32768, nil, 32767])
- end
-
- def test_read
- assert_equal([{"value" => [-32768, nil, 32767]}],
- read)
- end
- end
+ def test_boolean
+ type, values = roundtrip(Arrow::BooleanArray.new([true, nil, false]))
+ assert_equal(["Boolean", [true, nil, false]],
+ [type.to_s, values])
+ end
- sub_test_case("UInt16") do
- def build_array
- Arrow::UInt16Array.new([0, nil, 65535])
- end
+ def test_int8
+ type, values = roundtrip(Arrow::Int8Array.new([-128, nil, 127]))
+ assert_equal(["Int8", [-128, nil, 127]],
+ [type.to_s, values])
+ end
- def test_read
- assert_equal([{"value" => [0, nil, 65535]}],
- read)
- end
- end
+ def test_uint8
+ type, values = roundtrip(Arrow::UInt8Array.new([0, nil, 255]))
+ assert_equal(["UInt8", [0, nil, 255]],
+ [type.to_s, values])
+ end
- sub_test_case("Int32") do
- def build_array
- Arrow::Int32Array.new([-2147483648, nil, 2147483647])
- end
+ def test_int16
+ type, values = roundtrip(Arrow::Int16Array.new([-32768, nil, 32767]))
+ assert_equal(["Int16", [-32768, nil, 32767]],
+ [type.to_s, values])
+ end
- def test_read
- assert_equal([{"value" => [-2147483648, nil, 2147483647]}],
- read)
- end
- end
+ def test_uint16
+ type, values = roundtrip(Arrow::UInt16Array.new([0, nil, 65535]))
+ assert_equal(["UInt16", [0, nil, 65535]],
+ [type.to_s, values])
+ end
- sub_test_case("UInt32") do
- def build_array
- Arrow::UInt32Array.new([0, nil, 4294967295])
- end
+ def test_int32
+ array = Arrow::Int32Array.new([-2147483648, nil, 2147483647])
+ type, values = roundtrip(array)
+ assert_equal(["Int32", [-2147483648, nil, 2147483647]],
+ [type.to_s, values])
+ end
- def test_read
- assert_equal([{"value" => [0, nil, 4294967295]}],
- read)
- end
- end
+ def test_uint32
+ array = Arrow::UInt32Array.new([0, nil, 4294967295])
+ type, values = roundtrip(array)
+ assert_equal(["UInt32", [0, nil, 4294967295]],
+ [type.to_s, values])
+ end
- sub_test_case("Int64") do
- def build_array
- Arrow::Int64Array.new([
+ def test_int64
+ array = Arrow::Int64Array.new([
-9223372036854775808,
nil,
9223372036854775807
])
- end
-
- def test_read
- assert_equal([
- {
- "value" => [
- -9223372036854775808,
- nil,
- 9223372036854775807
- ]
- }
- ],
- read)
- end
- end
-
- sub_test_case("UInt64") do
- def build_array
- Arrow::UInt64Array.new([0, nil, 18446744073709551615])
- end
-
- def test_read
- assert_equal([{"value" => [0, nil, 18446744073709551615]}],
- read)
- end
- end
-
- sub_test_case("Float32") do
- def build_array
- Arrow::FloatArray.new([-0.5, nil, 0.5])
- end
-
- def test_read
- assert_equal([{"value" => [-0.5, nil, 0.5]}],
- read)
- end
- end
+ type, values = roundtrip(array)
+ assert_equal(["Int64", [-9223372036854775808, nil, 9223372036854775807]],
+ [type.to_s, values])
+ end
- sub_test_case("Float64") do
- def build_array
- Arrow::DoubleArray.new([-0.5, nil, 0.5])
- end
+ def test_uint64
+ array = Arrow::UInt64Array.new([0, nil, 18446744073709551615])
+ type, values = roundtrip(array)
+ assert_equal(["UInt64", [0, nil, 18446744073709551615]],
+ [type.to_s, values])
+ end
- def test_read
- assert_equal([{"value" => [-0.5, nil, 0.5]}],
- read)
- end
- end
+ def test_float32
+ type, values = roundtrip(Arrow::FloatArray.new([-0.5, nil, 0.5]))
+ assert_equal(["Float32", [-0.5, nil, 0.5]],
+ [type.to_s, values])
+ end
- sub_test_case("Date32") do
- def setup(&block)
- @date_2017_08_28 = 17406
- @date_2025_12_09 = 20431
- super(&block)
- end
-
- def build_array
- Arrow::Date32Array.new([@date_2017_08_28, nil, @date_2025_12_09])
- end
-
- def test_read
- assert_equal([
- {
- "value" => [
- @date_2017_08_28,
- nil,
- @date_2025_12_09,
- ],
- },
- ],
- read)
- end
- end
+ def test_float64
+ type, values = roundtrip(Arrow::DoubleArray.new([-0.5, nil, 0.5]))
+ assert_equal(["Float64", [-0.5, nil, 0.5]],
+ [type.to_s, values])
+ end
- sub_test_case("Date64") do
- def setup(&block)
- @date_2017_08_28_00_00_00 = 1503878400000
- @date_2025_12_10_00_00_00 = 1765324800000
- super(&block)
- end
+ def test_date32
+ date_2017_08_28 = 17406
+ date_2025_12_09 = 20431
+ array = Arrow::Date32Array.new([date_2017_08_28, nil, date_2025_12_09])
+ type, values = roundtrip(array)
+ assert_equal(["Date32", [date_2017_08_28, nil, date_2025_12_09]],
+ [type.to_s, values])
+ end
- def build_array
- Arrow::Date64Array.new([
- @date_2017_08_28_00_00_00,
+ def test_date64
+ date_2017_08_28_00_00_00 = 1503878400000
+ date_2025_12_10_00_00_00 = 1765324800000
+ array = Arrow::Date64Array.new([
+ date_2017_08_28_00_00_00,
nil,
- @date_2025_12_10_00_00_00,
+ date_2025_12_10_00_00_00,
])
- end
-
- def test_read
- assert_equal([
- {
- "value" => [
- @date_2017_08_28_00_00_00,
- nil,
- @date_2025_12_10_00_00_00,
- ],
- },
- ],
- read)
- end
- end
-
- sub_test_case("Time32(:second)") do
- def setup(&block)
- @time_00_00_10 = 10
- @time_00_01_10 = 60 + 10
- super(&block)
- end
-
- def build_array
- Arrow::Time32Array.new(:second,
- [@time_00_00_10, nil, @time_00_01_10])
- end
-
- def test_read
- assert_equal([
- {
- "value" => [
- @time_00_00_10,
- nil,
- @time_00_01_10,
- ],
- },
- ],
- read)
- end
-
- def test_type
- assert_equal(:second, type.unit)
- end
- end
+ type, values = roundtrip(array)
+ assert_equal([
+ "Date64",
+ [date_2017_08_28_00_00_00, nil, date_2025_12_10_00_00_00],
+ ],
+ [type.to_s, values])
+ end
- sub_test_case("Time32(:millisecond)") do
- def setup(&block)
- @time_00_00_10_000 = 10 * 1000
- @time_00_01_10_000 = (60 + 10) * 1000
- super(&block)
- end
+ def test_time32_second
+ time_00_00_10 = 10
+ time_00_01_10 = 60 + 10
+ array = Arrow::Time32Array.new(:second,
+ [time_00_00_10, nil, time_00_01_10])
+ type, values = roundtrip(array)
+ assert_equal(["Time32(second)", [time_00_00_10, nil, time_00_01_10]],
+ [type.to_s, values])
+ end
- def build_array
- Arrow::Time32Array.new(:milli,
+ def test_time32_millisecond
+ time_00_00_10_000 = 10 * 1000
+ time_00_01_10_000 = (60 + 10) * 1000
+ array = Arrow::Time32Array.new(:milli,
[
- @time_00_00_10_000,
+ time_00_00_10_000,
nil,
- @time_00_01_10_000,
+ time_00_01_10_000,
])
- end
-
- def test_read
- assert_equal([
- {
- "value" => [
- @time_00_00_10_000,
- nil,
- @time_00_01_10_000,
- ],
- },
- ],
- read)
- end
-
- def test_type
- assert_equal(:millisecond, type.unit)
- end
- end
-
- sub_test_case("Time64(:microsecond)") do
- def setup(&block)
- @time_00_00_10_000_000 = 10 * 1_000_000
- @time_00_01_10_000_000 = (60 + 10) * 1_000_000
- super(&block)
- end
+ type, values = roundtrip(array)
+ assert_equal([
+ "Time32(millisecond)",
+ [time_00_00_10_000, nil, time_00_01_10_000],
+ ],
+ [type.to_s, values])
+ end
- def build_array
- Arrow::Time64Array.new(:micro,
+ def test_time64_microsecond
+ time_00_00_10_000_000 = 10 * 1_000_000
+ time_00_01_10_000_000 = (60 + 10) * 1_000_000
+ array = Arrow::Time64Array.new(:micro,
[
- @time_00_00_10_000_000,
+ time_00_00_10_000_000,
nil,
- @time_00_01_10_000_000,
+ time_00_01_10_000_000,
])
- end
-
- def test_read
- assert_equal([
- {
- "value" => [
- @time_00_00_10_000_000,
- nil,
- @time_00_01_10_000_000,
- ],
- },
- ],
- read)
- end
-
- def test_type
- assert_equal(:microsecond, type.unit)
- end
- end
-
- sub_test_case("Time64(:nanosecond)") do
- def setup(&block)
- @time_00_00_10_000_000_000 = 10 * 1_000_000_000
- @time_00_01_10_000_000_000 = (60 + 10) * 1_000_000_000
- super(&block)
- end
+ type, values = roundtrip(array)
+ assert_equal([
+ "Time64(microsecond)",
+ [time_00_00_10_000_000, nil, time_00_01_10_000_000],
+ ],
+ [type.to_s, values])
+ end
- def build_array
- Arrow::Time64Array.new(:nano,
+ def test_time64_nanosecond
+ time_00_00_10_000_000_000 = 10 * 1_000_000_000
+ time_00_01_10_000_000_000 = (60 + 10) * 1_000_000_000
+ array = Arrow::Time64Array.new(:nano,
[
- @time_00_00_10_000_000_000,
+ time_00_00_10_000_000_000,
nil,
- @time_00_01_10_000_000_000,
+ time_00_01_10_000_000_000,
])
- end
-
- def test_read
- assert_equal([
- {
- "value" => [
- @time_00_00_10_000_000_000,
- nil,
- @time_00_01_10_000_000_000,
- ],
- },
- ],
- read)
- end
-
- def test_type
- assert_equal(:nanosecond, type.unit)
- end
- end
-
- sub_test_case("Timestamp(:second)") do
- def setup(&block)
- @timestamp_2019_11_17_15_09_11 = 1574003351
- @timestamp_2025_12_16_05_33_58 = 1765863238
- super(&block)
- end
+ type, values = roundtrip(array)
+ assert_equal([
+ "Time64(nanosecond)",
+ [
+ time_00_00_10_000_000_000,
+ nil,
+ time_00_01_10_000_000_000,
+ ],
+ ],
+ [type.to_s, values])
+ end
- def build_array
- Arrow::TimestampArray.new(:second,
+ def test_timestamp_second
+ timestamp_2019_11_17_15_09_11 = 1574003351
+ timestamp_2025_12_16_05_33_58 = 1765863238
+ array = Arrow::TimestampArray.new(:second,
[
- @timestamp_2019_11_17_15_09_11,
+ timestamp_2019_11_17_15_09_11,
nil,
- @timestamp_2025_12_16_05_33_58,
+ timestamp_2025_12_16_05_33_58,
])
- end
-
- def test_read
- assert_equal([
- {
- "value" => [
- @timestamp_2019_11_17_15_09_11,
- nil,
- @timestamp_2025_12_16_05_33_58,
- ],
- },
- ],
- read)
- end
- end
-
- sub_test_case("Timestamp(:millisecond)") do
- def setup(&block)
- @timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000
- @timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000
- super(&block)
- end
+ type, values = roundtrip(array)
+ assert_equal([
+ "Timestamp(second)",
+ [
+ timestamp_2019_11_17_15_09_11,
+ nil,
+ timestamp_2025_12_16_05_33_58,
+ ],
+ ],
+ [type.to_s, values])
+ end
- def build_array
- Arrow::TimestampArray.new(:milli,
+ def test_timestamp_millisecond
+ timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000
+ timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000
+ array = Arrow::TimestampArray.new(:milli,
[
- @timestamp_2019_11_17_15_09_11,
+ timestamp_2019_11_17_15_09_11,
nil,
- @timestamp_2025_12_16_05_33_58,
+ timestamp_2025_12_16_05_33_58,
])
- end
-
- def test_read
- assert_equal([
- {
- "value" => [
- @timestamp_2019_11_17_15_09_11,
- nil,
- @timestamp_2025_12_16_05_33_58,
- ],
- },
- ],
- read)
- end
- end
-
- sub_test_case("Timestamp(:microsecond)") do
- def setup(&block)
- @timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000_000
- @timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000
- super(&block)
- end
+ type, values = roundtrip(array)
+ assert_equal([
+ "Timestamp(millisecond)",
+ [
+ timestamp_2019_11_17_15_09_11,
+ nil,
+ timestamp_2025_12_16_05_33_58,
+ ],
+ ],
+ [type.to_s, values])
+ end
- def build_array
- Arrow::TimestampArray.new(:micro,
+ def test_timestamp_microsecond
+ timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000_000
+ timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000
+ array = Arrow::TimestampArray.new(:micro,
[
- @timestamp_2019_11_17_15_09_11,
+ timestamp_2019_11_17_15_09_11,
nil,
- @timestamp_2025_12_16_05_33_58,
+ timestamp_2025_12_16_05_33_58,
])
- end
-
- def test_read
- assert_equal([
- {
- "value" => [
- @timestamp_2019_11_17_15_09_11,
- nil,
- @timestamp_2025_12_16_05_33_58,
- ],
- },
- ],
- read)
- end
- end
-
- sub_test_case("Timestamp(:nanosecond)") do
- def setup(&block)
- @timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000_000_000
- @timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000_000
- super(&block)
- end
+ type, values = roundtrip(array)
+ assert_equal([
+ "Timestamp(microsecond)",
+ [
+ timestamp_2019_11_17_15_09_11,
+ nil,
+ timestamp_2025_12_16_05_33_58,
+ ],
+ ],
+ [type.to_s, values])
+ end
- def build_array
- Arrow::TimestampArray.new(:nano,
+ def test_timestamp_nanosecond
+ timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000_000_000
+ timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000_000
+ array = Arrow::TimestampArray.new(:nano,
[
- @timestamp_2019_11_17_15_09_11,
+ timestamp_2019_11_17_15_09_11,
nil,
- @timestamp_2025_12_16_05_33_58,
+ timestamp_2025_12_16_05_33_58,
])
- end
-
- def test_read
- assert_equal([
- {
- "value" => [
- @timestamp_2019_11_17_15_09_11,
- nil,
- @timestamp_2025_12_16_05_33_58,
- ],
- },
- ],
- read)
- end
- end
+ type, values = roundtrip(array)
+ assert_equal([
+ "Timestamp(nanosecond)",
+ [
+ timestamp_2019_11_17_15_09_11,
+ nil,
+ timestamp_2025_12_16_05_33_58,
+ ],
+ ],
+ [type.to_s, values])
+ end
- sub_test_case("Timestamp(time_zone)") do
- def setup(&block)
- @time_zone = "UTC"
- @timestamp_2019_11_17_15_09_11 = 1574003351
- @timestamp_2025_12_16_05_33_58 = 1765863238
- super(&block)
- end
-
- def build_array
- data_type = Arrow::TimestampDataType.new(:second, @time_zone)
- Arrow::TimestampArray.new(data_type,
+ def test_timestamp_time_zone
+ time_zone = "UTC"
+ timestamp_2019_11_17_15_09_11 = 1574003351
+ timestamp_2025_12_16_05_33_58 = 1765863238
+ data_type = Arrow::TimestampDataType.new(:second, time_zone)
+ array = Arrow::TimestampArray.new(data_type,
[
- @timestamp_2019_11_17_15_09_11,
+ timestamp_2019_11_17_15_09_11,
nil,
- @timestamp_2025_12_16_05_33_58,
+ timestamp_2025_12_16_05_33_58,
])
- end
-
- def test_type
- assert_equal([:second, @time_zone],
- [type.unit, type.time_zone])
- end
- end
-
- sub_test_case("YearMonthInterval") do
- def build_array
- Arrow::MonthIntervalArray.new([0, nil, 100])
- end
+ type, values = roundtrip(array)
+ assert_equal([
+ "Timestamp(second, #{time_zone})",
+ [
+ timestamp_2019_11_17_15_09_11,
+ nil,
+ timestamp_2025_12_16_05_33_58,
+ ],
+ ],
+ [type.to_s, values])
+ end
- def test_read
- assert_equal([{"value" => [0, nil, 100]}],
- read)
- end
- end
+ def test_year_month_interval
+ type, values = roundtrip(Arrow::MonthIntervalArray.new([0, nil, 100]))
+ assert_equal(["YearMonthInterval", [0, nil, 100]],
+ [type.to_s, values])
+ end
- sub_test_case("DayTimeInterval") do
- def build_array
- Arrow::DayTimeIntervalArray.new([
+ def test_day_time_interval
+ array = Arrow::DayTimeIntervalArray.new([
{day: 1, millisecond: 100},
nil,
{day: 3, millisecond: 300},
])
- end
-
- def test_read
- assert_equal([
- {
- "value" => [
- [1, 100],
- nil,
- [3, 300],
- ],
- },
- ],
- read)
- end
- end
+ type, values = roundtrip(array)
+ assert_equal([
+ "DayTimeInterval",
+ [
+ [1, 100],
+ nil,
+ [3, 300],
+ ],
+ ],
+ [type.to_s, values])
+ end
- sub_test_case("MonthDayNanoInterval") do
- def build_array
- Arrow::MonthDayNanoIntervalArray.new([
+ def test_month_day_nano_interval
+ array = Arrow::MonthDayNanoIntervalArray.new([
{
month: 1,
day: 1,
@@ -556,381 +349,277 @@ module ReaderTests
nanosecond: 300,
},
])
- end
-
- def test_read
- assert_equal([
- {
- "value" => [
- [1, 1, 100],
- nil,
- [3, 3, 300],
- ],
- },
- ],
- read)
- end
- end
-
- sub_test_case("Duration(:second)") do
- def build_array
- Arrow::DurationArray.new(:second, [0, nil, 100])
- end
-
- def test_read
- assert_equal([{"value" => [0, nil, 100]}],
- read)
- end
-
- def test_type
- assert_equal(:second, type.unit)
- end
- end
-
- sub_test_case("Duration(:millisecond)") do
- def build_array
- Arrow::DurationArray.new(:milli, [0, nil, 100_000])
- end
-
- def test_read
- assert_equal([{"value" => [0, nil, 100_000]}],
- read)
- end
-
- def test_type
- assert_equal(:millisecond, type.unit)
- end
- end
-
- sub_test_case("Duration(:microsecond)") do
- def build_array
- Arrow::DurationArray.new(:micro, [0, nil, 100_000_000])
- end
-
- def test_read
- assert_equal([{"value" => [0, nil, 100_000_000]}],
- read)
- end
-
- def test_type
- assert_equal(:microsecond, type.unit)
- end
- end
-
- sub_test_case("Duration(:nanosecond)") do
- def build_array
- Arrow::DurationArray.new(:nano, [0, nil, 100_000_000_000])
- end
-
- def test_read
- assert_equal([{"value" => [0, nil, 100_000_000_000]}],
- read)
- end
-
- def test_type
- assert_equal(:nanosecond, type.unit)
- end
- end
+ type, values = roundtrip(array)
+ assert_equal([
+ "MonthDayNanoInterval",
+ [
+ [1, 1, 100],
+ nil,
+ [3, 3, 300],
+ ],
+ ],
+ [type.to_s, values])
+ end
- sub_test_case("Binary") do
- def build_array
- Arrow::BinaryArray.new(["Hello".b, nil, "World".b])
- end
+ def test_duration_second
+ type, values = roundtrip(Arrow::DurationArray.new(:second, [0, nil, 100]))
+ assert_equal(["Duration(second)", [0, nil, 100]],
+ [type.to_s, values])
+ end
- def test_read
- assert_equal([{"value" => ["Hello".b, nil, "World".b]}],
- read)
- end
- end
+ def test_duration_millisecond
+ array = Arrow::DurationArray.new(:milli, [0, nil, 100_000])
+ type, values = roundtrip(array)
+ assert_equal(["Duration(millisecond)", [0, nil, 100_000]],
+ [type.to_s, values])
+ end
- sub_test_case("LargeBinary") do
- def build_array
- Arrow::LargeBinaryArray.new(["Hello".b, nil, "World".b])
- end
+ def test_duration_microsecond
+ array = Arrow::DurationArray.new(:micro, [0, nil, 100_000_000])
+ type, values = roundtrip(array)
+ assert_equal(["Duration(microsecond)", [0, nil, 100_000_000]],
+ [type.to_s, values])
+ end
- def test_read
- assert_equal([{"value" => ["Hello".b, nil, "World".b]}],
- read)
- end
- end
+ def test_duration_nanosecond
+ array = Arrow::DurationArray.new(:nano, [0, nil, 100_000_000_000])
+ type, values = roundtrip(array)
+ assert_equal(["Duration(nanosecond)", [0, nil, 100_000_000_000]],
+ [type.to_s, values])
+ end
- sub_test_case("UTF8") do
- def build_array
- Arrow::StringArray.new(["Hello", nil, "World"])
- end
+ def test_binary
+ array = Arrow::BinaryArray.new(["Hello".b, nil, "World".b])
+ type, values = roundtrip(array)
+ assert_equal(["Binary", ["Hello".b, nil, "World".b]],
+ [type.to_s, values])
+ end
- def test_read
- assert_equal([{"value" => ["Hello", nil, "World"]}],
- read)
- end
- end
+ def test_large_binary
+ array = Arrow::LargeBinaryArray.new(["Hello".b, nil, "World".b])
+ type, values = roundtrip(array)
+ assert_equal(["LargeBinary", ["Hello".b, nil, "World".b]],
+ [type.to_s, values])
+ end
- sub_test_case("LargeUTF8") do
- def build_array
- Arrow::LargeStringArray.new(["Hello", nil, "World"])
- end
+ def test_utf8
+ array = Arrow::StringArray.new(["Hello", nil, "World"])
+ type, values = roundtrip(array)
+ assert_equal(["UTF8", ["Hello", nil, "World"]],
+ [type.to_s, values])
+ end
- def test_read
- assert_equal([{"value" => ["Hello", nil, "World"]}],
- read)
- end
- end
+ def test_large_utf8
+ array = Arrow::LargeStringArray.new(["Hello", nil, "World"])
+ type, values = roundtrip(array)
+ assert_equal(["LargeUTF8", ["Hello", nil, "World"]],
+ [type.to_s, values])
+ end
- sub_test_case("FixedSizeBinary") do
- def build_array
- data_type = Arrow::FixedSizeBinaryDataType.new(4)
- Arrow::FixedSizeBinaryArray.new(data_type,
+ def test_fixed_size_binary
+ data_type = Arrow::FixedSizeBinaryDataType.new(4)
+ array = Arrow::FixedSizeBinaryArray.new(data_type,
["0124".b, nil, "abcd".b])
- end
-
- def test_read
- assert_equal([{"value" => ["0124".b, nil, "abcd".b]}],
- read)
- end
- end
+ type, values = roundtrip(array)
+ assert_equal(["FixedSizeBinary(4)", ["0124".b, nil, "abcd".b]],
+ [type.to_s, values])
+ end
- sub_test_case("Decimal128") do
- def build_array
- @positive_small = "1.200"
- @positive_large = ("1234567890" * 3) + "12345.678"
- @negative_small = "-1.200"
- @negative_large = "-" + ("1234567890" * 3) + "12345.678"
- Arrow::Decimal128Array.new({precision: 38, scale: 3},
+ def test_decimal128
+ positive_small = "1.200"
+ positive_large = ("1234567890" * 3) + "12345.678"
+ negative_small = "-1.200"
+ negative_large = "-" + ("1234567890" * 3) + "12345.678"
+ array = Arrow::Decimal128Array.new({precision: 38, scale: 3},
[
- @positive_large,
- @positive_small,
+ positive_large,
+ positive_small,
nil,
- @negative_small,
- @negative_large,
+ negative_small,
+ negative_large,
])
- end
-
- def test_read
- assert_equal([
- {
- "value" => [
- BigDecimal(@positive_large),
- BigDecimal(@positive_small),
- nil,
- BigDecimal(@negative_small),
- BigDecimal(@negative_large),
- ],
- },
- ],
- read)
- end
- end
+ type, values = roundtrip(array)
+ assert_equal([
+ "Decimal128(38, 3)",
+ [
+ BigDecimal(positive_large),
+ BigDecimal(positive_small),
+ nil,
+ BigDecimal(negative_small),
+ BigDecimal(negative_large),
+ ],
+ ],
+ [type.to_s, values])
+ end
- sub_test_case("Decimal256") do
- def build_array
- @positive_small = "1.200"
- @positive_large = ("1234567890" * 7) + "123.456"
- @negative_small = "-1.200"
- @negative_large = "-" + ("1234567890" * 7) + "123.456"
- Arrow::Decimal256Array.new({precision: 76, scale: 3},
+ def test_decimal256
+ positive_small = "1.200"
+ positive_large = ("1234567890" * 7) + "123.456"
+ negative_small = "-1.200"
+ negative_large = "-" + ("1234567890" * 7) + "123.456"
+ array = Arrow::Decimal256Array.new({precision: 76, scale: 3},
[
- @positive_large,
- @positive_small,
+ positive_large,
+ positive_small,
nil,
- @negative_small,
- @negative_large,
+ negative_small,
+ negative_large,
])
- end
-
- def test_read
- assert_equal([
- {
- "value" => [
- BigDecimal(@positive_large),
- BigDecimal(@positive_small),
- nil,
- BigDecimal(@negative_small),
- BigDecimal(@negative_large),
- ],
- },
- ],
- read)
- end
- end
-
- sub_test_case("List") do
- def build_array
- data_type = Arrow::ListDataType.new(name: "count", type: :int8)
- Arrow::ListArray.new(data_type, [[-128, 127], nil, [-1, 0, 1]])
- end
+ type, values = roundtrip(array)
+ assert_equal([
+ "Decimal256(76, 3)",
+ [
+ BigDecimal(positive_large),
+ BigDecimal(positive_small),
+ nil,
+ BigDecimal(negative_small),
+ BigDecimal(negative_large),
+ ],
+ ],
+ [type.to_s, values])
+ end
- def test_read
- assert_equal([{"value" => [[-128, 127], nil, [-1, 0, 1]]}],
- read)
- end
- end
+ def test_list
+ data_type = Arrow::ListDataType.new(name: "count", type: :int8)
+ array = Arrow::ListArray.new(data_type, [[-128, 127], nil, [-1, 0, 1]])
+ type, values = roundtrip(array)
+ assert_equal(["List<count: Int8>", [[-128, 127], nil, [-1, 0, 1]]],
+ [type.to_s, values])
+ end
- sub_test_case("LargeList") do
- def build_array
- data_type = Arrow::LargeListDataType.new(name: "count",
- type: :int8)
- Arrow::LargeListArray.new(data_type,
+ def test_large_list
+ data_type = Arrow::LargeListDataType.new(name: "count",
+ type: :int8)
+ array = Arrow::LargeListArray.new(data_type,
[[-128, 127], nil, [-1, 0, 1]])
- end
-
- def test_read
- assert_equal([
- {
- "value" => [
- [-128, 127],
- nil,
- [-1, 0, 1],
- ],
- },
- ],
- read)
- end
- end
+ type, values = roundtrip(array)
+ assert_equal([
+ "LargeList<count: Int8>",
+ [
+ [-128, 127],
+ nil,
+ [-1, 0, 1],
+ ],
+ ],
+ [type.to_s, values])
+ end
- sub_test_case("Struct") do
- def build_array
- data_type = Arrow::StructDataType.new(count: :int8,
- visible: :boolean)
- Arrow::StructArray.new(data_type,
+ def test_struct
+ data_type = Arrow::StructDataType.new(count: :int8,
+ visible: :boolean)
+ array = Arrow::StructArray.new(data_type,
[[-128, nil], nil, [nil, true]])
- end
-
- def test_read
- assert_equal([
- {
- "value" => [
- [-128, nil],
- nil,
- [nil, true],
- ],
- },
- ],
- read)
- end
- end
+ type, values = roundtrip(array)
+ assert_equal([
+ "Struct<count: Int8, visible: Boolean>",
+ [
+ [-128, nil],
+ nil,
+ [nil, true],
+ ],
+ ],
+ [type.to_s, values])
+ end
- sub_test_case("DenseUnion") do
- def build_array
- fields = [
- Arrow::Field.new("number", :int8),
- Arrow::Field.new("text", :string),
- ]
- type_ids = [11, 13]
- data_type = Arrow::DenseUnionDataType.new(fields, type_ids)
- types = Arrow::Int8Array.new([11, 13, 11, 13, 13])
- value_offsets = Arrow::Int32Array.new([0, 0, 1, 1, 2])
- children = [
- Arrow::Int8Array.new([1, nil]),
- Arrow::StringArray.new(["a", "b", "c"])
- ]
- Arrow::DenseUnionArray.new(data_type,
+ def test_dense_union
+ fields = [
+ Arrow::Field.new("number", :int8),
+ Arrow::Field.new("text", :string),
+ ]
+ type_ids = [11, 13]
+ data_type = Arrow::DenseUnionDataType.new(fields, type_ids)
+ types = Arrow::Int8Array.new([11, 13, 11, 13, 13])
+ value_offsets = Arrow::Int32Array.new([0, 0, 1, 1, 2])
+ children = [
+ Arrow::Int8Array.new([1, nil]),
+ Arrow::StringArray.new(["a", "b", "c"])
+ ]
+ array = Arrow::DenseUnionArray.new(data_type,
types,
value_offsets,
children)
- end
-
- def test_read
- assert_equal([{"value" => [1, "a", nil, "b", "c"]}],
- read)
- end
- end
+ type, values = roundtrip(array)
+ assert_equal([
+ "DenseUnion<number: Int8=11, text: UTF8=13>",
+ [1, "a", nil, "b", "c"],
+ ],
+ [type.to_s, values])
+ end
- sub_test_case("SparseUnion") do
- def build_array
- fields = [
- Arrow::Field.new("number", :int8),
- Arrow::Field.new("text", :string),
- ]
- type_ids = [11, 13]
- data_type = Arrow::SparseUnionDataType.new(fields, type_ids)
- types = Arrow::Int8Array.new([11, 13, 11, 13, 11])
- children = [
- Arrow::Int8Array.new([1, nil, nil, nil, 5]),
- Arrow::StringArray.new([nil, "b", nil, "d", nil])
- ]
- Arrow::SparseUnionArray.new(data_type, types, children)
- end
-
- def test_read
- assert_equal([{"value" => [1, "b", nil, "d", 5]}],
- read)
- end
- end
+ def test_sparse_union
+ fields = [
+ Arrow::Field.new("number", :int8),
+ Arrow::Field.new("text", :string),
+ ]
+ type_ids = [11, 13]
+ data_type = Arrow::SparseUnionDataType.new(fields, type_ids)
+ types = Arrow::Int8Array.new([11, 13, 11, 13, 11])
+ children = [
+ Arrow::Int8Array.new([1, nil, nil, nil, 5]),
+ Arrow::StringArray.new([nil, "b", nil, "d", nil])
+ ]
+ array = Arrow::SparseUnionArray.new(data_type, types, children)
+ type, values = roundtrip(array)
+ assert_equal([
+ "SparseUnion<number: Int8=11, text: UTF8=13>",
+ [1, "b", nil, "d", 5],
+ ],
+ [type.to_s, values])
+ end
- sub_test_case("Map") do
- def build_array
- data_type = Arrow::MapDataType.new(:string, :int8)
- Arrow::MapArray.new(data_type,
+ def test_map
+ data_type = Arrow::MapDataType.new(:string, :int8)
+ array = Arrow::MapArray.new(data_type,
[
{"a" => -128, "b" => 127},
nil,
{"c" => nil},
])
- end
-
- def test_read
- assert_equal([
- {
- "value" => [
- {"a" => -128, "b" => 127},
- nil,
- {"c" => nil},
- ],
- },
- ],
- read)
- end
- end
+ type, values = roundtrip(array)
+ assert_equal([
+ "Map<UTF8, Int8>",
+ [
+ {"a" => -128, "b" => 127},
+ nil,
+ {"c" => nil},
+ ],
+ ],
+ [type.to_s, values])
+ end
- sub_test_case("Dictionary") do
- def build_array
- values = ["a", "b", "c", nil, "a"]
- string_array = Arrow::StringArray.new(values)
- string_array.dictionary_encode
- end
-
- def test_read
- assert_equal([{"value" => ["a", "b", "c", nil, "a"]}],
- read)
- end
- end
- end
- end
+ def test_dictionary
+ values = ["a", "b", "c", nil, "a"]
+ string_array = Arrow::StringArray.new(values)
+ array = string_array.dictionary_encode
+ type, values = roundtrip(array)
+ assert_equal([
+ "Dictionary<index=Int32, value=UTF8, ordered=false>",
+ ["a", "b", "c", nil, "a"],
+ ],
+ [type.to_s, values])
end
end
class TestFileReader < Test::Unit::TestCase
include ReaderTests
- def setup
- Dir.mktmpdir do |tmp_dir|
- table = Arrow::Table.new(value: build_array)
- @path = File.join(tmp_dir, "data.arrow")
- table.save(@path)
- File.open(@path, "rb") do |input|
- @reader = ArrowFormat::FileReader.new(input)
- yield
- @reader = nil
- end
- GC.start
- end
+ def file_extension
+ "arrow"
+ end
+
+ def reader_class
+ ArrowFormat::FileReader
end
end
class TestStreamingReader < Test::Unit::TestCase
include ReaderTests
- def setup
- Dir.mktmpdir do |tmp_dir|
- table = Arrow::Table.new(value: build_array)
- @path = File.join(tmp_dir, "data.arrows")
- table.save(@path)
- File.open(@path, "rb") do |input|
- @reader = ArrowFormat::StreamingReader.new(input)
- yield
- @reader = nil
- end
- GC.start
- end
+ def file_extension
+ "arrows"
+ end
+
+ def reader_class
+ ArrowFormat::StreamingReader
end
end