This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new eb9d5194a3 GH-47648: [Archery][Integration] More granularity in JSON 
test cases (#47649)
eb9d5194a3 is described below

commit eb9d5194a306f8145f8600b176f3bd391ee4397c
Author: Antoine Pitrou <[email protected]>
AuthorDate: Wed Oct 1 08:47:22 2025 +0200

    GH-47648: [Archery][Integration] More granularity in JSON test cases 
(#47649)
    
    ### Rationale for this change
    
    Separate binary columns from other primitives in the integration test 
cases, to make life easier when writing a new Arrow implementation.
    
    ### Are these changes tested?
    
    Yes, by integration CI test.
    
    ### Are there any user-facing changes?
    
    No, only for implementers who would use the generated files directly.
    
    * GitHub Issue: #47648
    
    Authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 dev/archery/archery/integration/datagen.py | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py
index 5eea756026..1d3f8720fa 100644
--- a/dev/archery/archery/integration/datagen.py
+++ b/dev/archery/archery/integration/datagen.py
@@ -1562,8 +1562,7 @@ def generate_duplicate_fieldnames_case():
 def generate_primitive_case(batch_sizes, name='primitive'):
     types = ['bool', 'int8', 'int16', 'int32', 'int64',
              'uint8', 'uint16', 'uint32', 'uint64',
-             'float32', 'float64', 'binary', 'utf8',
-             'fixedsizebinary_19', 'fixedsizebinary_120']
+             'float32', 'float64']
 
     fields = []
 
@@ -1574,7 +1573,19 @@ def generate_primitive_case(batch_sizes, name='primitive'):
     return _generate_file(name, fields, batch_sizes)
 
 
-def generate_primitive_large_offsets_case(batch_sizes):
+def generate_binary_case(batch_sizes, name='binary'):
+    types = ['binary', 'utf8', 'fixedsizebinary_19', 'fixedsizebinary_120']
+
+    fields = []
+
+    for type_ in types:
+        fields.append(get_field(type_ + "_nullable", type_, nullable=True))
+        fields.append(get_field(type_ + "_nonnullable", type_, nullable=False))
+
+    return _generate_file(name, fields, batch_sizes)
+
+
+def generate_large_binary_case(batch_sizes):
     types = ['largebinary', 'largeutf8']
 
     fields = []
@@ -1583,7 +1594,7 @@ def generate_primitive_large_offsets_case(batch_sizes):
         fields.append(get_field(type_ + "_nullable", type_, nullable=True))
         fields.append(get_field(type_ + "_nonnullable", type_, nullable=False))
 
-    return _generate_file('primitive_large_offsets', fields, batch_sizes)
+    return _generate_file('large_binary', fields, batch_sizes)
 
 
 def generate_null_case(batch_sizes):
@@ -1907,7 +1918,11 @@ def get_generated_json_files(tempdir=None):
         generate_primitive_case([17, 20], name='primitive'),
         generate_primitive_case([0, 0, 0], name='primitive_zerolength'),
 
-        generate_primitive_large_offsets_case([17, 20]),
+        generate_binary_case([], name='binary_no_batches'),
+        generate_binary_case([17, 20], name='binary'),
+        generate_binary_case([0, 0, 0], name='binary_zerolength'),
+
+        generate_large_binary_case([17, 20]),
 
         generate_null_case([10, 0]),
 

Reply via email to