This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new eb9d5194a3 GH-47648: [Archery][Integration] More granularity in JSON
test cases (#47649)
eb9d5194a3 is described below
commit eb9d5194a306f8145f8600b176f3bd391ee4397c
Author: Antoine Pitrou <[email protected]>
AuthorDate: Wed Oct 1 08:47:22 2025 +0200
GH-47648: [Archery][Integration] More granularity in JSON test cases
(#47649)
### Rationale for this change
Separate binary columns from the other primitive columns in the integration
test cases, to make life easier when writing a new Arrow implementation.
### Are these changes tested?
Yes, by the integration CI tests.
### Are there any user-facing changes?
No; the change only affects implementers who use the generated files directly.
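For those implementers, here is a minimal sketch of materializing the
generated files from Python. It assumes get_generated_json_files writes one
golden JSON file per test case into the given tempdir; the output directory
and the exact on-disk names (e.g. generated_binary.json) are assumptions:

    import os
    from archery.integration.datagen import get_generated_json_files

    outdir = "/tmp/arrow-json"   # arbitrary output directory
    os.makedirs(outdir, exist_ok=True)
    # Assumed behavior: one golden JSON file is written per test case,
    # now including the separate binary / large_binary cases.
    get_generated_json_files(tempdir=outdir)
    print(sorted(os.listdir(outdir)))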
* GitHub Issue: #47648
Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
dev/archery/archery/integration/datagen.py | 25 ++++++++++++++++++++-----
1 file changed, 20 insertions(+), 5 deletions(-)
diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py
index 5eea756026..1d3f8720fa 100644
--- a/dev/archery/archery/integration/datagen.py
+++ b/dev/archery/archery/integration/datagen.py
@@ -1562,8 +1562,7 @@ def generate_duplicate_fieldnames_case():
 def generate_primitive_case(batch_sizes, name='primitive'):
     types = ['bool', 'int8', 'int16', 'int32', 'int64',
              'uint8', 'uint16', 'uint32', 'uint64',
-             'float32', 'float64', 'binary', 'utf8',
-             'fixedsizebinary_19', 'fixedsizebinary_120']
+             'float32', 'float64']

     fields = []

@@ -1574,7 +1573,19 @@ def generate_primitive_case(batch_sizes, name='primitive'):
     return _generate_file(name, fields, batch_sizes)


-def generate_primitive_large_offsets_case(batch_sizes):
+def generate_binary_case(batch_sizes, name='binary'):
+    types = ['binary', 'utf8', 'fixedsizebinary_19', 'fixedsizebinary_120']
+
+    fields = []
+
+    for type_ in types:
+        fields.append(get_field(type_ + "_nullable", type_, nullable=True))
+        fields.append(get_field(type_ + "_nonnullable", type_, nullable=False))
+
+    return _generate_file(name, fields, batch_sizes)
+
+
+def generate_large_binary_case(batch_sizes):
     types = ['largebinary', 'largeutf8']

     fields = []

@@ -1583,7 +1594,7 @@ def generate_primitive_large_offsets_case(batch_sizes):
         fields.append(get_field(type_ + "_nullable", type_, nullable=True))
         fields.append(get_field(type_ + "_nonnullable", type_, nullable=False))

-    return _generate_file('primitive_large_offsets', fields, batch_sizes)
+    return _generate_file('large_binary', fields, batch_sizes)


 def generate_null_case(batch_sizes):
@@ -1907,7 +1918,11 @@ def get_generated_json_files(tempdir=None):
         generate_primitive_case([17, 20], name='primitive'),
         generate_primitive_case([0, 0, 0], name='primitive_zerolength'),

-        generate_primitive_large_offsets_case([17, 20]),
+        generate_binary_case([], name='binary_no_batches'),
+        generate_binary_case([17, 20], name='binary'),
+        generate_binary_case([0, 0, 0], name='binary_zerolength'),
+
+        generate_large_binary_case([17, 20]),

         generate_null_case([10, 0]),
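As a quick reference for the column layout the new generate_binary_case
produces, a self-contained sketch of its naming scheme (this just mirrors
the loop in the diff above; no archery imports required):

    # One nullable and one non-nullable column per binary type, matching
    # the get_field() calls in generate_binary_case.
    types = ['binary', 'utf8', 'fixedsizebinary_19', 'fixedsizebinary_120']
    columns = [t + suffix
               for t in types
               for suffix in ('_nullable', '_nonnullable')]
    print(columns[:4])
    # ['binary_nullable', 'binary_nonnullable',
    #  'utf8_nullable', 'utf8_nonnullable']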