This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 235032ad24 GH-47633: [Dev][Integration] Write all files with
`--write_generated_json` (#47634)
235032ad24 is described below
commit 235032ad245030c6364a9c8ec02066c0aa0bb18d
Author: Antoine Pitrou <[email protected]>
AuthorDate: Wed Sep 24 15:18:39 2025 +0200
GH-47633: [Dev][Integration] Write all files with `--write_generated_json`
(#47634)
### Rationale for this change
The `--write_generated_json` option to `archery integration` was originally
a JS-specific hack, but its usage was removed from the JS codebase in commit
[c65bcda60b899649c3daf6f16ff5c4e12a41f06d](https://github.com/apache/arrow-js/commit/c65bcda60b899649c3daf6f16ff5c4e12a41f06d).
### What changes are included in this PR?
Rework this option to write all generated JSON integration files, not a
subset of them.
Also fix the option's spelling to use hyphens not underscores.
### Are these changes tested?
Only manually.
### Are there any user-facing changes?
No.
* GitHub Issue: #47633
Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
dev/archery/archery/cli.py | 18 ++++---
dev/archery/archery/integration/runner.py | 78 -------------------------------
2 files changed, 8 insertions(+), 88 deletions(-)
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 48ad466977..7206e56df5 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -691,7 +691,7 @@ def _set_default(opt, default):
@click.option('--target-implementations', default='',
help=('Target implementations in this integration tests'),
envvar="ARCHERY_INTEGRATION_TARGET_IMPLEMENTATIONS")
-@click.option('--write_generated_json', default="",
+@click.option('--write-generated-json', default="",
help='Generate test JSON to indicated path')
@click.option('--run-ipc', is_flag=True, default=False,
help='Run IPC integration tests')
@@ -714,7 +714,8 @@ def _set_default(opt, default):
@click.option('-k', '--match',
help=("Substring for test names to include in run, "
"e.g. -k primitive"))
-def integration(with_all=False, random_seed=12345, **args):
+def integration(with_all=False, random_seed=12345, write_generated_json="",
+ **args):
"""If you don't specify the "--target-implementations" option nor
the "ARCHERY_INTEGRATION_TARGET_IMPLEMENTATIONS" environment
variable, test patterns are product of all specified
@@ -773,7 +774,8 @@ def integration(with_all=False, random_seed=12345, **args):
"""
- from .integration.runner import write_js_test_json, run_all_tests
+ from .integration.datagen import get_generated_json_files
+ from .integration.runner import run_all_tests
import numpy as np
# FIXME(bkietz) Include help strings for individual testers.
@@ -782,8 +784,6 @@ def integration(with_all=False, random_seed=12345, **args):
# Make runs involving data generation deterministic
np.random.seed(random_seed)
- gen_path = args['write_generated_json']
-
implementations = ['cpp', 'dotnet', 'java', 'js', 'go', 'nanoarrow',
'rust']
formats = ['ipc', 'flight', 'c_data']
@@ -799,11 +799,9 @@ def integration(with_all=False, random_seed=12345, **args):
param = f'run_{fmt}'
enabled_formats += args[param]
- if gen_path:
- # XXX See GH-37575: this option is only used by the JS test suite
- # and might not be useful anymore.
- os.makedirs(gen_path, exist_ok=True)
- write_js_test_json(gen_path)
+ if write_generated_json:
+ os.makedirs(write_generated_json, exist_ok=True)
+ get_generated_json_files(tempdir=write_generated_json)
else:
if enabled_formats == 0:
raise click.UsageError(
diff --git a/dev/archery/archery/integration/runner.py b/dev/archery/archery/integration/runner.py
index ebcbc52d86..3c2c34f894 100644
--- a/dev/archery/archery/integration/runner.py
+++ b/dev/archery/archery/integration/runner.py
@@ -750,81 +750,3 @@ def run_all_tests(with_cpp=True, with_java=True, with_js=True,
log(f"{fail_count} failures, {len(runner.skips)} skips")
if fail_count > 0:
sys.exit(1)
-
-
-def write_js_test_json(directory):
- datagen.generate_primitive_case([], name='primitive_no_batches').write(
- os.path.join(directory, 'primitive-no-batches.json')
- )
- datagen.generate_primitive_case([17, 20], name='primitive').write(
- os.path.join(directory, 'primitive.json')
- )
- datagen.generate_primitive_case([0, 0, 0], name='primitive_zerolength').write(
- os.path.join(directory, 'primitive-empty.json')
- )
- # datagen.generate_primitive_large_offsets_case([17, 20]).write(
- # os.path.join(directory, 'primitive-large-offsets.json')
- # )
- datagen.generate_null_case([10, 0]).write(
- os.path.join(directory, 'null.json')
- )
- datagen.generate_null_trivial_case([0, 0]).write(
- os.path.join(directory, 'null-trivial.json')
- )
- datagen.generate_decimal128_case().write(
- os.path.join(directory, 'decimal128.json')
- )
- # datagen.generate_decimal256_case().write(
- # os.path.join(directory, 'decimal256.json')
- # )
- datagen.generate_datetime_case().write(
- os.path.join(directory, 'datetime.json')
- )
- # datagen.generate_duration_case().write(
- # os.path.join(directory, 'duration.json')
- # )
- # datagen.generate_interval_case().write(
- # os.path.join(directory, 'interval.json')
- # )
- # datagen.generate_month_day_nano_interval_case().write(
- # os.path.join(directory, 'month_day_nano_interval.json')
- # )
- datagen.generate_map_case().write(
- os.path.join(directory, 'map.json')
- )
- datagen.generate_non_canonical_map_case().write(
- os.path.join(directory, 'non_canonical_map.json')
- )
- datagen.generate_nested_case().write(
- os.path.join(directory, 'nested.json')
- )
- datagen.generate_recursive_nested_case().write(
- os.path.join(directory, 'recursive-nested.json')
- )
- # datagen.generate_nested_large_offsets_case().write(
- # os.path.join(directory, 'nested-large-offsets.json')
- # )
- datagen.generate_unions_case().write(
- os.path.join(directory, 'unions.json')
- )
- datagen.generate_custom_metadata_case().write(
- os.path.join(directory, 'custom-metadata.json')
- )
- # datagen.generate_duplicate_fieldnames_case().write(
- # os.path.join(directory, 'duplicate-fieldnames.json')
- # )
- datagen.generate_dictionary_case().write(
- os.path.join(directory, 'dictionary.json')
- )
- datagen.generate_dictionary_unsigned_case().write(
- os.path.join(directory, 'dictionary-unsigned.json')
- )
- datagen.generate_nested_dictionary_case().write(
- os.path.join(directory, 'dictionary-nested.json')
- )
- # datagen.generate_run_end_encoded_case().write(
- # os.path.join(directory, 'run_end_encoded.json')
- # )
- datagen.generate_extension_case().write(
- os.path.join(directory, 'extension.json')
- )