This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new ddf5436fa8 ARROW-16372: [Python] Use IPC over Parquet for tests where
Parquet is unnecessary
ddf5436fa8 is described below
commit ddf5436fa883a77188d9a34795008551b45431cf
Author: Alessandro Molina <[email protected]>
AuthorDate: Tue May 31 14:24:33 2022 +0200
ARROW-16372: [Python] Use IPC over Parquet for tests where Parquet is
unnecessary
This is needed because Parquet is optional and isn't supported on big-endian platforms yet.
Closes #13243 from amol-/ARROW-16372
Authored-by: Alessandro Molina <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
.travis.yml | 1 -
python/pyarrow/tests/test_dataset.py | 28 ++++++++++++++--------------
2 files changed, 14 insertions(+), 15 deletions(-)
diff --git a/.travis.yml b/.travis.yml
index a2ce0af63b..f906ba8686 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -164,7 +164,6 @@ jobs:
allow_failures:
- name: "Java on s390x"
- - name: "Python on s390x"
before_install:
- eval "$(python ci/detect-changes.py)"
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index 288301fd4b..0be01d2336 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -4556,15 +4556,15 @@ def test_dataset_join(tempdir):
"colA": [1, 2, 6],
"col2": ["a", "b", "f"]
})
- ds.write_dataset(t1, tempdir / "t1", format="parquet")
- ds1 = ds.dataset(tempdir / "t1")
+ ds.write_dataset(t1, tempdir / "t1", format="ipc")
+ ds1 = ds.dataset(tempdir / "t1", format="ipc")
t2 = pa.table({
"colB": [99, 2, 1],
"col3": ["Z", "B", "A"]
})
- ds.write_dataset(t2, tempdir / "t2", format="parquet")
- ds2 = ds.dataset(tempdir / "t2")
+ ds.write_dataset(t2, tempdir / "t2", format="ipc")
+ ds2 = ds.dataset(tempdir / "t2", format="ipc")
result = ds1.join(ds2, "colA", "colB")
assert result.to_table() == pa.table({
@@ -4587,15 +4587,15 @@ def test_dataset_join_unique_key(tempdir):
"colA": [1, 2, 6],
"col2": ["a", "b", "f"]
})
- ds.write_dataset(t1, tempdir / "t1", format="parquet")
- ds1 = ds.dataset(tempdir / "t1")
+ ds.write_dataset(t1, tempdir / "t1", format="ipc")
+ ds1 = ds.dataset(tempdir / "t1", format="ipc")
t2 = pa.table({
"colA": [99, 2, 1],
"col3": ["Z", "B", "A"]
})
- ds.write_dataset(t2, tempdir / "t2", format="parquet")
- ds2 = ds.dataset(tempdir / "t2")
+ ds.write_dataset(t2, tempdir / "t2", format="ipc")
+ ds2 = ds.dataset(tempdir / "t2", format="ipc")
result = ds1.join(ds2, "colA")
assert result.to_table() == pa.table({
@@ -4619,16 +4619,16 @@ def test_dataset_join_collisions(tempdir):
"colB": [10, 20, 60],
"colVals": ["a", "b", "f"]
})
- ds.write_dataset(t1, tempdir / "t1", format="parquet")
- ds1 = ds.dataset(tempdir / "t1")
+ ds.write_dataset(t1, tempdir / "t1", format="ipc")
+ ds1 = ds.dataset(tempdir / "t1", format="ipc")
t2 = pa.table({
"colA": [99, 2, 1],
"colB": [99, 20, 10],
"colVals": ["Z", "B", "A"]
})
- ds.write_dataset(t2, tempdir / "t2", format="parquet")
- ds2 = ds.dataset(tempdir / "t2")
+ ds.write_dataset(t2, tempdir / "t2", format="ipc")
+ ds2 = ds.dataset(tempdir / "t2", format="ipc")
result = ds1.join(ds2, "colA", join_type="full outer", right_suffix="_r")
assert result.to_table().sort_by("colA") == pa.table([
@@ -4646,8 +4646,8 @@ def test_dataset_filter(tempdir):
"colA": [1, 2, 6],
"col2": ["a", "b", "f"]
})
- ds.write_dataset(t1, tempdir / "t1", format="parquet")
- ds1 = ds.dataset(tempdir / "t1")
+ ds.write_dataset(t1, tempdir / "t1", format="ipc")
+ ds1 = ds.dataset(tempdir / "t1", format="ipc")
result = ds1.scanner(filter=pc.field("colA") < 3)
assert result.to_table() == pa.table({