kou commented on code in PR #13677:
URL: https://github.com/apache/arrow/pull/13677#discussion_r956555290


##########
python/pyarrow/tests/test_dataset.py:
##########
@@ -4192,27 +4192,27 @@ def test_write_table_multiple_fragments(tempdir):
     # Table with multiple batches written as single Fragment by default
     base_dir = tempdir / 'single'
     ds.write_dataset(table, base_dir, format="feather")
-    assert set(base_dir.rglob("*")) == set([base_dir / "part-0.feather"])
+    assert set(base_dir.rglob("*")) == set([base_dir / "part-0.arrow"])
     assert ds.dataset(base_dir, format="ipc").to_table().equals(table)
 
     # Same for single-element list of Table
     base_dir = tempdir / 'single-list'
     ds.write_dataset([table], base_dir, format="feather")
-    assert set(base_dir.rglob("*")) == set([base_dir / "part-0.feather"])
+    assert set(base_dir.rglob("*")) == set([base_dir / "part-0.arrow"])
     assert ds.dataset(base_dir, format="ipc").to_table().equals(table)
 
     # Provide list of batches to write multiple fragments
     base_dir = tempdir / 'multiple'
     ds.write_dataset(table.to_batches(), base_dir, format="feather")
     assert set(base_dir.rglob("*")) == set(
-        [base_dir / "part-0.feather"])
+        [base_dir / "part-0.arrow"])

Review Comment:
   Ah, that's a good point.
   If users specify `"feather"` explicitly, it may be better to use
   `".feather"` as the extension name. Would this work?
   
   ```diff
   diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
   index 93d3555b94..f52f8a6519 100644
   --- a/python/pyarrow/_dataset.pyx
   +++ b/python/pyarrow/_dataset.pyx
   @@ -1154,6 +1154,13 @@ cdef class IpcFileFormat(FileFormat):
            return IpcFileFormat, tuple()
    
    
   +cdef class FeatherFileFormat(IpcFileFormat):
   +
   +    @property
   +    def default_extname(self):
   +        return "feather"
   +
   +
    cdef class CsvFileFormat(FileFormat):
        """
        FileFormat for CSV files.
   diff --git a/python/pyarrow/dataset.py b/python/pyarrow/dataset.py
   index 326b37ec6e..5816bd54ae 100644
   --- a/python/pyarrow/dataset.py
   +++ b/python/pyarrow/dataset.py
   @@ -284,8 +284,10 @@ def _ensure_format(obj):
            if not _parquet_available:
                raise ValueError(_parquet_msg)
            return ParquetFileFormat()
   -    elif obj in {"ipc", "arrow", "feather"}:
   +    elif obj in {"ipc", "arrow"}:
            return IpcFileFormat()
   +    elif obj in {"feather"}:
   +        return FeatherFileFormat()
        elif obj == "csv":
            return CsvFileFormat()
        elif obj == "orc":
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to