This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new b1b29d9eeb76 [SPARK-46781][PYTHON][TESTS] Test custom data source and input partition (pyspark.sql.datasource)
b1b29d9eeb76 is described below

commit b1b29d9eeb76951a0129529f2075046cde91937a
Author: Xinrong Meng <xinr...@apache.org>
AuthorDate: Tue Jan 23 09:14:31 2024 +0900

    [SPARK-46781][PYTHON][TESTS] Test custom data source and input partition (pyspark.sql.datasource)

    ### What changes were proposed in this pull request?
    Test custom data source and input partition (pyspark.sql.datasource)

    ### Why are the changes needed?
    Subtasks of [SPARK-46041](https://issues.apache.org/jira/browse/SPARK-46041) to improve test coverage

    ### Does this PR introduce _any_ user-facing change?
    No.

    ### How was this patch tested?
    Test change only.

    ### Was this patch authored or co-authored using generative AI tooling?
    No.

    Closes #44808 from xinrong-meng/test_datasource.

    Authored-by: Xinrong Meng <xinr...@apache.org>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/sql/tests/test_datasources.py | 50 ++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/python/pyspark/sql/tests/test_datasources.py b/python/pyspark/sql/tests/test_datasources.py
index 8c16904544b2..ece4839d88a8 100644
--- a/python/pyspark/sql/tests/test_datasources.py
+++ b/python/pyspark/sql/tests/test_datasources.py
@@ -21,7 +21,9 @@ import uuid
 import os
 
 from pyspark.sql import Row
+from pyspark.sql.datasource import InputPartition, DataSource
 from pyspark.sql.types import IntegerType, StructField, StructType, LongType, StringType
+from pyspark.errors import PySparkNotImplementedError
 from pyspark.testing.sqlutils import ReusedSQLTestCase
 
 
@@ -283,6 +285,54 @@ class DataSourcesTestsMixin:
                     url=f"{url};drop=true", dbtable=dbtable
                 ).load().collect()
 
+    def test_custom_data_source(self):
+        class MyCustomDataSource(DataSource):
+            pass
+
+        custom_data_source = MyCustomDataSource(options={"path": "/path/to/custom/data"})
+
+        with self.assertRaises(PySparkNotImplementedError) as pe:
+            custom_data_source.schema()
+
+        self.check_error(
+            exception=pe.exception,
+            error_class="NOT_IMPLEMENTED",
+            message_parameters={"feature": "schema"},
+        )
+
+        with self.assertRaises(PySparkNotImplementedError) as pe:
+            custom_data_source.reader(schema=None)
+
+        self.check_error(
+            exception=pe.exception,
+            error_class="NOT_IMPLEMENTED",
+            message_parameters={"feature": "reader"},
+        )
+
+        with self.assertRaises(PySparkNotImplementedError) as pe:
+            custom_data_source.writer(schema=None, overwrite=False)
+
+        self.check_error(
+            exception=pe.exception,
+            error_class="NOT_IMPLEMENTED",
+            message_parameters={"feature": "writer"},
+        )
+
+    def test_input_partition(self):
+        partition = InputPartition(1)
+        expected_repr = "InputPartition(value=1)"
+        actual_repr = repr(partition)
+        self.assertEqual(expected_repr, actual_repr)
+
+        class RangeInputPartition(InputPartition):
+            def __init__(self, start, end):
+                super().__init__((start, end))
+
+        partition = RangeInputPartition(1, 3)
+        expected_repr = "RangeInputPartition(value=(1, 3))"
+        actual_repr = repr(partition)
+        self.assertEqual(expected_repr, actual_repr)
+
 
 class DataSourcesTests(DataSourcesTestsMixin, ReusedSQLTestCase):
     pass


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org