This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 8cbff3d5b6f [SPARK-41878][CONNECT][TESTS] pyspark.sql.tests.test_dataframe - Add JIRAs or messages for skipped tests 8cbff3d5b6f is described below commit 8cbff3d5b6f6a34e551aa42e965a16c3cb41e4c7 Author: Sandeep Singh <sand...@techaddict.me> AuthorDate: Thu Jan 5 08:53:08 2023 +0900 [SPARK-41878][CONNECT][TESTS] pyspark.sql.tests.test_dataframe - Add JIRAs or messages for skipped tests ### What changes were proposed in this pull request? This PR enables the reused PySpark tests in Spark Connect that pass now, and adds JIRAs/messages to the skipped ones. ### Why are the changes needed? To keep track of the test coverage. ### Does this PR introduce any user-facing change? No, test-only. ### How was this patch tested? Enabling tests Closes #39382 from techaddict/SPARK-41878. Authored-by: Sandeep Singh <sand...@techaddict.me> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- .../sql/tests/connect/test_parity_dataframe.py | 44 ++++++++++++++++++---- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/python/pyspark/sql/tests/connect/test_parity_dataframe.py b/python/pyspark/sql/tests/connect/test_parity_dataframe.py index ea1eb23fd4f..69f445b69ca 100644 --- a/python/pyspark/sql/tests/connect/test_parity_dataframe.py +++ b/python/pyspark/sql/tests/connect/test_parity_dataframe.py @@ -41,154 +41,182 @@ class DataFrameParityTests(DataFrameTestsMixin, ReusedSQLTestCase): cls._spark.stop() del os.environ["SPARK_REMOTE"] + # TODO(SPARK-41612): support Catalog.isCached @unittest.skip("Fails in Spark Connect, should enable.") def test_cache(self): super().test_cache() + # TODO(SPARK-41866): createDataframe support array type @unittest.skip("Fails in Spark Connect, should enable.") def test_create_dataframe_from_array_of_long(self): super().test_create_dataframe_from_array_of_long() + # TODO(SPARK-41868): Support data type Duration(NANOSECOND) @unittest.skip("Fails in Spark Connect, 
should enable.") def test_create_dataframe_from_pandas_with_day_time_interval(self): super().test_create_dataframe_from_pandas_with_day_time_interval() + # TODO(SPARK-41842): Support data type Timestamp(NANOSECOND, null) @unittest.skip("Fails in Spark Connect, should enable.") def test_create_dataframe_from_pandas_with_dst(self): super().test_create_dataframe_from_pandas_with_dst() + # TODO(SPARK-41842): Support data type Timestamp(NANOSECOND, null) @unittest.skip("Fails in Spark Connect, should enable.") def test_create_dataframe_from_pandas_with_timestamp(self): super().test_create_dataframe_from_pandas_with_timestamp() - @unittest.skip("Fails in Spark Connect, should enable.") - def test_create_dataframe_required_pandas_not_found(self): - super().test_create_dataframe_required_pandas_not_found() - + # TODO(SPARK-41855): createDataFrame doesn't handle None/NaN properly @unittest.skip("Fails in Spark Connect, should enable.") def test_create_nan_decimal_dataframe(self): super().test_create_nan_decimal_dataframe() + # TODO(SPARK-41869): DataFrame dropDuplicates should throw error on non list argument @unittest.skip("Fails in Spark Connect, should enable.") def test_drop_duplicates(self): super().test_drop_duplicates() + # TODO(SPARK-41870): Handle duplicate columns in `createDataFrame` @unittest.skip("Fails in Spark Connect, should enable.") def test_duplicated_column_names(self): super().test_duplicated_column_names() + # TODO(SPARK-41871): DataFrame hint parameter can be a float @unittest.skip("Fails in Spark Connect, should enable.") def test_extended_hint_types(self): super().test_extended_hint_types() + # TODO(SPARK-41872): Fix DataFrame createDataframe handling of None @unittest.skip("Fails in Spark Connect, should enable.") def test_fillna(self): super().test_fillna() + # TODO: comparing types, need to expose connect types @unittest.skip("Fails in Spark Connect, should enable.") def test_generic_hints(self): super().test_generic_hints() + # Spark Connect 
does not support RDD but the tests depend on them. @unittest.skip("Fails in Spark Connect, should enable.") def test_help_command(self): super().test_help_command() + # Spark Connect throws NotImplementedError tests expects IllegalArgumentException @unittest.skip("Fails in Spark Connect, should enable.") def test_invalid_join_method(self): super().test_invalid_join_method() + # TODO(SPARK-41834): Implement SparkSession.conf @unittest.skip("Fails in Spark Connect, should enable.") def test_join_without_on(self): super().test_join_without_on() + # TODO(SPARK-41527): Implement DataFrame.observe @unittest.skip("Fails in Spark Connect, should enable.") def test_observe(self): super().test_observe() + # TODO(SPARK-41625): Support Structured Streaming @unittest.skip("Fails in Spark Connect, should enable.") def test_observe_str(self): super().test_observe_str() + # TODO(SPARK-41873): Implement DataFrame `pandas_api` @unittest.skip("Fails in Spark Connect, should enable.") def test_pandas_api(self): super().test_pandas_api() + # TODO(SPARK-41840): DataFrame.show(): 'Column' object is not callable @unittest.skip("Fails in Spark Connect, should enable.") def test_repartitionByRange_dataframe(self): super().test_repartitionByRange_dataframe() + # TODO(SPARK-41872): Fix DataFrame createDataframe handling of None @unittest.skip("Fails in Spark Connect, should enable.") def test_replace(self): super().test_replace() + # TODO(SPARK-41834): Implement SparkSession.conf @unittest.skip("Fails in Spark Connect, should enable.") def test_repr_behaviors(self): super().test_repr_behaviors() + # TODO(SPARK-41834): Implement SparkSession.conf @unittest.skip("Fails in Spark Connect, should enable.") def test_require_cross(self): super().test_require_cross() + # TODO(SPARK-41874): Implement DataFrame `sameSemantics` @unittest.skip("Fails in Spark Connect, should enable.") def test_same_semantics_error(self): super().test_same_semantics_error() + # TODO(SPARK-41830): Fix DataFrame.sample 
parameters @unittest.skip("Fails in Spark Connect, should enable.") def test_sample(self): super().test_sample() + # TODO(SPARK-41875): throw proper errors in Dataset.to() @unittest.skip("Fails in Spark Connect, should enable.") def test_to(self): super().test_to() + # Spark Connect does not support RDD but the tests depend on them. @unittest.skip("Fails in Spark Connect, should enable.") def test_toDF_with_schema_string(self): super().test_toDF_with_schema_string() + # TODO(SPARK-41876): Implement DataFrame `toLocalIterator` @unittest.skip("Fails in Spark Connect, should enable.") def test_to_local_iterator(self): super().test_to_local_iterator() + # TODO(SPARK-41876): Implement DataFrame `toLocalIterator` @unittest.skip("Fails in Spark Connect, should enable.") def test_to_local_iterator_not_fully_consumed(self): super().test_to_local_iterator_not_fully_consumed() + # TODO(SPARK-41876): Implement DataFrame `toLocalIterator` @unittest.skip("Fails in Spark Connect, should enable.") def test_to_local_iterator_prefetch(self): super().test_to_local_iterator_prefetch() + # TODO(SPARK-41884): DataFrame `toPandas` parity in return types @unittest.skip("Fails in Spark Connect, should enable.") def test_to_pandas(self): super().test_to_pandas() - @unittest.skip("Fails in Spark Connect, should enable.") - def test_to_pandas_avoid_astype(self): - super().test_to_pandas_avoid_astype() - + # TODO(SPARK-41884): DataFrame `toPandas` parity in return types @unittest.skip("Fails in Spark Connect, should enable.") def test_to_pandas_for_array_of_struct(self): super().test_to_pandas_for_array_of_struct() + # TODO(SPARK-41834): Implement SparkSession.conf @unittest.skip("Fails in Spark Connect, should enable.") def test_to_pandas_from_empty_dataframe(self): super().test_to_pandas_from_empty_dataframe() + # TODO(SPARK-41834): Implement SparkSession.conf @unittest.skip("Fails in Spark Connect, should enable.") def test_to_pandas_from_mixed_dataframe(self): 
super().test_to_pandas_from_mixed_dataframe() + # TODO(SPARK-41834): Implement SparkSession.conf @unittest.skip("Fails in Spark Connect, should enable.") def test_to_pandas_from_null_dataframe(self): super().test_to_pandas_from_null_dataframe() + # TODO(SPARK-41834): Implement SparkSession.conf @unittest.skip("Fails in Spark Connect, should enable.") def test_to_pandas_on_cross_join(self): super().test_to_pandas_on_cross_join() + # TODO(SPARK-41834): Implement SparkSession.conf @unittest.skip("Fails in Spark Connect, should enable.") def test_to_pandas_with_duplicated_column_names(self): super().test_to_pandas_with_duplicated_column_names() + # TODO(SPARK-41877): createDataframe throw proper errors @unittest.skip("Fails in Spark Connect, should enable.") def test_unpivot(self): super().test_unpivot() --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org