This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 3b2c1b916a2 [SPARK-39252][PYSPARK][TESTS] Remove flaky test_df_is_empty
3b2c1b916a2 is described below

commit 3b2c1b916a29fef0463dc3a6d9df2e46a91cf446
Author: Ivan Sadikov <ivan.sadi...@databricks.com>
AuthorDate: Wed May 25 11:39:54 2022 +0900

    [SPARK-39252][PYSPARK][TESTS] Remove flaky test_df_is_empty

    This PR removes flaky `test_df_is_empty` as reported in https://issues.apache.org/jira/browse/SPARK-39252. I will open a follow-up PR to reintroduce the test and fix the flakiness (or see if it was a regression).

    No.

    Existing unit tests.

    Closes #36656 from sadikovi/SPARK-39252.

    Authored-by: Ivan Sadikov <ivan.sadi...@databricks.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
    (cherry picked from commit 9823bb385cd6dca7c4fb5a6315721420ad42f80a)
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/sql/tests/test_dataframe.py | 46 +++++++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py
index 6b9ac24d8c1..72fed2856a6 100644
--- a/python/pyspark/sql/tests/test_dataframe.py
+++ b/python/pyspark/sql/tests/test_dataframe.py
@@ -21,7 +21,7 @@ import shutil
 import tempfile
 import time
 import unittest
-import uuid
+from typing import cast
 
 from pyspark.sql import SparkSession, Row
 from pyspark.sql.types import StringType, IntegerType, DoubleType, StructType, StructField, \
@@ -838,6 +838,7 @@ class DataFrameTests(ReusedSQLTestCase):
         finally:
             shutil.rmtree(tpath)
 
+<<<<<<< HEAD
     def test_df_is_empty(self):
         # SPARK-39084: Fix df.rdd.isEmpty() resulting in JVM crash.
 
@@ -872,6 +873,49 @@ class DataFrameTests(ReusedSQLTestCase):
             self.assertFalse(res.rdd.isEmpty())
         finally:
             shutil.rmtree(tmpPath)
+=======
+    def test_df_show(self):
+        # SPARK-35408: ensure better diagnostics if incorrect parameters are passed
+        # to DataFrame.show
+
+        df = self.spark.createDataFrame([("foo",)])
+        df.show(5)
+        df.show(5, True)
+        df.show(5, 1, True)
+        df.show(n=5, truncate="1", vertical=False)
+        df.show(n=5, truncate=1.5, vertical=False)
+
+        with self.assertRaisesRegex(TypeError, "Parameter 'n'"):
+            df.show(True)
+        with self.assertRaisesRegex(TypeError, "Parameter 'vertical'"):
+            df.show(vertical="foo")
+        with self.assertRaisesRegex(TypeError, "Parameter 'truncate=foo'"):
+            df.show(truncate="foo")
+
+    @unittest.skipIf(
+        not have_pandas or not have_pyarrow,
+        cast(str, pandas_requirement_message or pyarrow_requirement_message),
+    )
+    def test_pandas_api(self):
+        import pandas as pd
+        from pandas.testing import assert_frame_equal
+
+        sdf = self.spark.createDataFrame([("a", 1), ("b", 2), ("c", 3)], ["Col1", "Col2"])
+        psdf_from_sdf = sdf.pandas_api()
+        psdf_from_sdf_with_index = sdf.pandas_api(index_col="Col1")
+        pdf = pd.DataFrame({"Col1": ["a", "b", "c"], "Col2": [1, 2, 3]})
+        pdf_with_index = pdf.set_index("Col1")
+
+        assert_frame_equal(pdf, psdf_from_sdf.to_pandas())
+        assert_frame_equal(pdf_with_index, psdf_from_sdf_with_index.to_pandas())
+
+    # test for SPARK-36337
+    def test_create_nan_decimal_dataframe(self):
+        self.assertEqual(
+            self.spark.createDataFrame(data=[Decimal("NaN")], schema="decimal").collect(),
+            [Row(value=None)],
+        )
+>>>>>>> 9823bb385cd ([SPARK-39252][PYSPARK][TESTS] Remove flaky test_df_is_empty)
 
 
 class QueryExecutionListenerTests(unittest.TestCase, SQLTestUtils):


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org