This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new f4e41e0e318 [SPARK-46287][PYTHON][CONNECT] `DataFrame.isEmpty` should work with all datatypes f4e41e0e318 is described below commit f4e41e0e318ea1269de5991f4635637e6e5233f3 Author: Ruifeng Zheng <ruife...@apache.org> AuthorDate: Wed Dec 6 10:45:12 2023 -0800 [SPARK-46287][PYTHON][CONNECT] `DataFrame.isEmpty` should work with all datatypes ### What changes were proposed in this pull request? `DataFrame.isEmpty` should work with all datatypes; the schema may not be compatible with Arrow, so we should not use `collect/take` to check `isEmpty` ### Why are the changes needed? bugfix ### Does this PR introduce _any_ user-facing change? before: ``` In [1]: spark.sql("SELECT INTERVAL '10-8' YEAR TO MONTH AS interval").isEmpty() 23/12/06 20:39:58 WARN CheckAllocator: More than one DefaultAllocationManager on classpath. Choosing first found --------------------------------------------------------------------------- / 1] KeyError Traceback (most recent call last) Cell In[1], line 1 ----> 1 spark.sql("SELECT INTERVAL '10-8' YEAR TO MONTH AS interval").isEmpty() File ~/Dev/spark/python/pyspark/sql/connect/dataframe.py:181, in DataFrame.isEmpty(self) 180 def isEmpty(self) -> bool: --> 181 return len(self.take(1)) == 0 ... File ~/.dev/miniconda3/envs/spark_dev_311/lib/python3.11/site-packages/pyarrow/public-api.pxi:208, in pyarrow.lib.pyarrow_wrap_array() File ~/.dev/miniconda3/envs/spark_dev_311/lib/python3.11/site-packages/pyarrow/array.pxi:3659, in pyarrow.lib.get_array_class_from_type() KeyError: 21 ``` after: ``` In [1]: spark.sql("SELECT INTERVAL '10-8' YEAR TO MONTH AS interval").isEmpty() 23/12/06 20:40:26 WARN CheckAllocator: More than one DefaultAllocationManager on classpath. Choosing first found Out[1]: False ``` ### How was this patch tested? added a unit test ### Was this patch authored or co-authored using generative AI tooling? no Closes #44209 from zhengruifeng/py_connect_df_isempty. 
Authored-by: Ruifeng Zheng <ruife...@apache.org> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- python/pyspark/sql/connect/dataframe.py | 2 +- python/pyspark/sql/tests/connect/test_connect_basic.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py index 6a1d4571216..66059ad96eb 100644 --- a/python/pyspark/sql/connect/dataframe.py +++ b/python/pyspark/sql/connect/dataframe.py @@ -178,7 +178,7 @@ class DataFrame: write.__doc__ = PySparkDataFrame.write.__doc__ def isEmpty(self) -> bool: - return len(self.take(1)) == 0 + return len(self.select().take(1)) == 0 isEmpty.__doc__ = PySparkDataFrame.isEmpty.__doc__ diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py index fb5eaece7f4..5e0cf535391 100755 --- a/python/pyspark/sql/tests/connect/test_connect_basic.py +++ b/python/pyspark/sql/tests/connect/test_connect_basic.py @@ -2004,6 +2004,11 @@ class SparkConnectBasicTests(SparkConnectSQLTestCase): self.assertFalse(self.connect.sql("SELECT 1 AS X").isEmpty()) self.assertTrue(self.connect.sql("SELECT 1 AS X LIMIT 0").isEmpty()) + def test_is_empty_with_unsupported_types(self): + df = self.spark.sql("SELECT INTERVAL '10-8' YEAR TO MONTH AS interval") + self.assertEqual(df.count(), 1) + self.assertFalse(df.isEmpty()) + def test_session(self): self.assertEqual(self.connect, self.connect.sql("SELECT 1").sparkSession) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org