itholic commented on code in PR #42369: URL: https://github.com/apache/spark/pull/42369#discussion_r1286592428
########## python/pyspark/sql/connect/dataframe.py: ########## @@ -1732,6 +1732,23 @@ def to(self, schema: StructType) -> "DataFrame": to.__doc__ = PySparkDataFrame.to.__doc__ def toDF(self, *cols: str) -> "DataFrame": + expected_len_cols = len(self.columns) + actual_len_cols = len(cols) + if expected_len_cols != actual_len_cols: Review Comment: Yeah, the existing error is nice enough, but it is raised from the JVM and captured by PySpark, so the log is quite long, as shown below: ```python Traceback (most recent call last): File "/.../spark/python/pyspark/sql/tests/test_dataframe.py", line 1028, in test_toDF_with_string df.toDF("key") File "/.../spark/python/pyspark/sql/dataframe.py", line 5324, in toDF jdf = self._jdf.toDF(self._jseq(cols)) File "/.../spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line 1322, in __call__ return_value = get_return_value( File "/.../spark/python/pyspark/errors/exceptions/captured.py", line 185, in deco raise converted from None pyspark.errors.exceptions.captured.IllegalArgumentException: requirement failed: The number of columns doesn't match. Old column names (2): _1, _2 New column names (1): key JVM stacktrace: java.lang.IllegalArgumentException: requirement failed: The number of columns doesn't match.
Old column names (2): _1, _2 New column names (1): key at scala.Predef$.require(Predef.scala:281) at org.apache.spark.sql.Dataset.toDF(Dataset.scala:534) at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.base/java.lang.reflect.Method.invoke(Method.java:566) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374) at py4j.Gateway.invoke(Gateway.java:282) at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) at py4j.commands.CallCommand.execute(CallCommand.java:79) at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182) at py4j.ClientServerConnection.run(ClientServerConnection.java:106) at java.base/java.lang.Thread.run(Thread.java:829) ``` So I wanted to improve it by shortening the message and capturing it as a `PySparkValueError`, as shown below: ```python Traceback (most recent call last): File "/.../spark/python/pyspark/sql/tests/test_dataframe.py", line 1028, in test_toDF_with_string df.toDF("key") File "/.../spark/python/pyspark/sql/dataframe.py", line 5310, in toDF raise PySparkValueError( pyspark.errors.exceptions.base.PySparkValueError: [LENGTH_MISMATCH] The length of `cols` must be 2, got 1. ``` Do we want to just keep the current behavior and revert the changes? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org