This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new b02afd869714 [SPARK-51182][SQL] DataFrameWriter should throw
dataPathNotSpecifiedError when path is not specified
b02afd869714 is described below
commit b02afd869714eea7ccd2858e0e93cb19aac1d072
Author: Vlad Rozov <[email protected]>
AuthorDate: Tue May 6 17:38:43 2025 +0800
[SPARK-51182][SQL] DataFrameWriter should throw dataPathNotSpecifiedError
when path is not specified
### What changes were proposed in this pull request?
Change error message from `Expected exactly one path to be specified, but
got: .` to `'path' is not specified.` when path is not specified in the call to
`DataFrame.write().save(path)` explicitly or using `option(path, ...)`,
`parquet(path)`, etc.
### Why are the changes needed?
The error message is more accurate.
### Does this PR introduce _any_ user-facing change?
Yes, users would get a corrected error message when they do not specify a path.
### How was this patch tested?
Updated the expected error message in the R test suite, and added new tests in
the Python, Java, and Scala test suites.
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #49928 from vrozov/SPARK-51182.
Authored-by: Vlad Rozov <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
R/pkg/tests/fulltests/test_sparkSQL.R | 2 +-
python/pyspark/sql/tests/test_readwriter.py | 7 +++++++
.../sql/execution/datasources/DataSource.scala | 4 +++-
.../spark/sql/JavaDataFrameReaderWriterSuite.java | 19 +++++++++++++++++
.../execution/datasources/DataSourceSuite.scala | 24 ++++++++++++++++++++--
.../sql/test/DataFrameReaderWriterSuite.scala | 10 ++++++++-
6 files changed, 61 insertions(+), 5 deletions(-)
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R
b/R/pkg/tests/fulltests/test_sparkSQL.R
index c93b92edbff8..cada7813c950 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -4000,7 +4000,7 @@ test_that("Call DataFrameWriter.save() API in Java
without path and check argume
# DataFrameWriter.save() without path.
expect_error(write.df(df, source = "csv"),
paste("Error in save :
org.apache.spark.SparkIllegalArgumentException:",
- "Expected exactly one path to be specified"))
+ "'path' is not specified."))
expect_error(write.json(df, jsonPath),
"Error in json : analysis error - \\[PATH_ALREADY_EXISTS\\].*")
expect_error(write.text(df, jsonPath),
diff --git a/python/pyspark/sql/tests/test_readwriter.py
b/python/pyspark/sql/tests/test_readwriter.py
index 683c925eefc2..1d1cc3507f0e 100644
--- a/python/pyspark/sql/tests/test_readwriter.py
+++ b/python/pyspark/sql/tests/test_readwriter.py
@@ -238,6 +238,13 @@ class ReadwriterTestsMixin:
self.assertEqual(join2.columns, ["id", "value_1", "index",
"value_2"])
+ # "[SPARK-51182]: DataFrameWriter should throw dataPathNotSpecifiedError
when path is not
+ # specified"
+ def test_save(self):
+ writer = self.df.write
+ with self.assertRaisesRegex(Exception, "'path' is not specified."):
+ writer.save()
+
class ReadwriterV2TestsMixin:
def test_api(self):
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
index 882bc12a0d29..489f62bd9776 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
@@ -467,8 +467,10 @@ case class DataSource(
val allPaths = paths ++ caseInsensitiveOptions.get("path")
val outputPath = if (allPaths.length == 1) {
makeQualified(new Path(allPaths.head))
- } else {
+ } else if (allPaths.length > 1) {
throw QueryExecutionErrors.multiplePathsSpecifiedError(allPaths)
+ } else {
+ throw QueryExecutionErrors.dataPathNotSpecifiedError()
}
val caseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis
diff --git
a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameReaderWriterSuite.java
b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameReaderWriterSuite.java
index 691fb67bbe90..9a086b9d76f7 100644
---
a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameReaderWriterSuite.java
+++
b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameReaderWriterSuite.java
@@ -20,15 +20,22 @@ package test.org.apache.spark.sql;
import java.io.File;
import java.util.HashMap;
+import org.apache.spark.SparkIllegalArgumentException;
+import org.apache.spark.sql.DataFrameWriter;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.test.TestSparkSession;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.util.Utils;
+
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrowsExactly;
+
public class JavaDataFrameReaderWriterSuite {
private SparkSession spark = new TestSparkSession();
private StructType schema = new StructType().add("s", "string");
@@ -152,4 +159,16 @@ public class JavaDataFrameReaderWriterSuite {
spark.read().schema(schema).orc(new String[]{input, input})
.write().orc(output);
}
+
+ @Test
+ @DisplayName("[SPARK-51182]: DataFrameWriter should throw
dataPathNotSpecifiedError when path " +
+ "is not specified")
+ public void testPathNotSpecified() {
+ DataFrameWriter<Long> dataFrameWriter = spark.range(0).write();
+ SparkIllegalArgumentException e = assertThrowsExactly(
+ SparkIllegalArgumentException.class,
+ () -> dataFrameWriter.save(),
+ "Expected save() to throw SparkIllegalArgumentException when path is
not specified");
+ assertEquals("'path' is not specified.", e.getMessage());
+ }
}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala
index d2acdcfc6205..865a89692d6d 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala
@@ -25,8 +25,8 @@ import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileStatus, Path, RawLocalFileSystem}
import org.scalatest.PrivateMethodTester
-import org.apache.spark.SparkUnsupportedOperationException
-import org.apache.spark.sql.AnalysisException
+import org.apache.spark.{SparkIllegalArgumentException,
SparkUnsupportedOperationException}
+import org.apache.spark.sql.{AnalysisException, SaveMode}
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.util.Utils
@@ -220,6 +220,26 @@ class DataSourceSuite extends SharedSparkSession with
PrivateMethodTester {
"fileSystemClass" -> "org.apache.hadoop.fs.http.HttpsFileSystem",
"method" -> "listStatus"))
}
+
+ test("SPARK-51182: DataFrameWriter should throw dataPathNotSpecifiedError
when path is not " +
+ "specified") {
+ val df = new DataSource(spark, "parquet")
+ checkError(exception = intercept[SparkIllegalArgumentException](
+ df.planForWriting(SaveMode.ErrorIfExists, spark.range(0).logicalPlan)),
+ condition = "_LEGACY_ERROR_TEMP_2047")
+ }
+
+ test("SPARK-51182: DataFrameWriter should throw multiplePathsSpecifiedError
when more than " +
+ "one path is specified") {
+ val dataSources: List[DataSource] = List(
+ new DataSource(spark, "parquet", Seq("/path1"), options = Map("path" ->
"/path2")),
+ new DataSource(spark, "parquet", Seq("/path1", "/path2")))
+ dataSources.foreach(df => checkError(exception =
intercept[SparkIllegalArgumentException](
+ df.planForWriting(SaveMode.ErrorIfExists, spark.range(0).logicalPlan)),
+ condition = "_LEGACY_ERROR_TEMP_2050",
+ parameters = Map("paths" -> "/path1, /path2"))
+ )
+ }
}
object TestPaths {
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
index 300807cf0586..f1f92f73197f 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
@@ -32,7 +32,7 @@ import
org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName
import org.apache.parquet.schema.Type.Repetition
import org.scalatest.BeforeAndAfter
-import org.apache.spark.{SparkContext, TestUtils}
+import org.apache.spark.{SparkContext, SparkIllegalArgumentException,
TestUtils}
import org.apache.spark.internal.io.FileCommitProtocol.TaskCommitMessage
import org.apache.spark.internal.io.HadoopMapReduceCommitProtocol
import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart}
@@ -1467,4 +1467,12 @@ class DataFrameReaderWriterSuite extends QueryTest with
SharedSparkSession with
testRead(spark.read.schema(fileSchema).csv(s"$harPath/test.csv"), data,
fileSchema)
}
+
+ test("SPARK-51182: DataFrameWriter should throw dataPathNotSpecifiedError
when path is not " +
+ "specified") {
+ val dataFrameWriter = spark.range(0).write
+ checkError(
+ exception =
intercept[SparkIllegalArgumentException](dataFrameWriter.save()),
+ condition = "_LEGACY_ERROR_TEMP_2047")
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]