This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new b02afd869714 [SPARK-51182][SQL] DataFrameWriter should throw
dataPathNotSpecifiedError when path is not specified
b02afd869714 is described below
commit b02afd869714eea7ccd2858e0e93cb19aac1d072
Author: Vlad Rozov <[email protected]>
AuthorDate: Tue May 6 17:38:43 2025 +0800
[SPARK-51182][SQL] DataFrameWriter should throw dataPathNotSpecifiedError
when path is not specified
### What changes were proposed in this pull request?
Change error message from `Expected exactly one path to be specified, but
got: .` to `'path' is not specified.` when path is not specified in the call to
`DataFrame.write().save(path)` explicitly or using `option(path, ...)`,
`parquet(path)`, etc.
### Why are the changes needed?
The error message is more accurate.
### Does this PR introduce _any_ user-facing change?
Yes, users would get a corrected error message when they do not specify a path.
### How was this patch tested?
Updated the expected error message in the R test suite, and added new tests in
the Python, Java, and Scala test suites.
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #49928 from vrozov/SPARK-51182.
Authored-by: Vlad Rozov <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
R/pkg/tests/fulltests/test_sparkSQL.R | 2 +-
python/pyspark/sql/tests/test_readwriter.py | 7 +++++++
.../sql/execution/datasources/DataSource.scala | 4 +++-
.../spark/sql/JavaDataFrameReaderWriterSuite.java | 19 +++++++++++++++++
.../execution/datasources/DataSourceSuite.scala | 24 ++++++++++++++++++++--
.../sql/test/DataFrameReaderWriterSuite.scala | 10 ++++++++-
6 files changed, 61 insertions(+), 5 deletions(-)
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R
b/R/pkg/tests/fulltests/test_sparkSQL.R
index c93b92edbff8..cada7813c950 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -4000,7 +4000,7 @@ test_that("Call DataFrameWriter.save() API in Java
without path and check argume
# DataFrameWriter.save() without path.
expect_error(write.df(df, source = "csv"),
paste("Error in save :
org.apache.spark.SparkIllegalArgumentException:",
- "Expected exactly one path to be specified"))
+ "'path' is not specified."))
expect_error(write.json(df, jsonPath),
"Error in json : analysis error - \\[PATH_ALREADY_EXISTS\\].*")
expect_error(write.text(df, jsonPath),
diff --git a/python/pyspark/sql/tests/test_readwriter.py
b/python/pyspark/sql/tests/test_readwriter.py
index 683c925eefc2..1d1cc3507f0e 100644
--- a/python/pyspark/sql/tests/test_readwriter.py
+++ b/python/pyspark/sql/tests/test_readwriter.py
@@ -238,6 +238,13 @@ class ReadwriterTestsMixin:
self.assertEqual(join2.columns, ["id", "value_1", "index",
"value_2"])
+ # "[SPARK-51182]: DataFrameWriter should throw dataPathNotSpecifiedError
when path is not
+ # specified"
+ def test_save(self):
+ writer = self.df.write
+ with self.assertRaisesRegex(Exception, "'path' is not specified."):
+ writer.save()
+
class ReadwriterV2TestsMixin:
def test_api(self):
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
index 882bc12a0d29..489f62bd9776 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
@@ -467,8 +467,10 @@ case class DataSource(
val allPaths = paths ++ caseInsensitiveOptions.get("path")
val outputPath = if (allPaths.length == 1) {
makeQualified(new Path(allPaths.head))
- } else {
+ } else if (allPaths.length > 1) {
throw QueryExecutionErrors.multiplePathsSpecifiedError(allPaths)
+ } else {
+ throw QueryExecutionErrors.dataPathNotSpecifiedError()
}
val caseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis
diff --git
a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameReaderWriterSuite.java
b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameReaderWriterSuite.java
index 691fb67bbe90..9a086b9d76f7 100644
---
a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameReaderWriterSuite.java
+++
b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameReaderWriterSuite.java
@@ -20,15 +20,22 @@ package test.org.apache.spark.sql;
import java.io.File;
import java.util.HashMap;
+import org.apache.spark.SparkIllegalArgumentException;
+import org.apache.spark.sql.DataFrameWriter;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.test.TestSparkSession;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.util.Utils;
+
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrowsExactly;
+
public class JavaDataFrameReaderWriterSuite {
private SparkSession spark = new TestSparkSession();
private StructType schema = new StructType().add("s", "string");
@@ -152,4 +159,16 @@ public class JavaDataFrameReaderWriterSuite {
spark.read().schema(schema).orc(new String[]{input, input})
.write().orc(output);
}
+
+ @Test
+ @DisplayName("[SPARK-51182]: DataFrameWriter should throw
dataPathNotSpecifiedError when path " +
+ "is not specified")
+ public void testPathNotSpecified() {
+ DataFrameWriter<Long> dataFrameWriter = spark.range(0).write();
+ SparkIllegalArgumentException e = assertThrowsExactly(
+ SparkIllegalArgumentException.class,
+ () -> dataFrameWriter.save(),
+ "Expected save() to throw SparkIllegalArgumentException when path is
not specified");
+ assertEquals("'path' is not specified.", e.getMessage());
+ }
}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala
index d2acdcfc6205..865a89692d6d 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala
@@ -25,8 +25,8 @@ import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileStatus, Path, RawLocalFileSystem}
import org.scalatest.PrivateMethodTester
-import org.apache.spark.SparkUnsupportedOperationException
-import org.apache.spark.sql.AnalysisException
+import org.apache.spark.{SparkIllegalArgumentException,
SparkUnsupportedOperationException}
+import org.apache.spark.sql.{AnalysisException, SaveMode}
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.util.Utils
@@ -220,6 +220,26 @@ class DataSourceSuite extends SharedSparkSession with
PrivateMethodTester {
"fileSystemClass" -> "org.apache.hadoop.fs.http.HttpsFileSystem",
"method" -> "listStatus"))
}
+
+ test("SPARK-51182: DataFrameWriter should throw dataPathNotSpecifiedError
when path is not " +
+ "specified") {
+ val df = new DataSource(spark, "parquet")
+ checkError(exception = intercept[SparkIllegalArgumentException](
+ df.planForWriting(SaveMode.ErrorIfExists, spark.range(0).logicalPlan)),
+ condition = "_LEGACY_ERROR_TEMP_2047")
+ }
+
+ test("SPARK-51182: DataFrameWriter should throw multiplePathsSpecifiedError
when more than " +
+ "one path is specified") {
+ val dataSources: List[DataSource] = List(
+ new DataSource(spark, "parquet", Seq("/path1"), options = Map("path" ->
"/path2")),
+ new DataSource(spark, "parquet", Seq("/path1", "/path2")))
+ dataSources.foreach(df => checkError(exception =
intercept[SparkIllegalArgumentException](
+ df.planForWriting(SaveMode.ErrorIfExists, spark.range(0).logicalPlan)),
+ condition = "_LEGACY_ERROR_TEMP_2050",
+ parameters = Map("paths" -> "/path1, /path2"))
+ )
+ }
}
object TestPaths {
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
index 300807cf0586..f1f92f73197f 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
@@ -32,7 +32,7 @@ import
org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName
import org.apache.parquet.schema.Type.Repetition
import org.scalatest.BeforeAndAfter
-import org.apache.spark.{SparkContext, TestUtils}
+import org.apache.spark.{SparkContext, SparkIllegalArgumentException,
TestUtils}
import org.apache.spark.internal.io.FileCommitProtocol.TaskCommitMessage
import org.apache.spark.internal.io.HadoopMapReduceCommitProtocol
import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart}
@@ -1467,4 +1467,12 @@ class DataFrameReaderWriterSuite extends QueryTest with
SharedSparkSession with
testRead(spark.read.schema(fileSchema).csv(s"$harPath/test.csv"), data,
fileSchema)
}
+
+ test("SPARK-51182: DataFrameWriter should throw dataPathNotSpecifiedError
when path is not " +
+ "specified") {
+ val dataFrameWriter = spark.range(0).write
+ checkError(
+ exception =
intercept[SparkIllegalArgumentException](dataFrameWriter.save()),
+ condition = "_LEGACY_ERROR_TEMP_2047")
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]