Repository: spark
Updated Branches:
  refs/heads/branch-2.0 e53a8f218 -> 7e25131a9


[SPARK-15416][SQL] Display a better message for not finding classes removed in 
Spark 2.0

## What changes were proposed in this pull request?

When a `NoClassDefFoundError` or `ClassNotFoundException` is caught, check whether 
the class name is one that was removed in Spark 2.0. If so, the user must be using 
an incompatible library and we can provide a better message.

## How was this patch tested?

1. Run `bin/pyspark --packages com.databricks:spark-avro_2.10:2.0.1`
2. type 
`sqlContext.read.format("com.databricks.spark.avro").load("src/test/resources/episodes.avro")`.

It will show `java.lang.ClassNotFoundException: 
org.apache.spark.sql.sources.HadoopFsRelationProvider is removed in Spark 2.0. 
Please check if your library is compatible with Spark 2.0`

Author: Shixiong Zhu <shixi...@databricks.com>

Closes #13201 from zsxwing/better-message.

(cherry picked from commit 16ba71aba4e68bbb892d4ceb38d6d1d135d63fd3)
Signed-off-by: Michael Armbrust <mich...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7e25131a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7e25131a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7e25131a

Branch: refs/heads/branch-2.0
Commit: 7e25131a93f2f13ecb525179bbe4ad77def84292
Parents: e53a8f2
Author: Shixiong Zhu <shixi...@databricks.com>
Authored: Thu May 19 18:31:05 2016 -0700
Committer: Michael Armbrust <mich...@databricks.com>
Committed: Thu May 19 18:31:17 2016 -0700

----------------------------------------------------------------------
 .../sql/execution/datasources/DataSource.scala  | 61 ++++++++++++++------
 1 file changed, 44 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/7e25131a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
index ce45168..ccad9b3 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
@@ -83,6 +83,14 @@ case class DataSource(
     "com.databricks.spark.csv" -> classOf[csv.DefaultSource].getCanonicalName
   )
 
+  /**
+   * Classes that were removed in Spark 2.0. Used to detect incompatible 
libraries for Spark 2.0.
+   */
+  private val spark2RemovedClasses = Set(
+    "org.apache.spark.sql.DataFrame",
+    "org.apache.spark.sql.sources.HadoopFsRelationProvider",
+    "org.apache.spark.Logging")
+
   /** Given a provider name, look up the data source class definition. */
   private def lookupDataSource(provider0: String): Class[_] = {
     val provider = backwardCompatibilityMap.getOrElse(provider0, provider0)
@@ -93,26 +101,45 @@ case class DataSource(
     
serviceLoader.asScala.filter(_.shortName().equalsIgnoreCase(provider)).toList 
match {
       // the provider format did not match any given registered aliases
       case Nil =>
-        
Try(loader.loadClass(provider)).orElse(Try(loader.loadClass(provider2))) match {
-          case Success(dataSource) =>
-            // Found the data source using fully qualified path
-            dataSource
-          case Failure(error) =>
-            if (provider.startsWith("org.apache.spark.sql.hive.orc")) {
-              throw new ClassNotFoundException(
-                "The ORC data source must be used with Hive support enabled.", 
error)
-            } else {
-              if (provider == "avro" || provider == 
"com.databricks.spark.avro") {
+        try {
+          
Try(loader.loadClass(provider)).orElse(Try(loader.loadClass(provider2))) match {
+            case Success(dataSource) =>
+              // Found the data source using fully qualified path
+              dataSource
+            case Failure(error) =>
+              if (error.isInstanceOf[ClassNotFoundException]) {
+                val className = error.getMessage
+                if (spark2RemovedClasses.contains(className)) {
+                  throw new ClassNotFoundException(s"$className is removed in 
Spark 2.0. " +
+                    "Please check if your library is compatible with Spark 
2.0")
+                }
+              }
+              if (provider.startsWith("org.apache.spark.sql.hive.orc")) {
                 throw new ClassNotFoundException(
-                  s"Failed to find data source: $provider. Please use Spark 
package " +
-                  "http://spark-packages.org/package/databricks/spark-avro";,
-                  error)
+                  "The ORC data source must be used with Hive support 
enabled.", error)
               } else {
-                throw new ClassNotFoundException(
-                  s"Failed to find data source: $provider. Please find 
packages at " +
-                  "http://spark-packages.org";,
-                  error)
+                if (provider == "avro" || provider == 
"com.databricks.spark.avro") {
+                  throw new ClassNotFoundException(
+                    s"Failed to find data source: $provider. Please use Spark 
package " +
+                      
"http://spark-packages.org/package/databricks/spark-avro";,
+                    error)
+                } else {
+                  throw new ClassNotFoundException(
+                    s"Failed to find data source: $provider. Please find 
packages at " +
+                      "http://spark-packages.org";,
+                    error)
+                }
               }
+          }
+        } catch {
+          case e: NoClassDefFoundError => // This one won't be caught by Scala 
NonFatal
+            // NoClassDefFoundError's class name uses "/" rather than "." for 
packages
+            val className = e.getMessage.replaceAll("/", ".")
+            if (spark2RemovedClasses.contains(className)) {
+              throw new ClassNotFoundException(s"$className was removed in 
Spark 2.0. " +
+                "Please check if your library is compatible with Spark 2.0", e)
+            } else {
+              throw e
             }
         }
       case head :: Nil =>


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to