Repository: spark
Updated Branches:
  refs/heads/branch-2.2 e278876ba -> b48bb3ab2


[SPARK-12868][SQL] Allow adding jars from hdfs

## What changes were proposed in this pull request?
Spark 2.2 is about to be cut, so it would be great if SPARK-12868 could be
resolved before that. There have been several PRs for this, such as
[PR#16324](https://github.com/apache/spark/pull/16324), but all of them have
been inactive for a long time or have been closed.

This PR adds a SparkUrlStreamHandlerFactory, which relies on the URL protocol
to choose the appropriate URLStreamHandlerFactory (for example,
FsUrlStreamHandlerFactory) to create the URLStreamHandler.
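
For context, the delegation idea can be sketched roughly as follows in Scala.
This is a minimal, illustrative sketch only: the class name and the scheme list
are assumptions, and the branch-2.2 backport in the diff below simply registers
Hadoop's FsUrlStreamHandlerFactory directly rather than a delegating factory.

import java.net.{URLStreamHandler, URLStreamHandlerFactory}

import org.apache.hadoop.fs.FsUrlStreamHandlerFactory

// Illustrative delegating factory: Hadoop filesystem schemes are handed to
// FsUrlStreamHandlerFactory; returning null lets the JVM fall back to its
// built-in handlers for everything else (http, file, jar, ...).
class DelegatingUrlStreamHandlerFactory extends URLStreamHandlerFactory {
  private val fsFactory = new FsUrlStreamHandlerFactory()
  // Assumed scheme list, for illustration only.
  private val hadoopSchemes = Set("hdfs", "webhdfs")

  override def createURLStreamHandler(protocol: String): URLStreamHandler = {
    if (hadoopSchemes.contains(protocol)) fsFactory.createURLStreamHandler(protocol)
    else null
  }
}

Whichever factory is used, URL.setURLStreamHandlerFactory can only be called
once per JVM (a second call throws java.lang.Error), which is why the
registration in SharedState below is wrapped in a try/catch that only logs a
warning.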

## How was this patch tested?
1. Added a new unit test.
2. Checked manually (a hypothetical reproduction sketch follows the screenshots).
Before: an exception is thrown with "failed unknown protocol: hdfs"
<img width="914" alt="screen shot 2017-03-17 at 9 07 36 pm" src="https://cloud.githubusercontent.com/assets/8546874/24075277/5abe0a7c-0bd5-11e7-900e-ec3d3105da0b.png">

After:
<img width="1148" alt="screen shot 2017-03-18 at 11 42 18 am" 
src="https://cloud.githubusercontent.com/assets/8546874/24075283/69382a60-0bd5-11e7-8d30-d9405c3aaaba.png";>

Author: Weiqing Yang <yangweiqing...@gmail.com>

Closes #17342 from weiqingy/SPARK-18910.

(cherry picked from commit 2ba1eba371213d1ac3d1fa1552e5906e043c2ee4)
Signed-off-by: Marcelo Vanzin <van...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b48bb3ab
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b48bb3ab
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b48bb3ab

Branch: refs/heads/branch-2.2
Commit: b48bb3ab2c8134f6b533af29a241dce114076720
Parents: e278876
Author: Weiqing Yang <yangweiqing...@gmail.com>
Authored: Wed Apr 26 13:54:40 2017 -0700
Committer: Marcelo Vanzin <van...@cloudera.com>
Committed: Wed Apr 26 13:54:49 2017 -0700

----------------------------------------------------------------------
 .../org/apache/spark/sql/internal/SharedState.scala    | 10 +++++++++-
 .../scala/org/apache/spark/sql/SQLQuerySuite.scala     | 13 +++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/b48bb3ab/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
index f834569..a93b701 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
@@ -17,12 +17,14 @@
 
 package org.apache.spark.sql.internal
 
+import java.net.URL
 import java.util.Locale
 
 import scala.reflect.ClassTag
 import scala.util.control.NonFatal
 
 import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.FsUrlStreamHandlerFactory
 
 import org.apache.spark.{SparkConf, SparkContext, SparkException}
 import org.apache.spark.internal.Logging
@@ -154,7 +156,13 @@ private[sql] class SharedState(val sparkContext: SparkContext) extends Logging {
   }
 }
 
-object SharedState {
+object SharedState extends Logging {
+  try {
+    URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory())
+  } catch {
+    case e: Error =>
+      logWarning("URL.setURLStreamHandlerFactory failed to set 
FsUrlStreamHandlerFactory")
+  }
 
   private val HIVE_EXTERNAL_CATALOG_CLASS_NAME = "org.apache.spark.sql.hive.HiveExternalCatalog"
 

http://git-wip-us.apache.org/repos/asf/spark/blob/b48bb3ab/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 0dd9296..3ecbf96 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql
 
 import java.io.File
 import java.math.MathContext
+import java.net.{MalformedURLException, URL}
 import java.sql.Timestamp
 import java.util.concurrent.atomic.AtomicBoolean
 
@@ -2606,4 +2607,16 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
       case ae: AnalysisException => assert(ae.plan == null && ae.getMessage == ae.getSimpleMessage)
     }
   }
+
+  test("SPARK-12868: Allow adding jars from hdfs ") {
+    val jarFromHdfs = "hdfs://doesnotmatter/test.jar"
+    val jarFromInvalidFs = "fffs://doesnotmatter/test.jar"
+
+    // if 'hdfs' is not supported, MalformedURLException will be thrown
+    new URL(jarFromHdfs)
+
+    intercept[MalformedURLException] {
+      new URL(jarFromInvalidFs)
+    }
+  }
 }

