Hi all,
   As recorded in https://issues.apache.org/jira/browse/SPARK-16408, when
using spark-sql to execute SQL like:
   add file hdfs://xxx/user/test;
if the HDFS path (hdfs://xxx/user/test) is a directory, we get an exception
like:

org.apache.spark.SparkException: Added file hdfs://xxx/user/test is a
directory and recursive is not turned on.
        at org.apache.spark.SparkContext.addFile(SparkContext.scala:1372)
        at org.apache.spark.SparkContext.addFile(SparkContext.scala:1340)
        at org.apache.spark.sql.hive.execution.AddFile.run(commands.scala:117)
        at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:58)
        at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:56)
        at org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:70)
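
   Note that SparkContext already has an addFile overload that takes a
recursive flag; the problem is that the SQL ADD FILE code path always calls
the one-argument version, which leaves recursive set to false. From
application code a directory can already be added directly (a minimal
illustration, where sc is the active SparkContext):

   // The recursive overload already exists on SparkContext.
   sc.addFile("hdfs://xxx/user/test", recursive = true)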


   I think we should add a parameter (spark.input.dir.recursive) to
control the value of recursive, and make it take effect with a small code
change, for example:

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
index 6b16d59..3be8553 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
@@ -113,8 +113,9 @@ case class AddFile(path: String) extends RunnableCommand {
 
   override def run(sqlContext: SQLContext): Seq[Row] = {
     val hiveContext = sqlContext.asInstanceOf[HiveContext]
+    val recursive = sqlContext.sparkContext.getConf.getBoolean("spark.input.dir.recursive", false)
     hiveContext.runSqlHive(s"ADD FILE $path")
-    hiveContext.sparkContext.addFile(path)
+    hiveContext.sparkContext.addFile(path, recursive)
     Seq.empty[Row]
   }
 }
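
   With a change like this applied, recursive adds could then be enabled
when launching spark-sql (a sketch; spark.input.dir.recursive is the
property proposed above, not an existing Spark configuration):

   $ spark-sql --conf spark.input.dir.recursive=true
   spark-sql> add file hdfs://xxx/user/test;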
