Hi, all: As recorded in https://issues.apache.org/jira/browse/SPARK-16408, when using Spark SQL to execute a statement like: add file hdfs://xxx/user/test; if the HDFS path (hdfs://xxx/user/test) is a directory, then we get an exception like:
org.apache.spark.SparkException: Added file hdfs://xxx/user/test is a directory and recursive is not turned on. at org.apache.spark.SparkContext.addFile(SparkContext.scala:1372) at org.apache.spark.SparkContext.addFile(SparkContext.scala:1340) at org.apache.spark.sql.hive.execution.AddFile.run(commands.scala:117) at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:58) at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:56) at org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:70) I think we should add a configuration parameter (spark.input.dir.recursive) to control the value of recursive, and make this parameter work by modifying the code, for example: diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala index 6b16d59..3be8553 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala @@ -113,8 +113,9 @@ case class AddFile(path: String) extends RunnableCommand { override def run(sqlContext: SQLContext): Seq[Row] = { val hiveContext = sqlContext.asInstanceOf[HiveContext] + val recursive = sqlContext.sparkContext.getConf.getBoolean("spark.input.dir.recursive", false) hiveContext.runSqlHive(s"ADD FILE $path") - hiveContext.sparkContext.addFile(path) + hiveContext.sparkContext.addFile(path, recursive) Seq.empty[Row] } }