Hi all,
As recorded in https://issues.apache.org/jira/browse/SPARK-16408, when
using spark-sql to execute SQL like:
add file hdfs://xxx/user/test;
if the HDFS path (hdfs://xxx/user/test) is a directory, we get an
exception like:
org.apache.spark.SparkException: Added file hdfs://xxx/user/test is a directory and recursive is not turned on.
    at org.apache.spark.SparkContext.addFile(SparkContext.scala:1372)
    at org.apache.spark.SparkContext.addFile(SparkContext.scala:1340)
    at org.apache.spark.sql.hive.execution.AddFile.run(commands.scala:117)
    at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:58)
    at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:56)
    at org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:70)
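
For reference, SparkContext already exposes an addFile overload that
accepts directories; the SQL command path just never passes recursive =
true. A minimal sketch against the public API (the HDFS path is a
placeholder, the app name/master are only for illustration):

import org.apache.spark.{SparkConf, SparkContext}

val sc = new SparkContext(
  new SparkConf().setAppName("add-file-demo").setMaster("local[*]"))

// addFile(path) defaults to recursive = false, so a directory path
// raises the SparkException shown above:
// sc.addFile("hdfs://xxx/user/test")

// The two-argument overload distributes the directory recursively:
sc.addFile("hdfs://xxx/user/test", recursive = true)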
I think we should add a parameter (spark.input.dir.recursive) to
control the value of recursive, and make it take effect with a small
code change, like:
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
index 6b16d59..3be8553 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
@@ -113,8 +113,9 @@ case class AddFile(path: String) extends RunnableCommand {
   override def run(sqlContext: SQLContext): Seq[Row] = {
     val hiveContext = sqlContext.asInstanceOf[HiveContext]
+    val recursive = sqlContext.sparkContext.getConf.getBoolean("spark.input.dir.recursive", false)
     hiveContext.runSqlHive(s"ADD FILE $path")
-    hiveContext.sparkContext.addFile(path)
+    hiveContext.sparkContext.addFile(path, recursive)
     Seq.empty[Row]
   }
 }
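
With a change like this applied, the behavior could be toggled at
launch time, e.g.:

spark-sql --conf spark.input.dir.recursive=true
spark-sql> add file hdfs://xxx/user/test;

(spark.input.dir.recursive is the name proposed above, not an existing
Spark configuration property.)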