Hi, can Spark, using HiveContext external tables, read sub-directories?
Example: import org.apache.spark.sql.hive.HiveContext import org.apache.spark.sql._ import sqlContext.implicits._ //prepare data and create subdirectories with parquet val df = Seq("id1" -> 1, "id2" -> 4, "id3"-> 5).toDF("id", "value") df.write.parquet("/tmp/df/1") val df2 = Seq("id6"-> 6, "id7"-> 7, "id8"-> 8).toDF("id", "value") df2.write.parquet("/tmp/df/2") val dfall = sqlContext.read.load("/tmp/df/*/") assert(dfall.count == 6) //convert to HiveContext val hc = new HiveContext(sqlContext.sparkContext) hc.sql("SET hive.mapred.supports.subdirectories=true") hc.sql("SET mapreduce.input.fileinputformat.input.dir.recursive=true") hc.sql("create external table testsubdirectories (id string, value string) STORED AS PARQUET location '/tmp/df'") val hcall = hc.sql("select * from testsubdirectories") assert(hcall.count() == 6) //should return 6, but it is 0 because the data is not read from the subdirectories Thanks, Arkadiusz Bicz --------------------------------------------------------------------- To unsubscribe, e-mail: user-unsubscr...@spark.apache.org For additional commands, e-mail: user-h...@spark.apache.org