Teng Qiu created SPARK-3138: ------------------------------- Summary: sqlContext.parquetFile should be able to take a single file as parameter Key: SPARK-3138 URL: https://issues.apache.org/jira/browse/SPARK-3138 Project: Spark Issue Type: Bug Components: SQL Reporter: Teng Qiu
http://apache-spark-user-list.1001560.n3.nabble.com/sqlContext-parquetFile-path-fails-if-path-is-a-file-but-succeeds-if-a-directory-tp12345.html To reproduce this issue in spark-shell: {code:java} val sqlContext = new org.apache.spark.sql.SQLContext(sc) import sqlContext._ import org.apache.hadoop.fs.{FileSystem, Path} case class TestRDDEntry(key: Int, value: String) val path = "/tmp/parquet_test" sc.parallelize((1 to 100)).map(i => TestRDDEntry(i, s"val_$i")).coalesce(1).saveAsParquetFile(path) val fsPath = new Path(path) val fs: FileSystem = fsPath.getFileSystem(sc.hadoopConfiguration) val children = fs.listStatus(fsPath).filter(_.getPath.getName.endsWith(".parquet")) val readFile = sqlContext.parquetFile(path + "/" + children(0).getPath.getName) {code} It throws the following exception: {code} java.lang.IllegalArgumentException: Expected file:/tmp/parquet_test/part-r-1.parquet for be a directory with Parquet files/metadata at org.apache.spark.sql.parquet.ParquetTypesConverter$.readMetaData(ParquetTypes.scala:374) at org.apache.spark.sql.parquet.ParquetTypesConverter$.readSchemaFromFile(ParquetTypes.scala:414) at org.apache.spark.sql.parquet.ParquetRelation.<init>(ParquetRelation.scala:66) {code} -- This message was sent by Atlassian JIRA (v6.2#6252) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org