GitHub user dongjoon-hyun commented on a diff in the pull request: https://github.com/apache/spark/pull/19882#discussion_r155390570

--- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala ---
@@ -614,27 +523,88 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest {
     }
   }

-  test("read from multiple orc input paths") {
-    val path1 = Utils.createTempDir()
-    val path2 = Utils.createTempDir()
-    makeOrcFile((1 to 10).map(Tuple1.apply), path1)
-    makeOrcFile((1 to 10).map(Tuple1.apply), path2)
-    assertResult(20)(read.orc(path1.getCanonicalPath, path2.getCanonicalPath).count())
-  }
+  test("read from multiple orc input paths") {
+    val path1 = Utils.createTempDir()
+    val path2 = Utils.createTempDir()
+    makeOrcFile((1 to 10).map(Tuple1.apply), path1)
+    makeOrcFile((1 to 10).map(Tuple1.apply), path2)
+    val df = spark.read.orc(path1.getCanonicalPath, path2.getCanonicalPath)
+    assert(df.count() == 20)
+  }
+}
+
+class OrcQuerySuite extends OrcQueryTest with SharedSQLContext {
+  import testImplicits._
+
+  test("LZO compression options for writing to an ORC file") {
+    withTempPath { file =>
+      spark.range(0, 10).write
+        .option("compression", "LZO")
+        .orc(file.getCanonicalPath)
+
+      val maybeOrcFile = file.listFiles().find(_.getName.endsWith(".lzo.orc"))
+      assert(maybeOrcFile.isDefined)
+
+      val orcFilePath = new Path(maybeOrcFile.get.getAbsolutePath)
+      val conf = OrcFile.readerOptions(new Configuration())
+      assert("LZO" === OrcFile.createReader(orcFilePath, conf).getCompressionKind.name)
+    }
+  }
+
+  test("Schema discovery on empty ORC files") {
+    // Verifies that SPARK-8501 is fixed.
+    withTempPath { dir =>
+      val path = dir.getCanonicalPath
+
+      withTable("empty_orc") {
+        withTempView("empty", "single") {
+          spark.sql(
+            s"""CREATE TABLE empty_orc(key INT, value STRING)
+               |USING ORC
+               |LOCATION '${dir.toURI}'
+             """.stripMargin)
+
+          val emptyDF = Seq.empty[(Int, String)].toDF("key", "value").coalesce(1)
+          emptyDF.createOrReplaceTempView("empty")
+
+          // This creates one empty ORC file with the ORC SerDe. We use this trick because
+          // the Spark SQL ORC data source always avoids writing empty ORC files.
+          spark.sql(
+            s"""INSERT INTO TABLE empty_orc
+               |SELECT key, value FROM empty
+             """.stripMargin)
+
+          val df = spark.read.orc(path)
+          assert(df.schema === emptyDF.schema.asNullable)
+          checkAnswer(df, emptyDF)
+        }
+      }
+    }
+  }
+
+  test("SPARK-21791 ORC should support column names with dot") {
--- End diff --

The old OrcFileFormat fails on this test case. Do you mean adding an exception-catching test case?
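
For illustration, a minimal sketch of what such an exception-catching test might look like. It assumes the old Hive-based reader can be selected via the `spark.sql.orc.impl` configuration and that it throws some exception on dotted column names; the exact exception type is not stated in this thread, so a broad `intercept[Exception]` is used.

    // A sketch only, not taken from the PR: assumes `spark.sql.orc.impl` selects
    // the old Hive-based implementation and that dotted column names make it fail.
    test("SPARK-21791 old OrcFileFormat fails on column names with dot") {
      withSQLConf("spark.sql.orc.impl" -> "hive") {
        withTempPath { dir =>
          val path = dir.getCanonicalPath
          intercept[Exception] {
            // Rename the single `id` column to one containing a dot,
            // then round-trip it through ORC.
            spark.range(10).toDF("col.dot").write.orc(path)
            spark.read.orc(path).collect()
          }
        }
      }
    }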