Github user liancheng commented on a diff in the pull request: https://github.com/apache/spark/pull/9060#discussion_r44765188 --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala --- @@ -513,6 +515,41 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext { } } + test("SPARK-11044 Parquet writer version fixed as version1 ") { + + // For dictionary encoding, Parquet changes the encoding types according to its writer version + // So, this test checks the encoding types in order to ensure that the file is written with + // writer version2. + withTempPath { dir => + val clonedConf = new Configuration(hadoopConfiguration) + try { + + // Write a Parquet file with writer version 2 + hadoopConfiguration.set(ParquetOutputFormat.WRITER_VERSION, + ParquetProperties.WriterVersion.PARQUET_2_0.toString) + + // By default, dictionary encoding is enabled from Parquet 1.2.0 but + // it is enabled just in case. + hadoopConfiguration.setBoolean(ParquetOutputFormat.ENABLE_DICTIONARY, true) + val path = s"${dir.getCanonicalPath}/part-r-0.parquet" + sqlContext.range(1 << 16).selectExpr("(id % 4) AS i") + .coalesce(1).write.mode("overwrite").parquet(path) + + val blockMetadata = readFooter(new Path(path), hadoopConfiguration).getBlocks.asScala.head + val columnChunkMetadata = blockMetadata.getColumns.asScala.head + + // If the file is written with version 2, this should include + // [[Encoding.RLE_DICTIONARY]] type. For version 1, it is Encoding.PLAIN_DICTIONARY --- End diff -- BTW, the `[[...]]` notation is only useful when writing ScalaDoc. In the case of inline comments like this, you may either omit the brackets or use backquotes to emphasize that the quoted part is a Scala/Java entity.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org