[GitHub] spark pull request #20076: [SPARK-21786][SQL] When acquiring 'compressionCod...
Github user asfgit closed the pull request at: https://github.com/apache/spark/pull/20076 --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20076: [SPARK-21786][SQL] When acquiring 'compressionCod...
Github user gatorsmile commented on a diff in the pull request: https://github.com/apache/spark/pull/20076#discussion_r159889119 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala --- @@ -323,11 +323,13 @@ object SQLConf { .createWithDefault(false) val PARQUET_COMPRESSION = buildConf("spark.sql.parquet.compression.codec") -.doc("Sets the compression codec use when writing Parquet files. Acceptable values include: " + - "uncompressed, snappy, gzip, lzo.") +.doc("Sets the compression codec used when writing Parquet files. If other compression codec " + + "configuration was found through hive or parquet, the precedence would be `compression`, " + --- End diff -- > Sets the compression codec used when writing Parquet files. If either `compression` or `parquet.compression` is specified in the table-specific options/properties, the precedence would be `compression`, ... --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20076: [SPARK-21786][SQL] When acquiring 'compressionCod...
Github user gatorsmile commented on a diff in the pull request: https://github.com/apache/spark/pull/20076#discussion_r159885686 --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCompressionCodecPrecedenceSuite.scala --- @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources.parquet + +import java.io.File + +import scala.collection.JavaConverters._ + +import org.apache.hadoop.fs.Path +import org.apache.parquet.hadoop.ParquetOutputFormat + +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSQLContext + +class ParquetCompressionCodecPrecedenceSuite extends ParquetTest with SharedSQLContext { + test("Test `spark.sql.parquet.compression.codec` config") { +Seq("NONE", "UNCOMPRESSED", "SNAPPY", "GZIP", "LZO").foreach { c => + withSQLConf(SQLConf.PARQUET_COMPRESSION.key -> c) { +val expected = if (c == "NONE") "UNCOMPRESSED" else c +val option = new ParquetOptions(Map.empty[String, String], spark.sessionState.conf) +assert(option.compressionCodecClassName == expected) + } +} + } + + test("[SPARK-21786] Test Acquiring 'compressionCodecClassName' for parquet in right order.") { +// When "compression" is configured, it should be the first choice. +withSQLConf(SQLConf.PARQUET_COMPRESSION.key -> "snappy") { + val props = Map("compression" -> "uncompressed", ParquetOutputFormat.COMPRESSION -> "gzip") + val option = new ParquetOptions(props, spark.sessionState.conf) + assert(option.compressionCodecClassName == "UNCOMPRESSED") +} + +// When "compression" is not configured, "parquet.compression" should be the preferred choice. +withSQLConf(SQLConf.PARQUET_COMPRESSION.key -> "snappy") { + val props = Map(ParquetOutputFormat.COMPRESSION -> "gzip") + val option = new ParquetOptions(props, spark.sessionState.conf) + assert(option.compressionCodecClassName == "GZIP") +} + +// When both "compression" and "parquet.compression" are not configured, +// spark.sql.parquet.compression.codec should be the right choice. 
+withSQLConf(SQLConf.PARQUET_COMPRESSION.key -> "snappy") { + val props = Map.empty[String, String] + val option = new ParquetOptions(props, spark.sessionState.conf) + assert(option.compressionCodecClassName == "SNAPPY") +} + } + + private def getTableCompressionCodec(path: String): Seq[String] = { +val hadoopConf = spark.sessionState.newHadoopConf() +val codecs = for { + footer <- readAllFootersWithoutSummaryFiles(new Path(path), hadoopConf) + block <- footer.getParquetMetadata.getBlocks.asScala + column <- block.getColumns.asScala +} yield column.getCodec.name() +codecs.distinct + } + + private def createTableWithCompression( + tableName: String, + isPartitioned: Boolean, + compressionCodec: String, + rootDir: File): Unit = { +val options = + s"""OPTIONS('path'='${rootDir.toURI.toString.stripSuffix("/")}/$tableName', + |'parquet.compression'='$compressionCodec')""".stripMargin +val partitionCreate = if (isPartitioned) "PARTITIONED BY (p)" else "" +sql(s"""CREATE TABLE $tableName USING Parquet $options $partitionCreate +|as select 1 as col1, 2 as p""".stripMargin) --- End diff -- ``` val options = s""" |OPTIONS('path'='${rootDir.toURI.toString.stripSuffix("/")}/$tableName', |'parquet.compression'='$compressionCodec') """.stripMargin val partitionCreate = if (isPartitioned) "PARTITIONED BY (p)" else "" sql( s""" |CREATE TABLE $tableName USING Parquet $options $partitionCreate |AS SELECT 1 AS col1, 2 AS p """.stripMargin) ``` --
[GitHub] spark pull request #20076: [SPARK-21786][SQL] When acquiring 'compressionCod...
Github user fjh100456 commented on a diff in the pull request: https://github.com/apache/spark/pull/20076#discussion_r159802320 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala --- @@ -27,7 +28,7 @@ import org.apache.spark.sql.internal.SQLConf /** * Options for the Parquet data source. */ -private[parquet] class ParquetOptions( --- End diff -- Yes, it should be revived. Thanks. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20076: [SPARK-21786][SQL] When acquiring 'compressionCod...
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20076#discussion_r159653433 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala --- @@ -27,7 +28,7 @@ import org.apache.spark.sql.internal.SQLConf /** * Options for the Parquet data source. */ -private[parquet] class ParquetOptions( --- End diff -- Can we revive `private[parquet]`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20076: [SPARK-21786][SQL] When acquiring 'compressionCod...
Github user fjh100456 commented on a diff in the pull request: https://github.com/apache/spark/pull/20076#discussion_r159218648 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala --- @@ -364,7 +366,9 @@ object SQLConf { .createWithDefault(true) val ORC_COMPRESSION = buildConf("spark.sql.orc.compression.codec") -.doc("Sets the compression codec use when writing ORC files. Acceptable values include: " + +.doc("Sets the compression codec use when writing ORC files. If other compression codec " + --- End diff -- Thank you. I have fixed them. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20076: [SPARK-21786][SQL] When acquiring 'compressionCod...
Github user fjh100456 commented on a diff in the pull request: https://github.com/apache/spark/pull/20076#discussion_r159218522 --- Diff: sql/core/src/test/scala/org/apache/spark/sql/CompressionCodecPrecedenceSuite.scala --- @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql --- End diff -- Thank you. I have moved it to `org.apache.spark.sql.execution.datasources.parquet`. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20076: [SPARK-21786][SQL] When acquiring 'compressionCod...
Github user jaceklaskowski commented on a diff in the pull request: https://github.com/apache/spark/pull/20076#discussion_r159142765 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala --- @@ -323,11 +323,13 @@ object SQLConf { .createWithDefault(false) val PARQUET_COMPRESSION = buildConf("spark.sql.parquet.compression.codec") -.doc("Sets the compression codec use when writing Parquet files. Acceptable values include: " + - "uncompressed, snappy, gzip, lzo.") +.doc("Sets the compression codec use when writing Parquet files. If other compression codec " + --- End diff -- s/use when/used when --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20076: [SPARK-21786][SQL] When acquiring 'compressionCod...
Github user jaceklaskowski commented on a diff in the pull request: https://github.com/apache/spark/pull/20076#discussion_r159142783 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala --- @@ -364,7 +366,9 @@ object SQLConf { .createWithDefault(true) val ORC_COMPRESSION = buildConf("spark.sql.orc.compression.codec") -.doc("Sets the compression codec use when writing ORC files. Acceptable values include: " + +.doc("Sets the compression codec use when writing ORC files. If other compression codec " + --- End diff -- s/use when/used when --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20076: [SPARK-21786][SQL] When acquiring 'compressionCod...
Github user jaceklaskowski commented on a diff in the pull request: https://github.com/apache/spark/pull/20076#discussion_r159142760 --- Diff: docs/sql-programming-guide.md --- @@ -953,8 +953,10 @@ Configuration of Parquet can be done using the `setConf` method on `SparkSession spark.sql.parquet.compression.codec snappy -Sets the compression codec use when writing Parquet files. Acceptable values include: -uncompressed, snappy, gzip, lzo. +Sets the compression codec use when writing Parquet files. If other compression codec --- End diff -- s/use when/used when --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20076: [SPARK-21786][SQL] When acquiring 'compressionCod...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/20076#discussion_r159136922 --- Diff: sql/core/src/test/scala/org/apache/spark/sql/CompressionCodecPrecedenceSuite.scala --- @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql --- End diff -- Should we move this to `org.apache.spark.sql.execution.datasources.parquet`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20076: [SPARK-21786][SQL] When acquiring 'compressionCod...
Github user gatorsmile commented on a diff in the pull request: https://github.com/apache/spark/pull/20076#discussion_r158663731 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala --- @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive --- End diff -- Move it to sql/core. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20076: [SPARK-21786][SQL] When acquiring 'compressionCod...
Github user gatorsmile commented on a diff in the pull request: https://github.com/apache/spark/pull/20076#discussion_r158663721 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala --- @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive + +import org.apache.parquet.hadoop.ParquetOutputFormat + +import org.apache.spark.sql.execution.datasources.parquet.ParquetOptions +import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SQLTestUtils + +class CompressionCodecSuite extends TestHiveSingleton with SQLTestUtils { --- End diff -- This suite does not need `TestHiveSingleton `. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20076: [SPARK-21786][SQL] When acquiring 'compressionCod...
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20076#discussion_r158636269 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala --- @@ -42,8 +43,15 @@ private[parquet] class ParquetOptions( * Acceptable values are defined in [[shortParquetCompressionCodecNames]]. */ val compressionCodecClassName: String = { --- End diff -- Let's do Parquet and ORC ones here for now if that's also fine to @gatorsmile. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20076: [SPARK-21786][SQL] When acquiring 'compressionCod...
Github user fjh100456 commented on a diff in the pull request: https://github.com/apache/spark/pull/20076#discussion_r158632847 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala --- @@ -42,8 +43,15 @@ private[parquet] class ParquetOptions( * Acceptable values are defined in [[shortParquetCompressionCodecNames]]. */ val compressionCodecClassName: String = { --- End diff -- @gatorsmile @HyukjinKwon In `TextOptions`, `JSONOptions` and `CSVOptions`, it's an "Option[String]", but in `OrcOptions` and `ParquetOptions`, it's a "String". Is it OK to just change `compressionCodecClassName` in `OrcOptions` and `ParquetOptions` to `compressionCodecName`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20076: [SPARK-21786][SQL] When acquiring 'compressionCod...
Github user fjh100456 commented on a diff in the pull request: https://github.com/apache/spark/pull/20076#discussion_r158631203 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala --- @@ -42,8 +43,15 @@ private[parquet] class ParquetOptions( * Acceptable values are defined in [[shortParquetCompressionCodecNames]]. */ val compressionCodecClassName: String = { --- End diff -- So, change all `compressionCodecClassName` and `compressionCodec` to `compressionCodecName`? In `TextOptions`, `JSONOptions` and `CSVOptions` too? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20076: [SPARK-21786][SQL] When acquiring 'compressionCod...
Github user gatorsmile commented on a diff in the pull request: https://github.com/apache/spark/pull/20076#discussion_r158630439 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala --- @@ -42,8 +43,15 @@ private[parquet] class ParquetOptions( * Acceptable values are defined in [[shortParquetCompressionCodecNames]]. */ val compressionCodecClassName: String = { --- End diff -- `compressionCodecName` is also fine with me. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20076: [SPARK-21786][SQL] When acquiring 'compressionCod...
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20076#discussion_r158629711 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala --- @@ -42,8 +43,15 @@ private[parquet] class ParquetOptions( * Acceptable values are defined in [[shortParquetCompressionCodecNames]]. */ val compressionCodecClassName: String = { --- End diff -- We could alternatively say `compressionCodecName` here. It's rather names like `UNCOMPRESSED`, `LZO`, etc in this case. For the text based sources, they are canonical class names so I am okay with `compressionCodecClassName` but for ORC and Parquet these are not classes. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20076: [SPARK-21786][SQL] When acquiring 'compressionCod...
Github user gatorsmile commented on a diff in the pull request: https://github.com/apache/spark/pull/20076#discussion_r158629541 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala --- @@ -42,8 +43,15 @@ private[parquet] class ParquetOptions( * Acceptable values are defined in [[shortParquetCompressionCodecNames]]. */ val compressionCodecClassName: String = { --- End diff -- `compressionCodecClassName` is a better name. We should change all the others to this. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20076: [SPARK-21786][SQL] When acquiring 'compressionCod...
Github user fjh100456 commented on a diff in the pull request: https://github.com/apache/spark/pull/20076#discussion_r158628479 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala --- @@ -42,8 +43,15 @@ private[parquet] class ParquetOptions( * Acceptable values are defined in [[shortParquetCompressionCodecNames]]. */ val compressionCodecClassName: String = { --- End diff -- @HyukjinKwon It seems you're right. @gatorsmile Are we mistaken? Should we not change `ParquetOptions`'s `compressionCodec` to `compressionCodecClassName`, since `OrcOptions` and `TextOptions` both use `compressionCodec`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20076: [SPARK-21786][SQL] When acquiring 'compressionCod...
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20076#discussion_r158627363 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala --- @@ -42,8 +43,15 @@ private[parquet] class ParquetOptions( * Acceptable values are defined in [[shortParquetCompressionCodecNames]]. */ val compressionCodecClassName: String = { --- End diff -- Can we change `compressionCodecClassName` to `compressionCodec` instead? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20076: [SPARK-21786][SQL] When acquiring 'compressionCod...
GitHub user fjh100456 opened a pull request: https://github.com/apache/spark/pull/20076 [SPARK-21786][SQL] When acquiring 'compressionCodecClassName' in 'ParquetOptions', `parquet.compression` needs to be considered. [SPARK-21786][SQL] When acquiring 'compressionCodecClassName' in 'ParquetOptions', `parquet.compression` needs to be considered. ## What changes were proposed in this pull request? 1.Increased acquiring 'compressionCodecClassName' from `parquet.compression`,and the precedence order is `compression`,`parquet.compression`,`spark.sql.parquet.compression.codec`, just like what we do in `OrcOptions`. 2.Change `spark.sql.parquet.compression.codec` to support "none".Actually in `ParquetOptions`,we do support "none" as equivalent to "uncompressed", but it does not allowed to configured to "none". 3.Change `compressionCode` to `compressionCodecClassName`. ## How was this patch tested? Add test. You can merge this pull request into a Git repository by running: $ git pull https://github.com/fjh100456/spark ParquetOptionIssue Alternatively you can review and apply these changes as the patch at: https://github.com/apache/spark/pull/20076.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #20076 commit 9bbfe6ef4b5a418373c2250ad676233fb05df7f7 Author: fjh100456 Date: 2017-12-25T02:29:53Z [SPARK-21786][SQL] When acquiring 'compressionCodecClassName' in 'ParquetOptions', `parquet.compression` needs to be considered. ## What changes were proposed in this pull request? 1.Increased acquiring 'compressionCodecClassName' from `parquet.compression`,and the order is `compression`,`parquet.compression`,`spark.sql.parquet.compression.codec`, just like what we do in `OrcOptions`. 2.Change `spark.sql.parquet.compression.codec` to support "none".Actually in `ParquetOptions`,we do support "none" as equivalent to "uncompressed", but it does not allowed to configured to "none". 
## How was this patch tested? Manual test. commit 48cf108ed5c3298eb860d9735b439ac89d65765e Author: fjh100456 Date: 2017-12-25T02:30:24Z [SPARK-21786][SQL] When acquiring 'compressionCodecClassName' in 'ParquetOptions', `parquet.compression` needs to be considered. ## What changes were proposed in this pull request? 1.Increased acquiring 'compressionCodecClassName' from `parquet.compression`,and the order is `compression`,`parquet.compression`,`spark.sql.parquet.compression.codec`, just like what we do in `OrcOptions`. 2.Change `spark.sql.parquet.compression.codec` to support "none".Actually in `ParquetOptions`,we do support "none" as equivalent to "uncompressed", but it does not allowed to configured to "none". ## How was this patch tested? Manual test. commit 5dbd3edf9e086433d3d3fe9c0ead887d799c61d3 Author: fjh100456 Date: 2017-12-25T02:34:29Z spark.sql.parquet.compression.codec[SPARK-21786][SQL] When acquiring 'compressionCodecClassName' in 'ParquetOptions', `parquet.compression` needs to be considered. ## What changes were proposed in this pull request? 1.Increased acquiring 'compressionCodecClassName' from `parquet.compression`,and the order is `compression`,`parquet.compression`,`spark.sql.parquet.compression.codec`, just like what we do in `OrcOptions`. 2.Change `spark.sql.parquet.compression.codec` to support "none".Actually in `ParquetOptions`,we do support "none" as equivalent to "uncompressed", but it does not allowed to configured to "none". ## How was this patch tested? Manual test. commit 5124f1b560e942c0dc23af31336317a4b995dd8f Author: fjh100456 Date: 2017-12-25T07:06:26Z spark.sql.parquet.compression.codec[SPARK-21786][SQL] When acquiring 'compressionCodecClassName' in 'ParquetOptions', `parquet.compression` needs to be considered. ## What changes were proposed in this pull request? 
1.Increased acquiring 'compressionCodecClassName' from `parquet.compression`,and the order is `compression`,`parquet.compression`,`spark.sql.parquet.compression.codec`, just like what we do in `OrcOptions`. 2.Change `spark.sql.parquet.compression.codec` to support "none".Actually in `ParquetOptions`,we do support "none" as equivalent to "uncompressed", but it does not allowed to configured to "none". 3.Change `compressionCode` to `compressionCodecClassName`. ## How was this patch tested? Manual test. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org