spark git commit: [SPARK-21786][SQL] The 'spark.sql.parquet.compression.codec' and 'spark.sql.orc.compression.codec' configuration doesn't take effect on hive table writing

2018-01-20 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/branch-2.3 e11d5eaf7 -> b9c1367b7


[SPARK-21786][SQL] The 'spark.sql.parquet.compression.codec' and 
'spark.sql.orc.compression.codec' configuration doesn't take effect on hive 
table writing

[SPARK-21786][SQL] The 'spark.sql.parquet.compression.codec' and 
'spark.sql.orc.compression.codec' configuration doesn't take effect on hive 
table writing

What changes were proposed in this pull request?

When writing Hive tables, pass the value of 
‘spark.sql.parquet.compression.codec’ to ‘parquet.compression’,
and pass the value of ‘spark.sql.orc.compression.codec’ to ‘orc.compress’.

How was this patch tested?

Added a new test suite (CompressionCodecSuite).

Note:
This is the same issue mentioned in #19218. That branch was deleted 
by mistake, so this new PR replaces it.

gatorsmile maropu dongjoon-hyun discipleforteen

Author: fjh100456 
Author: Takeshi Yamamuro 
Author: Wenchen Fan 
Author: gatorsmile 
Author: Yinan Li 
Author: Marcelo Vanzin 
Author: Juliusz Sompolski 
Author: Felix Cheung 
Author: jerryshao 
Author: Li Jin 
Author: Gera Shegalov 
Author: chetkhatri 
Author: Joseph K. Bradley 
Author: Bago Amirbekian 
Author: Xianjin YE 
Author: Bruce Robbins 
Author: zuotingbing 
Author: Kent Yao 
Author: hyukjinkwon 
Author: Adrian Ionescu 

Closes #20087 from fjh100456/HiveTableWriting.

(cherry picked from commit 00d169156d4b1c91d2bcfd788b254b03c509dc41)
Signed-off-by: gatorsmile 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b9c1367b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b9c1367b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b9c1367b

Branch: refs/heads/branch-2.3
Commit: b9c1367b7d9240070c5d83572dc7b43c7480b456
Parents: e11d5ea
Author: fjh100456 
Authored: Sat Jan 20 14:49:49 2018 -0800
Committer: gatorsmile 
Committed: Sat Jan 20 14:50:04 2018 -0800

--
 .../execution/datasources/orc/OrcOptions.scala  |   2 +
 .../datasources/parquet/ParquetOptions.scala|   6 +-
 .../spark/sql/hive/execution/HiveOptions.scala  |  22 ++
 .../sql/hive/execution/SaveAsHiveFile.scala |  20 +-
 .../spark/sql/hive/CompressionCodecSuite.scala  | 353 +++
 5 files changed, 397 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b9c1367b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala
index c866dd8..0ad3862 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala
@@ -67,4 +67,6 @@ object OrcOptions {
 "snappy" -> "SNAPPY",
 "zlib" -> "ZLIB",
 "lzo" -> "LZO")
+
+  def getORCCompressionCodecName(name: String): String = 
shortOrcCompressionCodecNames(name)
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/b9c1367b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala
index ef67ea7..f36a89a 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala
@@ -28,7 +28,7 @@ import org.apache.spark.sql.internal.SQLConf
 /**
  * Options for the Parquet data source.
  */
-private[parquet] class ParquetOptions(
+class ParquetOptions(
 @transient private val parameters: CaseInsensitiveMap[String],
 @transient private val sqlConf: SQLConf)
   extends Serializable {
@@ -82,4 +82,8 @@ object ParquetOptions {
 "snappy" -> CompressionCodecName.SNAPPY,
 "gzip" -> CompressionCodecName.GZIP,
 "lzo" -> CompressionCodecName.LZO)
+
+  def getParquetCompressionCodecName(name: String): String = {
+

spark git commit: [SPARK-21786][SQL] The 'spark.sql.parquet.compression.codec' and 'spark.sql.orc.compression.codec' configuration doesn't take effect on hive table writing

2018-01-20 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/master 84a076e0e -> 00d169156


[SPARK-21786][SQL] The 'spark.sql.parquet.compression.codec' and 
'spark.sql.orc.compression.codec' configuration doesn't take effect on hive 
table writing

[SPARK-21786][SQL] The 'spark.sql.parquet.compression.codec' and 
'spark.sql.orc.compression.codec' configuration doesn't take effect on hive 
table writing

What changes were proposed in this pull request?

When writing Hive tables, pass the value of 
‘spark.sql.parquet.compression.codec’ to ‘parquet.compression’,
and pass the value of ‘spark.sql.orc.compression.codec’ to ‘orc.compress’.

How was this patch tested?

Added a new test suite (CompressionCodecSuite).

Note:
This is the same issue mentioned in #19218. That branch was deleted 
by mistake, so this new PR replaces it.

gatorsmile maropu dongjoon-hyun discipleforteen

Author: fjh100456 
Author: Takeshi Yamamuro 
Author: Wenchen Fan 
Author: gatorsmile 
Author: Yinan Li 
Author: Marcelo Vanzin 
Author: Juliusz Sompolski 
Author: Felix Cheung 
Author: jerryshao 
Author: Li Jin 
Author: Gera Shegalov 
Author: chetkhatri 
Author: Joseph K. Bradley 
Author: Bago Amirbekian 
Author: Xianjin YE 
Author: Bruce Robbins 
Author: zuotingbing 
Author: Kent Yao 
Author: hyukjinkwon 
Author: Adrian Ionescu 

Closes #20087 from fjh100456/HiveTableWriting.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/00d16915
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/00d16915
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/00d16915

Branch: refs/heads/master
Commit: 00d169156d4b1c91d2bcfd788b254b03c509dc41
Parents: 84a076e
Author: fjh100456 
Authored: Sat Jan 20 14:49:49 2018 -0800
Committer: gatorsmile 
Committed: Sat Jan 20 14:49:49 2018 -0800

--
 .../execution/datasources/orc/OrcOptions.scala  |   2 +
 .../datasources/parquet/ParquetOptions.scala|   6 +-
 .../spark/sql/hive/execution/HiveOptions.scala  |  22 ++
 .../sql/hive/execution/SaveAsHiveFile.scala |  20 +-
 .../spark/sql/hive/CompressionCodecSuite.scala  | 353 +++
 5 files changed, 397 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/00d16915/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala
index c866dd8..0ad3862 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala
@@ -67,4 +67,6 @@ object OrcOptions {
 "snappy" -> "SNAPPY",
 "zlib" -> "ZLIB",
 "lzo" -> "LZO")
+
+  def getORCCompressionCodecName(name: String): String = 
shortOrcCompressionCodecNames(name)
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/00d16915/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala
index ef67ea7..f36a89a 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala
@@ -28,7 +28,7 @@ import org.apache.spark.sql.internal.SQLConf
 /**
  * Options for the Parquet data source.
  */
-private[parquet] class ParquetOptions(
+class ParquetOptions(
 @transient private val parameters: CaseInsensitiveMap[String],
 @transient private val sqlConf: SQLConf)
   extends Serializable {
@@ -82,4 +82,8 @@ object ParquetOptions {
 "snappy" -> CompressionCodecName.SNAPPY,
 "gzip" -> CompressionCodecName.GZIP,
 "lzo" -> CompressionCodecName.LZO)
+
+  def getParquetCompressionCodecName(name: String): String = {
+shortParquetCompressionCodecNames(name).name()
+  }
 }