Repository: hive
Updated Branches:
  refs/heads/master c9c0c5d1d -> f67c862e3


HIVE-13563 : Hive Streaming does not honor orc.compress.size and orc.stripe.size table properties (Wei Zheng, reviewed by Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f67c862e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f67c862e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f67c862e

Branch: refs/heads/master
Commit: f67c862e30cc8c3d230853c688c5c70d30d207f3
Parents: c9c0c5d
Author: Wei Zheng <w...@apache.org>
Authored: Thu Jun 9 10:12:19 2016 -0700
Committer: Wei Zheng <w...@apache.org>
Committed: Thu Jun 9 10:12:19 2016 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |  2 +
 orc/src/java/org/apache/orc/OrcConf.java        |  2 +
 .../hadoop/hive/ql/io/orc/OrcRecordUpdater.java | 42 +++++++++++++++-----
 .../hive/ql/io/orc/TestOrcRecordUpdater.java    |  4 ++
 .../clientpositive/tez/acid_globallimit.q.out   |  6 +--
 5 files changed, 42 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/f67c862e/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index bb0ca3a..fe69ffa 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1206,6 +1206,8 @@ public class HiveConf extends Configuration {
         "to use dictionary or not will be retained thereafter."),
     HIVE_ORC_DEFAULT_BUFFER_SIZE("hive.exec.orc.default.buffer.size", 256 * 1024,
         "Define the default ORC buffer size, in bytes."),
+    HIVE_ORC_BASE_DELTA_RATIO("hive.exec.orc.base.delta.ratio", 8, "The ratio of base writer and\n" +
+        "delta writer in terms of STRIPE_SIZE and BUFFER_SIZE."),
     HIVE_ORC_DEFAULT_BLOCK_PADDING("hive.exec.orc.default.block.padding", true,
         "Define the default block padding, which pads stripes to the HDFS block boundaries."),
     HIVE_ORC_BLOCK_PADDING_TOLERANCE("hive.exec.orc.block.padding.tolerance", 0.05f,

http://git-wip-us.apache.org/repos/asf/hive/blob/f67c862e/orc/src/java/org/apache/orc/OrcConf.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/OrcConf.java b/orc/src/java/org/apache/orc/OrcConf.java
index 6fcbb72..357318d 100644
--- a/orc/src/java/org/apache/orc/OrcConf.java
+++ b/orc/src/java/org/apache/orc/OrcConf.java
@@ -40,6 +40,8 @@ public enum OrcConf {
           " number of rows n index entry represents.)"),
   BUFFER_SIZE("orc.compress.size", "hive.exec.orc.default.buffer.size",
       256 * 1024, "Define the default ORC buffer size, in bytes."),
+  BASE_DELTA_RATIO("orc.base.delta.ratio", "hive.exec.orc.base.delta.ratio", 8,
+      "The ratio of base writer and delta writer in terms of STRIPE_SIZE and BUFFER_SIZE."),
   BLOCK_PADDING("orc.block.padding", "hive.exec.orc.default.block.padding",
       true,
       "Define whether stripes should be padded to the HDFS block boundaries."),

http://git-wip-us.apache.org/repos/asf/hive/blob/f67c862e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
index 4bf2403..e577961 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
@@ -27,6 +27,7 @@ import java.util.List;
 
 import org.apache.orc.impl.AcidStats;
 import org.apache.orc.impl.OrcAcidUtils;
+import org.apache.orc.OrcConf;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -204,19 +205,38 @@ public class OrcRecordUpdater implements RecordUpdater {
       flushLengths = null;
     }
     OrcFile.WriterOptions writerOptions = null;
-    if (options instanceof OrcOptions) {
-      writerOptions = ((OrcOptions) options).getOrcOptions();
-    }
-    if (writerOptions == null) {
-      writerOptions = OrcFile.writerOptions(options.getTableProperties(),
-          options.getConfiguration());
-    }
-    writerOptions.fileSystem(fs).callback(indexBuilder);
-    if (!options.isWritingBase()) {
+    // If writing delta dirs, we need to make a clone of original options, to avoid polluting it for
+    // the base writer
+    if (options.isWritingBase()) {
+      if (options instanceof OrcOptions) {
+        writerOptions = ((OrcOptions) options).getOrcOptions();
+      }
+      if (writerOptions == null) {
+        writerOptions = OrcFile.writerOptions(options.getTableProperties(),
+            options.getConfiguration());
+      }
+    } else {  // delta writer
+      AcidOutputFormat.Options optionsCloneForDelta = options.clone();
+
+      if (optionsCloneForDelta instanceof OrcOptions) {
+        writerOptions = ((OrcOptions) optionsCloneForDelta).getOrcOptions();
+      }
+      if (writerOptions == null) {
+        writerOptions = OrcFile.writerOptions(optionsCloneForDelta.getTableProperties(),
+            optionsCloneForDelta.getConfiguration());
+      }
+
+      // get buffer size and stripe size for base writer
+      int baseBufferSizeValue = writerOptions.getBufferSize();
+      long baseStripeSizeValue = writerOptions.getStripeSize();
+
+      // overwrite buffer size and stripe size for delta writer, based on BASE_DELTA_RATIO
+      int ratio = (int) OrcConf.BASE_DELTA_RATIO.getLong(options.getConfiguration());
+      writerOptions.bufferSize(baseBufferSizeValue / ratio);
+      writerOptions.stripeSize(baseStripeSizeValue / ratio);
       writerOptions.blockPadding(false);
-      writerOptions.bufferSize(DELTA_BUFFER_SIZE);
-      writerOptions.stripeSize(DELTA_STRIPE_SIZE);
     }
+    writerOptions.fileSystem(fs).callback(indexBuilder);
     rowInspector = (StructObjectInspector)options.getInspector();
     writerOptions.inspector(createEventSchema(findRecId(options.getInspector(),
         options.getRecordIdColumn())));

http://git-wip-us.apache.org/repos/asf/hive/blob/f67c862e/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRecordUpdater.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRecordUpdater.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRecordUpdater.java
index 0a61fb8..67c473e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRecordUpdater.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRecordUpdater.java
@@ -30,6 +30,7 @@ import java.util.Properties;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.RecordIdentifier;
@@ -197,6 +198,8 @@ public class TestOrcRecordUpdater {
     }
     Properties tblProps = new Properties();
     tblProps.setProperty("orc.compress", "SNAPPY");
+    tblProps.setProperty("orc.compress.size", "8192");
+    HiveConf.setIntVar(conf, HiveConf.ConfVars.HIVE_ORC_BASE_DELTA_RATIO, 4);
     AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf)
         .filesystem(fs)
         .bucket(10)
@@ -223,6 +226,7 @@ public class TestOrcRecordUpdater {
     System.out.flush();
     String outDump = new String(myOut.toByteArray());
     assertEquals(true, outDump.contains("Compression: SNAPPY"));
+    assertEquals(true, outDump.contains("Compression size: 2048"));
     System.setOut(origOut);
     updater.close(false);
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/f67c862e/ql/src/test/results/clientpositive/tez/acid_globallimit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/acid_globallimit.q.out b/ql/src/test/results/clientpositive/tez/acid_globallimit.q.out
index 804e5e2..a460182 100644
--- a/ql/src/test/results/clientpositive/tez/acid_globallimit.q.out
+++ b/ql/src/test/results/clientpositive/tez/acid_globallimit.q.out
@@ -45,7 +45,7 @@ Table Parameters:
        numFiles                3                   
        numRows                 0                   
        rawDataSize             0                   
-       totalSize               100540              
+       totalSize               101147              
        transactional           true                
 #### A masked pattern was here ####
                 
@@ -75,9 +75,9 @@ Stage-0
       File Output Operator [FS_3]
         Limit [LIM_2] (rows=10 width=4)
           Number of rows:10
-          Select Operator [SEL_1] (rows=25135 width=4)
+          Select Operator [SEL_1] (rows=25286 width=4)
             Output:["_col0"]
-            TableScan [TS_0] (rows=25135 width=4)
+            TableScan [TS_0] (rows=25286 width=4)
               default@acidtest1,acidtest1, ACID table,Tbl:COMPLETE,Col:NONE,Output:["c1"]
 
 PREHOOK: query: select cast (c1 as string) from acidtest1 limit 10

Reply via email to