Repository: hive
Updated Branches:
  refs/heads/branch-1 df3746565 -> 39decb0bf


HIVE-13563 : Hive Streaming does not honor orc.compress.size and 
orc.stripe.size table properties (Wei Zheng, reviewed by Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/39decb0b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/39decb0b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/39decb0b

Branch: refs/heads/branch-1
Commit: 39decb0bfce8a2848fd293c5fc932f610896d31d
Parents: df37465
Author: Wei Zheng <w...@apache.org>
Authored: Thu Jun 9 10:12:19 2016 -0700
Committer: Wei Zheng <w...@apache.org>
Committed: Thu Jun 9 11:26:00 2016 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |  2 +
 .../apache/hadoop/hive/ql/io/orc/OrcFile.java   |  7 ++++
 .../hadoop/hive/ql/io/orc/OrcRecordUpdater.java | 40 ++++++++++++++------
 .../hive/ql/io/orc/TestOrcRecordUpdater.java    |  4 ++
 4 files changed, 42 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/39decb0b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index c63c2ca..e760ed5 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1019,6 +1019,8 @@ public class HiveConf extends Configuration {
         "to use dictionary or not will be retained thereafter."),
     HIVE_ORC_DEFAULT_BUFFER_SIZE("hive.exec.orc.default.buffer.size", 256 * 
1024,
         "Define the default ORC buffer size, in bytes."),
+    HIVE_ORC_BASE_DELTA_RATIO("hive.exec.orc.base.delta.ratio", 8, "The ratio 
of base writer and\n" +
+        "delta writer in terms of STRIPE_SIZE and BUFFER_SIZE."),
     HIVE_ORC_DEFAULT_BLOCK_PADDING("hive.exec.orc.default.block.padding", true,
         "Define the default block padding, which pads stripes to the HDFS 
block boundaries."),
     HIVE_ORC_BLOCK_PADDING_TOLERANCE("hive.exec.orc.block.padding.tolerance", 
0.05f,

http://git-wip-us.apache.org/repos/asf/hive/blob/39decb0b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
index c9ba713..3ca0a6e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
@@ -491,6 +491,13 @@ public final class OrcFile {
       return this;
     }
 
+    public int getBufferSize() {
+      return bufferSizeValue;
+    }
+
+    public long getStripeSize() {
+      return stripeSizeValue;
+    }
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hive/blob/39decb0b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
index efe5293..693ffd5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.RecordIdentifier;
@@ -249,19 +250,36 @@ public class OrcRecordUpdater implements RecordUpdater {
       flushLengths = null;
     }
     OrcFile.WriterOptions writerOptions = null;
-    if (options instanceof OrcOptions) {
-      writerOptions = ((OrcOptions) options).getOrcOptions();
-    }
-    if (writerOptions == null) {
-      writerOptions = OrcFile.writerOptions(options.getTableProperties(),
-          options.getConfiguration());
-    }
-    writerOptions.fileSystem(fs).callback(indexBuilder);
-    if (!options.isWritingBase()) {
+    // If writing delta dirs, we need to make a clone of original options, to 
avoid polluting it for
+    // the base writer
+    if (options.isWritingBase()) {
+      if (options instanceof OrcOptions) {
+        writerOptions = ((OrcOptions) options).getOrcOptions();
+      }
+      if (writerOptions == null) {
+        writerOptions = OrcFile.writerOptions(options.getTableProperties(),
+            options.getConfiguration());
+      }
+    } else {  // delta writer
+      AcidOutputFormat.Options optionsCloneForDelta = options.clone();
+      if (optionsCloneForDelta instanceof OrcOptions) {
+        writerOptions = ((OrcOptions) optionsCloneForDelta).getOrcOptions();
+      }
+      if (writerOptions == null) {
+        writerOptions = 
OrcFile.writerOptions(optionsCloneForDelta.getTableProperties(),
+            optionsCloneForDelta.getConfiguration());
+      }
+
+      // get buffer size and stripe size for base writer
+      int baseBufferSizeValue = writerOptions.getBufferSize();
+      long baseStripeSizeValue = writerOptions.getStripeSize();
+      // overwrite buffer size and stripe size for delta writer, based on 
BASE_DELTA_RATIO
+      int ratio = HiveConf.getIntVar(options.getConfiguration(), 
HiveConf.ConfVars.HIVE_ORC_BASE_DELTA_RATIO);
+      writerOptions.bufferSize(baseBufferSizeValue / ratio);
+      writerOptions.stripeSize(baseStripeSizeValue / ratio);
       writerOptions.blockPadding(false);
-      writerOptions.bufferSize(DELTA_BUFFER_SIZE);
-      writerOptions.stripeSize(DELTA_STRIPE_SIZE);
     }
+    writerOptions.fileSystem(fs).callback(indexBuilder);
     rowInspector = (StructObjectInspector)options.getInspector();
     writerOptions.inspector(createEventSchema(findRecId(options.getInspector(),
         options.getRecordIdColumn())));

http://git-wip-us.apache.org/repos/asf/hive/blob/39decb0b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRecordUpdater.java
----------------------------------------------------------------------
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRecordUpdater.java 
b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRecordUpdater.java
index 973cc40..6fa93e2 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRecordUpdater.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRecordUpdater.java
@@ -30,6 +30,7 @@ import java.util.Properties;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.RecordIdentifier;
@@ -195,6 +196,8 @@ public class TestOrcRecordUpdater {
     }
     Properties tblProps = new Properties();
     tblProps.setProperty("orc.compress", "SNAPPY");
+    tblProps.setProperty("orc.compress.size", "8192");
+    HiveConf.setIntVar(conf, HiveConf.ConfVars.HIVE_ORC_BASE_DELTA_RATIO, 4);
     AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf)
         .filesystem(fs)
         .bucket(10)
@@ -221,6 +224,7 @@ public class TestOrcRecordUpdater {
     System.out.flush();
     String outDump = new String(myOut.toByteArray());
     assertEquals(true, outDump.contains("Compression: SNAPPY"));
+    assertEquals(true, outDump.contains("Compression size: 2048"));
     System.setOut(origOut);
     updater.close(false);
   }

Reply via email to