HIVE-11541: ORC: Split Strategy should depend on the global file count, not the per-partition file count (Gopal V, reviewed by Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f26b2569 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f26b2569 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f26b2569 Branch: refs/heads/hbase-metastore Commit: f26b2569198fbeceaf17a5a77c59eccf5175935c Parents: db46e6e Author: Prasanth Jayachandran <j.prasant...@gmail.com> Authored: Thu Aug 13 12:35:29 2015 -0700 Committer: Prasanth Jayachandran <j.prasant...@gmail.com> Committed: Thu Aug 13 12:35:29 2015 -0700 ---------------------------------------------------------------------- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/f26b2569/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 4e6dd7a..fe2eccd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -483,7 +483,6 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>, } private FileInfo verifyCachedFileInfo(FileStatus file) { - context.numFilesCounter.incrementAndGet(); FileInfo fileInfo = Context.footerCache.getIfPresent(file.getPath()); if (fileInfo != null) { if (isDebugEnabled) { @@ -671,6 +670,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>, int numFiles = children.size(); long avgFileSize = totalFileSize / numFiles; + int totalFiles = context.numFilesCounter.addAndGet(numFiles); switch(context.splitStrategyKind) { case BI: // BI strategy requested through config @@ -684,7 +684,7 @@ public class 
OrcInputFormat implements InputFormat<NullWritable, OrcStruct>, break; default: // HYBRID strategy - if (avgFileSize > context.maxSize || numFiles <= context.minSplits) { + if (avgFileSize > context.maxSize || totalFiles <= context.minSplits) { splitStrategy = new ETLSplitStrategy(context, fs, dir, children, isOriginal, deltas, covered); } else {