Repository: hive
Updated Branches: refs/heads/master 7580de9a4 -> 6489352b8
HIVE-17010: Fix the overflow problem of Long type in SetSparkReducerParallelism (Liyun Zhang reviewed by Li Rui, Sun Chao, Ferdinand Xu)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6489352b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6489352b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6489352b

Branch: refs/heads/master
Commit: 6489352b85d232e360797ead925b0b25603d3e64
Parents: 7580de9
Author: Ferdinand Xu <cheng.a...@intel.com>
Authored: Tue Jul 11 11:43:05 2017 +0800
Committer: Ferdinand Xu <cheng.a...@intel.com>
Committed: Tue Jul 11 11:43:05 2017 +0800

----------------------------------------------------------------------
 .../hive/ql/optimizer/spark/SetSparkReducerParallelism.java | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/6489352b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java
index 4924df7..e808a4f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java
@@ -22,7 +22,6 @@ import java.util.List;
 import java.util.Set;
 import java.util.Stack;
 
-import org.apache.hadoop.hive.ql.exec.TerminalOperator;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.common.ObjectPair;
@@ -34,6 +33,7 @@ import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorUtils;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.TerminalOperator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.spark.SparkUtilities;
 import org.apache.hadoop.hive.ql.exec.spark.session.SparkSession;
@@ -48,6 +48,7 @@ import org.apache.hadoop.hive.ql.parse.spark.GenSparkUtils;
 import org.apache.hadoop.hive.ql.parse.spark.OptimizeSparkProcContext;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.stats.StatsUtils;
 
 /**
  * SetSparkReducerParallelism determines how many reducers should
@@ -126,7 +127,7 @@ public class SetSparkReducerParallelism implements NodeProcessor {
             for (Operator<? extends OperatorDesc> sibling
                 : sink.getChildOperators().get(0).getParentOperators()) {
               if (sibling.getStatistics() != null) {
-                numberOfBytes += sibling.getStatistics().getDataSize();
+                numberOfBytes = StatsUtils.safeAdd(numberOfBytes, sibling.getStatistics().getDataSize());
                 if (LOG.isDebugEnabled()) {
                   LOG.debug("Sibling " + sibling + " has stats: " + sibling.getStatistics());
                 }
@@ -143,7 +144,7 @@ public class SetSparkReducerParallelism implements NodeProcessor {
                   OperatorUtils.findOperatorsUpstream(sibling, TableScanOperator.class);
               for (TableScanOperator source : sources) {
                 if (source.getStatistics() != null) {
-                  numberOfBytes += source.getStatistics().getDataSize();
+                  numberOfBytes = StatsUtils.safeAdd(numberOfBytes, source.getStatistics().getDataSize());
                   if (LOG.isDebugEnabled()) {
                     LOG.debug("Table source " + source + " has stats: " + source.getStatistics());
                   }