This is an automated email from the ASF dual-hosted git repository. rbalamohan pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
     new f7be5fe  HIVE-23917: Reset key access count during eviction in VectorGroupByOperator (#1306)
f7be5fe is described below

commit f7be5fe240e5e057edff9c14f03f6cc17367bc92
Author: rbalamohan <rbalamo...@apache.org>
AuthorDate: Wed Jul 29 09:13:11 2020 +0530

    HIVE-23917: Reset key access count during eviction in VectorGroupByOperator (#1306)
---
 .../hive/ql/exec/vector/VectorAggregationBufferRow.java       |  4 ++++
 .../hadoop/hive/ql/exec/vector/VectorGroupByOperator.java     |  4 +++-
 .../hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java | 11 +++++++++++
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java
index a7ef154..a265e52 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java
@@ -89,4 +89,8 @@ public class VectorAggregationBufferRow {
   public void incrementAccessCount() {
     accessed++;
   }
+
+  public void resetAccessCount() {
+    accessed = 0;
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
index 85535f5..02864d9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
@@ -599,8 +599,10 @@ public class VectorGroupByOperator extends Operator<GroupByDesc>
       while(iter.hasNext()) {
         Map.Entry<KeyWrapper, VectorAggregationBufferRow> pair = iter.next();
         if (!all && avgAccess >= 1) {
-          // Retain entries when access pattern is > than average access
           if (pair.getValue().getAccessCount() > avgAccess) {
+            // resetting to give chance for other entries
+            totalAccessCount -= pair.getValue().getAccessCount();
+            pair.getValue().resetAccessCount();
             continue;
           }
         }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
index c22a833..d6a8548 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
@@ -692,11 +692,18 @@ public class TestVectorGroupByOperator {
     // This processing would trigger flush
     for (VectorizedRowBatch unit: data) {
+      long zeroAccessBeforeFlush = getElementsWithZeroAccess(processingMode.mapKeysAggregationBuffers);
       vgo.process(unit, 0);
       long freqElementsAfterFlush = getElementsHigherThan(processingMode.mapKeysAggregationBuffers, avgAccess);
       assertTrue("After flush: " + freqElementsAfterFlush + ", before flush: " + numElementsToBeRetained,
           (freqElementsAfterFlush >= numElementsToBeRetained));
+
+      // ensure that freq elements are reset for providing chance for others
+      long zeroAccessAfterFlush = getElementsWithZeroAccess(processingMode.mapKeysAggregationBuffers);
+      assertTrue("After flush: " + zeroAccessAfterFlush + ", before flush: " + zeroAccessBeforeFlush,
+          (zeroAccessAfterFlush > zeroAccessBeforeFlush));
+
       break;
     }
     vgo.close(false);
@@ -706,6 +713,10 @@ public class TestVectorGroupByOperator {
     return aggMap.values().stream().filter(v -> (v.getAccessCount() > avgAccess)).count();
   }
+  long getElementsWithZeroAccess(Map<KeyWrapper, VectorAggregationBufferRow> aggMap) {
+    return aggMap.values().stream().filter(v -> (v.getAccessCount() == 0)).count();
+  }
+
   @Test
   public void testMaxHTEntriesFlush() throws HiveException {