Repository: hive Updated Branches: refs/heads/branch-1.0 84af92e65 -> 1954c9088
HIVE-9619: Uninitialized read of numBitVectors in NumDistinctValueEstimator (Alexander Pivovarov via gopalv) git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1660464 13f79535-47bb-0310-9956-ffa450edef68 Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8b9ba260 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8b9ba260 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8b9ba260 Branch: refs/heads/branch-1.0 Commit: 8b9ba2600e218561e52b8ae38701d801b31ce4da Parents: 84af92e Author: Gopal Vijayaraghavan <gop...@apache.org> Authored: Tue Feb 17 18:32:04 2015 +0000 Committer: Pengcheng Xiong <pxi...@apache.org> Committed: Tue Aug 11 14:16:22 2015 -0700 ---------------------------------------------------------------------- .../udf/generic/NumDistinctValueEstimator.java | 59 +++++++++++--------- 1 file changed, 33 insertions(+), 26 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/8b9ba260/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java index 2817044..8212bea 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java @@ -35,18 +35,18 @@ public class NumDistinctValueEstimator { * independent. As a consequence, the hash values will not distribute uniformly from 0 to 2^p-1 * thus introducing errors in the estimates. */ - private static final int bitVectorSize = 31; - private int numBitVectors; + private static final int BIT_VECTOR_SIZE = 31; + private final int numBitVectors; // Refer to Flajolet-Martin'86 for the value of phi - private final double phi = 0.77351; + private static final double PHI = 0.77351; - private int[] a; - private int[] b; - private FastBitSet[] bitVector = new FastBitSet[numBitVectors]; + private final int[] a; + private final int[] b; + private final FastBitSet[] bitVector; - private Random aValue; - private Random bValue; + private final Random aValue; + private final Random bValue; /* Create a new distinctValueEstimator */ @@ -54,7 +54,7 @@ public class NumDistinctValueEstimator { this.numBitVectors = numBitVectors; bitVector = new FastBitSet[numBitVectors]; for (int i=0; i< numBitVectors; i++) { - bitVector[i] = new FastBitSet(bitVectorSize); + bitVector[i] = new FastBitSet(BIT_VECTOR_SIZE); } a = new int[numBitVectors]; @@ -98,23 +98,30 @@ public class NumDistinctValueEstimator { b[i] = randVal; if (a[i] < 0) { - a[i] = a[i] + (1 << bitVectorSize - 1); + a[i] = a[i] + (1 << BIT_VECTOR_SIZE - 1); } if (b[i] < 0) { - b[i] = b[i] + (1 << bitVectorSize - 1); + b[i] = b[i] + (1 << BIT_VECTOR_SIZE - 1); } } } public NumDistinctValueEstimator(String s, int numBitVectors) { - FastBitSet b[] = deserialize(s, numBitVectors); + this.numBitVectors = numBitVectors; + FastBitSet bitVectorDeser[] = deserialize(s, numBitVectors); bitVector = new FastBitSet[numBitVectors]; for(int i=0; i <numBitVectors; i++) { - bitVector[i] = new FastBitSet(bitVectorSize); + bitVector[i] = new FastBitSet(BIT_VECTOR_SIZE); bitVector[i].clear(); - bitVector[i].or(b[i]); + bitVector[i].or(bitVectorDeser[i]); } + + a = null; + b = null; + + aValue = null; + bValue = null; } /** @@ -135,7 +142,7 @@ public class NumDistinctValueEstimator { } public int getBitVectorSize() { - return bitVectorSize; + return BIT_VECTOR_SIZE; } public void printNumDistinctValueEstimator() { @@ -145,7 +152,7 @@ public class NumDistinctValueEstimator { LOG.debug("Number of Vectors:"); LOG.debug(numBitVectors); LOG.debug("Vector Size: "); - LOG.debug(bitVectorSize); + LOG.debug(BIT_VECTOR_SIZE); for (int i=0; i < numBitVectors; i++) { t = t + bitVector[i].toString(); @@ -173,7 +180,7 @@ public class NumDistinctValueEstimator { private FastBitSet[] deserialize(String s, int numBitVectors) { FastBitSet[] b = new FastBitSet[numBitVectors]; for (int j=0; j < numBitVectors; j++) { - b[j] = new FastBitSet(bitVectorSize); + b[j] = new FastBitSet(BIT_VECTOR_SIZE); b[j].clear(); } @@ -219,7 +226,7 @@ public class NumDistinctValueEstimator { } private int generateHash(long v, int hashNum) { - int mod = (1<<bitVectorSize) - 1; + int mod = (1<<BIT_VECTOR_SIZE) - 1; long tempHash = a[hashNum] * v + b[hashNum]; tempHash %= mod; int hash = (int) tempHash; @@ -234,7 +241,7 @@ public class NumDistinctValueEstimator { } private int generateHashForPCSA(long v) { - int mod = 1 << (bitVectorSize - 1) - 1; + int mod = 1 << (BIT_VECTOR_SIZE - 1) - 1; long tempHash = a[0] * v + b[0]; tempHash %= mod; int hash = (int) tempHash; @@ -259,8 +266,8 @@ public class NumDistinctValueEstimator { int index; // Find the index of the least significant bit that is 1 - for (index=0; index<bitVectorSize; index++) { - if (hash % 2 == 1) { + for (index=0; index<BIT_VECTOR_SIZE; index++) { + if (hash % 2 != 0) { break; } hash = hash >> 1; @@ -277,8 +284,8 @@ public class NumDistinctValueEstimator { int index; // Find the index of the least significant bit that is 1 - for (index=0; index<bitVectorSize; index++) { - if (rho % 2 == 1) { + for (index=0; index<BIT_VECTOR_SIZE; index++) { + if (rho % 2 != 0) { break; } rho = rho >> 1; @@ -321,13 +328,13 @@ public class NumDistinctValueEstimator { for (int i=0; i < numBitVectors; i++) { int index = 0; - while (bitVector[i].get(index) && index < bitVectorSize) { + while (bitVector[i].get(index) && index < BIT_VECTOR_SIZE) { index = index + 1; } S = S + index; } - numDistinctValues = ((numBitVectors/phi) * Math.pow(2.0, S/numBitVectors)); + numDistinctValues = ((numBitVectors/PHI) * Math.pow(2.0, S/numBitVectors)); return ((long)numDistinctValues); } @@ -345,7 +352,7 @@ public class NumDistinctValueEstimator { } avgLeastSigZero = - (double)(sumLeastSigZero/(numBitVectors * 1.0)) - (Math.log(phi)/Math.log(2.0)); + (double)(sumLeastSigZero/(numBitVectors * 1.0)) - (Math.log(PHI)/Math.log(2.0)); numDistinctValues = Math.pow(2.0, avgLeastSigZero); return ((long)(numDistinctValues)); }