[CARBONDATA-2772] Size-based dictionary fallback is triggered even when the threshold is not reached.
Issue: Size-based fallback happened even though the threshold was not reached. Root cause: The current size calculation is wrong; it is calculated for each data value instead of only for the generated dictionary data. Solution: The current size should be calculated only for the generated dictionary data. This closes #2542 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/5f6116a6 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/5f6116a6 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/5f6116a6 Branch: refs/heads/branch-1.4 Commit: 5f6116a68d96e499cf94d5b5aac6808965ec47a1 Parents: e21e494 Author: BJangir <babulaljangir...@gmail.com> Authored: Mon Jul 23 22:14:12 2018 +0530 Committer: ravipesala <ravi.pes...@gmail.com> Committed: Tue Jul 31 00:11:26 2018 +0530 ---------------------------------------------------------------------- .../MapBasedDictionaryStore.java | 20 ++++++++++++++------ .../ColumnLocalDictionaryGenerator.java | 8 -------- 2 files changed, 14 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/5f6116a6/core/src/main/java/org/apache/carbondata/core/localdictionary/dictionaryholder/MapBasedDictionaryStore.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/localdictionary/dictionaryholder/MapBasedDictionaryStore.java b/core/src/main/java/org/apache/carbondata/core/localdictionary/dictionaryholder/MapBasedDictionaryStore.java index 05ca002..7b8617a 100644 --- a/core/src/main/java/org/apache/carbondata/core/localdictionary/dictionaryholder/MapBasedDictionaryStore.java +++ b/core/src/main/java/org/apache/carbondata/core/localdictionary/dictionaryholder/MapBasedDictionaryStore.java @@ -55,6 +55,11 @@ public class MapBasedDictionaryStore implements DictionaryStore { */ private boolean isThresholdReached; + 
/** + * current datasize + */ + private long currentSize; + public MapBasedDictionaryStore(int dictionaryThreshold) { this.dictionaryThreshold = dictionaryThreshold; this.dictionary = new ConcurrentHashMap<>(); @@ -86,11 +91,9 @@ public class MapBasedDictionaryStore implements DictionaryStore { if (null == value) { // increment the value value = ++lastAssignValue; + currentSize += data.length; // if new value is greater than threshold - if (value > dictionaryThreshold) { - // clear the dictionary - dictionary.clear(); - referenceDictionaryArray = null; + if (value > dictionaryThreshold || currentSize >= Integer.MAX_VALUE) { // set the threshold boolean to true isThresholdReached = true; // throw exception @@ -108,8 +111,13 @@ public class MapBasedDictionaryStore implements DictionaryStore { private void checkIfThresholdReached() throws DictionaryThresholdReachedException { if (isThresholdReached) { - throw new DictionaryThresholdReachedException( - "Unable to generate dictionary value. Dictionary threshold reached"); + if (currentSize >= Integer.MAX_VALUE) { + throw new DictionaryThresholdReachedException( + "Unable to generate dictionary. Dictionary Size crossed 2GB limit"); + } else { + throw new DictionaryThresholdReachedException( + "Unable to generate dictionary value. 
Dictionary threshold reached"); + } } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/5f6116a6/core/src/main/java/org/apache/carbondata/core/localdictionary/generator/ColumnLocalDictionaryGenerator.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/localdictionary/generator/ColumnLocalDictionaryGenerator.java b/core/src/main/java/org/apache/carbondata/core/localdictionary/generator/ColumnLocalDictionaryGenerator.java index b0c7275..c55a289 100644 --- a/core/src/main/java/org/apache/carbondata/core/localdictionary/generator/ColumnLocalDictionaryGenerator.java +++ b/core/src/main/java/org/apache/carbondata/core/localdictionary/generator/ColumnLocalDictionaryGenerator.java @@ -33,8 +33,6 @@ public class ColumnLocalDictionaryGenerator implements LocalDictionaryGenerator */ private DictionaryStore dictionaryHolder; - private long currentSize; - public ColumnLocalDictionaryGenerator(int threshold, int lvLength) { // adding 1 to threshold for null value int newThreshold = threshold + 1; @@ -54,7 +52,6 @@ public class ColumnLocalDictionaryGenerator implements LocalDictionaryGenerator } catch (DictionaryThresholdReachedException e) { // do nothing } - currentSize += byteBuffer.array().length; } /** @@ -64,11 +61,6 @@ public class ColumnLocalDictionaryGenerator implements LocalDictionaryGenerator * @return dictionary value */ @Override public int generateDictionary(byte[] data) throws DictionaryThresholdReachedException { - currentSize += data.length; - if (currentSize >= Integer.MAX_VALUE) { - throw new DictionaryThresholdReachedException( - "Unable to generate dictionary. Dictionary Size crossed 2GB limit"); - } return this.dictionaryHolder.putIfAbsent(data); }