This is an automated email from the ASF dual-hosted git repository. ravipesala pushed a commit to branch branch-1.6 in repository https://gitbox.apache.org/repos/asf/carbondata.git
commit f750b6f210ba87923793631f6b4a2cc4f7dbdd3d Author: ajantha-bhat <ajanthab...@gmail.com> AuthorDate: Tue Sep 10 10:48:26 2019 +0530 [CARBONDATA-3515] Limit local dictionary size to 16MB and allow configuration. Problem: Currently the local dictionary max size is 2GB. Because of this, for varchar columns or long string columns, the local dictionary can grow to 2GB in size. As the local dictionary is stored in the blocklet, the blocklet size can then exceed 2GB, even though the configured maximum blocklet size is 64MB. In some places integer overflow happens during casting. Solution: Limit the local dictionary size to 16MB and allow configuration. The default size is 4MB. This closes #3380 --- .../core/constants/CarbonCommonConstants.java | 11 ++++++ .../dictionaryholder/MapBasedDictionaryStore.java | 16 ++++++-- .../carbondata/core/util/CarbonProperties.java | 43 ++++++++++++++++++++++ docs/configuration-parameters.md | 1 + 4 files changed, 68 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java index 67fa13f..ac77582 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java @@ -1209,6 +1209,17 @@ public final class CarbonCommonConstants { public static final String CARBON_ENABLE_RANGE_COMPACTION_DEFAULT = "true"; + @CarbonProperty + /** + * size based threshold for local dictionary in mb. 
+ */ + public static final String CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB = + "carbon.local.dictionary.size.threshold.inmb"; + + public static final int CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB_DEFAULT = 4; + + public static final int CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB_MAX = 16; + ////////////////////////////////////////////////////////////////////////////////////////// // Query parameter start here ////////////////////////////////////////////////////////////////////////////////////////// diff --git a/core/src/main/java/org/apache/carbondata/core/localdictionary/dictionaryholder/MapBasedDictionaryStore.java b/core/src/main/java/org/apache/carbondata/core/localdictionary/dictionaryholder/MapBasedDictionaryStore.java index 7b8617a..0a50451 100644 --- a/core/src/main/java/org/apache/carbondata/core/localdictionary/dictionaryholder/MapBasedDictionaryStore.java +++ b/core/src/main/java/org/apache/carbondata/core/localdictionary/dictionaryholder/MapBasedDictionaryStore.java @@ -20,7 +20,9 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import org.apache.carbondata.core.cache.dictionary.DictionaryByteArrayWrapper; +import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException; +import org.apache.carbondata.core.util.CarbonProperties; /** * Map based dictionary holder class, it will use map to hold @@ -51,6 +53,11 @@ public class MapBasedDictionaryStore implements DictionaryStore { private int dictionaryThreshold; /** + * dictionary threshold size in bytes + */ + private long dictionarySizeThresholdInBytes; + + /** * for checking threshold is reached or not */ private boolean isThresholdReached; @@ -62,6 +69,8 @@ public class MapBasedDictionaryStore implements DictionaryStore { public MapBasedDictionaryStore(int dictionaryThreshold) { this.dictionaryThreshold = dictionaryThreshold; + this.dictionarySizeThresholdInBytes = 
Integer.parseInt(CarbonProperties.getInstance() + .getProperty(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB)) << 20; this.dictionary = new ConcurrentHashMap<>(); this.referenceDictionaryArray = new DictionaryByteArrayWrapper[dictionaryThreshold]; } @@ -93,7 +102,7 @@ public class MapBasedDictionaryStore implements DictionaryStore { value = ++lastAssignValue; currentSize += data.length; // if new value is greater than threshold - if (value > dictionaryThreshold || currentSize >= Integer.MAX_VALUE) { + if (value > dictionaryThreshold || currentSize > dictionarySizeThresholdInBytes) { // set the threshold boolean to true isThresholdReached = true; // throw exception @@ -111,9 +120,10 @@ public class MapBasedDictionaryStore implements DictionaryStore { private void checkIfThresholdReached() throws DictionaryThresholdReachedException { if (isThresholdReached) { - if (currentSize >= Integer.MAX_VALUE) { + if (currentSize > dictionarySizeThresholdInBytes) { throw new DictionaryThresholdReachedException( - "Unable to generate dictionary. Dictionary Size crossed 2GB limit"); + "Unable to generate dictionary. Dictionary Size crossed bytes: " + + dictionarySizeThresholdInBytes); } else { throw new DictionaryThresholdReachedException( "Unable to generate dictionary value. 
Dictionary threshold reached"); diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java index adf4905..e4efc0b 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java +++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java @@ -202,6 +202,9 @@ public final class CarbonProperties { case CarbonCommonConstants.CARBON_INDEX_SERVER_SERIALIZATION_THRESHOLD: validateIndexServerSerializationThreshold(); break; + case CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB: + validateAndGetLocalDictionarySizeThresholdInMB(); + break; // TODO : Validation for carbon.lock.type should be handled for addProperty flow default: // none @@ -268,6 +271,7 @@ public final class CarbonProperties { validateStringCharacterLimit(); validateDetailQueryBatchSize(); validateIndexServerSerializationThreshold(); + validateAndGetLocalDictionarySizeThresholdInMB(); } /** @@ -1789,4 +1793,43 @@ public final class CarbonProperties { return !prefetchEnable.equalsIgnoreCase("false"); } } + + /** + * get local dictionary size threshold in mb. 
+ */ + private void validateAndGetLocalDictionarySizeThresholdInMB() { + String sizeStr = carbonProperties + .getProperty(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB); + String defaultValue = Integer + .toString(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB_DEFAULT); + if (sizeStr == null) { + carbonProperties + .setProperty(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB, + defaultValue); + } else { + try { + int size = Integer.parseInt(sizeStr); + if (size < 0 || size == 0 + || size > CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB_MAX) { + LOGGER.info("using default value of carbon.local.dictionary.size.threshold.inmb = " + + defaultValue); + carbonProperties + .setProperty(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB, + defaultValue); + } else { + LOGGER.info("using carbon.local.dictionary.size.threshold.inmb = " + size); + carbonProperties + .setProperty(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB, + Integer.toString(size)); + } + } catch (Exception ex) { + LOGGER.info( + "using default value of carbon.local.dictionary.size.threshold.inmb = " + defaultValue); + carbonProperties + .setProperty(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB, + defaultValue); + } + } + } + } diff --git a/docs/configuration-parameters.md b/docs/configuration-parameters.md index da226ec..51017fe 100644 --- a/docs/configuration-parameters.md +++ b/docs/configuration-parameters.md @@ -96,6 +96,7 @@ This section provides the details of all the configurations required for the Car | carbon.minmax.allowed.byte.count | 200 | CarbonData will write the min max values for string/varchar types column using the byte count specified by this configuration. Max value is 1000 bytes(500 characters) and Min value is 10 bytes(5 characters). 
**NOTE:** This property is useful for reducing the store size thereby improving the query performance but can lead to query degradation if value is not configured properly. | | | carbon.merge.index.failure.throw.exception | true | It is used to configure whether or not merge index failure should result in data load failure also. | | carbon.binary.decoder | None | Support configurable decode for loading. Two decoders supported: base64 and hex | +| carbon.local.dictionary.size.threshold.inmb | 4 | size based threshold for local dictionary in MB, maximum allowed size is 16 MB. | ## Compaction Configuration