This is an automated email from the ASF dual-hosted git repository.

ravipesala pushed a commit to branch branch-1.6
in repository https://gitbox.apache.org/repos/asf/carbondata.git

commit f750b6f210ba87923793631f6b4a2cc4f7dbdd3d
Author: ajantha-bhat <ajanthab...@gmail.com>
AuthorDate: Tue Sep 10 10:48:26 2019 +0530

    [CARBONDATA-3515] Limit local dictionary size to 16MB and allow 
configuration.
    
    problem: Currently the maximum local dictionary size is 2GB. Because of
    this, for varchar columns or long string columns, the local dictionary
    can grow to 2GB. As the local dictionary is stored in the blocklet, the
    blocklet size can exceed 2GB even though the configured maximum blocklet
    size is 64MB. In some places integer overflow happens during casting.
    
    solution: Limit the local dictionary size to 16MB and allow it to be
    configured. The default size is 4MB.
    
    This closes #3380
---
 .../core/constants/CarbonCommonConstants.java      | 11 ++++++
 .../dictionaryholder/MapBasedDictionaryStore.java  | 16 ++++++--
 .../carbondata/core/util/CarbonProperties.java     | 43 ++++++++++++++++++++++
 docs/configuration-parameters.md                   |  1 +
 4 files changed, 68 insertions(+), 3 deletions(-)

diff --git 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
index 67fa13f..ac77582 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
@@ -1209,6 +1209,17 @@ public final class CarbonCommonConstants {
 
   public static final String CARBON_ENABLE_RANGE_COMPACTION_DEFAULT = "true";
 
+  @CarbonProperty
+  /**
+   * size based threshold for local dictionary in mb.
+   */
+  public static final String CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB =
+      "carbon.local.dictionary.size.threshold.inmb";
+
+  public static final int CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB_DEFAULT 
= 4;
+
+  public static final int CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB_MAX = 
16;
+
   
//////////////////////////////////////////////////////////////////////////////////////////
   // Query parameter start here
   
//////////////////////////////////////////////////////////////////////////////////////////
diff --git 
a/core/src/main/java/org/apache/carbondata/core/localdictionary/dictionaryholder/MapBasedDictionaryStore.java
 
b/core/src/main/java/org/apache/carbondata/core/localdictionary/dictionaryholder/MapBasedDictionaryStore.java
index 7b8617a..0a50451 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/localdictionary/dictionaryholder/MapBasedDictionaryStore.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/localdictionary/dictionaryholder/MapBasedDictionaryStore.java
@@ -20,7 +20,9 @@ import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.carbondata.core.cache.dictionary.DictionaryByteArrayWrapper;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import 
org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException;
+import org.apache.carbondata.core.util.CarbonProperties;
 
 /**
  * Map based dictionary holder class, it will use map to hold
@@ -51,6 +53,11 @@ public class MapBasedDictionaryStore implements 
DictionaryStore {
   private int dictionaryThreshold;
 
   /**
+   * dictionary threshold size in bytes
+   */
+  private long dictionarySizeThresholdInBytes;
+
+  /**
    * for checking threshold is reached or not
    */
   private boolean isThresholdReached;
@@ -62,6 +69,8 @@ public class MapBasedDictionaryStore implements 
DictionaryStore {
 
   public MapBasedDictionaryStore(int dictionaryThreshold) {
     this.dictionaryThreshold = dictionaryThreshold;
+    this.dictionarySizeThresholdInBytes = 
Integer.parseInt(CarbonProperties.getInstance()
+        
.getProperty(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB))
 << 20;
     this.dictionary = new ConcurrentHashMap<>();
     this.referenceDictionaryArray = new 
DictionaryByteArrayWrapper[dictionaryThreshold];
   }
@@ -93,7 +102,7 @@ public class MapBasedDictionaryStore implements 
DictionaryStore {
           value = ++lastAssignValue;
           currentSize += data.length;
           // if new value is greater than threshold
-          if (value > dictionaryThreshold || currentSize >= Integer.MAX_VALUE) 
{
+          if (value > dictionaryThreshold || currentSize > 
dictionarySizeThresholdInBytes) {
             // set the threshold boolean to true
             isThresholdReached = true;
             // throw exception
@@ -111,9 +120,10 @@ public class MapBasedDictionaryStore implements 
DictionaryStore {
 
   private void checkIfThresholdReached() throws 
DictionaryThresholdReachedException {
     if (isThresholdReached) {
-      if (currentSize >= Integer.MAX_VALUE) {
+      if (currentSize > dictionarySizeThresholdInBytes) {
         throw new DictionaryThresholdReachedException(
-            "Unable to generate dictionary. Dictionary Size crossed 2GB 
limit");
+            "Unable to generate dictionary. Dictionary Size crossed bytes: "
+                + dictionarySizeThresholdInBytes);
       } else {
         throw new DictionaryThresholdReachedException(
             "Unable to generate dictionary value. Dictionary threshold 
reached");
diff --git 
a/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java 
b/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java
index adf4905..e4efc0b 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java
@@ -202,6 +202,9 @@ public final class CarbonProperties {
       case CarbonCommonConstants.CARBON_INDEX_SERVER_SERIALIZATION_THRESHOLD:
         validateIndexServerSerializationThreshold();
         break;
+      case CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB:
+        validateAndGetLocalDictionarySizeThresholdInMB();
+        break;
       // TODO : Validation for carbon.lock.type should be handled for 
addProperty flow
       default:
         // none
@@ -268,6 +271,7 @@ public final class CarbonProperties {
     validateStringCharacterLimit();
     validateDetailQueryBatchSize();
     validateIndexServerSerializationThreshold();
+    validateAndGetLocalDictionarySizeThresholdInMB();
   }
 
   /**
@@ -1789,4 +1793,43 @@ public final class CarbonProperties {
       return !prefetchEnable.equalsIgnoreCase("false");
     }
   }
+
+  /**
+   * validate the local dictionary size threshold (in MB); use the default if unset or invalid.
+   */
+  private void validateAndGetLocalDictionarySizeThresholdInMB() {
+    String sizeStr = carbonProperties
+        
.getProperty(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB);
+    String defaultValue = Integer
+        
.toString(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB_DEFAULT);
+    if (sizeStr == null) {
+      carbonProperties
+          
.setProperty(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB,
+              defaultValue);
+    } else {
+      try {
+        int size = Integer.parseInt(sizeStr);
+        if (size < 0 || size == 0
+            || size > 
CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB_MAX) {
+          LOGGER.info("using default value of 
carbon.local.dictionary.size.threshold.inmb = "
+              + defaultValue);
+          carbonProperties
+              
.setProperty(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB,
+                  defaultValue);
+        } else {
+          LOGGER.info("using carbon.local.dictionary.size.threshold.inmb = " + 
size);
+          carbonProperties
+              
.setProperty(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB,
+                  Integer.toString(size));
+        }
+      } catch (Exception ex) {
+        LOGGER.info(
+            "using default value of 
carbon.local.dictionary.size.threshold.inmb = " + defaultValue);
+        carbonProperties
+            
.setProperty(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB,
+                defaultValue);
+      }
+    }
+  }
+
 }
diff --git a/docs/configuration-parameters.md b/docs/configuration-parameters.md
index da226ec..51017fe 100644
--- a/docs/configuration-parameters.md
+++ b/docs/configuration-parameters.md
@@ -96,6 +96,7 @@ This section provides the details of all the configurations 
required for the Car
 | carbon.minmax.allowed.byte.count | 200 | CarbonData will write the min max 
values for string/varchar types column using the byte count specified by this 
configuration. Max value is 1000 bytes(500 characters) and Min value is 10 
bytes(5 characters). **NOTE:** This property is useful for reducing the store 
size thereby improving the query performance but can lead to query degradation 
if value is not configured properly. | |
 | carbon.merge.index.failure.throw.exception | true | It is used to configure 
whether or not merge index failure should result in data load failure also. |
 | carbon.binary.decoder | None | Support configurable decode for loading. Two 
decoders supported: base64 and hex |
+| carbon.local.dictionary.size.threshold.inmb | 4 | Size-based threshold for 
the local dictionary, in MB. The maximum allowed value is 16 MB; a value that is not an integer between 1 and 16 falls back to the default. |
 
 ## Compaction Configuration
 

Reply via email to