akashrn5 commented on a change in pull request #4020: URL: https://github.com/apache/carbondata/pull/4020#discussion_r532795309
########## File path: core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java ########## @@ -998,6 +998,33 @@ public long getMajorCompactionSize() { return compactionSize; } + /** + * returns minor compaction size value from carbon properties or 0 if it is not valid or Review comment: shouldn't it be -1?, please change it ########## File path: core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java ########## @@ -998,6 +998,33 @@ public long getMajorCompactionSize() { return compactionSize; } + /** + * returns minor compaction size value from carbon properties or 0 if it is not valid or + * not configured + * + * @return compactionSize + */ + public long getMinorCompactionSize() { + long compactionSize = -1; + try { + compactionSize = Long.parseLong(getProperty( + CarbonCommonConstants.CARBON_MINOR_COMPACTION_SIZE, "0")); + } catch (NumberFormatException e) { + LOGGER.warn("The specified value for property " + + CarbonCommonConstants.CARBON_MINOR_COMPACTION_SIZE + " is incorrect." + + " Correct value should be long value great than 1. Taking the default value -1" + + " which means not consider segment size in minor compaction."); + } + if (compactionSize <= 0 && compactionSize != -1) { + LOGGER.warn("The specified value for property " + + CarbonCommonConstants.CARBON_MINOR_COMPACTION_SIZE + " is incorrect." Review comment: same as above ########## File path: core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java ########## @@ -998,6 +998,33 @@ public long getMajorCompactionSize() { return compactionSize; } + /** + * returns minor compaction size value from carbon properties or 0 if it is not valid or + * not configured + * + * @return compactionSize + */ + public long getMinorCompactionSize() { + long compactionSize = -1; + try { + compactionSize = Long.parseLong(getProperty( + CarbonCommonConstants.CARBON_MINOR_COMPACTION_SIZE, "0")); + } catch (NumberFormatException e) { + LOGGER.warn("The specified value for property " + + CarbonCommonConstants.CARBON_MINOR_COMPACTION_SIZE + " is incorrect." + + " Correct value should be long value great than 1. Taking the default value -1" + + " which means not consider segment size in minor compaction."); Review comment: Please change as below: `Invalid value is configured for property CarbonCommonConstants.CARBON_MINOR_COMPACTION_SIZE, considering the default value -1 and not considering segment Size during minor compaction.` ########## File path: integration/spark/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala ########## @@ -292,6 +293,33 @@ object CommonUtil { } } + /** + * This method will validate the minor compaction size specified by the user + * the property is used while doing minor compaction + * + * @param tableProperties Review comment: remove this, not needed as variable name clearly tells what it is ########## File path: integration/spark/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala ########## @@ -292,6 +293,33 @@ object CommonUtil { } } + /** + * This method will validate the minor compaction size specified by the user + * the property is used while doing minor compaction + * + * @param tableProperties + */ + def validateMinorCompactionSize(tableProperties: Map[String, String]): Unit = { + var minorCompactionSize: Integer = 0 + val tblPropName = CarbonCommonConstants.TABLE_MINOR_COMPACTION_SIZE Review comment: please give a better variable name ########## File path: processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java ########## @@ -767,18 +784,27 @@ private static boolean isMergedSegment(String segName) { public static long getCompactionSize(CompactionType compactionType, CarbonLoadModel carbonLoadModel) { long compactionSize = 0; + Map<String, String> tblProps = carbonLoadModel.getCarbonDataLoadSchema() + .getCarbonTable().getTableInfo().getFactTable().getTableProperties(); switch (compactionType) { case MAJOR: // default value is system level option compactionSize = CarbonProperties.getInstance().getMajorCompactionSize(); // if table level option is identified, use it to overwrite system level option - Map<String, String> tblProps = carbonLoadModel.getCarbonDataLoadSchema() - .getCarbonTable().getTableInfo().getFactTable().getTableProperties(); if (tblProps.containsKey(CarbonCommonConstants.TABLE_MAJOR_COMPACTION_SIZE)) { compactionSize = Long.parseLong( tblProps.get(CarbonCommonConstants.TABLE_MAJOR_COMPACTION_SIZE)); } break; + case MINOR: + // default value is system level option + compactionSize = CarbonProperties.getInstance().getMinorCompactionSize(); Review comment: you can move this to else block ########## File path: docs/dml-of-carbondata.md ########## @@ -529,6 +529,10 @@ CarbonData DML statements are documented here,which includes: * Level 1: Merging of the segments which are not yet compacted. * Level 2: Merging of the compacted segments again to form a larger segment. + The segment whose data size exceed limit of carbon.minor.compaction.size will not be included in + minor compaction. If user want to control the size of segment included in minor compaction, + configure the property with appropriate value in MB, if not configure, will merge segments only + based on number of segments. Review comment: change to below `User can control the size of a segment to be included in the minor compaction by using carbon.minor.compaction.size. If not configured, minor compaction will consider the segments based on carbon.compaction.level.threshold by neglecting the size of each segment` ########## File path: docs/dml-of-carbondata.md ########## @@ -529,6 +529,10 @@ CarbonData DML statements are documented here,which includes: * Level 1: Merging of the segments which are not yet compacted. * Level 2: Merging of the compacted segments again to form a larger segment. + The segment whose data size exceed limit of carbon.minor.compaction.size will not be included in Review comment: ```suggestion The segment whose data size exceed the limit of carbon.minor.compaction.size will not be included in ``` ########## File path: integration/spark/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala ########## @@ -292,6 +293,33 @@ object CommonUtil { } } + /** + * This method will validate the minor compaction size specified by the user + * the property is used while doing minor compaction + * + * @param tableProperties + */ + def validateMinorCompactionSize(tableProperties: Map[String, String]): Unit = { + var minorCompactionSize: Integer = 0 + val tblPropName = CarbonCommonConstants.TABLE_MINOR_COMPACTION_SIZE + if (tableProperties.get(tblPropName).isDefined) { + val minorCompactionSizeStr: String = + parsePropertyValueStringInMB(tableProperties(tblPropName)) + try { + minorCompactionSize = Integer.parseInt(minorCompactionSizeStr) Review comment: no need of 306 and 307, directly add that in 309 inside parseInt ########## File path: integration/spark/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala ########## @@ -292,6 +293,33 @@ object CommonUtil { } } + /** + * This method will validate the minor compaction size specified by the user + * the property is used while doing minor compaction + * + * @param tableProperties + */ + def validateMinorCompactionSize(tableProperties: Map[String, String]): Unit = { + var minorCompactionSize: Integer = 0 + val tblPropName = CarbonCommonConstants.TABLE_MINOR_COMPACTION_SIZE + if (tableProperties.get(tblPropName).isDefined) { + val minorCompactionSizeStr: String = + parsePropertyValueStringInMB(tableProperties(tblPropName)) + try { + minorCompactionSize = Integer.parseInt(minorCompactionSizeStr) + } catch { + case e: NumberFormatException => + throw new MalformedCarbonCommandException(s"Invalid $tblPropName value found: " + + s"$minorCompactionSizeStr, only int value greater than 0 is supported.") + } + if (minorCompactionSize <= 0) { + throw new MalformedCarbonCommandException(s"Invalid $tblPropName value found: " + + s"$minorCompactionSizeStr, only int value greater than 0 is supported.") Review comment: Please change like below `Invalid value $minorCompactionSizeStr configured for $tblPropName. Please consider configuring value greater than 0` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org