GitHub user jackylk commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2683#discussion_r216982870

    --- Diff: core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java ---
    @@ -168,6 +168,65 @@ public static Object getMeasureObjectBasedOnDataType(ColumnPage measurePage, int
         }
       }
     
    +  /**
    +   * Calculate data percentage in [min, max] scope based on data type
    +   * @param data data to calculate the percentage
    +   * @param min min value
    +   * @param max max value
    +   * @param column column schema including data type
    +   * @return result
    +   */
    +  public static double computePercentage(byte[] data, byte[] min, byte[] max, ColumnSchema column) {
    +    if (column.getDataType() == DataTypes.STRING) {
    +      // for string, we do not calculate
    +      return 0;
    +    } else if (DataTypes.isDecimal(column.getDataType())) {
    +      BigDecimal minValue = DataTypeUtil.byteToBigDecimal(min);
    +      BigDecimal dataValue = DataTypeUtil.byteToBigDecimal(data).subtract(minValue);
    +      BigDecimal factorValue = DataTypeUtil.byteToBigDecimal(max).subtract(minValue);
    +      return dataValue.divide(factorValue).doubleValue();
    +    }
    +    double dataValue, minValue, factorValue;
    +    if (column.getDataType() == DataTypes.SHORT) {
    +      minValue = ByteUtil.toShort(min, 0);
    +      dataValue = ByteUtil.toShort(data, 0) - minValue;
    +      factorValue = ByteUtil.toShort(max, 0) - ByteUtil.toShort(min, 0);
    +    } else if (column.getDataType() == DataTypes.INT) {
    +      if (column.isSortColumn()) {
    +        minValue = ByteUtil.toXorInt(min, 0, min.length);
    +        dataValue = ByteUtil.toXorInt(data, 0, data.length) - minValue;
    +        factorValue = ByteUtil.toXorInt(max, 0, max.length) - ByteUtil.toXorInt(min, 0, min.length);
    +      } else {
    +        minValue = ByteUtil.toLong(min, 0, min.length);
    +        dataValue = ByteUtil.toLong(data, 0, data.length) - minValue;
    +        factorValue = ByteUtil.toLong(max, 0, max.length) - ByteUtil.toLong(min, 0, min.length);
    +      }
    +    } else if (column.getDataType() == DataTypes.LONG) {
    +      minValue = ByteUtil.toLong(min, 0, min.length);
    +      dataValue = ByteUtil.toLong(data, 0, data.length) - minValue;
    +      factorValue = ByteUtil.toLong(max, 0, max.length) - ByteUtil.toLong(min, 0, min.length);
    +    } else if (column.getDataType() == DataTypes.DATE) {
    +      minValue = ByteUtil.toInt(min, 0, min.length);
    +      dataValue = ByteUtil.toInt(data, 0, data.length) - minValue;
    +      factorValue = ByteUtil.toInt(max, 0, max.length) - ByteUtil.toInt(min, 0, min.length);
    +    } else if (column.getDataType() == DataTypes.TIMESTAMP) {
    +      minValue = ByteUtil.toLong(min, 0, min.length);
    +      dataValue = ByteUtil.toLong(data, 0, data.length) - minValue;
    +      factorValue = ByteUtil.toLong(max, 0, max.length) - ByteUtil.toLong(min, 0, min.length);
    +    } else if (column.getDataType() == DataTypes.DOUBLE) {
    +      minValue = ByteUtil.toDouble(min, 0, min.length);
    +      dataValue = ByteUtil.toDouble(data, 0, data.length) - minValue;
    +      factorValue = ByteUtil.toDouble(max, 0, max.length) - ByteUtil.toDouble(min, 0, min.length);
    +    } else {
    +      throw new UnsupportedOperationException("data type: " + column.getDataType());
    +    }
    +
    +    if (factorValue == 0d) {
    +      return Double.MIN_VALUE;
    --- End diff --

    fixed
---