This is an automated email from the ASF dual-hosted git repository.

okumin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 33e11ec6270 HIVE-29442: Avoid redundancy in FilterStatsRule#evaluateComparator (#6300)
33e11ec6270 is described below

commit 33e11ec6270ad94370587a682b425cff7b66bdba
Author: Thomas Rebele <[email protected]>
AuthorDate: Mon Feb 9 12:10:45 2026 +0100

    HIVE-29442: Avoid redundancy in FilterStatsRule#evaluateComparator (#6300)
---
 .../stats/annotation/StatsRulesProcFactory.java    | 327 ++++++++-------------
 1 file changed, 129 insertions(+), 198 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 19f83f39147..42d62e0a64e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -33,6 +33,7 @@
 import java.util.Optional;
 import java.util.Set;
 import java.util.Stack;
+import java.util.function.Function;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
@@ -1070,210 +1071,140 @@ private long evaluateComparator(Statistics stats, AnnotateStatsProcCtx aspCtx, E
 
       if (cs != null && cs.getRange() != null &&
           cs.getRange().maxValue != null && cs.getRange().minValue != null) {
-        try {
-          if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)) {
-            byte value = Byte.parseByte(boundValue);
-            byte maxValue = cs.getRange().maxValue.byteValue();
-            byte minValue = cs.getRange().minValue.byteValue();
-            if (upperBound) {
-              if (maxValue < value || maxValue == value && closedBound) {
-                return currNumRows;
-              }
-              if (minValue > value || minValue == value && !closedBound) {
-                return 0;
-              }
-              if (aspCtx.isUniformWithinRange()) {
-                // Assuming uniform distribution, we can use the range to 
calculate
-                // new estimate for the number of rows
-                return Math.round(((double) (value - minValue) / (maxValue - 
minValue)) * currNumRows);
-              }
-            } else {
-              if (minValue > value || minValue == value && closedBound) {
-                return currNumRows;
-              }
-              if (maxValue < value || maxValue == value && !closedBound) {
-                return 0;
-              }
-              if (aspCtx.isUniformWithinRange()) {
-                // Assuming uniform distribution, we can use the range to 
calculate
-                // new estimate for the number of rows
-                return Math.round(((double) (maxValue - value) / (maxValue - 
minValue)) * currNumRows);
-              }
-            }
-          } else if 
(colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)) {
-            short value = Short.parseShort(boundValue);
-            short maxValue = cs.getRange().maxValue.shortValue();
-            short minValue = cs.getRange().minValue.shortValue();
-            if (upperBound) {
-              if (maxValue < value || maxValue == value && closedBound) {
-                return currNumRows;
-              }
-              if (minValue > value || minValue == value && !closedBound) {
-                return 0;
-              }
-              if (aspCtx.isUniformWithinRange()) {
-                // Assuming uniform distribution, we can use the range to 
calculate
-                // new estimate for the number of rows
-                return Math.round(((double) (value - minValue) / (maxValue - 
minValue)) * currNumRows);
-              }
-            } else {
-              if (minValue > value || minValue == value && closedBound) {
-                return currNumRows;
-              }
-              if (maxValue < value || maxValue == value && !closedBound) {
-                return 0;
-              }
-              if (aspCtx.isUniformWithinRange()) {
-                // Assuming uniform distribution, we can use the range to 
calculate
-                // new estimate for the number of rows
-                return Math.round(((double) (maxValue - value) / (maxValue - 
minValue)) * currNumRows);
-              }
-            }
-          } else if (colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME) ||
-              colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME) ||
-              colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
-            long value;
-            if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
-              DateWritable writableVal = new 
DateWritable(java.sql.Date.valueOf(boundValue));
-              value = writableVal.getDays();
-            } else if 
(colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
-              TimestampWritableV2 timestampWritable = new 
TimestampWritableV2(Timestamp.valueOf(boundValue));
-              value = timestampWritable.getTimestamp().toEpochSecond();
-            } else {
-              value = Integer.parseInt(boundValue);
-            }
-            long maxValue = cs.getRange().maxValue.longValue();
-            long minValue = cs.getRange().minValue.longValue();
-            if (upperBound) {
-              if (maxValue < value || maxValue == value && closedBound) {
-                return currNumRows;
-              }
-              if (minValue > value || minValue == value && !closedBound) {
-                return 0;
-              }
-              if (aspCtx.isUniformWithinRange()) {
-                // Assuming uniform distribution, we can use the range to 
calculate
-                // new estimate for the number of rows
-                return Math.round(((double) (value - minValue) / (maxValue - 
minValue)) * currNumRows);
-              }
-            } else {
-              if (minValue > value || minValue == value && closedBound) {
-                return currNumRows;
-              }
-              if (maxValue < value || maxValue == value && !closedBound) {
-                return 0;
-              }
-              if (aspCtx.isUniformWithinRange()) {
-                // Assuming uniform distribution, we can use the range to 
calculate
-                // new estimate for the number of rows
-                return Math.round(((double) (maxValue - value) / (maxValue - 
minValue)) * currNumRows);
-              }
-            }
-          } else if 
(colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME) ||
-              colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)) {
-            BigDecimal value = new BigDecimal(boundValue);
-            BigDecimal maxValue = new 
BigDecimal(cs.getRange().maxValue.toString());
-            BigDecimal minValue = new 
BigDecimal(cs.getRange().minValue.toString());
-            int minComparison = value.compareTo(minValue);
-            int maxComparison = value.compareTo(maxValue);
-            if (upperBound) {
-              if (maxComparison > 0 || maxComparison == 0 && closedBound) {
-                return currNumRows;
-              }
-              if (minComparison < 0 || minComparison == 0 && !closedBound) {
-                return 0;
-              }
-              if (aspCtx.isUniformWithinRange()) {
-                // Assuming uniform distribution, we can use the range to 
calculate
-                // new estimate for the number of rows
-                return Math.round(
-                    
((value.subtract(minValue)).divide(maxValue.subtract(minValue), 10, 
RoundingMode.UP))
-                        .multiply(BigDecimal.valueOf(currNumRows))
-                        .doubleValue());
-              }
-            } else {
-              if (minComparison < 0 || minComparison == 0 && closedBound) {
-                return currNumRows;
-              }
-              if (maxComparison > 0 || maxComparison == 0 && !closedBound) {
-                return 0;
-              }
-              if (aspCtx.isUniformWithinRange()) {
-                // Assuming uniform distribution, we can use the range to 
calculate
-                // new estimate for the number of rows
-                return Math.round(
-                    
((maxValue.subtract(value)).divide(maxValue.subtract(minValue), 10, 
RoundingMode.UP))
-                        .multiply(BigDecimal.valueOf(currNumRows))
-                        .doubleValue());
-              }
-            }
-          } else if (colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) {
-            float value = Float.parseFloat(boundValue);
-            float maxValue = cs.getRange().maxValue.floatValue();
-            float minValue = cs.getRange().minValue.floatValue();
-            if (upperBound) {
-              if (maxValue < value || maxValue == value && closedBound) {
-                return currNumRows;
-              }
-              if (minValue > value || minValue == value && !closedBound) {
-                return 0;
-              }
-              if (aspCtx.isUniformWithinRange()) {
-                // Assuming uniform distribution, we can use the range to 
calculate
-                // new estimate for the number of rows
-                return Math.round(((double) (value - minValue) / (maxValue - 
minValue)) * currNumRows);
-              }
-            } else {
-              if (minValue > value || minValue == value && closedBound) {
-                return currNumRows;
-              }
-              if (maxValue < value || maxValue == value && !closedBound) {
-                return 0;
-              }
-              if (aspCtx.isUniformWithinRange()) {
-                // Assuming uniform distribution, we can use the range to 
calculate
-                // new estimate for the number of rows
-                return Math.round(((double) (maxValue - value) / (maxValue - 
minValue)) * currNumRows);
-              }
-            }
-          } else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) 
{
-            double value = Double.parseDouble(boundValue);
-            double maxValue = cs.getRange().maxValue.doubleValue();
-            double minValue = cs.getRange().minValue.doubleValue();
-            if (upperBound) {
-              if (maxValue < value || maxValue == value && closedBound) {
-                return currNumRows;
-              }
-              if (minValue > value || minValue == value && !closedBound) {
-                return 0;
-              }
-              if (aspCtx.isUniformWithinRange()) {
-                // Assuming uniform distribution, we can use the range to 
calculate
-                // new estimate for the number of rows
-                return Math.round(((value - minValue) / (maxValue - minValue)) 
* currNumRows);
-              }
-            } else {
-              if (minValue > value || minValue == value && closedBound) {
-                return currNumRows;
-              }
-              if (maxValue < value || maxValue == value && !closedBound) {
-                return 0;
-              }
-              if (aspCtx.isUniformWithinRange()) {
-                // Assuming uniform distribution, we can use the range to 
calculate
-                // new estimate for the number of rows
-                return Math.round(((maxValue - value) / (maxValue - minValue)) 
* currNumRows);
-              }
-            }
-          }
-        } catch (NumberFormatException nfe) {
-          return currNumRows / 3;
+        Long result =
+            evaluateComparatorWithRangeStats(cs, currNumRows, 
colTypeLowerCase, boundValue, upperBound, closedBound,
+                aspCtx);
+        if (result != null) {
+          return result;
         }
       }
       // default
       return currNumRows / 3;
     }
 
+    private static class EvaluateComparatorWithRange<T extends Number & 
Comparable<T>> {
+      /**
+       * Adjusts the number of rows assuming a uniform distribution.
+       * <p>
+       * If the values are uniformly distributed between min and max, and the 
predicate
+       * only accepts values between lower and upper, do a simple linear 
scaling.
+       * </p>
+       */
+      interface RescaleRows<T> {
+        double rescaleNumberOfRows(T lower, T upper, T min, T max, long 
numRows);
+      }
+
+      private final Function<Number, T> convert;
+      private final Function<String, T> parse;
+      private final RescaleRows<T> rescaleRows;
+
+      EvaluateComparatorWithRange(Function<Number, T> convert, 
Function<String, T> parse, RescaleRows<T> rescaleRows) {
+        this.convert = convert;
+        this.parse = parse;
+        this.rescaleRows = rescaleRows;
+      }
+
+      Long evaluate(Range range, String boundValue, boolean upperBound, 
boolean closedBound, long currNumRows,
+          AnnotateStatsProcCtx aspCtx) {
+        T maxValue = convert.apply(range.maxValue);
+        T minValue = convert.apply(range.minValue);
+        T value = parse.apply(boundValue);
+
+        int maxComparison = maxValue.compareTo(value);
+        int minComparison = minValue.compareTo(value);
+        if (upperBound) {
+          if (maxComparison < 0 || maxComparison == 0 && closedBound) {
+            return currNumRows;
+          }
+          if (minComparison > 0 || minComparison == 0 && !closedBound) {
+            return 0L;
+          }
+          if (aspCtx.isUniformWithinRange()) {
+            // Assuming uniform distribution, we can use the range to calculate
+            // new estimate for the number of rows
+            return Math.round(rescaleRows.rescaleNumberOfRows(minValue, value, 
minValue, maxValue, currNumRows));
+          }
+        } else {
+          if (minComparison > 0 || minComparison == 0 && closedBound) {
+            return currNumRows;
+          }
+          if (maxComparison < 0 || maxComparison == 0 && !closedBound) {
+            return 0L;
+          }
+          if (aspCtx.isUniformWithinRange()) {
+            // Assuming uniform distribution, we can use the range to calculate
+            // new estimate for the number of rows
+            return Math.round(rescaleRows.rescaleNumberOfRows(value, maxValue, 
minValue, maxValue, currNumRows));
+          }
+        }
+        return null;
+      }
+    }
+
+    /**
+     * Estimates the row count for a comparison predicate from the column's min/max range.
+     * <p>
+     * Picks an {@link EvaluateComparatorWithRange} matching the column type and delegates to it:
+     * tinyint and smallint are compared as byte/short, int/date/timestamp as long (dates via
+     * {@code DateWritable#getDays()}, timestamps via {@code toEpochSecond()}), decimal and bigint
+     * as {@link BigDecimal}, float and double as themselves.
+     * </p>
+     *
+     * @param cs          column statistics; {@code cs.getRange()} is passed to the helper
+     * @param currNumRows current row count estimate
+     * @param type        lower-cased column type name; any type starting with the decimal type
+     *                    name is treated as decimal
+     * @param boundValue  textual constant the column is compared with
+     * @param upperBound  forwarded to the helper to select the direction of the comparison
+     * @param closedBound forwarded to the helper to select inclusive or exclusive comparison
+     * @param aspCtx      context consulted for {@code isUniformWithinRange()}
+     * @return the estimate, or {@code null} when the type is not handled here, the bound raises a
+     *         {@link NumberFormatException}, or the helper itself returns {@code null}
+     */
+    private Long evaluateComparatorWithRangeStats(ColStatistics cs, long currNumRows, String type, String boundValue,
+        boolean upperBound, boolean closedBound, AnnotateStatsProcCtx aspCtx) {
+      try {
+        EvaluateComparatorWithRange<?> helper;
+        if (type.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
+          // Strip precision/scale so that the switch below can match on the bare type name.
+          type = serdeConstants.DECIMAL_TYPE_NAME;
+        }
+
+        switch (type) {
+        case serdeConstants.TINYINT_TYPE_NAME:
+          helper = new EvaluateComparatorWithRange<>(Number::byteValue, Byte::parseByte,
+              (lower, upper, min, max, numRows) -> ((double) (upper - lower) / (max - min)) * numRows);
+          break;
+        case serdeConstants.SMALLINT_TYPE_NAME:
+          helper = new EvaluateComparatorWithRange<>(Number::shortValue, Short::parseShort,
+              (lower, upper, min, max, numRows) -> ((double) (upper - lower) / (max - min)) * numRows);
+          break;
+        case serdeConstants.INT_TYPE_NAME, serdeConstants.DATE_TYPE_NAME, serdeConstants.TIMESTAMP_TYPE_NAME:
+          // NOTE(review): java.sql.Date.valueOf signals a malformed literal with
+          // IllegalArgumentException, which the NumberFormatException handler below does not
+          // cover - confirm whether such bounds can reach this point.
+          Function<String, Long> parse;
+          if (type.equals(serdeConstants.DATE_TYPE_NAME)) {
+            parse = str -> {
+              DateWritable writableVal = new DateWritable(java.sql.Date.valueOf(str));
+              return Long.valueOf(writableVal.getDays());
+            };
+          } else if (type.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
+            parse = str -> {
+              // Use the lambda argument rather than the captured boundValue, like the other parsers.
+              TimestampWritableV2 timestampWritable = new TimestampWritableV2(Timestamp.valueOf(str));
+              return timestampWritable.getTimestamp().toEpochSecond();
+            };
+          } else {
+            parse = str -> (long) Integer.parseInt(str);
+          }
+          helper = new EvaluateComparatorWithRange<>(Number::longValue, parse,
+              (lower, upper, min, max, numRows) -> ((double) (upper - lower) / (max - min)) * numRows);
+          break;
+
+        case serdeConstants.DECIMAL_TYPE_NAME, serdeConstants.BIGINT_TYPE_NAME:
+          // Scale by the numRows argument rather than the captured currNumRows, like the other cases.
+          helper = new EvaluateComparatorWithRange<>(num -> new BigDecimal(num.toString()), BigDecimal::new,
+              (lower, upper, min, max, numRows) -> ((upper.subtract(lower)).divide(max.subtract(min), 10,
+                  RoundingMode.UP)).multiply(BigDecimal.valueOf(numRows)).doubleValue());
+          break;
+        case serdeConstants.FLOAT_TYPE_NAME:
+          helper = new EvaluateComparatorWithRange<>(Number::floatValue, Float::parseFloat,
+              (lower, upper, min, max, numRows) -> ((double) (upper - lower) / (max - min)) * numRows);
+          break;
+        case serdeConstants.DOUBLE_TYPE_NAME:
+          helper = new EvaluateComparatorWithRange<>(Number::doubleValue, Double::parseDouble,
+              (lower, upper, min, max, numRows) -> ((upper - lower) / (max - min)) * numRows);
+          break;
+        default:
+          // Type without range-based estimation.
+          return null;
+        }
+
+        // A null result is passed through unchanged.
+        return helper.evaluate(cs.getRange(), boundValue, upperBound, closedBound, currNumRows, aspCtx);
+      } catch (NumberFormatException nfe) {
+        // Bound is not a valid number for the column type: no range-based estimate.
+        return null;
+      }
+    }
+
     private long evaluateComparatorWithHistogram(ColStatistics cs, long 
currNumRows, String colTypeLowerCase,
         String boundValue, boolean upperBound, boolean closedBound) {
       final KllFloatsSketch kll = 
KllFloatsSketch.heapify(Memory.wrap(cs.getHistogram()));

Reply via email to