zabetak commented on code in PR #6293:
URL: https://github.com/apache/hive/pull/6293#discussion_r2919368081


##########
ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/stats/TestFilterSelectivityEstimator.java:
##########
@@ -511,6 +605,428 @@ public void 
testComputeRangePredicateSelectivityNotBetweenWithNULLS() {
     
doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0));
     RexNode filter = REX_BUILDER.makeCall(HiveBetween.INSTANCE, boolTrue, 
inputRef0, int1, int3);
     FilterSelectivityEstimator estimator = new 
FilterSelectivityEstimator(scan, mq);
-    Assert.assertEquals(0.55, estimator.estimateSelectivity(filter), DELTA);
+    // only the values 4, 5, 6, 7 fulfill the condition NOT BETWEEN 1 AND 3
+    // (the NULL values do not fulfill the condition)
+    Assert.assertEquals(0.2, estimator.estimateSelectivity(filter), DELTA);
+  }
+
+  @Test
+  public void testRangePredicateCastIntegerValuesInsideTypeRange() {
+    // use VALUES; every tested integer type can represent all of its values,
+    // so casting between these types should preserve the exact selectivity 
estimate
+    useFieldWithValues("f_tinyint", VALUES, KLL);
+    checkSelectivity(3 / 13.f, ge(cast("f_tinyint", TINYINT), int5));
+    checkSelectivity(3 / 13.f, ge(cast("f_tinyint", SMALLINT), int5));
+    checkSelectivity(3 / 13.f, ge(cast("f_tinyint", INTEGER), int5));
+    checkSelectivity(3 / 13.f, ge(cast("f_tinyint", BIGINT), int5));
+
+    useFieldWithValues("f_smallint", VALUES, KLL);
+    checkSelectivity(3 / 13.f, ge(cast("f_smallint", TINYINT), int5));
+    checkSelectivity(3 / 13.f, ge(cast("f_smallint", SMALLINT), int5));
+    checkSelectivity(3 / 13.f, ge(cast("f_smallint", INTEGER), int5));
+    checkSelectivity(3 / 13.f, ge(cast("f_smallint", BIGINT), int5));
+
+    useFieldWithValues("f_integer", VALUES, KLL);
+    checkSelectivity(3 / 13.f, ge(cast("f_integer", TINYINT), int5));
+    checkSelectivity(3 / 13.f, ge(cast("f_integer", SMALLINT), int5));
+    checkSelectivity(3 / 13.f, ge(cast("f_integer", INTEGER), int5));
+    checkSelectivity(3 / 13.f, ge(cast("f_integer", BIGINT), int5));
+
+    useFieldWithValues("f_bigint", VALUES, KLL);
+    checkSelectivity(3 / 13.f, ge(cast("f_bigint", TINYINT), int5));
+    checkSelectivity(3 / 13.f, ge(cast("f_bigint", SMALLINT), int5));
+    checkSelectivity(3 / 13.f, ge(cast("f_bigint", INTEGER), int5));
+    checkSelectivity(3 / 13.f, ge(cast("f_bigint", BIGINT), int5));
+  }
+
+  @Test
+  public void testRangePredicateCastIntegerValuesOutsideTypeRange() {
+    // use VALUES2, even though the smaller tested types cannot represent all 
of its values;
+    // we're only interested in whether the cast to a smaller integer type 
falls back to the default selectivity
+    useFieldWithValues("f_tinyint", VALUES2, KLL2);
+    checkSelectivity(16 / 28.f, ge(cast("f_tinyint", TINYINT), int5));
+    checkSelectivity(18 / 28.f, ge(cast("f_tinyint", SMALLINT), int5));
+    checkSelectivity(20 / 28.f, ge(cast("f_tinyint", INTEGER), int5));
+    checkSelectivity(20 / 28.f, ge(cast("f_tinyint", BIGINT), int5));
+
+    useFieldWithValues("f_smallint", VALUES2, KLL2);
+    checkSelectivity(1 / 3.f, ge(cast("f_smallint", TINYINT), int5));
+    checkSelectivity(18 / 28.f, ge(cast("f_smallint", SMALLINT), int5));
+    checkSelectivity(20 / 28.f, ge(cast("f_smallint", INTEGER), int5));
+    checkSelectivity(20 / 28.f, ge(cast("f_smallint", BIGINT), int5));
+
+    useFieldWithValues("f_integer", VALUES2, KLL2);
+    checkSelectivity(1 / 3.f, ge(cast("f_integer", TINYINT), int5));
+    checkSelectivity(1 / 3.f, ge(cast("f_integer", SMALLINT), int5));
+    checkSelectivity(20 / 28.f, ge(cast("f_integer", INTEGER), int5));
+    checkSelectivity(20 / 28.f, ge(cast("f_integer", BIGINT), int5));
+
+    useFieldWithValues("f_bigint", VALUES2, KLL2);
+    checkSelectivity(1 / 3.f, ge(cast("f_bigint", TINYINT), int5));
+    checkSelectivity(1 / 3.f, ge(cast("f_bigint", SMALLINT), int5));
+    checkSelectivity(1 / 3.f, ge(cast("f_bigint", INTEGER), int5));
+    checkSelectivity(20 / 28.f, ge(cast("f_bigint", BIGINT), int5));
+  }
+
+  @Test
+  public void testRangePredicateTypeMatrix() {
+    // checks many possible combinations of types
+    List<RelDataTypeField> fields = tableType.getFieldList();
+    for (var srcField : fields) {
+      if (isTemporal(srcField.getType())) {
+        continue;
+      }
+
+      useFieldWithValues(srcField.getName(), VALUES, KLL);
+
+      for (var tgt : fields) {
+        try {
+          if (isTemporal(tgt.getType())) {
+            continue;
+          }
+
+          RexNode expr = cast(srcField.getName(), tgt.getType());
+          checkBetweenSelectivity(3, VALUES.length, VALUES.length, expr, 5, 7);
+        } catch (AssertionError e) {
+          throw new AssertionError("Error when casting from " + 
srcField.getType() + " to " + tgt.getType(), e);
+        }

Review Comment:
   The use of parameterized tests can solve these shortcomings but let's not 
bother with it right now. I am OK to keep the try-catch pattern.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to