dengzhhu653 commented on code in PR #3137:
URL: https://github.com/apache/hive/pull/3137#discussion_r1044002365
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java:
##########
@@ -167,6 +178,109 @@ public Double visitCall(RexCall call) {
return selectivity;
}
+ private double computeRangePredicateSelectivity(RexCall call, SqlKind op) {
+ final boolean isLiteralLeft =
call.getOperands().get(0).getKind().equals(SqlKind.LITERAL);
+ final boolean isLiteralRight =
call.getOperands().get(1).getKind().equals(SqlKind.LITERAL);
+ final boolean isInputRefLeft =
call.getOperands().get(0).getKind().equals(SqlKind.INPUT_REF);
+ final boolean isInputRefRight =
call.getOperands().get(1).getKind().equals(SqlKind.INPUT_REF);
+
+ if (childRel instanceof HiveTableScan && isLiteralLeft != isLiteralRight
&& isInputRefLeft != isInputRefRight) {
+ final HiveTableScan t = (HiveTableScan) childRel;
+ final int inputRefIndex = ((RexInputRef)
call.getOperands().get(isInputRefLeft ? 0 : 1)).getIndex();
+ final List<ColStatistics> colStats =
t.getColStat(Collections.singletonList(inputRefIndex));
+
+ if (!colStats.isEmpty() && isHistogramAvailable(colStats.get(0))) {
+ final KllFloatsSketch kll =
KllFloatsSketch.heapify(Memory.wrap(colStats.get(0).getHistogram()));
+ final Object boundValueObject = ((RexLiteral)
call.getOperands().get(isLiteralLeft ? 0 : 1)).getValue();
+ final SqlTypeName typeName = call.getOperands().get(isInputRefLeft ? 0
: 1).getType().getSqlTypeName();
+ float value = extractLiteral(typeName, boundValueObject);
+ boolean closedBound = op.equals(SqlKind.LESS_THAN_OR_EQUAL) ||
op.equals(SqlKind.GREATER_THAN_OR_EQUAL);
+
+ double selectivity;
+ if (op.equals(SqlKind.LESS_THAN_OR_EQUAL) ||
op.equals(SqlKind.LESS_THAN)) {
+ selectivity = closedBound ? lessThanOrEqualSelectivity(kll, value) :
lessThanSelectivity(kll, value);
+ } else {
+ selectivity = closedBound ? greaterThanOrEqualSelectivity(kll,
value) : greaterThanSelectivity(kll, value);
+ }
+
+ // selectivity does not account for null values, we multiply for the
number of non-null values (getN) and we
+ // divide by the total (non-null + null values) to get the overall
selectivity
+ return kll.getN() * selectivity / t.getTable().getRowCount();
Review Comment:
I mean, for a filter a < 3, if the table contains the following rows for column a:
1
null
null
3
4
after the filter there is only one row left, so the filter selectivity is 1/5,
not (1 + 2 (nulls))/5?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]