Github user srowen commented on a diff in the pull request: https://github.com/apache/spark/pull/21109#discussion_r193733146 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala --- @@ -131,13 +135,100 @@ object ExtractEquiJoinKeys extends Logging with PredicateHelper { if (joinKeys.nonEmpty) { val (leftKeys, rightKeys) = joinKeys.unzip - logDebug(s"leftKeys:$leftKeys | rightKeys:$rightKeys") - Some((joinType, leftKeys, rightKeys, otherPredicates.reduceOption(And), left, right)) + // Find any simple range expressions between two columns + // (and involving only those two columns) of the two tables being joined, + // which are not used in the equijoin expressions, + // and which can be used for secondary sort optimizations. + // rangePreds will contain the original expressions to be filtered out later. + val rangePreds: mutable.Set[Expression] = mutable.Set.empty + var rangeConditions: Seq[BinaryComparison] = + if (SQLConf.get.useSmjInnerRangeOptimization) { + otherPredicates.flatMap { + case p@LessThan(l, r) => checkRangeConditions(l, r, left, right, joinKeys).map { + case true => rangePreds.add(p); GreaterThan(r, l) + case false => rangePreds.add(p); p + } + case p@LessThanOrEqual(l, r) => + checkRangeConditions(l, r, left, right, joinKeys).map { + case true => rangePreds.add(p); GreaterThanOrEqual(r, l) + case false => rangePreds.add(p); p + } + case p@GreaterThan(l, r) => checkRangeConditions(l, r, left, right, joinKeys).map { + case true => rangePreds.add(p); LessThan(r, l) + case false => rangePreds.add(p); p + } + case p@GreaterThanOrEqual(l, r) => + checkRangeConditions(l, r, left, right, joinKeys).map { + case true => rangePreds.add(p); LessThanOrEqual(r, l) + case false => rangePreds.add(p); p + } + case _ => None + } + } else { + Nil + } + + // Only using secondary join optimization when both lower and upper conditions + // are specified (e.g. 
t1.a < t2.b + x and t1.a > t2.b - x) + if(rangeConditions.size != 2 || --- End diff -- Nit: space after "if" here and elsewhere
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org