Github user viirya commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19862#discussion_r154560155

    --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala ---
    @@ -699,39 +700,44 @@ private[joins] class SortMergeJoinScanner(
           matchJoinKey = null
           bufferedMatches.clear()
           false
    -    } else if (matchJoinKey != null && keyOrdering.compare(streamedRowKey, matchJoinKey) == 0) {
    -      // The new streamed row has the same join key as the previous row, so return the same matches.
    -      true
    -    } else if (bufferedRow == null) {
    -      // The streamed row's join key does not match the current batch of buffered rows and there are
    -      // no more rows to read from the buffered iterator, so there can be no more matches.
    -      matchJoinKey = null
    -      bufferedMatches.clear()
    -      false
         } else {
    -      // Advance both the streamed and buffered iterators to find the next pair of matching rows.
    -      var comp = keyOrdering.compare(streamedRowKey, bufferedRowKey)
    -      do {
    -        if (streamedRowKey.anyNull) {
    -          advancedStreamed()
    -        } else {
    -          assert(!bufferedRowKey.anyNull)
    -          comp = keyOrdering.compare(streamedRowKey, bufferedRowKey)
    -          if (comp > 0) advancedBufferedToRowWithNullFreeJoinKey()
    -          else if (comp < 0) advancedStreamed()
    -        }
    -      } while (streamedRow != null && bufferedRow != null && comp != 0)
    -      if (streamedRow == null || bufferedRow == null) {
    -        // We have either hit the end of one of the iterators, so there can be no more matches.
    +      // To make sure vars like bufferedRow is set
    +      advancedBufferedIterRes
    --- End diff --

    By advancing the buffered iterator here, won't we miss the `bufferedRow` that was advanced before?
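    A minimal sketch of the concern, in plain Scala rather than the Spark code (the `BufferedSide` class, the `advanceBuffered()` method, and the data below are hypothetical stand-ins): if the reference to `advancedBufferedIterRes` consumes another row from the buffered iterator at this point, the row that was already advanced is skipped; if it is instead a memoized result of the earlier advance (for example a `lazy val`), the already-read row is reused.

```scala
// A minimal, self-contained sketch of the concern; not the Spark implementation.
object AdvanceOnceSketch {

  // Hypothetical stand-in for the buffered side of SortMergeJoinScanner.
  class BufferedSide(rows: Iterator[Int]) {
    var bufferedRow: Option[Int] = None

    // Consumes one row from the buffered iterator and remembers it,
    // returning true if a row was read.
    def advanceBuffered(): Boolean = {
      if (rows.hasNext) { bufferedRow = Some(rows.next()); true }
      else { bufferedRow = None; false }
    }
  }

  def main(args: Array[String]): Unit = {
    // Re-advancing: a second call consumes another row, so the row read by the
    // first call (row 1) is never matched.
    val eager = new BufferedSide(Iterator(1, 2, 3))
    eager.advanceBuffered()
    eager.advanceBuffered()
    println(eager.bufferedRow) // Some(2) -- row 1 was skipped

    // Memoizing the first advance: later references force the lazy val only once,
    // so bufferedRow still holds the row that was advanced before.
    val memoized = new BufferedSide(Iterator(1, 2, 3))
    lazy val advancedBufferedIterRes: Boolean = memoized.advanceBuffered()
    println(advancedBufferedIterRes) // forces the advance: reads row 1
    println(advancedBufferedIterRes) // reuses the memoized result, no extra read
    println(memoized.bufferedRow)    // Some(1) -- the previously advanced row is kept
  }
}
```

    Whether the PR's `advancedBufferedIterRes` behaves like the memoized case or the re-advancing case is what the question above is asking.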