beliefer commented on a change in pull request #30387:
URL: https://github.com/apache/spark/pull/30387#discussion_r547714479



##########
File path: sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala
##########
@@ -147,31 +148,133 @@ class FrameLessOffsetWindowFunctionFrame(
     expressions: Array[OffsetWindowFunction],
     inputSchema: Seq[Attribute],
     newMutableProjection: (Seq[Expression], Seq[Attribute]) => MutableProjection,
-    offset: Int)
+    offset: Int,
+    ignoreNulls: Boolean = false)
   extends OffsetWindowFunctionFrameBase(
     target, ordinal, expressions, inputSchema, newMutableProjection, offset) {
 
+  /** The input expression of Lead/Lag. */
+  private lazy val inputExpression = expressions.toSeq.map(_.input).head
+
+  /** The index of input expression in the row. */
+  private lazy val idx = inputAttrs.zipWithIndex.find(_._1 == inputExpression).map(_._2).head
+
+  /** Holder the UnsafeRow where the input operator by function is not null. */
+  private var nextSelectedRow = EmptyRow
+
+  /**
+   *  The number of UnsafeRows skipped to get the next UnsafeRow where
+   *  the input operator by function is not null.
+   */
+  private var skipNonNullCount = 0
+
+  /** find the offset row whose input is not null */
+  private def findNextRowWithNonNullInput(): Unit = {
+    while (skipNonNullCount < offset && inputIndex < input.length) {
+      val r = WindowFunctionFrame.getNextOrNull(inputIterator)
+      if (!r.isNullAt(idx)) {
+        nextSelectedRow = r
+        skipNonNullCount += 1
+      }
+      inputIndex += 1
+    }
+  }
+
   override def prepare(rows: ExternalAppendOnlyUnsafeRowArray): Unit = {
     input = rows
     inputIterator = input.generateIterator()
     // drain the first few rows if offset is larger than zero
     inputIndex = 0
-    while (inputIndex < offset) {
-      if (inputIterator.hasNext) inputIterator.next()
-      inputIndex += 1
+    if (ignoreNulls) {
+      findNextRowWithNonNullInput
+    } else {
+      while (inputIndex < offset) {
+        if (inputIterator.hasNext) inputIterator.next()
+        inputIndex += 1
+      }
+      inputIndex = offset
     }
-    inputIndex = offset
+  }
+
+  private val doWrite = if (ignoreNulls && offset > 0) {
+    // For illustration, here is one example: the input data contains six rows,
+    // and the input values of each row are: null, x, null, null, y, null, z, null.
+    // We use Lead(input, 2) with IGNORE NULLS and the process is as follows:
+    // 1. current row -> null, next selected row -> y, output: y;
+    // 2. current row -> x, next selected row -> z, output: z;
+    // 3. current row -> null, next selected row -> z, output: z;
+    // 4. current row -> null, next selected row -> z, output: z;
+    // 5. current row -> y, next selected row -> empty, output: null;
+    // ... next selected row is empty, all following return null.
+    (current: InternalRow) =>
+      if (current.isNullAt(idx)) {
+        if (nextSelectedRow == EmptyRow) {
+          // Use default values since the offset row whose input value is not null does not exist.
+          fillDefaultValue(current)
+        } else {
+          projection(nextSelectedRow)
+        }
+      } else {
+        skipNonNullCount -= 1
+        findNextRowWithNonNullInput
+        if (skipNonNullCount == offset) {
+          projection(nextSelectedRow)
+        } else {
+          // Use default values since the offset row whose input value is not null does not exist.
+          fillDefaultValue(current)
+          nextSelectedRow = EmptyRow
+        }
+      }
+  } else if (ignoreNulls && offset < 0) {
+    // For illustration, here is one example: the input data contains six rows,
+    // and the input values of each row are: null, x, null, null, y, null, z, null.
+    // We use Lag(input, 1) with IGNORE NULLS and the process is as follows:
+    // 1. current row -> null, next selected row -> empty, output: null;
+    // 2. current row -> x, next selected row -> empty, output: null;
+    // 3. current row -> null, next selected row -> x, output: x;
+    // 4. current row -> null, next selected row -> x, output: x;
+    // 5. current row -> y, next selected row -> x, output: x;
+    // 6. current row -> null, next selected row -> y, output: y;
+    // 7. current row -> z, next selected row -> y, output: y;
+    // 8. current row -> z, next selected row -> z, output: z;
+    val absOffset = Math.abs(offset)
+    (current: InternalRow) =>
+      if (nextSelectedRow == EmptyRow && skipNonNullCount == absOffset) {
+        do {
+          val r = WindowFunctionFrame.getNextOrNull(inputIterator)
+          if (!r.isNullAt(idx)) {
+            nextSelectedRow = r
+          }
+          inputIndex += 1
+        } while (nextSelectedRow == EmptyRow && inputIndex < input.length)
+      }
+      if (nextSelectedRow == EmptyRow) {
+        // Use default values since the offset row whose input value is not null does not exist.
+        fillDefaultValue(current)
+      } else {
+        projection(nextSelectedRow)
+      }
+      if (!current.isNullAt(idx)) {
+        if (skipNonNullCount < absOffset) {

Review comment:
       The original idea is that once skipNonNullCount has increased to absOffset, it no longer increases.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to