tdas commented on a change in pull request #33093: URL: https://github.com/apache/spark/pull/33093#discussion_r662304369
########## File path: sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala ########## @@ -199,6 +284,49 @@ case class FlatMapGroupsWithStateExec( } } + /** + * Process the new data iterator along with the initial state. The initial state is applied + * before processing the new data for every key. The user defined function is called only + * once for every key that has either initial state or data or both. + */ + def processNewDataWithInitialState( + childDataIter: Iterator[InternalRow], + initStateIter: Iterator[InternalRow] + ): Iterator[InternalRow] = { + + if (!childDataIter.hasNext && !initStateIter.hasNext) return Iterator.empty + + // Create iterators for the child data and the initial state grouped by their grouping + // attributes. + val groupedChildDataIter = GroupedIterator(childDataIter, groupingAttributes, child.output) + val groupedInitialStateIter = + GroupedIterator(initStateIter, initialStateGroupAttrs, initialState.output) + + // Create a CoGroupedIterator that will group the two iterators together for every key group. + new CoGroupedIterator( + groupedChildDataIter, groupedInitialStateIter, groupingAttributes).flatMap { + case (keyRow, valueRowIter, initialStateRowIter) => + val keyUnsafeRow = keyRow.asInstanceOf[UnsafeRow] + var foundInitialStateForKey = false + initialStateRowIter.foreach { initialStateRow => + if (foundInitialStateForKey) { + throw new IllegalArgumentException("The initial state provided contained " + Review comment: nit: this does not have to be done in this PR, but is it possible to print the key that has the duplicate, so that the user can debug? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org