viirya commented on code in PR #39248: URL: https://github.com/apache/spark/pull/39248#discussion_r1058775725
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala:
##########
@@ -127,6 +125,54 @@ abstract class Expression extends TreeNode[Expression] {

   def references: AttributeSet = _references

+  /**
+   * Returns true if the expression contains mutable state.
+   *
+   * A stateful expression should never be evaluated multiple times for a single row. This should
+   * only be a problem for interpreted execution. This can be prevented by creating fresh copies
+   * of the stateful expression before execution. A common example to trigger this issue:
+   * {{{
+   *   val rand = functions.rand()
+   *   df.select(rand, rand) // These 2 rand should not share a state.
+   * }}}
+   */
+  def stateful: Boolean = false
+
+  /**
+   * Returns a copy of this expression where all stateful expressions are replaced with fresh
+   * uninitialized copies. If the expression contains no stateful expressions then the original
+   * expression is returned.
+   */
+  def freshCopyIfContainsStatefulExpression(): Expression = {
+    val childrenIndexedSeq: IndexedSeq[Expression] = children match {
+      case types: IndexedSeq[Expression] => types
+      case other => other.toIndexedSeq
+    }
+    val newChildren = childrenIndexedSeq.map(_.freshCopyIfContainsStatefulExpression())
+    // A more efficient version of `children.zip(newChildren).exists(_ ne _)`
+    val anyChildChanged = {
+      val size = newChildren.length
+      var i = 0
+      var res: Boolean = false
+      while (!res && i < size) {
+        res |= (childrenIndexedSeq(i) ne newChildren(i))
+        i += 1
+      }
+      res
+    }
+    // If the children contain stateful expressions and get copied, or this expression is stateful,
+    // copy this expression with the new children.
+    if (anyChildChanged || stateful) {
+      CurrentOrigin.withOrigin(origin) {
+        val res = withNewChildrenInternal(newChildren)
+        res.copyTagsFrom(this)

Review Comment:
   Yeah, you're correct. It is just a string for the node name.

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org