vinodkc commented on code in PR #39449: URL: https://github.com/apache/spark/pull/39449#discussion_r1087343234
########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala: ########## @@ -77,13 +150,12 @@ import org.apache.spark.unsafe.types.UTF8String // scalastyle:on line.size.limit case class Mask( Review Comment: Done ########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala: ########## @@ -236,40 +265,271 @@ case class Mask( } /** - * Returns the [[DataType]] of the result of evaluating this expression. It is invalid to query - * the dataType of an unresolved expression (i.e., when `resolved` == false). + * Returns a Seq of the children of this node. Children should not change. Immutability required + * for containsChild optimization */ - override def dataType: DataType = StringType + override def children: Seq[Expression] = + Seq(input, upperCharExpr, lowerCharExpr, digitCharExpr, otherCharExpr) + + override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Mask = + copy( + input = newChildren(0), + upperCharExpr = newChildren(1), + lowerCharExpr = newChildren(2), + digitCharExpr = newChildren(3), + otherCharExpr = newChildren(4)) +} + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = + """_FUNC_(input[, charCount, upperChar, lowerChar, digitChar, otherChar]) - masks the first n characters of given string value. + The function masks the first n characters of the value with 'X' or 'x', and numbers with 'n'. + This can be useful for creating copies of tables with sensitive information removed. + Error behavior: null value as replacement argument will throw AnalysisError. + """, + arguments = """ + Arguments: + * input - string value to mask. Supported types: STRING, VARCHAR, CHAR + * charCount - number of characters to be masked. Default value: 4 + * upperChar - character to replace upper-case characters with. Specify NULL to retain original character. Default value: 'X' + * lowerChar - character to replace lower-case characters with. 
Specify NULL to retain original character. Default value: 'x' + * digitChar - character to replace digit characters with. Specify NULL to retain original character. Default value: 'n' + * otherChar - character to replace all other characters with. Specify NULL to retain original character. Default value: NULL + """, + examples = """ + Examples: + > SELECT _FUNC_('abcd-EFGH-8765-4321'); + xxxx-EFGH-8765-4321 + > SELECT _FUNC_('abcd-EFGH-8765-4321', 9); + xxxx-XXXX-8765-4321 + > SELECT _FUNC_('abcd-EFGH-8765-@$#', 14); + xxxx-XXXX-nnnn-@$# + > SELECT _FUNC_('abcd-EFGH-8765-@$#', 15, 'x', 'X', 'n', 'o'); + XXXXoxxxxonnnno@$# + > SELECT _FUNC_('abcd-EFGH-8765-@$#', 20, 'x', 'X', 'n', 'o'); + XXXXoxxxxonnnnoooo + > SELECT _FUNC_('AbCD123-@$#', 10,'Q', 'q', 'd', 'o'); + QqQQdddooo# + > SELECT _FUNC_('AbCD123-@$#', 10, NULL, 'q', 'd', 'o'); + AqCDdddooo# + > SELECT _FUNC_('AbCD123-@$#', 10, NULL, NULL, 'd', 'o'); + AbCDdddooo# + > SELECT _FUNC_('AbCD123-@$#', 10, NULL, NULL, NULL, 'o'); + AbCD123ooo# + > SELECT _FUNC_(NULL); + NULL + > SELECT _FUNC_(NULL, 1, NULL, NULL, 'o'); + NULL + """, + since = "3.4.0", + group = "string_funcs") +// scalastyle:on line.size.limit +case class MaskFirstN( + input: Expression, + charCountExpr: Expression, + override val upperCharExpr: Expression, + override val lowerCharExpr: Expression, + override val digitCharExpr: Expression, + override val otherCharExpr: Expression) + extends SeptenaryExpression + with Maskable { + + def this(input: Expression) = + this( + input, + Literal(Mask.DEFAULT_CHAR_COUNT), + Literal(Mask.MASKED_UPPERCASE), + Literal(Mask.MASKED_LOWERCASE), + Literal(Mask.MASKED_DIGIT), + Literal(Mask.MASKED_IGNORE, StringType)) + + def this(input: Expression, charCountExpr: Expression) = + this( + input, + charCountExpr, + Literal(Mask.MASKED_UPPERCASE), + Literal(Mask.MASKED_LOWERCASE), + Literal(Mask.MASKED_DIGIT), + Literal(Mask.MASKED_IGNORE, StringType)) + + def this(input: Expression, charCountExpr: Expression, 
upperCharExpr: Expression) = + this( + input, + charCountExpr, + upperCharExpr, + Literal(Mask.MASKED_LOWERCASE), + Literal(Mask.MASKED_DIGIT), + Literal(Mask.MASKED_IGNORE, StringType)) + + def this( + input: Expression, + charCountExpr: Expression, + upperCharExpr: Expression, + lowerCharExpr: Expression) = + this( + input, + charCountExpr, + upperCharExpr, + lowerCharExpr, + Literal(Mask.MASKED_DIGIT), + Literal(Mask.MASKED_IGNORE, StringType)) + + def this( + input: Expression, + charCountExpr: Expression, + upperCharExpr: Expression, + lowerCharExpr: Expression, + digitCharExpr: Expression) = + this( + input, + charCountExpr, + upperCharExpr, + lowerCharExpr, + digitCharExpr, + Literal(Mask.MASKED_IGNORE, StringType)) + + @transient + private lazy val charCount = { + val value = charCountExpr.eval().asInstanceOf[Int] + if (value < 0) 0 else value Review Comment: Done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org