Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21246#discussion_r187835032
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala
 ---
    @@ -0,0 +1,569 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.spark.sql.catalyst.expressions
    +
    +import org.apache.commons.codec.digest.DigestUtils
    +
    +import org.apache.spark.sql.AnalysisException
    +import org.apache.spark.sql.catalyst.expressions.MaskExpressionsUtils._
    +import org.apache.spark.sql.catalyst.expressions.MaskLike._
    +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, 
CodeGenerator, ExprCode}
    +import org.apache.spark.sql.types._
    +import org.apache.spark.unsafe.types.UTF8String
    +
    +
    +/**
    + * Common pieces of the mask expressions: the replacement code points for
    + * upper case letters, lower case letters and digits, plus helpers which copy
    + * code points of an input string either masked or unchanged, for both the
    + * interpreted path and the generated Java code.
    + */
    +trait MaskLike {
    +  /** Replacement for upper case letters; resolved by `upperReplacement`. */
    +  def upper: String
    +
    +  /** Replacement for lower case letters; resolved by `lowerReplacement`. */
    +  def lower: String
    +
    +  /** Replacement for digits; resolved by `digitReplacement`. */
    +  def digit: String
    +
    +  // Each replacement is resolved lazily through `getReplacementChar`, with
    +  // the matching default of the `MaskLike` object as its second argument.
    +  protected lazy val upperReplacement: Int =
    +    getReplacementChar(upper, defaultMaskedUppercase)
    +  protected lazy val lowerReplacement: Int =
    +    getReplacementChar(lower, defaultMaskedLowercase)
    +  protected lazy val digitReplacement: Int =
    +    getReplacementChar(digit, defaultMaskedDigit)
    +
    +  /** Class name of `MaskExpressionsUtils`, referenced from generated code. */
    +  protected val maskUtilsClassName: String =
    +    classOf[MaskExpressionsUtils].getName
    +
    +  /**
    +   * Java statement which declares the `CodeGenerator.JAVA_INT` variable
    +   * `length` and assigns it the number of code points of `inputString`.
    +   */
    +  def inputStringLengthCode(inputString: String, length: String): String = {
    +    val javaInt = CodeGenerator.JAVA_INT
    +    val countExpr = s"$inputString.codePointCount(0, $inputString.length())"
    +    s"$javaInt $length = $countExpr;"
    +  }
    +
    +  /**
    +   * Java loop which runs `numChars` times; every iteration reads the code
    +   * point of `inputString` at `offset`, appends the result of
    +   * `MaskExpressionsUtils.transformChar` for it to `sb` and moves `offset`
    +   * past that code point.
    +   */
    +  def appendMaskedToStringBuilderCode(
    +      ctx: CodegenContext,
    +      sb: String,
    +      inputString: String,
    +      offset: String,
    +      numChars: String): String = {
    +    // Fresh names are requested in the same order as before: "i" first.
    +    val counter = ctx.freshName("i")
    +    val cp = ctx.freshName("codePoint")
    +    val javaInt = CodeGenerator.JAVA_INT
    +    s"""
    +       |for ($javaInt $counter = 0; $counter < $numChars; $counter++) {
    +       |  $javaInt $cp = $inputString.codePointAt($offset);
    +       |  $sb.appendCodePoint($maskUtilsClassName.transformChar($cp,
    +       |    $upperReplacement, $lowerReplacement,
    +       |    $digitReplacement, $defaultMaskedOther));
    +       |  $offset += Character.charCount($cp);
    +       |}
    +     """.stripMargin
    +  }
    +
    +  /**
    +   * Java loop which runs `numChars` times; every iteration appends the code
    +   * point of `inputString` at `offset` to `sb` as is and moves `offset` past
    +   * that code point.
    +   */
    +  def appendUnchangedToStringBuilderCode(
    +      ctx: CodegenContext,
    +      sb: String,
    +      inputString: String,
    +      offset: String,
    +      numChars: String): String = {
    +    // Fresh names are requested in the same order as before: "i" first.
    +    val counter = ctx.freshName("i")
    +    val cp = ctx.freshName("codePoint")
    +    val javaInt = CodeGenerator.JAVA_INT
    +    s"""
    +       |for ($javaInt $counter = 0; $counter < $numChars; $counter++) {
    +       |  $javaInt $cp = $inputString.codePointAt($offset);
    +       |  $sb.appendCodePoint($cp);
    +       |  $offset += Character.charCount($cp);
    +       |}
    +     """.stripMargin
    +  }
    +
    +  /**
    +   * Appends `numChars` code points of `inputString`, starting at
    +   * `startOffset`, to `sb` after passing each one through `transformChar`.
    +   *
    +   * @return the offset right after the last code point consumed
    +   */
    +  def appendMaskedToStringBuffer(
    +      sb: StringBuffer,
    +      inputString: String,
    +      startOffset: Int,
    +      numChars: Int): Int = {
    +    copyCodePoints(sb, inputString, startOffset, numChars) { cp =>
    +      transformChar(
    +        cp,
    +        upperReplacement,
    +        lowerReplacement,
    +        digitReplacement,
    +        defaultMaskedOther)
    +    }
    +  }
    +
    +  /**
    +   * Appends `numChars` code points of `inputString`, starting at
    +   * `startOffset`, to `sb` without modifying them.
    +   *
    +   * @return the offset right after the last code point consumed
    +   */
    +  def appendUnchangedToStringBuffer(
    +      sb: StringBuffer,
    +      inputString: String,
    +      startOffset: Int,
    +      numChars: Int): Int = {
    +    copyCodePoints(sb, inputString, startOffset, numChars) { cp => cp }
    +  }
    +
    +  /**
    +   * Walks `numChars` code points of `inputString` from `startOffset`,
    +   * appending `f(codePoint)` to `sb` for each one, and returns the offset
    +   * reached. Nothing is appended when `numChars` is not positive.
    +   */
    +  private def copyCodePoints(
    +      sb: StringBuffer,
    +      inputString: String,
    +      startOffset: Int,
    +      numChars: Int)(f: Int => Int): Int = {
    +    var pos = startOffset
    +    var remaining = numChars
    +    while (remaining > 0) {
    +      val cp = inputString.codePointAt(pos)
    +      sb.appendCodePoint(f(cp))
    +      // Advance by the code point's char count, not by one char.
    +      pos += Character.charCount(cp)
    +      remaining -= 1
    +    }
    +    pos
    +  }
    +}
    +
    +/**
    + * A [[MaskLike]] which additionally carries a character count `n`.
    + * `charCount` is `n` with negative values replaced by 0.
    + */
    +trait MaskLikeWithN extends MaskLike {
    +  def n: Int
    +  protected lazy val charCount: Int = math.max(n, 0)
    +}
    +
    +/**
    + * Utils for mask operations: the default character count and replacement
    + * code points, and helpers reading literal arguments of mask expressions.
    + */
    +object MaskLike {
    +  val defaultCharCount: Int = 4
    +  val defaultMaskedUppercase: Int = 'X'
    +  val defaultMaskedLowercase: Int = 'x'
    +  val defaultMaskedDigit: Int = 'n'
    +  val defaultMaskedOther: Int = MaskExpressionsUtils.UNMASKED_VAL
    +
    +  /**
    +   * Reads a character count from `e`. An integer literal yields its value;
    +   * a null literal or a non-literal expression yields `defaultCharCount`.
    +   * A literal of any other type raises an `AnalysisException`.
    +   */
    +  def extractCharCount(e: Expression): Int = e match {
    +    case Literal(null, IntegerType | NullType) => defaultCharCount
    +    case Literal(count, IntegerType | NullType) => count.asInstanceOf[Int]
    +    case Literal(_, actual) =>
    +      throw new AnalysisException(literalTypeError(IntegerType, actual))
    +    case _ => defaultCharCount
    +  }
    +
    +  /**
    +   * Reads a replacement string from `e`. A string literal yields its
    +   * `toString`; a null literal or a non-literal expression yields null.
    +   * A literal of any other type raises an `AnalysisException`.
    +   */
    +  def extractReplacement(e: Expression): String = e match {
    +    case Literal(null, StringType | NullType) => null
    +    case Literal(value, StringType | NullType) => value.toString
    +    case Literal(_, actual) =>
    +      throw new AnalysisException(literalTypeError(StringType, actual))
    +    case _ => null
    +  }
    +
    +  /** Message used when a literal has type `actual` instead of `expected`. */
    +  private def literalTypeError(expected: DataType, actual: DataType): String = {
    +    val wanted = expected.simpleString
    +    val found = actual.simpleString
    +    s"Expected literal expression of type $wanted, but got literal of $found"
    +  }
    +}
    +
    +/**
    + * Masks the input string. Additional parameters can be set to change the 
masking chars for
    + * uppercase letters, lowercase letters and digits.
    + */
    +// scalastyle:off line.size.limit
    +@ExpressionDescription(
    +  usage = "_FUNC_(str[, upper[, lower[, digit]]]) - Masks str. By default, 
upper case letters are converted to \"X\", lower case letters are converted to 
\"x\" and numbers are converted to \"n\". You can override the characters used 
in the mask by supplying additional arguments: the second argument controls the 
mask character for upper case letters, the third argument for lower case 
letters and the fourth argument for numbers.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_("abcd-EFGH-8765-4321", "U", "l", "#");
    +       llll-UUUU-####-####
    +  """)
    +// scalastyle:on line.size.limit
    +case class Mask(child: Expression, upper: String, lower: String, digit: 
String)
    +  extends UnaryExpression with ExpectsInputTypes with MaskLike {
    +
    +  def this(child: Expression) = this(child, null.asInstanceOf[String], 
null, null)
    +
    +  def this(child: Expression, upper: Expression) =
    +    this(child, extractReplacement(upper), null, null)
    +
    +  def this(child: Expression, upper: Expression, lower: Expression) =
    +    this(child, extractReplacement(upper), extractReplacement(lower), null)
    +
    +  def this(child: Expression, upper: Expression, lower: Expression, digit: 
Expression) =
    +    this(child, extractReplacement(upper), extractReplacement(lower), 
extractReplacement(digit))
    +
    +  override def nullSafeEval(input: Any): Any = {
    +    val str = input.asInstanceOf[UTF8String].toString
    +    val length = str.codePointCount(0, str.length())
    +    val sb = new StringBuffer(length)
    --- End diff --
    
    Let's use `java.lang.StringBuilder` here. We don't need to use Scala's `StringBuilder`.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to