Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21246#discussion_r187835834
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala ---
    @@ -0,0 +1,569 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.spark.sql.catalyst.expressions
    +
    +import org.apache.commons.codec.digest.DigestUtils
    +
    +import org.apache.spark.sql.AnalysisException
    +import org.apache.spark.sql.catalyst.expressions.MaskExpressionsUtils._
    +import org.apache.spark.sql.catalyst.expressions.MaskLike._
    +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, 
CodeGenerator, ExprCode}
    +import org.apache.spark.sql.types._
    +import org.apache.spark.unsafe.types.UTF8String
    +
    +
    +trait MaskLike {
    +  def upper: String
    +  def lower: String
    +  def digit: String
    +
    +  protected lazy val upperReplacement: Int = getReplacementChar(upper, 
defaultMaskedUppercase)
    +  protected lazy val lowerReplacement: Int = getReplacementChar(lower, 
defaultMaskedLowercase)
    +  protected lazy val digitReplacement: Int = getReplacementChar(digit, 
defaultMaskedDigit)
    +
    +  protected val maskUtilsClassName: String = 
classOf[MaskExpressionsUtils].getName
    +
    +  def inputStringLengthCode(inputString: String, length: String): String = 
{
    +    s"${CodeGenerator.JAVA_INT} $length = $inputString.codePointCount(0, 
$inputString.length());"
    +  }
    +
    +  def appendMaskedToStringBuilderCode(
    +      ctx: CodegenContext,
    +      sb: String,
    +      inputString: String,
    +      offset: String,
    +      numChars: String): String = {
    +    val i = ctx.freshName("i")
    +    val codePoint = ctx.freshName("codePoint")
    +    s"""
    +       |for (${CodeGenerator.JAVA_INT} $i = 0; $i < $numChars; $i++) {
    +       |  ${CodeGenerator.JAVA_INT} $codePoint = 
$inputString.codePointAt($offset);
    +       |  $sb.appendCodePoint($maskUtilsClassName.transformChar($codePoint,
    +       |    $upperReplacement, $lowerReplacement,
    +       |    $digitReplacement, $defaultMaskedOther));
    +       |  $offset += Character.charCount($codePoint);
    +       |}
    +     """.stripMargin
    +  }
    +
    +  def appendUnchangedToStringBuilderCode(
    +      ctx: CodegenContext,
    +      sb: String,
    +      inputString: String,
    +      offset: String,
    +      numChars: String): String = {
    +    val i = ctx.freshName("i")
    +    val codePoint = ctx.freshName("codePoint")
    +    s"""
    +       |for (${CodeGenerator.JAVA_INT} $i = 0; $i < $numChars; $i++) {
    +       |  ${CodeGenerator.JAVA_INT} $codePoint = 
$inputString.codePointAt($offset);
    +       |  $sb.appendCodePoint($codePoint);
    +       |  $offset += Character.charCount($codePoint);
    +       |}
    +     """.stripMargin
    +  }
    +
    +  def appendMaskedToStringBuffer(
    +      sb: StringBuffer,
    +      inputString: String,
    +      startOffset: Int,
    +      numChars: Int): Int = {
    +    var offset = startOffset
    +    (1 to numChars) foreach { _ =>
    +      val codePoint = inputString.codePointAt(offset)
    +      sb.appendCodePoint(transformChar(
    +        codePoint,
    +        upperReplacement,
    +        lowerReplacement,
    +        digitReplacement,
    +        defaultMaskedOther))
    +      offset += Character.charCount(codePoint)
    +    }
    +    offset
    +  }
    +
    +  def appendUnchangedToStringBuffer(
    +      sb: StringBuffer,
    +      inputString: String,
    +      startOffset: Int,
    +      numChars: Int): Int = {
    +    var offset = startOffset
    +    (1 to numChars) foreach { _ =>
    +      val codePoint = inputString.codePointAt(offset)
    +      sb.appendCodePoint(codePoint)
    +      offset += Character.charCount(codePoint)
    +    }
    +    offset
    +  }
    +}
    +
    +trait MaskLikeWithN extends MaskLike {
    +  def n: Int
    +  protected lazy val charCount: Int = if (n < 0) 0 else n
    +}
    +
    +/**
    + * Utils for mask operations.
    + */
    +object MaskLike {
    +  val defaultCharCount = 4
    +  val defaultMaskedUppercase: Int = 'X'
    +  val defaultMaskedLowercase: Int = 'x'
    +  val defaultMaskedDigit: Int = 'n'
    +  val defaultMaskedOther: Int = MaskExpressionsUtils.UNMASKED_VAL
    +
    +  def extractCharCount(e: Expression): Int = e match {
    +    case Literal(i, IntegerType|NullType) =>
    --- End diff --
    
    nit: maybe add spaces around the `|` in the pattern alternative, i.e. `IntegerType | NullType`


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to