Github user hvanhovell commented on a diff in the pull request:

    https://github.com/apache/spark/pull/15047#discussion_r80812953
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
 ---
    @@ -559,3 +607,219 @@ case class CurrentDatabase() extends LeafExpression 
with Unevaluable {
       override def foldable: Boolean = true
       override def nullable: Boolean = false
     }
    +
    +/**
    + * Simulates Hive's hashing function at
    + * 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils#hashcode() 
in Hive
    + *
    + * We should use this hash function for both shuffle and bucket of Hive 
tables, so that
    + * we can guarantee shuffle and bucketing have same data distribution
    + *
    + * TODO: Support Decimal and date related types
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(a1, a2, ...) - Returns a hash value of the arguments.")
    +case class HiveHash(children: Seq[Expression]) extends HashExpression[Int] 
{
    +  override val seed = 0
    +
    +  override def dataType: DataType = IntegerType
    +
    +  override def prettyName: String = "hive-hash"
    +
    +  override protected def hasherClassName: String = 
classOf[HiveHasher].getName
    +
    +  override protected def computeHash(value: Any, dataType: DataType, seed: 
Int): Int = {
    +    HiveHashFunction.hash(value, dataType, seed).toInt
    +  }
    +
    +  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    +    ev.isNull = "false"
    +    val childHash = ctx.freshName("childHash")
    +    val childrenHash = children.map { child =>
    +      val childGen = child.genCode(ctx)
    +      childGen.code + ctx.nullSafeExec(child.nullable, childGen.isNull) {
    +        computeHash(childGen.value, child.dataType, childHash, ctx)
    +      } + s"${ev.value} = (31 * ${ev.value}) + $childHash;"
    +    }.mkString(s"int $childHash = 0;", s"\n$childHash = 0;\n", "")
    +
    +    ev.copy(code = s"""
    +      ${ctx.javaType(dataType)} ${ev.value} = $seed;
    +      $childrenHash""")
    +  }
    +
    +  @tailrec
    +  private def computeHash(
    --- End diff --
    
    Is this the same as `HashExpression.computeHash`?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to