LuciferYang commented on code in PR #55498:
URL: https://github.com/apache/spark/pull/55498#discussion_r3128158504


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala:
##########
@@ -444,263 +444,199 @@ trait GetArrayItemUtil {
 
 /**
  * Common trait for [[GetMapValue]] and [[ElementAt]].
+ *
+ * The lookup strategy is chosen once per expression instance via [[executor]]:
+ *   - [[PrebuiltHashExecutor]] when the map child is foldable, its key type 
supports
+ *     hashing, and its size meets [[SQLConf.MAP_LOOKUP_HASH_THRESHOLD]]. The 
hash index
+ *     is built once from the constant map and reused for every row.
+ *   - [[LinearExecutor]] otherwise (non-foldable maps, unsupported key types, 
or maps
+ *     below the threshold). Hash lookup is not used here because its per-row 
build cost
+ *     is higher than a linear scan when the index cannot be reused across 
rows.
+ *
+ * Each strategy owns both its interpreted eval and its codegen, grouped 
together, and
+ * [[getValueEval]] / [[doGetValueGenCode]] are thin delegators.
  */
 trait GetMapValueUtil extends BinaryExpression with ImplicitCastInputTypes {
 
-  @transient private var lastMap: MapData = _
-  @transient private var lastIndex: java.util.HashMap[Any, Int] = _
-
-  /**
-   * The threshold to determine whether to use hash lookup for map lookup 
expressions.
-   * If the map size is small, the cost of building hash map exceeds the cost 
of a linear scan.
-   * This is configured by `spark.sql.optimizer.mapLookupHashThreshold`.
-   */
   @transient private lazy val hashLookupThreshold =
     SQLConf.get.getConf(SQLConf.MAP_LOOKUP_HASH_THRESHOLD)
 
-  private def getOrBuildIndex(map: MapData, keyType: DataType): 
java.util.HashMap[Any, Int] = {
-    if (lastMap ne map) {
-      val keys = map.keyArray()
-      val len = keys.numElements()
-      val hm = new java.util.HashMap[Any, Int]((len * 1.5).toInt)
-      var i = 0
-      while (i < len) {
-        val k = keys.get(i, keyType)
-        hm.putIfAbsent(k, i)
-        i += 1
-      }
-      lastIndex = hm
-      lastMap = map
-    }
-    lastIndex
-  }
-
-  def getValueEval(
-      value: Any,
-      ordinal: Any,
-      keyType: DataType,
-      ordering: Ordering[Any]): Any = {
-    val map = value.asInstanceOf[MapData]
-    val length = map.numElements()
+  @transient private lazy val executor: MapLookupExecutor = chooseExecutor()
 
-    if (length < hashLookupThreshold || 
!TypeUtils.typeWithProperEquals(keyType)) {
-      getValueEvalLinear(map, ordinal, keyType, ordering)
-    } else {
-      val idx = getOrBuildIndex(map, keyType).getOrDefault(ordinal, -1)
-      if (idx == -1 || map.valueArray().isNullAt(idx)) {
-        null
-      } else {
-        map.valueArray().get(idx, dataType)
+  private def chooseExecutor(): MapLookupExecutor = left.dataType match {
+    case MapType(keyType, _, _)
+        if left.foldable && TypeUtils.typeWithProperEquals(keyType) =>

Review Comment:
   nit: this isn't strictly equivalent to the old `supportsHashLookup` 
whitelist — `typeWithProperEquals` seems accepts `VariantType` / `GeometryType` 
/ `GeographyType` (all `AtomicType`), which the old whitelist excluded. Either:
   
   1. Mention the (intentional) widening in the PR body so reviewers know it's 
not a mechanical refactor, or
   2. Tighten the predicate to keep behavior bit-for-bit identical, e.g.:
   
   ```scala
     if left.foldable
       && TypeUtils.typeWithProperEquals(keyType)
       && !keyType.isInstanceOf[VariantType]
       && !keyType.isInstanceOf[GeometryType]
       && !keyType.isInstanceOf[GeographyType] =>
   ```



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala:
##########
@@ -444,263 +444,199 @@ trait GetArrayItemUtil {
 
 /**
  * Common trait for [[GetMapValue]] and [[ElementAt]].
+ *
+ * The lookup strategy is chosen once per expression instance via [[executor]]:
+ *   - [[PrebuiltHashExecutor]] when the map child is foldable, its key type 
supports
+ *     hashing, and its size meets [[SQLConf.MAP_LOOKUP_HASH_THRESHOLD]]. The 
hash index
+ *     is built once from the constant map and reused for every row.
+ *   - [[LinearExecutor]] otherwise (non-foldable maps, unsupported key types, 
or maps
+ *     below the threshold). Hash lookup is not used here because its per-row 
build cost
+ *     is higher than a linear scan when the index cannot be reused across 
rows.
+ *
+ * Each strategy owns both its interpreted eval and its codegen, grouped 
together, and
+ * [[getValueEval]] / [[doGetValueGenCode]] are thin delegators.
  */
 trait GetMapValueUtil extends BinaryExpression with ImplicitCastInputTypes {
 
-  @transient private var lastMap: MapData = _
-  @transient private var lastIndex: java.util.HashMap[Any, Int] = _
-
-  /**
-   * The threshold to determine whether to use hash lookup for map lookup 
expressions.
-   * If the map size is small, the cost of building hash map exceeds the cost 
of a linear scan.
-   * This is configured by `spark.sql.optimizer.mapLookupHashThreshold`.
-   */
   @transient private lazy val hashLookupThreshold =
     SQLConf.get.getConf(SQLConf.MAP_LOOKUP_HASH_THRESHOLD)
 
-  private def getOrBuildIndex(map: MapData, keyType: DataType): 
java.util.HashMap[Any, Int] = {
-    if (lastMap ne map) {
-      val keys = map.keyArray()
-      val len = keys.numElements()
-      val hm = new java.util.HashMap[Any, Int]((len * 1.5).toInt)
-      var i = 0
-      while (i < len) {
-        val k = keys.get(i, keyType)
-        hm.putIfAbsent(k, i)
-        i += 1
-      }
-      lastIndex = hm
-      lastMap = map
-    }
-    lastIndex
-  }
-
-  def getValueEval(
-      value: Any,
-      ordinal: Any,
-      keyType: DataType,
-      ordering: Ordering[Any]): Any = {
-    val map = value.asInstanceOf[MapData]
-    val length = map.numElements()
+  @transient private lazy val executor: MapLookupExecutor = chooseExecutor()
 
-    if (length < hashLookupThreshold || 
!TypeUtils.typeWithProperEquals(keyType)) {
-      getValueEvalLinear(map, ordinal, keyType, ordering)
-    } else {
-      val idx = getOrBuildIndex(map, keyType).getOrDefault(ordinal, -1)
-      if (idx == -1 || map.valueArray().isNullAt(idx)) {
-        null
-      } else {
-        map.valueArray().get(idx, dataType)
+  private def chooseExecutor(): MapLookupExecutor = left.dataType match {
+    case MapType(keyType, _, _)
+        if left.foldable && TypeUtils.typeWithProperEquals(keyType) =>
+      left.eval(null) match {
+        case map: MapData if map.numElements() >= hashLookupThreshold =>
+          new PrebuiltHashExecutor(buildHashIndex(map, keyType), 
map.valueArray())
+        case _ => LinearExecutor
       }
-    }
+    case _ => LinearExecutor
   }
 
-  private def getValueEvalLinear(
-      map: MapData,
-      ordinal: Any,
-      keyType: DataType,
-      ordering: Ordering[Any]): Any = {
-    val length = map.numElements()
+  private def buildHashIndex(map: MapData, keyType: DataType): 
java.util.HashMap[Any, Int] = {
     val keys = map.keyArray()
-    val values = map.valueArray()
-
+    val len = keys.numElements()
+    val hm = new java.util.HashMap[Any, Int]((len * 1.5).toInt)
     var i = 0
-    var found = false
-    while (i < length && !found) {
-      if (ordering.equiv(keys.get(i, keyType), ordinal)) {
-        found = true
-      } else {
-        i += 1
-      }
-    }
-
-    if (!found || values.isNullAt(i)) {
-      null
-    } else {
-      values.get(i, dataType)
+    while (i < len) {
+      // putIfAbsent preserves first-match semantics for maps with duplicate 
keys (allowed at
+      // the physical level by [[ArrayBasedMapData]]), matching the linear 
scan path.
+      hm.putIfAbsent(keys.get(i, keyType), i)
+      i += 1
     }
+    hm
   }
 
+  def getValueEval(
+      value: Any,
+      ordinal: Any,
+      keyType: DataType,
+      ordering: Ordering[Any]): Any =
+    executor.eval(value.asInstanceOf[MapData], ordinal, keyType, ordering)
+
   def doGetValueGenCode(
       ctx: CodegenContext,
       ev: ExprCode,
-      mapType: MapType): ExprCode = {
-    val keyType = mapType.keyType
-    if (supportsHashLookup(keyType)) {
-      doGetValueGenCodeWithHashOpt(ctx, ev, mapType)
-    } else {
-      doGetValueGenCodeLinear(ctx, ev, mapType)
-    }
-  }
+      mapType: MapType): ExprCode =
+    executor.genCode(ctx, ev, mapType)
 
-  private def supportsHashLookup(keyType: DataType): Boolean = keyType match {
-    case BooleanType | ByteType | ShortType | IntegerType | LongType |
-         FloatType | DoubleType | DateType | TimestampType |
-         TimestampNTZType | _: YearMonthIntervalType |
-         _: DayTimeIntervalType | _: DecimalType | _: TimeType => true
-    case st: StringType if st.supportsBinaryEquality => true
-    case _ => false
+  /**
+   * Execution strategy for a map key lookup. Inner to the trait so each 
concrete strategy
+   * has closure access to `dataType` and `nullSafeCodeGen` on the enclosing 
expression.
+   */
+  protected sealed trait MapLookupExecutor {
+    def eval(map: MapData, ordinal: Any, keyType: DataType, ordering: 
Ordering[Any]): Any
+    def genCode(ctx: CodegenContext, ev: ExprCode, mapType: MapType): ExprCode
   }
 
-  private def doGetValueGenCodeLinear(
-      ctx: CodegenContext,
-      ev: ExprCode,
-      mapType: MapType): ExprCode = {
-    val index = ctx.freshName("index")
-    val length = ctx.freshName("length")
-    val keys = ctx.freshName("keys")
-    val values = ctx.freshName("values")
-    val keyType = mapType.keyType
-
-    val keyJavaType = CodeGenerator.javaType(keyType)
-    val loopKey = ctx.freshName("loopKey")
-    val i = ctx.freshName("i")
-
-    val nullValueCheck = if (mapType.valueContainsNull) {
-      s"""
-         |else if ($values.isNullAt($index)) {
-         |  ${ev.isNull} = true;
-         |}
-       """.stripMargin
-    } else {
-      ""
-    }
+  /** Linear scan over the map's key array. Stateless; no pre-computation. */
+  protected object LinearExecutor extends MapLookupExecutor {

Review Comment:
   Tiny nit on the PR body: "`LinearExecutor` (stateless singleton)" — strictly 
speaking, a `protected object` inside a `trait` is per-outer-instance 
(path-dependent), not a JVM singleton. 



##########
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala:
##########
@@ -321,6 +321,30 @@ class ComplexTypeSuite extends SparkFunSuite with 
ExpressionEvalHelper {
     }
   }
 
+  test("GetMapValue - non-foldable map always uses linear scan") {
+    // With threshold=0, a foldable map would take the hash path. A 
non-foldable map (backed by
+    // a row column) must still fall back to linear scan, because its hash 
index cannot be
+    // reused across rows (building it per row is a perf regression vs. 
linear).
+    withSQLConf(SQLConf.MAP_LOOKUP_HASH_THRESHOLD.key -> "0") {

Review Comment:
   Could we tighten this test to assert the strategy choice directly? Something 
like:
   
   ```scala
   val expr = GetMapValue(mapRef, Literal(1)).asInstanceOf[GetMapValueUtil]
   // reflectively read the private `executor` field
   val executor = {
     val f = classOf[GetMapValueUtil].getDeclaredField(
       "org$apache$spark$sql$catalyst$expressions$GetMapValueUtil$$executor")
     f.setAccessible(true)
     f.get(expr)
   }
   assert(executor.isInstanceOf[expr.LinearExecutor.type])
   ```
   
   Behavior-only tests are fine but they won't catch a future refactor that 
flips every map to one strategy.
   
   A symmetric test that a foldable `Literal(MapData)` above threshold lands on 
`PrebuiltHashExecutor` would also be valuable, plus one with a collated 
`StringType` key confirming it stays on the linear path.
   



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala:
##########
@@ -444,263 +444,199 @@ trait GetArrayItemUtil {
 
 /**
  * Common trait for [[GetMapValue]] and [[ElementAt]].
+ *
+ * The lookup strategy is chosen once per expression instance via [[executor]]:
+ *   - [[PrebuiltHashExecutor]] when the map child is foldable, its key type 
supports
+ *     hashing, and its size meets [[SQLConf.MAP_LOOKUP_HASH_THRESHOLD]]. The 
hash index
+ *     is built once from the constant map and reused for every row.
+ *   - [[LinearExecutor]] otherwise (non-foldable maps, unsupported key types, 
or maps
+ *     below the threshold). Hash lookup is not used here because its per-row 
build cost
+ *     is higher than a linear scan when the index cannot be reused across 
rows.
+ *
+ * Each strategy owns both its interpreted eval and its codegen, grouped 
together, and
+ * [[getValueEval]] / [[doGetValueGenCode]] are thin delegators.
  */
 trait GetMapValueUtil extends BinaryExpression with ImplicitCastInputTypes {
 
-  @transient private var lastMap: MapData = _
-  @transient private var lastIndex: java.util.HashMap[Any, Int] = _
-
-  /**
-   * The threshold to determine whether to use hash lookup for map lookup 
expressions.
-   * If the map size is small, the cost of building hash map exceeds the cost 
of a linear scan.
-   * This is configured by `spark.sql.optimizer.mapLookupHashThreshold`.
-   */
   @transient private lazy val hashLookupThreshold =
     SQLConf.get.getConf(SQLConf.MAP_LOOKUP_HASH_THRESHOLD)
 
-  private def getOrBuildIndex(map: MapData, keyType: DataType): 
java.util.HashMap[Any, Int] = {
-    if (lastMap ne map) {
-      val keys = map.keyArray()
-      val len = keys.numElements()
-      val hm = new java.util.HashMap[Any, Int]((len * 1.5).toInt)
-      var i = 0
-      while (i < len) {
-        val k = keys.get(i, keyType)
-        hm.putIfAbsent(k, i)
-        i += 1
-      }
-      lastIndex = hm
-      lastMap = map
-    }
-    lastIndex
-  }
-
-  def getValueEval(
-      value: Any,
-      ordinal: Any,
-      keyType: DataType,
-      ordering: Ordering[Any]): Any = {
-    val map = value.asInstanceOf[MapData]
-    val length = map.numElements()
+  @transient private lazy val executor: MapLookupExecutor = chooseExecutor()
 
-    if (length < hashLookupThreshold || 
!TypeUtils.typeWithProperEquals(keyType)) {
-      getValueEvalLinear(map, ordinal, keyType, ordering)
-    } else {
-      val idx = getOrBuildIndex(map, keyType).getOrDefault(ordinal, -1)
-      if (idx == -1 || map.valueArray().isNullAt(idx)) {
-        null
-      } else {
-        map.valueArray().get(idx, dataType)
+  private def chooseExecutor(): MapLookupExecutor = left.dataType match {
+    case MapType(keyType, _, _)
+        if left.foldable && TypeUtils.typeWithProperEquals(keyType) =>
+      left.eval(null) match {
+        case map: MapData if map.numElements() >= hashLookupThreshold =>
+          new PrebuiltHashExecutor(buildHashIndex(map, keyType), 
map.valueArray())
+        case _ => LinearExecutor
       }
-    }
+    case _ => LinearExecutor
   }
 
-  private def getValueEvalLinear(
-      map: MapData,
-      ordinal: Any,
-      keyType: DataType,
-      ordering: Ordering[Any]): Any = {
-    val length = map.numElements()
+  private def buildHashIndex(map: MapData, keyType: DataType): 
java.util.HashMap[Any, Int] = {
     val keys = map.keyArray()
-    val values = map.valueArray()
-
+    val len = keys.numElements()
+    val hm = new java.util.HashMap[Any, Int]((len * 1.5).toInt)
     var i = 0
-    var found = false
-    while (i < length && !found) {
-      if (ordering.equiv(keys.get(i, keyType), ordinal)) {
-        found = true
-      } else {
-        i += 1
-      }
-    }
-
-    if (!found || values.isNullAt(i)) {
-      null
-    } else {
-      values.get(i, dataType)
+    while (i < len) {
+      // putIfAbsent preserves first-match semantics for maps with duplicate 
keys (allowed at
+      // the physical level by [[ArrayBasedMapData]]), matching the linear 
scan path.
+      hm.putIfAbsent(keys.get(i, keyType), i)
+      i += 1
     }
+    hm
   }
 
+  def getValueEval(
+      value: Any,
+      ordinal: Any,
+      keyType: DataType,
+      ordering: Ordering[Any]): Any =
+    executor.eval(value.asInstanceOf[MapData], ordinal, keyType, ordering)
+
   def doGetValueGenCode(
       ctx: CodegenContext,
       ev: ExprCode,
-      mapType: MapType): ExprCode = {
-    val keyType = mapType.keyType
-    if (supportsHashLookup(keyType)) {
-      doGetValueGenCodeWithHashOpt(ctx, ev, mapType)
-    } else {
-      doGetValueGenCodeLinear(ctx, ev, mapType)
-    }
-  }
+      mapType: MapType): ExprCode =
+    executor.genCode(ctx, ev, mapType)
 
-  private def supportsHashLookup(keyType: DataType): Boolean = keyType match {
-    case BooleanType | ByteType | ShortType | IntegerType | LongType |
-         FloatType | DoubleType | DateType | TimestampType |
-         TimestampNTZType | _: YearMonthIntervalType |
-         _: DayTimeIntervalType | _: DecimalType | _: TimeType => true
-    case st: StringType if st.supportsBinaryEquality => true
-    case _ => false
+  /**
+   * Execution strategy for a map key lookup. Inner to the trait so each 
concrete strategy
+   * has closure access to `dataType` and `nullSafeCodeGen` on the enclosing 
expression.
+   */
+  protected sealed trait MapLookupExecutor {
+    def eval(map: MapData, ordinal: Any, keyType: DataType, ordering: 
Ordering[Any]): Any
+    def genCode(ctx: CodegenContext, ev: ExprCode, mapType: MapType): ExprCode
   }
 
-  private def doGetValueGenCodeLinear(
-      ctx: CodegenContext,
-      ev: ExprCode,
-      mapType: MapType): ExprCode = {
-    val index = ctx.freshName("index")
-    val length = ctx.freshName("length")
-    val keys = ctx.freshName("keys")
-    val values = ctx.freshName("values")
-    val keyType = mapType.keyType
-
-    val keyJavaType = CodeGenerator.javaType(keyType)
-    val loopKey = ctx.freshName("loopKey")
-    val i = ctx.freshName("i")
-
-    val nullValueCheck = if (mapType.valueContainsNull) {
-      s"""
-         |else if ($values.isNullAt($index)) {
-         |  ${ev.isNull} = true;
-         |}
-       """.stripMargin
-    } else {
-      ""
-    }
+  /** Linear scan over the map's key array. Stateless; no pre-computation. */
+  protected object LinearExecutor extends MapLookupExecutor {
+    override def eval(
+        map: MapData,
+        ordinal: Any,
+        keyType: DataType,
+        ordering: Ordering[Any]): Any = {
+      val length = map.numElements()
+      val keys = map.keyArray()
+      val values = map.valueArray()
 
-    nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
-      s"""
-         |final int $length = $eval1.numElements();
-         |final ArrayData $keys = $eval1.keyArray();
-         |final ArrayData $values = $eval1.valueArray();
-         |int $index = -1;
-         |
-         |for (int $i = 0; $i < $length; $i++) {
-         |  $keyJavaType $loopKey = ${CodeGenerator.getValue(keys, keyType, 
i)};
-         |  if (${ctx.genEqual(keyType, loopKey, eval2)}) {
-         |    $index = $i;
-         |    break;
-         |  }
-         |}
-         |
-         |if ($index < 0) {
-         |  ${ev.isNull} = true;
-         |} $nullValueCheck else {
-         |  ${ev.value} = ${CodeGenerator.getValue(values, dataType, index)};
-         |}
-       """.stripMargin
-    })
-  }
+      var i = 0
+      var found = false
+      while (i < length && !found) {
+        if (ordering.equiv(keys.get(i, keyType), ordinal)) {
+          found = true
+        } else {
+          i += 1
+        }
+      }
 
-  /**
-   * Generates code for map lookups.
-   * If the map size is small (less than HASH_LOOKUP_THRESHOLD), it uses a 
linear scan.
-   * If the map size is large, it builds a hash index for O(1) lookup.
-   */
-  private def doGetValueGenCodeWithHashOpt(
-      ctx: CodegenContext,
-      ev: ExprCode,
-      mapType: MapType): ExprCode = {
-    val index = ctx.freshName("index")
-    val length = ctx.freshName("length")
-    val keys = ctx.freshName("keys")
-    val values = ctx.freshName("values")
-    val keyType = mapType.keyType
-
-    val nullValueCheck = if (mapType.valueContainsNull) {
-      s"""
-         |else if ($values.isNullAt($index)) {
-         |  ${ev.isNull} = true;
-         |}
-       """.stripMargin
-    } else {
-      ""
+      if (!found || values.isNullAt(i)) null else values.get(i, dataType)
     }
 
-    val keyJavaType = CodeGenerator.javaType(keyType)
-    val lastKeyArray = ctx.addMutableState("ArrayData", "lastKeyArray", v => 
s"$v = null;")
-    val hashBuckets = ctx.addMutableState("int[]", "hashBuckets", v => s"$v = 
null;")
-    val hashMask = ctx.addMutableState("int", "hashMask", v => s"$v = 0;")
-
-    def genHash(v: String): String = keyType match {
-      case BooleanType => s"($v ? 1 : 0)"
-      case ByteType | ShortType | IntegerType | DateType | _: 
YearMonthIntervalType => s"$v"
-      case LongType | TimestampType | TimestampNTZType | _: 
DayTimeIntervalType | _: TimeType =>
-        s"(int)($v ^ ($v >>> 32))"
-      case FloatType => s"Float.floatToIntBits($v)"
-      case DoubleType =>
-        s"(int)(Double.doubleToLongBits($v) ^ (Double.doubleToLongBits($v) >>> 
32))"
-      case _ => s"$v.hashCode()"
-    }
+    override def genCode(
+        ctx: CodegenContext,
+        ev: ExprCode,
+        mapType: MapType): ExprCode = {
+      val index = ctx.freshName("index")
+      val length = ctx.freshName("length")
+      val keys = ctx.freshName("keys")
+      val values = ctx.freshName("values")
+      val keyType = mapType.keyType
 
-    nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
-      val i = ctx.freshName("i")
-      val h = ctx.freshName("h")
-      val cap = ctx.freshName("cap")
-      val idx = ctx.freshName("idx")
-      val candidate = ctx.freshName("candidate")
+      val keyJavaType = CodeGenerator.javaType(keyType)
       val loopKey = ctx.freshName("loopKey")
+      val i = ctx.freshName("i")
 
-      val buildIndex =
+      val nullValueCheck = if (mapType.valueContainsNull) {
         s"""
-           |int $cap = Math.max(Integer.highestOneBit(Math.max($length * 2 - 
1, 1)) << 1, 4);
-           |if ($hashBuckets == null || $hashBuckets.length < $cap) {
-           |  $hashBuckets = new int[$cap];
+           |else if ($values.isNullAt($index)) {
+           |  ${ev.isNull} = true;
            |}
-           |java.util.Arrays.fill($hashBuckets, 0, $cap, -1);
-           |$hashMask = $cap - 1;
-           |for (int $i = 0; $i < $length; $i++) {
-           |  $keyJavaType $loopKey = ${CodeGenerator.getValue(keys, keyType, 
i)};
-           |  int $h = (${genHash(loopKey)}) & $hashMask;
-           |  while ($hashBuckets[$h] != -1) {
-           |    $h = ($h + 1) & $hashMask;
-           |  }
-           |  $hashBuckets[$h] = $i;
-           |}
-           |$lastKeyArray = $keys;
          """.stripMargin
+      } else {
+        ""
+      }
 
-      val lookup =
+      nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
         s"""
-           |int $h = (${genHash(eval2)}) & $hashMask;
-           |$index = -1;
-           |while ($hashBuckets[$h] != -1) {
-           |  int $idx = $hashBuckets[$h];
-           |  $keyJavaType $candidate = ${CodeGenerator.getValue(keys, 
keyType, idx)};
-           |  if (${ctx.genEqual(keyType, candidate, eval2)}) {
-           |    $index = $idx;
+           |final int $length = $eval1.numElements();
+           |final ArrayData $keys = $eval1.keyArray();
+           |final ArrayData $values = $eval1.valueArray();
+           |int $index = -1;
+           |
+           |for (int $i = 0; $i < $length; $i++) {
+           |  $keyJavaType $loopKey = ${CodeGenerator.getValue(keys, keyType, 
i)};
+           |  if (${ctx.genEqual(keyType, loopKey, eval2)}) {
+           |    $index = $i;
            |    break;
            |  }
-           |  $h = ($h + 1) & $hashMask;
+           |}
+           |
+           |if ($index < 0) {
+           |  ${ev.isNull} = true;
+           |} $nullValueCheck else {
+           |  ${ev.value} = ${CodeGenerator.getValue(values, dataType, index)};
            |}
          """.stripMargin
+      })
+    }
+  }
 
-      s"""
-        final int $length = $eval1.numElements();
-        final ArrayData $keys = $eval1.keyArray();
-        final ArrayData $values = $eval1.valueArray();
-        int $index = -1;
-
-        if ($length >= $hashLookupThreshold) {
-          if ($keys != $lastKeyArray) {
-            $buildIndex
-          }
-          $lookup
-        } else {
-          for (int $i = 0; $i < $length; $i++) {
-            $keyJavaType $loopKey = ${CodeGenerator.getValue(keys, keyType, 
i)};
-            if (${ctx.genEqual(keyType, loopKey, eval2)}) {
-              $index = $i;
-              break;
-            }
-          }
-        }
+  /**
+   * Hash-based lookup for foldable (constant) maps. The [[java.util.HashMap]] 
and the
+   * value array are evaluated on the driver at construction time and reused 
for every row.
+   * The codegen embeds both via [[CodegenContext.addReferenceObj]]; the 
generated per-row
+   * code is a single boxed `HashMap.get` + null check + value read. Primitive 
keys pay one
+   * autobox per row, which is negligible relative to the O(n) linear scan it 
replaces.
+   */
+  protected final class PrebuiltHashExecutor(
+      index: java.util.HashMap[Any, Int],
+      values: ArrayData) extends MapLookupExecutor {
+
+    override def eval(
+        map: MapData,
+        ordinal: Any,
+        keyType: DataType,
+        ordering: Ordering[Any]): Any = {
+      val idx = index.getOrDefault(ordinal, -1)
+      if (idx == -1 || values.isNullAt(idx)) null else values.get(idx, 
dataType)
+    }
 
-        if ($index < 0) {
-          ${ev.isNull} = true;
-        } $nullValueCheck else {
-          ${ev.value} = ${CodeGenerator.getValue(values, dataType, index)};
+    override def genCode(
+        ctx: CodegenContext,
+        ev: ExprCode,
+        mapType: MapType): ExprCode = {
+      val indexRef = ctx.addReferenceObj("mapLookupIndex", index, 
"java.util.HashMap")
+      val valuesRef = ctx.addReferenceObj("mapLookupValues", values, 
"ArrayData")
+      val idxBoxed = ctx.freshName("idxBoxed")
+      val idx = ctx.freshName("idx")
+
+      nullSafeCodeGen(ctx, ev, (_, eval2) => {
+        val assignValue = if (mapType.valueContainsNull) {
+          s"""
+             |if ($valuesRef.isNullAt($idx)) {
+             |  ${ev.isNull} = true;
+             |} else {
+             |  ${ev.value} = ${CodeGenerator.getValue(valuesRef, dataType, 
idx)};
+             |}
+           """.stripMargin
+        } else {
+          s"${ev.value} = ${CodeGenerator.getValue(valuesRef, dataType, idx)};"
         }
-      """
-    })
+        // HashMap.get(Object) autoboxes primitive keys to their wrapper type 
(Integer, Long,
+        // ...) whose equals/hashCode match the values stored during index 
build.
+        s"""
+           |Integer $idxBoxed = (Integer) $indexRef.get($eval2);

Review Comment:
   JIT escape-analysis can sometimes elide the boxing if the boxed key doesn't 
escape `HashMap.get`. In practice this works for monomorphic, hot call sites 
but is unreliable for megamorphic ones (and `HashMap.get` is megamorphic by 
design). 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to