Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21772#discussion_r204618927 --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala --- @@ -278,6 +278,39 @@ class HashedRelationSuite extends SparkFunSuite with SharedSQLContext { map.free() } + test("SPARK-24809: Serializing LongHashedRelation in executor may result in data error") { + val unsafeProj = UnsafeProjection.create(Array[DataType](LongType)) + val originalMap = new LongToUnsafeRowMap(mm, 1) + + val key1 = 1L + val value1 = new Random().nextLong() + + val key2 = 2L + val value2 = new Random().nextLong() + + originalMap.append(key1, unsafeProj(InternalRow(value1))) + originalMap.append(key2, unsafeProj(InternalRow(value2))) + originalMap.optimize() + + val resultRow = new UnsafeRow(1) + assert(originalMap.getValue(key1, resultRow).getLong(0) === value1) + assert(originalMap.getValue(key2, resultRow).getLong(0) === value2) + + val ser = new KryoSerializer( + (new SparkConf).set("spark.kryo.referenceTracking", "false")).newInstance() + + val mapSerializedInDriver = ser.deserialize[LongToUnsafeRowMap](ser.serialize(originalMap)) --- End diff -- nit: ```scala // Simulate serialize/deserialize twice on driver and executor val firstTimeSerialized = ... val secondTimeSerialized = ... ```
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org