cloud-fan commented on a change in pull request #29104: URL: https://github.com/apache/spark/pull/29104#discussion_r472691173
########## File path: sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala ########## @@ -903,15 +910,61 @@ private[joins] object LongHashedRelation { if (!rowKey.isNullAt(0)) { val key = rowKey.getLong(0) map.append(key, unsafeRow) + } else if (isNullAware) { + return EmptyHashedRelationWithAllNullKeys } } map.optimize() new LongHashedRelation(numFields, map) } } +/** + * Common trait with dummy implementation for NAAJ special HashedRelation + * EmptyHashedRelation + * EmptyHashedRelationWithAllNullKeys + */ +trait NullAwareHashedRelation extends HashedRelation with Externalizable { + override def get(key: InternalRow): Iterator[InternalRow] = { + throw new UnsupportedOperationException + } + + override def getValue(key: InternalRow): InternalRow = { + throw new UnsupportedOperationException + } + + override def keyIsUnique: Boolean = true + + override def keys(): Iterator[InternalRow] = { + throw new UnsupportedOperationException + } + + override def close(): Unit = {} + + override def writeExternal(out: ObjectOutput): Unit = {} + + override def readExternal(in: ObjectInput): Unit = {} + + override def estimatedSize: Long = 0 +} + +/** + * A special HashedRelation indicates it built from a empty input:Iterator[InternalRow]. + */ +object EmptyHashedRelation extends NullAwareHashedRelation { + override def asReadOnlyCopy(): EmptyHashedRelation.type = this +} + +/** + * A special HashedRelation indicates it built from a non-empty input:Iterator[InternalRow], + * which contains all null columns key. + */ +object EmptyHashedRelationWithAllNullKeys extends NullAwareHashedRelation { + override def asReadOnlyCopy(): EmptyHashedRelationWithAllNullKeys.type = this Review comment: Since this relation is built from a non-empty input, the `Empty` prefix is misleading; consider dropping it and renaming the object to `HashedRelationWithAllNullKeys`. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org