Github user gatorsmile commented on a diff in the pull request:

    https://github.com/apache/spark/pull/17062#discussion_r104282934

    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala ---
    @@ -732,6 +741,38 @@ object HiveHashFunction extends InterpretedHashFunction {
         HiveHasher.hashUnsafeBytes(base, offset, len)
       }

    +  /**
    +   * Mimics TimestampWritable.hashCode() in Hive.
    +   */
    +  def hashTimestamp(timestamp: Long): Long = {
    +    val timestampInSeconds = timestamp / 1000000
    +    val nanoSecondsPortion = (timestamp % 1000000) * 1000
    +
    +    var result = timestampInSeconds
    +    result <<= 30 // the nanosecond part fits in 30 bits
    +    result |= nanoSecondsPortion
    +    ((result >>> 32) ^ result).toInt
    +  }
    +
    +  /**
    +   * Hive allows input intervals to be defined using the units below, but all units
    +   * in an interval have to come from the same category:
    +   * - year, month (stored as HiveIntervalYearMonth)
    +   * - day, hour, minute, second, nanosecond (stored as HiveIntervalDayTime)
    +   *
    +   * e.g. (INTERVAL '30' YEAR + INTERVAL '-23' DAY) fails in Hive.
    +   *
    +   * This method mimics HiveIntervalDayTime.hashCode() in Hive. If the `INTERVAL` is
    +   * backed by HiveIntervalYearMonth in Hive, this method will not produce a
    +   * Hive-compatible result, because Spark's calendar-interval representation is
    +   * unified and makes no such category distinction.
    +   */
    +  def hashCalendarInterval(calendarInterval: CalendarInterval): Long = {
    +    val totalSeconds = calendarInterval.milliseconds() / 1000
    --- End diff --

    How does Hive deal with nanoseconds, if we divide it by 1000?
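For context on that question: Hive's HiveIntervalDayTime does not drop the sub-second part; it stores the interval as whole seconds plus a separate nanosecond field, and its hashCode() folds in both. Below is a minimal sketch of recovering that split from Spark's CalendarInterval; the `microseconds` field is Spark's internal representation, while `hashCalendarIntervalSketch` and its 37-based mixing constant are illustrative assumptions here, not Hive's exact formula.

    import org.apache.spark.unsafe.types.CalendarInterval

    // Sketch only: split the interval's microsecond total into whole seconds
    // plus a sub-second remainder expressed in nanoseconds, then mix both into
    // the hash so intervals differing only below one second do not collide.
    def hashCalendarIntervalSketch(interval: CalendarInterval): Long = {
      val totalMicros = interval.microseconds              // Spark stores micros internally
      val totalSeconds = totalMicros / 1000000L            // whole-second portion
      val nanoSeconds = (totalMicros % 1000000L) * 1000L   // remainder, scaled to nanos

      val seed = (totalSeconds ^ (totalSeconds >>> 32)).toInt // fold the long into an int
      (seed * 37) + nanoSeconds.toInt                         // assumed mixing step
    }

If the nanosecond remainder is discarded, as `calendarInterval.milliseconds() / 1000` does, any two intervals that differ only in their sub-second part would hash identically and diverge from Hive.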