GitHub user original-brownbear commented on a diff in the pull request: https://github.com/apache/spark/pull/19180#discussion_r139112161 --- Diff: common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java --- @@ -1097,10 +1100,23 @@ public UTF8String copy() { @Override public int compareTo(@Nonnull final UTF8String other) { int len = Math.min(numBytes, other.numBytes); - // TODO: compare 8 bytes as unsigned long - for (int i = 0; i < len; i ++) { + int wordMax = (len / 8) * 8; + long roffset = other.offset; + Object rbase = other.base; + for (int i = 0; i < wordMax; i += 8) { + long left = getLong(base, offset + i); + long right = getLong(rbase, roffset + i); + if (left != right) { + if (IS_LITTLE_ENDIAN) { + return UnsignedLongs.compare(Long.reverseBytes(left), Long.reverseBytes(right)); + } else { + return UnsignedLongs.compare(left, right); --- End diff -- @viirya I will adjust this; it makes no sense to use Guava if the JDK already has what we need, IMO :)
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org