GitHub user original-brownbear commented on a diff in the pull request: https://github.com/apache/spark/pull/19180#discussion_r138466080 --- Diff: common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java --- @@ -1097,8 +1101,21 @@ public UTF8String copy() { @Override public int compareTo(@Nonnull final UTF8String other) { int len = Math.min(numBytes, other.numBytes); - // TODO: compare 8 bytes as unsigned long - for (int i = 0; i < len; i ++) { + int words = len / Longs.BYTES; + long roffset = other.getBaseOffset(); + Object rbase = other.getBaseObject(); + for (int i = 0; i < words * Longs.BYTES; i += Longs.BYTES) { + long left = getLong(base, offset + i); + long right = getLong(rbase, roffset + i); + if (left != right) { + if (!IS_LITTLE_ENDIAN) { --- End diff -- @vanzin it's a runtime constant, but after the C2 pass the branch on it is completely removed by the compiler (I just verified this with JITWatch).
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org