This is an automated email from the ASF dual-hosted git repository. ravindra pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push: new 70db333 ARROW-5164: [Gandiva][C++] Introduce murmur32 for 32 bit types. 70db333 is described below commit 70db3335dcd7de3726950635b8d25a58dd9c47be Author: Praveen <prav...@dremio.com> AuthorDate: Mon Apr 15 16:49:39 2019 +0530 ARROW-5164: [Gandiva][C++] Introduce murmur32 for 32 bit types. - Use 32 bit hash functions for 32 bit types instead of truncating the 64 bit hash. Author: Praveen <prav...@dremio.com> Closes #4153 from praveenbingo/ARROW-5164 and squashes the following commits: eb15a331 <Praveen> Fix gcc issues. 8c369b4e <Praveen> Fix lint issues. 73b41a08 <Praveen> ARROW-5164: Introduce murmur32 for 32 bit types. --- cpp/src/gandiva/precompiled/hash.cc | 100 +++++++++++++++++++++++++++++++++++- 1 file changed, 98 insertions(+), 2 deletions(-) diff --git a/cpp/src/gandiva/precompiled/hash.cc b/cpp/src/gandiva/precompiled/hash.cc index 4ab942c..bd884a9 100644 --- a/cpp/src/gandiva/precompiled/hash.cc +++ b/cpp/src/gandiva/precompiled/hash.cc @@ -72,6 +72,41 @@ static inline uint64 murmur3_64(uint64 val, int32 seed) { return h1; } +static inline uint32 murmur3_32(uint64 val, int32 seed) { + uint64 c1 = 0xcc9e2d51ull; + uint64 c2 = 0x1b873593ull; + int length = 8; + static uint64 UINT_MASK = 0xffffffffull; + uint64 lh1 = seed & UINT_MASK; + for (int i = 0; i < 2; i++) { + uint64 lk1 = ((val >> i * 32) & UINT_MASK); + lk1 *= c1; + lk1 &= UINT_MASK; + + lk1 = ((lk1 << 15) & UINT_MASK) | (lk1 >> 17); + + lk1 *= c2; + lk1 &= UINT_MASK; + + lh1 ^= lk1; + lh1 = ((lh1 << 13) & UINT_MASK) | (lh1 >> 19); + + lh1 = lh1 * 5 + 0xe6546b64L; + lh1 = UINT_MASK & lh1; + } + lh1 ^= length; + + lh1 ^= lh1 >> 16; + lh1 *= 0x85ebca6bull; + lh1 = UINT_MASK & lh1; + lh1 ^= lh1 >> 13; + lh1 *= 0xc2b2ae35ull; + lh1 = UINT_MASK & lh1; + lh1 ^= lh1 >> 16; + + return static_cast<uint32>(lh1); +} + static inline uint64 double_to_long_bits(double value) { uint64 result; memcpy(&result, &value, sizeof(result)); @@ -83,7 +118,7 @@ FORCE_INLINE int64 hash64(double val, int64 seed) { } FORCE_INLINE int32 hash32(double val, int32 seed) { - return static_cast<int32>(murmur3_64(double_to_long_bits(val), seed)); + return murmur3_32(double_to_long_bits(val), seed); } // Wrappers for all the numeric/data/time arrow types @@ -229,12 +264,73 @@ static inline uint64 murmur3_64_buf(const uint8* key, int32 len, int32 seed) { return h1; } +static uint32 murmur3_32_buf(const uint8* key, int32 len, int32 seed) { + static const uint64 c1 = 0xcc9e2d51ull; + static const uint64 c2 = 0x1b873593ull; + static const uint64 UINT_MASK = 0xffffffffull; + uint64 lh1 = seed; + const uint32* blocks = reinterpret_cast<const uint32*>(key); + int nblocks = len / 4; + const uint8* tail = reinterpret_cast<const uint8*>(key + nblocks * 4); + for (int i = 0; i < nblocks; i++) { + uint64 lk1 = static_cast<uint64>(blocks[i]); + + // k1 *= c1; + lk1 *= c1; + lk1 &= UINT_MASK; + + lk1 = ((lk1 << 15) & UINT_MASK) | (lk1 >> 17); + + lk1 *= c2; + lk1 = lk1 & UINT_MASK; + lh1 ^= lk1; + lh1 = ((lh1 << 13) & UINT_MASK) | (lh1 >> 19); + + lh1 = lh1 * 5 + 0xe6546b64ull; + lh1 = UINT_MASK & lh1; + } + + // tail + uint64 lk1 = 0; + + switch (len & 3) { + case 3: + lk1 = (tail[2] & 0xff) << 16; + case 2: + lk1 |= (tail[1] & 0xff) << 8; + case 1: + lk1 |= (tail[0] & 0xff); + lk1 *= c1; + lk1 = UINT_MASK & lk1; + lk1 = ((lk1 << 15) & UINT_MASK) | (lk1 >> 17); + + lk1 *= c2; + lk1 = lk1 & UINT_MASK; + + lh1 ^= lk1; + } + + // finalization + lh1 ^= len; + + lh1 ^= lh1 >> 16; + lh1 *= 0x85ebca6b; + lh1 = UINT_MASK & lh1; + lh1 ^= lh1 >> 13; + + lh1 *= 0xc2b2ae35; + lh1 = UINT_MASK & lh1; + lh1 ^= lh1 >> 16; + + return static_cast<uint32>(lh1 & UINT_MASK); +} + FORCE_INLINE int64 hash64_buf(const uint8* buf, int len, int64 seed) { return murmur3_64_buf(buf, len, static_cast<int32>(seed)); } FORCE_INLINE int32 hash32_buf(const uint8* buf, int len, int32 seed) { - return static_cast<int32>(murmur3_64_buf(buf, len, seed)); + return murmur3_32_buf(buf, len, seed); } // Wrappers for the varlen types