================ @@ -0,0 +1,174 @@ +//===--- StableHash.cpp - An ABI-stable string hash -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements an ABI-stable string hash based on SipHash, used to +// compute ptrauth discriminators. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/SipHash.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Debug.h" +#include <cstdint> +#include <cstring> + +using namespace llvm; + +#define DEBUG_TYPE "llvm-siphash" + +// Lightly adapted from the SipHash reference C implementation by +// Jean-Philippe Aumasson and Daniel J. Bernstein. + +#define SIPHASH_ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))) + +#define SIPHASH_U8TO64_LE(p) \ + (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \ + ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \ + ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \ + ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56)) + +#define SIPHASH_SIPROUND \ + do { \ + v0 += v1; \ + v1 = SIPHASH_ROTL(v1, 13); \ + v1 ^= v0; \ + v0 = SIPHASH_ROTL(v0, 32); \ + v2 += v3; \ + v3 = SIPHASH_ROTL(v3, 16); \ + v3 ^= v2; \ + v0 += v3; \ + v3 = SIPHASH_ROTL(v3, 21); \ + v3 ^= v0; \ + v2 += v1; \ + v1 = SIPHASH_ROTL(v1, 17); \ + v1 ^= v2; \ + v2 = SIPHASH_ROTL(v2, 32); \ + } while (0) + +template <int cROUNDS, int dROUNDS, class ResultTy> +static inline ResultTy siphash(const uint8_t *in, uint64_t inlen, + const uint8_t (&k)[16]) { + static_assert(sizeof(ResultTy) == 8 || sizeof(ResultTy) == 16, + "result type should be uint64_t or uint128_t"); + uint64_t v0 = 0x736f6d6570736575ULL; + uint64_t v1 = 
0x646f72616e646f6dULL; + uint64_t v2 = 0x6c7967656e657261ULL; + uint64_t v3 = 0x7465646279746573ULL; + uint64_t b; + uint64_t k0 = SIPHASH_U8TO64_LE(k); + uint64_t k1 = SIPHASH_U8TO64_LE(k + 8); + uint64_t m; + int i; + const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t)); + const int left = inlen & 7; + b = ((uint64_t)inlen) << 56; + v3 ^= k1; + v2 ^= k0; + v1 ^= k1; + v0 ^= k0; + + if (sizeof(ResultTy) == 16) { + v1 ^= 0xee; + } + + for (; in != end; in += 8) { + m = SIPHASH_U8TO64_LE(in); + v3 ^= m; + + for (i = 0; i < cROUNDS; ++i) + SIPHASH_SIPROUND; + + v0 ^= m; + } + + switch (left) { + case 7: + b |= ((uint64_t)in[6]) << 48; + LLVM_FALLTHROUGH; + case 6: + b |= ((uint64_t)in[5]) << 40; + LLVM_FALLTHROUGH; + case 5: + b |= ((uint64_t)in[4]) << 32; + LLVM_FALLTHROUGH; + case 4: + b |= ((uint64_t)in[3]) << 24; + LLVM_FALLTHROUGH; + case 3: + b |= ((uint64_t)in[2]) << 16; + LLVM_FALLTHROUGH; + case 2: + b |= ((uint64_t)in[1]) << 8; + LLVM_FALLTHROUGH; + case 1: + b |= ((uint64_t)in[0]); + break; + case 0: + break; + } + + v3 ^= b; + + for (i = 0; i < cROUNDS; ++i) + SIPHASH_SIPROUND; + + v0 ^= b; + + if (sizeof(ResultTy) == 8) { + v2 ^= 0xff; + } else { + v2 ^= 0xee; + } + + for (i = 0; i < dROUNDS; ++i) + SIPHASH_SIPROUND; + + b = v0 ^ v1 ^ v2 ^ v3; + + // This mess with the result type would be easier with 'if constexpr'. + + uint64_t firstHalf = b; + if (sizeof(ResultTy) == 8) + return firstHalf; + + v1 ^= 0xdd; + + for (i = 0; i < dROUNDS; ++i) + SIPHASH_SIPROUND; + + b = v0 ^ v1 ^ v2 ^ v3; + uint64_t secondHalf = b; + + return firstHalf | (ResultTy(secondHalf) << (sizeof(ResultTy) == 8 ? 0 : 64)); +} + +//===--- LLVM-specific wrappers around siphash. + +/// Compute an ABI-stable 64-bit hash of the given string. 
+uint64_t llvm::getPointerAuthStableSipHash64(StringRef Str) { + static const uint8_t K[16] = {0xb5, 0xd4, 0xc9, 0xeb, 0x79, 0x10, 0x4a, 0x79, + 0x6f, 0xec, 0x8b, 0x1b, 0x42, 0x87, 0x81, 0xd4}; + + // The aliasing is fine here because of omnipotent char. + auto *Data = reinterpret_cast<const uint8_t *>(Str.data()); + return siphash<2, 4, uint64_t>(Data, Str.size(), K); +} + +/// Compute an ABI-stable 16-bit hash of the given string. +uint64_t llvm::getPointerAuthStableSipHash16(StringRef Str) { + uint64_t RawHash = getPointerAuthStableSipHash64(Str); + + // Produce a non-zero 16-bit discriminator. + uint64_t Discriminator = (RawHash % 0xFFFF) + 1; ---------------- kovdan01 wrote:
I'm sure that such a scheme has already been used downstream for a long time, and that there is a good reason to always produce a non-zero discriminator when we compute it from a string. Still, let me mention a potential downside of this approach compared with simply doing `uint64_t Discriminator = RawHash % 0x10000;`.

If we assume that the 64-bit hash values are uniformly distributed when the hash function is applied to the infinite set of all possible strings (which should probably hold for a cryptographically secure hash), then the non-zero 16-bit values computed here are not uniformly distributed. The number of 64-bit hash values that map to each 16-bit result is:

- 16-bit value 0: 0 corresponding 64-bit values
- 16-bit value 1: 281479271743490 corresponding 64-bit values
- 16-bit values 2..65535: 281479271743489 corresponding 64-bit values each

I suppose that this might be OK; it is just not very consistent with the 64-bit hash computation, where we do not try to avoid a zero value. I do get the point that the chance of getting a zero 64-bit hash value is very low compared to the 16-bit case, though.

The final point: if this was discussed with security researchers, I have no issues with an implementation that avoids 16-bit zeros like this. If not, IMHO it would be better to talk to security specialists and ask them for advice.

https://github.com/llvm/llvm-project/pull/93902
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits