> From: Pavan Nikhilesh <pbhagavat...@marvell.com> > > Merge crc32 hash calculation public API headers for x86 and ARM, > split implementations of x86 and ARM into their respective private > headers. > This reduces the ifdef code clutter while keeping current ABI intact. > > Although we install `rte_crc_arm64.h` it is not used in any of the lib or > drivers layers. All the libs and drivers use `rte_hash_crc.h` which falls > back to SW crc32 calculation for ARM platform. > > Signed-off-by: Pavan Nikhilesh <pbhagavat...@marvell.com> > --- > > Currently, if application incorrectly sets CRC32_ARM64 as crc32 algorithm > through `rte_hash_crc_set_alg()` on x86 or vice-versa we fallback to > algorithm > set previously via `rte_hash_crc_set_alg()` instead of setting the best > available. > This behaviour should probably change to setting the best available algorithm > and is up for discussion. > > app/test/test_hash.c | 6 + > lib/librte_hash/Makefile | 5 - > lib/librte_hash/crc_arm64.h | 67 +++++++++++ > lib/librte_hash/crc_x86.h | 68 +++++++++++ > lib/librte_hash/meson.build | 3 +- > lib/librte_hash/rte_crc_arm64.h | 183 ------------------------------ > lib/librte_hash/rte_hash_crc.h | 193 +++++++++++++------------------- > 7 files changed, 219 insertions(+), 306 deletions(-) > create mode 100644 lib/librte_hash/crc_arm64.h > create mode 100644 lib/librte_hash/crc_x86.h > delete mode 100644 lib/librte_hash/rte_crc_arm64.h > > diff --git a/app/test/test_hash.c b/app/test/test_hash.c > index afa3a1a3c..7bd457dac 100644 > --- a/app/test/test_hash.c > +++ b/app/test/test_hash.c > @@ -195,7 +195,13 @@ test_crc32_hash_alg_equiv(void) > } > > /* Resetting to best available algorithm */ > +#if defined RTE_ARCH_X86 > rte_hash_crc_set_alg(CRC32_SSE42_x64); > +#elif defined RTE_ARCH_ARM64 > + rte_hash_crc_set_alg(CRC32_ARM64); > +#else > + rte_hash_crc_set_alg(CRC32_SW); > +#endif > > if (i == CRC32_ITERATIONS) > return 0; > diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile > index ec9f86499..f640afc42 100644 > --- a/lib/librte_hash/Makefile > +++ b/lib/librte_hash/Makefile > @@ -19,11 +19,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_fbk_hash.c > # install this header file > SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h > SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_hash_crc.h > -ifeq ($(CONFIG_RTE_ARCH_ARM64),y) > -ifneq ($(findstring RTE_MACHINE_CPUFLAG_CRC32,$(CFLAGS)),) > -SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_crc_arm64.h > -endif > -endif > SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_jhash.h > SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h > SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_fbk_hash.h > diff --git a/lib/librte_hash/crc_arm64.h b/lib/librte_hash/crc_arm64.h > new file mode 100644 > index 000000000..8e75f8297
Wouldn't that break 'make install T=...'? As now rte_hash_crc.h includes not public headers (crc_x86.h, etc.). Same question about external apps, where they would get from these headers? > --- /dev/null > +++ b/lib/librte_hash/crc_arm64.h > @@ -0,0 +1,67 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2015 Cavium, Inc > + */ > + > +#ifndef _CRC_ARM64_H_ > +#define _CRC_ARM64_H_ > + > +/** > + * @file > + * > + * CRC arm64 Hash > + */ > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +#include <stdint.h> > +#include <rte_cpuflags.h> > +#include <rte_branch_prediction.h> > +#include <rte_common.h> > + > +static inline uint32_t > +crc32c_arm64_u8(uint8_t data, uint32_t init_val) > +{ > + __asm__ volatile( > + "crc32cb %w[crc], %w[crc], %w[value]" > + : [crc] "+r" (init_val) > + : [value] "r" (data)); > + return init_val; > +} > + > +static inline uint32_t > +crc32c_arm64_u16(uint16_t data, uint32_t init_val) > +{ > + __asm__ volatile( > + "crc32ch %w[crc], %w[crc], %w[value]" > + : [crc] "+r" (init_val) > + : [value] "r" (data)); > + return init_val; > +} > + > +static inline uint32_t > +crc32c_arm64_u32(uint32_t data, uint32_t init_val) > +{ > + __asm__ volatile( > + "crc32cw %w[crc], %w[crc], %w[value]" > + : [crc] "+r" (init_val) > + : [value] "r" (data)); > + return init_val; > +} > + > +static inline uint32_t > +crc32c_arm64_u64(uint64_t data, uint32_t init_val) > +{ > + __asm__ volatile( > + "crc32cx %w[crc], %w[crc], %x[value]" > + : [crc] "+r" (init_val) > + : [value] "r" (data)); > + return init_val; > +} > + > +#ifdef __cplusplus > +} > +#endif > + > +#endif /* _CRC_ARM64_H_ */ > diff --git a/lib/librte_hash/crc_x86.h b/lib/librte_hash/crc_x86.h > new file mode 100644 > index 000000000..fb45f2e98 > --- /dev/null > +++ b/lib/librte_hash/crc_x86.h > @@ -0,0 +1,68 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2010-2014 Intel Corporation > + */ > + > +#ifndef _CRC_X86_H_ > +#define _CRC_X86_H_ > + > +#if defined(RTE_ARCH_X86) > +static inline uint32_t > +crc32c_sse42_u8(uint8_t data, uint32_t init_val) > +{ > + __asm__ volatile( > + "crc32b %[data], %[init_val];" > + : [init_val] "+r" (init_val) > + : [data] "rm" (data)); > + return init_val; > +} > + > +static inline uint32_t > +crc32c_sse42_u16(uint16_t data, uint32_t init_val) > +{ > + __asm__ volatile( > + "crc32w %[data], %[init_val];" > + : [init_val] "+r" (init_val) > + : [data] "rm" (data)); > + return init_val; > +} > + > +static inline uint32_t > +crc32c_sse42_u32(uint32_t data, uint32_t init_val) > +{ > + __asm__ volatile( > + "crc32l %[data], %[init_val];" > + : [init_val] "+r" (init_val) > + : [data] "rm" (data)); > + return init_val; > +} > + > +static inline uint32_t > +crc32c_sse42_u64_mimic(uint64_t data, uint32_t init_val) > +{ > + union { > + uint32_t u32[2]; > + uint64_t u64; > + } d; > + > + d.u64 = data; > + init_val = crc32c_sse42_u32(d.u32[0], init_val); > + init_val = crc32c_sse42_u32(d.u32[1], init_val); > + return init_val; > +} > +#endif > + > +#ifdef RTE_ARCH_X86_64 > +static inline uint32_t > +crc32c_sse42_u64(uint64_t data, uint32_t init_val) > +{ > + uint64_t val = init_val; > + > + __asm__ volatile( > + "crc32q %[data], %[init_val];" > + : [init_val] "+r" (val) > + : [data] "rm" (data)); > + return (uint32_t)val; > +} > +#endif > + > +#endif > diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build > index 6ab46ae9d..90a180bc8 100644 > --- a/lib/librte_hash/meson.build > +++ b/lib/librte_hash/meson.build > @@ -1,8 +1,7 @@ > # SPDX-License-Identifier: BSD-3-Clause > # Copyright(c) 2017 Intel Corporation > > -headers = files('rte_crc_arm64.h', > - 'rte_fbk_hash.h', > +headers = files('rte_fbk_hash.h', > 'rte_hash_crc.h', > 'rte_hash.h', > 'rte_jhash.h', > diff --git a/lib/librte_hash/rte_crc_arm64.h b/lib/librte_hash/rte_crc_arm64.h > deleted file mode 100644 > index b4628cfc0..000000000 > --- a/lib/librte_hash/rte_crc_arm64.h > +++ /dev/null > @@ -1,183 +0,0 @@ > -/* SPDX-License-Identifier: BSD-3-Clause > - * Copyright(c) 2015 Cavium, Inc > - */ > - > -#ifndef _RTE_CRC_ARM64_H_ > -#define _RTE_CRC_ARM64_H_ > - > -/** > - * @file > - * > - * RTE CRC arm64 Hash > - */ > - > -#ifdef __cplusplus > -extern "C" { > -#endif > - > -#include <stdint.h> > -#include <rte_cpuflags.h> > -#include <rte_branch_prediction.h> > -#include <rte_common.h> > - > -static inline uint32_t > -crc32c_arm64_u8(uint8_t data, uint32_t init_val) > -{ > - __asm__ volatile( > - "crc32cb %w[crc], %w[crc], %w[value]" > - : [crc] "+r" (init_val) > - : [value] "r" (data)); > - return init_val; > -} > - > -static inline uint32_t > -crc32c_arm64_u16(uint16_t data, uint32_t init_val) > -{ > - __asm__ volatile( > - "crc32ch %w[crc], %w[crc], %w[value]" > - : [crc] "+r" (init_val) > - : [value] "r" (data)); > - return init_val; > -} > - > -static inline uint32_t > -crc32c_arm64_u32(uint32_t data, uint32_t init_val) > -{ > - __asm__ volatile( > - "crc32cw %w[crc], %w[crc], %w[value]" > - : [crc] "+r" (init_val) > - : [value] "r" (data)); > - return init_val; > -} > - > -static inline uint32_t > -crc32c_arm64_u64(uint64_t data, uint32_t init_val) > -{ > - __asm__ volatile( > - "crc32cx %w[crc], %w[crc], %x[value]" > - : [crc] "+r" (init_val) > - : [value] "r" (data)); > - return init_val; > -} > - > -/** > - * Allow or disallow use of arm64 SIMD instrinsics for CRC32 hash > - * calculation. > - * > - * @param alg > - * An OR of following flags: > - * - (CRC32_SW) Don't use arm64 crc intrinsics > - * - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available > - * > - */ > -static inline void > -rte_hash_crc_set_alg(uint8_t alg) > -{ > - switch (alg) { > - case CRC32_ARM64: > - if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32)) > - alg = CRC32_SW; > - /* fall-through */ > - case CRC32_SW: > - crc32_alg = alg; > - /* fall-through */ > - default: > - break; > - } > -} > - > -/* Setting the best available algorithm */ > -RTE_INIT(rte_hash_crc_init_alg) > -{ > - rte_hash_crc_set_alg(CRC32_ARM64); > -} > - > -/** > - * Use single crc32 instruction to perform a hash on a 1 byte value. > - * Fall back to software crc32 implementation in case arm64 crc intrinsics is > - * not supported > - * > - * @param data > - * Data to perform hash on. > - * @param init_val > - * Value to initialise hash generator. > - * @return > - * 32bit calculated hash value. > - */ > -static inline uint32_t > -rte_hash_crc_1byte(uint8_t data, uint32_t init_val) > -{ > - if (likely(crc32_alg & CRC32_ARM64)) > - return crc32c_arm64_u8(data, init_val); > - > - return crc32c_1byte(data, init_val); > -} > - > -/** > - * Use single crc32 instruction to perform a hash on a 2 bytes value. > - * Fall back to software crc32 implementation in case arm64 crc intrinsics is > - * not supported > - * > - * @param data > - * Data to perform hash on. > - * @param init_val > - * Value to initialise hash generator. > - * @return > - * 32bit calculated hash value. > - */ > -static inline uint32_t > -rte_hash_crc_2byte(uint16_t data, uint32_t init_val) > -{ > - if (likely(crc32_alg & CRC32_ARM64)) > - return crc32c_arm64_u16(data, init_val); > - > - return crc32c_2bytes(data, init_val); > -} > - > -/** > - * Use single crc32 instruction to perform a hash on a 4 byte value. > - * Fall back to software crc32 implementation in case arm64 crc intrinsics is > - * not supported > - * > - * @param data > - * Data to perform hash on. > - * @param init_val > - * Value to initialise hash generator. > - * @return > - * 32bit calculated hash value. > - */ > -static inline uint32_t > -rte_hash_crc_4byte(uint32_t data, uint32_t init_val) > -{ > - if (likely(crc32_alg & CRC32_ARM64)) > - return crc32c_arm64_u32(data, init_val); > - > - return crc32c_1word(data, init_val); > -} > - > -/** > - * Use single crc32 instruction to perform a hash on a 8 byte value. > - * Fall back to software crc32 implementation in case arm64 crc intrinsics is > - * not supported > - * > - * @param data > - * Data to perform hash on. > - * @param init_val > - * Value to initialise hash generator. > - * @return > - * 32bit calculated hash value. > - */ > -static inline uint32_t > -rte_hash_crc_8byte(uint64_t data, uint32_t init_val) > -{ > - if (likely(crc32_alg == CRC32_ARM64)) > - return crc32c_arm64_u64(data, init_val); > - > - return crc32c_2words(data, init_val); > -} > - > -#ifdef __cplusplus > -} > -#endif > - > -#endif /* _RTE_CRC_ARM64_H_ */ > diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h > index cf28031b3..292697db1 100644 > --- a/lib/librte_hash/rte_hash_crc.h > +++ b/lib/librte_hash/rte_hash_crc.h > @@ -21,6 +21,15 @@ extern "C" { > #include <rte_branch_prediction.h> > #include <rte_common.h> > > +typedef uint32_t > +(*crc32_handler_1b)(uint8_t data, uint32_t init_val); > +typedef uint32_t > +(*crc32_handler_2b)(uint16_t data, uint32_t init_val); > +typedef uint32_t > +(*crc32_handler_4b)(uint32_t data, uint32_t init_val); > +typedef uint32_t > +(*crc32_handler_8b)(uint64_t data, uint32_t init_val); > + > /* Lookup tables for software implementation of CRC32C */ > static const uint32_t crc32c_tables[8][256] = {{ > 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, > 0x26A1E7E8, 0xD4CA64EB, > @@ -322,7 +331,7 @@ crc32c_2bytes(uint16_t data, uint32_t init_val) > } > > static inline uint32_t > -crc32c_1word(uint32_t data, uint32_t init_val) > +crc32c_4bytes(uint32_t data, uint32_t init_val) > { > uint32_t crc, term1, term2; > crc = init_val; > @@ -336,7 +345,7 @@ crc32c_1word(uint32_t data, uint32_t init_val) > } > > static inline uint32_t > -crc32c_2words(uint64_t data, uint32_t init_val) > +crc32c_8bytes(uint64_t data, uint32_t init_val) > { > uint32_t crc, term1, term2; > union { > @@ -357,109 +366,94 @@ crc32c_2words(uint64_t data, uint32_t init_val) > > return crc; > } > - > -#if defined(RTE_ARCH_X86) > -static inline uint32_t > -crc32c_sse42_u8(uint8_t data, uint32_t init_val) > -{ > - __asm__ volatile( > - "crc32b %[data], %[init_val];" > - : [init_val] "+r" (init_val) > - : [data] "rm" (data)); > - return init_val; > -} > - > -static inline uint32_t > -crc32c_sse42_u16(uint16_t data, uint32_t init_val) > -{ > - __asm__ volatile( > - "crc32w %[data], %[init_val];" > - : [init_val] "+r" (init_val) > - : [data] "rm" (data)); > - return init_val; > -} > - > -static inline uint32_t > -crc32c_sse42_u32(uint32_t data, uint32_t init_val) > -{ > - __asm__ volatile( > - "crc32l %[data], %[init_val];" > - : [init_val] "+r" (init_val) > - : [data] "rm" (data)); > - return init_val; > -} > - > -static inline uint32_t > -crc32c_sse42_u64_mimic(uint64_t data, uint64_t init_val) > -{ > - union { > - uint32_t u32[2]; > - uint64_t u64; > - } d; > - > - d.u64 = data; > - init_val = crc32c_sse42_u32(d.u32[0], (uint32_t)init_val); > - init_val = crc32c_sse42_u32(d.u32[1], (uint32_t)init_val); > - return (uint32_t)init_val; > -} > -#endif > - > -#ifdef RTE_ARCH_X86_64 > -static inline uint32_t > -crc32c_sse42_u64(uint64_t data, uint64_t init_val) > -{ > - __asm__ volatile( > - "crc32q %[data], %[init_val];" > - : [init_val] "+r" (init_val) > - : [data] "rm" (data)); > - return (uint32_t)init_val; > -} > -#endif > - > #define CRC32_SW (1U << 0) > #define CRC32_SSE42 (1U << 1) > #define CRC32_x64 (1U << 2) > #define CRC32_SSE42_x64 (CRC32_x64|CRC32_SSE42) > #define CRC32_ARM64 (1U << 3) > > -static uint8_t crc32_alg = CRC32_SW; > +static crc32_handler_1b crc32_1b = crc32c_1byte; > +static crc32_handler_2b crc32_2b = crc32c_2bytes; > +static crc32_handler_4b crc32_4b = crc32c_4bytes; > +static crc32_handler_8b crc32_8b = crc32c_8bytes; > > #if defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_CRC32) > -#include "rte_crc_arm64.h" > -#else > +#include "crc_arm64.h" > +#endif > + > +#if defined(RTE_ARCH_X86) > +#include "crc_x86.h" > +#endif > > /** > - * Allow or disallow use of SSE4.2 instrinsics for CRC32 hash > + * Allow or disallow use of SSE4.2/ARMv8 instrinsics for CRC32 hash > * calculation. > * > * @param alg > * An OR of following flags: > - * - (CRC32_SW) Don't use SSE4.2 intrinsics > + * - (CRC32_SW) Don't use SSE4.2 intrinsics (default non-[x86/ARMv8]) > * - (CRC32_SSE42) Use SSE4.2 intrinsics if available > - * - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default) > - * > + * - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default > x86) > + * - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available > */ > static inline void > rte_hash_crc_set_alg(uint8_t alg) > { > -#if defined(RTE_ARCH_X86) > - if (alg == CRC32_SSE42_x64 && > - !rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T)) > - alg = CRC32_SSE42; > + switch (alg) { > + case CRC32_SSE42_x64: > +#if defined RTE_ARCH_X86 > + crc32_1b = crc32c_sse42_u8; > + crc32_2b = crc32c_sse42_u16; > + crc32_4b = crc32c_sse42_u32; > + > + if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T)) > + crc32_8b = crc32c_sse42_u64_mimic; > + else > + crc32_8b = crc32c_sse42_u64; > #endif > - crc32_alg = alg; > + break; > + case CRC32_SSE42: > +#if defined RTE_ARCH_X86 > + crc32_1b = crc32c_sse42_u8; > + crc32_2b = crc32c_sse42_u16; > + crc32_4b = crc32c_sse42_u32; > + crc32_8b = crc32c_sse42_u64_mimic; > +#endif > + break; > + case CRC32_ARM64: > +#if defined RTE_ARCH_ARM64 > + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32)) { > + crc32_1b = crc32c_arm64_u8; > + crc32_2b = crc32c_arm64_u16; > + crc32_4b = crc32c_arm64_u32; > + crc32_8b = crc32c_arm64_u64; > + } > +#endif > + break; > + case CRC32_SW: > + default: > + crc32_1b = crc32c_1byte; > + crc32_2b = crc32c_2bytes; > + crc32_4b = crc32c_4bytes; > + crc32_8b = crc32c_8bytes; > + break; > + } > } > > /* Setting the best available algorithm */ > RTE_INIT(rte_hash_crc_init_alg) > { > +#if defined RTE_ARCH_X86 > rte_hash_crc_set_alg(CRC32_SSE42_x64); > +#elif defined RTE_ARCH_ARM64 > + rte_hash_crc_set_alg(CRC32_ARM64); > +#else > + rte_hash_crc_set_alg(CRC32_SW); > +#endif > } > > /** > - * Use single crc32 instruction to perform a hash on a byte value. > - * Fall back to software crc32 implementation in case SSE4.2 is > - * not supported > + * Calculate crc32 hash value of 1bytes. > * > * @param data > * Data to perform hash on. > @@ -471,18 +465,11 @@ RTE_INIT(rte_hash_crc_init_alg) > static inline uint32_t > rte_hash_crc_1byte(uint8_t data, uint32_t init_val) > { > -#if defined RTE_ARCH_X86 > - if (likely(crc32_alg & CRC32_SSE42)) > - return crc32c_sse42_u8(data, init_val); > -#endif > - > - return crc32c_1byte(data, init_val); > + return (crc32_1b)(data, init_val); > } > > /** > - * Use single crc32 instruction to perform a hash on a 2 bytes value. > - * Fall back to software crc32 implementation in case SSE4.2 is > - * not supported > + * Calculate crc32 hash value of 2bytes. > * > * @param data > * Data to perform hash on. > @@ -494,18 +481,11 @@ rte_hash_crc_1byte(uint8_t data, uint32_t init_val) > static inline uint32_t > rte_hash_crc_2byte(uint16_t data, uint32_t init_val) > { > -#if defined RTE_ARCH_X86 > - if (likely(crc32_alg & CRC32_SSE42)) > - return crc32c_sse42_u16(data, init_val); > -#endif > - > - return crc32c_2bytes(data, init_val); > + return (crc32_2b)(data, init_val); > } > > /** > - * Use single crc32 instruction to perform a hash on a 4 byte value. > - * Fall back to software crc32 implementation in case SSE4.2 is > - * not supported > + * Calculate crc32 hash value of 4bytes. > * > * @param data > * Data to perform hash on. > @@ -517,18 +497,11 @@ rte_hash_crc_2byte(uint16_t data, uint32_t init_val) > static inline uint32_t > rte_hash_crc_4byte(uint32_t data, uint32_t init_val) > { > -#if defined RTE_ARCH_X86 > - if (likely(crc32_alg & CRC32_SSE42)) > - return crc32c_sse42_u32(data, init_val); > -#endif > - > - return crc32c_1word(data, init_val); > + return (crc32_4b)(data, init_val); > } > > /** > - * Use single crc32 instruction to perform a hash on a 8 byte value. > - * Fall back to software crc32 implementation in case SSE4.2 is > - * not supported > + * Calculate crc32 hash value of 8bytes. > * > * @param data > * Data to perform hash on. > @@ -540,21 +513,9 @@ rte_hash_crc_4byte(uint32_t data, uint32_t init_val) > static inline uint32_t > rte_hash_crc_8byte(uint64_t data, uint32_t init_val) > { > -#ifdef RTE_ARCH_X86_64 > - if (likely(crc32_alg == CRC32_SSE42_x64)) > - return crc32c_sse42_u64(data, init_val); > -#endif > - > -#if defined RTE_ARCH_X86 > - if (likely(crc32_alg & CRC32_SSE42)) > - return crc32c_sse42_u64_mimic(data, init_val); > -#endif > - > - return crc32c_2words(data, init_val); > + return (crc32_8b)(data, init_val); > } > > -#endif > - > /** > * Calculate CRC32 hash on user-supplied byte array. > * > -- > 2.17.1