Using carryless multiply instructions from RISC-V's Zbc extension,
implement a Barrett reduction that calculates CRC-32C checksums.

The implementation is based on the approach described in Intel's whitepaper
"Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction",
which is also explained at
https://web.archive.org/web/20240111232520/https://mary.rs/lab/crc32/

Add a case to the autotest_hash unit test.

Signed-off-by: Daniel Gregory <daniel.greg...@bytedance.com>
---
 MAINTAINERS                |  1 +
 app/test/test_hash.c       |  7 +++
 lib/hash/meson.build       |  1 +
 lib/hash/rte_crc_riscv64.h | 89 ++++++++++++++++++++++++++++++++++++++
 lib/hash/rte_hash_crc.c    | 13 +++++-
 lib/hash/rte_hash_crc.h    |  6 ++-
 6 files changed, 115 insertions(+), 2 deletions(-)
 create mode 100644 lib/hash/rte_crc_riscv64.h

diff --git a/MAINTAINERS b/MAINTAINERS
index c5a703b5c0..fa081552c7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -322,6 +322,7 @@ M: Stanislaw Kardach <stanislaw.kard...@gmail.com>
 F: config/riscv/
 F: doc/guides/linux_gsg/cross_build_dpdk_for_riscv.rst
 F: lib/eal/riscv/
+F: lib/hash/rte_crc_riscv64.h
 
 Intel x86
 M: Bruce Richardson <bruce.richard...@intel.com>
diff --git a/app/test/test_hash.c b/app/test/test_hash.c
index 65b9cad93c..dd491ea4d9 100644
--- a/app/test/test_hash.c
+++ b/app/test/test_hash.c
@@ -231,6 +231,13 @@ test_crc32_hash_alg_equiv(void)
                        printf("Failed checking CRC32_SW against CRC32_ARM64\n");
                        break;
                }
+
+               /* Check against 8-byte-operand RISCV64 CRC32 if available */
+               rte_hash_crc_set_alg(CRC32_RISCV64);
+               if (hash_val != rte_hash_crc(data64, data_len, init_val)) {
+                       printf("Failed checking CRC32_SW against CRC32_RISCV64\n");
+                       break;
+               }
        }
 
        /* Resetting to best available algorithm */
diff --git a/lib/hash/meson.build b/lib/hash/meson.build
index 277eb9fa93..8355869a80 100644
--- a/lib/hash/meson.build
+++ b/lib/hash/meson.build
@@ -12,6 +12,7 @@ headers = files(
 indirect_headers += files(
         'rte_crc_arm64.h',
         'rte_crc_generic.h',
+        'rte_crc_riscv64.h',
         'rte_crc_sw.h',
         'rte_crc_x86.h',
         'rte_thash_x86_gfni.h',
diff --git a/lib/hash/rte_crc_riscv64.h b/lib/hash/rte_crc_riscv64.h
new file mode 100644
index 0000000000..94f6857c69
--- /dev/null
+++ b/lib/hash/rte_crc_riscv64.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) ByteDance 2024
+ */
+
+#include <assert.h>
+#include <stdint.h>
+
+#include <riscv_bitmanip.h>
+
+#ifndef _RTE_CRC_RISCV64_H_
+#define _RTE_CRC_RISCV64_H_
+
+/*
+ * CRC-32C takes a reflected input (bit 7 is the lsb) and produces a reflected
+ * output. As reflecting the value we're checksumming is expensive, we instead
+ * reflect the polynomial P (0x11EDC6F41), mu, and the CRC32 algorithm itself.
+ *
+ * The mu constant is used for a Barrett reduction. It's 2^96 / P (0x11F91CAF6)
+ * reflected. Picking 2^96 rather than 2^64 means we can calculate the CRC of a
+ * 64-bit value using only two multiplications (https://mary.rs/lab/crc32/)
+ */
+static const uint64_t p = 0x105EC76F1;
+static const uint64_t mu = 0x4869EC38DEA713F1UL;
+
+/* CRC-32C of the low `bits` bits of data, seeded with init_val (Barrett reduction) */
+static inline uint32_t
+crc32c_riscv64(uint64_t data, uint32_t init_val, uint32_t bits)
+{
+       assert((bits == 64) || (bits == 32) || (bits == 16) || (bits == 8));
+
+       /* XOR in the initial value; the shift keeps only the low `bits` bits */
+       uint64_t crc = (uint64_t)(data ^ init_val) << (64 - bits);
+
+       /*
+        * Multiply by mu, which is 2^96 / P. Division by 2^96 occurs by taking
+        * the lower 64 bits of the product (remember we're bit-reflected)
+        */
+       crc = __riscv_clmul_64(crc, mu);
+       /* Multiply by P, keeping the upper 64 bits of the product */
+       crc = __riscv_clmulh_64(crc, p);
+
+       /* Subtract (XOR) the init_val bits above `bits` that the shift dropped */
+       if (bits == 16 || bits == 8)
+               crc ^= init_val >> bits;
+
+       return crc;
+}
+
+/*
+ * Hash a value with carry-less multiplies when CRC32_RISCV64 is the selected
+ * algorithm; otherwise fall back on the software implementation
+ */
+static inline uint32_t
+rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
+{
+       /* One byte of input: reduce 8 bits on the Zbc path */
+       return likely(rte_hash_crc32_alg & CRC32_RISCV64) ?
+               crc32c_riscv64(data, init_val, 8) :
+               crc32c_1byte(data, init_val);
+}
+
+static inline uint32_t
+rte_hash_crc_2byte(uint16_t data, uint32_t init_val)
+{
+       /* Two bytes of input: reduce 16 bits on the Zbc path, else software */
+       return likely(rte_hash_crc32_alg & CRC32_RISCV64) ?
+               crc32c_riscv64(data, init_val, 16) :
+               crc32c_2bytes(data, init_val);
+}
+
+static inline uint32_t
+rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
+{
+       /* Four bytes of input: reduce 32 bits on the Zbc path, else software */
+       return likely(rte_hash_crc32_alg & CRC32_RISCV64) ?
+               crc32c_riscv64(data, init_val, 32) :
+               crc32c_1word(data, init_val);
+}
+
+static inline uint32_t
+rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
+{
+       /* Eight bytes of input: reduce 64 bits on the Zbc path, else software */
+       return likely(rte_hash_crc32_alg & CRC32_RISCV64) ?
+               crc32c_riscv64(data, init_val, 64) :
+               crc32c_2words(data, init_val);
+}
+
+#endif /* _RTE_CRC_RISCV64_H_ */
diff --git a/lib/hash/rte_hash_crc.c b/lib/hash/rte_hash_crc.c
index c037cdb0f0..3eb696a576 100644
--- a/lib/hash/rte_hash_crc.c
+++ b/lib/hash/rte_hash_crc.c
@@ -15,7 +15,7 @@ RTE_LOG_REGISTER_SUFFIX(hash_crc_logtype, crc, INFO);
 uint8_t rte_hash_crc32_alg = CRC32_SW;
 
 /**
- * Allow or disallow use of SSE4.2/ARMv8 intrinsics for CRC32 hash
+ * Allow or disallow use of SSE4.2/ARMv8/RISC-V intrinsics for CRC32 hash
  * calculation.
  *
  * @param alg
@@ -24,6 +24,7 @@ uint8_t rte_hash_crc32_alg = CRC32_SW;
  *   - (CRC32_SSE42) Use SSE4.2 intrinsics if available
  *   - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default x86)
  *   - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available (default ARMv8)
+ *   - (CRC32_RISCV64) Use RISC-V carry-less multiply if available (default rv64gc_zbc)
  *
  */
 void
@@ -52,6 +53,14 @@ rte_hash_crc_set_alg(uint8_t alg)
                rte_hash_crc32_alg = CRC32_ARM64;
 #endif
 
+#if defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_ZBC)
+       if (!(alg & CRC32_RISCV64))
+               HASH_CRC_LOG(WARNING,
+                       "Unsupported CRC32 algorithm requested using CRC32_RISCV64");
+       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_RISCV_EXT_ZBC)) /* runtime Zbc check */
+               rte_hash_crc32_alg = CRC32_RISCV64;
+#endif
+
        if (rte_hash_crc32_alg == CRC32_SW)
                HASH_CRC_LOG(WARNING,
                        "Unsupported CRC32 algorithm requested using CRC32_SW");
@@ -64,6 +73,8 @@ RTE_INIT(rte_hash_crc_init_alg)
        rte_hash_crc_set_alg(CRC32_SSE42_x64);
 #elif defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_CRC32)
        rte_hash_crc_set_alg(CRC32_ARM64);
+#elif defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_ZBC)
+       rte_hash_crc_set_alg(CRC32_RISCV64);
 #else
        rte_hash_crc_set_alg(CRC32_SW);
 #endif
diff --git a/lib/hash/rte_hash_crc.h b/lib/hash/rte_hash_crc.h
index 8ad2422ec3..034ce1f8b4 100644
--- a/lib/hash/rte_hash_crc.h
+++ b/lib/hash/rte_hash_crc.h
@@ -28,6 +28,7 @@ extern "C" {
 #define CRC32_x64           (1U << 2)
 #define CRC32_SSE42_x64     (CRC32_x64|CRC32_SSE42)
 #define CRC32_ARM64         (1U << 3)
+#define CRC32_RISCV64       (1U << 4)
 
 extern uint8_t rte_hash_crc32_alg;
 
@@ -35,12 +36,14 @@ extern uint8_t rte_hash_crc32_alg;
 #include "rte_crc_arm64.h"
 #elif defined(RTE_ARCH_X86)
 #include "rte_crc_x86.h"
+#elif defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_ZBC)
+#include "rte_crc_riscv64.h"
 #else
 #include "rte_crc_generic.h"
 #endif
 
 /**
- * Allow or disallow use of SSE4.2/ARMv8 intrinsics for CRC32 hash
+ * Allow or disallow use of SSE4.2/ARMv8/RISC-V intrinsics for CRC32 hash
  * calculation.
  *
  * @param alg
@@ -49,6 +52,7 @@ extern uint8_t rte_hash_crc32_alg;
  *   - (CRC32_SSE42) Use SSE4.2 intrinsics if available
  *   - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default x86)
  *   - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available (default ARMv8)
+ *   - (CRC32_RISCV64) Use RISC-V carry-less multiply if available (default rv64gc_zbc)
  */
 void
 rte_hash_crc_set_alg(uint8_t alg);
-- 
2.39.2

Reply via email to