Implement the ip4_lookup_node_process_vec() function for the RISC-V
architecture using the RISC-V Vector Extension (RVV) instruction set.

The vector path mirrors the existing NEON and SSE implementations:
destination addresses are converted to host byte order, resolved through
the LPM tbl24/tbl8 tables with indexed vector loads, and the packets are
speculatively enqueued towards the rewrite node.

Signed-off-by: Sun Yuechi <[email protected]>
Signed-off-by: Zijian <[email protected]>
---
 lib/eal/riscv/include/rte_vect.h |   2 +-
 lib/node/ip4_lookup.c            |   5 +-
 lib/node/ip4_lookup_rvv.h        | 207 +++++++++++++++++++++++++++++++
 3 files changed, 212 insertions(+), 2 deletions(-)
 create mode 100644 lib/node/ip4_lookup_rvv.h

diff --git a/lib/eal/riscv/include/rte_vect.h b/lib/eal/riscv/include/rte_vect.h
index a4357e266a..4d16082449 100644
--- a/lib/eal/riscv/include/rte_vect.h
+++ b/lib/eal/riscv/include/rte_vect.h
@@ -19,7 +19,7 @@
 extern "C" {
 #endif
 
-#define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_DISABLED
+#define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_128
 
 typedef int32_t                xmm_t __attribute__((vector_size(16)));
 
diff --git a/lib/node/ip4_lookup.c b/lib/node/ip4_lookup.c
index 9673a0d78d..d3aed089f4 100644
--- a/lib/node/ip4_lookup.c
+++ b/lib/node/ip4_lookup.c
@@ -44,6 +44,8 @@ static struct ip4_lookup_node_main ip4_lookup_nm;
 #include "ip4_lookup_neon.h"
 #elif defined(RTE_ARCH_X86)
 #include "ip4_lookup_sse.h"
+#elif defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_V)
+#include "ip4_lookup_rvv.h"
 #endif
 
 static uint16_t
@@ -211,7 +213,8 @@ ip4_lookup_node_init(const struct rte_graph *graph, struct rte_node *node)
        IP4_LOOKUP_NODE_LPM(node->ctx) = ip4_lookup_nm.lpm_tbl[graph->socket];
        IP4_LOOKUP_NODE_PRIV1_OFF(node->ctx) = dyn;
 
-#if defined(__ARM_NEON) || defined(RTE_ARCH_X86)
+#if defined(__ARM_NEON) || defined(RTE_ARCH_X86) || \
+       (defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_V))
        if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128)
                node->process = ip4_lookup_node_process_vec;
 #endif
diff --git a/lib/node/ip4_lookup_rvv.h b/lib/node/ip4_lookup_rvv.h
new file mode 100644
index 0000000000..a74e4fa204
--- /dev/null
+++ b/lib/node/ip4_lookup_rvv.h
@@ -0,0 +1,207 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
+ */
+
+#ifndef __INCLUDE_IP4_LOOKUP_RVV_H__
+#define __INCLUDE_IP4_LOOKUP_RVV_H__
+
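+/*
+ * tbl24/tbl8 entry flag bits, matching the values defined in rte_lpm.h:
+ * bit 24 marks a valid lookup result, and bits 24-25 together mark an
+ * entry that extends into a tbl8 group.
+ */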
+#define RTE_LPM_LOOKUP_SUCCESS 0x01000000
+#define RTE_LPM_VALID_EXT_ENTRY_BITMASK 0x03000000
+
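+/*
+ * Byte-swap each 32-bit lane (network to host byte order) with shifts
+ * and masks; the base V extension has no vector byte-reverse instruction
+ * (vrev8.v requires Zvbb).
+ */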
+static __rte_always_inline vuint32m8_t
+bswap32_vec(vuint32m8_t v, size_t vl)
+{
+       vuint32m8_t low16 = __riscv_vor_vv_u32m8(
+               __riscv_vsll_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF, vl), 24, vl),
+               __riscv_vsll_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF00, vl), 8, vl),
+               vl);
+
+       vuint32m8_t high16 = __riscv_vor_vv_u32m8(
+               __riscv_vsrl_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF0000, vl), 8, vl),
+               __riscv_vsrl_vx_u32m8(v, 24, vl),
+               vl);
+
+       return __riscv_vor_vv_u32m8(low16, high16, vl);
+}
+
+static __rte_always_inline void
+rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips,
+                       uint32_t *hop, size_t vl, uint32_t defv)
+{
+       /* Load IP addresses (network byte order) */
+       vuint32m8_t v_ip = bswap32_vec(__riscv_vle32_v_u32m8(ips, vl), vl);
+
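+       /*
+        * tbl24 is indexed by the top 24 bits of the address; scale the
+        * index by the 4-byte entry size to form byte offsets for the gather.
+        */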
+       vuint32m8_t v_tbl24_byte_offset = __riscv_vsll_vx_u32m8(
+                       __riscv_vsrl_vx_u32m8(v_ip, 8, vl), 2, vl);
+
+       vuint32m8_t vtbl_entry = __riscv_vluxei32_v_u32m8(
+               (const uint32_t *)lpm->tbl24, v_tbl24_byte_offset, vl);
+
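+       /*
+        * Lanes whose entry has both the valid and extended bits set must
+        * be resolved through a second lookup in tbl8.
+        */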
+       vbool4_t mask = __riscv_vmseq_vx_u32m8_b4(
+               __riscv_vand_vx_u32m8(vtbl_entry, RTE_LPM_VALID_EXT_ENTRY_BITMASK, vl),
+               RTE_LPM_VALID_EXT_ENTRY_BITMASK, vl);
+
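+       /*
+        * tbl8 byte offset: (group index * 256 entries + low byte of the
+        * address) scaled by the 4-byte entry size.
+        */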
+       vuint32m8_t vtbl8_index = __riscv_vsll_vx_u32m8(
+               __riscv_vadd_vv_u32m8(
+                       __riscv_vsll_vx_u32m8(
+                               __riscv_vand_vx_u32m8(vtbl_entry, 0x00FFFFFF, vl), 8, vl),
+                       __riscv_vand_vx_u32m8(v_ip, 0x000000FF, vl), vl),
+               2, vl);
+
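+       /*
+        * Masked gather: only the extended lanes fetch from tbl8; all
+        * other lanes keep their tbl24 entry.
+        */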
+       vtbl_entry = __riscv_vluxei32_v_u32m8_mu(
+               mask, vtbl_entry, (const uint32_t *)(lpm->tbl8), vtbl8_index, vl);
+
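+       /*
+        * The low 24 bits hold the next hop; lanes whose entry lacks the
+        * LOOKUP_SUCCESS bit fall back to the default value (drop).
+        */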
+       vuint32m8_t vnext_hop = __riscv_vand_vx_u32m8(vtbl_entry, 0x00FFFFFF, vl);
+       mask = __riscv_vmseq_vx_u32m8_b4(
+               __riscv_vand_vx_u32m8(vtbl_entry, RTE_LPM_LOOKUP_SUCCESS, vl), 0, vl);
+
+       vnext_hop = __riscv_vmerge_vxm_u32m8(vnext_hop, defv, mask, vl);
+
+       __riscv_vse32_v_u32m8(hop, vnext_hop, vl);
+}
+
+/* 64 fills e32/m8 at VLEN = 256; can be increased further for larger VLEN */
+#define RVV_MAX_BURST 64U
+
+static uint16_t
+ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
+                       void **objs, uint16_t nb_objs)
+{
+       struct rte_mbuf **pkts;
+       struct rte_lpm *lpm = IP4_LOOKUP_NODE_LPM(node->ctx);
+       const int dyn = IP4_LOOKUP_NODE_PRIV1_OFF(node->ctx);
+       rte_edge_t next_index;
+       void **to_next, **from;
+       uint16_t last_spec = 0;
+       uint16_t n_left_from;
+       uint16_t held = 0;
+       uint32_t drop_nh;
+
+       /* Temporary arrays for batch processing */
+       uint32_t ips[RVV_MAX_BURST];
+       uint32_t res[RVV_MAX_BURST];
+       rte_edge_t next_hops[RVV_MAX_BURST];
+
+       /* Speculative next */
+       next_index = RTE_NODE_IP4_LOOKUP_NEXT_REWRITE;
+       /* Drop node: drop edge encoded in the upper 16 bits of the result */
+       drop_nh = ((uint32_t)RTE_NODE_IP4_LOOKUP_NEXT_PKT_DROP) << 16;
+
+       pkts = (struct rte_mbuf **)objs;
+       from = objs;
+       n_left_from = nb_objs;
+
+       /* Get stream for the speculated next node */
+       to_next = rte_node_next_stream_get(graph, node, next_index, nb_objs);
+
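+       /*
+        * Process packets in vector-length sized batches. Each packet is
+        * speculated to take the rewrite edge; last_spec counts the run of
+        * packets whose speculation held so they can be moved in bulk.
+        */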
+       while (n_left_from > 0) {
+               rte_edge_t fix_spec = 0;
+
+               size_t vl = __riscv_vsetvl_e32m8(RTE_MIN(n_left_from, RVV_MAX_BURST));
+
+               /* Extract IP addresses and metadata from current batch */
+               for (size_t i = 0; i < vl; i++) {
+                       struct rte_ipv4_hdr *ipv4_hdr =
+                               rte_pktmbuf_mtod_offset(pkts[i], struct rte_ipv4_hdr *,
+                                               sizeof(struct rte_ether_hdr));
+                       ips[i] = ipv4_hdr->dst_addr;
+                       node_mbuf_priv1(pkts[i], dyn)->cksum = ipv4_hdr->hdr_checksum;
+                       node_mbuf_priv1(pkts[i], dyn)->ttl = ipv4_hdr->time_to_live;
+               }
+
+               /* Perform LPM lookup */
+               rte_lpm_lookup_vec(lpm, ips, res, vl, drop_nh);
+
+               for (size_t i = 0; i < vl; i++) {
+                       /* Update statistics */
+                       if ((res[i] >> 16) == (drop_nh >> 16))
+                               NODE_INCREMENT_XSTAT_ID(node, 0, 1, 1);
+
+                       /* Extract next hop and next node */
+                       node_mbuf_priv1(pkts[i], dyn)->nh = res[i] & 0xFFFF;
+                       next_hops[i] = res[i] >> 16;
+
+                       /* Check speculation */
+                       fix_spec |= (next_index ^ next_hops[i]);
+               }
+
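+               /*
+                * At least one lane left the speculated edge: flush the
+                * run of correctly speculated packets, then enqueue this
+                * batch packet by packet.
+                */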
+               if (unlikely(fix_spec)) {
+                       /* Copy successfully speculated packets before this batch */
+                       rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
+                       from += last_spec;
+                       to_next += last_spec;
+                       held += last_spec;
+                       last_spec = 0;
+
+                       /* Process each packet in current batch individually */
+                       for (size_t i = 0; i < vl; i++) {
+                               if (next_index == next_hops[i]) {
+                                       *to_next++ = from[i];
+                                       held++;
+                               } else {
+                                       rte_node_enqueue_x1(graph, node, next_hops[i], from[i]);
+                               }
+                       }
+
+                       from += vl;
+               } else {
+                       last_spec += vl;
+               }
+
+               pkts += vl;
+               n_left_from -= vl;
+       }
+
+       /* Handle successfully speculated packets */
+       if (likely(last_spec == nb_objs)) {
+               rte_node_next_stream_move(graph, node, next_index);
+               return nb_objs;
+       }
+
+       held += last_spec;
+       rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
+       rte_node_next_stream_put(graph, node, next_index, held);
+
+       return nb_objs;
+}
+#endif /* __INCLUDE_IP4_LOOKUP_RVV_H__ */
-- 
2.51.2
