Add Out-Of-Place (OOP) processing support in the Rx fastpath for the CN20K platform.
Signed-off-by: Rahul Bhansali <[email protected]> --- Changes in v2: Updated release notes and commit message. doc/guides/rel_notes/release_26_03.rst | 3 + drivers/net/cnxk/cn20k_ethdev_sec.c | 22 +++-- drivers/net/cnxk/cn20k_rx.h | 119 +++++++++++++++++++++---- 3 files changed, 121 insertions(+), 23 deletions(-) diff --git a/doc/guides/rel_notes/release_26_03.rst b/doc/guides/rel_notes/release_26_03.rst index 5c2a4bb32e..f37c87fc5b 100644 --- a/doc/guides/rel_notes/release_26_03.rst +++ b/doc/guides/rel_notes/release_26_03.rst @@ -82,6 +82,9 @@ New Features * NEA5, NIA5, NCA5: AES 256 confidentiality, integrity and AEAD modes. * NEA6, NIA6, NCA6: ZUC 256 confidentiality, integrity and AEAD modes. +* **Updated Marvell cnxk net driver.** + + * Added Out-Of-Place support for CN20K SoC. Removed Items ------------- diff --git a/drivers/net/cnxk/cn20k_ethdev_sec.c b/drivers/net/cnxk/cn20k_ethdev_sec.c index bddc02eb3b..c6a51f99f5 100644 --- a/drivers/net/cnxk/cn20k_ethdev_sec.c +++ b/drivers/net/cnxk/cn20k_ethdev_sec.c @@ -9,6 +9,7 @@ #include <rte_security_driver.h> #include <cn20k_ethdev.h> +#include <cn20k_rx.h> #include <cnxk_security.h> #include <roc_priv.h> @@ -810,10 +811,6 @@ cn20k_eth_sec_session_create(void *device, struct rte_security_session_conf *con inb_sa_dptr->w0.s.count_mib_pkts = 1; } - /* Enable out-of-place processing */ - if (ipsec->options.ingress_oop) - inb_sa_dptr->w0.s.pkt_format = ROC_IE_OT_SA_PKT_FMT_FULL; - /* Prepare session priv */ sess_priv.inb_sa = 1; sess_priv.sa_idx = ipsec->spi & spi_mask; @@ -843,6 +840,13 @@ cn20k_eth_sec_session_create(void *device, struct rte_security_session_conf *con if (ipsec->options.ingress_oop) dev->inb.nb_oop++; + /* Update function pointer to handle OOP sessions */ + if (dev->inb.nb_oop && !(dev->rx_offload_flags & NIX_RX_REAS_F)) { + dev->rx_offload_flags |= NIX_RX_REAS_F; + cn20k_eth_set_rx_function(eth_dev); + if (cnxk_ethdev_rx_offload_cb) + cnxk_ethdev_rx_offload_cb(eth_dev->data->port_id, 
NIX_RX_REAS_F); + } } else { struct roc_ow_ipsec_outb_sa *outb_sa, *outb_sa_dptr; struct cn20k_outb_priv_data *outb_priv; @@ -986,6 +990,12 @@ cn20k_eth_sec_session_destroy(void *device, struct rte_security_session *sess) if (eth_sec->inb_oop) dev->inb.nb_oop--; + /* Clear offload flags if was used by OOP */ + if (!dev->inb.nb_oop && !dev->inb.reass_en && + dev->rx_offload_flags & NIX_RX_REAS_F) { + dev->rx_offload_flags &= ~NIX_RX_REAS_F; + cn20k_eth_set_rx_function(eth_dev); + } } else { /* Disable SA */ sa_dptr = dev->outb.sa_dptr; @@ -1064,10 +1074,6 @@ cn20k_eth_sec_session_update(void *device, struct rte_security_session *sess, inb_sa_dptr->w0.s.count_mib_pkts = 1; } - /* Enable out-of-place processing */ - if (ipsec->options.ingress_oop) - inb_sa_dptr->w0.s.pkt_format = ROC_IE_OT_SA_PKT_FMT_FULL; - rc = roc_nix_inl_ctx_write(&dev->nix, inb_sa_dptr, eth_sec->sa, eth_sec->inb, sizeof(struct roc_ow_ipsec_inb_sa)); if (rc) diff --git a/drivers/net/cnxk/cn20k_rx.h b/drivers/net/cnxk/cn20k_rx.h index eed8d59a14..83c222c53c 100644 --- a/drivers/net/cnxk/cn20k_rx.h +++ b/drivers/net/cnxk/cn20k_rx.h @@ -200,6 +200,38 @@ nix_sec_reass_first_frag_update(struct rte_mbuf *head, const rte_iova_t *iova_li head->data_off += 8; } +static __rte_always_inline struct rte_mbuf * +nix_sec_oop_process(uintptr_t cpth, uint64_t buf_sz) +{ + const struct cpt_parse_hdr_s *hdr = (const struct cpt_parse_hdr_s *)cpth; + uint32_t offset = hdr->w2.ptr_offset; + struct rte_mbuf *inner, *mbuf; + union nix_rx_parse_u *rx; + rte_iova_t *iova_list; + uint64_t addr; + + iova_list = (rte_iova_t *)(cpth + (offset ? 
(offset << 5) : 256)) + 1; + addr = *iova_list; + offset = addr % (buf_sz & 0xFFFFFFFF); + mbuf = (struct rte_mbuf *)(addr - offset + (buf_sz >> 32)); + + rx = (union nix_rx_parse_u *)(((uintptr_t)(mbuf + 1)) + 8); + mbuf->pkt_len = rx->pkt_lenm1 + 1; + mbuf->data_len = rx->pkt_lenm1 + 1; + mbuf->data_off = addr - (uint64_t)mbuf->buf_addr; + + /* Pointers will be alternate encrypted-decrypted in gather list */ + iova_list++; + addr = *iova_list; + inner = (struct rte_mbuf *)(addr - offset + (buf_sz >> 32)); + + /* Mark original mbuf as get */ + RTE_MEMPOOL_CHECK_COOKIES(inner->pool, (void **)&mbuf, 1, 1); + + *rte_security_oop_dynfield(inner) = mbuf; + return inner; +} + static __rte_always_inline uint64_t nix_sec_meta_to_mbuf_sc(uint64_t cq_w5, uint64_t cpth, const uint64_t sa_base, struct rte_mbuf *mbuf, uint16_t *len, uint64_t *mbuf_init, @@ -318,10 +350,11 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, uint6 bool reas_fail = false; const rte_iova_t *eol; uint16_t data_off = 0; + bool is_oop = false; + uint16_t l4_off = 0; uint8_t ts_rx_off; int dyn_off = 0; uint16_t sg_len; - uint16_t l4_off; int64_t len; uintptr_t p; @@ -332,7 +365,7 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, uint6 uint64_t sg_base; /* Check if there are no SG's */ - if (!hdr->w4.gthr_size && ((flags & NIX_RX_REAS_F) || !hdr->w4.sctr_size)) + if (!hdr->w4.gthr_size) return; cq_w5 = *((const uint64_t *)rx + 4); @@ -342,7 +375,12 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, uint6 finfo = (const struct cpt_frag_info_s *)sg_base; sg_base += num_frags ? (num_frags > 4 ? 
32 : 16) : 0; sg = *(uint64_t *)sg_base; + + is_oop = (hdr->w4.l4_chksum_type == 0) && (hdr->w4.l4_chksum == 1); nb_segs = (sg >> 14) & 0x3; + if (is_oop && nb_segs <= 2) + return; + iova_list = (rte_iova_t *)(sg_base); eol = iova_list + (hdr->w4.gthr_size << 2); iova_list += 2; @@ -374,6 +412,12 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, uint6 sg_swap = vreinterpret_u16_u64(vdup_n_u64(sg)); sg_swap = vrev64_u16(sg_swap); sg = vget_lane_u64(vreinterpret_u64_u16(sg_swap), 0); + + /* For Non inplace, first SG pointer will be original encrypted, + * whereas input mbuf is decrypted one. So need to update mbuf pointer + * in order to process SG list accordingly. + */ + mbuf = is_oop ? *rte_security_oop_dynfield(mbuf) : mbuf; } else { sg = *(const uint64_t *)(rx + 1); nb_segs = (sg >> 48) & 0x3; @@ -397,17 +441,17 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, uint6 head = mbuf; /* Update IP header */ - if ((flags & NIX_RX_REAS_F) && num_frags && !reas_fail) + if ((flags & NIX_RX_REAS_F) && num_frags && !reas_fail && !is_oop) nix_sec_reass_first_frag_update(mbuf, iova_list - 1, cpth, cq_w1, cq_w5, rlen); - len -= sg_len; + len -= is_oop ? 
0 : sg_len; nb_segs--; later_skip = (uintptr_t)mbuf->buf_addr - (uintptr_t)mbuf; while (nb_segs) { last_mbuf = mbuf; - if ((flags & NIX_RX_REAS_F) && num_frags) { + if ((flags & NIX_RX_REAS_F) && (num_frags || is_oop)) { offset = (*iova_list) % (buf_sz & 0xFFFFFFFF); mbuf->next = (struct rte_mbuf *)((*iova_list) - offset + (buf_sz >> 32)); } else { @@ -451,12 +495,14 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, uint6 } } - if ((flags & NIX_RX_REAS_F) && num_frags && !reas_fail) + if ((flags & NIX_RX_REAS_F) && ((num_frags && !reas_fail) || is_oop)) data_off = *iova_list - (uint64_t)mbuf->buf_addr; - if ((flags & NIX_RX_OFFLOAD_SECURITY_F) && !(flags & NIX_RX_REAS_F)) { + if (((flags & NIX_RX_OFFLOAD_SECURITY_F) && !(flags & NIX_RX_REAS_F)) || + (is_oop && (sg_cnt % 2))) { /* Adjust last mbuf data length with negative offset for * security pkts if needed. + * For OOP, will correct mbuf length of decrypted pkt. */ len -= sg_len; sg_len = (len > 0) ? sg_len : (sg_len + len); @@ -496,6 +542,29 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, uint6 } } + if (unlikely(is_oop)) { + struct rte_mbuf *o_mbuf; + + /* mbuf chain will have all pointers of encrypted + decrypted + * alternatively. So will need to sort it. 
+ */ + mbuf = head; + o_mbuf = head->next; + nb_segs = head->nb_segs; + mbuf->nb_segs = nb_segs / 2; + o_mbuf->nb_segs = nb_segs / 2; + nb_segs -= 2; + while (unlikely(nb_segs && (nb_segs % 2 == 0))) { + mbuf->next = o_mbuf->next; + o_mbuf->next = o_mbuf->next->next; + mbuf = mbuf->next; + o_mbuf = o_mbuf->next; + nb_segs -= 2; + } + mbuf->next = NULL; + o_mbuf->next = NULL; + } + /* Update for last failure fragment */ if ((flags & NIX_RX_REAS_F) && reas_fail) { cnxk_ip_reassembly_dynfield(head, dyn_off)->next_frag = NULL; @@ -648,6 +717,8 @@ cn20k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts, co uintptr_t cpth = 0; uint16_t lmt_id; uint64_t laddr; + uint64_t w4; + bool is_oop; nb_pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask); @@ -685,7 +756,12 @@ cn20k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts, co */ *(uint64_t *)(laddr + (loff << 3)) = (uint64_t)mbuf; loff = loff + 1; - mbuf = (struct rte_mbuf *)(*(uint64_t *)(cpth + 8) - m_sz); + w4 = *(uint64_t *)(cpth + 32); + is_oop = !((w4 >> 32) & 0x3) && ((w4 & 0xffffffff) == 1); + if ((flags & NIX_RX_REAS_F) && is_oop) + mbuf = nix_sec_oop_process(cpth, buf_sz); + else + mbuf = (struct rte_mbuf *)(*(uint64_t *)(cpth + 8) - m_sz); /* Mark inner mbuf as get */ RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1); @@ -815,11 +891,21 @@ cn20k_nix_flush_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pk static __rte_always_inline void nix_sec_meta_to_mbuf(uintptr_t inb_sa, uintptr_t cpth, struct rte_mbuf **inner, uint64_t *ol_flags, - const uint16_t flags, uint64x2_t *rearm) + const uint16_t flags, uint64x2_t *rearm, uint64_t buf_sz) { const struct cpt_parse_hdr_s *hdr = (const struct cpt_parse_hdr_s *)cpth; - struct rte_mbuf *inner_m = inner[0]; struct cn20k_inb_priv_data *inb_priv; + struct rte_mbuf *inner_m; + uint64_t w4; + bool is_oop; + + w4 = *(uint64_t *)(cpth + 32); + is_oop = !((w4 >> 32) & 0x3) && ((w4 & 0xffffffff) == 1); + + if ((flags & 
NIX_RX_REAS_F) && is_oop) + inner[0] = nix_sec_oop_process(cpth, buf_sz); + + inner_m = inner[0]; /* Clear checksum flags */ *ol_flags &= ~(RTE_MBUF_F_RX_L4_CKSUM_MASK | RTE_MBUF_F_RX_IP_CKSUM_MASK); @@ -1328,7 +1414,8 @@ cn20k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, c f0 = vsetq_lane_u16(len, f0, 2); f0 = vsetq_lane_u16(len, f0, 4); - nix_sec_meta_to_mbuf(sa, cpth0, &mbuf0, &ol_flags0, flags, &rearm0); + nix_sec_meta_to_mbuf(sa, cpth0, &mbuf0, &ol_flags0, flags, &rearm0, + buf_sz); mbuf01 = vsetq_lane_u64((uintptr_t)mbuf0, mbuf01, 0); code = vget_lane_u8(ucc, 1); ol_flags0 |= code ? (code > 1 ? ((uint64_t)code) << 1 : 0) : @@ -1344,7 +1431,6 @@ cn20k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, c cpth1 = (uintptr_t)mbuf1 + d_off; - /* Free meta to aura */ NIX_PUSH_META_TO_FREE(mbuf1, laddr, &loff); mbuf1 = (struct rte_mbuf *)wqe; @@ -1352,7 +1438,8 @@ cn20k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, c f1 = vsetq_lane_u16(len, f1, 2); f1 = vsetq_lane_u16(len, f1, 4); - nix_sec_meta_to_mbuf(sa, cpth1, &mbuf1, &ol_flags1, flags, &rearm1); + nix_sec_meta_to_mbuf(sa, cpth1, &mbuf1, &ol_flags1, flags, &rearm1, + buf_sz); mbuf01 = vsetq_lane_u64((uintptr_t)mbuf1, mbuf01, 1); code = vget_lane_u8(ucc, 3); ol_flags1 |= code ? (code > 1 ? ((uint64_t)code) << 1 : 0) : @@ -1375,7 +1462,8 @@ cn20k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, c f2 = vsetq_lane_u16(len, f2, 2); f2 = vsetq_lane_u16(len, f2, 4); - nix_sec_meta_to_mbuf(sa, cpth2, &mbuf2, &ol_flags2, flags, &rearm2); + nix_sec_meta_to_mbuf(sa, cpth2, &mbuf2, &ol_flags2, flags, &rearm2, + buf_sz); mbuf23 = vsetq_lane_u64((uintptr_t)mbuf2, mbuf23, 0); code = vget_lane_u8(ucc, 5); ol_flags2 |= code ? (code > 1 ? 
((uint64_t)code) << 1 : 0) : @@ -1398,7 +1486,8 @@ cn20k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, c f3 = vsetq_lane_u16(len, f3, 2); f3 = vsetq_lane_u16(len, f3, 4); - nix_sec_meta_to_mbuf(sa, cpth3, &mbuf3, &ol_flags3, flags, &rearm3); + nix_sec_meta_to_mbuf(sa, cpth3, &mbuf3, &ol_flags3, flags, &rearm3, + buf_sz); mbuf23 = vsetq_lane_u64((uintptr_t)mbuf3, mbuf23, 1); code = vget_lane_u8(ucc, 7); ol_flags3 |= code ? (code > 1 ? ((uint64_t)code) << 1 : 0) : -- 2.34.1

