> On 9/23/2021 3:09 PM, Ananyev, Konstantin wrote:
> >
> >> Add support for transmit segmentation offload to inline crypto processing
> >> mode. This offload is not supported by other offload modes, as at a
> >> minimum it requires inline crypto for IPsec to be supported on the
> >> network interface.
> >>
> >> Signed-off-by: Declan Doherty <[email protected]>
> >> Signed-off-by: Radu Nicolau <[email protected]>
> >> Signed-off-by: Abhijit Sinha <[email protected]>
> >> Signed-off-by: Daniel Martin Buckley <[email protected]>
> >> Acked-by: Fan Zhang <[email protected]>
> >> ---
> >>  lib/ipsec/esp_inb.c  |   4 +-
> >>  lib/ipsec/esp_outb.c | 115 +++++++++++++++++++++++++++++++++++--------
> >>  lib/ipsec/iph.h      |  10 +++-
> >>  lib/ipsec/sa.c       |   6 +++
> >>  lib/ipsec/sa.h       |   4 ++
> >>  5 files changed, 114 insertions(+), 25 deletions(-)
> >>
> >> diff --git a/lib/ipsec/esp_inb.c b/lib/ipsec/esp_inb.c
> >> index d66c88f05d..a6ab8fbdd5 100644
> >> --- a/lib/ipsec/esp_inb.c
> >> +++ b/lib/ipsec/esp_inb.c
> >> @@ -668,8 +668,8 @@ trs_process(const struct rte_ipsec_sa *sa, struct rte_mbuf *mb[],
> >>  		/* modify packet's layout */
> >>  		np = trs_process_step2(mb[i], ml[i], hl[i], cofs,
> >>  			to[i], tl, sqn + k);
> >> -		update_trs_l3hdr(sa, np + l2, mb[i]->pkt_len,
> >> -			l2, hl[i] - l2, espt[i].next_proto);
> >> +		update_trs_l34hdrs(sa, np + l2, mb[i]->pkt_len,
> >> +			l2, hl[i] - l2, espt[i].next_proto, 0);
> >>
> >>  		/* update mbuf's metadata */
> >>  		trs_process_step3(mb[i]);
> >> diff --git a/lib/ipsec/esp_outb.c b/lib/ipsec/esp_outb.c
> >> index a3f77469c3..9fc7075796 100644
> >> --- a/lib/ipsec/esp_outb.c
> >> +++ b/lib/ipsec/esp_outb.c
> >> @@ -2,6 +2,8 @@
> >>   * Copyright(c) 2018-2020 Intel Corporation
> >>   */
> >>
> >> +#include <math.h>
> >> +
> >>  #include <rte_ipsec.h>
> >>  #include <rte_esp.h>
> >>  #include <rte_ip.h>
> >> @@ -156,11 +158,20 @@ outb_tun_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
> >>
> >>  	/* number of bytes to encrypt */
> >>  	clen = plen + sizeof(*espt);
> >> -	clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> >> +
> >> +	/* We don't need to pad/align packet when using TSO offload */
> >> +	if (likely(!(mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))))
> >> +		clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> >> +
> > Here and everywhere:
> > It doesn't look nice that we have to pollute generic functions with
> > checking TSO-specific flags all over the place.
> > Can we perhaps have a specific prepare/process function for the inline+tso
> > case, as we do for the cpu and inline cases right now?
> > Or just update the inline version?
> I looked at doing this, but unless I copy these 2 functions I can't move
> this out.
> >
> >>  	/* pad length + esp tail */
> >>  	pdlen = clen - plen;
> >> -	tlen = pdlen + sa->icv_len + sqh_len;
> >> +
> >> +	/* We don't append ICV length when using TSO offload */
> >> +	if (likely(!(mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))))
> >> +		tlen = pdlen + sa->icv_len + sqh_len;
> >> +	else
> >> +		tlen = pdlen + sqh_len;
> >>
> >>  	/* do append and prepend */
> >>  	ml = rte_pktmbuf_lastseg(mb);
> >> @@ -337,6 +348,7 @@ outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
> >>  	char *ph, *pt;
> >>  	uint64_t *iv;
> >>  	uint32_t l2len, l3len;
> >> +	uint8_t tso = mb->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG) ? 1 : 0;
> >>
> >>  	l2len = mb->l2_len;
> >>  	l3len = mb->l3_len;
> >> @@ -349,11 +361,19 @@ outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
> >>
> >>  	/* number of bytes to encrypt */
> >>  	clen = plen + sizeof(*espt);
> >> -	clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> >> +
> >> +	/* We don't need to pad/align packet when using TSO offload */
> >> +	if (likely(!tso))
> >> +		clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> >>
> >>  	/* pad length + esp tail */
> >>  	pdlen = clen - plen;
> >> -	tlen = pdlen + sa->icv_len + sqh_len;
> >> +
> >> +	/* We don't append ICV length when using TSO offload */
> >> +	if (likely(!tso))
> >> +		tlen = pdlen + sa->icv_len + sqh_len;
> >> +	else
> >> +		tlen = pdlen + sqh_len;
> >>
> >>  	/* do append and insert */
> >>  	ml = rte_pktmbuf_lastseg(mb);
> >> @@ -375,8 +395,8 @@ outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
> >>  	insert_esph(ph, ph + hlen, uhlen);
> >>
> >>  	/* update ip header fields */
> >> -	np = update_trs_l3hdr(sa, ph + l2len, mb->pkt_len - sqh_len, l2len,
> >> -		l3len, IPPROTO_ESP);
> >> +	np = update_trs_l34hdrs(sa, ph + l2len, mb->pkt_len - sqh_len, l2len,
> >> +		l3len, IPPROTO_ESP, tso);
> >>
> >>  	/* update spi, seqn and iv */
> >>  	esph = (struct rte_esp_hdr *)(ph + uhlen);
> >> @@ -651,6 +671,33 @@ inline_outb_mbuf_prepare(const struct rte_ipsec_session *ss,
> >>  	}
> >>  }
> >>
> >> +/* check if packet will exceed MSS and segmentation is required */
> >> +static inline int
> >> +esn_outb_nb_segments(const struct rte_ipsec_sa *sa, struct rte_mbuf *m)
> >> +{
> >> +	uint16_t segments = 1;
> >> +	uint16_t pkt_l3len = m->pkt_len - m->l2_len;
> >> +
> >> +	/* Only support segmentation for UDP/TCP flows */
> >> +	if (!(m->packet_type & (RTE_PTYPE_L4_UDP | RTE_PTYPE_L4_TCP)))
> >> +		return segments;
> >> +
> >> +	if (sa->tso.enabled && pkt_l3len > sa->tso.mss) {
> >> +		segments = ceil((float)pkt_l3len / sa->tso.mss);
> > Float calculations in the middle of the data-path?
> > Just to calculate a roundup?
> > Doesn't look good to me at all.
> It doesn't look good to me either; I will rework it.
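For the record, a straightforward rework is integer ceiling division,
which needs no <math.h> and no float conversion in the datapath (a
sketch, reusing the names from the patch above):

	/* integer round-up of pkt_l3len / mss */
	segments = (pkt_l3len + sa->tso.mss - 1) / sa->tso.mss;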
> >> +
> >> +		if (m->packet_type & RTE_PTYPE_L4_TCP) {
> >> +			m->ol_flags |= (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM);
> > That's really strange - why would the ipsec library set PKT_TX_TCP_SEG
> > unconditionally?
> > That should be the responsibility of the upper layer, I think.
> > In the lib we should only check whether TSO was requested for that packet
> > or not. Same for UDP.
> These are under an if(TSO) condition.
> >
> >> +			m->l4_len = sizeof(struct rte_tcp_hdr);
> > Hmm, how do we know there are no TCP options present for that packet?
> > Wouldn't it be better to expect the user to provide a proper l4_len for
> > such packets?
> You're right, I will update it.
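One possible fix, sketched here with the names from the patch, is to
derive l4_len from the TCP data offset field instead of assuming a bare
header, so that TCP options are counted as well (expecting the user to
set l4_len, as suggested above, remains the simpler alternative):

	struct rte_tcp_hdr *th;

	/* upper 4 bits of data_off hold the header length in 32-bit words */
	th = rte_pktmbuf_mtod_offset(m, struct rte_tcp_hdr *,
		m->l2_len + m->l3_len);
	m->l4_len = (th->data_off >> 4) * sizeof(uint32_t);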
> >
> >> +		} else {
> >> +			m->ol_flags |= (PKT_TX_UDP_SEG | PKT_TX_UDP_CKSUM);
> >> +			m->l4_len = sizeof(struct rte_udp_hdr);
> >> +		}
> >> +
> >> +		m->tso_segsz = sa->tso.mss;
> >> +	}
> >> +
> >> +	return segments;
> >> +}
> >> +
> >>  /*
> >>   * process group of ESP outbound tunnel packets destined for
> >>   * INLINE_CRYPTO type of device.
> >> @@ -660,24 +707,29 @@ inline_outb_tun_pkt_process(const struct rte_ipsec_session *ss,
> >>  	struct rte_mbuf *mb[], uint16_t num)
> >>  {
> >>  	int32_t rc;
> >> -	uint32_t i, k, n;
> >> +	uint32_t i, k, nb_sqn = 0, nb_sqn_alloc;
> >>  	uint64_t sqn;
> >>  	rte_be64_t sqc;
> >>  	struct rte_ipsec_sa *sa;
> >>  	union sym_op_data icv;
> >>  	uint64_t iv[IPSEC_MAX_IV_QWORD];
> >>  	uint32_t dr[num];
> >> +	uint16_t nb_segs[num];
> >>
> >>  	sa = ss->sa;
> >>
> >> -	n = num;
> >> -	sqn = esn_outb_update_sqn(sa, &n);
> >> -	if (n != num)
> >> +	for (i = 0; i != num; i++) {
> >> +		nb_segs[i] = esn_outb_nb_segments(sa, mb[i]);
> >> +		nb_sqn += nb_segs[i];
> >> +	}
> >> +
> >> +	nb_sqn_alloc = nb_sqn;
> >> +	sqn = esn_outb_update_sqn(sa, &nb_sqn_alloc);
> >> +	if (nb_sqn_alloc != nb_sqn)
> >>  		rte_errno = EOVERFLOW;
> >>
> >>  	k = 0;
> >> -	for (i = 0; i != n; i++) {
> >> -
> >> +	for (i = 0; i != num; i++) {
> >>  		sqc = rte_cpu_to_be_64(sqn + i);
> >>  		gen_iv(iv, sqc);
> >>
> >> @@ -691,11 +743,18 @@ inline_outb_tun_pkt_process(const struct rte_ipsec_session *ss,
> >>  			dr[i - k] = i;
> >>  			rte_errno = -rc;
> >>  		}
> >> +
> >> +		/**
> >> +		 * If packet is using tso, increment sqn by the number of
> >> +		 * segments for packet
> >> +		 */
> >> +		if (mb[i]->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))
> >> +			sqn += nb_segs[i] - 1;
> >>  	}
> >>
> >>  	/* copy not processed mbufs beyond good ones */
> >> -	if (k != n && k != 0)
> >> -		move_bad_mbufs(mb, dr, n, n - k);
> >> +	if (k != num && k != 0)
> >> +		move_bad_mbufs(mb, dr, num, num - k);
> >>
> >>  	inline_outb_mbuf_prepare(ss, mb, k);
> >>  	return k;
> >> @@ -710,23 +769,30 @@ inline_outb_trs_pkt_process(const struct rte_ipsec_session *ss,
> >>  	struct rte_mbuf *mb[], uint16_t num)
> >>  {
> >>  	int32_t rc;
> >> -	uint32_t i, k, n;
> >> +	uint32_t i, k, nb_sqn, nb_sqn_alloc;
> >>  	uint64_t sqn;
> >>  	rte_be64_t sqc;
> >>  	struct rte_ipsec_sa *sa;
> >>  	union sym_op_data icv;
> >>  	uint64_t iv[IPSEC_MAX_IV_QWORD];
> >>  	uint32_t dr[num];
> >> +	uint16_t nb_segs[num];
> >>
> >>  	sa = ss->sa;
> >>
> >> -	n = num;
> >> -	sqn = esn_outb_update_sqn(sa, &n);
> >> -	if (n != num)
> >> +	/* Calculate number of sequence numbers required */
> >> +	for (i = 0, nb_sqn = 0; i != num; i++) {
> >> +		nb_segs[i] = esn_outb_nb_segments(sa, mb[i]);
> >> +		nb_sqn += nb_segs[i];
> >> +	}
> >> +
> >> +	nb_sqn_alloc = nb_sqn;
> >> +	sqn = esn_outb_update_sqn(sa, &nb_sqn_alloc);
> >> +	if (nb_sqn_alloc != nb_sqn)
> >>  		rte_errno = EOVERFLOW;
> >>
> >>  	k = 0;
> >> -	for (i = 0; i != n; i++) {
> >> +	for (i = 0; i != num; i++) {
> >>
> >>  		sqc = rte_cpu_to_be_64(sqn + i);
> >>  		gen_iv(iv, sqc);
> >>
> >> @@ -741,11 +807,18 @@ inline_outb_trs_pkt_process(const struct rte_ipsec_session *ss,
> >>  			dr[i - k] = i;
> >>  			rte_errno = -rc;
> >>  		}
> >> +
> >> +		/**
> >> +		 * If packet is using tso, increment sqn by the number of
> >> +		 * segments for packet
> >> +		 */
> >> +		if (mb[i]->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))
> >> +			sqn += nb_segs[i] - 1;
> >>  	}
> >>
> >>  	/* copy not processed mbufs beyond good ones */
> >> -	if (k != n && k != 0)
> >> -		move_bad_mbufs(mb, dr, n, n - k);
> >> +	if (k != num && k != 0)
> >> +		move_bad_mbufs(mb, dr, num, num - k);
> >>
> >>  	inline_outb_mbuf_prepare(ss, mb, k);
> >>  	return k;
> >> diff --git a/lib/ipsec/iph.h b/lib/ipsec/iph.h
> >> index 861f16905a..2d223199ac 100644
> >> --- a/lib/ipsec/iph.h
> >> +++ b/lib/ipsec/iph.h
> >> @@ -6,6 +6,8 @@
> >>  #define _IPH_H_
> >>
> >>  #include <rte_ip.h>
> >> +#include <rte_udp.h>
> >> +#include <rte_tcp.h>
> >>
> >>  /**
> >>   * @file iph.h
> >> @@ -39,8 +41,8 @@ insert_esph(char *np, char *op, uint32_t hlen)
> >>
> >>  /* update original ip header fields for transport case */
> >>  static inline int
> >> -update_trs_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
> >> -	uint32_t l2len, uint32_t l3len, uint8_t proto)
> >> +update_trs_l34hdrs(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
> >> +	uint32_t l2len, uint32_t l3len, uint8_t proto, uint8_t tso)
> > Hmm... why change the name of the function?
> >
> >>  {
> >>  	int32_t rc;
> >>
> >> @@ -51,6 +53,10 @@ update_trs_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
> >>  		v4h = p;
> >>  		rc = v4h->next_proto_id;
> >>  		v4h->next_proto_id = proto;
> >> +		if (tso) {
> >> +			v4h->hdr_checksum = 0;
> >> +			v4h->total_length = 0;
> > total_length will be overwritten unconditionally at the next line below.
> >
> > Another question - why is it necessary?
> > Is it a HW-specific requirement or ... ?
> It looks wrong, I will rewrite this.
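The rewrite could be as simple as dropping the redundant zeroing and
keeping only the checksum reset; a sketch, not necessarily the final
code:

	if (tso)
		v4h->hdr_checksum = 0;	/* recomputed by the NIC per segment */
	v4h->total_length = rte_cpu_to_be_16(plen - l2len);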
> >> +		}
> >>  		v4h->total_length = rte_cpu_to_be_16(plen - l2len);
> >>
> >>  	/* IPv6 */
> >>  	} else {
> >> diff --git a/lib/ipsec/sa.c b/lib/ipsec/sa.c
> >> index 720e0f365b..2ecbbce0a4 100644
> >> --- a/lib/ipsec/sa.c
> >> +++ b/lib/ipsec/sa.c
> >> @@ -565,6 +565,12 @@ rte_ipsec_sa_init(struct rte_ipsec_sa *sa, const struct rte_ipsec_sa_prm *prm,
> >>  	sa->type = type;
> >>  	sa->size = sz;
> >>
> >> +
> >> +	if (prm->ipsec_xform.options.tso == 1) {
> >> +		sa->tso.enabled = 1;
> >> +		sa->tso.mss = prm->ipsec_xform.mss;
> >> +	}
> >> +
> >>  	/* check for ESN flag */
> >>  	sa->sqn_mask = (prm->ipsec_xform.options.esn == 0) ?
> >>  		UINT32_MAX : UINT64_MAX;
> >> diff --git a/lib/ipsec/sa.h b/lib/ipsec/sa.h
> >> index 107ebd1519..5e237f3525 100644
> >> --- a/lib/ipsec/sa.h
> >> +++ b/lib/ipsec/sa.h
> >> @@ -113,6 +113,10 @@ struct rte_ipsec_sa {
> >>  	uint8_t iv_len;
> >>  	uint8_t pad_align;
> >>  	uint8_t tos_mask;
> >> +	struct {
> >> +		uint8_t enabled:1;
> >> +		uint16_t mss;
> >> +	} tso;
> > Wouldn't one field be enough?
> > uint16_t tso_mss;
> > And if it is zero, then tso is disabled.
> > In fact, do we need it at all?
> > Wouldn't it be better to request the user to fill mbuf->tso_segsz properly
> > for us?
> We added an option to rte_security_ipsec_sa_options to allow the user to
> enable TSO per SA and specify the MSS in the session parameters.
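For context, the per-SA configuration this patch expects looks roughly
like the sketch below on the application side (field names as used by
the patch; the MSS value is purely illustrative):

	struct rte_ipsec_sa_prm prm = { 0 };

	prm.ipsec_xform.options.tso = 1;	/* enable TSO for this SA */
	prm.ipsec_xform.mss = 1400;		/* one MSS for every flow on the SA */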
After another thought, it doesn't look like a good approach to me.
On one hand, the same SA can be used with multiple IP addresses; on the
other, the MSS value can differ on a per-connection basis, so different
TCP connections within the same SA can easily have different MSS values.
I therefore think we shouldn't save the MSS in the SA at all.
Instead, we probably need to request the user to fill mbuf->tso_segsz
for us.
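A minimal sketch of that alternative: the application fills the TSO
fields per packet before handing the mbuf to the library, so no MSS has
to live in the SA ('mb', 'mss' and 'th' are hypothetical per-connection
values here, purely for illustration):

	mb->ol_flags |= PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM;
	mb->tso_segsz = mss;		/* per-connection MSS */
	mb->l4_len = (th->data_off >> 4) * sizeof(uint32_t);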
> We can request the user to fill mbuf->tso_segsz, but with this patch we
> are doing it for the user.
> >
> >>  	/* template for tunnel header */
> >>  	uint8_t hdr[IPSEC_MAX_HDR_SIZE];
> >> --
> >> 2.25.1
