I like it. I would like it more if you named the mbuf list variable "fml".
ok by me. > On 26 Feb 2021, at 9:08 pm, Alexander Bluhm <alexander.bl...@gmx.net> wrote: > > Hi, > > I always bothered me that ip_fragment() and ip6_fragment() behave > sligtly differently. Unify them and use an mlist to simplify the > fragment list. > > - The functions ip_fragment() and ip6_fragment() always consume the mbuf. > - They free the mbuf and mbuf list in case of an error. > - They care about the counter. > - Adjust the code a bit to make v4 and v6 look similar. > - Maybe there was an mbuf leak when pf_route6() called pf_refragment6() > and it failed. Now the mbuf is always freed by ip6_fragment(). > > ok? > > bluhm > > Index: net/if_bridge.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_bridge.c,v > retrieving revision 1.352 > diff -u -p -r1.352 if_bridge.c > --- net/if_bridge.c 25 Feb 2021 02:48:21 -0000 1.352 > +++ net/if_bridge.c 26 Feb 2021 10:41:57 -0000 > @@ -1853,7 +1853,7 @@ bridge_fragment(struct ifnet *brifp, str > struct mbuf *m) > { > struct llc llc; > - struct mbuf *m0; > + struct mbuf_list ml; > int error = 0; > int hassnap = 0; > u_int16_t etype; > @@ -1911,40 +1911,32 @@ bridge_fragment(struct ifnet *brifp, str > return; > } > > - error = ip_fragment(m, ifp, ifp->if_mtu); > - if (error) { > - m = NULL; > - goto dropit; > - } > + error = ip_fragment(m, &ml, ifp, ifp->if_mtu); > + if (error) > + return; > > - for (; m; m = m0) { > - m0 = m->m_nextpkt; > - m->m_nextpkt = NULL; > - if (error == 0) { > - if (hassnap) { > - M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT); > - if (m == NULL) { > - error = ENOBUFS; > - continue; > - } > - bcopy(&llc, mtod(m, caddr_t), > - LLC_SNAPFRAMELEN); > - } > - M_PREPEND(m, sizeof(*eh), M_DONTWAIT); > + while ((m = ml_dequeue(&ml)) != NULL) { > + if (hassnap) { > + M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT); > if (m == NULL) { > error = ENOBUFS; > - continue; > + break; > } > - bcopy(eh, mtod(m, caddr_t), sizeof(*eh)); > - error = bridge_ifenqueue(brifp, ifp, m); > - if (error) { > - continue; > - } > - } else > - m_freem(m); > + bcopy(&llc, mtod(m, caddr_t), LLC_SNAPFRAMELEN); > + } > + M_PREPEND(m, sizeof(*eh), M_DONTWAIT); > + if (m == NULL) { > + error = ENOBUFS; > + break; > + } > + bcopy(eh, mtod(m, caddr_t), sizeof(*eh)); > + error = bridge_ifenqueue(brifp, ifp, m); > + if (error) > + break; > } > - > - if (error == 0) > + if (error) > + ml_purge(&ml); > + else > ipstat_inc(ips_fragmented); > > return; > Index: net/pf.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v > retrieving revision 1.1112 > diff -u -p -r1.1112 pf.c > --- net/pf.c 23 Feb 2021 11:43:40 -0000 1.1112 > +++ net/pf.c 26 Feb 2021 10:41:57 -0000 > @@ -5969,7 +5969,8 @@ pf_rtlabel_match(struct pf_addr *addr, s > void > pf_route(struct pf_pdesc *pd, struct pf_state *s) > { > - struct mbuf *m0, *m1; > + struct mbuf *m0; > + struct mbuf_list ml; > struct sockaddr_in *dst, sin; > struct rtentry *rt = NULL; > struct ip *ip; > @@ -6078,23 +6079,18 @@ pf_route(struct pf_pdesc *pd, struct pf_ > goto bad; > } > > - m1 = m0; > - error = ip_fragment(m0, ifp, ifp->if_mtu); > - if (error) { > - m0 = NULL; > - goto bad; > - } > + error = ip_fragment(m0, &ml, ifp, ifp->if_mtu); > + if (error) > + goto done; > > - for (m0 = m1; m0; m0 = m1) { > - m1 = m0->m_nextpkt; > - m0->m_nextpkt = NULL; > - if (error == 0) > - error = ifp->if_output(ifp, m0, sintosa(dst), rt); > - else > - m_freem(m0); > + while ((m0 = ml_dequeue(&ml)) != NULL) { > + error = ifp->if_output(ifp, m0, sintosa(dst), rt); > + if (error) > + break; > } > - > - if (error == 0) > + if (error) > + ml_purge(&ml); > + else > ipstat_inc(ips_fragmented); > > done: > Index: net/pf_norm.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf_norm.c,v > retrieving revision 1.221 > diff -u -p -r1.221 pf_norm.c > --- net/pf_norm.c 22 Feb 2021 13:04:56 -0000 1.221 > +++ net/pf_norm.c 26 Feb 2021 10:49:40 -0000 > @@ -966,12 +966,13 @@ int > pf_refragment6(struct mbuf **m0, struct m_tag *mtag, struct sockaddr_in6 *dst, > struct ifnet *ifp, struct rtentry *rt) > { > - struct mbuf *m = *m0, *t; > + struct mbuf *m = *m0; > + struct mbuf_list ml; > struct pf_fragment_tag *ftag = (struct pf_fragment_tag *)(mtag + 1); > u_int32_t mtu; > u_int16_t hdrlen, extoff, maxlen; > u_int8_t proto; > - int error, action; > + int error; > > hdrlen = ftag->ft_hdrlen; > extoff = ftag->ft_extoff; > @@ -1009,39 +1010,25 @@ pf_refragment6(struct mbuf **m0, struct > * we drop the packet. > */ > mtu = hdrlen + sizeof(struct ip6_frag) + maxlen; > - error = ip6_fragment(m, hdrlen, proto, mtu); > - > - m = (*m0)->m_nextpkt; > - (*m0)->m_nextpkt = NULL; > - if (error == 0) { > - /* The first mbuf contains the unfragmented packet */ > - m_freemp(m0); > - action = PF_PASS; > - } else { > - /* Drop expects an mbuf to free */ > + error = ip6_fragment(m, &ml, hdrlen, proto, mtu); > + *m0 = NULL; /* ip6_fragment() has consumed original packet. */ > + if (error) { > DPFPRINTF(LOG_NOTICE, "refragment error %d", error); > - action = PF_DROP; > + return (PF_DROP); > } > > - for (t = m; m; m = t) { > - t = m->m_nextpkt; > - m->m_nextpkt = NULL; > + while ((m = ml_dequeue(&ml)) != NULL) { > m->m_pkthdr.pf.flags |= PF_TAG_REFRAGMENTED; > - if (error == 0) { > - if (ifp == NULL) { > - ip6_forward(m, NULL, 0); > - } else if ((u_long)m->m_pkthdr.len <= ifp->if_mtu) { > - ifp->if_output(ifp, m, sin6tosa(dst), rt); > - } else { > - icmp6_error(m, ICMP6_PACKET_TOO_BIG, 0, > - ifp->if_mtu); > - } > + if (ifp == NULL) { > + ip6_forward(m, NULL, 0); > + } else if ((u_long)m->m_pkthdr.len <= ifp->if_mtu) { > + ifp->if_output(ifp, m, sin6tosa(dst), rt); > } else { > - m_freem(m); > + icmp6_error(m, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); > } > } > > - return (action); > + return (PF_PASS); > } > #endif /* INET6 */ > > Index: netinet/ip_output.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v > retrieving revision 1.367 > diff -u -p -r1.367 ip_output.c > --- netinet/ip_output.c 23 Feb 2021 12:14:10 -0000 1.367 > +++ netinet/ip_output.c 26 Feb 2021 10:41:57 -0000 > @@ -96,12 +96,12 @@ ip_output_ipsec_send(struct tdb *, struc > * The mbuf opt, if present, will not be freed. > */ > int > -ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags, > +ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, > struct ip_moptions *imo, struct inpcb *inp, u_int32_t ipsecflowinfo) > { > struct ip *ip; > struct ifnet *ifp = NULL; > - struct mbuf *m = m0; > + struct mbuf_list ml; > int hlen = sizeof (struct ip); > int error = 0; > struct route iproute; > @@ -502,22 +502,18 @@ sendit: > goto bad; > } > > - error = ip_fragment(m, ifp, mtu); > - if (error) { > - m = m0 = NULL; > - goto bad; > - } > + error = ip_fragment(m, &ml, ifp, mtu); > + if (error) > + goto done; > > - for (; m; m = m0) { > - m0 = m->m_nextpkt; > - m->m_nextpkt = NULL; > - if (error == 0) > - error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt); > - else > - m_freem(m); > + while ((m = ml_dequeue(&ml)) != NULL) { > + error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt); > + if (error) > + break; > } > - > - if (error == 0) > + if (error) > + ml_purge(&ml); > + else > ipstat_inc(ips_fragmented); > > done: > @@ -525,6 +521,7 @@ done: > rtfree(ro->ro_rt); > if_put(ifp); > return (error); > + > bad: > m_freem(m); > goto done; > @@ -651,23 +648,23 @@ ip_output_ipsec_send(struct tdb *tdb, st > #endif /* IPSEC */ > > int > -ip_fragment(struct mbuf *m, struct ifnet *ifp, u_long mtu) > +ip_fragment(struct mbuf *m, struct mbuf_list *ml, struct ifnet *ifp, u_long > mtu) > { > struct ip *ip, *mhip; > struct mbuf *m0; > int len, hlen, off; > int mhlen, firstlen; > - struct mbuf **mnext; > - int fragments = 0; > - int error = 0; > + int error; > + > + ml_init(ml); > + ml_enqueue(ml, m); > > ip = mtod(m, struct ip *); > hlen = ip->ip_hl << 2; > - > len = (mtu - hlen) &~ 7; > if (len < 8) { > - m_freem(m); > - return (EMSGSIZE); > + error = EMSGSIZE; > + goto bad; > } > > /* > @@ -676,7 +673,6 @@ ip_fragment(struct mbuf *m, struct ifnet > */ > in_proto_cksum_out(m, NULL); > firstlen = len; > - mnext = &m->m_nextpkt; > > /* > * Loop through length of segment after first fragment, > @@ -687,12 +683,10 @@ ip_fragment(struct mbuf *m, struct ifnet > for (off = hlen + len; off < ntohs(ip->ip_len); off += len) { > MGETHDR(m, M_DONTWAIT, MT_HEADER); > if (m == NULL) { > - ipstat_inc(ips_odropped); > error = ENOBUFS; > - goto sendorfree; > + goto bad; > } > - *mnext = m; > - mnext = &m->m_nextpkt; > + ml_enqueue(ml, m); > m->m_data += max_linkhdr; > mhip = mtod(m, struct ip *); > *mhip = *ip; > @@ -715,10 +709,9 @@ ip_fragment(struct mbuf *m, struct ifnet > mhip->ip_off |= IP_MF; > mhip->ip_len = htons((u_int16_t)(len + mhlen)); > m->m_next = m_copym(m0, off, len, M_NOWAIT); > - if (m->m_next == 0) { > - ipstat_inc(ips_odropped); > + if (m->m_next == NULL) { > error = ENOBUFS; > - goto sendorfree; > + goto bad; > } > m->m_pkthdr.len = mhlen + len; > m->m_pkthdr.ph_ifidx = 0; > @@ -730,8 +723,6 @@ ip_fragment(struct mbuf *m, struct ifnet > ipstat_inc(ips_outswcsum); > mhip->ip_sum = in_cksum(m, mhlen); > } > - ipstat_inc(ips_ofragments); > - fragments++; > } > /* > * Update first fragment by trimming what's been copied out > @@ -749,15 +740,13 @@ ip_fragment(struct mbuf *m, struct ifnet > ipstat_inc(ips_outswcsum); > ip->ip_sum = in_cksum(m, hlen); > } > -sendorfree: > - if (error) { > - for (m = m0; m; m = m0) { > - m0 = m->m_nextpkt; > - m->m_nextpkt = NULL; > - m_freem(m); > - } > - } > > + ipstat_add(ips_ofragments, ml_len(ml)); > + return (0); > + > +bad: > + ipstat_inc(ips_odropped); > + ml_purge(ml); > return (error); > } > > Index: netinet/ip_var.h > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v > retrieving revision 1.86 > diff -u -p -r1.86 ip_var.h > --- netinet/ip_var.h 8 Dec 2019 11:08:22 -0000 1.86 > +++ netinet/ip_var.h 26 Feb 2021 10:41:57 -0000 > @@ -145,6 +145,12 @@ ipstat_inc(enum ipstat_counters c) > counters_inc(ipcounters, c); > } > > +static inline void > +ipstat_add(enum ipstat_counters c, uint64_t v) > +{ > + counters_add(ipcounters, c, v); > +} > + > /* > * Structure attached to inpcb.ip_moptions and > * passed to ip_output when IP multicast options are in use. > @@ -218,7 +224,7 @@ struct inpcb; > > int ip_ctloutput(int, struct socket *, int, int, struct mbuf *); > void ip_flush(void); > -int ip_fragment(struct mbuf *, struct ifnet *, u_long); > +int ip_fragment(struct mbuf *, struct mbuf_list *, struct ifnet *, u_long); > void ip_freef(struct ipq *); > void ip_freemoptions(struct ip_moptions *); > int ip_getmoptions(int, struct ip_moptions *, struct mbuf *); > Index: netinet6/ip6_id.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_id.c,v > retrieving revision 1.14 > diff -u -p -r1.14 ip6_id.c > --- netinet6/ip6_id.c 24 Jun 2020 22:03:44 -0000 1.14 > +++ netinet6/ip6_id.c 26 Feb 2021 10:41:57 -0000 > @@ -83,6 +83,7 @@ > > #include <sys/param.h> > #include <sys/kernel.h> > +#include <sys/mbuf.h> > #include <sys/socket.h> > #include <sys/systm.h> > > Index: netinet6/ip6_output.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v > retrieving revision 1.254 > diff -u -p -r1.254 ip6_output.c > --- netinet6/ip6_output.c 23 Feb 2021 11:43:41 -0000 1.254 > +++ netinet6/ip6_output.c 26 Feb 2021 10:41:57 -0000 > @@ -154,12 +154,12 @@ struct idgen32_ctx ip6_id_ctx; > * which is rt_mtu. > */ > int > -ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, > +ip6_output(struct mbuf *m, struct ip6_pktopts *opt, struct route_in6 *ro, > int flags, struct ip6_moptions *im6o, struct inpcb *inp) > { > struct ip6_hdr *ip6; > struct ifnet *ifp = NULL; > - struct mbuf *m = m0; > + struct mbuf_list ml; > int hlen, tlen; > struct route_in6 ip6route; > struct rtentry *rt = NULL; > @@ -174,6 +174,7 @@ ip6_output(struct mbuf *m0, struct ip6_p > struct route_in6 *ro_pmtu = NULL; > int hdrsplit = 0; > u_int8_t sproto = 0; > + u_char nextproto; > #ifdef IPSEC > struct tdb *tdb = NULL; > #endif /* IPSEC */ > @@ -709,64 +710,47 @@ reroute: > /* jumbo payload cannot be fragmented */ > error = EMSGSIZE; > goto bad; > - } else { > - u_char nextproto; > -#if 0 > - struct ip6ctlparam ip6cp; > - u_int32_t mtu32; > -#endif > - > - /* > - * Too large for the destination or interface; > - * fragment if possible. > - * Must be able to put at least 8 bytes per fragment. > - */ > - hlen = unfragpartlen; > - if (mtu > IPV6_MAXPACKET) > - mtu = IPV6_MAXPACKET; > - > - /* > - * Change the next header field of the last header in the > - * unfragmentable part. > - */ > - if (exthdrs.ip6e_rthdr) { > - nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *); > - *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT; > - } else if (exthdrs.ip6e_dest1) { > - nextproto = *mtod(exthdrs.ip6e_dest1, u_char *); > - *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT; > - } else if (exthdrs.ip6e_hbh) { > - nextproto = *mtod(exthdrs.ip6e_hbh, u_char *); > - *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT; > - } else { > - nextproto = ip6->ip6_nxt; > - ip6->ip6_nxt = IPPROTO_FRAGMENT; > - } > - > - m0 = m; > - error = ip6_fragment(m0, hlen, nextproto, mtu); > - if (error) > - ip6stat_inc(ip6s_odropped); > } > > /* > - * Remove leading garbages. > + * Too large for the destination or interface; > + * fragment if possible. > + * Must be able to put at least 8 bytes per fragment. > */ > - m = m0->m_nextpkt; > - m0->m_nextpkt = NULL; > - m_freem(m0); > - for (m0 = m; m; m = m0) { > - m0 = m->m_nextpkt; > - m->m_nextpkt = NULL; > - if (error == 0) { > - ip6stat_inc(ip6s_ofragments); > - error = ifp->if_output(ifp, m, sin6tosa(dst), > - ro->ro_rt); > - } else > - m_freem(m); > + hlen = unfragpartlen; > + if (mtu > IPV6_MAXPACKET) > + mtu = IPV6_MAXPACKET; > + > + /* > + * Change the next header field of the last header in the > + * unfragmentable part. > + */ > + if (exthdrs.ip6e_rthdr) { > + nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *); > + *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT; > + } else if (exthdrs.ip6e_dest1) { > + nextproto = *mtod(exthdrs.ip6e_dest1, u_char *); > + *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT; > + } else if (exthdrs.ip6e_hbh) { > + nextproto = *mtod(exthdrs.ip6e_hbh, u_char *); > + *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT; > + } else { > + nextproto = ip6->ip6_nxt; > + ip6->ip6_nxt = IPPROTO_FRAGMENT; > } > > - if (error == 0) > + error = ip6_fragment(m, &ml, hlen, nextproto, mtu); > + if (error) > + goto done; > + > + while ((m = ml_dequeue(&ml)) != NULL) { > + error = ifp->if_output(ifp, m, sin6tosa(dst), ro->ro_rt); > + if (error) > + break; > + } > + if (error) > + ml_purge(&ml); > + else > ip6stat_inc(ip6s_fragmented); > > done: > @@ -776,7 +760,6 @@ done: > } else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) { > rtfree(ro_pmtu->ro_rt); > } > - > return (error); > > freehdrs: > @@ -791,45 +774,48 @@ bad: > } > > int > -ip6_fragment(struct mbuf *m0, int hlen, u_char nextproto, u_long mtu) > +ip6_fragment(struct mbuf *m0, struct mbuf_list *ml, int hlen, u_char > nextproto, > + u_long mtu) > { > - struct mbuf *m, **mnext, *m_frgpart; > + struct mbuf *m, *m_frgpart; > struct ip6_hdr *mhip6; > struct ip6_frag *ip6f; > u_int32_t id; > int tlen, len, off; > int error; > > - id = htonl(ip6_randomid()); > - > - mnext = &m0->m_nextpkt; > - *mnext = NULL; > + ml_init(ml); > > tlen = m0->m_pkthdr.len; > len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7; > - if (len < 8) > - return (EMSGSIZE); > + if (len < 8) { > + error = EMSGSIZE; > + goto bad; > + } > + > + id = htonl(ip6_randomid()); > > /* > * Loop through length of segment after first fragment, > - * make new header and copy data of each part and link onto > - * chain. > + * make new header and copy data of each part and link onto chain. > */ > for (off = hlen; off < tlen; off += len) { > struct mbuf *mlast; > > - if ((m = m_gethdr(M_DONTWAIT, MT_HEADER)) == NULL) > - return (ENOBUFS); > - *mnext = m; > - mnext = &m->m_nextpkt; > + MGETHDR(m, M_DONTWAIT, MT_HEADER); > + if (m == NULL) { > + error = ENOBUFS; > + goto bad; > + } > + ml_enqueue(ml, m); > if ((error = m_dup_pkthdr(m, m0, M_DONTWAIT)) != 0) > - return (error); > + goto bad; > m->m_data += max_linkhdr; > mhip6 = mtod(m, struct ip6_hdr *); > *mhip6 = *mtod(m0, struct ip6_hdr *); > m->m_len = sizeof(*mhip6); > if ((error = ip6_insertfraghdr(m0, m, hlen, &ip6f)) != 0) > - return (error); > + goto bad; > ip6f->ip6f_offlg = htons((u_int16_t)((off - hlen) & ~7)); > if (off + len >= tlen) > len = tlen - off; > @@ -837,8 +823,10 @@ ip6_fragment(struct mbuf *m0, int hlen, > ip6f->ip6f_offlg |= IP6F_MORE_FRAG; > mhip6->ip6_plen = htons((u_int16_t)(len + hlen + > sizeof(*ip6f) - sizeof(struct ip6_hdr))); > - if ((m_frgpart = m_copym(m0, off, len, M_DONTWAIT)) == NULL) > - return (ENOBUFS); > + if ((m_frgpart = m_copym(m0, off, len, M_DONTWAIT)) == NULL) { > + error = ENOBUFS; > + goto bad; > + } > for (mlast = m; mlast->m_next; mlast = mlast->m_next) > ; > mlast->m_next = m_frgpart; > @@ -848,7 +836,15 @@ ip6_fragment(struct mbuf *m0, int hlen, > ip6f->ip6f_nxt = nextproto; > } > > + ip6stat_add(ip6s_ofragments, ml_len(ml)); > + m_freem(m0); > return (0); > + > +bad: > + ip6stat_inc(ip6s_odropped); > + ml_purge(ml); > + m_freem(m0); > + return (error); > } > > int > Index: netinet6/ip6_var.h > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_var.h,v > retrieving revision 1.87 > diff -u -p -r1.87 ip6_var.h > --- netinet6/ip6_var.h 11 Jan 2021 13:28:54 -0000 1.87 > +++ netinet6/ip6_var.h 26 Feb 2021 10:41:57 -0000 > @@ -322,7 +322,7 @@ void ip6_forward(struct mbuf *, struct r > void ip6_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in6 *); > int ip6_output(struct mbuf *, struct ip6_pktopts *, struct route_in6 *, int, > struct ip6_moptions *, struct inpcb *); > -int ip6_fragment(struct mbuf *, int, u_char, u_long); > +int ip6_fragment(struct mbuf *, struct mbuf_list *, int, u_char, u_long); > int ip6_ctloutput(int, struct socket *, int, int, struct mbuf *); > int ip6_raw_ctloutput(int, struct socket *, int, int, struct mbuf *); > void ip6_initpktopts(struct ip6_pktopts *); >