We can't predict netlock state when we executing pipex(4) related (*if_qstart)() handlers, so we can't use netlock for pipex(4) protection.
We already use `pipex_list_mtx' for global pipex(4) data, so Use per-session `pxs_mtx' mutex(9) for pipex session context protection. We already use `pipex_list_mtx' for global pipex(4) data, so now netlock protects nothing in the pipex(4) layer. Please note, the MPPE related `pxm_mtx' mutex(9) looks unnecessary, because we always lock it when `pxs_mtx' is locked. I want to keep it as is until we decide, what to do with PIPEX_SFLAGS_IP_FORWARD flag in the pipex(4) output paths. If we decide to not check it, the current per-session locking could be more fine grained. Also pppx(4) layer still uses netlock for protection. This made sense when netlock was used to protect pipex(4) layer, but now it should be removed from here. This will be done with the next diffs. Hrvoje, could you test this diff please? Index: sys/net/if_pppx.c =================================================================== RCS file: /cvs/src/sys/net/if_pppx.c,v retrieving revision 1.117 diff -u -p -r1.117 if_pppx.c --- sys/net/if_pppx.c 26 Jun 2022 22:51:58 -0000 1.117 +++ sys/net/if_pppx.c 28 Jun 2022 13:47:09 -0000 @@ -659,8 +659,6 @@ pppx_add_session(struct pppx_dev *pxd, s ifp->if_softc = pxi; /* ifp->if_rdomain = req->pr_rdomain; */ if_counters_alloc(ifp); - /* XXXSMP: be sure pppx_if_qstart() called with NET_LOCK held */ - ifq_set_maxlen(&ifp->if_snd, 1); /* XXXSMP breaks atomicity */ NET_UNLOCK(); @@ -801,13 +799,16 @@ pppx_if_qstart(struct ifqueue *ifq) struct mbuf *m; int proto; - NET_ASSERT_LOCKED(); + mtx_enter(&pxi->pxi_session->pxs_mtx); + while ((m = ifq_dequeue(ifq)) != NULL) { proto = *mtod(m, int *); m_adj(m, sizeof(proto)); pipex_ppp_output(m, pxi->pxi_session, proto); } + + mtx_leave(&pxi->pxi_session->pxs_mtx); } int @@ -1044,8 +1045,6 @@ pppacopen(dev_t dev, int flags, int mode ifp->if_output = pppac_output; ifp->if_qstart = pppac_qstart; ifp->if_ioctl = pppac_ioctl; - /* XXXSMP: be sure pppac_qstart() called with NET_LOCK held */ - ifq_set_maxlen(&ifp->if_snd, 1); if_counters_alloc(ifp); if_attach(ifp); @@ -1441,7 +1440,6 @@ pppac_qstart(struct ifqueue *ifq) struct ip ip; int rv; - NET_ASSERT_LOCKED(); while ((m = ifq_dequeue(ifq)) != NULL) { #if NBPFILTER > 0 if (ifp->if_bpf) { Index: sys/net/pipex.c =================================================================== RCS file: /cvs/src/sys/net/pipex.c,v retrieving revision 1.142 diff -u -p -r1.142 pipex.c --- sys/net/pipex.c 28 Jun 2022 08:01:40 -0000 1.142 +++ sys/net/pipex.c 28 Jun 2022 13:47:09 -0000 @@ -91,7 +91,6 @@ struct pool mppe_key_pool; * Locks used to protect global data * A atomic operation * I immutable after creation - * N net lock * L pipex_list_mtx */ @@ -172,7 +171,6 @@ pipex_ioctl(void *ownersc, u_long cmd, c { int ret = 0; - NET_ASSERT_LOCKED(); switch (cmd) { case PIPEXCSESSION: ret = pipex_config_session( @@ -575,18 +573,20 @@ pipex_config_session(struct pipex_sessio struct pipex_session *session; int error = 0; - NET_ASSERT_LOCKED(); - session = pipex_lookup_by_session_id(req->pcr_protocol, req->pcr_session_id); if (session == NULL) return (EINVAL); if (session->ownersc == ownersc) { + mtx_enter(&session->pxs_mtx); + if (req->pcr_ip_forward != 0) session->flags |= PIPEX_SFLAGS_IP_FORWARD; else session->flags &= ~PIPEX_SFLAGS_IP_FORWARD; + + mtx_leave(&session->pxs_mtx); } else error = EINVAL; @@ -601,8 +601,6 @@ pipex_get_stat(struct pipex_session_stat struct pipex_session *session; int error = 0; - NET_ASSERT_LOCKED(); - session = pipex_lookup_by_session_id(req->psr_protocol, req->psr_session_id); if (session == NULL) @@ -811,6 +809,9 @@ pipex_ip_output(struct mbuf *m0, struct /* * Multicast packet is a idle packet and it's not TCP. */ + + mtx_enter(&session->pxs_mtx); + if ((session->flags & (PIPEX_SFLAGS_IP_FORWARD | PIPEX_SFLAGS_IP6_FORWARD)) == 0) goto drop; @@ -837,6 +838,8 @@ pipex_ip_output(struct mbuf *m0, struct } pipex_ppp_output(m0, session, PPP_IP); + + mtx_leave(&session->pxs_mtx); } else { struct pipex_session *session_tmp; struct mbuf *m; @@ -846,16 +849,21 @@ pipex_ip_output(struct mbuf *m0, struct LIST_FOREACH(session_tmp, &pipex_session_list, session_list) { if (session_tmp->ownersc != session->ownersc) continue; + + mtx_enter(&session->pxs_mtx); + if ((session->flags & (PIPEX_SFLAGS_IP_FORWARD | PIPEX_SFLAGS_IP6_FORWARD)) == 0) - continue; + goto next; m = m_copym(m0, 0, M_COPYALL, M_NOWAIT); if (m == NULL) { counters_inc(session->stat_counters, pxc_oerrors); - continue; + goto next; } pipex_ppp_output(m, session_tmp, PPP_IP); +next: + mtx_leave(&session->pxs_mtx); } m_freem(m0); } @@ -864,6 +872,7 @@ pipex_ip_output(struct mbuf *m0, struct drop: m_freem(m0); dropped: + mtx_leave(&session->pxs_mtx); counters_inc(session->stat_counters, pxc_oerrors); } @@ -872,7 +881,7 @@ pipex_ppp_output(struct mbuf *m0, struct { u_char *cp, hdr[16]; - NET_ASSERT_LOCKED(); + MUTEX_ASSERT_LOCKED(&session->pxs_mtx); #ifdef PIPEX_MPPE if (pipex_session_is_mppe_enabled(session)) { @@ -927,6 +936,8 @@ pipex_ppp_input(struct mbuf *m0, struct int proto, hlen = 0; struct mbuf *n; + MUTEX_ASSERT_LOCKED(&session->pxs_mtx); + KASSERT(m0->m_pkthdr.len >= PIPEX_PPPMINLEN); proto = pipex_ppp_proto(m0, session, 0, &hlen); #ifdef PIPEX_MPPE @@ -1288,11 +1299,16 @@ pipex_pppoe_input(struct mbuf *m0, struc sizeof(struct pipex_pppoe_header), &pppoe); hlen = sizeof(struct ether_header) + sizeof(struct pipex_pppoe_header); - if ((m0 = pipex_common_input(session, m0, hlen, ntohs(pppoe.length))) - == NULL) - return (NULL); - m_freem(m0); - counters_inc(session->stat_counters, pxc_ierrors); + + mtx_enter(&session->pxs_mtx); + m0 = pipex_common_input(session, m0, hlen, ntohs(pppoe.length)); + mtx_leave(&session->pxs_mtx); + + if (m0 != NULL) { + m_freem(m0); + counters_inc(session->stat_counters, pxc_ierrors); + } + return (NULL); } @@ -1354,6 +1370,8 @@ pipex_pptp_output(struct mbuf *m0, struc struct ip *ip; u_char *cp; + MUTEX_ASSERT_LOCKED(&session->pxs_mtx); + reqlen = PIPEX_IPGRE_HDRLEN + (has_seq + has_ack) * 4; len = 0; @@ -1507,7 +1525,6 @@ pipex_pptp_input(struct mbuf *m0, struct struct pipex_pptp_session *pptp_session; int rewind = 0; - NET_ASSERT_LOCKED(); KASSERT(m0->m_pkthdr.len >= PIPEX_IPGRE_HDRLEN); pptp_session = &session->proto.pptp; @@ -1537,6 +1554,9 @@ pipex_pptp_input(struct mbuf *m0, struct seqp = cp; GETLONG(seq, cp); } + + mtx_enter(&session->pxs_mtx); + if (has_ack) { ackp = cp; GETLONG(ack, cp); @@ -1585,6 +1605,7 @@ pipex_pptp_input(struct mbuf *m0, struct /* ok, The packet is for PIPEX */ if (!rewind) session->proto.pptp.rcv_gap += nseq; + mtx_leave(&session->pxs_mtx); return (NULL); } @@ -1618,6 +1639,8 @@ not_ours: PUTLONG(ack, ackp); } + mtx_leave(&session->pxs_mtx); + return (m0); out_seq: pipex_session_log(session, LOG_DEBUG, @@ -1626,6 +1649,7 @@ out_seq: pptp_session->rcv_nxt + pptp_session->maxwinsz, ack, pptp_session->snd_una, pptp_session->snd_nxt); + mtx_leave(&session->pxs_mtx); /* FALLTHROUGH */ drop: @@ -1755,6 +1779,8 @@ pipex_pptp_userland_output(struct mbuf * gre = mtod(m0, struct pipex_gre_header *); cp = PIPEX_SEEK_NEXTHDR(gre, sizeof(struct pipex_gre_header), u_char *); + mtx_enter(&session->pxs_mtx); + /* * overwrite sequence numbers to adjust a gap between pipex and * userland. @@ -1775,6 +1801,8 @@ pipex_pptp_userland_output(struct mbuf * session->proto.pptp.rcv_acked = val32; } + mtx_leave(&session->pxs_mtx); + return (m0); } #endif /* PIPEX_PPTP */ @@ -1796,6 +1824,8 @@ pipex_l2tp_output(struct mbuf *m0, struc #endif struct m_tag *mtag; + MUTEX_ASSERT_LOCKED(&session->pxs_mtx); + hlen = sizeof(struct pipex_l2tp_header) + ((pipex_session_is_l2tp_data_sequencing_on(session)) ? sizeof(struct pipex_l2tp_seq_header) : 0) + @@ -1967,17 +1997,15 @@ pipex_l2tp_input(struct mbuf *m0, int of uint32_t ipsecflowinfo) { struct pipex_l2tp_session *l2tp_session; - int length, offset, hlen, nseq; - u_char *cp, *nsp, *nrp; + int length = 0, offset = 0, hlen, nseq; + u_char *cp, *nsp = NULL, *nrp = NULL; uint16_t flags, ns = 0, nr = 0; int rewind = 0; - NET_ASSERT_LOCKED(); + mtx_enter(&session->pxs_mtx); - length = offset = ns = nr = 0; l2tp_session = &session->proto.l2tp; l2tp_session->ipsecflowinfo = ipsecflowinfo; - nsp = nrp = NULL; m_copydata(m0, off0, sizeof(flags), &flags); @@ -2039,6 +2067,8 @@ pipex_l2tp_input(struct mbuf *m0, int of /* ok, The packet is for PIPEX */ if (!rewind) session->proto.l2tp.nr_gap += nseq; + mtx_leave(&session->pxs_mtx); + return (NULL); } @@ -2069,15 +2099,18 @@ pipex_l2tp_input(struct mbuf *m0, int of PUTSHORT(nr, nrp); } + mtx_leave(&session->pxs_mtx); + return (m0); out_seq: pipex_session_log(session, LOG_DEBUG, "Received bad data packet: out of sequence: seq=%u(%u-) " "ack=%u(%u-%u)", ns, l2tp_session->nr_nxt, nr, l2tp_session->ns_una, l2tp_session->ns_nxt); - /* FALLTHROUGH */ drop: + mtx_leave(&session->pxs_mtx); + m_freem(m0); counters_inc(session->stat_counters, pxc_ierrors); @@ -2202,6 +2235,8 @@ pipex_l2tp_userland_output(struct mbuf * ns = ntohs(seq->ns); nr = ntohs(seq->nr); + mtx_enter(&session->pxs_mtx); + ns += session->proto.l2tp.ns_gap; seq->ns = htons(ns); session->proto.l2tp.ns_nxt++; @@ -2211,6 +2246,8 @@ pipex_l2tp_userland_output(struct mbuf * if (SEQ16_GT(nr, session->proto.l2tp.nr_acked)) session->proto.l2tp.nr_acked = nr; + mtx_leave(&session->pxs_mtx); + return (m0); } #endif /* PIPEX_L2TP */ @@ -2379,6 +2416,8 @@ pipex_mppe_input(struct mbuf *m0, struct u_char *cp; int rewind = 0; + MUTEX_ASSERT_LOCKED(&session->pxs_mtx); + /* pullup */ PIPEX_PULLUP(m0, sizeof(coher_cnt)); if (m0 == NULL) @@ -2471,10 +2510,8 @@ pipex_mppe_input(struct mbuf *m0, struct /* Send CCP ResetReq */ PIPEX_DBG((session, LOG_DEBUG, "CCP SendResetReq")); - mtx_enter(&session->pxs_mtx); ccp_id = session->ccp_id; session->ccp_id++; - mtx_leave(&session->pxs_mtx); pipex_ccp_output(session, CCP_RESETREQ, ccp_id); goto drop; Index: sys/net/pipex_local.h =================================================================== RCS file: /cvs/src/sys/net/pipex_local.h,v retrieving revision 1.47 diff -u -p -r1.47 pipex_local.h --- sys/net/pipex_local.h 26 Jun 2022 15:50:21 -0000 1.47 +++ sys/net/pipex_local.h 28 Jun 2022 13:47:09 -0000 @@ -56,8 +56,8 @@ extern struct mutex pipex_list_mtx; /* * Locks used to protect struct members: + * A atomic operation * I immutable after creation - * N net lock * L pipex_list_mtx * s this pipex_session' `pxs_mtx' * m this pipex_mppe' `pxm_mtx' @@ -91,14 +91,14 @@ struct pipex_pppoe_session { #ifdef PIPEX_PPTP struct pipex_pptp_session { /* sequence number gap between pipex and userland */ - int32_t snd_gap; /* [N] gap of our sequence */ - int32_t rcv_gap; /* [N] gap of peer's sequence */ - int32_t ul_snd_una; /* [N] userland send acked seq */ - - uint32_t snd_nxt; /* [N] send next */ - uint32_t rcv_nxt; /* [N] receive next */ - uint32_t snd_una; /* [N] send acked sequence */ - uint32_t rcv_acked; /* [N] recv acked sequence */ + int32_t snd_gap; /* [s] gap of our sequence */ + int32_t rcv_gap; /* [s] gap of peer's sequence */ + int32_t ul_snd_una; /* [s] userland send acked seq */ + + uint32_t snd_nxt; /* [s] send next */ + uint32_t rcv_nxt; /* [s] receive next */ + uint32_t snd_una; /* [s] send acked sequence */ + uint32_t rcv_acked; /* [s] recv acked sequence */ int winsz; /* [I] windows size */ int maxwinsz; /* [I] max windows size */ @@ -141,16 +141,16 @@ struct pipex_l2tp_session { uint32_t option_flags; /* [I] protocol options */ - int16_t ns_gap; /* [N] gap between userland and pipex */ - int16_t nr_gap; /* [N] gap between userland and pipex */ - uint16_t ul_ns_una; /* [N] unacked sequence number (userland) */ - - uint16_t ns_nxt; /* [N] next sequence number to send */ - uint16_t ns_una; /* [N] unacked sequence number to send */ - - uint16_t nr_nxt; /* [N] next sequence number to recv */ - uint16_t nr_acked; /* [N] acked sequence number to recv */ - uint32_t ipsecflowinfo; /* [N] IPsec SA flow id for NAT-T */ + int16_t ns_gap; /* [s] gap between userland and pipex */ + int16_t nr_gap; /* [s] gap between userland and pipex */ + uint16_t ul_ns_una; /* [s] unacked sequence number (userland) */ + + uint16_t ns_nxt; /* [s] next sequence number to send */ + uint16_t ns_una; /* [s] unacked sequence number to send */ + + uint16_t nr_nxt; /* [s] next sequence number to recv */ + uint16_t nr_acked; /* [s] acked sequence number to recv */ + uint32_t ipsecflowinfo; /* [s] IPsec SA flow id for NAT-T */ }; #endif /* PIPEX_L2TP */ @@ -180,9 +180,9 @@ struct pipex_session { uint32_t idle_time; /* [L] idle time in seconds */ - u_int flags; /* [N] flags, see below */ -#define PIPEX_SFLAGS_IP_FORWARD 0x01 /* [N] enable IP forwarding */ -#define PIPEX_SFLAGS_IP6_FORWARD 0x02 /* [N] enable IPv6 forwarding */ + u_int flags; /* [s] flags, see below */ +#define PIPEX_SFLAGS_IP_FORWARD 0x01 /* [s] enable IP forwarding */ +#define PIPEX_SFLAGS_IP6_FORWARD 0x02 /* [s] enable IPv6 forwarding */ #define PIPEX_SFLAGS_MULTICAST 0x04 /* [I] virtual entry for multicast */ #define PIPEX_SFLAGS_PPPX 0x08 /* [I] interface is @@ -200,7 +200,7 @@ struct pipex_session { struct sockaddr_in6 ip6_address; /* [I] remote IPv6 address */ int ip6_prefixlen; /* [I] remote IPv6 prefixlen */ - u_int ifindex; /* [N] interface index */ + u_int ifindex; /* [A] interface index */ void *ownersc; /* [I] owner context */ uint32_t ppp_flags; /* [I] configure flags */