On Tue, Jun 28, 2022 at 09:05:09PM +0300, Vitaliy Makkoveev wrote: > The updated diff. It was triggered by Hrvoje Popovski, we could do > direct (*if_qstart)() call in pipex(4) PPPOE input path, and this > provides deadlock. The updated diff uses ip{,6}_send() instead of > ipv{4,6}_input(). >
I think, I found better solution. The diff below still uses ipv{4,6}_input(), but introduces custom pipex_if_enqueue() (*if_enqueue)() handler. Now we can be sure, our (*if_qstart)() handlers will never be called directly. Also we can be sure about netlock state within our (*if_qstart)() handlers and remove pipexintr(). We do direct (*if_qstart)() call only if we overflow `if_snd' queue, and of course we loose packets in this case, so the behavior mostly the same. Index: sys/net/if_pppx.c =================================================================== RCS file: /cvs/src/sys/net/if_pppx.c,v retrieving revision 1.117 diff -u -p -r1.117 if_pppx.c --- sys/net/if_pppx.c 26 Jun 2022 22:51:58 -0000 1.117 +++ sys/net/if_pppx.c 28 Jun 2022 19:56:47 -0000 @@ -651,6 +651,7 @@ pppx_add_session(struct pppx_dev *pxd, s ifp->if_mtu = req->pr_peer_mru; /* XXX */ ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST | IFF_UP; ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE; + ifp->if_enqueue = pipex_if_enqueue; ifp->if_qstart = pppx_if_qstart; ifp->if_output = pppx_if_output; ifp->if_ioctl = pppx_if_ioctl; @@ -659,8 +660,6 @@ pppx_add_session(struct pppx_dev *pxd, s ifp->if_softc = pxi; /* ifp->if_rdomain = req->pr_rdomain; */ if_counters_alloc(ifp); - /* XXXSMP: be sure pppx_if_qstart() called with NET_LOCK held */ - ifq_set_maxlen(&ifp->if_snd, 1); /* XXXSMP breaks atomicity */ NET_UNLOCK(); @@ -801,13 +800,16 @@ pppx_if_qstart(struct ifqueue *ifq) struct mbuf *m; int proto; - NET_ASSERT_LOCKED(); + mtx_enter(&pxi->pxi_session->pxs_mtx); + while ((m = ifq_dequeue(ifq)) != NULL) { proto = *mtod(m, int *); m_adj(m, sizeof(proto)); pipex_ppp_output(m, pxi->pxi_session, proto); } + + mtx_leave(&pxi->pxi_session->pxs_mtx); } int @@ -1041,11 +1043,10 @@ pppacopen(dev_t dev, int flags, int mode ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST; ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE; ifp->if_rtrequest = p2p_rtrequest; /* XXX */ + ifp->if_enqueue = pipex_if_enqueue; ifp->if_output = pppac_output; ifp->if_qstart = pppac_qstart; ifp->if_ioctl = pppac_ioctl; - /* XXXSMP: be sure pppac_qstart() called with NET_LOCK held */ - ifq_set_maxlen(&ifp->if_snd, 1); if_counters_alloc(ifp); if_attach(ifp); @@ -1441,7 +1442,6 @@ pppac_qstart(struct ifqueue *ifq) struct ip ip; int rv; - NET_ASSERT_LOCKED(); while ((m = ifq_dequeue(ifq)) != NULL) { #if NBPFILTER > 0 if (ifp->if_bpf) { Index: sys/net/pipex.c =================================================================== RCS file: /cvs/src/sys/net/pipex.c,v retrieving revision 1.142 diff -u -p -r1.142 pipex.c --- sys/net/pipex.c 28 Jun 2022 08:01:40 -0000 1.142 +++ sys/net/pipex.c 28 Jun 2022 19:56:47 -0000 @@ -91,7 +91,6 @@ struct pool mppe_key_pool; * Locks used to protect global data * A atomic operation * I immutable after creation - * N net lock * L pipex_list_mtx */ @@ -172,7 +171,6 @@ pipex_ioctl(void *ownersc, u_long cmd, c { int ret = 0; - NET_ASSERT_LOCKED(); switch (cmd) { case PIPEXCSESSION: ret = pipex_config_session( @@ -197,6 +195,19 @@ pipex_ioctl(void *ownersc, u_long cmd, c return (ret); } +int +pipex_if_enqueue(struct ifnet *ifp, struct mbuf *m) +{ + struct ifqueue *ifq = &ifp->if_snd; + int error; + + if ((error = ifq_enqueue(ifq, m)) != 0) + return (error); + task_add(ifq->ifq_softnet, &ifq->ifq_bundle); + + return (0); +} + /************************************************************************ * Software Interrupt Handler ************************************************************************/ @@ -575,18 +586,20 @@ pipex_config_session(struct pipex_sessio struct pipex_session *session; int error = 0; - NET_ASSERT_LOCKED(); - session = pipex_lookup_by_session_id(req->pcr_protocol, req->pcr_session_id); if (session == NULL) return (EINVAL); if (session->ownersc == ownersc) { + mtx_enter(&session->pxs_mtx); + if (req->pcr_ip_forward != 0) session->flags |= PIPEX_SFLAGS_IP_FORWARD; else session->flags &= ~PIPEX_SFLAGS_IP_FORWARD; + + mtx_leave(&session->pxs_mtx); } else error = EINVAL; @@ -601,8 +614,6 @@ pipex_get_stat(struct pipex_session_stat struct pipex_session *session; int error = 0; - NET_ASSERT_LOCKED(); - session = pipex_lookup_by_session_id(req->psr_protocol, req->psr_session_id); if (session == NULL) @@ -811,6 +822,9 @@ pipex_ip_output(struct mbuf *m0, struct /* * Multicast packet is a idle packet and it's not TCP. */ + + mtx_enter(&session->pxs_mtx); + if ((session->flags & (PIPEX_SFLAGS_IP_FORWARD | PIPEX_SFLAGS_IP6_FORWARD)) == 0) goto drop; @@ -837,6 +851,8 @@ pipex_ip_output(struct mbuf *m0, struct } pipex_ppp_output(m0, session, PPP_IP); + + mtx_leave(&session->pxs_mtx); } else { struct pipex_session *session_tmp; struct mbuf *m; @@ -846,16 +862,21 @@ pipex_ip_output(struct mbuf *m0, struct LIST_FOREACH(session_tmp, &pipex_session_list, session_list) { if (session_tmp->ownersc != session->ownersc) continue; + + mtx_enter(&session->pxs_mtx); + if ((session->flags & (PIPEX_SFLAGS_IP_FORWARD | PIPEX_SFLAGS_IP6_FORWARD)) == 0) - continue; + goto next; m = m_copym(m0, 0, M_COPYALL, M_NOWAIT); if (m == NULL) { counters_inc(session->stat_counters, pxc_oerrors); - continue; + goto next; } pipex_ppp_output(m, session_tmp, PPP_IP); +next: + mtx_leave(&session->pxs_mtx); } m_freem(m0); } @@ -864,6 +885,7 @@ pipex_ip_output(struct mbuf *m0, struct drop: m_freem(m0); dropped: + mtx_leave(&session->pxs_mtx); counters_inc(session->stat_counters, pxc_oerrors); } @@ -872,7 +894,7 @@ pipex_ppp_output(struct mbuf *m0, struct { u_char *cp, hdr[16]; - NET_ASSERT_LOCKED(); + MUTEX_ASSERT_LOCKED(&session->pxs_mtx); #ifdef PIPEX_MPPE if (pipex_session_is_mppe_enabled(session)) { @@ -927,6 +949,8 @@ pipex_ppp_input(struct mbuf *m0, struct int proto, hlen = 0; struct mbuf *n; + MUTEX_ASSERT_LOCKED(&session->pxs_mtx); + KASSERT(m0->m_pkthdr.len >= PIPEX_PPPMINLEN); proto = pipex_ppp_proto(m0, session, 0, &hlen); #ifdef PIPEX_MPPE @@ -1288,11 +1312,16 @@ pipex_pppoe_input(struct mbuf *m0, struc sizeof(struct pipex_pppoe_header), &pppoe); hlen = sizeof(struct ether_header) + sizeof(struct pipex_pppoe_header); - if ((m0 = pipex_common_input(session, m0, hlen, ntohs(pppoe.length))) - == NULL) - return (NULL); - m_freem(m0); - counters_inc(session->stat_counters, pxc_ierrors); + + mtx_enter(&session->pxs_mtx); + m0 = pipex_common_input(session, m0, hlen, ntohs(pppoe.length)); + mtx_leave(&session->pxs_mtx); + + if (m0 != NULL) { + m_freem(m0); + counters_inc(session->stat_counters, pxc_ierrors); + } + return (NULL); } @@ -1354,6 +1383,8 @@ pipex_pptp_output(struct mbuf *m0, struc struct ip *ip; u_char *cp; + MUTEX_ASSERT_LOCKED(&session->pxs_mtx); + reqlen = PIPEX_IPGRE_HDRLEN + (has_seq + has_ack) * 4; len = 0; @@ -1507,7 +1538,6 @@ pipex_pptp_input(struct mbuf *m0, struct struct pipex_pptp_session *pptp_session; int rewind = 0; - NET_ASSERT_LOCKED(); KASSERT(m0->m_pkthdr.len >= PIPEX_IPGRE_HDRLEN); pptp_session = &session->proto.pptp; @@ -1537,6 +1567,9 @@ pipex_pptp_input(struct mbuf *m0, struct seqp = cp; GETLONG(seq, cp); } + + mtx_enter(&session->pxs_mtx); + if (has_ack) { ackp = cp; GETLONG(ack, cp); @@ -1585,6 +1618,7 @@ pipex_pptp_input(struct mbuf *m0, struct /* ok, The packet is for PIPEX */ if (!rewind) session->proto.pptp.rcv_gap += nseq; + mtx_leave(&session->pxs_mtx); return (NULL); } @@ -1618,6 +1652,8 @@ not_ours: PUTLONG(ack, ackp); } + mtx_leave(&session->pxs_mtx); + return (m0); out_seq: pipex_session_log(session, LOG_DEBUG, @@ -1626,6 +1662,7 @@ out_seq: pptp_session->rcv_nxt + pptp_session->maxwinsz, ack, pptp_session->snd_una, pptp_session->snd_nxt); + mtx_leave(&session->pxs_mtx); /* FALLTHROUGH */ drop: @@ -1755,6 +1792,8 @@ pipex_pptp_userland_output(struct mbuf * gre = mtod(m0, struct pipex_gre_header *); cp = PIPEX_SEEK_NEXTHDR(gre, sizeof(struct pipex_gre_header), u_char *); + mtx_enter(&session->pxs_mtx); + /* * overwrite sequence numbers to adjust a gap between pipex and * userland. @@ -1775,6 +1814,8 @@ pipex_pptp_userland_output(struct mbuf * session->proto.pptp.rcv_acked = val32; } + mtx_leave(&session->pxs_mtx); + return (m0); } #endif /* PIPEX_PPTP */ @@ -1796,6 +1837,8 @@ pipex_l2tp_output(struct mbuf *m0, struc #endif struct m_tag *mtag; + MUTEX_ASSERT_LOCKED(&session->pxs_mtx); + hlen = sizeof(struct pipex_l2tp_header) + ((pipex_session_is_l2tp_data_sequencing_on(session)) ? sizeof(struct pipex_l2tp_seq_header) : 0) + @@ -1967,17 +2010,15 @@ pipex_l2tp_input(struct mbuf *m0, int of uint32_t ipsecflowinfo) { struct pipex_l2tp_session *l2tp_session; - int length, offset, hlen, nseq; - u_char *cp, *nsp, *nrp; + int length = 0, offset = 0, hlen, nseq; + u_char *cp, *nsp = NULL, *nrp = NULL; uint16_t flags, ns = 0, nr = 0; int rewind = 0; - NET_ASSERT_LOCKED(); + mtx_enter(&session->pxs_mtx); - length = offset = ns = nr = 0; l2tp_session = &session->proto.l2tp; l2tp_session->ipsecflowinfo = ipsecflowinfo; - nsp = nrp = NULL; m_copydata(m0, off0, sizeof(flags), &flags); @@ -2039,6 +2080,8 @@ pipex_l2tp_input(struct mbuf *m0, int of /* ok, The packet is for PIPEX */ if (!rewind) session->proto.l2tp.nr_gap += nseq; + mtx_leave(&session->pxs_mtx); + return (NULL); } @@ -2069,15 +2112,18 @@ pipex_l2tp_input(struct mbuf *m0, int of PUTSHORT(nr, nrp); } + mtx_leave(&session->pxs_mtx); + return (m0); out_seq: pipex_session_log(session, LOG_DEBUG, "Received bad data packet: out of sequence: seq=%u(%u-) " "ack=%u(%u-%u)", ns, l2tp_session->nr_nxt, nr, l2tp_session->ns_una, l2tp_session->ns_nxt); - /* FALLTHROUGH */ drop: + mtx_leave(&session->pxs_mtx); + m_freem(m0); counters_inc(session->stat_counters, pxc_ierrors); @@ -2202,6 +2248,8 @@ pipex_l2tp_userland_output(struct mbuf * ns = ntohs(seq->ns); nr = ntohs(seq->nr); + mtx_enter(&session->pxs_mtx); + ns += session->proto.l2tp.ns_gap; seq->ns = htons(ns); session->proto.l2tp.ns_nxt++; @@ -2211,6 +2259,8 @@ pipex_l2tp_userland_output(struct mbuf * if (SEQ16_GT(nr, session->proto.l2tp.nr_acked)) session->proto.l2tp.nr_acked = nr; + mtx_leave(&session->pxs_mtx); + return (m0); } #endif /* PIPEX_L2TP */ @@ -2379,6 +2429,8 @@ pipex_mppe_input(struct mbuf *m0, struct u_char *cp; int rewind = 0; + MUTEX_ASSERT_LOCKED(&session->pxs_mtx); + /* pullup */ PIPEX_PULLUP(m0, sizeof(coher_cnt)); if (m0 == NULL) @@ -2471,10 +2523,8 @@ pipex_mppe_input(struct mbuf *m0, struct /* Send CCP ResetReq */ PIPEX_DBG((session, LOG_DEBUG, "CCP SendResetReq")); - mtx_enter(&session->pxs_mtx); ccp_id = session->ccp_id; session->ccp_id++; - mtx_leave(&session->pxs_mtx); pipex_ccp_output(session, CCP_RESETREQ, ccp_id); goto drop; Index: sys/net/pipex_local.h =================================================================== RCS file: /cvs/src/sys/net/pipex_local.h,v retrieving revision 1.47 diff -u -p -r1.47 pipex_local.h --- sys/net/pipex_local.h 26 Jun 2022 15:50:21 -0000 1.47 +++ sys/net/pipex_local.h 28 Jun 2022 19:56:47 -0000 @@ -56,8 +56,8 @@ extern struct mutex pipex_list_mtx; /* * Locks used to protect struct members: + * A atomic operation * I immutable after creation - * N net lock * L pipex_list_mtx * s this pipex_session' `pxs_mtx' * m this pipex_mppe' `pxm_mtx' @@ -91,14 +91,14 @@ struct pipex_pppoe_session { #ifdef PIPEX_PPTP struct pipex_pptp_session { /* sequence number gap between pipex and userland */ - int32_t snd_gap; /* [N] gap of our sequence */ - int32_t rcv_gap; /* [N] gap of peer's sequence */ - int32_t ul_snd_una; /* [N] userland send acked seq */ - - uint32_t snd_nxt; /* [N] send next */ - uint32_t rcv_nxt; /* [N] receive next */ - uint32_t snd_una; /* [N] send acked sequence */ - uint32_t rcv_acked; /* [N] recv acked sequence */ + int32_t snd_gap; /* [s] gap of our sequence */ + int32_t rcv_gap; /* [s] gap of peer's sequence */ + int32_t ul_snd_una; /* [s] userland send acked seq */ + + uint32_t snd_nxt; /* [s] send next */ + uint32_t rcv_nxt; /* [s] receive next */ + uint32_t snd_una; /* [s] send acked sequence */ + uint32_t rcv_acked; /* [s] recv acked sequence */ int winsz; /* [I] windows size */ int maxwinsz; /* [I] max windows size */ @@ -141,16 +141,16 @@ struct pipex_l2tp_session { uint32_t option_flags; /* [I] protocol options */ - int16_t ns_gap; /* [N] gap between userland and pipex */ - int16_t nr_gap; /* [N] gap between userland and pipex */ - uint16_t ul_ns_una; /* [N] unacked sequence number (userland) */ - - uint16_t ns_nxt; /* [N] next sequence number to send */ - uint16_t ns_una; /* [N] unacked sequence number to send */ - - uint16_t nr_nxt; /* [N] next sequence number to recv */ - uint16_t nr_acked; /* [N] acked sequence number to recv */ - uint32_t ipsecflowinfo; /* [N] IPsec SA flow id for NAT-T */ + int16_t ns_gap; /* [s] gap between userland and pipex */ + int16_t nr_gap; /* [s] gap between userland and pipex */ + uint16_t ul_ns_una; /* [s] unacked sequence number (userland) */ + + uint16_t ns_nxt; /* [s] next sequence number to send */ + uint16_t ns_una; /* [s] unacked sequence number to send */ + + uint16_t nr_nxt; /* [s] next sequence number to recv */ + uint16_t nr_acked; /* [s] acked sequence number to recv */ + uint32_t ipsecflowinfo; /* [s] IPsec SA flow id for NAT-T */ }; #endif /* PIPEX_L2TP */ @@ -180,9 +180,9 @@ struct pipex_session { uint32_t idle_time; /* [L] idle time in seconds */ - u_int flags; /* [N] flags, see below */ -#define PIPEX_SFLAGS_IP_FORWARD 0x01 /* [N] enable IP forwarding */ -#define PIPEX_SFLAGS_IP6_FORWARD 0x02 /* [N] enable IPv6 forwarding */ + u_int flags; /* [s] flags, see below */ +#define PIPEX_SFLAGS_IP_FORWARD 0x01 /* [s] enable IP forwarding */ +#define PIPEX_SFLAGS_IP6_FORWARD 0x02 /* [s] enable IPv6 forwarding */ #define PIPEX_SFLAGS_MULTICAST 0x04 /* [I] virtual entry for multicast */ #define PIPEX_SFLAGS_PPPX 0x08 /* [I] interface is @@ -200,7 +200,7 @@ struct pipex_session { struct sockaddr_in6 ip6_address; /* [I] remote IPv6 address */ int ip6_prefixlen; /* [I] remote IPv6 prefixlen */ - u_int ifindex; /* [N] interface index */ + u_int ifindex; /* [A] interface index */ void *ownersc; /* [I] owner context */ uint32_t ppp_flags; /* [I] configure flags */ @@ -469,3 +469,4 @@ int pipex_ppp_enqueue void pipex_timer_start (void); void pipex_timer_stop (void); void pipex_timer (void *); +int pipex_if_enqueue(struct ifnet *, struct mbuf *);