On Tue, Oct 28, 2025 at 02:26:57PM +1000, David Gwynne wrote:
> On Mon, Oct 27, 2025 at 09:43:04PM +0100, Alexander Bluhm wrote:
> > On Mon, Oct 27, 2025 at 08:54:02PM +0300, Vitaliy Makkoveev wrote:
> > > Seems the whole sppp_output() should take kernel lock. I have no ability
> > > to test this diff.
> > 
> > I also came to the conclusion that some big kernel lock is missing.
> > Just wondering why this bug appears now.  We have removed kernel
> > lock from network stack a long time ago.
> 
> this is because of src/sys/net/if_pppoe.c r1.87, which moved pppoe away
> from queueing outgoing packets on the ifq to direct dispatch. because
> pppoe wasn't marked mpsafe, sppp_output was called by the ifq machinery
> with the kernel lock held.
> 
> the testing i (and semarie) did was without the link1 flag, but that's
> being used here. IFF_LINK1 is aliased to IFF_AUTO flag in the code and
> is used to implement autodial functionality in the sppp code. the
> kernel lock is only really needed to start moving the interface
> through the connection state machine, which can be done in parallel
> to the data path.
> 
> also, fiddling with ifp->if_flags like this should be done while
> holding NET_LOCK, which isn't guaranteed to be held here. the diff
> below moves the autodial stuff into a task run from systq, which
> holds the kernel lock like the ppp state machine requires.
> 
> this diff compiles. i have no idea if it works.
> 

According the trace, sppp_output() running with shared netlock. Is it
still enough? I especially concern about if_enqueue() and the following
bridge(4) codepaths.

> Index: if_sppp.h
> ===================================================================
> RCS file: /cvs/src/sys/net/if_sppp.h,v
> diff -u -p -r1.31 if_sppp.h
> --- if_sppp.h 15 Jan 2025 06:15:44 -0000      1.31
> +++ if_sppp.h 28 Oct 2025 04:25:22 -0000
> @@ -174,6 +174,7 @@ struct sppp {
>       time_t  pp_last_receive;        /* peer's last "sign of life" */
>       time_t  pp_last_activity;       /* second of last payload data s/r */
>       enum ppp_phase pp_phase;        /* phase we're currently in */
> +     struct task pp_autodial;
>       int     state[IDX_COUNT];       /* state machine */
>       u_char  confid[IDX_COUNT];      /* id of last configuration request */
>       int     rst_counter[IDX_COUNT]; /* restart counter */
> Index: if_spppsubr.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_spppsubr.c,v
> diff -u -p -r1.198 if_spppsubr.c
> --- if_spppsubr.c     19 Aug 2025 12:34:15 -0000      1.198
> +++ if_spppsubr.c     28 Oct 2025 04:25:22 -0000
> @@ -227,6 +227,7 @@ static struct timeout keepalive_ch;
>       struct ifnet *ifp = &sp->pp_if;                         \
>       int debug = ifp->if_flags & IFF_DEBUG
>  
> +void sppp_autodial(void *);
>  int sppp_output(struct ifnet *ifp, struct mbuf *m,
>                      struct sockaddr *dst, struct rtentry *rt);
>  
> @@ -570,6 +571,20 @@ sppp_input(struct ifnet *ifp, struct mbu
>       goto drop;
>  }
>  
> +void
> +sppp_autodial(void *arg)
> +{
> +     struct sppp *sp = arg;
> +     struct ifnet *ifp = &sp->pp_if;
> +
> +     NET_LOCK();
> +     if ((ifp->if_flags & (IFF_RUNNING | IFF_AUTO)) == IFF_AUTO) {
> +             ifp->if_flags |= IFF_RUNNING;
> +             lcp.Open(sp);
> +     }
> +     NET_UNLOCK();
> +}
> +
>  /*
>   * Enqueue transmit packet.
>   */
> @@ -581,6 +596,7 @@ sppp_output(struct ifnet *ifp, struct mb
>       struct timeval tv;
>       int s, rv = 0;
>       u_int16_t protocol;
> +     int if_flags;
>  
>  #ifdef DIAGNOSTIC
>       if (ifp->if_rdomain != rtable_l2(m->m_pkthdr.ph_rtableid)) {
> @@ -596,22 +612,20 @@ sppp_output(struct ifnet *ifp, struct mb
>       getmicrouptime(&tv);
>       sp->pp_last_activity = tv.tv_sec;
>  
> -     if ((ifp->if_flags & IFF_UP) == 0 ||
> -         (ifp->if_flags & (IFF_RUNNING | IFF_AUTO)) == 0) {
> +     if_flags = ifp->if_flags;
> +     if ((if_flags & IFF_UP) == 0 ||
> +         (if_flags & (IFF_RUNNING | IFF_AUTO)) == 0) {
>               m_freem (m);
>               splx (s);
>               return (ENETDOWN);
>       }
>  
> -     if ((ifp->if_flags & (IFF_RUNNING | IFF_AUTO)) == IFF_AUTO) {
> +     if ((if_flags & (IFF_RUNNING | IFF_AUTO)) == IFF_AUTO) {
>               /*
>                * Interface is not yet running, but auto-dial.  Need
>                * to start LCP for it.
>                */
> -             ifp->if_flags |= IFF_RUNNING;
> -             splx(s);
> -             lcp.Open(sp);
> -             s = splnet();
> +             task_add(systq, &sp->pp_autodial);
>       }
>  
>       if (dst->sa_family == AF_INET) {
> @@ -746,6 +760,8 @@ sppp_attach(struct ifnet *ifp)
>       sp->pp_up = lcp.Up;
>       sp->pp_down = lcp.Down;
>  
> +     task_set(&sp->pp_autodial, sppp_autodial, sp);
> +
>       for (i = 0; i < IDX_COUNT; i++)
>               timeout_set(&sp->ch[i], (cps[i])->TO, (void *)sp);
>       timeout_set(&sp->pap_my_to_ch, sppp_pap_my_TO, (void *)sp);
> @@ -762,6 +778,8 @@ sppp_detach(struct ifnet *ifp)
>  {
>       struct sppp **q, *p, *sp = (struct sppp*) ifp;
>       int i;
> +
> +     taskq_del_barrier(systq, &sp->pp_autodial);
>  
>       sppp_ipcp_destroy(sp);
>       sppp_ipv6cp_destroy(sp);

Reply via email to