Hillf,

With the latest version (I have attached the changes I made on my tree), the
system failed to start up, with CPU stalls.


Hillf Danton <hdan...@sina.com> 于2020年8月22日周六 上午11:30写道:
>
>
> On Thu, 20 Aug 2020 20:43:17 +0800 Hillf Danton wrote:
> > Hi Jike,
> >
> > On Thu, 20 Aug 2020 15:43:17 +0800 Jike Song wrote:
> > > Hi Josh,
> > >
> > > On Fri, Jul 3, 2020 at 2:14 AM Josh Hunt <joh...@akamai.com> wrote:
> > > {snip}
> > > > Initial results with Cong's patch look promising, so far no stalls. We
> > > > will let it run over the long weekend and report back on Tuesday.
> > > >
> > > > Paolo - I have concerns about possible performance regression with the
> > > > change as well. If you can gather some data that would be great. If
> > > > things look good with our low throughput test over the weekend we can
> > > > also try assessing performance next week.
> > > >
> > >
> > > We met possibly the same problem when testing nvidia/mellanox's
> >
> > Below is what was sent in reply to this thread early last month with
> > minor tuning, based on the seqlock. Feel free to drop an echo if it
> > makes ant-antenna-size sense in your tests.
> >
> > > GPUDirect RDMA product, we found that changing NET_SCH_DEFAULT to
> > > DEFAULT_FQ_CODEL mitigated the problem, having no idea why. Maybe you
> > > can also have a try?
> > >
> > > Besides, our testing is pretty complex, do you have a quick test to
> > > reproduce it?
> > >
> > > --
> > > Thanks,
> > > Jike
> >
> >
> > --- a/include/net/sch_generic.h
> > +++ b/include/net/sch_generic.h
> > @@ -79,6 +79,7 @@ struct Qdisc {
> >  #define TCQ_F_INVISIBLE              0x80 /* invisible by default in dump 
> > */
> >  #define TCQ_F_NOLOCK         0x100 /* qdisc does not require locking */
> >  #define TCQ_F_OFFLOADED              0x200 /* qdisc is offloaded to HW */
> > +     int                     pkt_seq;
> >       u32                     limit;
> >       const struct Qdisc_ops  *ops;
> >       struct qdisc_size_table __rcu *stab;
> > @@ -156,6 +157,7 @@ static inline bool qdisc_is_empty(const
> >  static inline bool qdisc_run_begin(struct Qdisc *qdisc)
> >  {
> >       if (qdisc->flags & TCQ_F_NOLOCK) {
> > +             qdisc->pkt_seq++;
> >               if (!spin_trylock(&qdisc->seqlock))
> >                       return false;
> >               WRITE_ONCE(qdisc->empty, false);
> > --- a/include/net/pkt_sched.h
> > +++ b/include/net/pkt_sched.h
> > @@ -117,7 +117,9 @@ void __qdisc_run(struct Qdisc *q);
> >
> >  static inline void qdisc_run(struct Qdisc *q)
> >  {
> > -     if (qdisc_run_begin(q)) {
> > +     while (qdisc_run_begin(q)) {
> > +             int seq = q->pkt_seq;
> > +
> >               /* NOLOCK qdisc must check 'state' under the qdisc seqlock
> >                * to avoid racing with dev_qdisc_reset()
> >                */
> > @@ -125,6 +127,9 @@ static inline void qdisc_run(struct Qdis
> >                   likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
> >                       __qdisc_run(q);
> >               qdisc_run_end(q);
> > +
> > +             if (!(q->flags & TCQ_F_NOLOCK) || seq == q->pkt_seq)
> > +                     return;
> >       }
> >  }
>
> The echo from Feng indicates that it's hard to conclude that TCQ_F_NOLOCK
> is the culprit, so let's try again with it ignored for now.
>
> Every pkt enqueued on pfifo_fast is tracked in the diff below, and pkts
> enqueued while we're running the qdisc are detected and handled, to reduce
> the chance of the stuck pkts that were reported.
>
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -79,6 +79,7 @@ struct Qdisc {
>  #define TCQ_F_INVISIBLE                0x80 /* invisible by default in dump 
> */
>  #define TCQ_F_NOLOCK           0x100 /* qdisc does not require locking */
>  #define TCQ_F_OFFLOADED                0x200 /* qdisc is offloaded to HW */
> +       int                     pkt_seq;
>         u32                     limit;
>         const struct Qdisc_ops  *ops;
>         struct qdisc_size_table __rcu *stab;
> --- a/net/sched/sch_generic.c
> +++ b/net/sched/sch_generic.c
> @@ -631,6 +631,7 @@ static int pfifo_fast_enqueue(struct sk_
>                         return qdisc_drop(skb, qdisc, to_free);
>         }
>
> +       qdisc->pkt_seq++;
>         qdisc_update_stats_at_enqueue(qdisc, pkt_len);
>         return NET_XMIT_SUCCESS;
>  }
> --- a/include/net/pkt_sched.h
> +++ b/include/net/pkt_sched.h
> @@ -117,7 +117,8 @@ void __qdisc_run(struct Qdisc *q);
>
>  static inline void qdisc_run(struct Qdisc *q)
>  {
> -       if (qdisc_run_begin(q)) {
> +       while (qdisc_run_begin(q)) {
> +               int seq = q->pkt_seq;
>                 /* NOLOCK qdisc must check 'state' under the qdisc seqlock
>                  * to avoid racing with dev_qdisc_reset()
>                  */
> @@ -125,6 +126,12 @@ static inline void qdisc_run(struct Qdis
>                     likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
>                         __qdisc_run(q);
>                 qdisc_run_end(q);
> +
> +               /* go another round if there are pkts enqueued after
> +                * taking seq_lock
> +                */
> +               if (seq != q->pkt_seq)
> +                       continue;
>         }
>  }
>
>

Attachment: fix_nolock_from_hillf.patch
Description: Binary data

Reply via email to