> On 27 Aug 2022, at 00:04, Alexander Bluhm <alexander.bl...@gmx.net> wrote:
> 
> Anyone willing to test or ok this?
> 

This fixes weird `ifa’ refcounting. I like this.

Could the ifaref() and ifafree() names use the same notation? Like
ifaref() and ifarele() or ifaget() and ifafree() or something else?


> I would like to get it in before kn@ bumps ports due to net/if_var.h
> changes.
> 
> On Wed, Aug 24, 2022 at 03:14:35PM +0200, Alexander Bluhm wrote:
>> On Tue, Aug 23, 2022 at 02:47:11PM +0200, Stefan Sperling wrote:
>>> ddb{2}> show struct ifaddr 0xffff8000004e9400
>>> struct ifaddr at 0xffff8000004e9400 (64 bytes) {ifa_addr = (struct sockaddr 
>>> *)0
>>> xdeaf0009deafbead, ifa_dstaddr = (struct sockaddr *)0x20ef1a8af1d895de, 
>>> ifa_net
>>> mask = (struct sockaddr *)0xdeafbeaddeafbead, ifa_ifp = (struct ifnet 
>>> *)0xdeafb
>>> eaddeafbead, ifa_list = {tqe_next = (struct ifaddr *)0xdeafbeaddeafbead, 
>>> tqe_pr
>>> ev = 0xdeafbeaddeafbead}, ifa_flags = 0xdeafbead, ifa_refcnt = 0xdeafbead, 
>>> ifa_
>>> metric = 0xdeafbead}
>> 
>> The ifaddr has been freed.  That should not happen, as ifafree()
>> uses reference counting.  But this refcount is a simple integer
>> increment, that is not MP safe.  In theory all ++ should be protected
>> by exclusive netlock or shared netlock combiined with kernel lock.
>> But maybe I missed a path.
>> 
>> What are you doing with the machine?  Forwarding, IPv6, Stress test?
>> Do you have network interfaces with multiple network queues?
>> 
>> Anyway, instead of adding kernel locks here and there, use a propper
>> struct refcnt.  The variable ifatrash is never read, it looks like
>> a ddb debugging feature.  But for debugging refcount leaks we have
>> dt(4) now.
>> 
>> enc and mpls are special, they have interface address as part of
>> their sc structure.  I use refcount anyway, the new panic prevents
>> use after free.
>> 
>> This has passed a full regres run.
>> 
>> ok?
>> 
>> bluhm
>> 
>> Index: dev/dt/dt_prov_static.c
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/dev/dt/dt_prov_static.c,v
>> retrieving revision 1.14
>> diff -u -p -r1.14 dt_prov_static.c
>> --- dev/dt/dt_prov_static.c  28 Jun 2022 09:32:27 -0000      1.14
>> +++ dev/dt/dt_prov_static.c  23 Aug 2022 17:16:55 -0000
>> @@ -88,9 +88,10 @@ DT_STATIC_PROBE0(smr, wakeup);
>> DT_STATIC_PROBE2(smr, thread, "uint64_t", "uint64_t");
>> 
>> /*
>> - * reference counting
>> + * reference counting, keep in sync with sys/refcnt.h
>>  */
>> DT_STATIC_PROBE0(refcnt, none);
>> +DT_STATIC_PROBE3(refcnt, ifaddr, "void *", "int", "int");
>> DT_STATIC_PROBE3(refcnt, inpcb, "void *", "int", "int");
>> DT_STATIC_PROBE3(refcnt, tdb, "void *", "int", "int");
>> 
>> @@ -135,6 +136,7 @@ struct dt_probe *const dtps_static[] = {
>>      &_DT_STATIC_P(smr, thread),
>>      /* refcnt */
>>      &_DT_STATIC_P(refcnt, none),
>> +    &_DT_STATIC_P(refcnt, ifaddr),
>>      &_DT_STATIC_P(refcnt, inpcb),
>>      &_DT_STATIC_P(refcnt, tdb),
>> };
>> Index: net/if_enc.c
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_enc.c,v
>> retrieving revision 1.78
>> diff -u -p -r1.78 if_enc.c
>> --- net/if_enc.c     28 Dec 2020 14:28:50 -0000      1.78
>> +++ net/if_enc.c     24 Aug 2022 07:51:49 -0000
>> @@ -100,6 +100,7 @@ enc_clone_create(struct if_clone *ifc, i
>>       * and empty ifa of type AF_LINK for this purpose.
>>       */
>>      if_alloc_sadl(ifp);
>> +    refcnt_init_trace(&sc->sc_ifa.ifa_refcnt, DT_REFCNT_IDX_IFADDR);
>>      sc->sc_ifa.ifa_ifp = ifp;
>>      sc->sc_ifa.ifa_addr = sdltosa(ifp->if_sadl);
>>      sc->sc_ifa.ifa_netmask = NULL;
>> @@ -152,6 +153,10 @@ enc_clone_destroy(struct ifnet *ifp)
>>      NET_UNLOCK();
>> 
>>      if_detach(ifp);
>> +    if (refcnt_rele(&sc->sc_ifa.ifa_refcnt) == 0) {
>> +            panic("%s: ifa refcnt has %u refs", __func__,
>> +                sc->sc_ifa.ifa_refcnt.r_refs);
>> +    }
>>      free(sc, M_DEVBUF, sizeof(*sc));
>> 
>>      return (0);
>> Index: net/if_mpe.c
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_mpe.c,v
>> retrieving revision 1.101
>> diff -u -p -r1.101 if_mpe.c
>> --- net/if_mpe.c     8 Nov 2021 04:50:54 -0000       1.101
>> +++ net/if_mpe.c     24 Aug 2022 07:52:22 -0000
>> @@ -128,6 +128,7 @@ mpe_clone_create(struct if_clone *ifc, i
>>      sc->sc_txhprio = 0;
>>      sc->sc_rxhprio = IF_HDRPRIO_PACKET;
>>      sc->sc_rdomain = 0;
>> +    refcnt_init_trace(&sc->sc_ifa.ifa_refcnt, DT_REFCNT_IDX_IFADDR);
>>      sc->sc_ifa.ifa_ifp = ifp;
>>      sc->sc_ifa.ifa_addr = sdltosa(ifp->if_sadl);
>>      sc->sc_smpls.smpls_len = sizeof(sc->sc_smpls);
>> @@ -154,6 +155,10 @@ mpe_clone_destroy(struct ifnet *ifp)
>>      ifq_barrier(&ifp->if_snd);
>> 
>>      if_detach(ifp);
>> +    if (refcnt_rele(&sc->sc_ifa.ifa_refcnt) == 0) {
>> +            panic("%s: ifa refcnt has %u refs", __func__,
>> +                sc->sc_ifa.ifa_refcnt.r_refs);
>> +    }
>>      free(sc, M_DEVBUF, sizeof *sc);
>>      return (0);
>> }
>> Index: net/if_mpip.c
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_mpip.c,v
>> retrieving revision 1.15
>> diff -u -p -r1.15 if_mpip.c
>> --- net/if_mpip.c    26 Mar 2021 19:00:21 -0000      1.15
>> +++ net/if_mpip.c    24 Aug 2022 07:52:26 -0000
>> @@ -128,6 +128,7 @@ mpip_clone_create(struct if_clone *ifc, 
>>      bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
>> #endif
>> 
>> +    refcnt_init_trace(&sc->sc_ifa.ifa_refcnt, DT_REFCNT_IDX_IFADDR);
>>      sc->sc_ifa.ifa_ifp = ifp;
>>      sc->sc_ifa.ifa_addr = sdltosa(ifp->if_sadl);
>> 
>> @@ -152,7 +153,10 @@ mpip_clone_destroy(struct ifnet *ifp)
>>      ifq_barrier(&ifp->if_snd);
>> 
>>      if_detach(ifp);
>> -
>> +    if (refcnt_rele(&sc->sc_ifa.ifa_refcnt) == 0) {
>> +            panic("%s: ifa refcnt has %u refs", __func__,
>> +                sc->sc_ifa.ifa_refcnt.r_refs);
>> +    }
>>      free(sc->sc_neighbor, M_DEVBUF, sizeof(*sc->sc_neighbor));
>>      free(sc, M_DEVBUF, sizeof(*sc));
>> 
>> Index: net/if_mpw.c
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_mpw.c,v
>> retrieving revision 1.62
>> diff -u -p -r1.62 if_mpw.c
>> --- net/if_mpw.c     26 Mar 2021 19:00:21 -0000      1.62
>> +++ net/if_mpw.c     24 Aug 2022 07:52:00 -0000
>> @@ -122,6 +122,7 @@ mpw_clone_create(struct if_clone *ifc, i
>>      sc->sc_txhprio = 0;
>>      sc->sc_rxhprio = IF_HDRPRIO_PACKET;
>>      sc->sc_rdomain = 0;
>> +    refcnt_init_trace(&sc->sc_ifa.ifa_refcnt, DT_REFCNT_IDX_IFADDR);
>>      sc->sc_ifa.ifa_ifp = ifp;
>>      sc->sc_ifa.ifa_addr = sdltosa(ifp->if_sadl);
>>      sc->sc_smpls.smpls_len = sizeof(sc->sc_smpls);
>> @@ -149,7 +150,10 @@ mpw_clone_destroy(struct ifnet *ifp)
>> 
>>      ether_ifdetach(ifp);
>>      if_detach(ifp);
>> -
>> +    if (refcnt_rele(&sc->sc_ifa.ifa_refcnt) == 0) {
>> +            panic("%s: ifa refcnt has %u refs", __func__,
>> +                sc->sc_ifa.ifa_refcnt.r_refs);
>> +    }
>>      free(sc->sc_neighbor, M_DEVBUF, sizeof(*sc->sc_neighbor));
>>      free(sc, M_DEVBUF, sizeof(*sc));
>> 
>> Index: net/if_pppx.c
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_pppx.c,v
>> retrieving revision 1.121
>> diff -u -p -r1.121 if_pppx.c
>> --- net/if_pppx.c    25 Jul 2022 08:29:26 -0000      1.121
>> +++ net/if_pppx.c    23 Aug 2022 17:34:20 -0000
>> @@ -659,6 +659,7 @@ pppx_add_session(struct pppx_dev *pxd, s
>>      ifaddr.sin_addr = req->pr_ip_srcaddr;
>> 
>>      ia = malloc(sizeof (*ia), M_IFADDR, M_WAITOK | M_ZERO);
>> +    refcnt_init_trace(&ia->ia_ifa.ifa_refcnt, DT_REFCNT_IDX_IFADDR);
>> 
>>      ia->ia_addr.sin_family = AF_INET;
>>      ia->ia_addr.sin_len = sizeof(struct sockaddr_in);
>> Index: net/if_var.h
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_var.h,v
>> retrieving revision 1.114
>> diff -u -p -r1.114 if_var.h
>> --- net/if_var.h     20 Feb 2021 04:55:52 -0000      1.114
>> +++ net/if_var.h     23 Aug 2022 16:25:32 -0000
>> @@ -242,7 +242,7 @@ struct ifaddr {
>>      struct  ifnet *ifa_ifp;         /* back-pointer to interface */
>>      TAILQ_ENTRY(ifaddr) ifa_list;   /* list of addresses for interface */
>>      u_int   ifa_flags;              /* interface flags, see below */
>> -    u_int   ifa_refcnt;             /* number of `rt_ifa` references */
>> +    struct  refcnt ifa_refcnt;      /* number of `rt_ifa` references */
>>      int     ifa_metric;             /* cost of going out this interface */
>> };
>> 
>> @@ -333,6 +333,7 @@ int      p2p_bpf_mtap(caddr_t, const struct m
>> struct       ifaddr *ifa_ifwithaddr(struct sockaddr *, u_int);
>> struct       ifaddr *ifa_ifwithdstaddr(struct sockaddr *, u_int);
>> struct       ifaddr *ifaof_ifpforaddr(struct sockaddr *, struct ifnet *);
>> +struct      ifaddr *ifaref(struct ifaddr *);
>> void ifafree(struct ifaddr *);
>> 
>> int  if_isconnected(const struct ifnet *, unsigned int);
>> Index: net/route.c
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.c,v
>> retrieving revision 1.413
>> diff -u -p -r1.413 route.c
>> --- net/route.c      28 Jul 2022 22:19:09 -0000      1.413
>> +++ net/route.c      23 Aug 2022 16:31:29 -0000
>> @@ -146,7 +146,6 @@ extern unsigned int      rtmap_limit;
>> 
>> struct cpumem *              rtcounters;
>> int                  rttrash;        /* routes not in table but not freed */
>> -int                 ifatrash;       /* ifas not in ifp list but not free */
>> 
>> struct pool  rtentry_pool;           /* pool for rtentry structures */
>> struct pool  rttimer_pool;           /* pool for rttimer structures */
>> @@ -512,16 +511,19 @@ rtfree(struct rtentry *rt)
>>      pool_put(&rtentry_pool, rt);
>> }
>> 
>> +struct ifaddr *
>> +ifaref(struct ifaddr *ifa)
>> +{
>> +    refcnt_take(&ifa->ifa_refcnt);
>> +    return ifa;
>> +}
>> +
>> void
>> ifafree(struct ifaddr *ifa)
>> {
>> -    if (ifa == NULL)
>> -            panic("ifafree");
>> -    if (ifa->ifa_refcnt == 0) {
>> -            ifatrash--;
>> -            free(ifa, M_IFADDR, 0);
>> -    } else
>> -            ifa->ifa_refcnt--;
>> +    if (refcnt_rele(&ifa->ifa_refcnt) == 0)
>> +            return;
>> +    free(ifa, M_IFADDR, 0);
>> }
>> 
>> /*
>> @@ -901,8 +903,7 @@ rtrequest(int req, struct rt_addrinfo *i
>>                      rt_mpls_clear(rt);
>> #endif
>> 
>> -            ifa->ifa_refcnt++;
>> -            rt->rt_ifa = ifa;
>> +            rt->rt_ifa = ifaref(ifa);
>>              rt->rt_ifidx = ifp->if_index;
>>              /*
>>               * Copy metrics and a back pointer from the cloned
>> @@ -1857,8 +1858,8 @@ db_print_ifa(struct ifaddr *ifa)
>>      db_print_sa(ifa->ifa_dstaddr);
>>      db_printf("  ifa_mask=");
>>      db_print_sa(ifa->ifa_netmask);
>> -    db_printf("  flags=0x%x, refcnt=%d, metric=%d\n",
>> -        ifa->ifa_flags, ifa->ifa_refcnt, ifa->ifa_metric);
>> +    db_printf("  flags=0x%x, refcnt=%u, metric=%d\n",
>> +        ifa->ifa_flags, ifa->ifa_refcnt.r_refs, ifa->ifa_metric);
>> }
>> 
>> /*
>> Index: net/rtsock.c
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/net/rtsock.c,v
>> retrieving revision 1.341
>> diff -u -p -r1.341 rtsock.c
>> --- net/rtsock.c     22 Aug 2022 21:18:48 -0000      1.341
>> +++ net/rtsock.c     23 Aug 2022 16:27:03 -0000
>> @@ -1115,8 +1115,7 @@ rtm_output(struct rt_msghdr *rtm, struct
>>                              ifp->if_rtrequest(ifp, RTM_DELETE, rt);
>>                              ifafree(rt->rt_ifa);
>> 
>> -                            ifa->ifa_refcnt++;
>> -                            rt->rt_ifa = ifa;
>> +                            rt->rt_ifa = ifaref(ifa);
>>                              rt->rt_ifidx = ifa->ifa_ifp->if_index;
>>                              /* recheck link state after ifp change */
>>                              rt_if_linkstate_change(rt, ifa->ifa_ifp,
>> Index: netinet/in.c
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in.c,v
>> retrieving revision 1.175
>> diff -u -p -r1.175 in.c
>> --- netinet/in.c     6 Aug 2022 15:57:59 -0000       1.175
>> +++ netinet/in.c     23 Aug 2022 16:59:09 -0000
>> @@ -379,6 +379,7 @@ in_ioctl_set_ifaddr(u_long cmd, caddr_t 
>>      }
>>      if (ia == NULL) {
>>              ia = malloc(sizeof *ia, M_IFADDR, M_WAITOK | M_ZERO);
>> +            refcnt_init_trace(&ia->ia_ifa.ifa_refcnt, DT_REFCNT_IDX_IFADDR);
>>              ia->ia_addr.sin_family = AF_INET;
>>              ia->ia_addr.sin_len = sizeof(ia->ia_addr);
>>              ia->ia_ifa.ifa_addr = sintosa(&ia->ia_addr);
>> @@ -475,6 +476,8 @@ in_ioctl_change_ifaddr(u_long cmd, caddr
>> 
>>              if (ia == NULL) {
>>                      ia = malloc(sizeof *ia, M_IFADDR, M_WAITOK | M_ZERO);
>> +                    refcnt_init_trace(&ia->ia_ifa.ifa_refcnt,
>> +                        DT_REFCNT_IDX_IFADDR);
>>                      ia->ia_addr.sin_family = AF_INET;
>>                      ia->ia_addr.sin_len = sizeof(ia->ia_addr);
>>                      ia->ia_ifa.ifa_addr = sintosa(&ia->ia_addr);
>> @@ -745,7 +748,6 @@ in_purgeaddr(struct ifaddr *ifa)
>> {
>>      struct ifnet *ifp = ifa->ifa_ifp;
>>      struct in_ifaddr *ia = ifatoia(ifa);
>> -    extern int ifatrash;
>> 
>>      NET_ASSERT_LOCKED();
>> 
>> @@ -760,7 +762,6 @@ in_purgeaddr(struct ifaddr *ifa)
>>              ia->ia_allhosts = NULL;
>>      }
>> 
>> -    ifatrash++;
>>      ia->ia_ifp = NULL;
>>      ifafree(&ia->ia_ifa);
>> }
>> Index: netinet6/in6.c
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6.c,v
>> retrieving revision 1.247
>> diff -u -p -r1.247 in6.c
>> --- netinet6/in6.c   6 Aug 2022 15:57:59 -0000       1.247
>> +++ netinet6/in6.c   23 Aug 2022 16:55:17 -0000
>> @@ -647,6 +647,8 @@ in6_update_ifa(struct ifnet *ifp, struct
>>      if (ia6 == NULL) {
>>              hostIsNew = 1;
>>              ia6 = malloc(sizeof(*ia6), M_IFADDR, M_WAITOK | M_ZERO);
>> +            refcnt_init_trace(&ia6->ia_ifa.ifa_refcnt,
>> +                DT_REFCNT_IDX_IFADDR);
>>              LIST_INIT(&ia6->ia6_memberships);
>>              /* Initialize the address and masks, and put time stamp */
>>              ia6->ia_ifa.ifa_addr = sin6tosa(&ia6->ia_addr);
>> @@ -938,7 +940,6 @@ void
>> in6_unlink_ifa(struct in6_ifaddr *ia6, struct ifnet *ifp)
>> {
>>      struct ifaddr *ifa = &ia6->ia_ifa;
>> -    extern int ifatrash;
>>      int plen;
>> 
>>      NET_ASSERT_LOCKED();
>> @@ -953,7 +954,6 @@ in6_unlink_ifa(struct in6_ifaddr *ia6, s
>>      rt_ifa_purge(ifa);
>>      ifa_del(ifp, ifa);
>> 
>> -    ifatrash++;
>>      ia6->ia_ifp = NULL;
>>      ifafree(&ia6->ia_ifa);
>> }
>> Index: netinet6/nd6_nbr.c
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/nd6_nbr.c,v
>> retrieving revision 1.132
>> diff -u -p -r1.132 nd6_nbr.c
>> --- netinet6/nd6_nbr.c       8 Aug 2022 17:47:59 -0000       1.132
>> +++ netinet6/nd6_nbr.c       23 Aug 2022 16:27:11 -0000
>> @@ -1124,8 +1124,7 @@ nd6_dad_start(struct ifaddr *ifa)
>>       * first packet to be sent from the interface after interface
>>       * (re)initialization.
>>       */
>> -    dp->dad_ifa = ifa;
>> -    ifa->ifa_refcnt++;      /* just for safety */
>> +    dp->dad_ifa = ifaref(ifa);
>>      dp->dad_count = ip6_dad_count;
>>      dp->dad_ns_icount = dp->dad_na_icount = 0;
>>      dp->dad_ns_ocount = dp->dad_ns_tcount = 0;
>> Index: sys/refcnt.h
>> ===================================================================
>> RCS file: /data/mirror/openbsd/cvs/src/sys/sys/refcnt.h,v
>> retrieving revision 1.7
>> diff -u -p -r1.7 refcnt.h
>> --- sys/refcnt.h     28 Jun 2022 09:32:28 -0000      1.7
>> +++ sys/refcnt.h     23 Aug 2022 16:54:02 -0000
>> @@ -43,8 +43,10 @@ void      refcnt_finalize(struct refcnt *, co
>> int  refcnt_shared(struct refcnt *);
>> unsigned int refcnt_read(struct refcnt *);
>> 
>> -#define DT_REFCNT_IDX_INPCB 1
>> -#define DT_REFCNT_IDX_TDB   2
>> +/* sorted alphabetically, keep in sync with dev/dt/dt_prov_static.c */
>> +#define DT_REFCNT_IDX_IFADDR        1
>> +#define DT_REFCNT_IDX_INPCB 2
>> +#define DT_REFCNT_IDX_TDB   3
>> 
>> #endif /* _KERNEL */
>> 
> 

Reply via email to