> On 27 Aug 2022, at 00:04, Alexander Bluhm <alexander.bl...@gmx.net> wrote: > > Anyone willing to test or ok this? >
This fixes weird `ifa’ refcounting. I like this. Could the ifaref() and ifafree() names use the same notation? Like ifaref() and ifarele() or ifaget() and ifafree() or something else? > I would like to get it in before kn@ bumps ports due to net/if_var.h > changes. > > On Wed, Aug 24, 2022 at 03:14:35PM +0200, Alexander Bluhm wrote: >> On Tue, Aug 23, 2022 at 02:47:11PM +0200, Stefan Sperling wrote: >>> ddb{2}> show struct ifaddr 0xffff8000004e9400 >>> struct ifaddr at 0xffff8000004e9400 (64 bytes) {ifa_addr = (struct sockaddr >>> *)0 >>> xdeaf0009deafbead, ifa_dstaddr = (struct sockaddr *)0x20ef1a8af1d895de, >>> ifa_net >>> mask = (struct sockaddr *)0xdeafbeaddeafbead, ifa_ifp = (struct ifnet >>> *)0xdeafb >>> eaddeafbead, ifa_list = {tqe_next = (struct ifaddr *)0xdeafbeaddeafbead, >>> tqe_pr >>> ev = 0xdeafbeaddeafbead}, ifa_flags = 0xdeafbead, ifa_refcnt = 0xdeafbead, >>> ifa_ >>> metric = 0xdeafbead} >> >> The ifaddr has been freed. That should not happen, as ifafree() >> uses reference counting. But this refcount is a simple integer >> increment, that is not MP safe. In theory all ++ should be protected >> by exclusive netlock or shared netlock combiined with kernel lock. >> But maybe I missed a path. >> >> What are you doing with the machine? Forwarding, IPv6, Stress test? >> Do you have network interfaces with multiple network queues? >> >> Anyway, instead of adding kernel locks here and there, use a propper >> struct refcnt. The variable ifatrash is never read, it looks like >> a ddb debugging feature. But for debugging refcount leaks we have >> dt(4) now. >> >> enc and mpls are special, they have interface address as part of >> their sc structure. I use refcount anyway, the new panic prevents >> use after free. >> >> This has passed a full regres run. >> >> ok? >> >> bluhm >> >> Index: dev/dt/dt_prov_static.c >> =================================================================== >> RCS file: /data/mirror/openbsd/cvs/src/sys/dev/dt/dt_prov_static.c,v >> retrieving revision 1.14 >> diff -u -p -r1.14 dt_prov_static.c >> --- dev/dt/dt_prov_static.c 28 Jun 2022 09:32:27 -0000 1.14 >> +++ dev/dt/dt_prov_static.c 23 Aug 2022 17:16:55 -0000 >> @@ -88,9 +88,10 @@ DT_STATIC_PROBE0(smr, wakeup); >> DT_STATIC_PROBE2(smr, thread, "uint64_t", "uint64_t"); >> >> /* >> - * reference counting >> + * reference counting, keep in sync with sys/refcnt.h >> */ >> DT_STATIC_PROBE0(refcnt, none); >> +DT_STATIC_PROBE3(refcnt, ifaddr, "void *", "int", "int"); >> DT_STATIC_PROBE3(refcnt, inpcb, "void *", "int", "int"); >> DT_STATIC_PROBE3(refcnt, tdb, "void *", "int", "int"); >> >> @@ -135,6 +136,7 @@ struct dt_probe *const dtps_static[] = { >> &_DT_STATIC_P(smr, thread), >> /* refcnt */ >> &_DT_STATIC_P(refcnt, none), >> + &_DT_STATIC_P(refcnt, ifaddr), >> &_DT_STATIC_P(refcnt, inpcb), >> &_DT_STATIC_P(refcnt, tdb), >> }; >> Index: net/if_enc.c >> =================================================================== >> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_enc.c,v >> retrieving revision 1.78 >> diff -u -p -r1.78 if_enc.c >> --- net/if_enc.c 28 Dec 2020 14:28:50 -0000 1.78 >> +++ net/if_enc.c 24 Aug 2022 07:51:49 -0000 >> @@ -100,6 +100,7 @@ enc_clone_create(struct if_clone *ifc, i >> * and empty ifa of type AF_LINK for this purpose. >> */ >> if_alloc_sadl(ifp); >> + refcnt_init_trace(&sc->sc_ifa.ifa_refcnt, DT_REFCNT_IDX_IFADDR); >> sc->sc_ifa.ifa_ifp = ifp; >> sc->sc_ifa.ifa_addr = sdltosa(ifp->if_sadl); >> sc->sc_ifa.ifa_netmask = NULL; >> @@ -152,6 +153,10 @@ enc_clone_destroy(struct ifnet *ifp) >> NET_UNLOCK(); >> >> if_detach(ifp); >> + if (refcnt_rele(&sc->sc_ifa.ifa_refcnt) == 0) { >> + panic("%s: ifa refcnt has %u refs", __func__, >> + sc->sc_ifa.ifa_refcnt.r_refs); >> + } >> free(sc, M_DEVBUF, sizeof(*sc)); >> >> return (0); >> Index: net/if_mpe.c >> =================================================================== >> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_mpe.c,v >> retrieving revision 1.101 >> diff -u -p -r1.101 if_mpe.c >> --- net/if_mpe.c 8 Nov 2021 04:50:54 -0000 1.101 >> +++ net/if_mpe.c 24 Aug 2022 07:52:22 -0000 >> @@ -128,6 +128,7 @@ mpe_clone_create(struct if_clone *ifc, i >> sc->sc_txhprio = 0; >> sc->sc_rxhprio = IF_HDRPRIO_PACKET; >> sc->sc_rdomain = 0; >> + refcnt_init_trace(&sc->sc_ifa.ifa_refcnt, DT_REFCNT_IDX_IFADDR); >> sc->sc_ifa.ifa_ifp = ifp; >> sc->sc_ifa.ifa_addr = sdltosa(ifp->if_sadl); >> sc->sc_smpls.smpls_len = sizeof(sc->sc_smpls); >> @@ -154,6 +155,10 @@ mpe_clone_destroy(struct ifnet *ifp) >> ifq_barrier(&ifp->if_snd); >> >> if_detach(ifp); >> + if (refcnt_rele(&sc->sc_ifa.ifa_refcnt) == 0) { >> + panic("%s: ifa refcnt has %u refs", __func__, >> + sc->sc_ifa.ifa_refcnt.r_refs); >> + } >> free(sc, M_DEVBUF, sizeof *sc); >> return (0); >> } >> Index: net/if_mpip.c >> =================================================================== >> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_mpip.c,v >> retrieving revision 1.15 >> diff -u -p -r1.15 if_mpip.c >> --- net/if_mpip.c 26 Mar 2021 19:00:21 -0000 1.15 >> +++ net/if_mpip.c 24 Aug 2022 07:52:26 -0000 >> @@ -128,6 +128,7 @@ mpip_clone_create(struct if_clone *ifc, >> bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t)); >> #endif >> >> + refcnt_init_trace(&sc->sc_ifa.ifa_refcnt, DT_REFCNT_IDX_IFADDR); >> sc->sc_ifa.ifa_ifp = ifp; >> sc->sc_ifa.ifa_addr = sdltosa(ifp->if_sadl); >> >> @@ -152,7 +153,10 @@ mpip_clone_destroy(struct ifnet *ifp) >> ifq_barrier(&ifp->if_snd); >> >> if_detach(ifp); >> - >> + if (refcnt_rele(&sc->sc_ifa.ifa_refcnt) == 0) { >> + panic("%s: ifa refcnt has %u refs", __func__, >> + sc->sc_ifa.ifa_refcnt.r_refs); >> + } >> free(sc->sc_neighbor, M_DEVBUF, sizeof(*sc->sc_neighbor)); >> free(sc, M_DEVBUF, sizeof(*sc)); >> >> Index: net/if_mpw.c >> =================================================================== >> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_mpw.c,v >> retrieving revision 1.62 >> diff -u -p -r1.62 if_mpw.c >> --- net/if_mpw.c 26 Mar 2021 19:00:21 -0000 1.62 >> +++ net/if_mpw.c 24 Aug 2022 07:52:00 -0000 >> @@ -122,6 +122,7 @@ mpw_clone_create(struct if_clone *ifc, i >> sc->sc_txhprio = 0; >> sc->sc_rxhprio = IF_HDRPRIO_PACKET; >> sc->sc_rdomain = 0; >> + refcnt_init_trace(&sc->sc_ifa.ifa_refcnt, DT_REFCNT_IDX_IFADDR); >> sc->sc_ifa.ifa_ifp = ifp; >> sc->sc_ifa.ifa_addr = sdltosa(ifp->if_sadl); >> sc->sc_smpls.smpls_len = sizeof(sc->sc_smpls); >> @@ -149,7 +150,10 @@ mpw_clone_destroy(struct ifnet *ifp) >> >> ether_ifdetach(ifp); >> if_detach(ifp); >> - >> + if (refcnt_rele(&sc->sc_ifa.ifa_refcnt) == 0) { >> + panic("%s: ifa refcnt has %u refs", __func__, >> + sc->sc_ifa.ifa_refcnt.r_refs); >> + } >> free(sc->sc_neighbor, M_DEVBUF, sizeof(*sc->sc_neighbor)); >> free(sc, M_DEVBUF, sizeof(*sc)); >> >> Index: net/if_pppx.c >> =================================================================== >> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_pppx.c,v >> retrieving revision 1.121 >> diff -u -p -r1.121 if_pppx.c >> --- net/if_pppx.c 25 Jul 2022 08:29:26 -0000 1.121 >> +++ net/if_pppx.c 23 Aug 2022 17:34:20 -0000 >> @@ -659,6 +659,7 @@ pppx_add_session(struct pppx_dev *pxd, s >> ifaddr.sin_addr = req->pr_ip_srcaddr; >> >> ia = malloc(sizeof (*ia), M_IFADDR, M_WAITOK | M_ZERO); >> + refcnt_init_trace(&ia->ia_ifa.ifa_refcnt, DT_REFCNT_IDX_IFADDR); >> >> ia->ia_addr.sin_family = AF_INET; >> ia->ia_addr.sin_len = sizeof(struct sockaddr_in); >> Index: net/if_var.h >> =================================================================== >> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_var.h,v >> retrieving revision 1.114 >> diff -u -p -r1.114 if_var.h >> --- net/if_var.h 20 Feb 2021 04:55:52 -0000 1.114 >> +++ net/if_var.h 23 Aug 2022 16:25:32 -0000 >> @@ -242,7 +242,7 @@ struct ifaddr { >> struct ifnet *ifa_ifp; /* back-pointer to interface */ >> TAILQ_ENTRY(ifaddr) ifa_list; /* list of addresses for interface */ >> u_int ifa_flags; /* interface flags, see below */ >> - u_int ifa_refcnt; /* number of `rt_ifa` references */ >> + struct refcnt ifa_refcnt; /* number of `rt_ifa` references */ >> int ifa_metric; /* cost of going out this interface */ >> }; >> >> @@ -333,6 +333,7 @@ int p2p_bpf_mtap(caddr_t, const struct m >> struct ifaddr *ifa_ifwithaddr(struct sockaddr *, u_int); >> struct ifaddr *ifa_ifwithdstaddr(struct sockaddr *, u_int); >> struct ifaddr *ifaof_ifpforaddr(struct sockaddr *, struct ifnet *); >> +struct ifaddr *ifaref(struct ifaddr *); >> void ifafree(struct ifaddr *); >> >> int if_isconnected(const struct ifnet *, unsigned int); >> Index: net/route.c >> =================================================================== >> RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.c,v >> retrieving revision 1.413 >> diff -u -p -r1.413 route.c >> --- net/route.c 28 Jul 2022 22:19:09 -0000 1.413 >> +++ net/route.c 23 Aug 2022 16:31:29 -0000 >> @@ -146,7 +146,6 @@ extern unsigned int rtmap_limit; >> >> struct cpumem * rtcounters; >> int rttrash; /* routes not in table but not freed */ >> -int ifatrash; /* ifas not in ifp list but not free */ >> >> struct pool rtentry_pool; /* pool for rtentry structures */ >> struct pool rttimer_pool; /* pool for rttimer structures */ >> @@ -512,16 +511,19 @@ rtfree(struct rtentry *rt) >> pool_put(&rtentry_pool, rt); >> } >> >> +struct ifaddr * >> +ifaref(struct ifaddr *ifa) >> +{ >> + refcnt_take(&ifa->ifa_refcnt); >> + return ifa; >> +} >> + >> void >> ifafree(struct ifaddr *ifa) >> { >> - if (ifa == NULL) >> - panic("ifafree"); >> - if (ifa->ifa_refcnt == 0) { >> - ifatrash--; >> - free(ifa, M_IFADDR, 0); >> - } else >> - ifa->ifa_refcnt--; >> + if (refcnt_rele(&ifa->ifa_refcnt) == 0) >> + return; >> + free(ifa, M_IFADDR, 0); >> } >> >> /* >> @@ -901,8 +903,7 @@ rtrequest(int req, struct rt_addrinfo *i >> rt_mpls_clear(rt); >> #endif >> >> - ifa->ifa_refcnt++; >> - rt->rt_ifa = ifa; >> + rt->rt_ifa = ifaref(ifa); >> rt->rt_ifidx = ifp->if_index; >> /* >> * Copy metrics and a back pointer from the cloned >> @@ -1857,8 +1858,8 @@ db_print_ifa(struct ifaddr *ifa) >> db_print_sa(ifa->ifa_dstaddr); >> db_printf(" ifa_mask="); >> db_print_sa(ifa->ifa_netmask); >> - db_printf(" flags=0x%x, refcnt=%d, metric=%d\n", >> - ifa->ifa_flags, ifa->ifa_refcnt, ifa->ifa_metric); >> + db_printf(" flags=0x%x, refcnt=%u, metric=%d\n", >> + ifa->ifa_flags, ifa->ifa_refcnt.r_refs, ifa->ifa_metric); >> } >> >> /* >> Index: net/rtsock.c >> =================================================================== >> RCS file: /data/mirror/openbsd/cvs/src/sys/net/rtsock.c,v >> retrieving revision 1.341 >> diff -u -p -r1.341 rtsock.c >> --- net/rtsock.c 22 Aug 2022 21:18:48 -0000 1.341 >> +++ net/rtsock.c 23 Aug 2022 16:27:03 -0000 >> @@ -1115,8 +1115,7 @@ rtm_output(struct rt_msghdr *rtm, struct >> ifp->if_rtrequest(ifp, RTM_DELETE, rt); >> ifafree(rt->rt_ifa); >> >> - ifa->ifa_refcnt++; >> - rt->rt_ifa = ifa; >> + rt->rt_ifa = ifaref(ifa); >> rt->rt_ifidx = ifa->ifa_ifp->if_index; >> /* recheck link state after ifp change */ >> rt_if_linkstate_change(rt, ifa->ifa_ifp, >> Index: netinet/in.c >> =================================================================== >> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in.c,v >> retrieving revision 1.175 >> diff -u -p -r1.175 in.c >> --- netinet/in.c 6 Aug 2022 15:57:59 -0000 1.175 >> +++ netinet/in.c 23 Aug 2022 16:59:09 -0000 >> @@ -379,6 +379,7 @@ in_ioctl_set_ifaddr(u_long cmd, caddr_t >> } >> if (ia == NULL) { >> ia = malloc(sizeof *ia, M_IFADDR, M_WAITOK | M_ZERO); >> + refcnt_init_trace(&ia->ia_ifa.ifa_refcnt, DT_REFCNT_IDX_IFADDR); >> ia->ia_addr.sin_family = AF_INET; >> ia->ia_addr.sin_len = sizeof(ia->ia_addr); >> ia->ia_ifa.ifa_addr = sintosa(&ia->ia_addr); >> @@ -475,6 +476,8 @@ in_ioctl_change_ifaddr(u_long cmd, caddr >> >> if (ia == NULL) { >> ia = malloc(sizeof *ia, M_IFADDR, M_WAITOK | M_ZERO); >> + refcnt_init_trace(&ia->ia_ifa.ifa_refcnt, >> + DT_REFCNT_IDX_IFADDR); >> ia->ia_addr.sin_family = AF_INET; >> ia->ia_addr.sin_len = sizeof(ia->ia_addr); >> ia->ia_ifa.ifa_addr = sintosa(&ia->ia_addr); >> @@ -745,7 +748,6 @@ in_purgeaddr(struct ifaddr *ifa) >> { >> struct ifnet *ifp = ifa->ifa_ifp; >> struct in_ifaddr *ia = ifatoia(ifa); >> - extern int ifatrash; >> >> NET_ASSERT_LOCKED(); >> >> @@ -760,7 +762,6 @@ in_purgeaddr(struct ifaddr *ifa) >> ia->ia_allhosts = NULL; >> } >> >> - ifatrash++; >> ia->ia_ifp = NULL; >> ifafree(&ia->ia_ifa); >> } >> Index: netinet6/in6.c >> =================================================================== >> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6.c,v >> retrieving revision 1.247 >> diff -u -p -r1.247 in6.c >> --- netinet6/in6.c 6 Aug 2022 15:57:59 -0000 1.247 >> +++ netinet6/in6.c 23 Aug 2022 16:55:17 -0000 >> @@ -647,6 +647,8 @@ in6_update_ifa(struct ifnet *ifp, struct >> if (ia6 == NULL) { >> hostIsNew = 1; >> ia6 = malloc(sizeof(*ia6), M_IFADDR, M_WAITOK | M_ZERO); >> + refcnt_init_trace(&ia6->ia_ifa.ifa_refcnt, >> + DT_REFCNT_IDX_IFADDR); >> LIST_INIT(&ia6->ia6_memberships); >> /* Initialize the address and masks, and put time stamp */ >> ia6->ia_ifa.ifa_addr = sin6tosa(&ia6->ia_addr); >> @@ -938,7 +940,6 @@ void >> in6_unlink_ifa(struct in6_ifaddr *ia6, struct ifnet *ifp) >> { >> struct ifaddr *ifa = &ia6->ia_ifa; >> - extern int ifatrash; >> int plen; >> >> NET_ASSERT_LOCKED(); >> @@ -953,7 +954,6 @@ in6_unlink_ifa(struct in6_ifaddr *ia6, s >> rt_ifa_purge(ifa); >> ifa_del(ifp, ifa); >> >> - ifatrash++; >> ia6->ia_ifp = NULL; >> ifafree(&ia6->ia_ifa); >> } >> Index: netinet6/nd6_nbr.c >> =================================================================== >> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/nd6_nbr.c,v >> retrieving revision 1.132 >> diff -u -p -r1.132 nd6_nbr.c >> --- netinet6/nd6_nbr.c 8 Aug 2022 17:47:59 -0000 1.132 >> +++ netinet6/nd6_nbr.c 23 Aug 2022 16:27:11 -0000 >> @@ -1124,8 +1124,7 @@ nd6_dad_start(struct ifaddr *ifa) >> * first packet to be sent from the interface after interface >> * (re)initialization. >> */ >> - dp->dad_ifa = ifa; >> - ifa->ifa_refcnt++; /* just for safety */ >> + dp->dad_ifa = ifaref(ifa); >> dp->dad_count = ip6_dad_count; >> dp->dad_ns_icount = dp->dad_na_icount = 0; >> dp->dad_ns_ocount = dp->dad_ns_tcount = 0; >> Index: sys/refcnt.h >> =================================================================== >> RCS file: /data/mirror/openbsd/cvs/src/sys/sys/refcnt.h,v >> retrieving revision 1.7 >> diff -u -p -r1.7 refcnt.h >> --- sys/refcnt.h 28 Jun 2022 09:32:28 -0000 1.7 >> +++ sys/refcnt.h 23 Aug 2022 16:54:02 -0000 >> @@ -43,8 +43,10 @@ void refcnt_finalize(struct refcnt *, co >> int refcnt_shared(struct refcnt *); >> unsigned int refcnt_read(struct refcnt *); >> >> -#define DT_REFCNT_IDX_INPCB 1 >> -#define DT_REFCNT_IDX_TDB 2 >> +/* sorted alphabetically, keep in sync with dev/dt/dt_prov_static.c */ >> +#define DT_REFCNT_IDX_IFADDR 1 >> +#define DT_REFCNT_IDX_INPCB 2 >> +#define DT_REFCNT_IDX_TDB 3 >> >> #endif /* _KERNEL */ >> >