Hi Martin, It appears to work. After patching, rebuilding, and installing the kernel, I could use the Internet with a lot of data transfer. I unplugged and plugged back in my wifi dongle twice without any crashes, and the Internet kept working.
However, I saw a crash I haven't seen before -- no traceback in ddb, just a blank screen (but with the LCD backlight on) and the fan spinning. Pressing caps lock didn't toggle the LED. This crash might be because of something else in the current /usr/src tree -- I can't rule that out because I was previously using a snapshot from two days earlier and some crashing bug could have been introduced since then. --Aaron On Mon, Dec 7, 2015 at 7:13 AM, Martin Pieuchot <m...@openbsd.org> wrote: > On 07/12/15(Mon) 12:29, Martin Pieuchot wrote: >> On 06/12/15(Sun) 13:52, aaron.mille...@gmail.com wrote: >> > >Synopsis: Unplugging my urtwn device causes an assertion failure in the >> > >driver code (see trace) >> > >Category: system kernel amd64 >> > >Environment: >> > System : OpenBSD 5.8 >> > Details : OpenBSD 5.8-current (GENERIC.MP) #1706: Fri Dec 4 >> > 01:49:55 MST 2015 >> > >> > dera...@amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP >> > >> > Architecture: OpenBSD.amd64 >> > Machine : amd64 >> > >Description: >> > >> > In the past, my urtwn(4) device (RTL8192CU chipset, I think) would stop >> > working after a while, and would start working again after unplugging and >> > plugging back in. I suspect it stopped working due to overheating, because >> > it's >> > really hot when I pull it out. Occasionally, this would crash the system. >> > >> > I upgraded from the Nov 25 snapshot to yesterday's snapshot, and now it >> > seems to fail every time with an assertion that drops me to the ddb prompt. >> > Here's the traceback I typed from a picture of the screen: >> > >> > panic: kernel diagnostic assertion "ifp != NULL" failed: file >> > "../../../../net/route.c", line 863 >> > Stopped at Debugger+0x9: leave >> > TID PID UID PRFLAGS PFLAGS CPU COMMAND >> > 18228 18228 77 0x10 0x80 3 dhclient >> > *26266 26266 0 0x14000 0x200 6 usbtask >> > Debugger() at Debugger+0x9 >> > panic() at panic+0xfe >> > __assert() at __assert+0x25 >> > rtrequest_delete() at rtrequest_delete+0x20a >> > rtdeletemsg() at rtdeletemsg+0xae >> > rtflushclone1() at rtflushclone1+0x98 >> > rn_walktree() at rn_walktree+0x83 >> > rtrequest_delete() at rtrequest_delete+0x18a >> > rtdeletemsg() at rtdeletemsg+0xae >> > rt_if_remove_rtdelete() at rt_if_remove_rtdelete+0x2b >> > rn_walktree() at rn_walktree+0x83 >> > rt_if_remove() at rt_if_remove+0x58 >> > if_detach() at if_detach+0x9e >> > urtwn_detach() at urtwn_detach+0x81 >> > end trace frame 0xffff800032cdddc0, count: 0 >> >> Could you show me the output of "route -n show" before unplugging your >> device? Are you using dhclient(8) on multiple interfaces? > > Ok I found the bug, can you tell me if the diff below helps? > > Index: net/route.c > =================================================================== > RCS file: /cvs/src/sys/net/route.c,v > retrieving revision 1.289 > diff -u -p -r1.289 route.c > --- net/route.c 5 Dec 2015 10:07:55 -0000 1.289 > +++ net/route.c 7 Dec 2015 15:10:15 -0000 > @@ -159,8 +159,7 @@ struct sockaddr *rt_plentosa(sa_family_t > > struct ifaddr *ifa_ifwithroute(int, struct sockaddr *, struct sockaddr *, > u_int); > -int rtrequest_delete(struct rt_addrinfo *, u_int8_t, struct ifnet *, > - struct rtentry **, u_int); > +int rt_delete(struct rtentry *, struct ifnet *, unsigned int); > > #ifdef DDB > void db_print_sa(struct sockaddr *); > @@ -613,38 +612,29 @@ out: > } > > /* > - * Delete a route and generate a message > + * Delete a route and generate a message. The caller must hold a reference > + * on ``rt''. > */ > int > -rtdeletemsg(struct rtentry *rt, struct ifnet *ifp, u_int tableid) > +rtdeletemsg(struct rtentry *rt, struct ifnet *ifp, unsigned int rtableid) > { > int error; > - struct rt_addrinfo info; > - unsigned int ifidx; > - struct sockaddr_in6 sa_mask; > > - /* > - * Request the new route so that the entry is not actually > - * deleted. That will allow the information being reported to > - * be accurate (and consistent with route_output()). > - */ > - bzero((caddr_t)&info, sizeof(info)); > - info.rti_info[RTAX_DST] = rt_key(rt); > - if (!ISSET(rt->rt_flags, RTF_HOST)) > - info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); > - info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; > - info.rti_flags = rt->rt_flags; > - ifidx = rt->rt_ifidx; > - error = rtrequest_delete(&info, rt->rt_priority, ifp, &rt, tableid); > - rt_missmsg(RTM_DELETE, &info, info.rti_flags, ifidx, error, tableid); > + KASSERT(rt->rt_ifidx == ifp->if_index); > + > + error = rt_delete(rt, ifp, rtableid); > if (error == 0) > - rtfree(rt); > + rt_sendmsg(rt, RTM_DELETE, rtableid); > + > return (error); > } > > static inline int > rtequal(struct rtentry *a, struct rtentry *b) > { > + if (a == b) > + return 1; > + > if (memcmp(rt_key(a), rt_key(b), rt_key(a)->sa_len) == 0 && > rt_plen(a) == rt_plen(b)) > return 1; > @@ -656,10 +646,25 @@ int > rtflushclone1(struct rtentry *rt, void *arg, u_int id) > { > struct rtentry *parent = arg; > + struct ifnet *ifp; > + > + ifp = if_get(rt->rt_ifidx); > + > + /* > + * This happens when an interface with a RTF_CLONING route is > + * being detached. In this case it's safe to bail because all > + * the routes are being purged by rt_if_remove(). > + */ > + if (ifp == NULL) > + return 0; > > - if ((rt->rt_flags & RTF_CLONED) != 0 && (rt->rt_parent == parent || > - rtequal(rt->rt_parent, parent))) > - rtdeletemsg(rt, NULL, id); > + if (ISSET(rt->rt_flags, RTF_CLONED) && rtequal(rt->rt_parent, > parent)) { > + rtref(rt); > + rtdeletemsg(rt, ifp, id); > + rtfree(rt); > + } > + > + if_put(ifp); > return 0; > } > > @@ -801,53 +806,20 @@ rt_getifa(struct rt_addrinfo *info, u_in > } > > int > -rtrequest_delete(struct rt_addrinfo *info, u_int8_t prio, struct ifnet *ifp, > - struct rtentry **ret_nrt, u_int tableid) > +rt_delete(struct rtentry *rt, struct ifnet *ifp, unsigned int rtableid) > { > - struct rtentry *rt; > - int error; > + struct sockaddr_in6 sa_mask; > > - splsoftassert(IPL_SOFTNET); > + KASSERT(ifp->if_index == rt->rt_ifidx); > > - if (!rtable_exists(tableid)) > - return (EAFNOSUPPORT); > - rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], > - info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], prio); > - if (rt == NULL) > - return (ESRCH); > -#ifndef SMALL_KERNEL > - /* > - * If we got multipath routes, we require users to specify > - * a matching gateway. > - */ > - if ((rt->rt_flags & RTF_MPATH) && > - info->rti_info[RTAX_GATEWAY] == NULL) { > - rtfree(rt); > - return (ESRCH); > - } > -#endif > - > - /* > - * Since RTP_LOCAL cannot be set by userland, make > - * sure that local routes are only modified by the > - * kernel. > - */ > - if ((rt->rt_flags & (RTF_LOCAL|RTF_BROADCAST)) && > - prio != RTP_LOCAL) { > - rtfree(rt); > - return (EINVAL); > - } > + splsoftassert(IPL_SOFTNET); > > - error = rtable_delete(tableid, info->rti_info[RTAX_DST], > - info->rti_info[RTAX_NETMASK], rt); > - if (error != 0) { > - rtfree(rt); > + if (rtable_delete(rtableid, rt_key(rt), rt_plen2mask(rt, &sa_mask), > rt)) > return (ESRCH); > - } > > /* clean up any cloned children */ > if ((rt->rt_flags & RTF_CLONING) != 0) > - rtflushclone(tableid, rt); > + rtflushclone(rtableid, rt); > > rtfree(rt->rt_gwroute); > rt->rt_gwroute = NULL; > @@ -857,23 +829,10 @@ rtrequest_delete(struct rt_addrinfo *inf > > rt->rt_flags &= ~RTF_UP; > > - if (ifp == NULL) { > - ifp = if_get(rt->rt_ifidx); > - KASSERT(ifp != NULL); > - ifp->if_rtrequest(ifp, RTM_DELETE, rt); > - if_put(ifp); > - } else { > - KASSERT(ifp->if_index == rt->rt_ifidx); > - ifp->if_rtrequest(ifp, RTM_DELETE, rt); > - } > + ifp->if_rtrequest(ifp, RTM_DELETE, rt); > > atomic_inc_int(&rttrash); > > - if (ret_nrt != NULL) > - *ret_nrt = rt; > - else > - rtfree(rt); > - > return (0); > } > > @@ -900,9 +859,50 @@ rtrequest(int req, struct rt_addrinfo *i > info->rti_info[RTAX_NETMASK] = NULL; > switch (req) { > case RTM_DELETE: > - error = rtrequest_delete(info, prio, NULL, ret_nrt, tableid); > - if (error) > + rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], > + info->rti_info[RTAX_NETMASK], > + info->rti_info[RTAX_GATEWAY], prio); > + if (rt == NULL) > + return (ESRCH); > + > +#ifndef SMALL_KERNEL > + /* > + * If we got multipath routes, we require users to specify > + * a matching gateway. > + */ > + if (ISSET(rt->rt_flags, RTF_MPATH) && > + info->rti_info[RTAX_GATEWAY] == NULL) { > + rtfree(rt); > + return (ESRCH); > + } > +#endif > + > + /* > + * Since RTP_LOCAL cannot be set by userland, make > + * sure that local routes are only modified by the > + * kernel. > + */ > + if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST) && > + prio != RTP_LOCAL) { > + rtfree(rt); > + return (EINVAL); > + } > + > + ifp = if_get(rt->rt_ifidx); > + KASSERT(ifp != NULL); > + error = rt_delete(rt, ifp, tableid); > + if_put(ifp); > + > + if (error) { > + rtfree(rt); > return (error); > + } > + > + if (ret_nrt != NULL) > + *ret_nrt = rt; > + else > + rtfree(rt); > + > break; > > case RTM_RESOLVE: > @@ -1067,7 +1067,7 @@ rtrequest(int req, struct rt_addrinfo *i > if (error != 0 && (crt = rtalloc(ndst, 0, tableid)) != NULL) { > /* overwrite cloned route */ > if ((crt->rt_flags & RTF_CLONED) != 0) { > - rtdeletemsg(crt, NULL, tableid); > + rtdeletemsg(crt, ifp, tableid); > error = rtable_insert(tableid, ndst, > info->rti_info[RTAX_NETMASK], > info->rti_info[RTAX_GATEWAY], > @@ -1235,8 +1235,8 @@ rt_ifa_del(struct ifaddr *ifa, int flags > struct rtentry *rt; > struct mbuf *m = NULL; > struct sockaddr *deldst; > - struct rt_addrinfo info; > - struct sockaddr_rtlabel sa_rl; > + struct sockaddr *mask = NULL; > + struct sockaddr *gateway = NULL; > unsigned int rtableid = ifp->if_rdomain; > uint8_t prio = ifp->if_priority + RTP_STATIC; > int error; > @@ -1256,16 +1256,10 @@ rt_ifa_del(struct ifaddr *ifa, int flags > dst = deldst; > } > > - memset(&info, 0, sizeof(info)); > - info.rti_ifa = ifa; > - info.rti_flags = flags; > - info.rti_info[RTAX_DST] = dst; > if ((flags & RTF_LLINFO) == 0) > - info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; > - info.rti_info[RTAX_LABEL] = rtlabel_id2sa(ifp->if_rtlabelid, &sa_rl); > - > + gateway = ifa->ifa_addr; > if ((flags & RTF_HOST) == 0) > - info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; > + mask = ifa->ifa_netmask; > > if (flags & (RTF_LOCAL|RTF_BROADCAST)) > prio = RTP_LOCAL; > @@ -1273,13 +1267,32 @@ rt_ifa_del(struct ifaddr *ifa, int flags > if (flags & RTF_CONNECTED) > prio = RTP_CONNECTED; > > - error = rtrequest_delete(&info, prio, ifp, &rt, rtableid); > - if (error == 0) { > - rt_sendmsg(rt, RTM_DELETE, rtableid); > - if (flags & RTF_LOCAL) > - rt_sendaddrmsg(rt, RTM_DELADDR); > + rt = rtable_lookup(rtableid, dst, mask, gateway, prio); > + if (rt == NULL) > + return (ESRCH); > + > +#ifndef SMALL_KERNEL > + /* > + * If we got multipath routes, we require users to specify > + * a matching gateway. > + */ > + if (ISSET(rt->rt_flags, RTF_MPATH) && gateway == NULL) { > rtfree(rt); > + return (ESRCH); > } > +#endif > + > + /* Make sure that's the route the caller want to delete. */ > + if (rt->rt_ifidx != ifp->if_index) { > + rtfree(rt); > + return (ESRCH); > + } > + > + error = rtdeletemsg(rt, ifp, rtableid); > + if (error == 0 && (flags & RTF_LOCAL)) > + rt_sendaddrmsg(rt, RTM_DELADDR); > + rtfree(rt); > + > if (m != NULL) > m_free(m); > > @@ -1691,9 +1704,12 @@ rt_if_remove_rtdelete(struct rtentry *rt > struct ifnet *ifp = vifp; > > if (rt->rt_ifidx == ifp->if_index) { > - int cloning = (rt->rt_flags & RTF_CLONING); > + int error, cloning = ISSET(rt->rt_flags, RTF_CLONING); > > - if (rtdeletemsg(rt, ifp, id) == 0 && cloning) > + rtref(rt); > + error = rtdeletemsg(rt, ifp, id); > + rtfree(rt); > + if (error == 0 && cloning) > return (EAGAIN); > } > > @@ -1751,7 +1767,9 @@ rt_if_linkstate_change(struct rtentry *r > * clone a new route from a better source. > */ > if (rt->rt_flags & RTF_CLONED) { > + rtref(rt); > rtdeletemsg(rt, ifp, id); > + rtfree(rt); > return (0); > } > /* take route down */ > Index: netinet/ip_icmp.c > =================================================================== > RCS file: /cvs/src/sys/netinet/ip_icmp.c,v > retrieving revision 1.150 > diff -u -p -r1.150 ip_icmp.c > --- netinet/ip_icmp.c 3 Dec 2015 21:11:53 -0000 1.150 > +++ netinet/ip_icmp.c 7 Dec 2015 12:40:06 -0000 > @@ -1042,19 +1042,21 @@ icmp_mtudisc(struct icmp *icp, u_int rta > void > icmp_mtudisc_timeout(struct rtentry *rt, struct rttimer *r) > { > - if (rt == NULL) > - panic("icmp_mtudisc_timeout: bad route to timeout"); > + struct ifnet *ifp; > + int s; > > - if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) == > - (RTF_DYNAMIC | RTF_HOST)) { > + ifp = if_get(rt->rt_ifidx); > + if (ifp == NULL) > + return; > + > + if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == > (RTF_DYNAMIC|RTF_HOST)) { > void *(*ctlfunc)(int, struct sockaddr *, u_int, void *); > struct sockaddr_in sin; > - int s; > > sin = *satosin(rt_key(rt)); > > s = splsoftnet(); > - rtdeletemsg(rt, NULL, r->rtt_tableid); > + rtdeletemsg(rt, ifp, r->rtt_tableid); > > /* Notify TCP layer of increased Path MTU estimate */ > ctlfunc = inetsw[ip_protox[IPPROTO_TCP]].pr_ctlinput; > @@ -1062,9 +1064,12 @@ icmp_mtudisc_timeout(struct rtentry *rt, > (*ctlfunc)(PRC_MTUINC, sintosa(&sin), > r->rtt_tableid, NULL); > splx(s); > - } else > + } else { > if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) > rt->rt_rmx.rmx_mtu = 0; > + } > + > + if_put(ifp); > } > > /* > @@ -1088,17 +1093,20 @@ icmp_ratelimit(const struct in_addr *dst > void > icmp_redirect_timeout(struct rtentry *rt, struct rttimer *r) > { > - if (rt == NULL) > - panic("icmp_redirect_timeout: bad route to timeout"); > + struct ifnet *ifp; > + int s; > > - if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) == > - (RTF_DYNAMIC | RTF_HOST)) { > - int s; > + ifp = if_get(rt->rt_ifidx); > + if (ifp == NULL) > + return; > > + if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == > (RTF_DYNAMIC|RTF_HOST)) { > s = splsoftnet(); > - rtdeletemsg(rt, NULL, r->rtt_tableid); > + rtdeletemsg(rt, ifp, r->rtt_tableid); > splx(s); > } > + > + if_put(ifp); > } > > int > Index: netinet6/icmp6.c > =================================================================== > RCS file: /cvs/src/sys/netinet6/icmp6.c,v > retrieving revision 1.182 > diff -u -p -r1.182 icmp6.c > --- netinet6/icmp6.c 3 Dec 2015 21:11:53 -0000 1.182 > +++ netinet6/icmp6.c 7 Dec 2015 12:39:28 -0000 > @@ -1952,34 +1952,42 @@ icmp6_mtudisc_clone(struct sockaddr *dst > void > icmp6_mtudisc_timeout(struct rtentry *rt, struct rttimer *r) > { > - if (rt == NULL) > - panic("icmp6_mtudisc_timeout: bad route to timeout"); > - if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) == > - (RTF_DYNAMIC | RTF_HOST)) { > - int s; > + struct ifnet *ifp; > + int s; > > + ifp = if_get(rt->rt_ifidx); > + if (ifp == NULL) > + return; > + > + if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == > (RTF_DYNAMIC|RTF_HOST)) { > s = splsoftnet(); > - rtdeletemsg(rt, NULL, r->rtt_tableid); > + rtdeletemsg(rt, ifp, r->rtt_tableid); > splx(s); > } else { > if (!(rt->rt_rmx.rmx_locks & RTV_MTU)) > rt->rt_rmx.rmx_mtu = 0; > } > + > + if_put(ifp); > } > > void > icmp6_redirect_timeout(struct rtentry *rt, struct rttimer *r) > { > - if (rt == NULL) > - panic("icmp6_redirect_timeout: bad route to timeout"); > - if ((rt->rt_flags & (RTF_GATEWAY | RTF_DYNAMIC | RTF_HOST)) == > - (RTF_GATEWAY | RTF_DYNAMIC | RTF_HOST)) { > - int s; > + struct ifnet *ifp; > + int s; > > + ifp = if_get(rt->rt_ifidx); > + if (ifp == NULL) > + return; > + > + if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == > (RTF_DYNAMIC|RTF_HOST)) { > s = splsoftnet(); > - rtdeletemsg(rt, NULL, r->rtt_tableid); > + rtdeletemsg(rt, ifp, r->rtt_tableid); > splx(s); > } > + > + if_put(ifp); > } > > int *icmpv6ctl_vars[ICMPV6CTL_MAXID] = ICMPV6CTL_VARS;