I sent this diff out some time ago and would really like to get this in.
This is one step on makeing rtsock.c less of a hornets nest.
This reduces the side effects in route_output and simplifies some other
bits as well. For example route_input is less variadic and simpler.

Anyone couragous enough to send me an OK?
-- 
:wq Claudio

Index: net/rtsock.c
===================================================================
RCS file: /cvs/src/sys/net/rtsock.c,v
retrieving revision 1.213
diff -u -p -r1.213 rtsock.c
--- net/rtsock.c        19 Jan 2017 23:18:29 -0000      1.213
+++ net/rtsock.c        20 Jan 2017 01:37:33 -0000
@@ -92,7 +92,6 @@
 
 struct sockaddr                route_dst = { 2, PF_ROUTE, };
 struct sockaddr                route_src = { 2, PF_ROUTE, };
-struct sockproto       route_proto = { PF_ROUTE, };
 
 struct walkarg {
        int     w_op, w_arg, w_given, w_needed, w_tmemsize;
@@ -100,7 +99,7 @@ struct walkarg {
 };
 
 int    route_ctloutput(int, struct socket *, int, int, struct mbuf **);
-void   route_input(struct mbuf *m0, ...);
+void   route_input(struct mbuf *m0, unsigned short);
 int    route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
 int    route_cleargateway(struct rtentry *, void *, unsigned int);
 
@@ -331,7 +330,7 @@ rt_senddesync(void *data)
 }
 
 void
-route_input(struct mbuf *m0, ...)
+route_input(struct mbuf *m0, unsigned short sa_family)
 {
        struct rawcb *rp;
        struct routecb *rop;
@@ -339,15 +338,10 @@ route_input(struct mbuf *m0, ...)
        struct mbuf *m = m0;
        int s, sockets = 0;
        struct socket *last = NULL;
-       va_list ap;
-       struct sockproto *proto;
        struct sockaddr *sosrc, *sodst;
        
-       va_start(ap, m0);
-       proto = va_arg(ap, struct sockproto *);
-       sosrc = va_arg(ap, struct sockaddr *);
-       sodst = va_arg(ap, struct sockaddr *);
-       va_end(ap);
+       sosrc = &route_src;
+       sodst = &route_src;
 
        /* ensure that we can access the rtm_type via mtod() */
        if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
@@ -358,10 +352,15 @@ route_input(struct mbuf *m0, ...)
        LIST_FOREACH(rp, &rawcb, rcb_list) {
                if (rp->rcb_socket->so_state & SS_CANTRCVMORE)
                        continue;
-               if (rp->rcb_proto.sp_family != proto->sp_family)
+               if (rp->rcb_proto.sp_family != PF_ROUTE)
                        continue;
-               if (rp->rcb_proto.sp_protocol && proto->sp_protocol &&
-                   rp->rcb_proto.sp_protocol != proto->sp_protocol)
+               /*
+                * If route socket is bound to an address family only send
+                * messages that match the address family. Address family
+                * agnostic messages are always send.
+                */
+               if (rp->rcb_proto.sp_protocol != 0 && sa_family != 0 &&
+                   rp->rcb_proto.sp_protocol != sa_family)
                        continue;
                /*
                 * We assume the lower level routines have
@@ -458,36 +457,99 @@ route_input(struct mbuf *m0, ...)
                m_freem(m);
 }
 
+struct rt_msghdr *
+rt_report(struct rtentry *rt, u_char type, int seq, int tableid)
+{
+       struct rt_msghdr        *rtm;
+       struct rt_addrinfo       info;
+       struct sockaddr_rtlabel  sa_rl;
+       struct sockaddr_in6      sa_mask;
+#ifdef BFD
+       struct sockaddr_bfd      sa_bfd;
+#endif
+#ifdef MPLS
+       struct sockaddr_mpls     sa_mpls;
+#endif
+       struct ifnet            *ifp = NULL;
+       int                      len;
+
+       bzero(&info, sizeof(info));
+       info.rti_info[RTAX_DST] = rt_key(rt);
+       info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+       info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
+       info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
+#ifdef BFD
+       if (rt->rt_flags & RTF_BFD)
+               info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
+#endif
+#ifdef MPLS
+       if (rt->rt_flags & RTF_MPLS) {
+               bzero(&sa_mpls, sizeof(sa_mpls));
+               sa_mpls.smpls_family = AF_MPLS;
+               sa_mpls.smpls_len = sizeof(sa_mpls);
+               sa_mpls.smpls_label = ((struct rt_mpls *)
+                   rt->rt_llinfo)->mpls_label;
+               info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
+               info.rti_mpls = ((struct rt_mpls *)
+                   rt->rt_llinfo)->mpls_operation;
+       }
+#endif
+       ifp = if_get(rt->rt_ifidx);
+       if (ifp != NULL) {
+               info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
+               info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
+               if (ifp->if_flags & IFF_POINTOPOINT)
+                       info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
+       }
+       if_put(ifp);
+       /* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */
+
+       /* build new route message */
+       len = rt_msg2(type, RTM_VERSION, &info, NULL, NULL);
+       /* XXX why can't we wait? Should be process context... */
+       rtm = malloc(len, M_RTABLE, M_NOWAIT | M_ZERO);
+       if (rtm == NULL)
+               return NULL;
+
+       rt_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL);
+       rtm->rtm_type = type;
+       rtm->rtm_index = rt->rt_ifidx;
+       rtm->rtm_tableid = tableid;
+       rtm->rtm_priority = rt->rt_priority & RTP_MASK;
+       rtm->rtm_flags = rt->rt_flags;
+       rtm->rtm_pid = curproc->p_p->ps_pid;
+       rtm->rtm_seq = seq;
+       rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
+       rtm->rtm_addrs = info.rti_addrs;
+#ifdef MPLS
+       rtm->rtm_mpls = info.rti_mpls;
+#endif
+       return rtm;
+}
+
 int
 route_output(struct mbuf *m, ...)
 {
        struct rt_msghdr        *rtm = NULL;
        struct rtentry          *rt = NULL;
-       struct rtentry          *saved_nrt = NULL;
        struct rt_addrinfo       info;
-       int                      plen, len, newgate = 0, error = 0;
+       int                      plen, len, seq, newgate = 0, error = 0;
        struct ifnet            *ifp = NULL;
        struct ifaddr           *ifa = NULL;
        struct socket           *so;
        struct rawcb            *rp = NULL;
-       struct sockaddr_rtlabel  sa_rl;
-       struct sockaddr_in6      sa_mask;
-#ifdef BFD
-       struct sockaddr_bfd      sa_bfd;
-#endif
 #ifdef MPLS
-       struct sockaddr_mpls     sa_mpls, *psa_mpls;
+       struct sockaddr_mpls    *psa_mpls;
 #endif
        va_list                  ap;
        u_int                    tableid;
        u_int8_t                 prio;
-       u_char                   vers;
+       u_char                   vers, type;
 
        va_start(ap, m);
        so = va_arg(ap, struct socket *);
        va_end(ap);
 
-       info.rti_info[RTAX_DST] = NULL; /* for error handling (goto flush) */
        if (m == NULL || ((m->m_len < sizeof(int32_t)) &&
            (m = m_pullup(m, sizeof(int32_t))) == 0))
                return (ENOBUFS);
@@ -554,10 +616,10 @@ route_output(struct mbuf *m, ...)
        if (!rtable_exists(tableid)) {
                if (rtm->rtm_type == RTM_ADD) {
                        if ((error = rtable_add(tableid)) != 0)
-                               goto flush;
+                               goto fail;
                } else {
                        error = EINVAL;
-                       goto flush;
+                       goto fail;
                }
        }
 
@@ -597,7 +659,7 @@ route_output(struct mbuf *m, ...)
            info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) ||
            info.rti_info[RTAX_GENMASK] != NULL) {
                error = EINVAL;
-               goto flush;
+               goto fail;
        }
 #ifdef MPLS
        info.rti_mpls = rtm->rtm_mpls;
@@ -609,6 +671,11 @@ route_output(struct mbuf *m, ...)
                info.rti_flags |= RTF_LLINFO;
        }
 
+       /*
+        * Do not use goto flush before this point since the message itself
+        * may be not consistent and could cause unexpected behaviour in other
+        * userland clients.
+        */
        switch (rtm->rtm_type) {
        case RTM_ADD:
                if (info.rti_info[RTAX_GATEWAY] == NULL) {
@@ -634,23 +701,14 @@ route_output(struct mbuf *m, ...)
                rtfree(rt);
                rt = NULL;
 
-               error = rtrequest(RTM_ADD, &info, prio, &saved_nrt, tableid);
-               if (error == 0) {
+               error = rtrequest(RTM_ADD, &info, prio, &rt, tableid);
+               if (error == 0)
                        rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
-                           &saved_nrt->rt_rmx);
-                       /* write back the priority the kernel used */
-                       rtm->rtm_priority = saved_nrt->rt_priority & RTP_MASK;
-                       rtm->rtm_index = saved_nrt->rt_ifidx;
-                       rtm->rtm_flags = saved_nrt->rt_flags;
-                       rtfree(saved_nrt);
-               }
+                           &rt->rt_rmx);
+               else
+                       goto flush;
                break;
        case RTM_DELETE:
-               if (!rtable_exists(tableid)) {
-                       error = EAFNOSUPPORT;
-                       goto flush;
-               }
-
                rt = rtable_lookup(tableid, info.rti_info[RTAX_DST],
                    info.rti_info[RTAX_NETMASK], info.rti_info[RTAX_GATEWAY],
                    prio);
@@ -668,7 +726,7 @@ route_output(struct mbuf *m, ...)
                        /* Reset the MTU of the gateway route. */
                        rtable_walk(tableid, rt_key(rt)->sa_family,
                            route_cleargateway, rt);
-                       goto report;
+                       break;
                }
 
                /*
@@ -678,96 +736,18 @@ route_output(struct mbuf *m, ...)
                if ((rt != NULL) &&
                    ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
                        error = EINVAL;
-                       goto report;
+                       break;
                }
 
                rtfree(rt);
                rt = NULL;
 
                error = rtrequest(RTM_DELETE, &info, prio, &rt, tableid);
-               if (error == 0)
-                       goto report;
-               break;
-       case RTM_GET:
-               if (!rtable_exists(tableid)) {
-                       error = EAFNOSUPPORT;
+               if (error != 0)
                        goto flush;
-               }
-               rt = rtable_lookup(tableid, info.rti_info[RTAX_DST],
-                   info.rti_info[RTAX_NETMASK], info.rti_info[RTAX_GATEWAY],
-                   prio);
-               if (rt == NULL) {
-                       error = ESRCH;
-                       goto flush;
-               }
-
-report:
-               info.rti_info[RTAX_DST] = rt_key(rt);
-               info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
-               info.rti_info[RTAX_NETMASK] =
-                   rt_plen2mask(rt, &sa_mask);
-               info.rti_info[RTAX_LABEL] =
-                   rtlabel_id2sa(rt->rt_labelid, &sa_rl);
-#ifdef BFD
-               if (rt->rt_flags & RTF_BFD)
-                       info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
-#endif
-#ifdef MPLS
-               if (rt->rt_flags & RTF_MPLS) {
-                       bzero(&sa_mpls, sizeof(sa_mpls));
-                       sa_mpls.smpls_family = AF_MPLS;
-                       sa_mpls.smpls_len = sizeof(sa_mpls);
-                       sa_mpls.smpls_label = ((struct rt_mpls *)
-                           rt->rt_llinfo)->mpls_label;
-                       info.rti_info[RTAX_SRC] =
-                           (struct sockaddr *)&sa_mpls;
-                       info.rti_mpls = ((struct rt_mpls *)
-                           rt->rt_llinfo)->mpls_operation;
-                       rtm->rtm_mpls = info.rti_mpls;
-               }
-#endif
-               info.rti_info[RTAX_IFP] = NULL;
-               info.rti_info[RTAX_IFA] = NULL;
-               ifp = if_get(rt->rt_ifidx);
-               if (ifp != NULL && rtm->rtm_addrs & (RTA_IFP|RTA_IFA)) {
-                       info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
-                       info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
-                       if (ifp->if_flags & IFF_POINTOPOINT)
-                               info.rti_info[RTAX_BRD] =
-                                   rt->rt_ifa->ifa_dstaddr;
-                       else
-                               info.rti_info[RTAX_BRD] = NULL;
-               }
-               if_put(ifp);
-               len = rt_msg2(rtm->rtm_type, RTM_VERSION, &info, NULL,
-                   NULL);
-               if (len > rtm->rtm_msglen) {
-                       struct rt_msghdr        *new_rtm;
-                       new_rtm = malloc(len, M_RTABLE, M_NOWAIT);
-                       if (new_rtm == NULL) {
-                               error = ENOBUFS;
-                               goto flush;
-                       }
-                       memcpy(new_rtm, rtm, rtm->rtm_msglen);
-                       free(rtm, M_RTABLE, 0);
-                       rtm = new_rtm;
-               }
-               rt_msg2(rtm->rtm_type, RTM_VERSION, &info, (caddr_t)rtm,
-                   NULL);
-               rtm->rtm_flags = rt->rt_flags;
-               rtm->rtm_use = 0;
-               rtm->rtm_priority = rt->rt_priority & RTP_MASK;
-               rtm->rtm_index = rt->rt_ifidx;
-               rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
-               rtm->rtm_addrs = info.rti_addrs;
                break;
        case RTM_CHANGE:
        case RTM_LOCK:
-               if (!rtable_exists(tableid)) {
-                       error = EAFNOSUPPORT;
-                       goto flush;
-               }
-
                rt = rtable_lookup(tableid, info.rti_info[RTAX_DST],
                    info.rti_info[RTAX_NETMASK], info.rti_info[RTAX_GATEWAY],
                    prio);
@@ -916,9 +896,6 @@ change:
 
                        rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
                            &rt->rt_rmx);
-                       rtm->rtm_index = rt->rt_ifidx;
-                       rtm->rtm_priority = rt->rt_priority & RTP_MASK;
-                       rtm->rtm_flags = rt->rt_flags;
 
                        ifp = if_get(rt->rt_ifidx);
                        KASSERT(ifp != NULL);
@@ -938,13 +915,45 @@ change:
                        rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
                        rt->rt_rmx.rmx_locks |=
                            (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
-                       rtm->rtm_priority = rt->rt_priority & RTP_MASK;
                        break;
                }
                break;
+       case RTM_GET:
+               rt = rtable_lookup(tableid, info.rti_info[RTAX_DST],
+                   info.rti_info[RTAX_NETMASK], info.rti_info[RTAX_GATEWAY],
+                   prio);
+               if (rt == NULL) {
+                       error = ESRCH;
+                       goto flush;
+               }
+               break;
+       }
+
+       /* CODE DOWN HERE NEEDS:
+       rt, rtm, error, so, m, tableid, sa_family of some sort
+
+       OTHER NOTES:
+       - to end up here it seems all is OK, error is most probably 0
+       - error cases take the flush route or in bad cases fail.
+       - fail does not report the message back but just fails the call.
+         if the message is not valid then fail should be used
+       - the dance in route_input and rp->rcb_proto.sp_family = 0 dance is
+         insane, better to pass the so we don't want to listen on to
+         route_input.
+       */
+
+       type = rtm->rtm_type;
+       seq = rtm->rtm_seq;
+       free(rtm, M_RTABLE, 0);
+       rtm = rt_report(rt, type, seq, tableid);
+       if (rtm == NULL) {
+               error = ENOBUFS;
+               goto fail;
        }
 
 flush:
+       if (rt)
+               rtfree(rt);
        if (rtm) {
                if (error)
                        rtm->rtm_errno = error;
@@ -952,16 +961,12 @@ flush:
                        rtm->rtm_flags |= RTF_DONE;
                }
        }
-       if (info.rti_info[RTAX_DST])
-               route_proto.sp_protocol = info.rti_info[RTAX_DST]->sa_family;
-       if (rt)
-               rtfree(rt);
-
        /*
         * Check to see if we don't want our own messages.
         */
        if (!(so->so_options & SO_USELOOPBACK)) {
                if (route_cb.any_count <= 1) {
+                       /* no other listener and no loopback of messages */
 fail:
                        free(rtm, M_RTABLE, 0);
                        m_freem(m);
@@ -969,9 +974,8 @@ fail:
                }
                /* There is another listener, so construct message */
                rp = sotorawcb(so);
-       }
-       if (rp)
                rp->rcb_proto.sp_family = 0; /* Avoid us */
+       }
        if (rtm) {
                if (m_copyback(m, 0, rtm->rtm_msglen, rtm, M_NOWAIT)) {
                        m_freem(m);
@@ -981,9 +985,10 @@ fail:
                free(rtm, M_RTABLE, 0);
        }
        if (m)
-               route_input(m, &route_proto, &route_src, &route_dst);
+               route_input(m, info.rti_info[RTAX_DST] ?
+                   info.rti_info[RTAX_DST]->sa_family : 0);
        if (rp)
-               rp->rcb_proto.sp_family = PF_ROUTE;
+               rp->rcb_proto.sp_family = PF_ROUTE; /* Readd us */
 
        return (error);
 }
@@ -1055,7 +1060,6 @@ rt_setmetrics(u_long which, const struct
 
                out->rmx_expire = expire;
        }
-       /* RTV_PRIORITY handled before */
 }
 
 void
@@ -1257,11 +1261,7 @@ rt_missmsg(int type, struct rt_addrinfo 
        rtm->rtm_tableid = tableid;
        rtm->rtm_addrs = rtinfo->rti_addrs;
        rtm->rtm_index = ifidx;
-       if (sa == NULL)
-               route_proto.sp_protocol = 0;
-       else
-               route_proto.sp_protocol = sa->sa_family;
-       route_input(m, &route_proto, &route_src, &route_dst);
+       route_input(m, sa ? sa->sa_family : 0);
 }
 
 /*
@@ -1286,8 +1286,7 @@ rt_ifmsg(struct ifnet *ifp)
        ifm->ifm_xflags = ifp->if_xflags;
        ifm->ifm_data = ifp->if_data;
        ifm->ifm_addrs = 0;
-       route_proto.sp_protocol = 0;
-       route_input(m, &route_proto, &route_src, &route_dst);
+       route_input(m, 0);
 }
 
 /*
@@ -1323,11 +1322,7 @@ rt_sendaddrmsg(struct rtentry *rt, int c
        ifam->ifam_addrs = info.rti_addrs;
        ifam->ifam_tableid = ifp->if_rdomain;
 
-       if (ifa->ifa_addr == NULL)
-               route_proto.sp_protocol = 0;
-       else
-               route_proto.sp_protocol = ifa->ifa_addr->sa_family;
-       route_input(m, &route_proto, &route_src, &route_dst);
+       route_input(m, ifa->ifa_addr ? ifa->ifa_addr->sa_family : 0);
 }
 
 /*
@@ -1349,8 +1344,7 @@ rt_ifannouncemsg(struct ifnet *ifp, int 
        ifan->ifan_index = ifp->if_index;
        strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
        ifan->ifan_what = what;
-       route_proto.sp_protocol = 0;
-       route_input(m, &route_proto, &route_src, &route_dst);
+       route_input(m, 0);
 }
 
 #ifdef BFD
@@ -1381,8 +1375,7 @@ rt_bfdmsg(struct bfd_config *bfd)
        bfd2sa(bfd->bc_rt, &sa_bfd);
        memcpy(&bfdm->bm_sa, &sa_bfd, sizeof(sa_bfd));
 
-       route_proto.sp_protocol = info.rti_info[RTAX_DST]->sa_family;
-       route_input(m, &route_proto, &route_src, &route_dst);
+       route_input(m, info.rti_info[RTAX_DST]->sa_family);
 }
 #endif /* BFD */
 
@@ -1647,7 +1640,7 @@ extern    struct domain routedomain;              /* or
 
 struct protosw routesw[] = {
 { SOCK_RAW,    &routedomain,   0,              PR_ATOMIC|PR_ADDR|PR_WANTRCVD,
-  route_input, route_output,   0,              route_ctloutput,
+  0,           route_output,   0,              route_ctloutput,
   route_usrreq,
   raw_init,    0,              0,              0,
   sysctl_rtable,

Reply via email to