Hello Sebastien,

On 01/11/16(Tue) 09:36, Sebastien Marie wrote:
> Hi,
> 
> I experiment problem with local connection on specific rdomain.
> 
> I tried to make a simple and reproductible environment.

Thanks for the nice report.  Could you confirm that the diff below fixes
your issue?

The idea is to stop using lo0 for every routing domain.  With this diff,
a dedicated loopback interface (lo<rdomain>) is created for, and used by,
each rdomain.


Index: kern/init_main.c
===================================================================
RCS file: /cvs/src/sys/kern/init_main.c,v
retrieving revision 1.261
diff -u -p -r1.261 init_main.c
--- kern/init_main.c    24 Oct 2016 04:38:44 -0000      1.261
+++ kern/init_main.c    2 Nov 2016 16:05:43 -0000
@@ -388,6 +388,9 @@ main(void *framep)
        msginit();
 #endif
 
+       /* Create default routing table before attaching lo0. */
+       rtable_init();
+
        /* Attach pseudo-devices. */
        for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
                if (pdev->pdev_count > 0)
@@ -397,8 +400,6 @@ main(void *framep)
        crypto_init();
        swcr_init();
 #endif /* CRYPTO */
-
-       rtable_init();
 
        /*
         * Initialize protocols.  Block reception of incoming packets
Index: net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.456
diff -u -p -r1.456 if.c
--- net/if.c    19 Oct 2016 02:05:49 -0000      1.456
+++ net/if.c    2 Nov 2016 16:29:39 -0000
@@ -259,7 +259,6 @@ struct srp_gc if_ifp_gc = SRP_GC_INITIAL
 struct srp_gc if_map_gc = SRP_GC_INITIALIZER(if_map_dtor, NULL);
 
 struct ifnet_head ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
-unsigned int lo0ifidx;
 
 void
 if_idxmap_init(unsigned int limit)
@@ -1392,12 +1391,7 @@ p2p_rtrequest(struct ifnet *ifp, int req
 
                KASSERT(ifa == rt->rt_ifa);
 
-               /*
-                * XXX Since lo0 is in the default rdomain we should not
-                * (ab)use it for any route related to an interface of a
-                * different rdomain.
-                */
-               lo0ifp = if_get(lo0ifidx);
+               lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
                KASSERT(lo0ifp != NULL);
                TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
                        if (lo0ifa->ifa_addr->sa_family ==
@@ -1480,7 +1474,7 @@ if_up(struct ifnet *ifp)
 
 #ifdef INET6
        /* Userland expects the kernel to set ::1 on lo0. */
-       if (ifp->if_index == lo0ifidx)
+       if (ifp->if_index == rtable_loindex(0))
                in6_ifattach(ifp);
 #endif
 
@@ -1647,14 +1641,31 @@ if_setrdomain(struct ifnet *ifp, int rdo
        if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
                return (EINVAL);
 
-       /* make sure that the routing table exists */
+       /*
+        * Create the routing table if it does not exist, including its
+        * loopback interface with unit == rdomain.
+        */
        if (!rtable_exists(rdomain)) {
+               struct ifnet *loifp;
+               char loifname[IFNAMSIZ];
+
+               snprintf(loifname, sizeof(loifname), "lo%d", rdomain);
+               if ((error = if_clone_create(loifname, 0)))
+                       return (error);
+
+               if ((loifp = ifunit(loifname)) == NULL)
+                       return (ENXIO);
+
                s = splsoftnet();
                if ((error = rtable_add(rdomain)) == 0)
-                       rtable_l2set(rdomain, rdomain);
+                       rtable_l2set(rdomain, rdomain, loifp->if_index);
                splx(s);
-               if (error)
+               if (error) {
+                       if_clone_destroy(loifname);
                        return (error);
+               }
+
+               loifp->if_rdomain = rdomain;
        }
 
        /* make sure that the routing table is a real rdomain */
Index: net/if_loop.c
===================================================================
RCS file: /cvs/src/sys/net/if_loop.c,v
retrieving revision 1.76
diff -u -p -r1.76 if_loop.c
--- net/if_loop.c       13 Apr 2016 11:41:15 -0000      1.76
+++ net/if_loop.c       2 Nov 2016 16:26:03 -0000
@@ -121,6 +121,7 @@
 #include <net/if_var.h>
 #include <net/if_types.h>
 #include <net/netisr.h>
+#include <net/rtable.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
@@ -182,7 +183,7 @@ loop_clone_create(struct if_clone *ifc, 
        if (unit == 0) {
                if_attachhead(ifp);
                if_addgroup(ifp, ifc->ifc_name);
-               lo0ifidx = ifp->if_index;
+               rtable_l2set(0, 0, ifp->if_index);
        } else
                if_attach(ifp);
        if_alloc_sadl(ifp);
@@ -195,7 +196,7 @@ loop_clone_create(struct if_clone *ifc, 
 int
 loop_clone_destroy(struct ifnet *ifp)
 {
-       if (ifp->if_index == lo0ifidx)
+       if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
                return (EPERM);
 
        if_detach(ifp);
Index: net/if_var.h
===================================================================
RCS file: /cvs/src/sys/net/if_var.h,v
retrieving revision 1.75
diff -u -p -r1.75 if_var.h
--- net/if_var.h        4 Sep 2016 15:46:39 -0000       1.75
+++ net/if_var.h        2 Nov 2016 12:11:19 -0000
@@ -291,7 +291,6 @@ int         niq_enlist(struct niqueue *, struct
     sysctl_mq((_n), (_l), (_op), (_olp), (_np), (_nl), &(_niq)->ni_q)
 
 extern struct ifnet_head ifnet;
-extern unsigned int lo0ifidx;
 extern struct taskq *softnettq;
 
 void   if_start(struct ifnet *);
Index: net/route.c
===================================================================
RCS file: /cvs/src/sys/net/route.c,v
retrieving revision 1.333
diff -u -p -r1.333 route.c
--- net/route.c 6 Oct 2016 19:09:08 -0000       1.333
+++ net/route.c 2 Nov 2016 16:31:00 -0000
@@ -197,8 +197,6 @@ route_init(void)
        while (rt_hashjitter == 0)
                rt_hashjitter = arc4random();
 
-       if (rtable_add(0) != 0)
-               panic("route_init rtable_add");
 #ifdef BFD
        bfdinit();
 #endif
Index: net/rtable.c
===================================================================
RCS file: /cvs/src/sys/net/rtable.c,v
retrieving revision 1.52
diff -u -p -r1.52 rtable.c
--- net/rtable.c        7 Sep 2016 09:36:49 -0000       1.52
+++ net/rtable.c        2 Nov 2016 16:37:06 -0000
@@ -1,7 +1,7 @@
 /*     $OpenBSD: rtable.c,v 1.52 2016/09/07 09:36:49 mpi Exp $ */
 
 /*
- * Copyright (c) 2014-2015 Martin Pieuchot
+ * Copyright (c) 2014-2016 Martin Pieuchot
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -41,7 +41,7 @@
  *     afmap               rtmap/dommp
  *   -----------          ---------     -----
  *   |   0     |--------> | 0 | 0 | ... | 0 |  Array mapping rtableid (=index)
- *   -----------          ---------     -----   to rdomain (=value).
+ *   -----------          ---------     -----   to rdomain/loopback (=value).
  *   | AF_INET |.
  *   ----------- `.       .---------.     .---------.
  *       ...      `----> | rtable0 | ... | rtableN |   Array of pointers for
@@ -59,10 +59,20 @@ struct rtmap {
        void             **tbl;
 };
 
-/* Array of rtableid -> rdomain mapping. */
+/*
+ * Array of rtableid -> rdomain mapping.
+ *
+ * Only used for the first index as described above.
+ */
 struct dommp {
        unsigned int       limit;
-       unsigned int      *dom;
+       /*
+        * Array to get the routing domain and loopback interface related to
+        * a routing table. Format:
+        *
+        * 8 unused bits | 16 bits for loopback index | 8 bits for rdomain
+        */
+       unsigned int      *value;
 };
 
 unsigned int      rtmap_limit = 0;
@@ -146,6 +156,8 @@ rtable_init(void)
        unsigned int     keylen = 0;
        int              i;
 
+       KASSERT(sizeof(struct rtmap) == sizeof(struct dommp));
+
        /* We use index 0 for the rtable/rdomain map. */
        af2idx_max = 1;
        memset(af2idx, 0, sizeof(af2idx));
@@ -173,6 +185,9 @@ rtable_init(void)
            M_WAITOK|M_ZERO);
 
        rtmap_init();
+
+       if (rtable_add(0) != 0)
+               panic("unable to create default routing table");
 }
 
 int
@@ -221,7 +236,7 @@ rtable_add(unsigned int id)
 
        /* Use main rtable/rdomain by default. */
        dmm = srp_get_locked(&afmap[0]);
-       dmm->dom[id] = 0;
+       dmm->value[id] = 0;
 
        return (0);
 }
@@ -272,24 +287,42 @@ rtable_l2(unsigned int rtableid)
 
        dmm = srp_enter(&sr, &afmap[0]);
        if (rtableid < dmm->limit)
-               rdomain = dmm->dom[rtableid];
+               rdomain = (dmm->value[rtableid] & RT_TABLEID_MASK);
        srp_leave(&sr);
 
        return (rdomain);
 }
 
+unsigned int
+rtable_loindex(unsigned int rtableid)
+{
+       struct dommp    *dmm;
+       unsigned int     loifidx = 0;
+       struct srp_ref   sr;
+
+       dmm = srp_enter(&sr, &afmap[0]);
+       if (rtableid < dmm->limit)
+               loifidx = dmm->value[rtableid] >> RT_TABLEID_BITS;
+       srp_leave(&sr);
+
+       return (loifidx);
+}
+
 void
-rtable_l2set(unsigned int rtableid, unsigned int rdomain)
+rtable_l2set(unsigned int rtableid, unsigned int rdomain, unsigned int loifidx)
 {
        struct dommp    *dmm;
+       unsigned int     value;
 
        KERNEL_ASSERT_LOCKED();
 
        if (!rtable_exists(rtableid) || !rtable_exists(rdomain))
                return;
 
+       value = (rdomain & RT_TABLEID_MASK) + (loifidx << RT_TABLEID_BITS);
+
        dmm = srp_get_locked(&afmap[0]);
-       dmm->dom[rtableid] = rdomain;
+       dmm->value[rtableid] = value;
 }
 
 #ifndef ART
Index: net/rtable.h
===================================================================
RCS file: /cvs/src/sys/net/rtable.h,v
retrieving revision 1.16
diff -u -p -r1.16 rtable.h
--- net/rtable.h        7 Sep 2016 09:36:49 -0000       1.16
+++ net/rtable.h        2 Nov 2016 12:01:11 -0000
@@ -54,7 +54,8 @@ void           rtable_init(void);
 int             rtable_exists(unsigned int);
 int             rtable_add(unsigned int);
 unsigned int    rtable_l2(unsigned int);
-void            rtable_l2set(unsigned int, unsigned int);
+unsigned int    rtable_loindex(unsigned int);
+void            rtable_l2set(unsigned int, unsigned int, unsigned int);
 
 struct rtentry *rtable_lookup(unsigned int, struct sockaddr *,
                     struct sockaddr *, struct sockaddr *, uint8_t);
Index: netinet/ip_output.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.327
diff -u -p -r1.327 ip_output.c
--- netinet/ip_output.c 4 Sep 2016 17:18:56 -0000       1.327
+++ netinet/ip_output.c 2 Nov 2016 16:27:14 -0000
@@ -211,7 +211,7 @@ reroute:
 
                ia = ifatoia(ro->ro_rt->rt_ifa);
                if (ISSET(ro->ro_rt->rt_flags, RTF_LOCAL))
-                       ifp = if_get(lo0ifidx);
+                       ifp = if_get(rtable_loindex(m->m_pkthdr.ph_rtableid));
                else
                        ifp = if_get(ro->ro_rt->rt_ifidx);
                if (ifp == NULL) {
Index: netinet6/ip6_input.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.168
diff -u -p -r1.168 ip6_input.c
--- netinet6/ip6_input.c        24 Aug 2016 09:41:12 -0000      1.168
+++ netinet6/ip6_input.c        2 Nov 2016 16:27:14 -0000
@@ -211,7 +211,10 @@ ip6_input(struct mbuf *m)
        } else {
                if (m->m_next) {
                        if (m->m_flags & M_LOOP) {
-                               ip6stat.ip6s_m2m[lo0ifidx]++;   /*XXX*/
+                               int ifidx;
+
+                               ifidx = rtable_loindex(m->m_pkthdr.ph_rtableid);
+                               ip6stat.ip6s_m2m[ifidx]++;
                        } else if (ifp->if_index < nitems(ip6stat.ip6s_m2m))
                                ip6stat.ip6s_m2m[ifp->if_index]++;
                        else
Index: netinet6/ip6_output.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_output.c,v
retrieving revision 1.216
diff -u -p -r1.216 ip6_output.c
--- netinet6/ip6_output.c       19 Sep 2016 18:09:09 -0000      1.216
+++ netinet6/ip6_output.c       2 Nov 2016 12:10:02 -0000
@@ -460,7 +460,7 @@ reroute:
                        goto bad;
                }
                if (ISSET(rt->rt_flags, RTF_LOCAL))
-                       ifp = if_get(lo0ifidx);
+                       ifp = if_get(rtable_loindex(m->m_pkthdr.ph_rtableid));
                else
                        ifp = if_get(rt->rt_ifidx);
        } else {
Index: sys/socket.h
===================================================================
RCS file: /cvs/src/sys/sys/socket.h,v
retrieving revision 1.92
diff -u -p -r1.92 socket.h
--- sys/socket.h        28 Sep 2016 18:50:20 -0000      1.92
+++ sys/socket.h        2 Nov 2016 16:18:41 -0000
@@ -143,7 +143,9 @@ struct      splice {
 /*
  * Maximum number of alternate routing tables
  */
-#define        RT_TABLEID_MAX  255
+#define        RT_TABLEID_MAX          255
+#define        RT_TABLEID_BITS         8
+#define        RT_TABLEID_MASK         0xff
 
 #endif /* __BSD_VISIBLE */
 

Reply via email to