On 28/10/16(Fri) 16:27, Claudio Jeker wrote: > On Fri, Oct 28, 2016 at 04:19:35PM +0200, Nils Frohberg wrote: > > I currently cannot access the local IP of an interface on rdomain 1: > > > > Script started on Fri Oct 28 15:02:20 2016 > > $ doas pfctl -d > > pfctl: pf not enabled > > $ doas ifconfig vether0 > > vether0: no such interface > > $ doas ifconfig vether0 rdomain 1 > > $ doas ifconfig vether0 inet 192.168.42.2 > > $ doas ifconfig vether0 > > vether0: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> rdomain 1 mtu > > 1500 > > lladdr fe:e1:ba:d1:73:55 > > index 7 priority 0 llprio 3 > > groups: vether > > media: Ethernet autoselect > > status: active > > inet 192.168.42.2 netmask 0xffffff00 broadcast 192.168.42.255 > > $ route -T1 -n show -inet > > Routing tables > > > > Internet: > > Destination Gateway Flags Refs Use Mtu Prio > > Iface > > 192.168.42/24 192.168.42.2 UCn 0 0 - 4 > > vether0 > > 192.168.42.2 fe:e1:ba:d1:73:55 UHLl 0 0 - 1 > > vether0 > > 192.168.42.255 192.168.42.2 UHb 0 0 - 1 > > vether0 > > $ ping -V1 192.168.42.2 > > PING 192.168.42.2 (192.168.42.2): 56 data bytes > > ^C > > --- 192.168.42.2 ping statistics --- > > 4 packets transmitted, 0 packets received, 100.0% packet loss > > $ ^D > > > > Script done on Fri Oct 28 15:04:16 2016 > > > > Connecting to other boxes via routes on rdomain 1 works as usual > > (not shown in the script output above). > > > > I tracked the problem to being caused by v1.455 of if.c. It seems > > that ifp->if_rdomain == 0 when running the ping above. In this case, > > m->m_pkthdr.ph_rtableid should be set back to 1 after the reset > > though (as it was before m_resethdr(m)). Shouldn't ifp->if_rdomain > > be equal to 1 in this case? If yes, there must be a bug earlier in > > the code. > > > > The following diff fixes things for me, but I don't know if > > ifp->if_rdomain isn't the one that needs to be fixed: > > Can you check the ifp->if_index. I bet it is pointing to the loopback > interface lo0 and so you end up with the wrong rdomain id. > This is a nasty consequence of using lo0 accross domains.
Diff below should fix that by automagically creating a loopback interface when a new routing domain is created. That mean loX will now correspond to routing domain X. Not that if you are currently using a lo2 and a rdomain 2 this diff will break your rdomain setup. I'll write the manual page for rtable_loindex(9) if you're ok with the approach. Index: kern/init_main.c =================================================================== RCS file: /cvs/src/sys/kern/init_main.c,v retrieving revision 1.261 diff -u -p -r1.261 init_main.c --- kern/init_main.c 24 Oct 2016 04:38:44 -0000 1.261 +++ kern/init_main.c 2 Nov 2016 16:05:43 -0000 @@ -388,6 +388,9 @@ main(void *framep) msginit(); #endif + /* Create default routing table before attaching lo0. */ + rtable_init(); + /* Attach pseudo-devices. */ for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++) if (pdev->pdev_count > 0) @@ -397,8 +400,6 @@ main(void *framep) crypto_init(); swcr_init(); #endif /* CRYPTO */ - - rtable_init(); /* * Initialize protocols. Block reception of incoming packets Index: net/if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.456 diff -u -p -r1.456 if.c --- net/if.c 19 Oct 2016 02:05:49 -0000 1.456 +++ net/if.c 2 Nov 2016 16:29:39 -0000 @@ -259,7 +259,6 @@ struct srp_gc if_ifp_gc = SRP_GC_INITIAL struct srp_gc if_map_gc = SRP_GC_INITIALIZER(if_map_dtor, NULL); struct ifnet_head ifnet = TAILQ_HEAD_INITIALIZER(ifnet); -unsigned int lo0ifidx; void if_idxmap_init(unsigned int limit) @@ -1392,12 +1391,7 @@ p2p_rtrequest(struct ifnet *ifp, int req KASSERT(ifa == rt->rt_ifa); - /* - * XXX Since lo0 is in the default rdomain we should not - * (ab)use it for any route related to an interface of a - * different rdomain. - */ - lo0ifp = if_get(lo0ifidx); + lo0ifp = if_get(rtable_loindex(ifp->if_rdomain)); KASSERT(lo0ifp != NULL); TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) { if (lo0ifa->ifa_addr->sa_family == @@ -1480,7 +1474,7 @@ if_up(struct ifnet *ifp) #ifdef INET6 /* Userland expects the kernel to set ::1 on lo0. */ - if (ifp->if_index == lo0ifidx) + if (ifp->if_index == rtable_loindex(0)) in6_ifattach(ifp); #endif @@ -1647,14 +1641,31 @@ if_setrdomain(struct ifnet *ifp, int rdo if (rdomain < 0 || rdomain > RT_TABLEID_MAX) return (EINVAL); - /* make sure that the routing table exists */ + /* + * Create the routing table if it does not exist, including its + * loopback interface with unit == rdomain. + */ if (!rtable_exists(rdomain)) { + struct ifnet *loifp; + char loifname[IFNAMSIZ]; + + snprintf(loifname, sizeof(loifname), "lo%d", rdomain); + if ((error = if_clone_create(loifname, 0))) + return (error); + + if ((loifp = ifunit(loifname)) == NULL) + return (ENXIO); + s = splsoftnet(); if ((error = rtable_add(rdomain)) == 0) - rtable_l2set(rdomain, rdomain); + rtable_l2set(rdomain, rdomain, loifp->if_index); splx(s); - if (error) + if (error) { + if_clone_destroy(loifname); return (error); + } + + loifp->if_rdomain = rdomain; } /* make sure that the routing table is a real rdomain */ Index: net/if_loop.c =================================================================== RCS file: /cvs/src/sys/net/if_loop.c,v retrieving revision 1.76 diff -u -p -r1.76 if_loop.c --- net/if_loop.c 13 Apr 2016 11:41:15 -0000 1.76 +++ net/if_loop.c 2 Nov 2016 16:26:03 -0000 @@ -121,6 +121,7 @@ #include <net/if_var.h> #include <net/if_types.h> #include <net/netisr.h> +#include <net/rtable.h> #include <net/route.h> #include <netinet/in.h> @@ -182,7 +183,7 @@ loop_clone_create(struct if_clone *ifc, if (unit == 0) { if_attachhead(ifp); if_addgroup(ifp, ifc->ifc_name); - lo0ifidx = ifp->if_index; + rtable_l2set(0, 0, ifp->if_index); } else if_attach(ifp); if_alloc_sadl(ifp); @@ -195,7 +196,7 @@ loop_clone_create(struct if_clone *ifc, int loop_clone_destroy(struct ifnet *ifp) { - if (ifp->if_index == lo0ifidx) + if (ifp->if_index == rtable_loindex(ifp->if_rdomain)) return (EPERM); if_detach(ifp); Index: net/if_var.h =================================================================== RCS file: /cvs/src/sys/net/if_var.h,v retrieving revision 1.75 diff -u -p -r1.75 if_var.h --- net/if_var.h 4 Sep 2016 15:46:39 -0000 1.75 +++ net/if_var.h 2 Nov 2016 12:11:19 -0000 @@ -291,7 +291,6 @@ int niq_enlist(struct niqueue *, struct sysctl_mq((_n), (_l), (_op), (_olp), (_np), (_nl), &(_niq)->ni_q) extern struct ifnet_head ifnet; -extern unsigned int lo0ifidx; extern struct taskq *softnettq; void if_start(struct ifnet *); Index: net/route.c =================================================================== RCS file: /cvs/src/sys/net/route.c,v retrieving revision 1.333 diff -u -p -r1.333 route.c --- net/route.c 6 Oct 2016 19:09:08 -0000 1.333 +++ net/route.c 2 Nov 2016 16:31:00 -0000 @@ -197,8 +197,6 @@ route_init(void) while (rt_hashjitter == 0) rt_hashjitter = arc4random(); - if (rtable_add(0) != 0) - panic("route_init rtable_add"); #ifdef BFD bfdinit(); #endif Index: net/rtable.c =================================================================== RCS file: /cvs/src/sys/net/rtable.c,v retrieving revision 1.52 diff -u -p -r1.52 rtable.c --- net/rtable.c 7 Sep 2016 09:36:49 -0000 1.52 +++ net/rtable.c 2 Nov 2016 16:37:06 -0000 @@ -1,7 +1,7 @@ /* $OpenBSD: rtable.c,v 1.52 2016/09/07 09:36:49 mpi Exp $ */ /* - * Copyright (c) 2014-2015 Martin Pieuchot + * Copyright (c) 2014-2016 Martin Pieuchot * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -41,7 +41,7 @@ * afmap rtmap/dommp * ----------- --------- ----- * | 0 |--------> | 0 | 0 | ... | 0 | Array mapping rtableid (=index) - * ----------- --------- ----- to rdomain (=value). + * ----------- --------- ----- to rdomain/loopback (=value). * | AF_INET |. * ----------- `. .---------. .---------. * ... `----> | rtable0 | ... | rtableN | Array of pointers for @@ -59,10 +59,20 @@ struct rtmap { void **tbl; }; -/* Array of rtableid -> rdomain mapping. */ +/* + * Array of rtableid -> rdomain mapping. + * + * Only used for the first index as describbed above. + */ struct dommp { unsigned int limit; - unsigned int *dom; + /* + * Array to get the routing domain and loopback interface related to + * a routing table. Format: + * + * 8 unused bits | 16 bits for loopback index | 8 bits for rdomain + */ + unsigned int *value; }; unsigned int rtmap_limit = 0; @@ -146,6 +156,8 @@ rtable_init(void) unsigned int keylen = 0; int i; + KASSERT(sizeof(struct rtmap) == sizeof(struct dommp)); + /* We use index 0 for the rtable/rdomain map. */ af2idx_max = 1; memset(af2idx, 0, sizeof(af2idx)); @@ -173,6 +185,9 @@ rtable_init(void) M_WAITOK|M_ZERO); rtmap_init(); + + if (rtable_add(0) != 0) + panic("unable to create default routing table"); } int @@ -221,7 +236,7 @@ rtable_add(unsigned int id) /* Use main rtable/rdomain by default. */ dmm = srp_get_locked(&afmap[0]); - dmm->dom[id] = 0; + dmm->value[id] = 0; return (0); } @@ -272,24 +287,42 @@ rtable_l2(unsigned int rtableid) dmm = srp_enter(&sr, &afmap[0]); if (rtableid < dmm->limit) - rdomain = dmm->dom[rtableid]; + rdomain = (dmm->value[rtableid] & RT_TABLEID_MASK); srp_leave(&sr); return (rdomain); } +unsigned int +rtable_loindex(unsigned int rtableid) +{ + struct dommp *dmm; + unsigned int loifidx = 0; + struct srp_ref sr; + + dmm = srp_enter(&sr, &afmap[0]); + if (rtableid < dmm->limit) + loifidx = dmm->value[rtableid] >> RT_TABLEID_BITS; + srp_leave(&sr); + + return (loifidx); +} + void -rtable_l2set(unsigned int rtableid, unsigned int rdomain) +rtable_l2set(unsigned int rtableid, unsigned int rdomain, unsigned int loifidx) { struct dommp *dmm; + unsigned int value; KERNEL_ASSERT_LOCKED(); if (!rtable_exists(rtableid) || !rtable_exists(rdomain)) return; + value = (rdomain & RT_TABLEID_MASK) + (loifidx << RT_TABLEID_BITS); + dmm = srp_get_locked(&afmap[0]); - dmm->dom[rtableid] = rdomain; + dmm->value[rtableid] = value; } #ifndef ART Index: net/rtable.h =================================================================== RCS file: /cvs/src/sys/net/rtable.h,v retrieving revision 1.16 diff -u -p -r1.16 rtable.h --- net/rtable.h 7 Sep 2016 09:36:49 -0000 1.16 +++ net/rtable.h 2 Nov 2016 12:01:11 -0000 @@ -54,7 +54,8 @@ void rtable_init(void); int rtable_exists(unsigned int); int rtable_add(unsigned int); unsigned int rtable_l2(unsigned int); -void rtable_l2set(unsigned int, unsigned int); +unsigned int rtable_loindex(unsigned int); +void rtable_l2set(unsigned int, unsigned int, unsigned int); struct rtentry *rtable_lookup(unsigned int, struct sockaddr *, struct sockaddr *, struct sockaddr *, uint8_t); Index: netinet/ip_output.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_output.c,v retrieving revision 1.327 diff -u -p -r1.327 ip_output.c --- netinet/ip_output.c 4 Sep 2016 17:18:56 -0000 1.327 +++ netinet/ip_output.c 2 Nov 2016 16:27:14 -0000 @@ -211,7 +211,7 @@ reroute: ia = ifatoia(ro->ro_rt->rt_ifa); if (ISSET(ro->ro_rt->rt_flags, RTF_LOCAL)) - ifp = if_get(lo0ifidx); + ifp = if_get(rtable_loindex(m->m_pkthdr.ph_rtableid)); else ifp = if_get(ro->ro_rt->rt_ifidx); if (ifp == NULL) { Index: netinet6/ip6_input.c =================================================================== RCS file: /cvs/src/sys/netinet6/ip6_input.c,v retrieving revision 1.168 diff -u -p -r1.168 ip6_input.c --- netinet6/ip6_input.c 24 Aug 2016 09:41:12 -0000 1.168 +++ netinet6/ip6_input.c 2 Nov 2016 16:27:14 -0000 @@ -211,7 +211,10 @@ ip6_input(struct mbuf *m) } else { if (m->m_next) { if (m->m_flags & M_LOOP) { - ip6stat.ip6s_m2m[lo0ifidx]++; /*XXX*/ + int ifidx; + + ifidx = rtable_loindex(m->m_pkthdr.ph_rtableid); + ip6stat.ip6s_m2m[ifidx]++; } else if (ifp->if_index < nitems(ip6stat.ip6s_m2m)) ip6stat.ip6s_m2m[ifp->if_index]++; else Index: netinet6/ip6_output.c =================================================================== RCS file: /cvs/src/sys/netinet6/ip6_output.c,v retrieving revision 1.216 diff -u -p -r1.216 ip6_output.c --- netinet6/ip6_output.c 19 Sep 2016 18:09:09 -0000 1.216 +++ netinet6/ip6_output.c 2 Nov 2016 12:10:02 -0000 @@ -460,7 +460,7 @@ reroute: goto bad; } if (ISSET(rt->rt_flags, RTF_LOCAL)) - ifp = if_get(lo0ifidx); + ifp = if_get(rtable_loindex(m->m_pkthdr.ph_rtableid)); else ifp = if_get(rt->rt_ifidx); } else { Index: sys/socket.h =================================================================== RCS file: /cvs/src/sys/sys/socket.h,v retrieving revision 1.92 diff -u -p -r1.92 socket.h --- sys/socket.h 28 Sep 2016 18:50:20 -0000 1.92 +++ sys/socket.h 2 Nov 2016 16:18:41 -0000 @@ -143,7 +143,9 @@ struct splice { /* * Maximum number of alternate routing tables */ -#define RT_TABLEID_MAX 255 +#define RT_TABLEID_MAX 255 +#define RT_TABLEID_BITS 8 +#define RT_TABLEID_MASK 0xff #endif /* __BSD_VISIBLE */