Re: No RTF_UP after route change to an interface that is up

2009-12-08 Thread Doran Mori
Here's the patch for making the RTF_UP show up properly when changing
a gateway from/to an up/down link.

--- /usr/src/sys/net/rtsock.c   Tue Dec  1 00:36:58 2009
+++ rtsock.cMon Dec  7 23:36:20 2009
@@ -641,6 +641,19 @@ report:
}
}

+   /* new gateway, possible link state change  */
+   if ((LINK_STATE_IS_UP(ifa->ifa_ifp->if_link_state) ||
+   ifa->ifa_ifp->if_link_state ==
LINK_STATE_UNKNOWN) &&
+   ifa->ifa_ifp->if_flags & IFF_UP) {
+   rt->rt_flags |= RTF_UP;
+   rt->rt_priority &= RTP_MASK;
+   }
+   else {
+   rt->rt_flags &= ~RTF_UP;
+   rtm->rtm_flags &= RTF_UP;
+   rt->rt_priority |= RTP_DOWN;
+   }
+
/* XXX Hack to allow some flags to be toggled */
if (rtm->rtm_fmask & RTF_FMASK)
rt->rt_flags = (rt->rt_flags &

The above patch made it apparent that the RTP_DOWN flag needed to be
handled properly in rt_mpath_conflict.

--- /usr/src/sys/net/radix_mpath.c  Mon Apr 20 17:01:35 2009
+++ radix_mpath.c   Tue Dec  8 01:16:21 2009
@@ -282,6 +282,7 @@ rt_mpath_conflict(struct radix_node_head *rnh, struct
struct rtentry *rt1;
char *p, *q, *eq;
int same, l, skip;
+   u_int8_t prio;

rn = (struct radix_node *)rt;
rn1 = rnh->rnh_lookup(rt_key(rt), netmask, rnh);
@@ -348,10 +349,13 @@ rt_mpath_conflict(struct radix_node_head *rnh, struct
}

  maskmatched:
-   if (!mpathok && rt1->rt_priority == rt->rt_priority)
+   /* consider route even if RTP_DOWN */
+   prio = rt->rt_priority & RTP_MASK;
+
+   if (!mpathok && (prio == (rt->rt_priority & RTP_MASK)))
return EEXIST;

-   rn1 = rn_mpath_prio((struct radix_node *)rt1, rt->rt_priority);
+   rn1 = rn_mpath_prio((struct radix_node *)rt1, prio);
/* key/mask were the same.  compare gateway for all multipaths */
do {
rt1 = (struct rtentry *)rn1;
@@ -366,12 +370,12 @@ rt_mpath_conflict(struct radix_node_head *rnh, struct
continue;

/* check the route priority */
-   if (rt1->rt_priority != rt->rt_priority)
+   if ((rt1->rt_priority & RTP_MASK) != prio)
continue;

/* all key/mask/gateway are the same.  conflicting entry. */
return EEXIST;
-   } while ((rn1 = rn_mpath_next(rn1, 0)) != NULL);
+   } while ((rn1 = rn_mpath_next(rn1, 1)) != NULL);

  different:
return 0;

Finally, adding or deleting multipath routes that could be flagged
as RTP_DOWN needs to be handled as well. I haven't gotten to writing a
patch for that yet.

dmo



Re: No RTF_UP after route change to an interface that is up

2009-12-10 Thread Doran Mori
I finished the patch dealing with both RTF_UP and RTP_DOWN.  I've
tested as best as I could.  I'm not currently using ECMP but I don't
think anything I've touched would have changed how that works.

Forgive me if I'm not going about things the right way.  This is my
first attempt at hacking on the kernel.

dmo

--- /usr/src/sys/net/radix_mpath.c  Thu Dec 10 17:29:34 2009
+++ radix_mpath.c   Thu Dec 10 19:00:16 2009
@@ -68,7 +68,7 @@ rn_mpath_capable(struct radix_node_head *rnh)
 }

 struct radix_node *
-rn_mpath_next(struct radix_node *rn, int all)
+rn_mpath_next(struct radix_node *rn, int kind)
 {
struct radix_node   *next;
struct rtentry  *rt = (struct rtentry *)rn;
@@ -76,11 +76,17 @@ rn_mpath_next(struct radix_node *rn, int all)
if (!rn->rn_dupedkey)
return NULL;
next = rn->rn_dupedkey;
-   if (rn->rn_mask == next->rn_mask && (all ||
-   rt->rt_priority == ((struct rtentry *)next)->rt_priority))
-   return next;
-   else
-   return NULL;
+   if (rn->rn_mask == next->rn_mask) {
+   if (kind == 0 && (rt->rt_priority ==
+   (((struct rtentry *)next)->rt_priority)))
+   return next;
+   if (kind == 1)
+   return next;
+   if (kind == 2 && ((rt->rt_priority & RTP_MASK) ==
+   struct rtentry *)next)->rt_priority) & RTP_MASK)))
+   return next;
+   }
+   return NULL;
 }

 struct radix_node *
@@ -109,6 +115,33 @@ rn_mpath_prio(struct radix_node *rn, u_int8_t prio)
return (prev);
 }

+struct radix_node *
+rn_mpath_prio_lookup(struct radix_node *rn, u_int8_t prio)
+{
+   struct radix_node   *prev = rn;
+   struct rtentry  *rt;
+
+   if (prio == RTP_ANY)
+   return rn;
+prio &= RTP_MASK;
+
+   while (rn) {
+   /* different netmask -> different route */
+   if (rn->rn_mask != prev->rn_mask)
+   return NULL;
+
+   rt = (struct rtentry *)rn;
+   if ((rt->rt_priority & RTP_MASK) == prio)
+   return rn;
+   if ((rt->rt_priority & RTP_MASK) > prio)
+   /* list is sorted return */
+   return NULL;
+   prev = rn;
+   rn = rn->rn_dupedkey;
+   }
+   return NULL;
+}
+
 void
 rn_mpath_reprio(struct radix_node *rn, int newprio)
 {
@@ -282,6 +315,7 @@ rt_mpath_conflict(struct radix_node_head *rnh, struct
struct rtentry *rt1;
char *p, *q, *eq;
int same, l, skip;
+   u_int8_t prio;

rn = (struct radix_node *)rt;
rn1 = rnh->rnh_lookup(rt_key(rt), netmask, rnh);
@@ -348,10 +382,17 @@ rt_mpath_conflict(struct radix_node_head *rnh, struct
}

  maskmatched:
-   if (!mpathok && rt1->rt_priority == rt->rt_priority)
+   /* consider route even if RTP_DOWN */
+   prio = rt->rt_priority & RTP_MASK;
+
+   rn1 = rn_mpath_prio_lookup((struct radix_node *)rt1, prio);
+if (!rn1)
+   goto different;
+
+   rt1 = (struct rtentry *)rn1;
+   if (!mpathok && ((rt1->rt_priority & RTP_MASK) == prio))
return EEXIST;

-   rn1 = rn_mpath_prio_lookup((struct radix_node *)rt1, rt->rt_priority);
/* key/mask were the same.  compare gateway for all multipaths */
do {
rt1 = (struct rtentry *)rn1;
@@ -366,12 +407,12 @@ rt_mpath_conflict(struct radix_node_head *rnh, struct
continue;

/* check the route priority */
-   if (rt1->rt_priority != rt->rt_priority)
+   if ((rt1->rt_priority & RTP_MASK) != prio)
continue;

/* all key/mask/gateway are the same.  conflicting entry. */
return EEXIST;
-   } while ((rn1 = rn_mpath_next(rn1, 0)) != NULL);
+   } while ((rn1 = rn_mpath_next(rn1, 2)) != NULL);

  different:
return 0;

--- /usr/src/sys/net/radix_mpath.h  Mon Nov 24 04:53:53 2008
+++ radix_mpath.h   Thu Dec 10 19:00:16 2009
@@ -47,6 +47,7 @@ struct sockaddr;
 intrn_mpath_capable(struct radix_node_head *);
 struct radix_node *rn_mpath_next(struct radix_node *, int);
 struct radix_node *rn_mpath_prio(struct radix_node *, u_int8_t);
+struct radix_node *rn_mpath_prio_lookup(struct radix_node *, u_int8_t);
 void   rn_mpath_reprio(struct radix_node *, int);
 intrn_mpath_count(struct radix_node *);
 struct rtentry *rt_mpath_matchgate(struct rtentry *, struct sockaddr *,


--- /usr/src/sys/net/route.cTue Dec  1 00:36:58 2009
+++ route.c Thu Dec 10 19:00:16 2009
@@ -753,8 +753,9 @@ rtrequest1(int req, struct rt_addrinfo *info, u_int8_t
 struct rtentry **ret_nrt, u_int tableid)
 {
int  s = splsoftnet(); int error = 0;
+   int mpaths = 0;

Re: No RTF_UP after route change to an interface that is up

2009-12-12 Thread Doran Mori
I missed handling RTM_CHANGE with my changes.

BTW I just finished testing what wasn't working right with my ospfd
setup before and now everything is working properly. Yipee!

dmo

Index: rtsock.c
===
RCS file: /cvs/src/sys/net/rtsock.c,v
retrieving revision 1.95
diff -u -p rtsock.c
--- rtsock.c3 Nov 2009 10:59:04 -   1.95
+++ rtsock.c13 Dec 2009 05:01:25 -
@@ -490,17 +490,17 @@ route_output(struct mbuf *m, ...)
 */
if (rn_mpath_capable(rnh)) {
/* first find correct priority bucket */
-   rn = rn_mpath_prio(rn, prio);
+   rn = rn_mpath_prio_lookup(rn, prio);
rt = (struct rtentry *)rn;
-   if (prio != RTP_ANY &&
-   (rt->rt_priority & RTP_MASK) != prio) {
+   if (!rn || (prio != RTP_ANY &&
+   (rt->rt_priority & RTP_MASK) != prio)) {
error = ESRCH;
rt->rt_refcnt++;
goto flush;
}

/* if multipath routes */
-   if (rn_mpath_next(rn, 0)) {
+   if (rn_mpath_next(rn, 2)) {
if (gate)
rt = rt_mpath_matchgate(rt, gate, prio);
else if (rtm->rtm_type != RTM_GET)
@@ -639,6 +639,19 @@ report:
ifa->ifa_refcnt++;
rt->rt_ifp = ifp;
}
+   }
+
+   /* new gateway, possible link state change  */
+   if ((LINK_STATE_IS_UP(ifa->ifa_ifp->if_link_state) ||
+   ifa->ifa_ifp->if_link_state ==
LINK_STATE_UNKNOWN) &&
+   ifa->ifa_ifp->if_flags & IFF_UP) {
+   rt->rt_flags |= RTF_UP;
+   rt->rt_priority &= RTP_MASK;
+   }
+   else {
+   rt->rt_flags &= ~RTF_UP;
+   rtm->rtm_flags &= RTF_UP;
+   rt->rt_priority |= RTP_DOWN;
}

/* XXX Hack to allow some flags to be toggled */



Re: No RTF_UP after route change to an interface that is up

2009-12-12 Thread Doran Mori
I was wrong. I just looked at rtalloc_mpath in the multipath code and
realized what I touched is probably going to break this now.

Should I be flipping RTF_MPATH flags when I'm flipping RTF_UP flags?

dmo



Re: No RTF_UP after route change to an interface that is up

2009-12-15 Thread Claudio Jeker
On Sat, Dec 12, 2009 at 11:35:45PM -0800, Doran Mori wrote:
> I was wrong. I just looked at rtalloc_mpath in the multipath code and
> realized what I touched is probably going to break this now.
> 
> Should I be flipping RTF_MPATH flags when I'm flipping RTF_UP flags?
> 

RTF_MPATH has nothing to do with RTF_UP. RTF_MPATH needs to be set if there
is a route to the same destination and priority already in the table.
In other words if there are multiple routes to the destination.
RTM_CHANGE will never change RTF_MPATH.

-- 
:wq Claudio



Re: No RTF_UP after route change to an interface that is up

2009-12-15 Thread Claudio Jeker
On Tue, Dec 08, 2009 at 02:37:51AM -0800, Doran Mori wrote:
> Here's the patch for making the RTF_UP show up properly when changing
> a gateway from/to an up/down link.
> 
> --- /usr/src/sys/net/rtsock.c   Tue Dec  1 00:36:58 2009
> +++ rtsock.cMon Dec  7 23:36:20 2009
> @@ -641,6 +641,19 @@ report:
> }
> }
> 
> +   /* new gateway, possible link state change  */
> +   if ((LINK_STATE_IS_UP(ifa->ifa_ifp->if_link_state) ||
> +   ifa->ifa_ifp->if_link_state ==
> LINK_STATE_UNKNOWN) &&
> +   ifa->ifa_ifp->if_flags & IFF_UP) {
> +   rt->rt_flags |= RTF_UP;
> +   rt->rt_priority &= RTP_MASK;
> +   }
> +   else {
> +   rt->rt_flags &= ~RTF_UP;
> +   rtm->rtm_flags &= RTF_UP;
> +   rt->rt_priority |= RTP_DOWN;
> +   }
> +

This is not correct and explains why you had to change radix_mpath.c.
If you change the rt_priority you must rebalance the dupedkey list so that
the order remains correct. It is also only necessary when the ifp changes.
So here is what I came up with, which is totally untested and maybe wrong as
well.

-- 
:wq Claudio

Index: route.c
===
RCS file: /cvs/src/sys/net/route.c,v
retrieving revision 1.114
diff -u -p -r1.114 route.c
--- route.c 3 Nov 2009 10:59:04 -   1.114
+++ route.c 15 Dec 2009 14:48:36 -
@@ -152,9 +152,6 @@ int okaytoclone(u_int, int);
 intrtflushclone1(struct radix_node *, void *);
 void   rtflushclone(struct radix_node_head *, struct rtentry *);
 intrt_if_remove_rtdelete(struct radix_node *, void *);
-#ifndef SMALL_KERNEL
-intrt_if_linkstate_change(struct radix_node *, void *);
-#endif
 
 #defineLABELID_MAX 5
 
Index: route.h
===
RCS file: /cvs/src/sys/net/route.h,v
retrieving revision 1.65
diff -u -p -r1.65 route.h
--- route.h 3 Nov 2009 10:59:04 -   1.65
+++ route.h 15 Dec 2009 14:48:23 -
@@ -394,6 +394,7 @@ int  rtrequest1(int, struct rt_addrinfo 
 voidrt_if_remove(struct ifnet *);
 #ifndef SMALL_KERNEL
 voidrt_if_track(struct ifnet *);
+int rt_if_linkstate_change(struct radix_node *, void *);
 #endif
 int rtdeletemsg(struct rtentry *, u_int);
 
Index: rtsock.c
===
RCS file: /cvs/src/sys/net/rtsock.c,v
retrieving revision 1.95
diff -u -p -r1.95 rtsock.c
--- rtsock.c3 Nov 2009 10:59:04 -   1.95
+++ rtsock.c15 Dec 2009 14:49:53 -
@@ -638,6 +638,11 @@ report:
rt->rt_ifa = ifa;
ifa->ifa_refcnt++;
rt->rt_ifp = ifp;
+#ifndef SMALL_KERNEL
+   /* recheck link state after ifp change */
+   rt_if_linkstate_change(
+   (struct radix_node *)rt, ifp);
+#endif
}
}
 
@@ -651,6 +656,7 @@ report:
&rt->rt_rmx);
rtm->rtm_index = rt->rt_ifp->if_index;
rtm->rtm_priority = rt->rt_priority & RTP_MASK;
+   rtm->rtm_flags = rt->rt_flags;
if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
if (genmask)



Re: No RTF_UP after route change to an interface that is up

2009-12-15 Thread Doran Mori
> This is not correct and expains why you had to change radix_mpath.c.
> If you change the rt_priority you must rebalance the dupedkey list so that
> the order remains correct. It is also only necessary when the ifp changes.
> So here is what I came up with that is totaly untested and maybe wrong as
> well.
>
> --
> :wq Claudio
>
> Index: route.c
> ===
> RCS file: /cvs/src/sys/net/route.c,v
> retrieving revision 1.114
> diff -u -p -r1.114 route.c
> --- route.c 3 Nov 2009 10:59:04 -   1.114
> +++ route.c 15 Dec 2009 14:48:36 -
> @@ -152,9 +152,6 @@ int okaytoclone(u_int, int);
>  intrtflushclone1(struct radix_node *, void *);
>  void   rtflushclone(struct radix_node_head *, struct rtentry *);
>  intrt_if_remove_rtdelete(struct radix_node *, void *);
> -#ifndef SMALL_KERNEL
> -intrt_if_linkstate_change(struct radix_node *, void *);
> -#endif
>
>  #defineLABELID_MAX 5
>
> Index: route.h
> ===
> RCS file: /cvs/src/sys/net/route.h,v
> retrieving revision 1.65
> diff -u -p -r1.65 route.h
> --- route.h 3 Nov 2009 10:59:04 -   1.65
> +++ route.h 15 Dec 2009 14:48:23 -
> @@ -394,6 +394,7 @@ int  rtrequest1(int, struct rt_addrinfo
>  voidrt_if_remove(struct ifnet *);
>  #ifndef SMALL_KERNEL
>  voidrt_if_track(struct ifnet *);
> +int rt_if_linkstate_change(struct radix_node *, void *);
>  #endif
>  int rtdeletemsg(struct rtentry *, u_int);
>
> Index: rtsock.c
> ===
> RCS file: /cvs/src/sys/net/rtsock.c,v
> retrieving revision 1.95
> diff -u -p -r1.95 rtsock.c
> --- rtsock.c3 Nov 2009 10:59:04 -   1.95
> +++ rtsock.c15 Dec 2009 14:49:53 -
> @@ -638,6 +638,11 @@ report:
> rt->rt_ifa = ifa;
> ifa->ifa_refcnt++;
> rt->rt_ifp = ifp;
> +#ifndef SMALL_KERNEL
> +   /* recheck link state after ifp change */
> +   rt_if_linkstate_change(
> +   (struct radix_node *)rt, ifp);
> +#endif
> }
> }
>
> @@ -651,6 +656,7 @@ report:
> &rt->rt_rmx);
> rtm->rtm_index = rt->rt_ifp->if_index;
> rtm->rtm_priority = rt->rt_priority & RTP_MASK;
> +   rtm->rtm_flags = rt->rt_flags;
> if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
> rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
> if (genmask)
>

Thanks for looking into this.  I've tested your patch and it seems to
work and it is more correct than what I came up with (darn it).

So with that problem solved, some other ones became apparent.  My
radix_mpath.c meddling had to do with priorities not being handled
correctly.

Here's how to reproduce some of the faulty behaviour:
#route add 1/8 192.168.2.2

netstat shows the route:
1/8192.168.2.2UGS00 - 8 em0

#route add 1/8 192.168.2.3 -priority 32
#route add 1/8 192.168.2.4 -priority 32

netstat now shows:
1/8192.168.2.2UGS00 - 8 em2
1/8192.168.2.3UGSP   00 -32 em2
1/8192.168.2.4UGS00 -32 em2

It shouldn't have allowed me to add that last route without the -mpath
flag and only one route has the MPATH flag.  Both will have the MPATH
flag if you use the -mpath flag to route.

#route delete 1/8 192.168.2.4

netstat shows:
1/8192.168.2.2UGS00 - 8 em2
1/8192.168.2.3UGSP   00 -32 em2

It should have removed the MPATH flag but I believe it removed it from
the route with priority equalling 8.  It does the same thing if both
routes had the MPATH flag too.

With my most current patch this all works properly.

It looks like the last road block in my router project is going to be
similar to Vladimir Kirillov's problem.  In my case I'm having a
downed link layer host route take precedence over an up ospf /32 route

Much thanks for the help so far.

dmo



Re: No RTF_UP after route change to an interface that is up

2009-12-17 Thread Stuart Henderson
On 2009-12-15, Doran Mori  wrote:
> It looks like the last road block in my router project is going to be
> similar to Vladimir Kirillov's problem.  In my case I'm having a
> downed link layer host route take precedence over an up ospf /32 route

So it seems that any host routes, even RTP_DOWN, take priority over
higher priority net routes for the same address.

This explains a little trouble I've been having when I restart ospfd
(which I do a bit more often than is good for me, but haven't been able
to put my finger on exactly why I have to...)

My bgp routers run sessions between loopbacks on lo1 which are advertised
into ospf. My defaults are localhost -reject routes. A pretty typical setup
for a network with multiple links between routers.

If I stop and restart ospfd on router X, typically the bgp sessions
go down, and if I go to the other routers I see dynamic host routes
directing X's traffic towards 127.0.0.1;

Y# route -n get X
   route to: aa.bb.cc.9
destination: aa.bb.cc.9
gateway: 127.0.0.1
  interface: lo0
 if address: 127.0.0.1
   priority: 56 (default)
  flags: 
 use   mtuexpire
   41060 33160L  522

Y# netstat -rnfinet | grep aa.bb.cc.9
aa.bb.cc.9   127.0.0.1  UGHD   241188 33160 L  56 lo0
aa.bb.cc.9/32aa.bb.cc.244   UGP00 -32 vlan2244
aa.bb.cc.9/32aa.bb.cc.243   UGP00 -32 vlan2244

and I have to route delete aa.bb.cc.9 to get things flowing again.
I'm not quite sure why it's RTF_DYNAMIC, ICMP redirects are off and
I haven't spotted where other than ICMP redirects that sets this,
so I'm not entirely sure where this entry has come from.

All pretty recent code, Y is running Nov 11th, X running Dec 14th,
this isn't new though, I have just managed to get past enough other
problems that I can see it a bit more clearly..;-)



Re: No RTF_UP after route change to an interface that is up

2009-12-18 Thread Claudio Jeker
On Thu, Dec 17, 2009 at 09:17:12PM +, Stuart Henderson wrote:
> On 2009-12-15, Doran Mori  wrote:
> > It looks like the last road block in my router project is going to be
> > similar to Vladimir Kirillov's problem.  In my case I'm having a
> > downed link layer host route take precedence over an up ospf /32 route
> 
> So it seems that any host routes, even RTP_DOWN, take priority over
> higher priority net routes for the same address.
> 

Host routes are always more specific than network routes (even /32 ones).
So they will be used in that case. Currently the lookup will not try less
specific routes in case they're RTP_DOWN (or actually not RTF_UP). This
could be regarded as a bug -- the code is just too insane to fix it easily.

> This explains a little trouble I've been having when I restart ospfd
> (which I do a bit more often than is good for me, but haven't been able
> to put my finger on exactly why I have to...)
> 

Hmm. If you know what goes wrong I will try to fix it :)

> My bgp routers run sessions between loopbacks on lo1 which are advertised
> into ospf. My defaults are localhost -reject routes. A pretty typical setup
> for a network with multiple links between routers.
> 
> If I stop and restart ospfd on router X, typically the bgp sessions
> go down, and if I go to the other routers I see dynamic host routes
> directing X's traffic towards 127.0.0.1;
> 
> Y# route -n get X
>route to: aa.bb.cc.9
> destination: aa.bb.cc.9
> gateway: 127.0.0.1
>   interface: lo0
>  if address: 127.0.0.1
>priority: 56 (default)
>   flags: 
>  use   mtuexpire
>41060 33160L  522
> 
> Y# netstat -rnfinet | grep aa.bb.cc.9
> aa.bb.cc.9   127.0.0.1  UGHD   241188 33160 L  56 lo0
> aa.bb.cc.9/32aa.bb.cc.244   UGP00 -32 vlan2244
> aa.bb.cc.9/32aa.bb.cc.243   UGP00 -32 vlan2244
> 
> and I have to route delete aa.bb.cc.9 to get things flowing again.
> I'm not quite sure why it's RTF_DYNAMIC, ICMP redirects are off and
> I haven't spotted where other than ICMP redirects that sets this,
> so I'm not entirely sure where this entry has come from.
> 

This is PMTU fucking around because TCP is no longer getting ACKs back and
so it goes and tries to disable PMTU by creating a dynamic route cloned
from the parent route. In your case that's the default reject route.
Now that's totally stupid I know and especially the created route is
wrong in so far that the reject bit is dropped. It is also questionable
why we should create a dynamic route cloned from a reject or blackhole
route.

> All pretty recent code, Y is running Nov 11th, X running Dec 14th,
> this isn't new though, I have just managed to get past enough other
> problems that I can see it a bit more clearly..;-)
> 

As a workaround I would try to use blackhole routes instead of reject ones
and see if this will make the event of TCPs PMTU magic kicking in less
probable.

-- 
:wq Claudio



Re: No RTF_UP after route change to an interface that is up

2009-12-18 Thread Stuart Henderson
On 2009/12/18 12:31, Claudio Jeker wrote:
> > So it seems that any host routes, even RTP_DOWN, take priority over
> > higher priority net routes for the same address.
> 
> Host routes are allways more specific then network routes (even /32 ones).
> So they will used in that case. Currently the lookup will not try less
> specific routes in case their RTP_DOWN (or actually not RTF_UP). This
> could be regarded as bug -- the code is just too insane to fix it easily.

Hmmm... given this, would it make any kind of sense to have the routing
daemons install /32 as host rather than network routes?

> > This explains a little trouble I've been having when I restart ospfd
> > (which I do a bit more often than is good for me, but haven't been able
> > to put my finger on exactly why I have to...)
> 
> Hmm. If you know what goes wrong I will try to fix it :)

The relevant machines were running old code, but this week I've finally
got them over the nat-to bump, so I'll be able to do some meaningful
testing with -current soon (I hate reporting problems unless I know
I've collected enough information to at least point someone in
approximately the right direction ;)

> This is PMTU fucking around because TCP is no longer getting ACKs back and
> so it goes and tries to disable PMTU by creating a dynamic route cloned
> from the parent route. In your case that's the default reject route.
> Now that's totaly stupid I know and especially the created route is
> wrong in so far that the reject bit is dropped. It is also questionable
> why we should create a dynamic route cloned from a reject or blackhole
> route.   

aha...yes this does indeed seem to be the explanation, and certainly
for disabling PMTU, cloning a reject or blackhole route makes no sense.

> As a workaround I would try to use blackhole routes instead of reject ones
> and see if this will make the event of TCPs PMTU magic kicking in less
> probable.

This doesn't noticeably help. But now I remember that since I started
sending full BGP tables everywhere I don't actually need a default route
to redist into OSPF any more...and after removing the route completely,
this does work as expected, fixing my immediate problem.



Re: No RTF_UP after route change to an interface that is up

2009-12-19 Thread Claudio Jeker
On Fri, Dec 18, 2009 at 02:22:11PM +, Stuart Henderson wrote:
> On 2009/12/18 12:31, Claudio Jeker wrote:
> > > So it seems that any host routes, even RTP_DOWN, take priority over
> > > higher priority net routes for the same address.
> > 
> > Host routes are allways more specific then network routes (even /32 ones).
> > So they will used in that case. Currently the lookup will not try less
> > specific routes in case their RTP_DOWN (or actually not RTF_UP). This
> > could be regarded as bug -- the code is just too insane to fix it easily.
> 
> Hmmm... given this, would it make any kind of sense to have the routing
> daemons install /32 as host rather than network routes?
> 

No I don't think this is a good decision. I prefer having them different
from the dynamic host routes generated by arp and icmp.
This is a very simple way to ensure that those special routes are a
best match and work (or don't work) in all cases.

> > > This explains a little trouble I've been having when I restart ospfd
> > > (which I do a bit more often than is good for me, but haven't been able
> > > to put my finger on exactly why I have to...)
> > 
> > Hmm. If you know what goes wrong I will try to fix it :)
> 
> The relevant machines were running old code, but this week I've finally
> got them over the nat-to bump, so I'll be able to do some meaningful
> testing with -current soon (I hate reporting problems unless I know
> I've collected enough information to at least point someone in
> approximately the right direction ;)
> 

I know a few things especially with new interface addresses are still not
perfect. I will try to unslack on ospfd and ospf6d in the next days.

> > This is PMTU fucking around because TCP is no longer getting ACKs back and
> > so it goes and tries to disable PMTU by creating a dynamic route cloned
> > from the parent route. In your case that's the default reject route.
> > Now that's totaly stupid I know and especially the created route is
> > wrong in so far that the reject bit is dropped. It is also questionable
> > why we should create a dynamic route cloned from a reject or blackhole
> > route.   
> 
> aha...yes this does indeed seem to be the explanation, and certainly
> for disabling PMTU, cloning a reject or blackhole route makes no sense.
> 

See attached diff. Not seriously tested but until now no flames are
exiting my laptop...

> > As a workaround I would try to use blackhole routes instead of reject ones
> > and see if this will make the event of TCPs PMTU magic kicking in less
> > probable.
> 
> This doesn't noticably help. But now I remember that since I started
> sending full BGP tables everywhere I don't actually need a default route
> to redist into OSPF any more...and after removing the route completely,
> this does work as expected, fixing my immediate problem.
> 

-- 
:wq Claudio

Index: netinet/ip_icmp.c
===
RCS file: /cvs/src/sys/netinet/ip_icmp.c,v
retrieving revision 1.86
diff -u -p -r1.86 ip_icmp.c
--- netinet/ip_icmp.c   13 Nov 2009 20:54:05 -  1.86
+++ netinet/ip_icmp.c   18 Dec 2009 14:41:42 -
@@ -881,6 +881,11 @@ icmp_mtudisc_clone(struct sockaddr *dst,
if (rt == 0)
return (NULL);
 
+   /* Check if the route is actually usable */
+   if (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE) ||
+   (rt->rt_flags & RTF_UP) == 0)
+   return (NULL);
+
/* If we didn't get a host route, allocate one */
 
if ((rt->rt_flags & RTF_HOST) == 0) {