defer routing table updates on link state changes (again)

2013-10-19 Thread Mike Belopuhov
hi,

since mpi's if_index diff is now in, this should probably
go in as well.  it has received some testing in the meantime.

original description:

in order to make our life a bit easier and prevent rogue
accesses to the routing table from the hardware interrupt
context violating all kinds of spl assumptions we would
like if_link_state_change that is called by network device
drivers in their interrupt service routines to defer its
work to the process context or thereabouts.

please note that a token (an interface index) is passed
to the workq in order to make sure that if the interface
would be gone by the time syswq goes around to run the
task it would just fall through.

ok?

diff --git sys/net/if.c sys/net/if.c
index 6dafd0d..5b6800a 100644
--- sys/net/if.c
+++ sys/net/if.c
@@ -79,10 +79,11 @@
 #include sys/protosw.h
 #include sys/kernel.h
 #include sys/ioctl.h
 #include sys/domain.h
 #include sys/sysctl.h
+#include sys/workq.h
 
 #include net/if.h
 #include net/if_dl.h
 #include net/if_media.h
 #include net/if_types.h
@@ -151,10 +152,12 @@ int   if_clone_list(struct if_clonereq *);
 struct if_clone*if_clone_lookup(const char *, int *);
 
 void   if_congestion_clear(void *);
 intif_group_egress_build(void);
 
+void   if_link_state_change_task(void *, void *);
+
 intifai_cmp(struct ifaddr_item *,  struct ifaddr_item *);
 void   ifa_item_insert(struct sockaddr *, struct ifaddr *, struct ifnet *);
 void   ifa_item_remove(struct sockaddr *, struct ifaddr *, struct ifnet *);
 #ifndef SMALL_KERNEL
 void   ifa_print_rb(void);
@@ -1106,21 +1109,39 @@ if_up(struct ifnet *ifp)
 
m_clinitifp(ifp);
 }
 
 /*
- * Process a link state change.
- * NOTE: must be called at splsoftnet or equivalent.
+ * Schedule a link state change task.
  */
 void
 if_link_state_change(struct ifnet *ifp)
 {
-   rt_ifmsg(ifp);
+   /* try to put the routing table update task on syswq */
+   workq_add_task(NULL, 0, if_link_state_change_task,
+   (void *)((unsigned long)ifp-if_index), NULL);
+}
+
+/*
+ * Process a link state change.
+ */
+void
+if_link_state_change_task(void *arg, void *unused)
+{
+   unsigned int index = (unsigned long)arg;
+   struct ifnet *ifp;
+   int s;
+
+   s = splsoftnet();
+   if ((ifp = if_get(index)) != NULL) {
+   rt_ifmsg(ifp);
 #ifndef SMALL_KERNEL
-   rt_if_track(ifp);
+   rt_if_track(ifp);
 #endif
-   dohooks(ifp-if_linkstatehooks, 0);
+   dohooks(ifp-if_linkstatehooks, 0);
+   }
+   splx(s);
 }
 
 /*
  * Handle interface watchdog timer routines.  Called
  * from softclock, we decrement timers (if set) and



Re: defer routing table updates on link state changes (again)

2013-10-19 Thread Claudio Jeker
On Sat, Oct 19, 2013 at 01:26:39PM +0200, Mike Belopuhov wrote:
 hi,
 
 since mpi's if_index diff is now in, this should probably
 go in as well.  it has received some testing in the meantime.
 
 original description:
 
 in order to make our life a bit easier and prevent rogue
 accesses to the routing table from the hardware interrupt
 context violating all kinds of spl assumptions we would
 like if_link_state_change that is called by network device
 drivers in their interrupt service routines to defer its
 work to the process context or thereabouts.
 
 please note that a token (an interface index) is passed
 to the workq in order to make sure that if the interface
 would be gone by the time syswq goes around to run the
 task it would just fall through.
 
 ok?

OK claudio@
 
 diff --git sys/net/if.c sys/net/if.c
 index 6dafd0d..5b6800a 100644
 --- sys/net/if.c
 +++ sys/net/if.c
 @@ -79,10 +79,11 @@
  #include sys/protosw.h
  #include sys/kernel.h
  #include sys/ioctl.h
  #include sys/domain.h
  #include sys/sysctl.h
 +#include sys/workq.h
  
  #include net/if.h
  #include net/if_dl.h
  #include net/if_media.h
  #include net/if_types.h
 @@ -151,10 +152,12 @@ int if_clone_list(struct if_clonereq *);
  struct if_clone  *if_clone_lookup(const char *, int *);
  
  void if_congestion_clear(void *);
  int  if_group_egress_build(void);
  
 +void if_link_state_change_task(void *, void *);
 +
  int  ifai_cmp(struct ifaddr_item *,  struct ifaddr_item *);
  void ifa_item_insert(struct sockaddr *, struct ifaddr *, struct ifnet *);
  void ifa_item_remove(struct sockaddr *, struct ifaddr *, struct ifnet *);
  #ifndef SMALL_KERNEL
  void ifa_print_rb(void);
 @@ -1106,21 +1109,39 @@ if_up(struct ifnet *ifp)
  
   m_clinitifp(ifp);
  }
  
  /*
 - * Process a link state change.
 - * NOTE: must be called at splsoftnet or equivalent.
 + * Schedule a link state change task.
   */
  void
  if_link_state_change(struct ifnet *ifp)
  {
 - rt_ifmsg(ifp);
 + /* try to put the routing table update task on syswq */
 + workq_add_task(NULL, 0, if_link_state_change_task,
 + (void *)((unsigned long)ifp-if_index), NULL);
 +}
 +
 +/*
 + * Process a link state change.
 + */
 +void
 +if_link_state_change_task(void *arg, void *unused)
 +{
 + unsigned int index = (unsigned long)arg;
 + struct ifnet *ifp;
 + int s;
 +
 + s = splsoftnet();
 + if ((ifp = if_get(index)) != NULL) {
 + rt_ifmsg(ifp);
  #ifndef SMALL_KERNEL
 - rt_if_track(ifp);
 + rt_if_track(ifp);
  #endif
 - dohooks(ifp-if_linkstatehooks, 0);
 + dohooks(ifp-if_linkstatehooks, 0);
 + }
 + splx(s);
  }
  
  /*
   * Handle interface watchdog timer routines.  Called
   * from softclock, we decrement timers (if set) and
 

-- 
:wq Claudio



Re: defer routing table updates on link state changes

2013-09-13 Thread Martin Pieuchot
On 12/09/13(Thu) 13:50, Philip Guenther wrote:
 On Thu, Sep 12, 2013 at 10:19 AM, Mike Belopuhov m...@belopuhov.com wrote:
 ...
  either way, we need to move forward on this.  we want to use if_index
  for the purpose of looking up the interface w/o a pointer to the ifnet.
 
 This sounds like just using a pid to identify processes and hoping
 they haven't wrapped around...and the places the kernel does that are
 wrong too**.  If pointers are out because refcounting them to avoid
 dangling pointers leaves them impossible to reliably clean up in
 bounded time (i.e., you need weak pointers), then there should be a
 generation number to catch the wraps.  IMO.

Yes!  It's really similar to pid for processes, except that interface
indexes should not be recycled that often.

I agree that having a generation number would help for the wrapping
case, but for now I *guess* that the limit of USHRT_MAX interface
indexes generated during an uptime before wrapping is enough to move
forward.

 (I don't get why it's useful for tun0-in-layer3 mode to have the same
 if_index as tun0-in-layer2 mode.  The properties are so different that
 there doesn't really seem to be continuity of identity between them.)

Me neither, is there any reason for that before I start digging into it?



Re: defer routing table updates on link state changes

2013-09-13 Thread Stuart Henderson
On 2013/09/13 09:10, Martin Pieuchot wrote:
 On 12/09/13(Thu) 13:50, Philip Guenther wrote:
  (I don't get why it's useful for tun0-in-layer3 mode to have the same
  if_index as tun0-in-layer2 mode.  The properties are so different that
  there doesn't really seem to be continuity of identity between them.)
 
 Me neither, is there any reason for that before I start digging into it?

I haven't checked, but it is possible that a program sets link0 mode
itself and keeps using the old index number. OpenVPN would be the
most likely candidate for this.



Re: defer routing table updates on link state changes

2013-09-13 Thread Martin Pieuchot
On 12/09/13(Thu) 18:56, Henning Brauer wrote:
 * Mike Belopuhov m...@belopuhov.com [2013-09-12 17:54]:
  it makes no sense whatsoever, reyk.  those indices can be easily
  stolen and nobody guarantees that if you create vlan10, vlan11,
  then destroy vlan10, create vlan12 and vlan10 that vlan10 will
  have the same index as before.  in fact it might be a different
  interface created for a different purpose days after.  who knows?
  if snmp client relies on this behavior, it's broken since we have
  never made any provisions regarding how we use those indices.
 
 correct.
 
 however, it is not reyk who's on drugs here, it's snmp itself. using
 the OS-private ifindex and making assumptions about it is the root
 problem. but since that's in the standards, there are only 2 possible
 solutions I see:
 -keep trying to please snmp in the way we assign ifindex

That would involve modifying the actual code to not reuse the *last*
but *first* free index ;) 

But it doesn't help at all for our problem, which means that we need
another API to not pass pointers to interfaces in our kernel. 

 -let snmpd (or sth else) make up ifindices just for that purpose

That looks like the best solution to me.  If a userland program want
to expose following numbers, then it probably needs to create its own
indexes anyway, even our actual in-kernel code doesn't guarantee that.

In the end we need two different tables, one with an unique index per
interface (to avoid passing pointers in kernel) and another one with
the biggest index equals to the number of interfaces (to not confuse
snmp).  IMHO we don't need these two tables in the kernel.

Martin



Re: defer routing table updates on link state changes

2013-09-13 Thread Reyk Floeter
On Fri, Sep 13, 2013 at 09:53:03AM +0200, Martin Pieuchot wrote:
  -let snmpd (or sth else) make up ifindices just for that purpose
 
 That looks like the best solution to me.  If a userland program want
 to expose following numbers, then it probably needs to create its own
 indexes anyway, even our actual in-kernel code doesn't guarantee that.
 

No, that's utterly stupid.  The interface index is a value that is
supposed to be consistent across the system.  How should it be synced
with other userland tools?  How would you handle it in if_nametoindex
and friends?

As I said before: it is not a big issue for snmpd and I can live with
the fact that your changes might confuse SNMP clients a bit.  Just go
ahead, but

 In the end we need two different tables, one with an unique index per
 interface (to avoid passing pointers in kernel) and another one with
 the biggest index equals to the number of interfaces (to not confuse
 snmp).  IMHO we don't need these two tables in the kernel.
 

please read the history: if_index _was_ created for SNMP.  Then it was
used for routing and other stuff.  Now you want to use it as a PID
for something else.  Fine.  But please stop claiming that SNMP is
doing anything wrong with if_indexes when they were created FOR SNMP.

Reyk



Re: defer routing table updates on link state changes

2013-09-13 Thread Stuart Henderson
On 2013/09/13 09:53, Martin Pieuchot wrote:
 On 12/09/13(Thu) 18:56, Henning Brauer wrote:
  -let snmpd (or sth else) make up ifindices just for that purpose
 
 That looks like the best solution to me.  If a userland program want
 to expose following numbers, then it probably needs to create its own
 indexes anyway, even our actual in-kernel code doesn't guarantee that.
 
 In the end we need two different tables, one with an unique index per
 interface (to avoid passing pointers in kernel) and another one with
 the biggest index equals to the number of interfaces (to not confuse
 snmp).  IMHO we don't need these two tables in the kernel.

Having snmpd keep its own table would have another advantage: if you add
interfaces, you would be able to get new stable index numbers by just
restarting snmpd, allowing you to update index numbers in monitoring
systems without rebooting the kernel.



Re: defer routing table updates on link state changes

2013-09-13 Thread Henning Brauer
* Reyk Floeter r...@openbsd.org [2013-09-13 10:20]:
 please read the history: if_index _was_ created for SNMP.

I'm not at all certain you got the history right there...

-- 
Henning Brauer, h...@bsws.de, henn...@openbsd.org
BS Web Services GmbH, http://bsws.de, Full-Service ISP
Secure Hosting, Mail and DNS Services. Dedicated Servers, Root to Fully Managed
Henning Brauer Consulting, http://henningbrauer.com/



Re: defer routing table updates on link state changes

2013-09-13 Thread Martin Pieuchot
On 13/09/13(Fri) 10:14, Reyk Floeter wrote:
 On Fri, Sep 13, 2013 at 09:53:03AM +0200, Martin Pieuchot wrote:
   -let snmpd (or sth else) make up ifindices just for that purpose
  
  That looks like the best solution to me.  If a userland program want
  to expose following numbers, then it probably needs to create its own
  indexes anyway, even our actual in-kernel code doesn't guarantee that.
  
 
 No, that's utterly stupid.  The interface index is a value that is
 supposed to be consistent across the system.  How should it be synced
 with other userland tools?  How would you handle it in if_nametoindex
 and friends?

So what do *you* suggest which is not utterly stupid?

I can't see where it is written that the interface index should be
consistent, from the RFC 1156, I read:

ifIndex { ifEntry 1 }

A unique value for each interface.  Its value ranges
between 1 and the value of ifNumber.  The value for each
interface must remain constant at least from one re-
initialization of the entity's network management system
to the next re-initialization.


 As I said before: it is not a big issue for snmpd and I can live with
 the fact that your changes might confuse SNMP clients a bit.  Just go
 ahead, but

Well it looks like a big issue because you keep complaining.

Would you exclaiming what confusing SNMP clients means?  What breaks?
Can it be solved?  What is the technical problem here?

  In the end we need two different tables, one with an unique index per
  interface (to avoid passing pointers in kernel) and another one with
  the biggest index equals to the number of interfaces (to not confuse
  snmp).  IMHO we don't need these two tables in the kernel.
  
 
 please read the history: if_index _was_ created for SNMP.  Then it was
 used for routing and other stuff.  Now you want to use it as a PID
 for something else.  Fine.  But please stop claiming that SNMP is
 doing anything wrong with if_indexes when they were created FOR SNMP.

I'm not clamming anything that SNMP is doing wrong, I'm not rewriting
history, I'm just saying that the actual index generation code is broken
for SNMP because it doesn't returns a value between 1 and the number of
interfaces.



Re: defer routing table updates on link state changes

2013-09-13 Thread Reyk Floeter
On Fri, Sep 13, 2013 at 10:45:57AM +0200, Martin Pieuchot wrote:
  No, that's utterly stupid.  The interface index is a value that is
  supposed to be consistent across the system.  How should it be synced
  with other userland tools?  How would you handle it in if_nametoindex
  and friends?
 
 So what do *you* suggest which is not utterly stupid?
 

I stopped suggesting things that get ignored.

 I can't see where it is written that the interface index should be
 consistent, from the RFC 1156, I read:
 
 ifIndex { ifEntry 1 }
 
 A unique value for each interface.  Its value ranges
 between 1 and the value of ifNumber.  The value for each
 interface must remain constant at least from one re-
 initialization of the entity's network management system
 to the next re-initialization.
 

I read your statement and read the RFC text and I totally don't get
how they fit together.  Isn't the RFC saying the opposite of what
you're saying?  I will get some coffee now and try to read it again
afterwards.

 
  As I said before: it is not a big issue for snmpd and I can live with
  the fact that your changes might confuse SNMP clients a bit.  Just go
  ahead, but
 
 Well it looks like a big issue because you keep complaining.
 
 Would you exclaiming what confusing SNMP clients means?  What breaks?
 Can it be solved?  What is the technical problem here?
 

Well

   In the end we need two different tables, one with an unique index per
   interface (to avoid passing pointers in kernel) and another one with
   the biggest index equals to the number of interfaces (to not confuse
   snmp).  IMHO we don't need these two tables in the kernel.
   
  
  please read the history: if_index _was_ created for SNMP.  Then it was
  used for routing and other stuff.  Now you want to use it as a PID
  for something else.  Fine.  But please stop claiming that SNMP is
  doing anything wrong with if_indexes when they were created FOR SNMP.
 
 I'm not clamming anything that SNMP is doing wrong, I'm not rewriting
 history, I'm just saying that the actual index generation code is broken
 for SNMP because it doesn't returns a value between 1 and the number of
 interfaces.

Yes, in theory if_index should be fixed and return a consistent number
between 1 and the number of interfaces.  But this is obviously
difficult and I'm not sure if it's worth the effort.  So the hack
that you're going to remove was a best effort.  But putting another
interface index abstraction layer in userland (via snmpd or some
shared db) is just not the right way to do it.  We either have a
reliable if_index from the kernel or we don't.  But inventing another
thing in userland doesn't make sense to me.

So let's get back to the technical part:

- Do you even have to use if_index?  Is it performance critical or
could you lookup the interface by name?

- I think introducing a new kernel-only if_id for your needs would be
the better approach.  Then we could fix if_index to conform to the
definition between 1 and number of interfaces.

- But I can also live with a modified if_index that is unreliable.
What am I suggesting?  In this situation: just continue.

Reyk



Re: defer routing table updates on link state changes

2013-09-13 Thread Alexey Suslikov
Reyk Floeter wrote:

 Yes, in theory if_index should be fixed and return a consistent number
 between 1 and the number of interfaces.  But this is obviously
 difficult and I'm not sure if it's worth the effort.  So the hack
 that you're going to remove was a best effort.  But putting another
 interface index abstraction layer in userland (via snmpd or some
 shared db) is just not the right way to do it.  We either have a
 reliable if_index from the kernel or we don't.  But inventing another
 thing in userland doesn't make sense to me.

If above theory doesn't dictate all interfaces must exist (it shouldn't
because of hot-plug interfaces), kernel can operate on fixed predefined
ifIndex table like this:

tun ifIndex (only have 256 of them because of unit_no):
  1 - 00:bd:xx:xx:xx:00 - tun0
256 - 00:bd:xx:xx:xx:ff - tun255

vether ifIndex (only have 65536 of them?):
257 - fe:e1:ba:d0:xx:xx - vether0
 65,792 - fe:e1:ba:d0:xx:xx - vether65535

physical ifIndex (claim to support ~1M of physical interfaces):
 65,793 - 00:25:90:xx:xx:aa - em0
 65,794 - 00:25:90:xx:xx:ab - em1
  1,179,906 - xx:xx:xx:xx:xx:xx - foo77

trunk ifIndex (claim to support ~17M of trunk interfaces, by unit_no):
  1,179,907 - xx:xx:xx:xx:xx:xx - trunk0
 19,005,699 - xx:xx:xx:xx:xx:xx - trunk1699

vlan ifIndex (claim to support ~280M of vlan interfaces, by unit_no):
 19,005,700 - xx:xx:xx:xx:xx:xx - vlan0
304,218,372 - xx:xx:xx:xx:xx:xx - vlan27999

and so on, up to 2,147,483,647.

IMO, cloners aren't so problematic (because of algorithmically controlled
enumeration and unit number assignment) as physical interfaces are.

I think, the best is to let ifIndexes be assigned to physical interfaces
via ifconfig, but let cloners to do their assignments automatically.

And do not let snmpd to operate on interface without an ifIndex: having no
ifIndex means no interface available.



Re: defer routing table updates on link state changes

2013-09-12 Thread Reyk Floeter
On Thu, Sep 12, 2013 at 06:51:46AM +0200, Claudio Jeker wrote:
 On Tue, Aug 27, 2013 at 01:39:14PM +0200, Martin Pieuchot wrote:
  I think that's the right approach but the current code generating
  interfaces indexes is too clever from my point of view, it tries
  to reuse the last index if possible.  This could lead to some
  funny races if we detach and reattach an interface before the
  task get scheduled.
  
  So I'm ok with your diff if we avoid to reuse the last index too
  soon.  Diff below does that.
 
 We should not do that since this was done for tun(4) switching between
 modes without getting new ifindexes. It also reduces the holes in the
 ifindex array on system that dynamically allocate interfaces for e.g.
 VPNs.
  

Claudio is right.

The if_indexes are excessively used by the SNMP MIBs for various
interface-related values.  I'm not just talking about the
implementation in snmpd(8), I'm talking about the actual standard MIBs
that use ifIndex values everywhere.  Having dynamic / random /
inconsistent if_indexes confuse SNMP a lot.

For example, you have to query the IfIndex via SNMP to get further
information, like the ifName or statistics, and most monitoring
systems would save interface information based on the index - they
would not recognize that tun0 with if_index 10 is the same interface
(from an OpenBSD point of view) as tun0 with an if_index 11.  It is
not guaranteed in OpenBSD, but we shouldn't make it worse.

btw., just have a look on a system with net-snmpd's MIBs.  On a Mac,
which ships with Apple's version of net-snmp by default, go to
/usr/share/snmp/mibs and run:
$ grep -i ifindex * | wc -l
 162

Reyk



Re: defer routing table updates on link state changes

2013-09-12 Thread Mike Belopuhov
On 12 September 2013 17:18, Martin Pieuchot mpieuc...@nolizard.org wrote:
 FWIW it would be interesting to modify tun(4) so that it doesn't
 need to detach/reattach itself when switching between mode, this
 would allow us to stop reusing the last index.


this definitely makes a lot of sense.



Re: defer routing table updates on link state changes

2013-09-12 Thread Reyk Floeter
On Thu, Sep 12, 2013 at 05:18:39PM +0200, Martin Pieuchot wrote:
  For example, you have to query the IfIndex via SNMP to get further
  information, like the ifName or statistics, and most monitoring
  systems would save interface information based on the index - they
  would not recognize that tun0 with if_index 10 is the same interface
  (from an OpenBSD point of view) as tun0 with an if_index 11.  It is
  not guaranteed in OpenBSD, but we shouldn't make it worse.
 
 All our interface drivers are associated to one index when they are
 attached and it does not change.
 
 The only thing is that tun(4) is special because internally when it
 switches from L3 to L2 or vice versa it detaches and reattaches itself.
 That's why this hack of reusing the last index is needed.
 

It also matters if you create destroy and re-create any other cloner
interface (vlan, ...).

 But if I understand correctly what you're saying about querying the
 IfIndex, I find it even more dangerous to reuse the last index,
 because I can create a configuration where I change an interface
 while keeping the same index (with usb interfaces for example).
 

I think that's nonsense.

 FWIW it would be interesting to modify tun(4) so that it doesn't
 need to detach/reattach itself when switching between mode, this
 would allow us to stop reusing the last index.
 

Or you could simply rewrite tun(4)?

Isn't there any other way to do what you want without stopping to
reuse the index?  SNMP simply expects that if_indexes are fairly
static, linear, and without holes.  Why should we change that in
OpenBSD?  Is there any security reason to randomize the indexes -
No.  

Reyk



Re: defer routing table updates on link state changes

2013-09-12 Thread Martin Pieuchot
On 12/09/13(Thu) 16:40, Reyk Floeter wrote:
 On Thu, Sep 12, 2013 at 06:51:46AM +0200, Claudio Jeker wrote:
  On Tue, Aug 27, 2013 at 01:39:14PM +0200, Martin Pieuchot wrote:
   I think that's the right approach but the current code generating
   interfaces indexes is too clever from my point of view, it tries
   to reuse the last index if possible.  This could lead to some
   funny races if we detach and reattach an interface before the
   task get scheduled.
   
   So I'm ok with your diff if we avoid to reuse the last index too
   soon.  Diff below does that.
  
  We should not do that since this was done for tun(4) switching between
  modes without getting new ifindexes. It also reduces the holes in the
  ifindex array on system that dynamically allocate interfaces for e.g.
  VPNs.
   
 
 Claudio is right.
 
 The if_indexes are excessively used by the SNMP MIBs for various
 interface-related values.  I'm not just talking about the
 implementation in snmpd(8), I'm talking about the actual standard MIBs
 that use ifIndex values everywhere.  Having dynamic / random /
 inconsistent if_indexes confuse SNMP a lot.

I'm not sure to understand.  What do you mean by dynamic / random /
inconsistent if_indexes? 

 For example, you have to query the IfIndex via SNMP to get further
 information, like the ifName or statistics, and most monitoring
 systems would save interface information based on the index - they
 would not recognize that tun0 with if_index 10 is the same interface
 (from an OpenBSD point of view) as tun0 with an if_index 11.  It is
 not guaranteed in OpenBSD, but we shouldn't make it worse.

All our interface drivers are associated to one index when they are
attached and it does not change.

The only thing is that tun(4) is special because internally when it
switches from L3 to L2 or vice versa it detaches and reattaches itself.
That's why this hack of reusing the last index is needed.

But if I understand correctly what you're saying about querying the
IfIndex, I find it even more dangerous to reuse the last index,
because I can create a configuration where I change an interface
while keeping the same index (with usb interfaces for example).

FWIW it would be interesting to modify tun(4) so that it doesn't
need to detach/reattach itself when switching between mode, this
would allow us to stop reusing the last index.

Martin



Re: defer routing table updates on link state changes

2013-09-12 Thread Mike Belopuhov
On 12 September 2013 17:31, Reyk Floeter r...@openbsd.org wrote:
 On Thu, Sep 12, 2013 at 05:18:39PM +0200, Martin Pieuchot wrote:
  For example, you have to query the IfIndex via SNMP to get further
  information, like the ifName or statistics, and most monitoring
  systems would save interface information based on the index - they
  would not recognize that tun0 with if_index 10 is the same interface
  (from an OpenBSD point of view) as tun0 with an if_index 11.  It is
  not guaranteed in OpenBSD, but we shouldn't make it worse.

 All our interface drivers are associated to one index when they are
 attached and it does not change.

 The only thing is that tun(4) is special because internally when it
 switches from L3 to L2 or vice versa it detaches and reattaches itself.
 That's why this hack of reusing the last index is needed.


 It also matters if you create destroy and re-create any other cloner
 interface (vlan, ...).


it makes no sense whatsoever, reyk.  those indices can be easily
stolen and nobody guarantees that if you create vlan10, vlan11,
then destroy vlan10, create vlan12 and vlan10 that vlan10 will
have the same index as before.  in fact it might be a different
interface created for a different purpose days after.  who knows?
if snmp client relies on this behavior, it's broken since we have
never made any provisions regarding how we use those indices.

 But if I understand correctly what you're saying about querying the
 IfIndex, I find it even more dangerous to reuse the last index,
 because I can create a configuration where I change an interface
 while keeping the same index (with usb interfaces for example).


 I think that's nonsense.


looks like you misunderstand the problem we're dealing with here.

 FWIW it would be interesting to modify tun(4) so that it doesn't
 need to detach/reattach itself when switching between mode, this
 would allow us to stop reusing the last index.


 Or you could simply rewrite tun(4)?

 Isn't there any other way to do what you want without stopping to
 reuse the index?  SNMP simply expects that if_indexes are fairly
 static, linear, and without holes.  Why should we change that in
 OpenBSD?  Is there any security reason to randomize the indexes -
 No.

 Reyk


or snmp can simply stop assuming things.  if_index wasn't created
for snmp in the first place.



Re: defer routing table updates on link state changes

2013-09-12 Thread Reyk Floeter
On Thu, Sep 12, 2013 at 07:19:34PM +0200, Mike Belopuhov wrote:
 either way, we need to move forward on this.  we want to use if_index
 for the purpose of looking up the interface w/o a pointer to the ifnet.
 should we implement additional indices for that or snmp problem will
 be dealt with?  currently if you reuse an index and reuse the memory
 for the ifnet you may suddenly pick a new interface and perform an
 operation on it that might not necessarily make sense in the context.

You should check all cloners and make sure that they don't have to be
recreated for any of the options.  tun(4) should be updated.  I did it
for vlan(4)/svlan(4) with rev 1.86 + 1.87 in 2011.  Are there any
others left?  As an alternative to reusing if_index, it would even be
nice to be able to completely reset the configuration of any interface
without recreating it - even for physical interfaces like WLAN or
Ethernet.  This allows you to start over with the interface
configuration without loosing its if_index or reference.

Reyk



Re: defer routing table updates on link state changes

2013-09-12 Thread Mike Belopuhov
On 12 September 2013 18:28, Mike Belopuhov m...@belopuhov.com wrote:
 On 12 September 2013 18:14, Reyk Floeter r...@openbsd.org wrote:
 On Thu, Sep 12, 2013 at 05:53:42PM +0200, Mike Belopuhov wrote:
 looks like you misunderstand the problem we're dealing with here.


 Sure, I do.  You're trying to push one thing and you don't want to
 hear the concerns about a specific detail of it.


 with all respect, i think you don't.  otherwise you wouldn't be asking
 the questions you're asking.

 we do hear your concerns, but since even before the change if_index
 was not static at all the way you seem to be implying snmp requires
 it, i don't see a situation drastically changing.  if you create all
 the interfaces on startup or before you start snmpd and don't destroy/
 re-create them nothing is changed.


perhaps truly persistent snmp ifindexes could be implemented in the
snmpd itself via a new configuration option, i.e.

interface vlan1 index 1.

you don't necessarily have to use the same ones system is using for
it's own purposes.



Re: defer routing table updates on link state changes

2013-09-12 Thread Reyk Floeter
On Thu, Sep 12, 2013 at 06:28:15PM +0200, Mike Belopuhov wrote:
  Sure, I do.  You're trying to push one thing and you don't want to
  hear the concerns about a specific detail of it.
 
 
 with all respect, i think you don't.  otherwise you wouldn't be asking
 the questions you're asking.
 
 we do hear your concerns, but since even before the change if_index
 was not static at all the way you seem to be implying snmp requires
 it, i don't see a situation drastically changing.  if you create all
 the interfaces on startup or before you start snmpd and don't destroy/
 re-create them nothing is changed.
 

Ok, let's stop this.  I don't think you read what I replied before.  I
didn't say that we're static with if_indexes, just that we shouldn't
make it worse.

I give up, but please read my next comment below.

   Isn't there any other way to do what you want without stopping to
   reuse the index?  SNMP simply expects that if_indexes are fairly
   static, linear, and without holes.  Why should we change that in
   OpenBSD?  Is there any security reason to randomize the indexes -
   No.
 
  or snmp can simply stop assuming things.  if_index wasn't created
  for snmp in the first place.

Actually, I think this assumption is wrong.  I researched a little bit
in BSD history:

- RFC 1066 from August 1988 is one of the early SNMP RFC that mention IfIndex

- 4.3BSD-Tahoe from June 1988 doesn't have if_index, I also didn't find
  in other early BSDs.

- 4.3BSD-Reno from June 1990 does have it.  You can even find a
  new comment /*  XXX fast fix for SNMP, going away soon */ on top of if.h.

So it seems that if_index was added _for_ SNMP.

  Of course, everyone else is wrong, let's change the world!  IfIndex is
  used by SNMP since at least 1988 (RFC 1066) and many many tools have
  adopted it expecting this behaviour.  Anyway, just go ahead and do the
  stuff.  I don't care, it is not a big issue for snmpd.  But I still
  don't see the point of changing the semantics instead of finding
  another way to do what you want.  Unless there is a security issue or
  similar with if_indexes and changing it would actually improve
  something.  Blah.
 
 no need to get upset.  mpi's change does improve things.  we want to
 make full use of if_index' initial design and use it as a reference
 to the interface in the mp network stack .  it has nothing to do with
 a badly designed protocol from the eighties.

Oh, come on.  SNMP is as badly designed as many other things that
we're using every day.  Do you suggest to rm snmpd because the
protocol is badly designed?

Reyk



Re: defer routing table updates on link state changes

2013-09-12 Thread Mike Belopuhov
On 12 September 2013 18:14, Reyk Floeter r...@openbsd.org wrote:
 On Thu, Sep 12, 2013 at 05:53:42PM +0200, Mike Belopuhov wrote:
 looks like you misunderstand the problem we're dealing with here.


 Sure, I do.  You're trying to push one thing and you don't want to
 hear the concerns about a specific detail of it.


with all respect, i think you don't.  otherwise you wouldn't be asking
the questions you're asking.

we do hear your concerns, but since even before the change if_index
was not static at all the way you seem to be implying snmp requires
it, i don't see a situation drastically changing.  if you create all
the interfaces on startup or before you start snmpd and don't destroy/
re-create them nothing is changed.

  FWIW it would be interesting to modify tun(4) so that it doesn't
  need to detach/reattach itself when switching between mode, this
  would allow us to stop reusing the last index.
 
 
  Or you could simply rewrite tun(4)?
 
  Isn't there any other way to do what you want without stopping to
  reuse the index?  SNMP simply expects that if_indexes are fairly
  static, linear, and without holes.  Why should we change that in
  OpenBSD?  Is there any security reason to randomize the indexes -
  No.
 
  Reyk
 

 or snmp can simply stop assuming things.  if_index wasn't created
 for snmp in the first place.

 Of course, everyone else is wrong, let's change the world!  IfIndex is
 used by SNMP since at least 1988 (RFC 1066) and many many tools have
 adopted it expecting this behaviour.  Anyway, just go ahead and do the
 stuff.  I don't care, it is not a big issue for snmpd.  But I still
 don't see the point of changing the semantics instead of finding
 another way to do what you want.  Unless there is a security issue or
 similar with if_indexes and changing it would actually improve
 something.  Blah.

 Reyk

no need to get upset.  mpi's change does improve things.  we want to
make full use of if_index' initial design and use it as a reference
to the interface in the mp network stack .  it has nothing to do with
a badly designed protocol from the eighties.



Re: defer routing table updates on link state changes

2013-09-12 Thread Henning Brauer
* Mike Belopuhov m...@belopuhov.com [2013-09-12 17:54]:
 it makes no sense whatsoever, reyk.  those indices can be easily
 stolen and nobody guarantees that if you create vlan10, vlan11,
 then destroy vlan10, create vlan12 and vlan10 that vlan10 will
 have the same index as before.  in fact it might be a different
 interface created for a different purpose days after.  who knows?
 if snmp client relies on this behavior, it's broken since we have
 never made any provisions regarding how we use those indices.

correct.

however, it is not reyk who's on drugs here, it's snmp itself. using
the OS-private ifindex and making assumptions about it is the root
problem. but since that's in the standards, there are only 2 possible
solutions I see:
-keep trying to please snmp in the way we assign ifindex
-let snmpd (or sth else) make up ifindices just for that purpose

-- 
Henning Brauer, h...@bsws.de, henn...@openbsd.org
BS Web Services GmbH, http://bsws.de, Full-Service ISP
Secure Hosting, Mail and DNS Services. Dedicated Servers, Root to Fully Managed
Henning Brauer Consulting, http://henningbrauer.com/



Re: defer routing table updates on link state changes

2013-09-12 Thread Mike Belopuhov
On 12 September 2013 19:07, Reyk Floeter r...@openbsd.org wrote:
 On Thu, Sep 12, 2013 at 06:59:13PM +0200, Mike Belopuhov wrote:
  Ok, let's stop this.  I don't think you read what I replied before.  I
  didn't say that we're static with if_indexes, just that we shouldn't
  make it worse.
 

 or implement persistent indices in the snmpd itself maybe?


 Maybe.  But this would create another layer of abstraction.  And
 if_index is not just used by SNMP.

  I give up, but please read my next comment below.
 
Isn't there any other way to do what you want without stopping to
reuse the index?  SNMP simply expects that if_indexes are fairly
static, linear, and without holes.  Why should we change that in
OpenBSD?  Is there any security reason to randomize the indexes -
No.
  
   or snmp can simply stop assuming things.  if_index wasn't created
   for snmp in the first place.
 
  Actually, I think this assumption is wrong.  I researched a little bit
  in BSD history:
 
  - RFC 1066 from August 1988 is one of the early SNMP RFC that mention 
  IfIndex
 
  - 4.3BSD-Tahoe from June 1988 doesn't have if_index, I also didn't find
in other early BSDs.
 
  - 4.3BSD-Reno from June 1990 does have it.  You can even find a
new comment /*  XXX fast fix for SNMP, going away soon */ on top of 
  if.h.
 
  So it seems that if_index was added _for_ SNMP.
 

 i believe this comment refers to the inclusion of sys/time.h.


 Yes, I know.  But see:

 1. 1988-06: 4.3BSD-Tahoe without if_index
 2. 1988-08: SNMP
 3. 1990-06: 4.3BSD-Reno with if_index, mentioning SNMP in if.h.

 I don't have the commit history, but this might indicate that there
 was some work to support SNMP and many new fields in struct ifnet have
 been added for SNMP.

 http://minnie.tuhs.org/cgi-bin/utree.pl?file1=4.3BSD-Reno/src/sys/net/if.hfile2=4.3BSD-Tahoe/usr/src/sys/net/if.hprint=1

 reyk

either way, we need to move forward on this.  we want to use if_index
for the purpose of looking up the interface w/o a pointer to the ifnet.
should we implement additional indices for that or snmp problem will
be dealt with?  currently if you reuse an index and reuse the memory
for the ifnet you may suddenly pick a new interface and perform an
operation on it that might not necessarily make sense in the context.



Re: defer routing table updates on link state changes

2013-09-12 Thread Mike Belopuhov
On 12 September 2013 18:48, Reyk Floeter r...@openbsd.org wrote:
 On Thu, Sep 12, 2013 at 06:28:15PM +0200, Mike Belopuhov wrote:
  Sure, I do.  You're trying to push one thing and you don't want to
  hear the concerns about a specific detail of it.
 

 with all respect, i think you don't.  otherwise you wouldn't be asking
 the questions you're asking.

 we do hear your concerns, but since even before the change if_index
 was not static at all the way you seem to be implying snmp requires
 it, i don't see a situation drastically changing.  if you create all
 the interfaces on startup or before you start snmpd and don't destroy/
 re-create them nothing is changed.


 Ok, let's stop this.  I don't think you read what I replied before.  I
 didn't say that we're static with if_indexes, just that we shouldn't
 make it worse.


or implement persistent indices in the snmpd itself maybe?

 I give up, but please read my next comment below.

   Isn't there any other way to do what you want without stopping to
   reuse the index?  SNMP simply expects that if_indexes are fairly
   static, linear, and without holes.  Why should we change that in
   OpenBSD?  Is there any security reason to randomize the indexes -
   No.
 
  or snmp can simply stop assuming things.  if_index wasn't created
  for snmp in the first place.

 Actually, I think this assumption is wrong.  I researched a little bit
 in BSD history:

 - RFC 1066 from August 1988 is one of the early SNMP RFC that mention IfIndex

 - 4.3BSD-Tahoe from June 1988 doesn't have if_index, I also didn't find
   in other early BSDs.

 - 4.3BSD-Reno from June 1990 does have it.  You can even find a
   new comment /*  XXX fast fix for SNMP, going away soon */ on top of if.h.

 So it seems that if_index was added _for_ SNMP.


i believe this comment refers to the inclusion of sys/time.h.

  Of course, everyone else is wrong, let's change the world!  IfIndex is
  used by SNMP since at least 1988 (RFC 1066) and many many tools have
  adopted it expecting this behaviour.  Anyway, just go ahead and do the
  stuff.  I don't care, it is not a big issue for snmpd.  But I still
  don't see the point of changing the semantics instead of finding
  another way to do what you want.  Unless there is a security issue or
  similar with if_indexes and changing it would actually improve
  something.  Blah.

 no need to get upset.  mpi's change does improve things.  we want to
 make full use of if_index' initial design and use it as a reference
 to the interface in the mp network stack .  it has nothing to do with
 a badly designed protocol from the eighties.

 Oh, come on.  SNMP is as badly designed as many other things that
 we're using every day.

sure.

 Do you suggest to rm snmpd because the
 protocol is badly designed?


no, i don't.  i merely suggest that if_index should not be used if
persistent ifindex'es are required.



Re: defer routing table updates on link state changes

2013-09-12 Thread Reyk Floeter
On Thu, Sep 12, 2013 at 05:53:42PM +0200, Mike Belopuhov wrote:
 looks like you misunderstand the problem we're dealing with here.
 

Sure, I do.  You're trying to push one thing and you don't want to
hear the concerns about a specific detail of it.

  FWIW it would be interesting to modify tun(4) so that it doesn't
  need to detach/reattach itself when switching between mode, this
  would allow us to stop reusing the last index.
 
 
  Or you could simply rewrite tun(4)?
 
  Isn't there any other way to do what you want without stopping to
  reuse the index?  SNMP simply expects that if_indexes are fairly
  static, linear, and without holes.  Why should we change that in
  OpenBSD?  Is there any security reason to randomize the indexes -
  No.
 
  Reyk
 
 
 or snmp can simply stop assuming things.  if_index wasn't created
 for snmp in the first place.

Of course, everyone else is wrong, let's change the world!  IfIndex is
used by SNMP since at least 1988 (RFC 1066) and many many tools have
adopted it expecting this behaviour.  Anyway, just go ahead and do the
stuff.  I don't care, it is not a big issue for snmpd.  But I still
don't see the point of changing the semantics instead of finding
another way to do what you want.  Unless there is a security issue or
similar with if_indexes and changing it would actually improve
something.  Blah.

Reyk



Re: defer routing table updates on link state changes

2013-09-12 Thread Philip Guenther
On Thu, Sep 12, 2013 at 10:19 AM, Mike Belopuhov m...@belopuhov.com wrote:
...
 either way, we need to move forward on this.  we want to use if_index
 for the purpose of looking up the interface w/o a pointer to the ifnet.

This sounds like just using a pid to identify processes and hoping
they haven't wrapped around...and the places the kernel does that are
wrong too**.  If pointers are out because refcounting them to avoid
dangling pointers leaves them impossible to reliably clean up in
bounded time (i.e., you need weak pointers), then there should be a
generation number to catch the wraps.  IMO.

(I don't get why it's useful for tun0-in-layer3 mode to have the same
if_index as tun0-in-layer2 mode.  The properties are so different that
there doesn't really seem to be continuity of identity between them.)


Philip Guenther

** yes, fixing ptrace() reparenting back to the original is on my todo list.



Re: defer routing table updates on link state changes

2013-09-12 Thread Reyk Floeter
On Thu, Sep 12, 2013 at 06:59:13PM +0200, Mike Belopuhov wrote:
  Ok, let's stop this.  I don't think you read what I replied before.  I
  didn't say that we're static with if_indexes, just that we shouldn't
  make it worse.
 
 
 or implement persistent indices in the snmpd itself maybe?
 

Maybe.  But this would create another layer of abstraction.  And
if_index is not just used by SNMP.

  I give up, but please read my next comment below.
 
Isn't there any other way to do what you want without stopping to
reuse the index?  SNMP simply expects that if_indexes are fairly
static, linear, and without holes.  Why should we change that in
OpenBSD?  Is there any security reason to randomize the indexes -
No.
  
   or snmp can simply stop assuming things.  if_index wasn't created
   for snmp in the first place.
 
  Actually, I think this assumption is wrong.  I researched a little bit
  in BSD history:
 
  - RFC 1066 from August 1988 is one of the early SNMP RFC that mention 
  IfIndex
 
  - 4.3BSD-Tahoe from June 1988 doesn't have if_index, I also didn't find
in other early BSDs.
 
  - 4.3BSD-Reno from June 1990 does have it.  You can even find a
new comment /*  XXX fast fix for SNMP, going away soon */ on top of 
  if.h.
 
  So it seems that if_index was added _for_ SNMP.
 
 
 i believe this comment refers to the inclusion of sys/time.h.
 

Yes, I know.  But see:

1. 1988-06: 4.3BSD-Tahoe without if_index
2. 1988-08: SNMP
3. 1990-06: 4.3BSD-Reno with if_index, mentioning SNMP in if.h.

I don't have the commit history, but this might indicate that there
was some work to support SNMP and many new fields in struct ifnet have
been added for SNMP.

http://minnie.tuhs.org/cgi-bin/utree.pl?file1=4.3BSD-Reno/src/sys/net/if.hfile2=4.3BSD-Tahoe/usr/src/sys/net/if.hprint=1

reyk



Re: defer routing table updates on link state changes

2013-09-11 Thread Claudio Jeker
On Tue, Aug 27, 2013 at 01:39:14PM +0200, Martin Pieuchot wrote:
 On 26/08/13(Mon) 13:36, Mike Belopuhov wrote:
  hi,
  
  in order to make our life a bit easier and prevent rogue
  accesses to the routing table from the hardware interrupt
  context violating all kinds of spl assumptions we would
  like if_link_state_change that is called by network device
  drivers in their interrupt service routines to defer its
  work to the process context or thereabouts.
  
  i did some testing here, but wouldn't mind if someone
  tries this diff in gre/vlan/ospf/anything-weird setups.
  making sure that hot-plugging/unplugging usb interfaces
  doesn't produce any undesirable effects would be superb
  as well.
 
 I just tested vlan and usb interfaces, it works just fine.
 
  please note that a token (an interface index) is passed
  to the workq in order to make sure that if the interface
  would be gone by the time syswq goes around to run the
  task it would just fall through.
 
 I think that's the right approach but the current code generating
 interfaces indexes is too clever from my point of view, it tries
 to reuse the last index if possible.  This could lead to some
 funny races if we detach and reattach an interface before the
 task get scheduled.
 
 So I'm ok with your diff if we avoid to reuse the last index too
 soon.  Diff below does that.

We should not do that since this was done for tun(4) switching between
modes without getting new ifindexes. It also reduces the holes in the
ifindex array on system that dynamically allocate interfaces for e.g.
VPNs.
 
 Index: net/if.c
 ===
 RCS file: /home/ncvs/src/sys/net/if.c,v
 retrieving revision 1.262
 diff -u -p -r1.262 if.c
 --- net/if.c  20 Aug 2013 09:14:22 -  1.262
 +++ net/if.c  27 Aug 2013 10:22:44 -
 @@ -204,31 +204,37 @@ if_attachsetup(struct ifnet *ifp)
  {
   int wrapped = 0;
  
 - if (ifindex2ifnet == 0)
 + /*
 +  * Always increment the index to avoid races.
 +  */
 + if_index++;
 +
 + /*
 +  * If we hit USHRT_MAX, we skip back to 1 since there are a
 +  * number of places where the value of ifp-if_index or
 +  * if_index itself is compared to or stored in an unsigned
 +  * short.  By jumping back, we won't botch those assignments
 +  * or comparisons.
 +  */
 + if (if_index == USHRT_MAX) {
   if_index = 1;
 - else {
 - while (if_index  if_indexlim 
 - ifindex2ifnet[if_index] != NULL) {
 - if_index++;
 + wrapped++;
 + }
 +
 + while (if_index  if_indexlim  ifindex2ifnet[if_index] != NULL) {
 + if_index++;
 +
 + if (if_index == USHRT_MAX) {
   /*
 -  * If we hit USHRT_MAX, we skip back to 1 since
 -  * there are a number of places where the value
 -  * of ifp-if_index or if_index itself is compared
 -  * to or stored in an unsigned short.  By
 -  * jumping back, we won't botch those assignments
 -  * or comparisons.
 +  * If we have to jump back to 1 twice without
 +  * finding an empty slot then there are too many
 +  * interfaces.
*/
 - if (if_index == USHRT_MAX) {
 - if_index = 1;
 - /*
 -  * However, if we have to jump back to 1
 -  * *twice* without finding an empty
 -  * slot in ifindex2ifnet[], then there
 -  * there are too many (65535) interfaces.
 -  */
 - if (wrapped++)
 - panic(too many interfaces);
 - }
 + if (wrapped)
 + panic(too many interfaces);
 +
 + if_index = 1;
 + wrapped++;
   }
   }
   ifp-if_index = if_index;
 

-- 
:wq Claudio



Re: defer routing table updates on link state changes

2013-09-02 Thread Mike Belopuhov
On Mon, Aug 26, 2013 at 13:36 +0200, Mike Belopuhov wrote:
 hi,
 
 in order to make our life a bit easier and prevent rogue
 accesses to the routing table from the hardware interrupt
 context violating all kinds of spl assumptions we would
 like if_link_state_change that is called by network device
 drivers in their interrupt service routines to defer its
 work to the process context or thereabouts.
 
 i did some testing here, but wouldn't mind if someone
 tries this diff in gre/vlan/ospf/anything-weird setups.
 making sure that hot-plugging/unplugging usb interfaces
 doesn't produce any undesirable effects would be superb
 as well.
 
 please note that a token (an interface index) is passed
 to the workq in order to make sure that if the interface
 would be gone by the time syswq goes around to run the
 task it would just fall through.
 
 ok?
 

i've got an ok from mpi, anyone else would like to test
and/or comment?

 diff --git sys/net/if.c sys/net/if.c
 index 6dafd0d..5b6800a 100644
 --- sys/net/if.c
 +++ sys/net/if.c
 @@ -79,10 +79,11 @@
  #include sys/protosw.h
  #include sys/kernel.h
  #include sys/ioctl.h
  #include sys/domain.h
  #include sys/sysctl.h
 +#include sys/workq.h
  
  #include net/if.h
  #include net/if_dl.h
  #include net/if_media.h
  #include net/if_types.h
 @@ -151,10 +152,12 @@ int if_clone_list(struct if_clonereq *);
  struct if_clone  *if_clone_lookup(const char *, int *);
  
  void if_congestion_clear(void *);
  int  if_group_egress_build(void);
  
 +void if_link_state_change_task(void *, void *);
 +
  int  ifai_cmp(struct ifaddr_item *,  struct ifaddr_item *);
  void ifa_item_insert(struct sockaddr *, struct ifaddr *, struct ifnet *);
  void ifa_item_remove(struct sockaddr *, struct ifaddr *, struct ifnet *);
  #ifndef SMALL_KERNEL
  void ifa_print_rb(void);
 @@ -1106,21 +1109,39 @@ if_up(struct ifnet *ifp)
  
   m_clinitifp(ifp);
  }
  
  /*
 - * Process a link state change.
 - * NOTE: must be called at splsoftnet or equivalent.
 + * Schedule a link state change task.
   */
  void
  if_link_state_change(struct ifnet *ifp)
  {
 - rt_ifmsg(ifp);
 + /* try to put the routing table update task on syswq */
 + workq_add_task(NULL, 0, if_link_state_change_task,
 + (void *)((unsigned long)ifp-if_index), NULL);
 +}
 +
 +/*
 + * Process a link state change.
 + */
 +void
 +if_link_state_change_task(void *arg, void *unused)
 +{
 + unsigned int index = (unsigned long)arg;
 + struct ifnet *ifp;
 + int s;
 +
 + s = splsoftnet();
 + if ((ifp = if_get(index)) != NULL) {
 + rt_ifmsg(ifp);
  #ifndef SMALL_KERNEL
 - rt_if_track(ifp);
 + rt_if_track(ifp);
  #endif
 - dohooks(ifp-if_linkstatehooks, 0);
 + dohooks(ifp-if_linkstatehooks, 0);
 + }
 + splx(s);
  }
  
  /*
   * Handle interface watchdog timer routines.  Called
   * from softclock, we decrement timers (if set) and



Re: defer routing table updates on link state changes

2013-08-27 Thread Martin Pieuchot
On 26/08/13(Mon) 13:36, Mike Belopuhov wrote:
 hi,
 
 in order to make our life a bit easier and prevent rogue
 accesses to the routing table from the hardware interrupt
 context violating all kinds of spl assumptions we would
 like if_link_state_change that is called by network device
 drivers in their interrupt service routines to defer its
 work to the process context or thereabouts.
 
 i did some testing here, but wouldn't mind if someone
 tries this diff in gre/vlan/ospf/anything-weird setups.
 making sure that hot-plugging/unplugging usb interfaces
 doesn't produce any undesirable effects would be superb
 as well.

I just tested vlan and usb interfaces, it works just fine.

 please note that a token (an interface index) is passed
 to the workq in order to make sure that if the interface
 would be gone by the time syswq goes around to run the
 task it would just fall through.

I think that's the right approach but the current code generating
interfaces indexes is too clever from my point of view, it tries
to reuse the last index if possible.  This could lead to some
funny races if we detach and reattach an interface before the
task get scheduled.

So I'm ok with your diff if we avoid to reuse the last index too
soon.  Diff below does that.

Index: net/if.c
===
RCS file: /home/ncvs/src/sys/net/if.c,v
retrieving revision 1.262
diff -u -p -r1.262 if.c
--- net/if.c20 Aug 2013 09:14:22 -  1.262
+++ net/if.c27 Aug 2013 10:22:44 -
@@ -204,31 +204,37 @@ if_attachsetup(struct ifnet *ifp)
 {
int wrapped = 0;
 
-   if (ifindex2ifnet == 0)
+   /*
+* Always increment the index to avoid races.
+*/
+   if_index++;
+
+   /*
+* If we hit USHRT_MAX, we skip back to 1 since there are a
+* number of places where the value of ifp-if_index or
+* if_index itself is compared to or stored in an unsigned
+* short.  By jumping back, we won't botch those assignments
+* or comparisons.
+*/
+   if (if_index == USHRT_MAX) {
if_index = 1;
-   else {
-   while (if_index  if_indexlim 
-   ifindex2ifnet[if_index] != NULL) {
-   if_index++;
+   wrapped++;
+   }
+
+   while (if_index  if_indexlim  ifindex2ifnet[if_index] != NULL) {
+   if_index++;
+
+   if (if_index == USHRT_MAX) {
/*
-* If we hit USHRT_MAX, we skip back to 1 since
-* there are a number of places where the value
-* of ifp-if_index or if_index itself is compared
-* to or stored in an unsigned short.  By
-* jumping back, we won't botch those assignments
-* or comparisons.
+* If we have to jump back to 1 twice without
+* finding an empty slot then there are too many
+* interfaces.
 */
-   if (if_index == USHRT_MAX) {
-   if_index = 1;
-   /*
-* However, if we have to jump back to 1
-* *twice* without finding an empty
-* slot in ifindex2ifnet[], then there
-* there are too many (65535) interfaces.
-*/
-   if (wrapped++)
-   panic(too many interfaces);
-   }
+   if (wrapped)
+   panic(too many interfaces);
+
+   if_index = 1;
+   wrapped++;
}
}
ifp-if_index = if_index;



Re: defer routing table updates on link state changes

2013-08-27 Thread Mike Belopuhov
On 27 August 2013 13:39, Martin Pieuchot mpieuc...@nolizard.org wrote:
 I think that's the right approach but the current code generating
 interfaces indexes is too clever from my point of view, it tries
 to reuse the last index if possible.  This could lead to some
 funny races if we detach and reattach an interface before the
 task get scheduled.

 So I'm ok with your diff if we avoid to reuse the last index too
 soon.  Diff below does that.


i'm ok with this change.



Re: defer routing table updates on link state changes

2013-08-27 Thread Kenneth R Westerback
On Tue, Aug 27, 2013 at 01:54:34PM +0200, Mike Belopuhov wrote:
 On 27 August 2013 13:39, Martin Pieuchot mpieuc...@nolizard.org wrote:
  I think that's the right approach but the current code generating
  interfaces indexes is too clever from my point of view, it tries
  to reuse the last index if possible.  This could lead to some
  funny races if we detach and reattach an interface before the
  task get scheduled.
 
  So I'm ok with your diff if we avoid to reuse the last index too
  soon.  Diff below does that.
 
 
 i'm ok with this change.
 

I like that tweak too. Makes me feel warm and safe.

 Ken



defer routing table updates on link state changes

2013-08-26 Thread Mike Belopuhov
hi,

in order to make our life a bit easier and prevent rogue
accesses to the routing table from the hardware interrupt
context violating all kinds of spl assumptions we would
like if_link_state_change that is called by network device
drivers in their interrupt service routines to defer its
work to the process context or thereabouts.

i did some testing here, but wouldn't mind if someone
tries this diff in gre/vlan/ospf/anything-weird setups.
making sure that hot-plugging/unplugging usb interfaces
doesn't produce any undesirable effects would be superb
as well.

please note that a token (an interface index) is passed
to the workq in order to make sure that if the interface
would be gone by the time syswq goes around to run the
task it would just fall through.

ok?

diff --git sys/net/if.c sys/net/if.c
index 6dafd0d..5b6800a 100644
--- sys/net/if.c
+++ sys/net/if.c
@@ -79,10 +79,11 @@
 #include sys/protosw.h
 #include sys/kernel.h
 #include sys/ioctl.h
 #include sys/domain.h
 #include sys/sysctl.h
+#include sys/workq.h
 
 #include net/if.h
 #include net/if_dl.h
 #include net/if_media.h
 #include net/if_types.h
@@ -151,10 +152,12 @@ int   if_clone_list(struct if_clonereq *);
 struct if_clone*if_clone_lookup(const char *, int *);
 
 void   if_congestion_clear(void *);
 intif_group_egress_build(void);
 
+void   if_link_state_change_task(void *, void *);
+
 intifai_cmp(struct ifaddr_item *,  struct ifaddr_item *);
 void   ifa_item_insert(struct sockaddr *, struct ifaddr *, struct ifnet *);
 void   ifa_item_remove(struct sockaddr *, struct ifaddr *, struct ifnet *);
 #ifndef SMALL_KERNEL
 void   ifa_print_rb(void);
@@ -1106,21 +1109,39 @@ if_up(struct ifnet *ifp)
 
m_clinitifp(ifp);
 }
 
 /*
- * Process a link state change.
- * NOTE: must be called at splsoftnet or equivalent.
+ * Schedule a link state change task.
  */
 void
 if_link_state_change(struct ifnet *ifp)
 {
-   rt_ifmsg(ifp);
+   /* try to put the routing table update task on syswq */
+   workq_add_task(NULL, 0, if_link_state_change_task,
+   (void *)((unsigned long)ifp-if_index), NULL);
+}
+
+/*
+ * Process a link state change.
+ */
+void
+if_link_state_change_task(void *arg, void *unused)
+{
+   unsigned int index = (unsigned long)arg;
+   struct ifnet *ifp;
+   int s;
+
+   s = splsoftnet();
+   if ((ifp = if_get(index)) != NULL) {
+   rt_ifmsg(ifp);
 #ifndef SMALL_KERNEL
-   rt_if_track(ifp);
+   rt_if_track(ifp);
 #endif
-   dohooks(ifp-if_linkstatehooks, 0);
+   dohooks(ifp-if_linkstatehooks, 0);
+   }
+   splx(s);
 }
 
 /*
  * Handle interface watchdog timer routines.  Called
  * from softclock, we decrement timers (if set) and