date:20070418

[PATCH] [0/5] [v2] pasemi_mac: fixes and enhancements

2007-04-18 Thread Olof Johansson

Hi,

The five following patches contain a number of fixes and improvements
of the pasemi_mac driver:

1/5: A couple of minor bugfixes.
2/5: Move the IRQ mapping from the PCI layer under our platform, to
 the driver.
3/5: A rather large patch with various NAPI/performance-related fixes
 and enhancements.
4/5: phy support
5/5: use local-mac-address instead of mac-address if available.

(Changes from last time: Added 5/5, changes to 2/5 to use virq_to_hw()).


-Olof
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] [1/5] [v2] pasemi_mac: minor bugfixes

2007-04-18 Thread Olof Johansson

Ethernet bugfixes:

* Move the was_full/wake_queue logic from tx_intr to clean_tx
* Fix polarity in checks in pasemi_mac_close


Signed-off-by: Olof Johansson [EMAIL PROTECTED]


Index: linux-2.6/drivers/net/pasemi_mac.c
===
--- linux-2.6.orig/drivers/net/pasemi_mac.c
+++ linux-2.6/drivers/net/pasemi_mac.c
@@ -451,9 +451,12 @@ static int pasemi_mac_clean_tx(struct pa
struct pas_dma_xct_descr *dp;
int start, count;
int flags;
+   int was_full;
 
spin_lock_irqsave(mac-tx-lock, flags);
 
+   was_full = mac-tx-next_to_clean - mac-tx-next_to_use == 
TX_RING_SIZE;
+
start = mac-tx-next_to_clean;
count = 0;
 
@@ -478,6 +481,9 @@ static int pasemi_mac_clean_tx(struct pa
mac-tx-next_to_clean += count;
spin_unlock_irqrestore(mac-tx-lock, flags);
 
+   if (was_full)
+   netif_wake_queue(mac-netdev);
+
return count;
 }
 
@@ -512,9 +518,6 @@ static irqreturn_t pasemi_mac_tx_intr(in
struct net_device *dev = data;
struct pasemi_mac *mac = netdev_priv(dev);
unsigned int reg;
-   int was_full;
-
-   was_full = mac-tx-next_to_clean - mac-tx-next_to_use == 
TX_RING_SIZE;
 
if (!(*mac-tx_status  PAS_STATUS_INT))
return IRQ_NONE;
@@ -528,9 +531,6 @@ static irqreturn_t pasemi_mac_tx_intr(in
pci_write_config_dword(mac-iob_pdev, 
PAS_IOB_DMA_TXCH_RESET(mac-dma_txch),
   reg);
 
-   if (was_full)
-   netif_wake_queue(dev);
-
return IRQ_HANDLED;
 }
 
@@ -662,40 +665,37 @@ static int pasemi_mac_close(struct net_d
pci_read_config_dword(mac-dma_pdev,
  PAS_DMA_TXCHAN_TCMDSTA(mac-dma_txch),
  stat);
-   if (stat  PAS_DMA_TXCHAN_TCMDSTA_ACT)
+   if (!(stat  PAS_DMA_TXCHAN_TCMDSTA_ACT))
break;
cond_resched();
}
 
-   if (!(stat  PAS_DMA_TXCHAN_TCMDSTA_ACT)) {
+   if (stat  PAS_DMA_TXCHAN_TCMDSTA_ACT)
dev_err(mac-dma_pdev-dev, Failed to stop tx channel\n);
-   }
 
for (retries = 0; retries  MAX_RETRIES; retries++) {
pci_read_config_dword(mac-dma_pdev,
  PAS_DMA_RXCHAN_CCMDSTA(mac-dma_rxch),
  stat);
-   if (stat  PAS_DMA_RXCHAN_CCMDSTA_ACT)
+   if (!(stat  PAS_DMA_RXCHAN_CCMDSTA_ACT))
break;
cond_resched();
}
 
-   if (!(stat  PAS_DMA_RXCHAN_CCMDSTA_ACT)) {
+   if (stat  PAS_DMA_RXCHAN_CCMDSTA_ACT)
dev_err(mac-dma_pdev-dev, Failed to stop rx channel\n);
-   }
 
for (retries = 0; retries  MAX_RETRIES; retries++) {
pci_read_config_dword(mac-dma_pdev,
  PAS_DMA_RXINT_RCMDSTA(mac-dma_if),
  stat);
-   if (stat  PAS_DMA_RXINT_RCMDSTA_ACT)
+   if (!(stat  PAS_DMA_RXINT_RCMDSTA_ACT))
break;
cond_resched();
}
 
-   if (!(stat  PAS_DMA_RXINT_RCMDSTA_ACT)) {
+   if (stat  PAS_DMA_RXINT_RCMDSTA_ACT)
dev_err(mac-dma_pdev-dev, Failed to stop rx interface\n);
-   }
 
/* Then, disable the channel. This must be done separately from
 * stopping, since you can't disable when active.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] [2/5] [v2] pasemi_mac: irq mapping changes

2007-04-18 Thread Olof Johansson

Fixes for ethernet IRQ mapping, to be done in the driver instead of in
the platform setup code.


Signed-off-by: Olof Johansson [EMAIL PROTECTED]


Index: powerpc/arch/powerpc/platforms/pasemi/pci.c
===
--- powerpc.orig/arch/powerpc/platforms/pasemi/pci.c
+++ powerpc/arch/powerpc/platforms/pasemi/pci.c
@@ -163,19 +163,6 @@ static void __init pas_fixup_phb_resourc
 }
 
 
-void __devinit pas_pci_irq_fixup(struct pci_dev *dev)
-{
-   /* DMA is special, 84 interrupts (128 - 211), all but 128
-* need to be mapped by hand here.
-*/
-   if (dev-vendor == 0x1959  dev-device == 0xa007) {
-   int i;
-   for (i = 129; i  212; i++)
-   irq_create_mapping(NULL, i);
-   }
-}
-
-
 void __init pas_pci_init(void)
 {
struct device_node *np, *root;
Index: powerpc/arch/powerpc/platforms/pasemi/setup.c
===
--- powerpc.orig/arch/powerpc/platforms/pasemi/setup.c
+++ powerpc/arch/powerpc/platforms/pasemi/setup.c
@@ -240,5 +240,4 @@ define_machine(pas) {
.check_legacy_ioport= pas_check_legacy_ioport,
.progress   = pas_progress,
.machine_check_exception = pas_machine_check_handler,
-   .pci_irq_fixup  = pas_pci_irq_fixup,
 };
Index: powerpc/drivers/net/pasemi_mac.c
===
--- powerpc.orig/drivers/net/pasemi_mac.c
+++ powerpc/drivers/net/pasemi_mac.c
@@ -33,6 +33,8 @@
 #include linux/tcp.h
 #include net/checksum.h
 
+#include asm/irq.h
+
 #include pasemi_mac.h
 
 
@@ -537,6 +539,7 @@ static irqreturn_t pasemi_mac_tx_intr(in
 static int pasemi_mac_open(struct net_device *dev)
 {
struct pasemi_mac *mac = netdev_priv(dev);
+   int base_irq;
unsigned int flags;
int ret;
 
@@ -600,28 +603,37 @@ static int pasemi_mac_open(struct net_de
netif_start_queue(dev);
netif_poll_enable(dev);
 
-   ret = request_irq(mac-dma_pdev-irq + mac-dma_txch,
- pasemi_mac_tx_intr, IRQF_DISABLED,
+   /* Interrupts are a bit different for our DMA controller: While
+* it's got one a regular PCI device header, the interrupt there
+* is really the base of the range it's using. Each tx and rx
+* channel has it's own interrupt source.
+*/
+
+   base_irq = virq_to_hw(mac-dma_pdev-irq);
+
+   mac-tx_irq = irq_create_mapping(NULL, base_irq + mac-dma_txch);
+   mac-rx_irq = irq_create_mapping(NULL, base_irq + 20 + mac-dma_txch);
+
+   ret = request_irq(mac-tx_irq, pasemi_mac_tx_intr, IRQF_DISABLED,
  mac-tx-irq_name, dev);
if (ret) {
dev_err(mac-pdev-dev, request_irq of irq %d failed: %d\n,
-  mac-dma_pdev-irq + mac-dma_txch, ret);
+   base_irq + mac-dma_txch, ret);
goto out_tx_int;
}
 
-   ret = request_irq(mac-dma_pdev-irq + 20 + mac-dma_rxch,
- pasemi_mac_rx_intr, IRQF_DISABLED,
+   ret = request_irq(mac-rx_irq, pasemi_mac_rx_intr, IRQF_DISABLED,
  mac-rx-irq_name, dev);
if (ret) {
dev_err(mac-pdev-dev, request_irq of irq %d failed: %d\n,
-  mac-dma_pdev-irq + 20 + mac-dma_rxch, ret);
+   base_irq + 20 + mac-dma_rxch, ret);
goto out_rx_int;
}
 
return 0;
 
 out_rx_int:
-   free_irq(mac-dma_pdev-irq + mac-dma_txch, dev);
+   free_irq(mac-tx_irq, dev);
 out_tx_int:
netif_poll_disable(dev);
netif_stop_queue(dev);
@@ -705,8 +717,8 @@ static int pasemi_mac_close(struct net_d
pci_write_config_dword(mac-dma_pdev,
   PAS_DMA_RXINT_RCMDSTA(mac-dma_if), 0);
 
-   free_irq(mac-dma_pdev-irq + mac-dma_txch, dev);
-   free_irq(mac-dma_pdev-irq + 20 + mac-dma_rxch, dev);
+   free_irq(mac-tx_irq, dev);
+   free_irq(mac-rx_irq, dev);
 
/* Free resources */
pasemi_mac_free_rx_resources(dev);
Index: powerpc/drivers/net/pasemi_mac.h
===
--- powerpc.orig/drivers/net/pasemi_mac.h
+++ powerpc/drivers/net/pasemi_mac.h
@@ -73,6 +73,8 @@ struct pasemi_mac {
 
struct pasemi_mac_txring *tx;
struct pasemi_mac_rxring *rx;
+   unsigned long   tx_irq;
+   unsigned long   rx_irq;
 };
 
 /* Software status descriptor (desc_info) */
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] [4/5] [v2] pasemi_mac: phy support

2007-04-18 Thread Olof Johansson

PHY support for pasemi_mac. Also add msg_enable flags for future
disablement of the link messages.

Signed-off-by: Olof Johansson [EMAIL PROTECTED]


Index: powerpc/drivers/net/pasemi_mac.c
===
--- powerpc.orig/drivers/net/pasemi_mac.c
+++ powerpc/drivers/net/pasemi_mac.c
@@ -594,6 +592,110 @@ static irqreturn_t pasemi_mac_tx_intr(in
return IRQ_HANDLED;
 }
 
+static void pasemi_adjust_link(struct net_device *dev)
+{
+   struct pasemi_mac *mac = netdev_priv(dev);
+   int msg;
+   unsigned int flags;
+   unsigned int new_flags;
+
+   if (!mac-phydev-link) {
+   /* If no link, MAC speed settings don't matter. Just report
+* link down and return.
+*/
+   if (mac-link  netif_msg_link(mac))
+   printk(KERN_INFO %s: Link is down.\n, dev-name);
+
+   netif_carrier_off(dev);
+   mac-link = 0;
+
+   return;
+   } else
+   netif_carrier_on(dev);
+
+   pci_read_config_dword(mac-pdev, PAS_MAC_CFG_PCFG, flags);
+   new_flags = flags  ~(PAS_MAC_CFG_PCFG_HD | PAS_MAC_CFG_PCFG_SPD_M);
+
+   if (!mac-phydev-duplex)
+   new_flags |= PAS_MAC_CFG_PCFG_HD;
+
+   switch (mac-phydev-speed) {
+   case 1000:
+   new_flags |= PAS_MAC_CFG_PCFG_SPD_1G;
+   break;
+   case 100:
+   new_flags |= PAS_MAC_CFG_PCFG_SPD_100M;
+   break;
+   case 10:
+   new_flags |= PAS_MAC_CFG_PCFG_SPD_10M;
+   break;
+   default:
+   printk(Unsupported speed %d\n, mac-phydev-speed);
+   }
+
+   /* Print on link or speed/duplex change */
+   msg = mac-link != mac-phydev-link || flags != new_flags;
+
+   mac-duplex = mac-phydev-duplex;
+   mac-speed = mac-phydev-speed;
+   mac-link = mac-phydev-link;
+
+   if (new_flags != flags)
+   pci_write_config_dword(mac-pdev, PAS_MAC_CFG_PCFG, new_flags);
+
+   if (msg  netif_msg_link(mac))
+   printk(KERN_INFO %s: Link is up at %d Mbps, %s duplex.\n,
+  dev-name, mac-speed, mac-duplex ? full : half);
+}
+
+static int pasemi_mac_phy_init(struct net_device *dev)
+{
+   struct pasemi_mac *mac = netdev_priv(dev);
+   struct device_node *dn, *phy_dn;
+   struct phy_device *phydev;
+   unsigned int phy_id;
+   const phandle *ph;
+   const unsigned int *prop;
+   struct resource r;
+   int ret;
+
+   dn = pci_device_to_OF_node(mac-pdev);
+   ph = get_property(dn, phy-handle, NULL);
+   if (!ph)
+   return -ENODEV;
+   phy_dn = of_find_node_by_phandle(*ph);
+
+   prop = get_property(phy_dn, reg, NULL);
+   ret = of_address_to_resource(phy_dn-parent, 0, r);
+   if (ret)
+   goto err;
+
+   phy_id = *prop;
+   snprintf(mac-phy_id, BUS_ID_SIZE, PHY_ID_FMT, (int)r.start, phy_id);
+
+   of_node_put(phy_dn);
+
+   mac-link = 0;
+   mac-speed = 0;
+   mac-duplex = -1;
+
+   phydev = phy_connect(dev, mac-phy_id, pasemi_adjust_link, 0, 
PHY_INTERFACE_MODE_SGMII);
+
+   if (IS_ERR(phydev)) {
+   printk(KERN_ERR %s: Could not attach to phy\n, dev-name);
+   return PTR_ERR(phydev);
+   }
+
+   mac-phydev = phydev;
+
+   return 0;
+
+err:
+   of_node_put(phy_dn);
+   return -ENODEV;
+}
+
+
 static int pasemi_mac_open(struct net_device *dev)
 {
struct pasemi_mac *mac = netdev_priv(dev);
@@ -667,6 +769,13 @@ static int pasemi_mac_open(struct net_de
 
pasemi_mac_replenish_rx_ring(dev);
 
+   ret = pasemi_mac_phy_init(dev);
+   /* Some configs don't have PHYs (XAUI etc), so don't complain about
+* failed init due to -ENODEV.
+*/
+   if (ret  ret != -ENODEV)
+   dev_warn(mac-pdev-dev, phy init failed: %d\n, ret);
+
netif_start_queue(dev);
netif_poll_enable(dev);
 
@@ -697,6 +806,9 @@ static int pasemi_mac_open(struct net_de
goto out_rx_int;
}
 
+   if (mac-phydev)
+   phy_start(mac-phydev);
+
return 0;
 
 out_rx_int:
@@ -720,6 +832,11 @@ static int pasemi_mac_close(struct net_d
unsigned int stat;
int retries;
 
+   if (mac-phydev) {
+   phy_stop(mac-phydev);
+   phy_disconnect(mac-phydev);
+   }
+
netif_stop_queue(dev);
 
/* Clean out any pending buffers */
@@ -1013,6 +1130,9 @@ pasemi_mac_probe(struct pci_dev *pdev, c
mac-rx_status = dma_status-rx_sta[mac-dma_rxch];
mac-tx_status = dma_status-tx_sta[mac-dma_txch];
 
+   /* Enable most messages by default */
+   mac-msg_enable = (NETIF_MSG_IFUP  1 ) - 1;
+
err = register_netdev(dev);
 
if (err) {
Index: powerpc/drivers/net/pasemi_mac.h

[PATCH] [5/5] [v2] pasemi_mac: use local-mac-address

2007-04-18 Thread Olof Johansson

Use local-mac-address in the device tree instead. Fall back to mac-address
for older firmware.


Signed-off-by: Olof Johansson [EMAIL PROTECTED]

Index: powerpc/drivers/net/pasemi_mac.c
===
--- powerpc.orig/drivers/net/pasemi_mac.c
+++ powerpc/drivers/net/pasemi_mac.c
@@ -74,7 +74,12 @@ static int pasemi_get_mac_addr(struct pa
return -ENOENT;
}
 
-   maddr = get_property(dn, mac-address, NULL);
+   maddr = get_property(dn, local-mac-address, NULL);
+
+   /* Fall back to mac-address for older firmware */
+   if (maddr == NULL)
+   maddr = get_property(dn, mac-address, NULL);
+
if (maddr == NULL) {
dev_warn(pdev-dev,
 no mac address in device tree, not configuring\n);
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: + ppp_generic-fix-lockdep-warning.patch added to -mm tree

2007-04-18 Thread Jarek Poplawski

On Tue, Apr 17, 2007 at 08:26:32AM -0500, Michal Ostrowski wrote:
 The xmit function of a PPP channel is a synchronous operation.  If the 
 transmission fails, we must notify the caller and let them re-submit the 
 skb later.  The return status of dev_queue_xmit is needed to determine 
 the return code passed back to the caller and thus the call is made 
 synchronously and not in a tasklet.

Sure! But on the other hand:

- the return code from dev_queue_xmit doesn't guarantee
the transmission won't fail,

- similar code in ppp_async: ppp_async_send isn't so
truthful and doesn't even check the return from
ppp_async_push; BTW - probably other layers should
care for transmission errors and re-submitting,

- maybe I'm wrong here, but I think every layer should
look (work) similarly here: dev_queue_xmit (or qdisc_run)
thinks it's talking to some independent network device,
which after dev_hard_start_xmit (and dev-hard_start_xmit)
does some transmission; if, instead of this, next
dev_queue_xmits are called with xmit locks held from
previous devs, then it looks like logical recursion and
locking is really hard to follow (even if it's OK).

 Looking at the stack traces earlier in this thread, it seems to me that 
 even if the PPPoE call was made in a tasklet, this same warning could be 
 generated.

Of course a tasklet by itself isn't a cure, but if
dev_queue_xmit is done from tasklet - only locks got
within this tasklet should be counted.

Thanks for response & best regards,
Jarek P.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [BRIDGE] Unaligned access on IA64 when comparing ethernet addresses

2007-04-18 Thread Pavel Emelianov

[snip]

 --- linux-2.6.orig/net/bridge/br_private.h2007-04-17
 13:26:48.0 -0700 +++ linux-2.6/net/bridge/br_private.h
 2007-04-17 13:30:29.0 -0700 @@ -36,7 +36,7 @@
  {
   unsigned char   prio[2];
   unsigned char   addr[6];
 -};
 +} __attribute__((aligned(8)));

Why 8? Mustn't it be 16? Address is to be 2-bytes aligned...

  struct mac_addr
  {
 -
 To unsubscribe from this list: send the line unsubscribe netdev in
 the body of a message to [EMAIL PROTECTED]
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[NETLINK] Don't attach callback to a going-away netlink socket

2007-04-18 Thread Pavel Emelianov

Sorry, I forgot to put netdev and David in Cc when I first sent it.

There is a race between netlink_dump_start() and netlink_release()
that can lead to the situation when a netlink socket with non-zero
callback is freed.

Here it is:

CPU1:   CPU2
netlink_release():  netlink_dump_start():

sk = netlink_lookup(); /* OK */

netlink_remove();

spin_lock(nlk-cb_lock);
if (nlk-cb) { /* false */
  ...
}
spin_unlock(nlk-cb_lock);

spin_lock(nlk-cb_lock);
if (nlk-cb) { /* false */
 ...
}
nlk-cb = cb;
spin_unlock(nlk-cb_lock);
...
sock_orphan(sk);
/*
 * proceed with releasing
 * the socket
 */

The proposal is to call sock_orphan() before detaching the callback
in netlink_release() and to check for the sock to be SOCK_DEAD in
netlink_dump_start() before setting a new callback.

Signed-off-by: Denis Lunev [EMAIL PROTECTED]
Signed-off-by: Kirill Korotaev [EMAIL PROTECTED]
Signed-off-by: Pavel Emelianov [EMAIL PROTECTED]
Acked-by: Patrick McHardy [EMAIL PROTECTED]

---

--- a/net/netlink/af_netlink.c  2004-10-25 12:12:23.0 +0400
+++ b/net/netlink/af_netlink.c  2004-10-28 16:26:12.0 +0400
@@ -255,6 +255,7 @@ static int netlink_release(struct socket
return 0;
 
netlink_remove(sk);
+   sock_orphan(sk);
nlk = nlk_sk(sk);
 
spin_lock(nlk-cb_lock);
@@ -269,7 +270,6 @@ static int netlink_release(struct socket
/* OK. Socket is unlinked, and, therefore,
   no new packets will arrive */
 
-   sock_orphan(sk);
sock-sk = NULL;
wake_up_interruptible_all(nlk-wait);
 
@@ -942,9 +942,9 @@ int netlink_dump_start(struct sock *ssk,
return -ECONNREFUSED;
}
nlk = nlk_sk(sk);
-   /* A dump is in progress... */
+   /* A dump or destruction is in progress... */
spin_lock(nlk-cb_lock);
-   if (nlk-cb) {
+   if (nlk-cb || sock_flag(sk, SOCK_DEAD)) {
spin_unlock(nlk-cb_lock);
netlink_destroy_callback(cb);
sock_put(sk);
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [NETLINK] Don't attach callback to a going-away netlink socket

2007-04-18 Thread Evgeniy Polyakov

On Wed, Apr 18, 2007 at 12:16:18PM +0400, Pavel Emelianov ([EMAIL PROTECTED]) 
wrote:
 Sorry, I forgot to put netdev and David in Cc when I first sent it.
 
 There is a race between netlink_dump_start() and netlink_release()
 that can lead to the situation when a netlink socket with non-zero
 callback is freed.

Out of curiosity, why not fix netlink_dump_start() to remove the
callback in the error path, since in the 'no-error' path it is removed in
netlink_dump()?

And, btw, can the release method be called while the socket is being used? I
thought proper reference counting should prevent this, but I am not
100% sure given the RCU dereferencing of the descriptor.

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [NETLINK] Don't attach callback to a going-away netlink socket

2007-04-18 Thread Patrick McHardy

Evgeniy Polyakov wrote:
 On Wed, Apr 18, 2007 at 12:16:18PM +0400, Pavel Emelianov ([EMAIL PROTECTED]) 
 wrote:
 
Sorry, I forgot to put netdev and David in Cc when I first sent it.

There is a race between netlink_dump_start() and netlink_release()
that can lead to the situation when a netlink socket with non-zero
callback is freed.
 
 
 Out of curiosity, why not to fix a netlink_dump_start() to remove
 callback in error path, since in 'no-error' path it removes it in
 netlink_dump().


It already does (netlink_destroy_callback), but that doesn't help
with this race though since without this patch we don't enter the
error path.

 And, btw, can release method be called while socket is being used, I
 thought about proper reference counters should prevent this, but not
 100% sure with RCU dereferencing of the descriptor.


The problem is asynchronous processing of the dump request in the
context of a different process. Process requests a dump, message
is queued and process returns from sendmsg since some other process
is already processing the queue. Then the process closes the socket,
resulting in netlink_release being called. When the dump request
is finally processed the race Pavel described might happen. This
can only happen for netlink families that use mutex_try_lock for
queue processing of course.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [NETLINK] Don't attach callback to a going-away netlink socket

2007-04-18 Thread Pavel Emelianov

Evgeniy Polyakov wrote:
 On Wed, Apr 18, 2007 at 12:16:18PM +0400, Pavel Emelianov ([EMAIL PROTECTED]) 
 wrote:
 Sorry, I forgot to put netdev and David in Cc when I first sent it.

 There is a race between netlink_dump_start() and netlink_release()
 that can lead to the situation when a netlink socket with non-zero
 callback is freed.
 
 Out of curiosity, why not to fix a netlink_dump_start() to remove
 callback in error path, since in 'no-error' path it removes it in

Error path is not relevant here. The problem is that we
keep a callback on a socket that is about to be freed.

 netlink_dump().
 
 And, btw, can release method be called while socket is being used, I
 thought about proper reference counters should prevent this, but not
 100% sure with RCU dereferencing of the descriptor.
 

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [BRIDGE] Unaligned access on IA64 when comparing ethernet addresses

2007-04-18 Thread David Miller

From: Pavel Emelianov [EMAIL PROTECTED]
Date: Wed, 18 Apr 2007 10:43:56 +0400

 [snip]

  --- linux-2.6.orig/net/bridge/br_private.h  2007-04-17
  13:26:48.0 -0700 +++ linux-2.6/net/bridge/br_private.h
  2007-04-17 13:30:29.0 -0700 @@ -36,7 +36,7 @@
   {
  unsigned char   prio[2];
  unsigned char   addr[6];
  -};
  +} __attribute__((aligned(8)));

 Why 8? Mustn't it be 16? Address is to be 2-bytes aligned...

Actually it could be made 2, the aligned() attribute is
in bytes, not bits.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [BRIDGE] Unaligned access on IA64 when comparing ethernet addresses

2007-04-18 Thread Pavel Emelianov

David Miller wrote:
 From: Pavel Emelianov [EMAIL PROTECTED]
 Date: Wed, 18 Apr 2007 10:43:56 +0400
 
 [snip]

 --- linux-2.6.orig/net/bridge/br_private.h  2007-04-17
 13:26:48.0 -0700 +++ linux-2.6/net/bridge/br_private.h
 2007-04-17 13:30:29.0 -0700 @@ -36,7 +36,7 @@
  {
 unsigned char   prio[2];
 unsigned char   addr[6];
 -};
 +} __attribute__((aligned(8)));
 Why 8? Mustn't it be 16? Address is to be 2-bytes aligned...
 
 Actually it could be made 2, the aligned() attribute is
 in bytes, not bits.

Indeed :) My bad :( Thank you...

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [NETLINK] Don't attach callback to a going-away netlink socket

2007-04-18 Thread Evgeniy Polyakov

On Wed, Apr 18, 2007 at 10:26:31AM +0200, Patrick McHardy ([EMAIL PROTECTED]) 
wrote:
 Evgeniy Polyakov wrote:
  On Wed, Apr 18, 2007 at 12:16:18PM +0400, Pavel Emelianov ([EMAIL 
  PROTECTED]) wrote:
  
 Sorry, I forgot to put netdev and David in Cc when I first sent it.
 
 There is a race between netlink_dump_start() and netlink_release()
 that can lead to the situation when a netlink socket with non-zero
 callback is freed.
  
  
  Out of curiosity, why not to fix a netlink_dump_start() to remove
  callback in error path, since in 'no-error' path it removes it in
  netlink_dump().
 
 
 It already does (netlink_destroy_callback), but that doesn't help
 with this race though since without this patch we don't enter the
 error path.

I thought that with releasing a socket, which will have a callback
attached only results in a leak of the callback? In that case we can
just free it in dump() just like it is done in no-error path already.
Or do I miss something additional?

  And, btw, can release method be called while socket is being used, I
  thought about proper reference counters should prevent this, but not
  100% sure with RCU dereferencing of the descriptor.
 
 
 The problem is asynchronous processing of the dump request in the
 context of a different process. Process requests a dump, message
 is queued and process returns from sendmsg since some other process
 is already processing the queue. Then the process closes the socket,
 resulting in netlink_release being called. When the dump request
 is finally processed the race Pavel described might happen. This
 can only happen for netlink families that use mutex_try_lock for
 queue processing of course.

Isn't it called from ->sk_data_ready(), which is synchronous with
respect to sendmsg? Not sure about conntrack though, but it looks so.

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [NETLINK] Don't attach callback to a going-away netlink socket

2007-04-18 Thread Evgeniy Polyakov

On Wed, Apr 18, 2007 at 12:32:40PM +0400, Pavel Emelianov ([EMAIL PROTECTED]) 
wrote:
 Evgeniy Polyakov wrote:
  On Wed, Apr 18, 2007 at 12:16:18PM +0400, Pavel Emelianov ([EMAIL 
  PROTECTED]) wrote:
  Sorry, I forgot to put netdev and David in Cc when I first sent it.
 
  There is a race between netlink_dump_start() and netlink_release()
  that can lead to the situation when a netlink socket with non-zero
  callback is freed.
  
  Out of curiosity, why not to fix a netlink_dump_start() to remove
  callback in error path, since in 'no-error' path it removes it in
 
 Error path is not relevant here. The problem is that we
 keep a calback on a socket that is about to be freed.

Yes, you are right, that it will not be freed in netlink_release(), 
but it will be freed in netlink_dump() after it is processed (in no-error 
path only though).

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [NETLINK] Don't attach callback to a going-away netlink socket

2007-04-18 Thread Patrick McHardy

Evgeniy Polyakov wrote:
 On Wed, Apr 18, 2007 at 10:26:31AM +0200, Patrick McHardy ([EMAIL PROTECTED]) 
 wrote:
 
Out of curiosity, why not to fix a netlink_dump_start() to remove
callback in error path, since in 'no-error' path it removes it in
netlink_dump().


It already does (netlink_destroy_callback), but that doesn't help
with this race though since without this patch we don't enter the
error path.
 
 
 I thought that with releasing a socket, which will have a callback
 attached only results in a leak of the callback? In that case we can
 just free it in dump() just like it is done in no-error path already.
 Or do I miss something additional?


That would only work if there is nothing to dump (cb-dump returns 0).
Otherwise it is not freed.

The problem is asynchronous processing of the dump request in the
context of a different process. Process requests a dump, message
is queued and process returns from sendmsg since some other process
is already processing the queue. Then the process closes the socket,
resulting in netlink_release being called. When the dump request
is finally processed the race Pavel described might happen. This
can only happen for netlink families that use mutex_try_lock for
queue processing of course.
 
 
 Doesn't it called from -sk_data_ready() which is synchronous with
 respect to sendmsg, not sure about conntrack though, but it looks so?


Yes, but for kernel sockets we end up calling the input function,
which when mutex_trylock is used returns immediately when some
other process is already processing the queue, so the requesting
process might close the socket before the request is processed.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [NETLINK] Don't attach callback to a going-away netlink socket

2007-04-18 Thread Pavel Emelianov

Evgeniy Polyakov wrote:
 On Wed, Apr 18, 2007 at 12:32:40PM +0400, Pavel Emelianov ([EMAIL PROTECTED]) 
 wrote:
 Evgeniy Polyakov wrote:
 On Wed, Apr 18, 2007 at 12:16:18PM +0400, Pavel Emelianov ([EMAIL 
 PROTECTED]) wrote:
 Sorry, I forgot to put netdev and David in Cc when I first sent it.

 There is a race between netlink_dump_start() and netlink_release()
 that can lead to the situation when a netlink socket with non-zero
 callback is freed.
 Out of curiosity, why not to fix a netlink_dump_start() to remove
 callback in error path, since in 'no-error' path it removes it in
 Error path is not relevant here. The problem is that we
 keep a calback on a socket that is about to be freed.
 
 Yes, you are right, that it will not be freed in netlink_release(), 
 but it will be freed in netlink_dump() after it is processed (in no-error 
 path only though).
 

But error path will leak it. On success path we would have
a leaked packet in sk_write_queue, since we didn't see it in
skb_queue_purge() while doing netlink_release().

Of course we can place the struts in code to handle the case
when we have a released socket with the attached callback, but
it is more correct (IMHO) not to allow to attach the callbacks
to dead sockets.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [NETLINK] Don't attach callback to a going-away netlink socket

2007-04-18 Thread Evgeniy Polyakov

On Wed, Apr 18, 2007 at 10:50:42AM +0200, Patrick McHardy ([EMAIL PROTECTED]) 
wrote:
 It already does (netlink_destroy_callback), but that doesn't help
 with this race though since without this patch we don't enter the
 error path.
  
  I thought that with releasing a socket, which will have a callback
  attached only results in a leak of the callback? In that case we can
  just free it in dump() just like it is done in no-error path already.
  Or do I miss something additional?
 
 That would only work if there is nothing to dump (cb-dump returns 0).
 Otherwise it is not freed.

That is what I referred to as error path. Btw, with positive return
value we end up in subsequent call to input which will free callback
under lock as expected.

I do not object against the patch, just want to make a clear vision about
dumps - if callback is allocated to be used in dump only, then we could
just free it there without passing to next round.

 The problem is asynchronous processing of the dump request in the
 context of a different process. Process requests a dump, message
 is queued and process returns from sendmsg since some other process
 is already processing the queue. Then the process closes the socket,
 resulting in netlink_release being called. When the dump request
 is finally processed the race Pavel described might happen. This
 can only happen for netlink families that use mutex_try_lock for
 queue processing of course.
  
  
  Doesn't it called from -sk_data_ready() which is synchronous with
  respect to sendmsg, not sure about conntrack though, but it looks so?
 
 
 Yes, but for kernel sockets we end up calling the input function,
 which when mutex_trylock is used returns immediately when some
 other process is already processing the queue, so the requesting
 process might close the socket before the request is processed.

So far it is only netfilter and gennetlink, we would see huge dump 
from netlink_sock_destruct.
Anyway, that is possible situation, thanks for clearing this up.

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [NETLINK] Don't attach callback to a going-away netlink socket

2007-04-18 Thread Evgeniy Polyakov

On Wed, Apr 18, 2007 at 01:03:56PM +0400, Pavel Emelianov ([EMAIL PROTECTED]) 
wrote:
  Yes, you are right, that it will not be freed in netlink_release(), 
  but it will be freed in netlink_dump() after it is processed (in no-error 
  path only though).
  
 
 But error path will leak it. On success path we would have
 a leaked packet in sk_write_queue, since we didn't see it in
 skb_queue_purge() while doing netlink_release().
 
 Of course we can place the struts in code to handle the case
 when we have a released socket with the attached callback, but
 it is more correct (IMHO) not to allow to attach the callbacks
 to dead sockets.

That is why I've asked why such approach is used but not freeing
callback in error (well, no-dump name is better to describe that path)
path, and more generally, why callback is attached, but not freed in the
function, but instead is freed next time dump started.

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [NETLINK] Don't attach callback to a going-away netlink socket

2007-04-18 Thread Patrick McHardy

Evgeniy Polyakov wrote:
 On Wed, Apr 18, 2007 at 10:50:42AM +0200, Patrick McHardy ([EMAIL PROTECTED]) 
 wrote:
 
I thought that with releasing a socket, which will have a callback
attached only results in a leak of the callback? In that case we can
just free it in dump() just like it is done in no-error path already.
Or do I miss something additional?

That would only work if there is nothing to dump (cb-dump returns 0).
Otherwise it is not freed.
 
 
 That is what I referred to as error path. Btw, with positive return
 value we end up in subsequent call to input which will free callback
 under lock as expected.


No, nothing is going to call netlink_dump after the initial call since
the socket is gone.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

ESP interfamily tunnel bug?

2007-04-18 Thread Diego Beltrami

Hi,

we have discovered a routing related problem in ESP tunnel and beet mode.
We don't know whether it is a bug in the XFRM, or just in the way the
virtual addresses and the corresponding routes are set-up. We set up a
dummy0 device for the virtual addresses:

[EMAIL PROTECTED]:~# ip addr show dummy0
5: dummy0: BROADCAST,NOARP,UP,1 mtu 1500 qdisc noqueue
 link/ether 92:09:fe:11:81:1b brd ff:ff:ff:ff:ff:ff
 inet6 2001:72:e6d3:1cf3:e11d:5bb0:b99:e85e/28 scope global
valid_lft forever preferred_lft forever
 inet6 2001:74:32e0:df36:e862:3963:523e:dd7d/28 scope global
valid_lft forever preferred_lft forever
 inet6 2001:73:d3a8:8723:d572:7549:7f2c:e590/28 scope global
valid_lft forever preferred_lft forever
 inet6 2001:75:a2e6:aad6:e901:dd1c:ba95:e300/28 scope global
valid_lft forever preferred_lft forever
 inet6 fe80::9009:feff:fe11:811b/64 scope link
valid_lft forever preferred_lft forever

And then we have routes for the virtual addresses:

[EMAIL PROTECTED]:~# ip -6 route
2001:72:e6d3:1cf3:e11d:5bb0:b99:e85e dev dummy0  metric 1024  expires
21334305sec mtu 1500 advmss 1440 metric 10 4294967295
2001:73:d3a8:8723:d572:7549:7f2c:e590 dev dummy0  metric 1024  expires
21334305sec mtu 1500 advmss 1440 metric 10 4294967295
2001:74:32e0:df36:e862:3963:523e:dd7d dev dummy0  metric 1024  expires
21334305sec mtu 1500 advmss 1440 metric 10 4294967295
2001:75:a2e6:aad6:e901:dd1c:ba95:e300 dev dummy0  metric 1024  expires
21334305sec mtu 1500 advmss 1440 metric 10 4294967295
2001:70::/28 dev dummy0  metric 256  expires 21334305sec mtu 1500 advmss
1440 metric 10 4294967295
fe80::/64 dev dummy0  metric 256  expires 21334305sec mtu 1500 advmss 1440
metric 10 4294967295
ff00::/8 dev eth0  metric 256  expires 21325454sec mtu 1500 advmss 1440
metric 10 4294967295
ff00::/8 dev dummy0  metric 256  expires 21334305sec mtu 1500 advmss 1440
metric 10 4294967295
unreachable default dev lo  proto none  metric -1  error -101 metric 10
255

...and set-up policies and associations. The virtual IPv6 addresses
are inner and IPv4 addresses are outer addresses:

[EMAIL PROTECTED]:~/projects/hipl--userspace--2.6# ip xfrm policy show
src 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15/128 dst
2001:74:32e0:df36:e862:3963:523e:dd7d/128
 dir in priority 0
 tmpl src c1a7:bb82:: dst c0a8:65::
 proto esp reqid 0 mode beet
src 2001:74:32e0:df36:e862:3963:523e:dd7d/128 dst
2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15/128
 dir out priority 0
 tmpl src c0a8:65:: dst c1a7:bb82::
 proto esp reqid 0 mode beet

[EMAIL PROTECTED]:~/projects/hipl--userspace--2.6# ip xfrm state show
src 193.167.187.130 dst 192.168.0.101
 proto esp spi 0xf556c7c7 reqid 0 mode beet
 replay-window 0
 auth sha1 0xab327b944011c94a0c54a097b4752e23f377ff34
 enc aes 0x882a334830b1cd14b9e411ec37a4242f
 encap type espinudp-nonike sport 50500 dport 50500
   addr 193.167.187.130
 sel src 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15/0
 dst 2001:74:32e0:df36:e862:3963:523e:dd7d/0
 src 192.168.0.101 dst 193.167.187.130
 proto esp spi 0x1663f3a4 reqid 0 mode beet
 replay-window 0
 auth sha1 0x9f07dabce4abf2ebfe45e247ede2cf15f9156a13
 enc aes 0xfc50593b9af6d296b042a16ca00bad20
 encap type espinudp-nonike
 sport 50500 dport 50500 addr 192.168.0.101
 sel src 2001:74:32e0:df36:e862:3963:523e:dd7d/0
 dst 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15/0

And then we try to ping6 the virtual address:

[EMAIL PROTECTED]:~/projects/hipl--userspace--2.6# ping6 -I
2001:0074:32e0:df36:e862:3963:523e:dd7d
2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15
PING
2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15(2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15)
from 2001:74:32e0:df36:e862:3963:523e:dd7d : 56 data bytes
ping: sendmsg: Network is unreachable
ping: sendmsg: Network is unreachable

Tcpdump shows no traffic at the host. We can repeat the problem both with
tunnel and beet modes in 2.6.21-rc6 (and also in 2.6.17.14).

I have tried also ip rule stuff but it seems that it does not rule with
IPv6 :) It does not help either to reduce the number of virtual addresses to a
single one. It is weird that the ESP actually works with some combinations of
virtual addresses (4 of 16) in both directions, or works unidirectionally
on some and does not work at all on the rest. I verified the
unidirectional property using a simple UDP based application: sender xmits
UDP packet, receiver gets it ok, but cannot respond. So, the problem is in
the transmission of packets.

I traced the ENETUNREACH in the kernel side to here:

net/ipv4/route.c:ip_route_output_slow:
 if (fib_lookup(fl, res)) {
 
if (dev_out)
 dev_put(dev_out);
 err = -ENETUNREACH;

FIB lookup is returning an error in net/ipv4/fib_rules:

int fib_lookup(const struct flowi

Re: [PATCH 2.6] WE-22 : prevent information leak on 64 bit

2007-04-18 Thread Johannes Berg

Jean,

   First, I'm the current active maintainer of the
 wext-over-netlink interface, and nobody bothered to even 'inform' me
 about its removal, let alone consult with me.

I definitely should have copied you on the feature-removal schedule
patch for wext-over-netlink and then the actual removal in wireless-dev;
please accept my apologies for not doing that, it was not done in bad
faith. It was never my intention to demote you to a second class
citizen, I'm sorry you feel that way.

I have previously (and multiple times) given technical justification for
removing this code (even recorded in the kernel changelog now) and I
contend your allegation that it is a political issue. Others in this
thread have pointed out the technical issues with wext and wext/nl so I
will not repeat them.

I hope that despite my mistakes in handling the wext/nl removal we will
be able to work together in the future to have wext fully supported with
clear semantics for backwards compatibility while the kernel internally
migrates towards cfg80211.

johannes


signature.asc
Description: This is a digitally signed message part

[RFC][PATCH -mm take4 0/6] proposal for dynamic configurable netconsole

2007-04-18 Thread Keiichi KII

From: Keiichi KII [EMAIL PROTECTED]

The netconsole is a very useful module for collecting kernel messages under
certain circumstances(e.g. disk logging fails, serial port is unavailable).

But current netconsole is not flexible. For example, if you want to change ip
address for logging agent, in the case of built-in netconsole, you can't change
config except for changing boot parameter and rebooting your system, or in the
case of module netconsole, you need to remove it and reload with different
parameters.

By adopting my patches, the current netconsole becomes a little complex.
But the kernel messages (especially panic messages) are significant information
 to solve bugs and troubles promptly and we have been losing serial console
port with PCs and Servers.

I think that we need the environment in which we can collect kernel messages
flexibly.

So, I propose the following extended features for netconsole.

1) support for multiple logging agents.
2) add interface to access each parameter of netconsole
   using sysfs.

[changes since take3]
-changing kernel base from 2.6.21-rc3-mm2 to 2.6.21-rc6-mm1.
-introducing CONFIG_NETCONSOLE_DYNCON.
-cleanup

Your comments are very welcome.

Signed-off-by: Keiichi KII [EMAIL PROTECTED]
Signed-off-by: Takayoshi Kochi [EMAIL PROTECTED]
---

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[RFC][PATCH -mm take4 1/6] marking __init

2007-04-18 Thread Keiichi KII

From: Keiichi KII [EMAIL PROTECTED]

This patch contains the following cleanups.
 - add __init for initialization functions(option_setup() and
   init_netconsole()).

Acked-by: Matt Mackall [EMAIL PROTECTED]
Signed-off-by: Keiichi KII [EMAIL PROTECTED]
Signed-off-by: Takayoshi Kochi [EMAIL PROTECTED]
---
Index: linux-mm/drivers/net/netconsole.c
===
--- linux-mm.orig/drivers/net/netconsole.c
+++ linux-mm/drivers/net/netconsole.c
@@ -91,7 +91,7 @@ static struct console netconsole = {
.write = write_msg
 };
 
-static int option_setup(char *opt)
+static int __init option_setup(char *opt)
 {
configured = !netpoll_parse_options(np, opt);
return 1;
@@ -99,7 +99,7 @@ static int option_setup(char *opt)
 
 __setup(netconsole=, option_setup);
 
-static int init_netconsole(void)
+static int __init init_netconsole(void)
 {
int err;
 

-- 


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[RFC][PATCH -mm take4 3/6] add interface for netconsole using sysfs

2007-04-18 Thread Keiichi KII

From: Keiichi KII [EMAIL PROTECTED]

This patch contains the following changes.

create a sysfs entry for netconsole in /sys/class/misc.
This entry has elements related to netconsole as follows.
You can change configuration of netconsole(writable attributes such as IP
address, port number and so on) and check current configuration of netconsole.

-+- /sys/class/misc/
 |-+- netconsole/
   |-+- port1/
   | |--- id  [r--r--r--]  unique port id
   | |--- local_ip[rw-r--r--]  source IP to use, writable
   | |--- local_mac   [r--r--r--]  source MAC address
   | |--- local_port  [rw-r--r--]  source port number for UDP packets, writable
   | |--- remote_ip   [rw-r--r--]  IP address for logging agent, writable
   | |--- remote_mac  [rw-r--r--]  MAC address for logging agent, writable
   | |--- remote_port [rw-r--r--]  port number for logging agent, writable
   |--- port2/
   |--- port3/
   ...

Signed-off-by: Keiichi KII [EMAIL PROTECTED]
Signed-off-by: Takayoshi Kochi [EMAIL PROTECTED]
---
Index: mm/drivers/net/netconsole.c
===
--- mm.orig/drivers/net/netconsole.c
+++ mm/drivers/net/netconsole.c
@@ -45,6 +45,8 @@
 #include linux/sysrq.h
 #include linux/smp.h
 #include linux/netpoll.h
+#include linux/miscdevice.h
+#include linux/inet.h
 
 MODULE_AUTHOR(Maintainer: Matt Mackall [EMAIL PROTECTED]);
 MODULE_DESCRIPTION(Console driver for network interfaces);
@@ -67,6 +69,7 @@ static struct netpoll np = {
 #ifdef CONFIG_NETCONSOLE_DYNCON
 struct netconsole_target {
struct list_head list;
+   struct kobject obj;
int id;
struct netpoll np;
 };
@@ -77,6 +80,207 @@ static DEFINE_SPINLOCK(target_list_lock)
 static int add_target(char* target_config);
 static void remove_target(struct netconsole_target *nt);
 static void cleanup_netconsole(void);
+static int setup_target_sysfs(struct netconsole_target *nt);
+
+static int miscdev_configured;
+
+static ssize_t show_id(struct netconsole_target *nt, char *buf)
+{
+   return sprintf(buf, %d\n, nt-id);
+}
+
+static ssize_t show_local_port(struct netconsole_target *nt, char *buf)
+{
+   return sprintf(buf, %d\n, nt-np.local_port);
+}
+
+static ssize_t show_remote_port(struct netconsole_target *nt, char *buf)
+{
+   return sprintf(buf, %d\n, nt-np.remote_port);
+}
+
+static ssize_t show_local_ip(struct netconsole_target *nt, char *buf)
+{
+   return sprintf(buf, %d.%d.%d.%d\n, HIPQUAD(nt-np.local_ip));
+}
+
+static ssize_t show_remote_ip(struct netconsole_target *nt, char *buf)
+{
+   return sprintf(buf, %d.%d.%d.%d\n, HIPQUAD(nt-np.remote_ip));
+}
+
+static ssize_t show_local_mac(struct netconsole_target *nt, char *buf)
+{
+   return sprintf(buf, %02x:%02x:%02x:%02x:%02x:%02x\n,
+  nt-np.local_mac[0], nt-np.local_mac[1],
+  nt-np.local_mac[2], nt-np.local_mac[3],
+  nt-np.local_mac[4], nt-np.local_mac[5]);
+}
+
+static ssize_t show_remote_mac(struct netconsole_target *nt, char *buf)
+{
+   return sprintf(buf, %02x:%02x:%02x:%02x:%02x:%02x\n,
+  nt-np.remote_mac[0], nt-np.remote_mac[1],
+  nt-np.remote_mac[2], nt-np.remote_mac[3],
+  nt-np.remote_mac[4], nt-np.remote_mac[5]);
+}
+
+static ssize_t store_local_port(struct netconsole_target *nt, const char *buf,
+   size_t count)
+{
+   spin_lock(target_list_lock);
+   nt-np.local_port = simple_strtol(buf, NULL, 10);
+   spin_unlock(target_list_lock);
+
+   return count;
+}
+
+static ssize_t store_remote_port(struct netconsole_target *nt, const char *buf,
+   size_t count)
+{
+   spin_lock(target_list_lock);
+   nt-np.remote_port = simple_strtol(buf, NULL, 10);
+   spin_unlock(target_list_lock);
+
+   return count;
+}
+
+static ssize_t store_local_ip(struct netconsole_target *nt, const char *buf,
+ size_t count)
+{
+   spin_lock(target_list_lock);
+   nt-np.local_ip = ntohl(in_aton(buf));
+   spin_unlock(target_list_lock);
+
+   return count;
+}
+
+static ssize_t store_remote_ip(struct netconsole_target *nt, const char *buf,
+  size_t count)
+{
+   spin_lock(target_list_lock);
+   nt-np.remote_ip = ntohl(in_aton(buf));
+   spin_unlock(target_list_lock);
+
+   return count;
+}
+
+static ssize_t store_remote_mac(struct netconsole_target *nt, const char *buf,
+  size_t count)
+{
+   unsigned char input_mac[ETH_ALEN] =
+   {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+   const char *cur = buf;
+   int i = 0;
+
+   input_mac[i++] = simple_strtol(cur, NULL, 16);
+   while ((cur = strchr(cur, ':')) != NULL) {
+   cur++;
+   input_mac[i++] = simple_strtol(cur, NULL, 16);
+   }
+   if (i != ETH_ALEN)
+   return -EINVAL;
+

[RFC][PATCH -mm take4 2/6] support multiple logging

2007-04-18 Thread Keiichi KII

From: Keiichi KII [EMAIL PROTECTED]

This patch contains the following changes for supporting multiple logging
 agents.

1. extend netconsole to multiple netpolls
   To send kernel messages to multiple logging agents, extend netconsole
to be able to use multiple netpolls. Each netpoll sends kernel messages
to its own logging agent.

2. change config parameter format
   We change config parameter format from single configuration to multiple 
   configurations separated by ';'.

   ex) sending kernel messages to destination1 and destination2 using eth0.
modprobe netconsole \
netconsole=@/eth0,@[destination1]/;@/eth0,@[destination2]/

3. introduce CONFIG_NETCONSOLE_DYNCON config to change between 
   existing netconsole and netconsole applying the above function.

Signed-off-by: Keiichi KII [EMAIL PROTECTED]
Signed-off-by: Takayoshi Kochi [EMAIL PROTECTED]
---
Index: mm/drivers/net/netconsole.c
===
--- mm.orig/drivers/net/netconsole.c
+++ mm/drivers/net/netconsole.c
@@ -61,15 +61,102 @@ static struct netpoll np = {
.remote_port = ,
.remote_mac = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
 };
-static int configured = 0;
 
 #define MAX_PRINT_CHUNK 1000
 
+#ifdef CONFIG_NETCONSOLE_DYNCON
+struct netconsole_target {
+   struct list_head list;
+   int id;
+   struct netpoll np;
+};
+
+static LIST_HEAD(target_list);
+static DEFINE_SPINLOCK(target_list_lock);
+
+static int add_target(char* target_config);
+static void remove_target(struct netconsole_target *nt);
+static void cleanup_netconsole(void);
+
+static int add_target(char* target_config)
+{
+   int retval = 0;
+   static atomic_t target_count = ATOMIC_INIT(0);
+   struct netconsole_target *new_target;
+
+   new_target = kzalloc(sizeof(*new_target), GFP_KERNEL);
+   if (!new_target) {
+   printk(KERN_ERR netconsole: kmalloc() failed!\n);
+   retval = -ENOMEM;
+   goto out;
+   }
+
+   new_target-np = np;
+   if (netpoll_parse_options(new_target-np, target_config)) {
+   printk(KERN_ERR netconsole: can't parse config:%s\n,
+  target_config);
+   kfree(new_target);
+   retval = -EINVAL;
+   goto out;
+   }
+   if (netpoll_setup(new_target-np)) {
+   printk(KERN_ERR netconsole: can't setup netpoll:%s\n,
+  target_config);
+   kfree(new_target);
+   retval = -EINVAL;
+   goto out;
+   }
+
+   new_target-id = atomic_inc_return(target_count);
+
+   printk(KERN_INFO netconsole: add target: 
+  remote ip_addr=%d.%d.%d.%d remote port=%d\n,
+  HIPQUAD(new_target-np.remote_ip), new_target-np.remote_port);
+
+   spin_lock(target_list_lock);
+   list_add(new_target-list, target_list);
+   spin_unlock(target_list_lock);
+
+ out:
+   return retval;
+}
+
+static void remove_target(struct netconsole_target *nt)
+{
+   spin_lock(target_list_lock);
+   list_del(nt-list);
+   if (list_empty(target_list))
+   netpoll_cleanup(nt-np);
+   spin_unlock(target_list_lock);
+   kfree(nt);
+}
+#endif /* CONFIG_NETCONSOLE_DYNCON */
+
 static void write_msg(struct console *con, const char *msg, unsigned int len)
 {
int frag, left;
unsigned long flags;
+#ifdef CONFIG_NETCONSOLE_DYNCON
+   struct netconsole_target *target;
+
+   if (list_empty(target_list))
+   return;
 
+   local_irq_save(flags);
+   spin_lock(target_list_lock);
+
+   for(left = len; left; ) {
+   frag = min(left, MAX_PRINT_CHUNK);
+   list_for_each_entry(target, target_list, list) {
+   netpoll_send_udp(target-np, msg, frag);
+   }
+   msg += frag;
+   left -= frag;
+   }
+
+   spin_unlock(target_list_lock);
+   local_irq_restore(flags);
+#else
if (!np.dev)
return;
 
@@ -83,6 +170,7 @@ static void write_msg(struct console *co
}
 
local_irq_restore(flags);
+#endif /* CONFIG_NETCONSOLE_DYNCON */
 }
 
 static struct console netconsole = {
@@ -91,39 +179,60 @@ static struct console netconsole = {
.write = write_msg
 };
 
+#ifndef MODULE
 static int __init option_setup(char *opt)
 {
-   configured = !netpoll_parse_options(np, opt);
+   strncpy(config, opt, 256);
return 1;
 }
 
 __setup(netconsole=, option_setup);
+#endif
 
 static int __init init_netconsole(void)
 {
-   int err;
+   char *tmp = config;
+#ifdef CONFIG_NETCONSOLE_DYNCON
+   char *p;
 
-   if(strlen(config))
-   option_setup(config);
-
-   if(!configured) {
-   printk(netconsole: not configured, aborting\n);
+   register_console(netconsole);
+   if(!strlen(config)) {
+   printk(KERN_ERR

[RFC][PATCH -mm take4 4/6] using symlink for the net_device

2007-04-18 Thread Keiichi KII


From: Keiichi KII [EMAIL PROTECTED]

We use symbolic link for net_device.
The link in sysfs represents the corresponding network etherdevice.

-+- /sys/class/misc/
|-+- netconsole/
|-+- port1/
| |--- id [r--r--r--]  id
| |--- net:net_dev  [rw-r--r--]  net_dev: eth0,eth1,...
| ...
|--- port2/
...

Signed-off-by: Keiichi KII [EMAIL PROTECTED]
Signed-off-by: Takayoshi Kochi [EMAIL PROTECTED]
---
Index: mm/drivers/net/netconsole.c
===
--- mm.orig/drivers/net/netconsole.c
+++ mm/drivers/net/netconsole.c
@@ -81,6 +81,9 @@ static int add_target(char* target_confi
static void remove_target(struct netconsole_target *nt);
static void cleanup_netconsole(void);
static int setup_target_sysfs(struct netconsole_target *nt);
+static char *make_netdev_class_name(char *netdev_name);
+static int netconsole_event(struct notifier_block *this, unsigned long event,
+   void *ptr);

static int miscdev_configured;

@@ -274,12 +277,77 @@ static struct miscdevice netconsole_misc
.name = netconsole,
};

+static struct notifier_block netconsole_notifier = {
+   .notifier_call = netconsole_event,
+};
+
static int setup_target_sysfs(struct netconsole_target *nt)
{
+   int retval = 0;
+   char *name;
+
kobject_set_name(nt-obj, port%d, nt-id);
nt-obj.parent = netconsole_miscdev.this_device-kobj;
nt-obj.ktype = target_ktype;
-   return kobject_register(nt-obj);
+   retval = kobject_register(nt-obj);
+   name = make_netdev_class_name(nt-np.dev_name);
+   if (IS_ERR(name))
+   return PTR_ERR(name);
+   retval = sysfs_create_link(nt-obj, nt-np.dev-dev.kobj, name);
+   kfree(name);
+
+   return retval;
+}
+
+static char *make_netdev_class_name(char *netdev_name)
+{
+   int size;
+   char *name;
+   char *netdev_class_prefix = net:;
+
+   size = strlen(netdev_class_prefix) + strlen(netdev_name) + 1;
+   name = kmalloc(size, GFP_KERNEL);
+   if (!name) {
+   printk(KERN_ERR netconsole: kmalloc() failed!\n);
+   return ERR_PTR(-ENOMEM);
+   }
+   strcpy(name, netdev_class_prefix);
+   strcat(name, netdev_name);
+
+   return name;
+}
+
+static int netconsole_event(struct notifier_block *this, unsigned long event,
+   void *ptr)
+{
+   int error = 0;
+   char *old_link_name = NULL, *new_link_name = NULL;
+   struct netconsole_target *nt;
+   struct net_device *dev = ptr;
+
+   if (event == NETDEV_CHANGENAME) {
+   spin_lock(target_list_lock);
+   list_for_each_entry(nt, target_list, list) {
+   if (nt-np.dev != dev)
+   continue;
+   new_link_name = make_netdev_class_name(dev-name);
+   old_link_name =
+   make_netdev_class_name(nt-np.dev_name);
+   sysfs_remove_link(nt-obj, old_link_name);
+   error = sysfs_create_link(nt-obj,
+ nt-np.dev-dev.kobj,
+ new_link_name);
+   if (error)
+   printk(KERN_ERR can't create link: %s\n,
+  new_link_name);
+   strcpy(nt-np.dev_name, dev-name);
+   kfree(new_link_name);
+   kfree(old_link_name);
+   }
+   spin_unlock(target_list_lock);
+   }
+
+   return NOTIFY_DONE;
}

static int add_target(char* target_config)
@@ -409,6 +477,7 @@ static int __init init_netconsole(void)
} else
miscdev_configured = 1;

+   register_netdevice_notifier(netconsole_notifier);
register_console(netconsole);
if(!strlen(config)) {
printk(KERN_ERR netconsole: not configured\n);
@@ -443,6 +512,7 @@ static void cleanup_netconsole(void)
list_for_each_entry_safe(nt, tmp, target_list, list) {
kobject_unregister(nt-obj);
}
+   unregister_netdevice_notifier(netconsole_notifier);
if (miscdev_configured)
misc_deregister(netconsole_miscdev);
#else

--


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[RFC][PATCH -mm take4 5/6] switch function of netpoll

2007-04-18 Thread Keiichi KII

From: Keiichi KII [EMAIL PROTECTED]

This patch contains switch function of netpoll.

If enabled attribute of certain port is '1', this port is used
and the configurations of this port are unable to change.

If enabled attribute of certain port is '0', this port isn't used
and the configurations of this port are able to change.

-+- /sys/class/misc/
|-+- netconsole/
  |-+- port1/
  | |--- id  [r--r--r--]  id
  | |--- enabled [rw-r--r--]  0: disable 1: enable, writable
  | ...
  |--- port2/
  ...

Signed-off-by: Keiichi KII [EMAIL PROTECTED]
Signed-off-by: Takayoshi Kochi [EMAIL PROTECTED]
---
Index: mm/drivers/net/netconsole.c
===
--- mm.orig/drivers/net/netconsole.c
+++ mm/drivers/net/netconsole.c
@@ -71,6 +71,7 @@ struct netconsole_target {
struct list_head list;
struct kobject obj;
int id;
+   int enabled;
struct netpoll np;
 };
 
@@ -128,10 +129,19 @@ static ssize_t show_remote_mac(struct ne
   nt-np.remote_mac[4], nt-np.remote_mac[5]);
 }
 
+static ssize_t show_enabled(struct netconsole_target *nt, char *buf)
+{
+   return sprintf(buf, %d\n, nt-enabled);
+}
+
 static ssize_t store_local_port(struct netconsole_target *nt, const char *buf,
size_t count)
 {
spin_lock(target_list_lock);
+   if (nt-enabled) {
+   spin_unlock(target_list_lock);
+   return -EINVAL;
+   }
nt-np.local_port = simple_strtol(buf, NULL, 10);
spin_unlock(target_list_lock);
 
@@ -142,6 +152,10 @@ static ssize_t store_remote_port(struct 
size_t count)
 {
spin_lock(target_list_lock);
+   if (nt-enabled) {
+   spin_unlock(target_list_lock);
+   return -EINVAL;
+   }
nt-np.remote_port = simple_strtol(buf, NULL, 10);
spin_unlock(target_list_lock);
 
@@ -152,6 +166,10 @@ static ssize_t store_local_ip(struct net
  size_t count)
 {
spin_lock(target_list_lock);
+   if (nt-enabled) {
+   spin_unlock(target_list_lock);
+   return -EINVAL;
+   }
nt-np.local_ip = ntohl(in_aton(buf));
spin_unlock(target_list_lock);
 
@@ -162,6 +180,10 @@ static ssize_t store_remote_ip(struct ne
   size_t count)
 {
spin_lock(target_list_lock);
+   if (nt-enabled) {
+   spin_unlock(target_list_lock);
+   return -EINVAL;
+   }
nt-np.remote_ip = ntohl(in_aton(buf));
spin_unlock(target_list_lock);
 
@@ -184,12 +206,39 @@ static ssize_t store_remote_mac(struct n
if (i != ETH_ALEN)
return -EINVAL;
spin_lock(target_list_lock);
+   if (nt-enabled) {
+   spin_unlock(target_list_lock);
+   return -EINVAL;
+   }
memcpy(nt-np.remote_mac, input_mac, ETH_ALEN);
spin_unlock(target_list_lock);
 
return count;
 }
 
+static ssize_t store_enabled(struct netconsole_target *nt, const char *buf,
+   size_t count)
+{
+   int enabled = 0;
+
+   if (count = 2  (count != 2 || buf[count - 1] != '\n')) {
+   printk(KERN_ERR netconsole: invalid argument: %s\n, buf);
+   return -EINVAL;
+   } else if (buf[0] == '1') {
+   enabled = 1;
+   } else if(buf[0] == '0') {
+   enabled = 0;
+   } else {
+   printk(KERN_ERR netconsole: invalid argument: %s\n, buf);
+   return -EINVAL;
+   }
+   spin_lock(target_list_lock);
+   nt-enabled = enabled;
+   spin_unlock(target_list_lock);
+
+   return count;
+}
+
 struct target_attr {
struct attribute attr;
ssize_t (*show)(struct netconsole_target*, char*);
@@ -213,6 +262,8 @@ static NETCON_CLASS_ATTR(remote_ip, S_IR
 static NETCON_CLASS_ATTR(local_mac, S_IRUGO, show_local_mac, NULL);
 static NETCON_CLASS_ATTR(remote_mac, S_IRUGO | S_IWUSR,
 show_remote_mac, store_remote_mac);
+static NETCON_CLASS_ATTR(enabled, S_IRUGO | S_IWUSR,
+show_enabled, store_enabled);
 
 static struct attribute *target_attrs[] = {
target_attr_id.attr,
@@ -222,6 +273,7 @@ static struct attribute *target_attrs[] 
target_attr_remote_ip.attr,
target_attr_local_mac.attr,
target_attr_remote_mac.attr,
+   target_attr_enabled.attr,
NULL
 };
 
@@ -380,6 +432,7 @@ static int add_target(char* target_confi
}
 
new_target-id = atomic_inc_return(target_count);
+   new_target-enabled = 1;
 
printk(KERN_INFO netconsole: add target: 
   remote ip_addr=%d.%d.%d.%d remote port=%d\n,
@@ -421,7 +474,8 @@ static void write_msg(struct console *co
for(left = len; left; ) {
frag = min(left, MAX_PRINT_CHUNK);

[RFC][PATCH -mm take4 6/6] add ioctls for adding/removing target

2007-04-18 Thread Keiichi KII

From: Keiichi KII [EMAIL PROTECTED]

We add ioctls for adding/removing target.
If we use NETCONSOLE_ADD_TARGET ioctl, 
we can dynamically add netconsole target.
If we use NETCONSOLE_REMOVE_TARGET ioctl,
we can dynamically remove netconsole target.

We attach a sample program for ioctl.

Signed-off-by: Keiichi KII [EMAIL PROTECTED]
Signed-off-by: Takayoshi Kochi [EMAIL PROTECTED]
---
/*
 * This software is a sample program for ioctl of netconsole.
 * You can add/remove netconsole port by using this software.
 *
 * Keiichi KII [EMAIL PROTECTED]
 * Copyright (C) 2007 by Keiichi KII
 * This software is under GPL version 2 of the license.
 */

#include stdio.h
#include unistd.h
#include string.h
#include stdlib.h
#include stropts.h
#include fcntl.h
#include arpa/inet.h
#include net/if.h
#include linux/if_ether.h
#include linux/netconsole.h

#define NETCONSOLE_DEV_NAME /dev/netconsole
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

struct command {
char *name;
char *options;
int (*handle_command)(struct command* command, int argc, char* argv[]);
void (*usage)(char *msg);
};

extern char *optarg;
extern int opterr, optind, errno;

static void generic_usage(char *msg) {
fprintf(stderr, Usage  : netconfig command [option] [args]\n);
fprintf(stderr, command: add remove help\n);
exit(-1);
}

static int handle_command_add(struct command* command, int argc, char** argv)
{
int i, fd, ch;
unsigned int address;
unsigned char mac[ETH_ALEN];
struct netconsole_request req = {
.netdev_name = eth0,
.local_port = 6665,
.remote_port = ,
.remote_mac = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF},
};

while ((ch = getopt(argc, argv, command-options)) != -1) {
switch (ch) {
case 'p':
req.local_port = atoi(optarg);
break;
case 's':
address = inet_addr(optarg);
if (address == -1)
(*command-usage)(invlid IP address!\n);
req.local_ip = address;
break;
case 'h':
default:
(*command-usage)(NULL);
}
}
argc -= optind;
argv += optind;

if (argc  3 || argc  4)
(*command-usage)(NULL);

memcpy(req.netdev_name, argv[0], IFNAMSIZ);
address = inet_addr(argv[1]);
if (address == -1)
(*command-usage)(invlid IP address!\n);
req.remote_ip = address;
req.remote_port = atoi(argv[2]);
if (argc == 4) {
i = 0;
mac[i++] = strtol(argv[3], NULL, 16);
while ((argv[3] = strchr(argv[3], ':')) != NULL) {
argv[3]++;
mac[i++] = strtol(argv[3], NULL, 16);
}
if (i != ETH_ALEN)
(*command-usage)(Invalid MAC address!\n);
memcpy(req.remote_mac, mac, ETH_ALEN);
}

fd = open(NETCONSOLE_DEV_NAME, O_RDWR);
if (fd  0) {
fprintf(stderr, cannot open device NETCONSOLE_DEV_NAME \n);
return -1;
}

if(ioctl(fd, NETCON_ADD_TARGET, req) != 0)
perror(add);
close(fd);

return 0;
}

static void usage_add(char *msg)
{
if (msg != NULL)
fprintf(stderr, %s, msg);
fprintf(stderr, Usage  : netconfig add [-options] dev_name remote_ip 
remote_port [remote_mac]\n);
fprintf(stderr, options:\n);
fprintf(stderr, -p local_port :local port number\n);
fprintf(stderr, -s local_up   :local IP address\n);
exit(-1);
}

static int handle_command_remove(struct command *command,
 int argc, char** argv)
{
int fd, id, ch;

while ((ch = getopt(argc, argv, command-options)) != -1) {
switch (ch) {
case 'h':
default:
(*command-usage)(NULL);
}
}
argc -= optind;
argv += optind;

if (argc != 1)
(*command-usage)(NULL);

id = atoi(argv[0]);
fd = open(NETCONSOLE_DEV_NAME, O_RDWR);
if (fd  0) {
fprintf(stderr, can't open device  NETCONSOLE_DEV_NAME \n);
return -1;
}
if(ioctl(fd, NETCON_REMOVE_TARGET, id) != 0)
perror(remove);
close(fd);

return 0;
}

static void usage_remove(char *msg)
{
fprintf(stderr, Usage  : netconfig remove id\n);
exit(-1);
}

static int handle_command_help(struct command *command, int argc, char** argv)
{
(*command-usage)(NULL);

return 0;
}

static

Re: kernel BUG at net/core/skbuff.c in linux-2.6.21-rc6

2007-04-18 Thread Jarek Poplawski

Hi,

I didn't analyse this bug report, but it is probably
closely connected with one of the bugs visible in
a log from this submission:

http://bugzilla.kernel.org/show_bug.cgi?id=8132

On 15-04-2007 02:50, Paul Mackerras wrote:
 David Miller writes:
 
 Here is Patrick McHardy's patch:
 
 So this doesn't change process_input_packet(), which treats the case
 where the first byte is 0xff (PPP_ALLSTATIONS) but the second byte is
 not 0x03 (PPP_UI) as indicating a packet with a PPP protocol number of
 0xff.  Arguably that's wrong since PPP protocol 0xff is reserved, and
 the RFC does envision the possibility of receiving frames where the
 control field has values other than 0x03.
 
 Therefore I think this patch is probably better.  Could people try it
 out and let me know if it fixes the problem?
 
 Paul.
 
 diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c
 index 933e2f3..caabbc4 100644
 --- a/drivers/net/ppp_async.c
 +++ b/drivers/net/ppp_async.c
 @@ -802,9 +802,9 @@ process_input_packet(struct asyncppp *ap)
  
   /* check for address/control and protocol compression */
   p = skb-data;
 - if (p[0] == PPP_ALLSTATIONS  p[1] == PPP_UI) {
 + if (p[0] == PPP_ALLSTATIONS) {
   /* chop off address/control */
 - if (skb-len  3)
 + if (p[1] != PPP_UI || skb-len  3)
   goto err;
   p = skb_pull(skb, 2);
   }

Let's look farther:

proto = p[0];
if (proto  1) {
/* protocol is compressed */
skb_push(skb, 1)[0] = 0;

BTW - about Patrick's patch:

skb_push seems to be dependent here on the 1-st char of
skb-data, if above (p[0] != PPP_ALLSTATIONS), but on the
3-rd char otherwise (after skb_pull). But, Patrick's patch
reserves the place for this, looking always at 1-st char
(buf[0]) independently of PPP_ALLSTATIONS char presence,
or otherwise - always treating this char as protocol char.
It looks safe because of the current value of PPP_ALLSTATIONS,
but it isn't very easy to understand.

On the other hand, without any reservation in the
ppp_async_input for the (buf[0] == PPP_ALLSTATIONS) case,
probably 4-byte alignment isn't achieved as planned.

} else {
if (skb-len  2)
goto err;
proto = (proto  8) + p[1];
if (proto == PPP_LCP)
async_lcp_peek(ap, p, skb-len, 1);
}

/* queue the frame to be processed */
skb-cb[0] = ap-state;
skb_queue_tail(ap-rqueue, skb);
ap-rpkt = NULL;
ap-state = 0;
return;

 err:
/* frame had an error, remember that, reset SC_TOSS  SC_ESCAPE */
ap-state = SC_PREV_ERROR;
if (skb) {
/* make skb appear as freshly allocated */

Probably this isn't always true and here the problem
started...

skb_trim(skb, 0);
skb_reserve(skb, - skb_headroom(skb));

Isn't here lost e.g. NET_SKB_PAD probably reserved by
dev_alloc_skb?

On the other hand - this kind of pad can very well hide
similar reservation problems in many other places - maybe
it should be omitted or somehow counted in WARNs when some
debugging options are active?

Regards,
Jarek P.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Bugme-new] [Bug 8320] New: replacing route in kernel doesn't send netlink message

2007-04-18 Thread Milan Kocián

On Tue, 2007-04-17 at 14:58 +0200, Patrick McHardy wrote:
 David Miller wrote:
  From: Patrick McHardy [EMAIL PROTECTED]
  Date: Mon, 16 Apr 2007 06:59:06 +0200
  
  
 RTM_DELROUTE + RTM_NEWROUTE seem to be safer, although you're correct
 that it might cause userspace to perform some action upon receiving
 the DELROUTE message since the update is non-atomic. So I really don't
 know, I'm in favour of having notifications for replacements, but I
 fear we might break something.
  
  
  We can cry foul about a broken application if an application following
  the API correctly would interpret the new messages correctly.
  
  I think it doesn't make sense to do a delete then a newroute for
  the atomicity issues, and therefore the replace makes the most
  sense as long as existing correct uses of the API would not
  explode on this.

 They shouldn't, worst case is that they ignore NLM_F_REPLACE and treat
 it as a completely new route, which is at least half way correct and
 not really worse than today.
 
 Milan, could you cook up another patch which uses NLM_F_REPLACE?
 

I can try it. Output is in patch below. Review carefully. I don't know
if it's the best approach. It's tested and working without problem
(probably :-))

--- net/ipv4.old/fib_hash.c 2007-04-18 12:50:11.0 +0200
+++ net/ipv4/fib_hash.c 2007-04-18 12:39:49.081369320 +0200
@@ -443,7 +443,6 @@
if (cfg-fc_nlflags  NLM_F_REPLACE) {
struct fib_info *fi_drop;
u8 state;
-
write_lock_bh(fib_hash_lock);
fi_drop = fa-fa_info;
fa-fa_info = fi;
@@ -457,6 +456,8 @@
fib_release_info(fi_drop);
if (state  FA_S_ACCESSED)
rt_cache_flush(-1);
+   rtmsg_fib(RTM_NEWROUTE, key, fa, cfg-fc_dst_len, 
tb-tb_id,
+ cfg-fc_nlinfo, NLM_F_REPLACE);
return 0;
}
 
@@ -524,7 +525,7 @@
rt_cache_flush(-1);
 
rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg-fc_dst_len, tb-tb_id,
- cfg-fc_nlinfo);
+ cfg-fc_nlinfo, 0);
return 0;
 
 out_free_new_fa:
@@ -590,7 +591,7 @@
 
fa = fa_to_delete;
rtmsg_fib(RTM_DELROUTE, key, fa, cfg-fc_dst_len,
- tb-tb_id, cfg-fc_nlinfo);
+ tb-tb_id, cfg-fc_nlinfo, 0);
 
kill_fn = 0;
write_lock_bh(fib_hash_lock);
--- net/ipv4.old/fib_trie.c 2007-04-18 12:50:11.0 +0200
+++ net/ipv4/fib_trie.c 2007-04-18 12:42:29.423993536 +0200
@@ -1205,6 +1205,9 @@
fib_release_info(fi_drop);
if (state  FA_S_ACCESSED)
rt_cache_flush(-1);
+   rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, 
tb-tb_id,
+ cfg-fc_nlinfo, NLM_F_REPLACE);
+
goto succeeded;
}
/* Error if we find a perfect match which
@@ -1256,7 +1259,7 @@
 
rt_cache_flush(-1);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb-tb_id,
- cfg-fc_nlinfo);
+ cfg-fc_nlinfo, 0);
 succeeded:
return 0;
 
@@ -1599,7 +1602,7 @@
 
fa = fa_to_delete;
rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb-tb_id,
- cfg-fc_nlinfo);
+ cfg-fc_nlinfo, 0);
 
l = fib_find_node(t, key);
li = find_leaf_info(l, plen);
--- net/ipv4.old/fib_semantics.c2007-04-18 12:50:11.0 +0200
+++ net/ipv4/fib_semantics.c2007-04-18 12:40:54.807377448 +0200
@@ -301,7 +301,7 @@
 }
 
 void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
-  int dst_len, u32 tb_id, struct nl_info *info)
+  int dst_len, u32 tb_id, struct nl_info *info, unsigned int
nlm_flags)
 {
struct sk_buff *skb;
u32 seq = info-nlh ? info-nlh-nlmsg_seq : 0;
@@ -313,7 +313,7 @@
 
err = fib_dump_info(skb, info-pid, seq, event, tb_id,
fa-fa_type, fa-fa_scope, key, dst_len,
-   fa-fa_tos, fa-fa_info, 0);
+   fa-fa_tos, fa-fa_info, nlm_flags);
/* failure implies BUG in fib_nlmsg_size() */
BUG_ON(err  0);
 
--- net/ipv4.old/fib_lookup.h   2007-04-18 12:50:11.0 +0200
+++ net/ipv4/fib_lookup.h   2007-04-18 12:43:42.377902856 +0200
@@ -30,7 +30,7 @@
 int dst_len, u8 tos, struct fib_info *fi,
 unsigned int);
 extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
- int dst_len, u32 tb_id, struct nl_info *info);
+ int dst_len, u32 tb_id, struct nl_info *info, unsigned int
nlm_flags);
 extern struct fib_alias *fib_find_alias(struct list_head *fah,

[PATCH] fix comments for register_netdev()

2007-04-18 Thread Borislav Petkov

Correct the function name in the comments supplied with register_netdev()

Signed-off-by: Borislav Petkov [EMAIL PROTECTED]


Index: 21-rc7/net/core/dev.c
===
--- 21-rc7.orig/net/core/dev.c
+++ 21-rc7/net/core/dev.c
@@ -3002,7 +3002,7 @@ out:
  * chain. 0 is returned on success. A negative errno code is returned
  * on a failure to set up the device, or if the name is a duplicate.
  *
- * This is a wrapper around register_netdev that takes the rtnl semaphore
+ * This is a wrapper around register_netdevice that takes the rtnl 
semaphore
  * and expands the device name if you passed a format string to
  * alloc_netdev.
  */
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Bugme-new] [Bug 8320] New: replacing route in kernel doesn't send netlink message

2007-04-18 Thread Patrick McHardy

Milan Kocián wrote:
 On Tue, 2007-04-17 at 14:58 +0200, Patrick McHardy wrote:
 
Milan, could you cook up another patch which uses NLM_F_REPLACE?
 
 
 I can try it. Output is in patch below. Review carefully. I don't know
 if it's best approach. It's tested and working without problem
 (probably :-))


Looks good, but your mailer corrupted long lines. Please resend as
attachment and sign off the patch.

 --- net/ipv4.old/fib_hash.c   2007-04-18 12:50:11.0 +0200
 +++ net/ipv4/fib_hash.c   2007-04-18 12:39:49.081369320 +0200
 @@ -443,7 +443,6 @@
   if (cfg-fc_nlflags  NLM_F_REPLACE) {
   struct fib_info *fi_drop;
   u8 state;
 -

And please drop this unrelated whitespace change.

 --- net/ipv4.old/fib_semantics.c  2007-04-18 12:50:11.0 +0200
 +++ net/ipv4/fib_semantics.c  2007-04-18 12:40:54.807377448 +0200
 @@ -301,7 +301,7 @@
  }
  
  void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
 -int dst_len, u32 tb_id, struct nl_info *info)
 +int dst_len, u32 tb_id, struct nl_info *info, unsigned int
 nlm_flags)

This should go on a new line since it exceeds 80 characters.

 --- net/ipv4.old/fib_lookup.h 2007-04-18 12:50:11.0 +0200
 +++ net/ipv4/fib_lookup.h 2007-04-18 12:43:42.377902856 +0200
 @@ -30,7 +30,7 @@
int dst_len, u8 tos, struct fib_info *fi,
unsigned int);
  extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
 -   int dst_len, u32 tb_id, struct nl_info *info);
 +   int dst_len, u32 tb_id, struct nl_info *info, unsigned int
 nlm_flags);

Same here.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC][PATCH -mm take4 2/6] support multiple logging

2007-04-18 Thread Stephen Hemminger

On Wed, 18 Apr 2007 21:06:41 +0900
Keiichi KII [EMAIL PROTECTED] wrote:

 From: Keiichi KII [EMAIL PROTECTED]
 
 This patch contains the following changes for supporting multiple logging
  agents.
 
 1. extend netconsole to multiple netpolls
To send kernel messages to multiple logging agents, extend netconsole
 to be able to use multiple netpolls. Each netpoll sends kernel messages
 to its own logging agent.
 
 2. change config parameter format
We change config parameter format from single configuration to multiple 
configurations separated by ';'.
 
ex) sending kernel messages to destination1 and destination2 using eth0.
 modprobe netconsole \
 netconsole=@/eth0,@[destination1]/;@/eth0,@[destination2]/

Please include an update to Documentation/networking/netconsole.txt
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [BRIDGE] Unaligned access on IA64 when comparing ethernet addresses

2007-04-18 Thread Stephen Hemminger

On Wed, 18 Apr 2007 01:28:04 -0700 (PDT)
David Miller [EMAIL PROTECTED] wrote:

 From: Pavel Emelianov [EMAIL PROTECTED]
 Date: Wed, 18 Apr 2007 10:43:56 +0400

  [snip]

   --- linux-2.6.orig/net/bridge/br_private.h2007-04-17
   13:26:48.0 -0700 +++ linux-2.6/net/bridge/br_private.h
   2007-04-17 13:30:29.0 -0700 @@ -36,7 +36,7 @@
{
 unsigned char   prio[2];
 unsigned char   addr[6];
   -};
   +} __attribute__((aligned(8)));

  Why 8? Mustn't it be 16? Address is to be 2-bytes aligned...

 Actually it could be made 2, the aligned() attribute is
 in bytes, not bits.

It could be 2 but 8 might allow a compiler on a 64 bit platform
to be smarter in comparisons and assignments. For 2.6.22, I'll make
a nicer version similar to ktime_t.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] CONFIG_PACKET_MMAP should depend on MMU

2007-04-18 Thread David Howells

Aubrey Li [EMAIL PROTECTED] wrote:

 Here, in the attachment I wrote a small test app. Please correct if
 there is anything wrong, and feel free to improve it.

Okay... I have that working... probably.  I don't know what output it's
supposed to produce, but I see this:

# /packet-mmap/sample_packet_mmap
00-00-00-01-00-00-00-8a-00-00-00-8a-00-42-00-50-
38-43-13-a0-00-07-ff-3c-00-00-00-00-00-00-00-00-
00-11-08-00-00-00-00-01-00-01-00-06-00-d0-b7-de-
32-7b-00-00-00-00-00-00-00-00-00-00-00-00-00-00-
00-00-00-90-cc-a2-75-6b-00-d0-b7-de-32-7b-08-00-
45-00-00-7c-00-00-40-00-40-11-b4-13-c0-a8-02-80-
c0-a8-02-8d-08-01-03-20-00-68-8e-65-7f-5b-7e-03-
00-00-00-01-00-00-00-00-00-00-00-00-00-00-00-00-
00-00-00-00-00-00-00-00-00-00-00-01-00-00-81-a4-
00-00-00-01-00-00-00-00-00-00-00-00-00-1d-b8-86-
00-00-10-00-ff-ff-ff-ff-00-00-0e-f0-00-00-09-02-
01-cb-03-16-46-26-38-0d-00-00-00-00-46-26-38-1e-
00-00-00-00-46-26-38-1e-00-00-00-00-00-00-00-00-
00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00- [repeated]

Does that look reasonable?

I've attached the preliminary patch.  Note four things about it:

 (1) I've had to add the get_unmapped_area() op to the proto_ops struct, but
 I've only done it for CONFIG_MMU=n as making it available for CONFIG_MMU=y
 could cause problems.

 (2) There's a race between packet_get_unmapped_area() being called and
 packet_mmap() being called.

 (3) I've added an extra check into packet_set_ring() to make sure the caller
 isn't asking for a combination of buffer size and count that will exceed
 ULONG_MAX.  This protects a multiply done elsewhere.

 (4) The entire data buffer is allocated as one contiguous lump in NOMMU-mode.

David

---
[PATCH] NOMMU: Support mmap() on AF_PACKET sockets

From: David Howells [EMAIL PROTECTED]

Support mmap() on AF_PACKET sockets in NOMMU-mode kernels.

Signed-Off-By: David Howells [EMAIL PROTECTED]
---

 include/linux/net.h|7 +++
 include/net/sock.h |8 +++
 net/core/sock.c|   10 
 net/packet/af_packet.c |  118 
 net/socket.c   |   77 +++
 5 files changed, 219 insertions(+), 1 deletions(-)

diff --git a/include/linux/net.h b/include/linux/net.h
index 4db21e6..9e77cf6 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -161,6 +161,11 @@ struct proto_ops {
int (*recvmsg)   (struct kiocb *iocb, struct socket *sock,
  struct msghdr *m, size_t total_len,
  int flags);
+#ifndef CONFIG_MMU
+   unsigned long   (*get_unmapped_area)(struct file *file, struct socket 
*sock,
+unsigned long addr, unsigned long 
len,
+unsigned long pgoff, unsigned long 
flags);
+#endif
int (*mmap)  (struct file *file, struct socket *sock,
  struct vm_area_struct * vma);
ssize_t (*sendpage)  (struct socket *sock, struct page *page,
@@ -191,6 +196,8 @@ extern int   sock_sendmsg(struct socket *sock, 
struct msghdr *msg,
 extern int  sock_recvmsg(struct socket *sock, struct msghdr *msg,
  size_t size, int flags);
 extern int  sock_map_fd(struct socket *sock);
+extern void sock_make_mappable(struct socket *sock,
+   unsigned long prot);
 extern struct socket *sockfd_lookup(int fd, int *err);
 #define sockfd_put(sock) fput(sock-file)
 extern int  net_ratelimit(void);
diff --git a/include/net/sock.h b/include/net/sock.h
index 2c7d60c..d91edea 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -841,6 +841,14 @@ extern int  sock_no_sendmsg(struct 
kiocb *, struct socket *,
struct msghdr *, size_t);
 extern int  sock_no_recvmsg(struct kiocb *, struct socket 
*,
struct msghdr *, size_t, int);
+#ifndef CONFIG_MMU
+extern unsigned long   sock_no_get_unmapped_area(struct file *,
+ struct socket *,
+ unsigned long,
+ unsigned long,
+ unsigned long,
+ unsigned long);
+#endif
 extern int sock_no_mmap(struct file *file,
 struct socket *sock,
 struct vm_area_struct *vma);
diff --git a/net/core/sock.c b/net/core/sock.c
index 27c4f62..b288799

Re: [PATCH 2.6] WE-22 : prevent information leak on 64 bit

2007-04-18 Thread Jean Tourrilhes

On Wed, Apr 18, 2007 at 01:34:50AM +0200, Michael Buesch wrote:
 
 I'd say nobody but you does fully understand WEXT.

Not true. If tomorrow I was run over by an ICE, you could ask
Jouni, Dan or Pavel to take over.
Have fun...

Jean
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] [SCTP] Do not interleave non-fragments when in partial delivery

2007-04-18 Thread Vlad Yasevich

Hi David

This is a bug fix, but done on top of 2.6.22 tree.  I am trying
to minimize the amount of conflict this would cause during merge
by doing it this way.  However, if you would rather keep all the bugfixes
in net-2.6, I can do that too, but that _will_ give you conflicts.

-vlad
---

[SCTP] Do not interleave non-fragments when in partial delivery

The way partial delivery is currently implemented, it is possible to
interleave a message (either from another stream, or unordered) that
is not part of partial delivery process.  The only way to do this is for
a message to not be a fragment and be 'in order' or unordered for a
given stream.  This will result in bypassing the reassembly/ordering
queues where things live during partial delivery, and the
message will be delivered to the socket in the middle of partial delivery.

This is a two-fold problem, in that:
1.  the app now must check the stream-id and flags which it may not
be doing.
2.  this clears partial delivery state from the association and results
in app communication hanging.

This patch is a band-aid over a much bigger problem in that we
don't do stream interleave.

Signed-off-by: Vlad Yasevich [EMAIL PROTECTED]
---
 net/sctp/ulpqueue.c |9 -
 1 files changed, 8 insertions(+), 1 deletions(-)

diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index ae374a9..fb2ec63 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -224,7 +224,14 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct 
sctp_ulpevent *event)
queue = sk-sk_receive_queue;
} else {
if (ulpq-pd_mode) {
-   if (event-msg_flags  MSG_NOTIFICATION)
+   /* If the association is in partial delivery, we
+* need to finish delivering the partially processed
+* packet before passing any other data.  This is
+* because we don't truly support stream interleaving.
+*/
+   if ((event-msg_flags  MSG_NOTIFICATION) ||
+   (SCTP_DATA_NOT_FRAG ==
+   (event-msg_flags  SCTP_DATA_FRAG_MASK)))
queue = sctp_sk(sk)-pd_lobby;
else {
clear_pd = event-msg_flags  MSG_EOR;
-- 
1.5.0.3.438.gc49b2

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [BRIDGE] Unaligned access on IA64 when comparing ethernet addresses

2007-04-18 Thread David Miller

From: Stephen Hemminger [EMAIL PROTECTED]
Date: Wed, 18 Apr 2007 07:44:39 -0700

 On Wed, 18 Apr 2007 01:28:04 -0700 (PDT)
 David Miller [EMAIL PROTECTED] wrote:

  From: Pavel Emelianov [EMAIL PROTECTED]
  Date: Wed, 18 Apr 2007 10:43:56 +0400

   [snip]

--- linux-2.6.orig/net/bridge/br_private.h  2007-04-17
13:26:48.0 -0700 +++ linux-2.6/net/bridge/br_private.h
2007-04-17 13:30:29.0 -0700 @@ -36,7 +36,7 @@
 {
unsigned char   prio[2];
unsigned char   addr[6];
-};
+} __attribute__((aligned(8)));

   Why 8? Mustn't it be 16? Address is to be 2-bytes aligned...

  Actually it could be made 2, the aligned() attribute is
  in bytes, not bits.

 It could be 2 but 8 might allow a compiler on a 64 bit platform
 to be smarter in comparisons and assignments.

Absolutely.

Although I don't think gcc does anything fancy since we don't
use memcmp().  It's a tradeoff, we'd like to use unsigned long
comparisons when both objects are aligned correctly but we also
don't want it to use any more than one potentially mispredicted
branch.

We could add some alignment tests to the ethernet address
comparison code, but it's probably more trouble than it's
worth.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] [SCTP] Do not interleave non-fragments when in partial delivery

2007-04-18 Thread David Miller

From: Vlad Yasevich [EMAIL PROTECTED]
Date: Wed, 18 Apr 2007 14:38:15 -0400

 This is a bug fix, but done on top of 2.6.22 tree.  I am trying
 to minimize the amount of conflict this would cause during merge
 by doing it this way.  However, if you would rather keep all the bugfixes
 in net-2.6, I can do that too, but that _will_ give you conflicts.

I've already determined that I'm destined to deal with tons
of conflicts anyways :-)

I'll backport this to net-2.6 unless you beat me to it :-)

Thanks!
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] [SCTP] Do not interleave non-fragments when in partial delivery

2007-04-18 Thread Vlad Yasevich

David Miller wrote:
 From: Vlad Yasevich [EMAIL PROTECTED]
 Date: Wed, 18 Apr 2007 14:38:15 -0400

 This is a bug fix, but done on top of 2.6.22 tree.  I am trying
 to minimize the amount of conflict this would cause during merge
 by doing it this way.  However, if you would rather keep all the bugfixes
 in net-2.6, I can do that too, but that _will_ give you conflicts.

 I've already determined that I'm destined to deal with tons
 of conclicts anyways :-)

 I'll backport this to net-2.6 unless you beat me to it :-)

attached :)

-vlad
From ebf07dc09049969873b50f594640fe475e1ee294 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich [EMAIL PROTECTED]
Date: Wed, 18 Apr 2007 16:48:24 -0400
Subject: [PATCH] [SCTP] Do not interleave non-fragments when in partial delivery

The way partial delivery is currently implemented, it is possible to
interleave a message (either from another stream, or unordered) that
is not part of partial delivery process.  The only way to do this is for
a message to not be a fragment and be 'in order' or unordered for a
given stream.  This will result in bypassing the reassembly/ordering
queues where things live during partial delivery, and the
message will be delivered to the socket in the middle of partial delivery.

This is a two-fold problem, in that:
1.  the app now must check the stream-id and flags which it may not
be doing.
2.  this clears partial delivery state from the association and results
in ulp hanging.

This patch is a band-aid over a much bigger problem in that we
don't do stream interleave.

Signed-off-by: Vlad Yasevich [EMAIL PROTECTED]
---
 net/sctp/ulpqueue.c |9 -
 1 files changed, 8 insertions(+), 1 deletions(-)

diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index bfb197e..b29e3e4 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -190,7 +190,14 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct 
sctp_ulpevent *event)
if (!sctp_sk(sk)-pd_mode) {
queue = sk-sk_receive_queue;
} else if (ulpq-pd_mode) {
-   if (event-msg_flags  MSG_NOTIFICATION)
+   /* If the association is in partial delivery, we
+* need to finish delivering the partially processed
+* packet before passing any other data.  This is
+* because we don't truly support stream interleaving.
+*/
+   if ((event-msg_flags  MSG_NOTIFICATION) ||
+   (SCTP_DATA_NOT_FRAG ==
+   (event-msg_flags  SCTP_DATA_FRAG_MASK)))
queue = sctp_sk(sk)-pd_lobby;
else {
clear_pd = event-msg_flags  MSG_EOR;
-- 
1.5.0.3.438.gc49b2

Re: [PATCH] [SCTP] Do not interleave non-fragments when in partial delivery

2007-04-18 Thread David Miller

From: Vlad Yasevich [EMAIL PROTECTED]
Date: Wed, 18 Apr 2007 16:52:26 -0400

 David Miller wrote:
  From: Vlad Yasevich [EMAIL PROTECTED]
  Date: Wed, 18 Apr 2007 14:38:15 -0400

  This is a bug fix, but done on top of 2.6.22 tree.  I am trying
  to minimize the amount of conflict this would cause during merge
  by doing it this way.  However, if you would rather keep all the bugfixes
  in net-2.6, I can do that too, but that _will_ give you conflicts.

  I've already determined that I'm destined to deal with tons
  of conclicts anyways :-)

  I'll backport this to net-2.6 unless you beat me to it :-)

 attached :)

Thanks a lot, applied.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: kernel BUG at net/core/skbuff.c in linux-2.6.21-rc6

2007-04-18 Thread Herbert Xu

Hi Paul:

Paul Mackerras [EMAIL PROTECTED] wrote:
 
 So this doesn't change process_input_packet(), which treats the case
 where the first byte is 0xff (PPP_ALLSTATIONS) but the second byte is
 not 0x03 (PPP_UI) as indicating a packet with a PPP protocol number of
 0xff.  Arguably that's wrong since PPP protocol 0xff is reserved, and
 the RFC does envision the possibility of receiving frames where the
 control field has values other than 0x03.

Your fix is probably needed too.  However, I think the issue that Patrick
was trying to fix is the case where p[0] != PPP_ALLSTATIONS and therefore
we'd still have a problem there.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] [net-2.6] IrDA: Correctly handling socket error

2007-04-18 Thread Samuel Ortiz

From: Olaf Kirch [EMAIL PROTECTED]

This patch fixes an oops first reported in mid 2006 - see
http://lkml.org/lkml/2006/8/29/358 The cause of this bug report is that
when an error is signalled on the socket, irda_recvmsg_stream returns
without removing a local wait_queue variable from the socket's sk_sleep
queue. This causes havoc further down the road.

In response to this problem, a patch was made that invoked sock_orphan on
the socket when receiving a disconnect indication. This is not a good fix,
as this sets sk_sleep to NULL, causing applications sleeping in recvmsg
(and other places) to oops.

This is against the latest net-2.6 and should be considered for -stable
inclusion. 

Signed-off-by: Olaf Kirch [EMAIL PROTECTED]
Signed-off-by: Samuel Ortiz [EMAIL PROTECTED]
---
 net/irda/af_irda.c |3 +--
 1 files changed, 1 insertions(+), 2 deletions(-)

diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index f9b15d4..bf994c8 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -138,7 +138,6 @@ static void irda_disconnect_indication(void *instance, void 
*sap,
sk-sk_shutdown |= SEND_SHUTDOWN;
 
sk-sk_state_change(sk);
-   sock_orphan(sk);
release_sock(sk);
 
/* Close our TSAP.
@@ -1445,7 +1444,7 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct 
socket *sock,
 */
ret = sock_error(sk);
if (ret)
-   break;
+   ;
else if (sk-sk_shutdown  RCV_SHUTDOWN)
;
else if (noblock)
-- 
1.5.1


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/7] [IrDA] af_irda: irda_recvmsg_stream cleanup

2007-04-18 Thread samuel

From: Olaf Kirch [EMAIL PROTECTED]

This patch cleans up some code in irda_recvmsg_stream, replacing some
homebrew code with prepare_to_wait/finish_wait, and by making the
code honor sock_rcvtimeo.

Signed-off-by: Olaf Kirch [EMAIL PROTECTED]
Signed-off-by: Samuel Ortiz [EMAIL PROTECTED]
---
 net/irda/af_irda.c |   31 +--
 1 files changed, 13 insertions(+), 18 deletions(-)

Index: net-2.6.22-quilt/net/irda/af_irda.c
===
--- net-2.6.22-quilt.orig/net/irda/af_irda.c2007-04-18 01:40:14.0 
+0300
+++ net-2.6.22-quilt/net/irda/af_irda.c 2007-04-18 01:40:28.0 +0300
@@ -1403,8 +1403,8 @@
struct irda_sock *self = irda_sk(sk);
int noblock = flags  MSG_DONTWAIT;
size_t copied = 0;
-   int target = 1;
-   DECLARE_WAITQUEUE(waitq, current);
+   int target;
+   long timeo;
 
IRDA_DEBUG(3, %s()\n, __FUNCTION__);
 
@@ -1417,8 +1417,8 @@
if (flags  MSG_OOB)
return -EOPNOTSUPP;
 
-   if (flags  MSG_WAITALL)
-   target = size;
+   target = sock_rcvlowat(sk, flags  MSG_WAITALL, size);
+   timeo = sock_rcvtimeo(sk, noblock);
 
msg-msg_namelen = 0;
 
@@ -1426,19 +1426,14 @@
int chunk;
struct sk_buff *skb = skb_dequeue(sk-sk_receive_queue);
 
-   if (skb==NULL) {
+   if (skb == NULL) {
+   DEFINE_WAIT(wait);
int ret = 0;
 
if (copied = target)
break;
 
-   /* The following code is a cut'n'paste of the
-* wait_event_interruptible() macro.
-* We don't us the macro because the test condition
-* is messy. - Jean II */
-   set_bit(SOCK_ASYNC_WAITDATA, sk-sk_socket-flags);
-   add_wait_queue(sk-sk_sleep, waitq);
-   set_current_state(TASK_INTERRUPTIBLE);
+   prepare_to_wait_exclusive(sk-sk_sleep, wait, 
TASK_INTERRUPTIBLE);
 
/*
 *  POSIX 1003.1g mandates this order.
@@ -1451,17 +1446,17 @@
else if (noblock)
ret = -EAGAIN;
else if (signal_pending(current))
-   ret = -ERESTARTSYS;
+   ret = sock_intr_errno(timeo);
+   else if (sk-sk_state != TCP_ESTABLISHED)
+   ret = -ENOTCONN;
else if (skb_peek(sk-sk_receive_queue) == NULL)
/* Wait process until data arrives */
schedule();
 
-   current-state = TASK_RUNNING;
-   remove_wait_queue(sk-sk_sleep, waitq);
-   clear_bit(SOCK_ASYNC_WAITDATA, sk-sk_socket-flags);
+   finish_wait(sk-sk_sleep, wait);
 
-   if(ret)
-   return(ret);
+   if (ret)
+   return ret;
if (sk-sk_shutdown  RCV_SHUTDOWN)
break;
 

--

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 3/7] [IrDA] af_irda: irda_accept cleanup

2007-04-18 Thread samuel

This patch removes a cut'n'paste copy of wait_event_interruptible
from irda_accept.

Signed-off-by: Samuel Ortiz [EMAIL PROTECTED]
Acked-by: Olaf Kirch [EMAIL PROTECTED]
---
 net/irda/af_irda.c |   34 --
 1 files changed, 8 insertions(+), 26 deletions(-)

Index: net-2.6.22-quilt/net/irda/af_irda.c
===
--- net-2.6.22-quilt.orig/net/irda/af_irda.c2007-04-18 02:16:43.0 
+0300
+++ net-2.6.22-quilt/net/irda/af_irda.c 2007-04-18 02:16:43.0 +0300
@@ -873,37 +873,19 @@
 * calling us, the data is waiting for us ;-)
 * Jean II
 */
-   skb = skb_dequeue(sk-sk_receive_queue);
-   if (skb == NULL) {
-   int ret = 0;
-   DECLARE_WAITQUEUE(waitq, current);
+   while (1) {
+   skb = skb_dequeue(sk-sk_receive_queue);
+   if (skb)
+   break;
 
/* Non blocking operation */
if (flags  O_NONBLOCK)
return -EWOULDBLOCK;
 
-   /* The following code is a cut'n'paste of the
-* wait_event_interruptible() macro.
-* We don't us the macro because the condition has
-* side effects : we want to make sure that only one
-* skb get dequeued - Jean II */
-   add_wait_queue(sk-sk_sleep, waitq);
-   for (;;) {
-   set_current_state(TASK_INTERRUPTIBLE);
-   skb = skb_dequeue(sk-sk_receive_queue);
-   if (skb != NULL)
-   break;
-   if (!signal_pending(current)) {
-   schedule();
-   continue;
-   }
-   ret = -ERESTARTSYS;
-   break;
-   }
-   current-state = TASK_RUNNING;
-   remove_wait_queue(sk-sk_sleep, waitq);
-   if(ret)
-   return -ERESTARTSYS;
+   err = wait_event_interruptible(*(sk-sk_sleep),
+   skb_peek(sk-sk_receive_queue));
+   if (err)
+   return err;
}
 
newsk = newsock-sk;

--

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 2/7] [IrDA] af_irda: Silence kernel message in irda_recvmsg_stream

2007-04-18 Thread samuel

From: Olaf Kirch [EMAIL PROTECTED]

This patch silences an IRDA_ASSERT in irda_recvmsg_stream, as described in
http://bugzilla.kernel.org/show_bug.cgi?id=7512 irda_disconnect_indication
would set sk-sk_err to ECONNRESET, and a subsequent call to recvmsg
would print an irritating kernel message and return -1.

When a connected socket is closed by the peer, recvmsg should return 0
rather than an error. This patch fixes this.

Signed-off-by: Olaf Kirch [EMAIL PROTECTED]
Signed-off-by: Samuel Ortiz [EMAIL PROTECTED]
---
 net/irda/af_irda.c |   18 +++---
 1 files changed, 11 insertions(+), 7 deletions(-)

Index: net-2.6.22-quilt/net/irda/af_irda.c
===
--- net-2.6.22-quilt.orig/net/irda/af_irda.c2007-04-18 02:10:56.0 
+0300
+++ net-2.6.22-quilt/net/irda/af_irda.c 2007-04-18 02:14:24.0 +0300
@@ -131,15 +131,13 @@
}
 
/* Prevent race conditions with irda_release() and irda_shutdown() */
+   bh_lock_sock(sk);
if (!sock_flag(sk, SOCK_DEAD)  sk-sk_state != TCP_CLOSE) {
-   lock_sock(sk);
sk-sk_state = TCP_CLOSE;
-   sk-sk_err   = ECONNRESET;
sk-sk_shutdown |= SEND_SHUTDOWN;
 
sk-sk_state_change(sk);
sock_orphan(sk);
-   release_sock(sk);
 
/* Close our TSAP.
 * If we leave it open, IrLMP put it back into the list of
@@ -159,6 +157,7 @@
self-tsap = NULL;
}
}
+   bh_unlock_sock(sk);
 
/* Note : once we are there, there is not much you want to do
 * with the socket anymore, apart from closing it.
@@ -1062,7 +1061,8 @@
 
if (sk-sk_state != TCP_ESTABLISHED) {
sock-state = SS_UNCONNECTED;
-   return sock_error(sk);  /* Always set at this point */
+   err = sock_error(sk);
+   return err? err : -ECONNRESET;
}
 
sock-state = SS_CONNECTED;
@@ -1356,7 +1356,9 @@
IRDA_DEBUG(4, %s()\n, __FUNCTION__);
 
IRDA_ASSERT(self != NULL, return -1;);
-   IRDA_ASSERT(!sock_error(sk), return -1;);
+
+   if ((err = sock_error(sk))  0)
+   return err;
 
skb = skb_recv_datagram(sk, flags  ~MSG_DONTWAIT,
flags  MSG_DONTWAIT, err);
@@ -1403,13 +1405,15 @@
struct irda_sock *self = irda_sk(sk);
int noblock = flags  MSG_DONTWAIT;
size_t copied = 0;
-   int target;
+   int target, err;
long timeo;
 
IRDA_DEBUG(3, %s()\n, __FUNCTION__);
 
IRDA_ASSERT(self != NULL, return -1;);
-   IRDA_ASSERT(!sock_error(sk), return -1;);
+
+   if ((err = sock_error(sk))  0)
+   return err;
 
if (sock-flags  __SO_ACCEPTCON)
return(-EINVAL);

--

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 5/7] [IrDA] IrDA monitor mode

2007-04-18 Thread samuel

Through a protocol specific ioctl, one can disable IrDA TX in order to
monitor an IrDA link.

Signed-off-by: Samuel Ortiz [EMAIL PROTECTED]
---
 include/linux/irda.h |7 +
 include/net/irda/irlap.h |2 +
 net/irda/af_irda.c   |   58 +-
 net/irda/irlap_frame.c   |8 ++
 4 files changed, 74 insertions(+), 1 deletions(-)

Index: net-2.6.22-quilt/include/linux/irda.h
===
--- net-2.6.22-quilt.orig/include/linux/irda.h  2007-04-18 01:57:48.0 
+0300
+++ net-2.6.22-quilt/include/linux/irda.h   2007-04-18 02:16:43.0 
+0300
@@ -172,6 +172,12 @@
 #define SIOCSDTRRTS(SIOCDEVPRIVATE + 8)
 #define SIOCGQOS   (SIOCDEVPRIVATE + 9)
 
+/* Protocol private ioctls */
+#define SIOCIRDASETMODE (SIOCPROTOPRIVATE + 0)
+#define SIOCIRDAGETMODE (SIOCPROTOPRIVATE + 1)
+
+#define IRDA_MODE_MONITOR   0x1
+
 /* No reason to include linux/if.h just because of this one ;-) */
 #define IRNAMSIZ 16 
 
@@ -209,6 +215,7 @@
} ifr_ifru;
 };
 
+#define ifr_name  ifr_ifrn.ifrn_name
 #define ifr_baudrate  ifr_ifru.ifru_qos.baudrate
 #define ifr_receiving ifr_ifru.ifru_receiving 
 #define ifr_dongleifr_ifru.ifru_dongle
Index: net-2.6.22-quilt/include/net/irda/irlap.h
===
--- net-2.6.22-quilt.orig/include/net/irda/irlap.h  2007-04-18 
01:57:48.0 +0300
+++ net-2.6.22-quilt/include/net/irda/irlap.h   2007-04-18 02:16:43.0 
+0300
@@ -208,6 +208,8 @@
intxbofs_delay;   /* Nr of XBOF's used to MTT */
intbofs_count;/* Negotiated extra BOFs */
intnext_bofs; /* Negotiated extra BOFs after next frame */
+
+   intmode; /* 1 is for monitor mode (TX disabled) */
 };
 
 /* 
Index: net-2.6.22-quilt/net/irda/af_irda.c
===
--- net-2.6.22-quilt.orig/net/irda/af_irda.c2007-04-18 02:16:43.0 
+0300
+++ net-2.6.22-quilt/net/irda/af_irda.c 2007-04-18 02:16:43.0 +0300
@@ -49,7 +49,6 @@
 #include linux/sockios.h
 #include linux/init.h
 #include linux/net.h
-#include linux/irda.h
 #include linux/poll.h
 
 #include asm/ioctls.h/* TIOCOUTQ, TIOCINQ */
@@ -1745,6 +1744,7 @@
 static int irda_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
struct sock *sk = sock-sk;
+   void __user *argp = (void __user *)arg;
 
IRDA_DEBUG(4, %s(), cmd=%#x\n, __FUNCTION__, cmd);
 
@@ -1786,6 +1786,62 @@
case SIOCGIFMETRIC:
case SIOCSIFMETRIC:
return -EINVAL;
+
+   case SIOCIRDASETMODE: {
+   struct if_irda_req if_irda;
+   struct net_device * dev;
+   struct irlap_cb * irlap;
+
+   if (!capable(CAP_NET_ADMIN))
+   return -EPERM;
+
+   if (copy_from_user(if_irda, argp, sizeof(struct if_irda_req)))
+   return -EFAULT;
+
+   dev = dev_get_by_name(if_irda.ifr_name);
+   if (!dev)
+   return -ENODEV;
+
+   irlap = (struct irlap_cb *)dev-atalk_ptr;
+   if (!irlap)
+   return -ENODEV;
+
+   IRDA_DEBUG(4, %s(): Setting %s to 0x%x\n, __FUNCTION__,
+  dev-name, if_irda.ifr_mode);
+
+   irlap-mode = if_irda.ifr_mode;
+
+   dev_put(dev);
+
+   break;
+   }
+   case SIOCIRDAGETMODE: {
+   struct if_irda_req if_irda;
+   struct net_device * dev;
+   struct irlap_cb * irlap;
+
+   if (copy_from_user(if_irda, argp, sizeof(struct if_irda_req)))
+   return -EFAULT;
+
+   dev = dev_get_by_name(if_irda.ifr_name);
+   if (!dev)
+   return -ENODEV;
+
+   irlap = (struct irlap_cb *)dev-atalk_ptr;
+   if (!irlap)
+   return -ENODEV;
+
+   if_irda.ifr_mode = irlap-mode;
+
+   dev_put(dev);
+
+   IRDA_DEBUG(4, %s(): %s mode is 0x%x\n, __FUNCTION__,
+  dev-name, if_irda.ifr_mode);
+
+   if (copy_to_user(argp, if_irda, sizeof(struct if_irda_req)))
+   return -EFAULT;
+   }
+   break;
default:
IRDA_DEBUG(1, %s(), doing device ioctl!\n, __FUNCTION__);
return -ENOIOCTLCMD;
Index: net-2.6.22-quilt/net/irda/irlap_frame.c
===
--- net-2.6.22-quilt.orig/net/irda/irlap_frame.c2007-04-18 
01:57:48.0 +0300
+++ net-2.6.22-quilt/net/irda/irlap_frame.c 2007-04-18 02:16:43.0 
+0300
@@ -101,6 +101,14 @@
 
irlap_insert_info(self, skb);
 
+   if (unlikely(self-mode  IRDA_MODE_MONITOR)) {
+

[PATCH 7/7] [IrDA] Misc spelling corrections.

2007-04-18 Thread samuel

From: Guennadi Liakhovetski [EMAIL PROTECTED]

Spelling corrections, from "to" to "too".

Signed-off-by: G. Liakhovetski [EMAIL PROTECTED]
Signed-off-by: Samuel Ortiz [EMAIL PROTECTED]
---
 net/irda/irlap_event.c |2 +-
 net/irda/irlap_frame.c |   14 +++---
 net/irda/irttp.c   |4 ++--
 net/irda/parameters.c  |8 
 4 files changed, 14 insertions(+), 14 deletions(-)

Index: net-2.6.22-quilt/net/irda/irlap_event.c
===
--- net-2.6.22-quilt.orig/net/irda/irlap_event.c2007-04-18 
01:57:48.0 +0300
+++ net-2.6.22-quilt/net/irda/irlap_event.c 2007-04-18 02:16:44.0 
+0300
@@ -590,7 +590,7 @@
if (!self-discovery_log) {
IRDA_WARNING(%s: discovery log is gone! 
 maybe the discovery timeout has been set
- to short?\n, __FUNCTION__);
+ too short?\n, __FUNCTION__);
break;
}
hashbin_insert(self-discovery_log,
Index: net-2.6.22-quilt/net/irda/irlap_frame.c
===
--- net-2.6.22-quilt.orig/net/irda/irlap_frame.c2007-04-18 
02:16:43.0 +0300
+++ net-2.6.22-quilt/net/irda/irlap_frame.c 2007-04-18 02:16:44.0 
+0300
@@ -421,7 +421,7 @@
IRDA_ASSERT(self-magic == LAP_MAGIC, return;);
 
if (!pskb_may_pull(skb, sizeof(struct xid_frame))) {
-   IRDA_ERROR(%s: frame to short!\n, __FUNCTION__);
+   IRDA_ERROR(%s: frame too short!\n, __FUNCTION__);
return;
}
 
@@ -492,7 +492,7 @@
char *text;
 
if (!pskb_may_pull(skb, sizeof(struct xid_frame))) {
-   IRDA_ERROR(%s: frame to short!\n, __FUNCTION__);
+   IRDA_ERROR(%s: frame too short!\n, __FUNCTION__);
return;
}
 
@@ -536,7 +536,7 @@
/* Check if things are sane at this point... */
if((discovery_info == NULL) ||
   !pskb_may_pull(skb, 3)) {
-   IRDA_ERROR(%s: discovery frame to short!\n,
+   IRDA_ERROR(%s: discovery frame too short!\n,
   __FUNCTION__);
return;
}
@@ -1181,7 +1181,7 @@
IRDA_ASSERT(info != NULL, return;);
 
if (!pskb_may_pull(skb, 4)) {
-   IRDA_ERROR(%s: frame to short!\n, __FUNCTION__);
+   IRDA_ERROR(%s: frame too short!\n, __FUNCTION__);
return;
}
 
@@ -1270,7 +1270,7 @@
IRDA_DEBUG(2, %s()\n, __FUNCTION__);
 
if (!pskb_may_pull(skb, sizeof(*frame))) {
-   IRDA_ERROR(%s: frame to short!\n, __FUNCTION__);
+   IRDA_ERROR(%s: frame too short!\n, __FUNCTION__);
return;
}
frame = (struct test_frame *) skb-data;
@@ -1278,7 +1278,7 @@
/* Broadcast frames must carry saddr and daddr fields */
if (info-caddr == CBROADCAST) {
if (skb-len  sizeof(struct test_frame)) {
-   IRDA_DEBUG(0, %s() test frame to short!\n,
+   IRDA_DEBUG(0, %s() test frame too short!\n,
   __FUNCTION__);
return;
}
@@ -1344,7 +1344,7 @@
 
/* Check if frame is large enough for parsing */
if (!pskb_may_pull(skb, 2)) {
-   IRDA_ERROR(%s: frame to short!\n, __FUNCTION__);
+   IRDA_ERROR(%s: frame too short!\n, __FUNCTION__);
dev_kfree_skb(skb);
return -1;
}
Index: net-2.6.22-quilt/net/irda/irttp.c
===
--- net-2.6.22-quilt.orig/net/irda/irttp.c  2007-04-18 01:57:48.0 
+0300
+++ net-2.6.22-quilt/net/irda/irttp.c   2007-04-18 02:16:44.0 +0300
@@ -551,7 +551,7 @@
}
 
if (skb-len  self-max_seg_size) {
-   IRDA_DEBUG(1, %s(), UData is to large for IrLAP!\n,
+   IRDA_DEBUG(1, %s(), UData is too large for IrLAP!\n,
   __FUNCTION__);
goto err;
}
@@ -598,7 +598,7 @@
 *  inside an IrLAP frame
 */
if ((self-tx_max_sdu_size == 0)  (skb-len  self-max_seg_size)) {
-   IRDA_ERROR(%s: SAR disabled, and data is to large for 
IrLAP!\n,
+   IRDA_ERROR(%s: SAR disabled, and data is too large for 
IrLAP!\n,
   __FUNCTION__);
ret = -EMSGSIZE;
goto err;
Index: net-2.6.22-quilt/net/irda/parameters.c
===
--- net-2.6.22-quilt.orig/net/irda/parameters.c 2007-04-18 01:57:48.0 
+0300
+++ net-2.6.22-quilt/net/irda/parameters.c  2007-04-18 02:16:44.0

[PATCH 4/7] [IrDA] af_irda: IRDA_ASSERT cleanups

2007-04-18 Thread samuel

In af_irda.c, the multiple IRDA_ASSERT() are either hiding bugs, useless, or
returning the wrong value.
Let's clean that up.

Signed-off-by: Samuel Ortiz [EMAIL PROTECTED]
---
 net/irda/af_irda.c |   32 +---
 1 files changed, 5 insertions(+), 27 deletions(-)

Index: net-2.6.22-quilt/net/irda/af_irda.c
===
--- net-2.6.22-quilt.orig/net/irda/af_irda.c2007-04-18 02:16:43.0 
+0300
+++ net-2.6.22-quilt/net/irda/af_irda.c 2007-04-18 02:16:43.0 +0300
@@ -89,7 +89,6 @@
 
self = instance;
sk = instance;
-   IRDA_ASSERT(sk != NULL, return -1;);
 
err = sock_queue_rcv_skb(sk, skb);
if (err) {
@@ -306,8 +305,6 @@
 
IRDA_DEBUG(2, %s()\n, __FUNCTION__);
 
-   IRDA_ASSERT(self != NULL, return;);
-
skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER,
GFP_ATOMIC);
if (skb == NULL) {
@@ -337,7 +334,7 @@
 
self = instance;
sk = instance;
-   IRDA_ASSERT(sk != NULL, return;);
+   BUG_ON(sk == NULL);
 
switch (flow) {
case FLOW_STOP:
@@ -449,7 +446,7 @@
IRDA_DEBUG(2, %s()\n, __FUNCTION__);
 
self = (struct irda_sock *) priv;
-   IRDA_ASSERT(self != NULL, return;);
+   BUG_ON(self == NULL);
 
/* Nothing for the caller */
self-cachelog = NULL;
@@ -546,8 +543,6 @@
 {
IRDA_DEBUG(2, %s(%p, %s)\n, __FUNCTION__, self, name);
 
-   IRDA_ASSERT(self != NULL, return -1;);
-
if (self-iriap) {
IRDA_WARNING(%s(): busy with a previous query\n,
 __FUNCTION__);
@@ -635,8 +630,6 @@
 
IRDA_DEBUG(2, %s(), name=%s\n, __FUNCTION__, name);
 
-   IRDA_ASSERT(self != NULL, return -1;);
-
/* Ask lmp for the current discovery log
 * Note : we have to use irlmp_get_discoveries(), as opposed
 * to play with the cachelog directly, because while we are
@@ -784,8 +777,6 @@
struct irda_sock *self = irda_sk(sk);
int err;
 
-   IRDA_ASSERT(self != NULL, return -1;);
-
IRDA_DEBUG(2, %s(%p)\n, __FUNCTION__, self);
 
if (addr_len != sizeof(struct sockaddr_irda))
@@ -841,8 +832,6 @@
 
IRDA_DEBUG(2, %s()\n, __FUNCTION__);
 
-   IRDA_ASSERT(self != NULL, return -1;);
-
err = irda_create(newsock, sk-sk_protocol);
if (err)
return err;
@@ -889,10 +878,12 @@
}
 
newsk = newsock-sk;
+   if (newsk == NULL)
+   return -EIO;
+
newsk-sk_state = TCP_ESTABLISHED;
 
new = irda_sk(newsk);
-   IRDA_ASSERT(new != NULL, return -1;);
 
/* Now attach up the new socket */
new-tsap = irttp_dup(self-tsap, new);
@@ -1154,8 +1145,6 @@
 {
IRDA_DEBUG(2, %s(%p)\n, __FUNCTION__, self);
 
-   IRDA_ASSERT(self != NULL, return;);
-
/* Unregister with IrLMP */
irlmp_unregister_client(self-ckey);
irlmp_unregister_service(self-skey);
@@ -1274,7 +1263,6 @@
return -ENOTCONN;
 
self = irda_sk(sk);
-   IRDA_ASSERT(self != NULL, return -1;);
 
/* Check if IrTTP is wants us to slow down */
 
@@ -1337,8 +1325,6 @@
 
IRDA_DEBUG(4, %s()\n, __FUNCTION__);
 
-   IRDA_ASSERT(self != NULL, return -1;);
-
if ((err = sock_error(sk))  0)
return err;
 
@@ -1392,8 +1378,6 @@
 
IRDA_DEBUG(3, %s()\n, __FUNCTION__);
 
-   IRDA_ASSERT(self != NULL, return -1;);
-
if ((err = sock_error(sk))  0)
return err;
 
@@ -1527,7 +1511,6 @@
return -ENOTCONN;
 
self = irda_sk(sk);
-   IRDA_ASSERT(self != NULL, return -1;);
 
/*
 * Check that we don't send out too big frames. This is an unreliable
@@ -1596,7 +1579,6 @@
}
 
self = irda_sk(sk);
-   IRDA_ASSERT(self != NULL, return -1;);
 
/* Check if an address was specified with sendto. Jean II */
if (msg-msg_name) {
@@ -1670,8 +1652,6 @@
struct sock *sk = sock-sk;
struct irda_sock *self = irda_sk(sk);
 
-   IRDA_ASSERT(self != NULL, return -1;);
-
IRDA_DEBUG(1, %s(%p)\n, __FUNCTION__, self);
 
sk-sk_state   = TCP_CLOSE;
@@ -1844,8 +1824,6 @@
struct ias_attrib * ias_attr;   /* Attribute in IAS object */
int opt;
 
-   IRDA_ASSERT(self != NULL, return -1;);
-
IRDA_DEBUG(2, %s(%p)\n, __FUNCTION__, self);
 
if (level != SOL_IRLMP)

--

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 0/7] [IrDA] net-2.6.22 fixes

2007-04-18 Thread samuel

Hi Dave,

Here go 7 patches for IrDA, against your latest net-2.6.22.
They are mainly af_irda.c fixes/cleanup. Also, I just sent another af_irda.c
oops fix, against net-2.6 (IrDA: Correctly handling socket error).
If you want it against net-2.6.22 as well, just let me know.

Cheers,
Samuel.


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] [net-2.6] IrDA: Correctly handling socket error

2007-04-18 Thread David Miller

From: Samuel Ortiz [EMAIL PROTECTED]
Date: Thu, 19 Apr 2007 00:45:26 +0300

 From: Olaf Kirch [EMAIL PROTECTED]

 This patch fixes an oops first reported in mid 2006 - see
 http://lkml.org/lkml/2006/8/29/358 The cause of this bug report is that
 when an error is signalled on the socket, irda_recvmsg_stream returns
 without removing a local wait_queue variable from the socket's sk_sleep
 queue. This causes havoc further down the road.

 In response to this problem, a patch was made that invoked sock_orphan on
 the socket when receiving a disconnect indication. This is not a good fix,
 as this sets sk_sleep to NULL, causing applications sleeping in recvmsg
 (and other places) to oops.

 This is against the latest net-2.6 and should be considered for -stable
 inclusion. 

 Signed-off-by: Olaf Kirch [EMAIL PROTECTED]
 Signed-off-by: Samuel Ortiz [EMAIL PROTECTED]

Applied and I'll push this to -stable too, thanks!
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 6/7] [IrDA] Adding carriage returns to mcs7780 debug statements

2007-04-18 Thread samuel


Signed-off-by: Samuel Ortiz [EMAIL PROTECTED]
---
 drivers/net/irda/mcs7780.c |   26 +-
 1 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/net/irda/mcs7780.c b/drivers/net/irda/mcs7780.c
index 54d1d54..0de8672 100644
--- a/drivers/net/irda/mcs7780.c
+++ b/drivers/net/irda/mcs7780.c
@@ -200,14 +200,14 @@ static inline int mcs_setup_transceiver_vishay(struct 
mcs_cb *mcs)
 /* Setup a communication between mcs7780 and agilent chip. */
 static inline int mcs_setup_transceiver_agilent(struct mcs_cb *mcs)
 {
-   IRDA_WARNING(This transceiver type is not supported yet.);
+   IRDA_WARNING(This transceiver type is not supported yet.\n);
return 1;
 }
 
 /* Setup a communication between mcs7780 and sharp chip. */
 static inline int mcs_setup_transceiver_sharp(struct mcs_cb *mcs)
 {
-   IRDA_WARNING(This transceiver type is not supported yet.);
+   IRDA_WARNING(This transceiver type is not supported yet.\n);
return 1;
 }
 
@@ -279,7 +279,7 @@ static inline int mcs_setup_transceiver(struct mcs_cb *mcs)
break;
 
default:
-   IRDA_WARNING(Unknown transceiver type: %d,
+   IRDA_WARNING(Unknown transceiver type: %d\n,
 mcs-transceiver_type);
ret = 1;
}
@@ -318,7 +318,7 @@ static inline int mcs_setup_transceiver(struct mcs_cb *mcs)
return ret;
 
 error:
-   IRDA_ERROR(%s, msg);
+   IRDA_ERROR(%s\n, msg);
return ret;
 }
 
@@ -587,7 +587,7 @@ static int mcs_speed_change(struct mcs_cb *mcs)
} while(cnt++  100  (rval  MCS_IRINTX));
 
if(cnt = 100) {
-   IRDA_ERROR(unable to change speed);
+   IRDA_ERROR(unable to change speed\n);
ret = -EIO;
goto error;
}
@@ -638,7 +638,7 @@ static int mcs_speed_change(struct mcs_cb *mcs)
 
default:
ret = 1;
-   IRDA_WARNING(Unknown transceiver type: %d,
+   IRDA_WARNING(Unknown transceiver type: %d\n,
 mcs-transceiver_type);
}
if (unlikely(ret))
@@ -733,7 +733,7 @@ static int mcs_net_open(struct net_device *netdev)
sprintf(hwname, usb#%d, mcs-usbdev-devnum);
mcs-irlap = irlap_open(netdev, mcs-qos, hwname);
if (!mcs-irlap) {
-   IRDA_ERROR(mcs7780: irlap_open failed);
+   IRDA_ERROR(mcs7780: irlap_open failed\n);
goto error2;
}
 
@@ -862,7 +862,7 @@ static int mcs_hard_xmit(struct sk_buff *skb, struct 
net_device *ndev)
  mcs-out_buf, wraplen, mcs_send_irq, mcs);
 
if ((ret = usb_submit_urb(mcs-tx_urb, GFP_ATOMIC))) {
-   IRDA_ERROR(failed tx_urb: %d, ret);
+   IRDA_ERROR(failed tx_urb: %d\n, ret);
switch (ret) {
case -ENODEV:
case -EPIPE:
@@ -897,7 +897,7 @@ static int mcs_probe(struct usb_interface *intf,
if (!ndev)
goto error1;
 
-   IRDA_DEBUG(1, MCS7780 USB-IrDA bridge found at %d., udev-devnum);
+   IRDA_DEBUG(1, MCS7780 USB-IrDA bridge found at %d.\n, udev-devnum);
 
/* what is it realy for? */
SET_MODULE_OWNER(ndev);
@@ -905,7 +905,7 @@ static int mcs_probe(struct usb_interface *intf,
 
ret = usb_reset_configuration(udev);
if (ret != 0) {
-   IRDA_ERROR(mcs7780: usb reset configuration failed);
+   IRDA_ERROR(mcs7780: usb reset configuration failed\n);
goto error2;
}
 
@@ -950,7 +950,7 @@ static int mcs_probe(struct usb_interface *intf,
if (ret != 0)
goto error2;
 
-   IRDA_DEBUG(1, IrDA: Registered MosChip MCS7780 device as %s,
+   IRDA_DEBUG(1, IrDA: Registered MosChip MCS7780 device as %s\n,
   ndev-name);
 
mcs-transceiver_type = transceiver_type;
@@ -981,7 +981,7 @@ static void mcs_disconnect(struct usb_interface *intf)
free_netdev(mcs-netdev);
 
usb_set_intfdata(intf, NULL);
-   IRDA_DEBUG(0, MCS7780 now disconnected.);
+   IRDA_DEBUG(0, MCS7780 now disconnected.\n);
 }
 
 /* Module insertion */
@@ -992,7 +992,7 @@ static int __init mcs_init(void)
/* register this driver with the USB subsystem */
result = usb_register(mcs_driver);
if (result)
-   IRDA_ERROR(usb_register failed. Error number %d, result);
+   IRDA_ERROR(usb_register failed. Error number %d\n, result);
 
return result;
 }
-- 
1.5.1

--

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] Make dev_kfree_skb_any check if the skb is valid

2007-04-18 Thread Erik Hovland

If dev_kfree_skb_any is called, it may in turn call dev_kfree_skb_irq.
That call will dereference the skb. If the skb is invalid, down the
drain we go.

This one-liner checks to see if the skb is valid as part of the
determination of whether to call dev_kfree_skb_irq.

Signed-off-by: Erik Hovland [EMAIL PROTECTED]

---

 net/core/dev.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 4dc93cc..85f4a4c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1131,7 +1131,7 @@ EXPORT_SYMBOL(__netif_rx_schedule);
 
 void dev_kfree_skb_any(struct sk_buff *skb)
 {
-   if (in_irq() || irqs_disabled())
+   if (skb  (in_irq() || irqs_disabled()))
dev_kfree_skb_irq(skb);
else
dev_kfree_skb(skb);

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] Make dev_kfree_skb_any check if the skb is valid

2007-04-18 Thread David Miller

From: Erik Hovland [EMAIL PROTECTED]
Date: Wed, 18 Apr 2007 15:33:44 -0700

 If dev_kfree_skb_any is called and it then calls dev_kfree_skb_irq.
 That call will dereference the skb. If the skb is invalid, down the
 drain we go.

 This one-liner checks to see if the skb is valid as part of the
 determination of whether to call dev_kfree_skb_irq.

 Signed-off-by: Erik Hovland [EMAIL PROTECTED]

This should never be invoked with a NULL skb argument.

Who is doing that?
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

AF_PACKET how to get the original netdev from a packet received from a bonded master

2007-04-18 Thread Laurent Chavey


When using PF_PACKET socket with bonded interfaces, there is no
way to get the slave interface (physical interface) the packet was
actually received on.

It looks like there isn't a way to pass the original device {see
packet_rcv() in ./net/packet/af_packet.c} to the socket reader.
When an interface is bonded, dev, by the time it reaches this function,
points to the bonding interface, not the Ethernet interface the packet
was received on.  sockaddr_ll does not contain any fields that could be used to
pass the original device in.  As such there is no way for an application
that is interested in the physical interface the packet was received on
to get the information when running a bond (one that uses the same mac address
for all of its slaves).


observation,
if one uses tcpdump on a bonded slave, only egress packets are tapped.
if one uses tcpdump on a bonded master, ingress packets are tapped, egress
packets are tapped

from the above, it would make sense to have the same behavior on ingress/egress
for both slave and master. the latter would require the tapped packet be
duplicated
at packet_rcv() and passed up to the socket layer as two copies
  - one associated with the master dev
  - one associated with the slave dev.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] Make dev_kfree_skb_any check if the skb is valid

2007-04-18 Thread Erik Hovland

On Wed, Apr 18, 2007 at 03:44:16PM -0700, David Miller wrote:
 From: Erik Hovland [EMAIL PROTECTED]
 Date: Wed, 18 Apr 2007 15:33:44 -0700

  If dev_kfree_skb_any is called and it then calls dev_kfree_skb_irq.
  That call will dereference the skb. If the skb is invalid, down the
  drain we go.

  This one-liner checks to see if the skb is valid as part of the
  determination of whether to call dev_kfree_skb_irq.

  Signed-off-by: Erik Hovland [EMAIL PROTECTED]

 This should never be invoked with a NULL skb argument.

 Who is doing that?

Heh, the reason I came up with this patch is that the code in
drivers/usb/gadget/ether.c at about line 1653 will attempt to allocate
an skb. If it fails then it uses a goto to jump to line 1672 where it
will call dev_kfree_skb_any (skb) on a potentially null skb. I put a
validity check there and sent it off to the USB gadget maintainer. He
asked me to instead make the dev_kfree_skb_any call more robust and see
how that went over on the netdev list. Like a lead balloon?

E

-- 
Erik Hovland
mail: [EMAIL PROTECTED]
web: http://hovland.org/
PGP/GPG public key available on request

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: AF_PACKET how to get the original netdev from a packet received from a bonded master

2007-04-18 Thread David Miller

From: Laurent Chavey [EMAIL PROTECTED]
Date: Wed, 18 Apr 2007 16:05:27 -0700

 When using PF_PACKET socket with bonded interfaces, there is no
 way to get the slave interface (physical interface) the packet was
 actually received on.

That's right.

There isn't a real good solution to this problem either.
Decapsulation can happen to arbitrary levels, how many
devices should we remember and how to do the reference
counting on that correctly?

As such, I don't think this will ever be provided.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] Make dev_kfree_skb_any check if the skb is valid

2007-04-18 Thread David Miller

From: Erik Hovland [EMAIL PROTECTED]
Date: Wed, 18 Apr 2007 16:18:15 -0700

 Heh, the reason I came up with this patch is that the code in
 drivers/usb/gadget/ether.c at about line 1653 will attempt to allocate
 an skb. If it fails then it uses a goto to jump to line 1672 where it
 will call dev_kfree_skb_any (skb) on a potentially null skb. I put a
 validity check there and sent it off to the USB gadget maintainer. He
 asked me to instead make the dev_kfree_skb_any call more robust and see
 how that went over on the netdev list. Like a lead balloon?

Yep, like a lead balloon.

The USB gadget driver maintainer should accept your patch
to check for NULL in the gadget driver as that is the one
and only case in the entire tree where that can happen and
we're not eating a conditional and a return just for that
one esoteric case.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [NETLINK] Don't attach callback to a going-away netlink socket

2007-04-18 Thread David Miller

From: Pavel Emelianov [EMAIL PROTECTED]
Date: Wed, 18 Apr 2007 12:16:18 +0400

 The proposal it to make sock_orphan before detaching the callback
 in netlink_release() and to check for the sock to be SOCK_DEAD in
 netlink_dump_start() before setting a new callback.

As discussed in this thread there might be other ways to
approach this, but this fix is good for now.

Patch applied, thank you.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: AF_PACKET how to get the original netdev from a packet received from a bonded master

2007-04-18 Thread Chris Leech


On 4/18/07, David Miller [EMAIL PROTECTED] wrote:

Ok, it will give you one level of decapsulation.

What do we tell people who want 2 devices previous? :-)


I can tell you that the intent of PJs patch was to provide the ifindex
of the physical interface that a packet entered the system on,
regardless of how many layers of encapsulation are involved.

Of course it may not actually do that ...
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] CONFIG_PACKET_MMAP should depend on MMU

2007-04-18 Thread Aubrey Li


On 4/18/07, David Howells [EMAIL PROTECTED] wrote:

Aubrey Li [EMAIL PROTECTED] wrote:

 Here, in the attachment I wrote a small test app. Please correct if
 there is anything wrong, and feel free to improve it.

Okay... I have that working... probably.  I don't know what output it's
supposed to produce, but I see this:

# /packet-mmap/sample_packet_mmap
00-00-00-01-00-00-00-8a-00-00-00-8a-00-42-00-50-
38-43-13-a0-00-07-ff-3c-00-00-00-00-00-00-00-00-
00-11-08-00-00-00-00-01-00-01-00-06-00-d0-b7-de-
32-7b-00-00-00-00-00-00-00-00-00-00-00-00-00-00-
00-00-00-90-cc-a2-75-6b-00-d0-b7-de-32-7b-08-00-
45-00-00-7c-00-00-40-00-40-11-b4-13-c0-a8-02-80-
c0-a8-02-8d-08-01-03-20-00-68-8e-65-7f-5b-7e-03-
00-00-00-01-00-00-00-00-00-00-00-00-00-00-00-00-
00-00-00-00-00-00-00-00-00-00-00-01-00-00-81-a4-
00-00-00-01-00-00-00-00-00-00-00-00-00-1d-b8-86-
00-00-10-00-ff-ff-ff-ff-00-00-0e-f0-00-00-09-02-
01-cb-03-16-46-26-38-0d-00-00-00-00-46-26-38-1e-
00-00-00-00-46-26-38-1e-00-00-00-00-00-00-00-00-
00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00- [repeated]

Does that look reasonable?


Yes, it's reasonable for me, as long as your
host IP is 192.168.2.128
and
target IP is 192.168.2.141
See below


00-90-cc-a2-75-6b-|___ MAC Address
00-d0-b7-de-32-7b-|
08-00Type: IP
45-00Ver, IHL, TOS
00-7cIP.total.length
00-00-
40-00-
40TTL
11UDP protocol
b4-13Checksum
c0-a8-02-80---Source IP: 192.168.2.128
c0-a8-02-8d---Dest IP: 192.168.2.141

snip--



I've attached the preliminary patch.


Thanks, I'll take a look and try to see if I can give some feedback.

-Aubrey


Note four things about it:

 (1) I've had to add the get_unmapped_area() op to the proto_ops struct, but
 I've only done it for CONFIG_MMU=n as making it available for CONFIG_MMU=y
 could cause problems.

 (2) There's a race between packet_get_unmapped_area() being called and
 packet_mmap() being called.

 (3) I've added an extra check into packet_set_ring() to make sure the caller
 isn't asking for a combination of buffer size and count that will exceed
 ULONG_MAX.  This protects a multiply done elsewhere.

 (4) The entire data buffer is allocated as one contiguous lump in NOMMU-mode.

David

---
[PATCH] NOMMU: Support mmap() on AF_PACKET sockets

From: David Howells [EMAIL PROTECTED]

Support mmap() on AF_PACKET sockets in NOMMU-mode kernels.

Signed-Off-By: David Howells [EMAIL PROTECTED]
---

 include/linux/net.h|7 +++
 include/net/sock.h |8 +++
 net/core/sock.c|   10 
 net/packet/af_packet.c |  118 
 net/socket.c   |   77 +++
 5 files changed, 219 insertions(+), 1 deletions(-)

diff --git a/include/linux/net.h b/include/linux/net.h
index 4db21e6..9e77cf6 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -161,6 +161,11 @@ struct proto_ops {
int (*recvmsg)   (struct kiocb *iocb, struct socket *sock,
  struct msghdr *m, size_t total_len,
  int flags);
+#ifndef CONFIG_MMU
+   unsigned long   (*get_unmapped_area)(struct file *file, struct socket 
*sock,
+unsigned long addr, unsigned long 
len,
+unsigned long pgoff, unsigned long 
flags);
+#endif
int (*mmap)  (struct file *file, struct socket *sock,
  struct vm_area_struct * vma);
ssize_t (*sendpage)  (struct socket *sock, struct page *page,
@@ -191,6 +196,8 @@ extern int   sock_sendmsg(struct socket *sock, 
struct msghdr *msg,
 extern int  sock_recvmsg(struct socket *sock, struct msghdr *msg,
  size_t size, int flags);
 extern int  sock_map_fd(struct socket *sock);
+extern void sock_make_mappable(struct socket *sock,
+   unsigned long prot);
 extern struct socket *sockfd_lookup(int fd, int *err);
 #define sockfd_put(sock) fput(sock-file)
 extern int  net_ratelimit(void);
diff --git a/include/net/sock.h b/include/net/sock.h
index 2c7d60c..d91edea 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -841,6 +841,14 @@ extern int  sock_no_sendmsg(struct 
kiocb *, struct socket *,
struct msghdr *, size_t);
 extern int  sock_no_recvmsg(struct kiocb *, struct socket 
*,

Re: AF_PACKET how to get the original netdev from a packet received from a bonded master

2007-04-18 Thread David Miller

From: Chris Leech [EMAIL PROTECTED]
Date: Wed, 18 Apr 2007 17:17:45 -0700

 On 4/18/07, David Miller [EMAIL PROTECTED] wrote:
  Ok, it will give you one level of decapsulation.

  What do we tell people who want 2 devices previous? :-)

 I can tell you that the intent of PJs patch was to provide the ifindex
 of the physical interface that a packet entered the system on,
 regardless of how many layers of encapsulation are involved.

 Of course it may not actually do that ...

Ok, I'll try to remember to give high priority to reviewing PJ's patch
on my next rebase of the net-2.6.22 tree which should be
tonight or tomorrow sometime.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 0/0] Re-try changes for PMTUDISC_PROBE

2007-04-18 Thread John Heffner

This backs out the transport layer MTU checks that don't work.  As a 
consequence, I had to back out the PMTUDISC_PROBE patch as well.  These 
patches should fix the problem with ipv6 that the transport layer change 
tried to address, and re-implement PMTUDISC_PROBE.  I think this 
approach is nicer than the last one, since it doesn't require a bit in 
struct sk_buff.


Thanks,
  -John
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] Revert [NET] Do pmtu check in transport layer

2007-04-18 Thread John Heffner

This reverts commit 87e927a0583bd4a8ba9e97cd75b58d8aa1c76e37.

This idea does not work, as pointed out by Patrick McHardy.

Signed-off-by: John Heffner [EMAIL PROTECTED]
---
 net/ipv4/ip_output.c  |4 +---
 net/ipv4/raw.c|8 +++-
 net/ipv6/ip6_output.c |   11 +--
 net/ipv6/raw.c|7 ++-
 4 files changed, 11 insertions(+), 19 deletions(-)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 79e71ee..34606ef 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -810,9 +810,7 @@ int ip_append_data(struct sock *sk,
fragheaderlen = sizeof(struct iphdr) + (opt ? opt-optlen : 0);
maxfraglen = ((mtu - fragheaderlen)  ~7) + fragheaderlen;
 
-   if (inet-cork.length + length  0x - fragheaderlen ||
-   (inet-pmtudisc = IP_PMTUDISC_DO 
-inet-cork.length + length  mtu)) {
+   if (inet-cork.length + length  0x - fragheaderlen) {
ip_local_error(sk, EMSGSIZE, rt-rt_dst, inet-dport, 
mtu-exthdrlen);
return -EMSGSIZE;
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index c60aadf..24d7c9f 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -271,12 +271,10 @@ static int raw_send_hdrinc(struct sock *sk, void *from, 
size_t length,
struct iphdr *iph;
struct sk_buff *skb;
int err;
-   int mtu;
 
-   mtu = inet-pmtudisc == IP_PMTUDISC_DO ? dst_mtu(rt-u.dst) :
-rt-u.dst.dev-mtu;
-   if (length  mtu) {
-   ip_local_error(sk, EMSGSIZE, rt-rt_dst, inet-dport, mtu);
+   if (length  rt-u.dst.dev-mtu) {
+   ip_local_error(sk, EMSGSIZE, rt-rt_dst, inet-dport,
+  rt-u.dst.dev-mtu);
return -EMSGSIZE;
}
if (flagsMSG_PROBE)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index b8e307a..4cfdad4 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1079,12 +1079,11 @@ int ip6_append_data(struct sock *sk, int getfrag(void 
*from, char *to,
fragheaderlen = sizeof(struct ipv6hdr) + rt-u.dst.nfheader_len + (opt 
? opt-opt_nflen : 0);
maxfraglen = ((mtu - fragheaderlen)  ~7) + fragheaderlen - 
sizeof(struct frag_hdr);
 
-   if ((mtu = sizeof(struct ipv6hdr) + IPV6_MAXPLEN 
-inet-cork.length + length  sizeof(struct ipv6hdr) + IPV6_MAXPLEN 
- fragheaderlen) ||
-   (np-pmtudisc = IPV6_PMTUDISC_DO 
-inet-cork.length + length  mtu)) {
-   ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
-   return -EMSGSIZE;
+   if (mtu = sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
+   if (inet-cork.length + length  sizeof(struct ipv6hdr) + 
IPV6_MAXPLEN - fragheaderlen) {
+   ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
+   return -EMSGSIZE;
+   }
}
 
/*
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index f4cd90b..f65fcd7 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -558,12 +558,9 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, 
int length,
struct sk_buff *skb;
unsigned int hh_len;
int err;
-   int mtu;
 
-   mtu = np-pmtudisc == IPV6_PMTUDISC_DO ? dst_mtu(rt-u.dst) :
-rt-u.dst.dev-mtu;
-   if (length  mtu) {
-   ipv6_local_error(sk, EMSGSIZE, fl, mtu);
+   if (length  rt-u.dst.dev-mtu) {
+   ipv6_local_error(sk, EMSGSIZE, fl, rt-u.dst.dev-mtu);
return -EMSGSIZE;
}
if (flagsMSG_PROBE)
-- 
1.5.1.rc3.30.ga8f4-dirty

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] [NET] MTU discovery check in ip6_fragment()

2007-04-18 Thread John Heffner

Adds a check in ip6_fragment() mirroring ip_fragment() for packets
that we can't fragment, and sends an ICMP Packet Too Big message
in response.

Signed-off-by: John Heffner [EMAIL PROTECTED]
---
 net/ipv6/ip6_output.c |   13 +
 1 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4cfdad4..5a5b7d4 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -567,6 +567,19 @@ static int ip6_fragment(struct sk_buff *skb, int 
(*output)(struct sk_buff *))
nexthdr = *prevhdr;
 
mtu = dst_mtu(rt-u.dst);
+
+   /* We must not fragment if the socket is set to force MTU discovery
+* or if the skb it not generated by a local socket.  (This last
+* check should be redundant, but it's free.)
+*/
+   if (!np || np-pmtudisc = IPV6_PMTUDISC_DO) {
+   skb-dev = skb-dst-dev;
+   icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb-dev);
+   IP6_INC_STATS(ip6_dst_idev(skb-dst), IPSTATS_MIB_FRAGFAILS);
+   kfree_skb(skb);
+   return -EMSGSIZE;
+   }
+
if (np  np-frag_size  mtu) {
if (np-frag_size)
mtu = np-frag_size;
-- 
1.5.1.rc3.30.ga8f4-dirty

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] Revert [NET] Add IP(V6)_PMTUDISC_RPOBE

2007-04-18 Thread John Heffner

This reverts commit d21d2a90b879c0cf159df5944847e6d9833816eb.

Must be backed out because commit 87e927a0583bd4a8ba9e97cd75b58d8aa1c76e37
does not work.

Signed-off-by: John Heffner [EMAIL PROTECTED]
---
 include/linux/in.h   |1 -
 include/linux/in6.h  |1 -
 include/linux/skbuff.h   |3 +--
 include/net/ip.h |2 +-
 net/core/skbuff.c|2 --
 net/ipv4/ip_output.c |   14 --
 net/ipv4/ip_sockglue.c   |2 +-
 net/ipv4/raw.c   |3 ---
 net/ipv6/ip6_output.c|   12 
 net/ipv6/ipv6_sockglue.c |2 +-
 net/ipv6/raw.c   |3 ---
 11 files changed, 12 insertions(+), 33 deletions(-)

diff --git a/include/linux/in.h b/include/linux/in.h
index 2dc1f8a..1912e7c 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -83,7 +83,6 @@ struct in_addr {
 #define IP_PMTUDISC_DONT   0   /* Never send DF frames */
 #define IP_PMTUDISC_WANT   1   /* Use per route hints  */
 #define IP_PMTUDISC_DO 2   /* Always DF*/
-#define IP_PMTUDISC_PROBE  3   /* Ignore dst pmtu  */
 
 #define IP_MULTICAST_IF32
 #define IP_MULTICAST_TTL   33
diff --git a/include/linux/in6.h b/include/linux/in6.h
index d559fac..4e8350a 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -179,7 +179,6 @@ struct in6_flowlabel_req
 #define IPV6_PMTUDISC_DONT 0
 #define IPV6_PMTUDISC_WANT 1
 #define IPV6_PMTUDISC_DO   2
-#define IPV6_PMTUDISC_PROBE3
 
 /* Flowlabel */
 #define IPV6_FLOWLABEL_MGR 32
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 8bf9b9f..7f17cfc 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -277,8 +277,7 @@ struct sk_buff {
nfctinfo:3;
__u8pkt_type:3,
fclone:2,
-   ipvs_property:1,
-   ign_dst_mtu:1;
+   ipvs_property:1;
__be16  protocol;
 
void(*destructor)(struct sk_buff *skb);
diff --git a/include/net/ip.h b/include/net/ip.h
index 6a08b65..75f226d 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -206,7 +206,7 @@ int ip_decrease_ttl(struct iphdr *iph)
 static inline
 int ip_dont_fragment(struct sock *sk, struct dst_entry *dst)
 {
-   return (inet_sk(sk)-pmtudisc = IP_PMTUDISC_DO ||
+   return (inet_sk(sk)-pmtudisc == IP_PMTUDISC_DO ||
(inet_sk(sk)-pmtudisc == IP_PMTUDISC_WANT 
 !(dst_metric(dst, RTAX_LOCK)(1RTAX_MTU;
 }
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2391cdf..f0d986a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -479,7 +479,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t 
gfp_mask)
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
C(ipvs_property);
 #endif
-   C(ign_dst_mtu);
C(protocol);
n-destructor = NULL;
C(mark);
@@ -543,7 +542,6 @@ static void copy_skb_header(struct sk_buff *new, const 
struct sk_buff *old)
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
new-ipvs_property = old-ipvs_property;
 #endif
-   new-ign_dst_mtu= old-ign_dst_mtu;
 #ifdef CONFIG_NET_SCHED
 #ifdef CONFIG_NET_CLS_ACT
new-tc_verd = old-tc_verd;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 704bc44..79e71ee 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -198,8 +198,7 @@ static inline int ip_finish_output(struct sk_buff *skb)
return dst_output(skb);
}
 #endif
-   if (skb-len  dst_mtu(skb-dst) 
-   !skb-ign_dst_mtu  !skb_is_gso(skb))
+   if (skb-len  dst_mtu(skb-dst)  !skb_is_gso(skb))
return ip_fragment(skb, ip_finish_output2);
else
return ip_finish_output2(skb);
@@ -788,9 +787,7 @@ int ip_append_data(struct sock *sk,
inet-cork.addr = ipc-addr;
}
dst_hold(rt-u.dst);
-   inet-cork.fragsize = mtu = inet-pmtudisc == IP_PMTUDISC_PROBE 
?
-   rt-u.dst.dev-mtu :
-   dst_mtu(rt-u.dst.path);
+   inet-cork.fragsize = mtu = dst_mtu(rt-u.dst.path);
inet-cork.rt = rt;
inet-cork.length = 0;
sk-sk_sndmsg_page = NULL;
@@ -1208,16 +1205,13 @@ int ip_push_pending_frames(struct sock *sk)
 * to fragment the frame generated here. No matter, what transforms
 * how transforms change size of the packet, it will come out.
 */
-   if (inet-pmtudisc  IP_PMTUDISC_DO)
+   if (inet-pmtudisc != IP_PMTUDISC_DO)
skb-local_df = 1;
 
-   if (inet-pmtudisc == IP_PMTUDISC_PROBE)
-   skb-ign_dst_mtu = 1;
-

[PATCH] [NET] Add IP(V6)_PMTUDISC_RPOBE

2007-04-18 Thread John Heffner

Add IP(V6)_PMTUDISC_PROBE value for IP(V6)_MTU_DISCOVER.  This option forces
us not to fragment, but does not make use of the kernel path MTU discovery.
That is, it allows for user-mode MTU probing (or, packetization-layer path
MTU discovery).  This is particularly useful for diagnostic utilities, like
traceroute/tracepath.

Signed-off-by: John Heffner [EMAIL PROTECTED]
---
 include/linux/in.h   |1 +
 include/linux/in6.h  |1 +
 net/ipv4/ip_output.c |   20 +++-
 net/ipv4/ip_sockglue.c   |2 +-
 net/ipv6/ip6_output.c|   15 ---
 net/ipv6/ipv6_sockglue.c |2 +-
 6 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/include/linux/in.h b/include/linux/in.h
index 1912e7c..3975cbf 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -83,6 +83,7 @@ struct in_addr {
 #define IP_PMTUDISC_DONT   0   /* Never send DF frames */
 #define IP_PMTUDISC_WANT   1   /* Use per route hints  */
 #define IP_PMTUDISC_DO 2   /* Always DF*/
+#define IP_PMTUDISC_PROBE  3   /* Ignore dst pmtu  */
 
 #define IP_MULTICAST_IF32
 #define IP_MULTICAST_TTL   33
diff --git a/include/linux/in6.h b/include/linux/in6.h
index 4e8350a..d559fac 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -179,6 +179,7 @@ struct in6_flowlabel_req
 #define IPV6_PMTUDISC_DONT 0
 #define IPV6_PMTUDISC_WANT 1
 #define IPV6_PMTUDISC_DO   2
+#define IPV6_PMTUDISC_PROBE3
 
 /* Flowlabel */
 #define IPV6_FLOWLABEL_MGR 32
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 34606ef..66e2c3a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -189,6 +189,14 @@ static inline int ip_finish_output2(struct sk_buff *skb)
return -EINVAL;
 }
 
+static inline int ip_skb_dst_mtu(struct sk_buff *skb)
+{
+   struct inet_sock *inet = skb-sk ? inet_sk(skb-sk) : NULL;
+
+   return (inet  inet-pmtudisc == IP_PMTUDISC_PROBE) ?
+  skb-dst-dev-mtu : dst_mtu(skb-dst);
+}
+
 static inline int ip_finish_output(struct sk_buff *skb)
 {
 #if defined(CONFIG_NETFILTER)  defined(CONFIG_XFRM)
@@ -198,7 +206,7 @@ static inline int ip_finish_output(struct sk_buff *skb)
return dst_output(skb);
}
 #endif
-   if (skb-len  dst_mtu(skb-dst)  !skb_is_gso(skb))
+   if (skb-len  ip_skb_dst_mtu(skb)  !skb_is_gso(skb))
return ip_fragment(skb, ip_finish_output2);
else
return ip_finish_output2(skb);
@@ -422,7 +430,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct 
sk_buff*))
if (unlikely((iph-frag_off  htons(IP_DF))  !skb-local_df)) {
IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(dst_mtu(rt-u.dst)));
+ htonl(ip_skb_dst_mtu(skb)));
kfree_skb(skb);
return -EMSGSIZE;
}
@@ -787,7 +795,9 @@ int ip_append_data(struct sock *sk,
inet-cork.addr = ipc-addr;
}
dst_hold(rt-u.dst);
-   inet-cork.fragsize = mtu = dst_mtu(rt-u.dst.path);
+   inet-cork.fragsize = mtu = inet-pmtudisc == IP_PMTUDISC_PROBE 
?
+   rt-u.dst.dev-mtu : 
+   dst_mtu(rt-u.dst.path);
inet-cork.rt = rt;
inet-cork.length = 0;
sk-sk_sndmsg_page = NULL;
@@ -1203,13 +1213,13 @@ int ip_push_pending_frames(struct sock *sk)
 * to fragment the frame generated here. No matter, what transforms
 * how transforms change size of the packet, it will come out.
 */
-   if (inet-pmtudisc != IP_PMTUDISC_DO)
+   if (inet-pmtudisc  IP_PMTUDISC_DO)
skb-local_df = 1;
 
/* DF bit is set when we want to see DF on outgoing frames.
 * If local_df is set too, we still allow to fragment this frame
 * locally. */
-   if (inet-pmtudisc == IP_PMTUDISC_DO ||
+   if (inet-pmtudisc = IP_PMTUDISC_DO ||
(skb-len = dst_mtu(rt-u.dst) 
 ip_dont_fragment(sk, rt-u.dst)))
df = htons(IP_DF);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index c199d23..4d54457 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -542,7 +542,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
inet-hdrincl = val ? 1 : 0;
break;
case IP_MTU_DISCOVER:
-   if (val0 || val2)
+   if (val0 || val3)
goto e_inval;
inet-pmtudisc = val;
break;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5a5b7d4..f508171 100644
--- a/net/ipv6/ip6_output.c
+++

[PATCH 2/4] Revert [NET] Do pmtu check in transport layer

2007-04-18 Thread John Heffner

This reverts commit 87e927a0583bd4a8ba9e97cd75b58d8aa1c76e37.

This idea does not work, as pointed out by Patrick McHardy.

Signed-off-by: John Heffner [EMAIL PROTECTED]
---
 net/ipv4/ip_output.c  |4 +---
 net/ipv4/raw.c|8 +++-
 net/ipv6/ip6_output.c |   11 +--
 net/ipv6/raw.c|7 ++-
 4 files changed, 11 insertions(+), 19 deletions(-)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 79e71ee..34606ef 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -810,9 +810,7 @@ int ip_append_data(struct sock *sk,
fragheaderlen = sizeof(struct iphdr) + (opt ? opt-optlen : 0);
maxfraglen = ((mtu - fragheaderlen)  ~7) + fragheaderlen;
 
-   if (inet-cork.length + length  0x - fragheaderlen ||
-   (inet-pmtudisc = IP_PMTUDISC_DO 
-inet-cork.length + length  mtu)) {
+   if (inet-cork.length + length  0x - fragheaderlen) {
ip_local_error(sk, EMSGSIZE, rt-rt_dst, inet-dport, 
mtu-exthdrlen);
return -EMSGSIZE;
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index c60aadf..24d7c9f 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -271,12 +271,10 @@ static int raw_send_hdrinc(struct sock *sk, void *from, 
size_t length,
struct iphdr *iph;
struct sk_buff *skb;
int err;
-   int mtu;
 
-   mtu = inet-pmtudisc == IP_PMTUDISC_DO ? dst_mtu(rt-u.dst) :
-rt-u.dst.dev-mtu;
-   if (length  mtu) {
-   ip_local_error(sk, EMSGSIZE, rt-rt_dst, inet-dport, mtu);
+   if (length  rt-u.dst.dev-mtu) {
+   ip_local_error(sk, EMSGSIZE, rt-rt_dst, inet-dport,
+  rt-u.dst.dev-mtu);
return -EMSGSIZE;
}
if (flagsMSG_PROBE)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index b8e307a..4cfdad4 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1079,12 +1079,11 @@ int ip6_append_data(struct sock *sk, int getfrag(void 
*from, char *to,
fragheaderlen = sizeof(struct ipv6hdr) + rt-u.dst.nfheader_len + (opt 
? opt-opt_nflen : 0);
maxfraglen = ((mtu - fragheaderlen)  ~7) + fragheaderlen - 
sizeof(struct frag_hdr);
 
-   if ((mtu = sizeof(struct ipv6hdr) + IPV6_MAXPLEN 
-inet-cork.length + length  sizeof(struct ipv6hdr) + IPV6_MAXPLEN 
- fragheaderlen) ||
-   (np-pmtudisc = IPV6_PMTUDISC_DO 
-inet-cork.length + length  mtu)) {
-   ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
-   return -EMSGSIZE;
+   if (mtu = sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
+   if (inet-cork.length + length  sizeof(struct ipv6hdr) + 
IPV6_MAXPLEN - fragheaderlen) {
+   ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
+   return -EMSGSIZE;
+   }
}
 
/*
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index f4cd90b..f65fcd7 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -558,12 +558,9 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, 
int length,
struct sk_buff *skb;
unsigned int hh_len;
int err;
-   int mtu;
 
-   mtu = np-pmtudisc == IPV6_PMTUDISC_DO ? dst_mtu(rt-u.dst) :
-rt-u.dst.dev-mtu;
-   if (length  mtu) {
-   ipv6_local_error(sk, EMSGSIZE, fl, mtu);
+   if (length  rt-u.dst.dev-mtu) {
+   ipv6_local_error(sk, EMSGSIZE, fl, rt-u.dst.dev-mtu);
return -EMSGSIZE;
}
if (flagsMSG_PROBE)
-- 
1.5.1.rc3.30.ga8f4-dirty

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/4] Revert [NET] Add IP(V6)_PMTUDISC_RPOBE

2007-04-18 Thread John Heffner

This reverts commit d21d2a90b879c0cf159df5944847e6d9833816eb.

Must be backed out because commit 87e927a0583bd4a8ba9e97cd75b58d8aa1c76e37
does not work.

Signed-off-by: John Heffner [EMAIL PROTECTED]
---
 include/linux/in.h   |1 -
 include/linux/in6.h  |1 -
 include/linux/skbuff.h   |3 +--
 include/net/ip.h |2 +-
 net/core/skbuff.c|2 --
 net/ipv4/ip_output.c |   14 --
 net/ipv4/ip_sockglue.c   |2 +-
 net/ipv4/raw.c   |3 ---
 net/ipv6/ip6_output.c|   12 
 net/ipv6/ipv6_sockglue.c |2 +-
 net/ipv6/raw.c   |3 ---
 11 files changed, 12 insertions(+), 33 deletions(-)

diff --git a/include/linux/in.h b/include/linux/in.h
index 2dc1f8a..1912e7c 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -83,7 +83,6 @@ struct in_addr {
 #define IP_PMTUDISC_DONT   0   /* Never send DF frames */
 #define IP_PMTUDISC_WANT   1   /* Use per route hints  */
 #define IP_PMTUDISC_DO 2   /* Always DF*/
-#define IP_PMTUDISC_PROBE  3   /* Ignore dst pmtu  */
 
 #define IP_MULTICAST_IF32
 #define IP_MULTICAST_TTL   33
diff --git a/include/linux/in6.h b/include/linux/in6.h
index d559fac..4e8350a 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -179,7 +179,6 @@ struct in6_flowlabel_req
 #define IPV6_PMTUDISC_DONT 0
 #define IPV6_PMTUDISC_WANT 1
 #define IPV6_PMTUDISC_DO   2
-#define IPV6_PMTUDISC_PROBE3
 
 /* Flowlabel */
 #define IPV6_FLOWLABEL_MGR 32
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 8bf9b9f..7f17cfc 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -277,8 +277,7 @@ struct sk_buff {
nfctinfo:3;
__u8pkt_type:3,
fclone:2,
-   ipvs_property:1,
-   ign_dst_mtu:1;
+   ipvs_property:1;
__be16  protocol;
 
void(*destructor)(struct sk_buff *skb);
diff --git a/include/net/ip.h b/include/net/ip.h
index 6a08b65..75f226d 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -206,7 +206,7 @@ int ip_decrease_ttl(struct iphdr *iph)
 static inline
 int ip_dont_fragment(struct sock *sk, struct dst_entry *dst)
 {
-   return (inet_sk(sk)-pmtudisc = IP_PMTUDISC_DO ||
+   return (inet_sk(sk)-pmtudisc == IP_PMTUDISC_DO ||
(inet_sk(sk)-pmtudisc == IP_PMTUDISC_WANT 
 !(dst_metric(dst, RTAX_LOCK)(1RTAX_MTU;
 }
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2391cdf..f0d986a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -479,7 +479,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t 
gfp_mask)
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
C(ipvs_property);
 #endif
-   C(ign_dst_mtu);
C(protocol);
n-destructor = NULL;
C(mark);
@@ -543,7 +542,6 @@ static void copy_skb_header(struct sk_buff *new, const 
struct sk_buff *old)
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
new-ipvs_property = old-ipvs_property;
 #endif
-   new-ign_dst_mtu= old-ign_dst_mtu;
 #ifdef CONFIG_NET_SCHED
 #ifdef CONFIG_NET_CLS_ACT
new-tc_verd = old-tc_verd;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 704bc44..79e71ee 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -198,8 +198,7 @@ static inline int ip_finish_output(struct sk_buff *skb)
return dst_output(skb);
}
 #endif
-   if (skb-len  dst_mtu(skb-dst) 
-   !skb-ign_dst_mtu  !skb_is_gso(skb))
+   if (skb-len  dst_mtu(skb-dst)  !skb_is_gso(skb))
return ip_fragment(skb, ip_finish_output2);
else
return ip_finish_output2(skb);
@@ -788,9 +787,7 @@ int ip_append_data(struct sock *sk,
inet-cork.addr = ipc-addr;
}
dst_hold(rt-u.dst);
-   inet-cork.fragsize = mtu = inet-pmtudisc == IP_PMTUDISC_PROBE 
?
-   rt-u.dst.dev-mtu :
-   dst_mtu(rt-u.dst.path);
+   inet-cork.fragsize = mtu = dst_mtu(rt-u.dst.path);
inet-cork.rt = rt;
inet-cork.length = 0;
sk-sk_sndmsg_page = NULL;
@@ -1208,16 +1205,13 @@ int ip_push_pending_frames(struct sock *sk)
 * to fragment the frame generated here. No matter, what transforms
 * how transforms change size of the packet, it will come out.
 */
-   if (inet-pmtudisc  IP_PMTUDISC_DO)
+   if (inet-pmtudisc != IP_PMTUDISC_DO)
skb-local_df = 1;
 
-   if (inet-pmtudisc == IP_PMTUDISC_PROBE)
-   skb-ign_dst_mtu = 1;
-

[PATCH 4/4] [NET] Add IP(V6)_PMTUDISC_RPOBE

2007-04-18 Thread John Heffner

Add IP(V6)_PMTUDISC_PROBE value for IP(V6)_MTU_DISCOVER.  This option forces
us not to fragment, but does not make use of the kernel path MTU discovery.
That is, it allows for user-mode MTU probing (or, packetization-layer path
MTU discovery).  This is particularly useful for diagnostic utilities, like
traceroute/tracepath.

Signed-off-by: John Heffner [EMAIL PROTECTED]
---
 include/linux/in.h   |1 +
 include/linux/in6.h  |1 +
 net/ipv4/ip_output.c |   20 +++-
 net/ipv4/ip_sockglue.c   |2 +-
 net/ipv6/ip6_output.c|   15 ---
 net/ipv6/ipv6_sockglue.c |2 +-
 6 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/include/linux/in.h b/include/linux/in.h
index 1912e7c..3975cbf 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -83,6 +83,7 @@ struct in_addr {
 #define IP_PMTUDISC_DONT   0   /* Never send DF frames */
 #define IP_PMTUDISC_WANT   1   /* Use per route hints  */
 #define IP_PMTUDISC_DO 2   /* Always DF*/
+#define IP_PMTUDISC_PROBE  3   /* Ignore dst pmtu  */
 
 #define IP_MULTICAST_IF32
 #define IP_MULTICAST_TTL   33
diff --git a/include/linux/in6.h b/include/linux/in6.h
index 4e8350a..d559fac 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -179,6 +179,7 @@ struct in6_flowlabel_req
 #define IPV6_PMTUDISC_DONT 0
 #define IPV6_PMTUDISC_WANT 1
 #define IPV6_PMTUDISC_DO   2
+#define IPV6_PMTUDISC_PROBE3
 
 /* Flowlabel */
 #define IPV6_FLOWLABEL_MGR 32
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 34606ef..66e2c3a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -189,6 +189,14 @@ static inline int ip_finish_output2(struct sk_buff *skb)
return -EINVAL;
 }
 
+static inline int ip_skb_dst_mtu(struct sk_buff *skb)
+{
+   struct inet_sock *inet = skb-sk ? inet_sk(skb-sk) : NULL;
+
+   return (inet  inet-pmtudisc == IP_PMTUDISC_PROBE) ?
+  skb-dst-dev-mtu : dst_mtu(skb-dst);
+}
+
 static inline int ip_finish_output(struct sk_buff *skb)
 {
 #if defined(CONFIG_NETFILTER)  defined(CONFIG_XFRM)
@@ -198,7 +206,7 @@ static inline int ip_finish_output(struct sk_buff *skb)
return dst_output(skb);
}
 #endif
-   if (skb-len  dst_mtu(skb-dst)  !skb_is_gso(skb))
+   if (skb-len  ip_skb_dst_mtu(skb)  !skb_is_gso(skb))
return ip_fragment(skb, ip_finish_output2);
else
return ip_finish_output2(skb);
@@ -422,7 +430,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct 
sk_buff*))
if (unlikely((iph-frag_off  htons(IP_DF))  !skb-local_df)) {
IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(dst_mtu(rt-u.dst)));
+ htonl(ip_skb_dst_mtu(skb)));
kfree_skb(skb);
return -EMSGSIZE;
}
@@ -787,7 +795,9 @@ int ip_append_data(struct sock *sk,
inet-cork.addr = ipc-addr;
}
dst_hold(rt-u.dst);
-   inet-cork.fragsize = mtu = dst_mtu(rt-u.dst.path);
+   inet-cork.fragsize = mtu = inet-pmtudisc == IP_PMTUDISC_PROBE 
?
+   rt-u.dst.dev-mtu : 
+   dst_mtu(rt-u.dst.path);
inet-cork.rt = rt;
inet-cork.length = 0;
sk-sk_sndmsg_page = NULL;
@@ -1203,13 +1213,13 @@ int ip_push_pending_frames(struct sock *sk)
 * to fragment the frame generated here. No matter, what transforms
 * how transforms change size of the packet, it will come out.
 */
-   if (inet-pmtudisc != IP_PMTUDISC_DO)
+   if (inet-pmtudisc  IP_PMTUDISC_DO)
skb-local_df = 1;
 
/* DF bit is set when we want to see DF on outgoing frames.
 * If local_df is set too, we still allow to fragment this frame
 * locally. */
-   if (inet-pmtudisc == IP_PMTUDISC_DO ||
+   if (inet-pmtudisc = IP_PMTUDISC_DO ||
(skb-len = dst_mtu(rt-u.dst) 
 ip_dont_fragment(sk, rt-u.dst)))
df = htons(IP_DF);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index c199d23..4d54457 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -542,7 +542,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
inet-hdrincl = val ? 1 : 0;
break;
case IP_MTU_DISCOVER:
-   if (val0 || val2)
+   if (val0 || val3)
goto e_inval;
inet-pmtudisc = val;
break;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5a5b7d4..f508171 100644
--- a/net/ipv6/ip6_output.c
+++

[PATCH 3/4] [NET] MTU discovery check in ip6_fragment()

2007-04-18 Thread John Heffner

Adds a check in ip6_fragment() mirroring ip_fragment() for packets
that we can't fragment, and sends an ICMP Packet Too Big message
in response.

Signed-off-by: John Heffner [EMAIL PROTECTED]
---
 net/ipv6/ip6_output.c |   13 +
 1 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4cfdad4..5a5b7d4 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -567,6 +567,19 @@ static int ip6_fragment(struct sk_buff *skb, int 
(*output)(struct sk_buff *))
nexthdr = *prevhdr;
 
mtu = dst_mtu(rt-u.dst);
+
+   /* We must not fragment if the socket is set to force MTU discovery
+* or if the skb it not generated by a local socket.  (This last
+* check should be redundant, but it's free.)
+*/
+   if (!np || np-pmtudisc = IPV6_PMTUDISC_DO) {
+   skb-dev = skb-dst-dev;
+   icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb-dev);
+   IP6_INC_STATS(ip6_dst_idev(skb-dst), IPSTATS_MIB_FRAGFAILS);
+   kfree_skb(skb);
+   return -EMSGSIZE;
+   }
+
if (np  np-frag_size  mtu) {
if (np-frag_size)
mtu = np-frag_size;
-- 
1.5.1.rc3.30.ga8f4-dirty

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] [NET] Add IP(V6)_PMTUDISC_RPOBE

2007-04-18 Thread John Heffner

Sorry, forgot the -n flag on git-format-patch.  Patches resent with 
correct sequence numbers.


Thanks,
  -John
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] [NET] Add IP(V6)_PMTUDISC_RPOBE

2007-04-18 Thread David Miller

From: John Heffner [EMAIL PROTECTED]
Date: Wed, 18 Apr 2007 21:11:26 -0400

 Sorry, forgot the -n flag on git-format-patch.  Patches resent with 
 correct sequence numbers.

Thanks for fixing that.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: AF_PACKET how to get the original netdev from a packet received from a bonded master

2007-04-18 Thread Chris Leech


On 4/18/07, David Miller [EMAIL PROTECTED] wrote:


Ok, I'll try to remember to high-priority reviewing PJ's patch
on my next rebase of the net-2.6.22 tree which should be
tonight or tomorrow sometime.


Thanks Dave, PJ is offline this week so I'm trying to keep an eye out
for discussions related to his various patches :-)

Just to give you an idea of our motivation around this, we're looking
at layer 2 configuration protocols implemented from user space.  As an
example Link Layer Discovery Protocol could be used to detect trunking
misconfiguration, but only if you can track that information for the
underlying interfaces of a bond.  Things like 802.1x authenticated
links in a bond would have a similar issue of needing to configure
each underlying interface before bringing up the bond, but with LLDP
there's the added fun of being able to receive updated notifications
of configuration changes from the link partner at any time.

- Chris
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [NETLINK] Don't attach callback to a going-away netlink socket

2007-04-18 Thread Herbert Xu

David Miller [EMAIL PROTECTED] wrote:
 
 As discussed in this thread there might be other ways to
 approach this, but this fix is good for now.
 
 Patch applied, thank you.

Actually I was going to suggest something like this:

[NETLINK]: Kill CB only when socket is unused

Since we can still receive packets until all references to the
socket are gone, we don't need to kill the CB until that happens.
This also aligns ourselves with the receive queue purging which
happens at that point.

Original patch by Pavel Emelianov who noticed this race condition.

Signed-off-by: Herbert Xu [EMAIL PROTECTED]

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 0be19b7..914884c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -139,6 +139,15 @@ static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash 
*hash, u32 pid)
 
 static void netlink_sock_destruct(struct sock *sk)
 {
+   struct netlink_sock *nlk = nlk_sk(sk);
+
+   WARN_ON(mutex_is_locked(nlk_sk(sk)-cb_mutex));
+   if (nlk-cb) {
+   if (nlk-cb-done)
+   nlk-cb-done(nlk-cb);
+   netlink_destroy_callback(nlk-cb);
+   }
+
skb_queue_purge(sk-sk_receive_queue);
 
if (!sock_flag(sk, SOCK_DEAD)) {
@@ -147,7 +156,6 @@ static void netlink_sock_destruct(struct sock *sk)
}
BUG_TRAP(!atomic_read(sk-sk_rmem_alloc));
BUG_TRAP(!atomic_read(sk-sk_wmem_alloc));
-   BUG_TRAP(!nlk_sk(sk)-cb);
BUG_TRAP(!nlk_sk(sk)-groups);
 }
 
@@ -450,17 +458,7 @@ static int netlink_release(struct socket *sock)
netlink_remove(sk);
nlk = nlk_sk(sk);
 
-   mutex_lock(nlk-cb_mutex);
-   if (nlk-cb) {
-   if (nlk-cb-done)
-   nlk-cb-done(nlk-cb);
-   netlink_destroy_callback(nlk-cb);
-   nlk-cb = NULL;
-   }
-   mutex_unlock(nlk-cb_mutex);
-
-   /* OK. Socket is unlinked, and, therefore,
-  no new packets will arrive */
+   /* OK. Socket is unlinked. */
 
sock_orphan(sk);
sock-sk = NULL;
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: ESP interfamily tunnel bug?

2007-04-18 Thread Kazunori MIYAZAWA


Hello Diego,

I tried to reproduce the bug. But I got a panic of the kernel :-
I'm using current net-2.6.

I suspect that some special routing for loopback is related
because I checked with kdb and got the backtrace like

fib_sync_down
ipv6_rcv
netif_receive_skb
__mod_timer
net_rx_action
__do_softirq
do_softirq
local_bh_enable
dev_queue_xmit
neigh_resolve_output
ip_output
xfrm4_output_finish
xfrm4_output
ip_generic_getfrag
ip6_push_pending_frames

I think ip_rcv or some IPv4 function should be called between netif_receive_skb
and ipv6_rcv.

Anyway I could not classify the way to make a panic.
I'll trace it.

Thank you,

Diego Beltrami wrote:

Hi,

we have discovered a routing related problem in ESP tunnel and beet mode.
We don't know whether it is a bug in the XFRM, or just in the way the
virtual addresses and the corresponding routes are set-up. We set up a
dummy0 device for the virtual addresses:

[EMAIL PROTECTED]:~# ip addr show dummy0
5: dummy0: BROADCAST,NOARP,UP,1 mtu 1500 qdisc noqueue
 link/ether 92:09:fe:11:81:1b brd ff:ff:ff:ff:ff:ff
 inet6 2001:72:e6d3:1cf3:e11d:5bb0:b99:e85e/28 scope global
valid_lft forever preferred_lft forever
 inet6 2001:74:32e0:df36:e862:3963:523e:dd7d/28 scope global
valid_lft forever preferred_lft forever
 inet6 2001:73:d3a8:8723:d572:7549:7f2c:e590/28 scope global
valid_lft forever preferred_lft forever
 inet6 2001:75:a2e6:aad6:e901:dd1c:ba95:e300/28 scope global
valid_lft forever preferred_lft forever
 inet6 fe80::9009:feff:fe11:811b/64 scope link
valid_lft forever preferred_lft forever

And then we have routes for the virtual addresses:

[EMAIL PROTECTED]:~# ip -6 route
2001:72:e6d3:1cf3:e11d:5bb0:b99:e85e dev dummy0  metric 1024  expires
21334305sec mtu 1500 advmss 1440 metric 10 4294967295
2001:73:d3a8:8723:d572:7549:7f2c:e590 dev dummy0  metric 1024  expires
21334305sec mtu 1500 advmss 1440 metric 10 4294967295
2001:74:32e0:df36:e862:3963:523e:dd7d dev dummy0  metric 1024  expires
21334305sec mtu 1500 advmss 1440 metric 10 4294967295
2001:75:a2e6:aad6:e901:dd1c:ba95:e300 dev dummy0  metric 1024  expires
21334305sec mtu 1500 advmss 1440 metric 10 4294967295
2001:70::/28 dev dummy0  metric 256  expires 21334305sec mtu 1500 advmss
1440 metric 10 4294967295
fe80::/64 dev dummy0  metric 256  expires 21334305sec mtu 1500 advmss 1440
metric 10 4294967295
ff00::/8 dev eth0  metric 256  expires 21325454sec mtu 1500 advmss 1440
metric 10 4294967295
ff00::/8 dev dummy0  metric 256  expires 21334305sec mtu 1500 advmss 1440
metric 10 4294967295
unreachable default dev lo  proto none  metric -1  error -101 metric 10
255

...and set-up policies and associations. The virtual IPv6 addresses
are inner and IPv4 addresses are outer addresses:

[EMAIL PROTECTED]:~/projects/hipl--userspace--2.6# ip xfrm policy show
src 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15/128 dst
2001:74:32e0:df36:e862:3963:523e:dd7d/128
 dir in priority 0
 tmpl src c1a7:bb82:: dst c0a8:65::
 proto esp reqid 0 mode beet
src 2001:74:32e0:df36:e862:3963:523e:dd7d/128 dst
2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15/128
 dir out priority 0
 tmpl src c0a8:65:: dst c1a7:bb82::
 proto esp reqid 0 mode beet

[EMAIL PROTECTED]:~/projects/hipl--userspace--2.6# ip xfrm state show
src 193.167.187.130 dst 192.168.0.101
 proto esp spi 0xf556c7c7 reqid 0 mode beet
 replay-window 0
 auth sha1 0xab327b944011c94a0c54a097b4752e23f377ff34
 enc aes 0x882a334830b1cd14b9e411ec37a4242f
 encap type espinudp-nonike sport 50500 dport 50500
   addr 193.167.187.130
 sel src 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15/0
 dst 2001:74:32e0:df36:e862:3963:523e:dd7d/0
 src 192.168.0.101 dst 193.167.187.130
 proto esp spi 0x1663f3a4 reqid 0 mode beet
 replay-window 0
 auth sha1 0x9f07dabce4abf2ebfe45e247ede2cf15f9156a13
 enc aes 0xfc50593b9af6d296b042a16ca00bad20
 encap type espinudp-nonike
 sport 50500 dport 50500 addr 192.168.0.101
 sel src 2001:74:32e0:df36:e862:3963:523e:dd7d/0
 dst 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15/0

And then we try to ping6 the virtual address:

[EMAIL PROTECTED]:~/projects/hipl--userspace--2.6# ping6 -I
2001:0074:32e0:df36:e862:3963:523e:dd7d
2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15
PING
2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15(2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15)
from 2001:74:32e0:df36:e862:3963:523e:dd7d : 56 data bytes
ping: sendmsg: Network is unreachable
ping: sendmsg: Network is unreachable

Tcpdump shows no traffic at the host. We can repeat the problem both with
tunnel and beet modes in 2.6.21-rc6 (and also in 2.6.17.14).

I have also tried ip rule stuff but it seems that it does not rule with
IPv6 :) It does not help either to

Re: ESP interfamily tunnel bug?

2007-04-18 Thread Diego Beltrami

Hi Kazunori,
thanks for the reply.

In your backtrace I see that there are both input and output function calls. Is
that the expected behaviour?

One more thing: were the two hosts you used located on the same network?
In fact, it seems that if the machines are on the same network, this bug doesn't
manifest.

Thanks,

Diego


 Hello Diego,

 I tried to reproduce the bug. But I got a panic of the kernel :-
 I'm using current net-2.6.

 I suspect that some special routing for loopback is related
 because I checked with kdb and got the backtrace like

   fib_sync_down
   ipv6_rcv
   netif_receive_skb
   __mod_timer
   net_rx_action
   __do_softirq
   do_softirq
   local_bh_enable
   dev_queue_xmit
   neigh_resolve_output
   ip_output
   xfrm4_output_finish
   xfrm4_output
   ip_generic_getfrag
   ip6_push_pending_frames

 I think ip_rcv or some IPv4 function should be called between
 netif_receive_skb
 and ipv6_rcv.

 Anyway I could not classify the way to make a panic.
 I'll trace it.

 Thank you,

 Diego Beltrami wrote:
  Hi,
 
  we have discovered a routing related problem in ESP tunnel and beet mode.
  We don't know whether it is a bug in the XFRM, or just in the way the
  virtual addresses and the corresponding routes are set-up. We set up a
  dummy0 device for the virtual addresses:
 
  [EMAIL PROTECTED]:~# ip addr show dummy0
  5: dummy0: BROADCAST,NOARP,UP,1 mtu 1500 qdisc noqueue
   link/ether 92:09:fe:11:81:1b brd ff:ff:ff:ff:ff:ff
   inet6 2001:72:e6d3:1cf3:e11d:5bb0:b99:e85e/28 scope global
  valid_lft forever preferred_lft forever
   inet6 2001:74:32e0:df36:e862:3963:523e:dd7d/28 scope global
  valid_lft forever preferred_lft forever
   inet6 2001:73:d3a8:8723:d572:7549:7f2c:e590/28 scope global
  valid_lft forever preferred_lft forever
   inet6 2001:75:a2e6:aad6:e901:dd1c:ba95:e300/28 scope global
  valid_lft forever preferred_lft forever
   inet6 fe80::9009:feff:fe11:811b/64 scope link
  valid_lft forever preferred_lft forever
 
  And then we have routes for the virtual addresses:
 
  [EMAIL PROTECTED]:~# ip -6 route
  2001:72:e6d3:1cf3:e11d:5bb0:b99:e85e dev dummy0  metric 1024  expires
  21334305sec mtu 1500 advmss 1440 metric 10 4294967295
  2001:73:d3a8:8723:d572:7549:7f2c:e590 dev dummy0  metric 1024  expires
  21334305sec mtu 1500 advmss 1440 metric 10 4294967295
  2001:74:32e0:df36:e862:3963:523e:dd7d dev dummy0  metric 1024  expires
  21334305sec mtu 1500 advmss 1440 metric 10 4294967295
  2001:75:a2e6:aad6:e901:dd1c:ba95:e300 dev dummy0  metric 1024  expires
  21334305sec mtu 1500 advmss 1440 metric 10 4294967295
  2001:70::/28 dev dummy0  metric 256  expires 21334305sec mtu 1500 advmss
  1440 metric 10 4294967295
  fe80::/64 dev dummy0  metric 256  expires 21334305sec mtu 1500 advmss 1440
  metric 10 4294967295
  ff00::/8 dev eth0  metric 256  expires 21325454sec mtu 1500 advmss 1440
  metric 10 4294967295
  ff00::/8 dev dummy0  metric 256  expires 21334305sec mtu 1500 advmss 1440
  metric 10 4294967295
  unreachable default dev lo  proto none  metric -1  error -101 metric 10
  255
 
  ...and set-up policies and associations. The virtual IPv6 addresses
  are inner and IPv4 addresses are outer addresses:
 
  [EMAIL PROTECTED]:~/projects/hipl--userspace--2.6# ip xfrm policy show
  src 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15/128 dst
  2001:74:32e0:df36:e862:3963:523e:dd7d/128
   dir in priority 0
   tmpl src c1a7:bb82:: dst c0a8:65::
   proto esp reqid 0 mode beet
  src 2001:74:32e0:df36:e862:3963:523e:dd7d/128 dst
  2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15/128
   dir out priority 0
   tmpl src c0a8:65:: dst c1a7:bb82::
   proto esp reqid 0 mode beet
 
  [EMAIL PROTECTED]:~/projects/hipl--userspace--2.6# ip xfrm state show
  src 193.167.187.130 dst 192.168.0.101
   proto esp spi 0xf556c7c7 reqid 0 mode beet
   replay-window 0
   auth sha1 0xab327b944011c94a0c54a097b4752e23f377ff34
   enc aes 0x882a334830b1cd14b9e411ec37a4242f
   encap type espinudp-nonike sport 50500 dport 50500
 addr 193.167.187.130
   sel src 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15/0
   dst 2001:74:32e0:df36:e862:3963:523e:dd7d/0
   src 192.168.0.101 dst 193.167.187.130
   proto esp spi 0x1663f3a4 reqid 0 mode beet
   replay-window 0
   auth sha1 0x9f07dabce4abf2ebfe45e247ede2cf15f9156a13
   enc aes 0xfc50593b9af6d296b042a16ca00bad20
   encap type espinudp-nonike
   sport 50500 dport 50500 addr 192.168.0.101
   sel src 2001:74:32e0:df36:e862:3963:523e:dd7d/0
   dst 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15/0
 
  And then we try to ping6 the virtual address:
 
  [EMAIL PROTECTED]:~/projects/hipl--userspace--2.6# ping6 -I
  2001:0074:32e0:df36:e862:3963:523e:dd7d

Re: + ppp_generic-fix-lockdep-warning.patch added to -mm tree

2007-04-18 Thread Jarek Poplawski

On Tue, Apr 17, 2007 at 09:37:44AM +0200, Jarek Poplawski wrote:
...
 Yuriy - thanks for testing my patch ...(pause) Not!
 
 It seems this patch is not visible in this version - probably
...

Sorry! It was only something with my eyes.
(Probably too much of Pamela!).

Jarek P.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

79 matches

Mail list logo