[PATCH] [0/5] [v2] pasemi_mac: fixes and enhancements
Hi, The five following patches contain a number of fixes and improvements of the pasemi_mac driver: 1/5: A couple of minor bugfixes. 2/5: Move the IRQ mapping from the PCI layer under our platform, to the driver. 3/5: A rather large patch with various NAPI/performance-related fixes and enhancements. 4/5: phy support 5/5: use local-mac-address instead of mac-address if available. (Changes from last time: Added 5/5, changes to 2/5 to use virq_to_hw()). -Olof - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [1/5] [v2] pasemi_mac: minor bugfixes
Ethernet bugfixes: * Move the was_full/wake_queue logic from tx_intr to clean_tx * Fix polarity in checks in pasemi_mac_close Signed-off-by: Olof Johansson [EMAIL PROTECTED] Index: linux-2.6/drivers/net/pasemi_mac.c === --- linux-2.6.orig/drivers/net/pasemi_mac.c +++ linux-2.6/drivers/net/pasemi_mac.c @@ -451,9 +451,12 @@ static int pasemi_mac_clean_tx(struct pa struct pas_dma_xct_descr *dp; int start, count; int flags; + int was_full; spin_lock_irqsave(mac-tx-lock, flags); + was_full = mac-tx-next_to_clean - mac-tx-next_to_use == TX_RING_SIZE; + start = mac-tx-next_to_clean; count = 0; @@ -478,6 +481,9 @@ static int pasemi_mac_clean_tx(struct pa mac-tx-next_to_clean += count; spin_unlock_irqrestore(mac-tx-lock, flags); + if (was_full) + netif_wake_queue(mac-netdev); + return count; } @@ -512,9 +518,6 @@ static irqreturn_t pasemi_mac_tx_intr(in struct net_device *dev = data; struct pasemi_mac *mac = netdev_priv(dev); unsigned int reg; - int was_full; - - was_full = mac-tx-next_to_clean - mac-tx-next_to_use == TX_RING_SIZE; if (!(*mac-tx_status PAS_STATUS_INT)) return IRQ_NONE; @@ -528,9 +531,6 @@ static irqreturn_t pasemi_mac_tx_intr(in pci_write_config_dword(mac-iob_pdev, PAS_IOB_DMA_TXCH_RESET(mac-dma_txch), reg); - if (was_full) - netif_wake_queue(dev); - return IRQ_HANDLED; } @@ -662,40 +665,37 @@ static int pasemi_mac_close(struct net_d pci_read_config_dword(mac-dma_pdev, PAS_DMA_TXCHAN_TCMDSTA(mac-dma_txch), stat); - if (stat PAS_DMA_TXCHAN_TCMDSTA_ACT) + if (!(stat PAS_DMA_TXCHAN_TCMDSTA_ACT)) break; cond_resched(); } - if (!(stat PAS_DMA_TXCHAN_TCMDSTA_ACT)) { + if (stat PAS_DMA_TXCHAN_TCMDSTA_ACT) dev_err(mac-dma_pdev-dev, Failed to stop tx channel\n); - } for (retries = 0; retries MAX_RETRIES; retries++) { pci_read_config_dword(mac-dma_pdev, PAS_DMA_RXCHAN_CCMDSTA(mac-dma_rxch), stat); - if (stat PAS_DMA_RXCHAN_CCMDSTA_ACT) + if (!(stat PAS_DMA_RXCHAN_CCMDSTA_ACT)) break; cond_resched(); } - if (!(stat PAS_DMA_RXCHAN_CCMDSTA_ACT)) { + if (stat 
PAS_DMA_RXCHAN_CCMDSTA_ACT) dev_err(mac-dma_pdev-dev, Failed to stop rx channel\n); - } for (retries = 0; retries MAX_RETRIES; retries++) { pci_read_config_dword(mac-dma_pdev, PAS_DMA_RXINT_RCMDSTA(mac-dma_if), stat); - if (stat PAS_DMA_RXINT_RCMDSTA_ACT) + if (!(stat PAS_DMA_RXINT_RCMDSTA_ACT)) break; cond_resched(); } - if (!(stat PAS_DMA_RXINT_RCMDSTA_ACT)) { + if (stat PAS_DMA_RXINT_RCMDSTA_ACT) dev_err(mac-dma_pdev-dev, Failed to stop rx interface\n); - } /* Then, disable the channel. This must be done separately from * stopping, since you can't disable when active. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [2/5] [v2] pasemi_mac: irq mapping changes
Fixes for ethernet IRQ mapping, to be done in the driver instead of in the platform setup code. Signed-off-by: Olof Johansson [EMAIL PROTECTED] Index: powerpc/arch/powerpc/platforms/pasemi/pci.c === --- powerpc.orig/arch/powerpc/platforms/pasemi/pci.c +++ powerpc/arch/powerpc/platforms/pasemi/pci.c @@ -163,19 +163,6 @@ static void __init pas_fixup_phb_resourc } -void __devinit pas_pci_irq_fixup(struct pci_dev *dev) -{ - /* DMA is special, 84 interrupts (128 - 211), all but 128 -* need to be mapped by hand here. -*/ - if (dev-vendor == 0x1959 dev-device == 0xa007) { - int i; - for (i = 129; i 212; i++) - irq_create_mapping(NULL, i); - } -} - - void __init pas_pci_init(void) { struct device_node *np, *root; Index: powerpc/arch/powerpc/platforms/pasemi/setup.c === --- powerpc.orig/arch/powerpc/platforms/pasemi/setup.c +++ powerpc/arch/powerpc/platforms/pasemi/setup.c @@ -240,5 +240,4 @@ define_machine(pas) { .check_legacy_ioport= pas_check_legacy_ioport, .progress = pas_progress, .machine_check_exception = pas_machine_check_handler, - .pci_irq_fixup = pas_pci_irq_fixup, }; Index: powerpc/drivers/net/pasemi_mac.c === --- powerpc.orig/drivers/net/pasemi_mac.c +++ powerpc/drivers/net/pasemi_mac.c @@ -33,6 +33,8 @@ #include linux/tcp.h #include net/checksum.h +#include asm/irq.h + #include pasemi_mac.h @@ -537,6 +539,7 @@ static irqreturn_t pasemi_mac_tx_intr(in static int pasemi_mac_open(struct net_device *dev) { struct pasemi_mac *mac = netdev_priv(dev); + int base_irq; unsigned int flags; int ret; @@ -600,28 +603,37 @@ static int pasemi_mac_open(struct net_de netif_start_queue(dev); netif_poll_enable(dev); - ret = request_irq(mac-dma_pdev-irq + mac-dma_txch, - pasemi_mac_tx_intr, IRQF_DISABLED, + /* Interrupts are a bit different for our DMA controller: While +* it's got one a regular PCI device header, the interrupt there +* is really the base of the range it's using. Each tx and rx +* channel has it's own interrupt source. 
+*/ + + base_irq = virq_to_hw(mac-dma_pdev-irq); + + mac-tx_irq = irq_create_mapping(NULL, base_irq + mac-dma_txch); + mac-rx_irq = irq_create_mapping(NULL, base_irq + 20 + mac-dma_txch); + + ret = request_irq(mac-tx_irq, pasemi_mac_tx_intr, IRQF_DISABLED, mac-tx-irq_name, dev); if (ret) { dev_err(mac-pdev-dev, request_irq of irq %d failed: %d\n, - mac-dma_pdev-irq + mac-dma_txch, ret); + base_irq + mac-dma_txch, ret); goto out_tx_int; } - ret = request_irq(mac-dma_pdev-irq + 20 + mac-dma_rxch, - pasemi_mac_rx_intr, IRQF_DISABLED, + ret = request_irq(mac-rx_irq, pasemi_mac_rx_intr, IRQF_DISABLED, mac-rx-irq_name, dev); if (ret) { dev_err(mac-pdev-dev, request_irq of irq %d failed: %d\n, - mac-dma_pdev-irq + 20 + mac-dma_rxch, ret); + base_irq + 20 + mac-dma_rxch, ret); goto out_rx_int; } return 0; out_rx_int: - free_irq(mac-dma_pdev-irq + mac-dma_txch, dev); + free_irq(mac-tx_irq, dev); out_tx_int: netif_poll_disable(dev); netif_stop_queue(dev); @@ -705,8 +717,8 @@ static int pasemi_mac_close(struct net_d pci_write_config_dword(mac-dma_pdev, PAS_DMA_RXINT_RCMDSTA(mac-dma_if), 0); - free_irq(mac-dma_pdev-irq + mac-dma_txch, dev); - free_irq(mac-dma_pdev-irq + 20 + mac-dma_rxch, dev); + free_irq(mac-tx_irq, dev); + free_irq(mac-rx_irq, dev); /* Free resources */ pasemi_mac_free_rx_resources(dev); Index: powerpc/drivers/net/pasemi_mac.h === --- powerpc.orig/drivers/net/pasemi_mac.h +++ powerpc/drivers/net/pasemi_mac.h @@ -73,6 +73,8 @@ struct pasemi_mac { struct pasemi_mac_txring *tx; struct pasemi_mac_rxring *rx; + unsigned long tx_irq; + unsigned long rx_irq; }; /* Software status descriptor (desc_info) */ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [4/5] [v2] pasemi_mac: phy support
PHY support for pasemi_mac. Also add msg_enable flags for future disablement of the link messages. Signed-off-by: Olof Johansson [EMAIL PROTECTED] Index: powerpc/drivers/net/pasemi_mac.c === --- powerpc.orig/drivers/net/pasemi_mac.c +++ powerpc/drivers/net/pasemi_mac.c @@ -594,6 +592,110 @@ static irqreturn_t pasemi_mac_tx_intr(in return IRQ_HANDLED; } +static void pasemi_adjust_link(struct net_device *dev) +{ + struct pasemi_mac *mac = netdev_priv(dev); + int msg; + unsigned int flags; + unsigned int new_flags; + + if (!mac-phydev-link) { + /* If no link, MAC speed settings don't matter. Just report +* link down and return. +*/ + if (mac-link netif_msg_link(mac)) + printk(KERN_INFO %s: Link is down.\n, dev-name); + + netif_carrier_off(dev); + mac-link = 0; + + return; + } else + netif_carrier_on(dev); + + pci_read_config_dword(mac-pdev, PAS_MAC_CFG_PCFG, flags); + new_flags = flags ~(PAS_MAC_CFG_PCFG_HD | PAS_MAC_CFG_PCFG_SPD_M); + + if (!mac-phydev-duplex) + new_flags |= PAS_MAC_CFG_PCFG_HD; + + switch (mac-phydev-speed) { + case 1000: + new_flags |= PAS_MAC_CFG_PCFG_SPD_1G; + break; + case 100: + new_flags |= PAS_MAC_CFG_PCFG_SPD_100M; + break; + case 10: + new_flags |= PAS_MAC_CFG_PCFG_SPD_10M; + break; + default: + printk(Unsupported speed %d\n, mac-phydev-speed); + } + + /* Print on link or speed/duplex change */ + msg = mac-link != mac-phydev-link || flags != new_flags; + + mac-duplex = mac-phydev-duplex; + mac-speed = mac-phydev-speed; + mac-link = mac-phydev-link; + + if (new_flags != flags) + pci_write_config_dword(mac-pdev, PAS_MAC_CFG_PCFG, new_flags); + + if (msg netif_msg_link(mac)) + printk(KERN_INFO %s: Link is up at %d Mbps, %s duplex.\n, + dev-name, mac-speed, mac-duplex ? 
full : half); +} + +static int pasemi_mac_phy_init(struct net_device *dev) +{ + struct pasemi_mac *mac = netdev_priv(dev); + struct device_node *dn, *phy_dn; + struct phy_device *phydev; + unsigned int phy_id; + const phandle *ph; + const unsigned int *prop; + struct resource r; + int ret; + + dn = pci_device_to_OF_node(mac-pdev); + ph = get_property(dn, phy-handle, NULL); + if (!ph) + return -ENODEV; + phy_dn = of_find_node_by_phandle(*ph); + + prop = get_property(phy_dn, reg, NULL); + ret = of_address_to_resource(phy_dn-parent, 0, r); + if (ret) + goto err; + + phy_id = *prop; + snprintf(mac-phy_id, BUS_ID_SIZE, PHY_ID_FMT, (int)r.start, phy_id); + + of_node_put(phy_dn); + + mac-link = 0; + mac-speed = 0; + mac-duplex = -1; + + phydev = phy_connect(dev, mac-phy_id, pasemi_adjust_link, 0, PHY_INTERFACE_MODE_SGMII); + + if (IS_ERR(phydev)) { + printk(KERN_ERR %s: Could not attach to phy\n, dev-name); + return PTR_ERR(phydev); + } + + mac-phydev = phydev; + + return 0; + +err: + of_node_put(phy_dn); + return -ENODEV; +} + + static int pasemi_mac_open(struct net_device *dev) { struct pasemi_mac *mac = netdev_priv(dev); @@ -667,6 +769,13 @@ static int pasemi_mac_open(struct net_de pasemi_mac_replenish_rx_ring(dev); + ret = pasemi_mac_phy_init(dev); + /* Some configs don't have PHYs (XAUI etc), so don't complain about +* failed init due to -ENODEV. 
+*/ + if (ret ret != -ENODEV) + dev_warn(mac-pdev-dev, phy init failed: %d\n, ret); + netif_start_queue(dev); netif_poll_enable(dev); @@ -697,6 +806,9 @@ static int pasemi_mac_open(struct net_de goto out_rx_int; } + if (mac-phydev) + phy_start(mac-phydev); + return 0; out_rx_int: @@ -720,6 +832,11 @@ static int pasemi_mac_close(struct net_d unsigned int stat; int retries; + if (mac-phydev) { + phy_stop(mac-phydev); + phy_disconnect(mac-phydev); + } + netif_stop_queue(dev); /* Clean out any pending buffers */ @@ -1013,6 +1130,9 @@ pasemi_mac_probe(struct pci_dev *pdev, c mac-rx_status = dma_status-rx_sta[mac-dma_rxch]; mac-tx_status = dma_status-tx_sta[mac-dma_txch]; + /* Enable most messages by default */ + mac-msg_enable = (NETIF_MSG_IFUP 1 ) - 1; + err = register_netdev(dev); if (err) { Index: powerpc/drivers/net/pasemi_mac.h
[PATCH] [5/5] [v2] pasemi_mac: use local-mac-address
Use local-mac-address in the device tree instead. Fall back to mac-address for older firmware. Signed-off-by: Olof Johansson [EMAIL PROTECTED] Index: powerpc/drivers/net/pasemi_mac.c === --- powerpc.orig/drivers/net/pasemi_mac.c +++ powerpc/drivers/net/pasemi_mac.c @@ -74,7 +74,12 @@ static int pasemi_get_mac_addr(struct pa return -ENOENT; } - maddr = get_property(dn, mac-address, NULL); + maddr = get_property(dn, local-mac-address, NULL); + + /* Fall back to mac-address for older firmware */ + if (maddr == NULL) + maddr = get_property(dn, mac-address, NULL); + if (maddr == NULL) { dev_warn(pdev-dev, no mac address in device tree, not configuring\n); - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: + ppp_generic-fix-lockdep-warning.patch added to -mm tree
On Tue, Apr 17, 2007 at 08:26:32AM -0500, Michal Ostrowski wrote: The xmit function of a PPP channel is a synchronous operation. If the transmission fails, we must notify the caller and let them re-submit the skb later. The return status of dev_queue_xmit is needed to determine the return code passed back to the caller and thus the call is made synchronously and not in a tasklet. Sure! But on the other hand: - the return code from dev_queue_xmit doesn't guarantee the transmission won't fail, - similar code in ppp_async: ppp_async_send isn't so truthful and doesn't even check the return from ppp_async_push; BTW - probably other layers should care for transmission errors and re-submitting, - maybe I'm wrong here, but I think every layer should look (work) similarly here: dev_queue_xmit (or qdisc_run) thinks it's talking to some independent network device, which after dev_hard_start_xmit (and dev->hard_start_xmit) does some transmission; if, instead of this, next dev_queue_xmits are called with xmit locks held from previous devs, then it looks like logical recursion and locking is really hard to follow (even if it's OK). Looking at the stack traces earlier in this thread, it seems to me that even if the PPPoE call was made in a tasklet, this same warning could be generated. Of course a tasklet by itself isn't a cure, but if dev_queue_xmit is done from tasklet - only locks got within this tasklet should be counted. Thanks for the response and best regards, Jarek P. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [BRIDGE] Unaligned access on IA64 when comparing ethernet addresses
[snip] --- linux-2.6.orig/net/bridge/br_private.h2007-04-17 13:26:48.0 -0700 +++ linux-2.6/net/bridge/br_private.h 2007-04-17 13:30:29.0 -0700 @@ -36,7 +36,7 @@ { unsigned char prio[2]; unsigned char addr[6]; -}; +} __attribute__((aligned(8))); Why 8? Mustn't it be 16? Address is to be 2-bytes aligned... struct mac_addr { - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[NETLINK] Don't attach callback to a going-away netlink socket
Sorry, I forgot to put netdev and David in Cc when I first sent it. There is a race between netlink_dump_start() and netlink_release() that can lead to the situation when a netlink socket with non-zero callback is freed. Here it is: CPU1: CPU2 netlink_release(): netlink_dump_start(): sk = netlink_lookup(); /* OK */ netlink_remove(); spin_lock(nlk-cb_lock); if (nlk-cb) { /* false */ ... } spin_unlock(nlk-cb_lock); spin_lock(nlk-cb_lock); if (nlk-cb) { /* false */ ... } nlk-cb = cb; spin_unlock(nlk-cb_lock); ... sock_orphan(sk); /* * proceed with releasing * the socket */ The proposal is to call sock_orphan() before detaching the callback in netlink_release() and to check whether the sock is SOCK_DEAD in netlink_dump_start() before setting a new callback. Signed-off-by: Denis Lunev [EMAIL PROTECTED] Signed-off-by: Kirill Korotaev [EMAIL PROTECTED] Signed-off-by: Pavel Emelianov [EMAIL PROTECTED] Acked-by: Patrick McHardy [EMAIL PROTECTED] --- --- a/net/netlink/af_netlink.c 2004-10-25 12:12:23.0 +0400 +++ b/net/netlink/af_netlink.c 2004-10-28 16:26:12.0 +0400 @@ -255,6 +255,7 @@ static int netlink_release(struct socket return 0; netlink_remove(sk); + sock_orphan(sk); nlk = nlk_sk(sk); spin_lock(nlk-cb_lock); @@ -269,7 +270,6 @@ static int netlink_release(struct socket /* OK. Socket is unlinked, and, therefore, no new packets will arrive */ - sock_orphan(sk); sock-sk = NULL; wake_up_interruptible_all(nlk-wait); @@ -942,9 +942,9 @@ int netlink_dump_start(struct sock *ssk, return -ECONNREFUSED; } nlk = nlk_sk(sk); - /* A dump is in progress... */ + /* A dump or destruction is in progress... */ spin_lock(nlk-cb_lock); - if (nlk-cb) { + if (nlk-cb || sock_flag(sk, SOCK_DEAD)) { spin_unlock(nlk-cb_lock); netlink_destroy_callback(cb); sock_put(sk); - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [NETLINK] Don't attach callback to a going-away netlink socket
On Wed, Apr 18, 2007 at 12:16:18PM +0400, Pavel Emelianov ([EMAIL PROTECTED]) wrote: Sorry, I forgot to put netdev and David in Cc when I first sent it. There is a race between netlink_dump_start() and netlink_release() that can lead to the situation when a netlink socket with non-zero callback is freed. Out of curiosity, why not to fix a netlink_dump_start() to remove callback in error path, since in 'no-error' path it removes it in netlink_dump(). And, btw, can release method be called while socket is being used, I thought about proper reference counters should prevent this, but not 100% sure with RCU dereferencing of the descriptor. -- Evgeniy Polyakov - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [NETLINK] Don't attach callback to a going-away netlink socket
Evgeniy Polyakov wrote: On Wed, Apr 18, 2007 at 12:16:18PM +0400, Pavel Emelianov ([EMAIL PROTECTED]) wrote: Sorry, I forgot to put netdev and David in Cc when I first sent it. There is a race between netlink_dump_start() and netlink_release() that can lead to the situation when a netlink socket with non-zero callback is freed. Out of curiosity, why not to fix a netlink_dump_start() to remove callback in error path, since in 'no-error' path it removes it in netlink_dump(). It already does (netlink_destroy_callback), but that doesn't help with this race though since without this patch we don't enter the error path. And, btw, can release method be called while socket is being used, I thought about proper reference counters should prevent this, but not 100% sure with RCU dereferencing of the descriptor. The problem is asynchronous processing of the dump request in the context of a different process. Process requests a dump, message is queued and process returns from sendmsg since some other process is already processing the queue. Then the process closes the socket, resulting in netlink_release being called. When the dump request is finally processed the race Pavel described might happen. This can only happen for netlink families that use mutex_try_lock for queue processing of course. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [NETLINK] Don't attach callback to a going-away netlink socket
Evgeniy Polyakov wrote: On Wed, Apr 18, 2007 at 12:16:18PM +0400, Pavel Emelianov ([EMAIL PROTECTED]) wrote: Sorry, I forgot to put netdev and David in Cc when I first sent it. There is a race between netlink_dump_start() and netlink_release() that can lead to the situation when a netlink socket with non-zero callback is freed. Out of curiosity, why not to fix a netlink_dump_start() to remove callback in error path, since in 'no-error' path it removes it in Error path is not relevant here. The problem is that we keep a callback on a socket that is about to be freed. netlink_dump(). And, btw, can release method be called while socket is being used, I thought about proper reference counters should prevent this, but not 100% sure with RCU dereferencing of the descriptor. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [BRIDGE] Unaligned access on IA64 when comparing ethernet addresses
From: Pavel Emelianov [EMAIL PROTECTED] Date: Wed, 18 Apr 2007 10:43:56 +0400 [snip] --- linux-2.6.orig/net/bridge/br_private.h 2007-04-17 13:26:48.0 -0700 +++ linux-2.6/net/bridge/br_private.h 2007-04-17 13:30:29.0 -0700 @@ -36,7 +36,7 @@ { unsigned char prio[2]; unsigned char addr[6]; -}; +} __attribute__((aligned(8))); Why 8? Mustn't it be 16? Address is to be 2-bytes aligned... Actually it could be made 2, the aligned() attribute is in bytes, not bits. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [BRIDGE] Unaligned access on IA64 when comparing ethernet addresses
David Miller wrote: From: Pavel Emelianov [EMAIL PROTECTED] Date: Wed, 18 Apr 2007 10:43:56 +0400 [snip] --- linux-2.6.orig/net/bridge/br_private.h 2007-04-17 13:26:48.0 -0700 +++ linux-2.6/net/bridge/br_private.h 2007-04-17 13:30:29.0 -0700 @@ -36,7 +36,7 @@ { unsigned char prio[2]; unsigned char addr[6]; -}; +} __attribute__((aligned(8))); Why 8? Mustn't it be 16? Address is to be 2-bytes aligned... Actually it could be made 2, the aligned() attribute is in bytes, not bits. Indeed :) My bad :( Thank you... - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [NETLINK] Don't attach callback to a going-away netlink socket
On Wed, Apr 18, 2007 at 10:26:31AM +0200, Patrick McHardy ([EMAIL PROTECTED]) wrote: Evgeniy Polyakov wrote: On Wed, Apr 18, 2007 at 12:16:18PM +0400, Pavel Emelianov ([EMAIL PROTECTED]) wrote: Sorry, I forgot to put netdev and David in Cc when I first sent it. There is a race between netlink_dump_start() and netlink_release() that can lead to the situation when a netlink socket with non-zero callback is freed. Out of curiosity, why not to fix a netlink_dump_start() to remove callback in error path, since in 'no-error' path it removes it in netlink_dump(). It already does (netlink_destroy_callback), but that doesn't help with this race though since without this patch we don't enter the error path. I thought that with releasing a socket, which will have a callback attached only results in a leak of the callback? In that case we can just free it in dump() just like it is done in no-error path already. Or do I miss something additional? And, btw, can release method be called while socket is being used, I thought about proper reference counters should prevent this, but not 100% sure with RCU dereferencing of the descriptor. The problem is asynchronous processing of the dump request in the context of a different process. Process requests a dump, message is queued and process returns from sendmsg since some other process is already processing the queue. Then the process closes the socket, resulting in netlink_release being called. When the dump request is finally processed the race Pavel described might happen. This can only happen for netlink families that use mutex_try_lock for queue processing of course. Doesn't it called from -sk_data_ready() which is synchronous with respect to sendmsg, not sure about conntrack though, but it looks so? -- Evgeniy Polyakov - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [NETLINK] Don't attach callback to a going-away netlink socket
On Wed, Apr 18, 2007 at 12:32:40PM +0400, Pavel Emelianov ([EMAIL PROTECTED]) wrote: Evgeniy Polyakov wrote: On Wed, Apr 18, 2007 at 12:16:18PM +0400, Pavel Emelianov ([EMAIL PROTECTED]) wrote: Sorry, I forgot to put netdev and David in Cc when I first sent it. There is a race between netlink_dump_start() and netlink_release() that can lead to the situation when a netlink socket with non-zero callback is freed. Out of curiosity, why not to fix a netlink_dump_start() to remove callback in error path, since in 'no-error' path it removes it in Error path is not relevant here. The problem is that we keep a callback on a socket that is about to be freed. Yes, you are right, that it will not be freed in netlink_release(), but it will be freed in netlink_dump() after it is processed (in no-error path only though). -- Evgeniy Polyakov - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [NETLINK] Don't attach callback to a going-away netlink socket
Evgeniy Polyakov wrote: On Wed, Apr 18, 2007 at 10:26:31AM +0200, Patrick McHardy ([EMAIL PROTECTED]) wrote: Out of curiosity, why not to fix a netlink_dump_start() to remove callback in error path, since in 'no-error' path it removes it in netlink_dump(). It already does (netlink_destroy_callback), but that doesn't help with this race though since without this patch we don't enter the error path. I thought that with releasing a socket, which will have a callback attached only results in a leak of the callback? In that case we can just free it in dump() just like it is done in no-error path already. Or do I miss something additional? That would only work if there is nothing to dump (cb-dump returns 0). Otherwise it is not freed. The problem is asynchronous processing of the dump request in the context of a different process. Process requests a dump, message is queued and process returns from sendmsg since some other process is already processing the queue. Then the process closes the socket, resulting in netlink_release being called. When the dump request is finally processed the race Pavel described might happen. This can only happen for netlink families that use mutex_try_lock for queue processing of course. Doesn't it called from -sk_data_ready() which is synchronous with respect to sendmsg, not sure about conntrack though, but it looks so? Yes, but for kernel sockets we end up calling the input function, which when mutex_trylock is used returns immediately when some other process is already processing the queue, so the requesting process might close the socket before the request is processed. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [NETLINK] Don't attach callback to a going-away netlink socket
Evgeniy Polyakov wrote: On Wed, Apr 18, 2007 at 12:32:40PM +0400, Pavel Emelianov ([EMAIL PROTECTED]) wrote: Evgeniy Polyakov wrote: On Wed, Apr 18, 2007 at 12:16:18PM +0400, Pavel Emelianov ([EMAIL PROTECTED]) wrote: Sorry, I forgot to put netdev and David in Cc when I first sent it. There is a race between netlink_dump_start() and netlink_release() that can lead to the situation when a netlink socket with non-zero callback is freed. Out of curiosity, why not to fix a netlink_dump_start() to remove callback in error path, since in 'no-error' path it removes it in Error path is not relevant here. The problem is that we keep a callback on a socket that is about to be freed. Yes, you are right, that it will not be freed in netlink_release(), but it will be freed in netlink_dump() after it is processed (in no-error path only though). But error path will leak it. On success path we would have a leaked packet in sk_write_queue, since we didn't see it in skb_queue_purge() while doing netlink_release(). Of course we can place the struts in code to handle the case when we have a released socket with the attached callback, but it is more correct (IMHO) not to allow to attach the callbacks to dead sockets. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [NETLINK] Don't attach callback to a going-away netlink socket
On Wed, Apr 18, 2007 at 10:50:42AM +0200, Patrick McHardy ([EMAIL PROTECTED]) wrote: It already does (netlink_destroy_callback), but that doesn't help with this race though since without this patch we don't enter the error path. I thought that with releasing a socket, which will have a callback attached only results in a leak of the callback? In that case we can just free it in dump() just like it is done in no-error path already. Or do I miss something additional? That would only work if there is nothing to dump (cb-dump returns 0). Otherwise it is not freed. That is what I referred to as error path. Btw, with positive return value we end up in subsequent call to input which will free callback under lock as expected. I do not object against the patch, just want to make a clear vision about dumps - if callback is allocated to be used in dump only, then we could just free it there without passing to next round. The problem is asynchronous processing of the dump request in the context of a different process. Process requests a dump, message is queued and process returns from sendmsg since some other process is already processing the queue. Then the process closes the socket, resulting in netlink_release being called. When the dump request is finally processed the race Pavel described might happen. This can only happen for netlink families that use mutex_try_lock for queue processing of course. Doesn't it called from -sk_data_ready() which is synchronous with respect to sendmsg, not sure about conntrack though, but it looks so? Yes, but for kernel sockets we end up calling the input function, which when mutex_trylock is used returns immediately when some other process is already processing the queue, so the requesting process might close the socket before the request is processed. So far it is only netfilter and gennetlink, we would see huge dump from netlink_sock_destruct. Anyway, that is possible situation, thanks for clearing this up. 
-- Evgeniy Polyakov - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [NETLINK] Don't attach callback to a going-away netlink socket
On Wed, Apr 18, 2007 at 01:03:56PM +0400, Pavel Emelianov ([EMAIL PROTECTED]) wrote: Yes, you are right, that it will not be freed in netlink_release(), but it will be freed in netlink_dump() after it is processed (in no-error path only though). But error path will leak it. On success path we would have a leaked packet in sk_write_queue, since we didn't see it in skb_queue_purge() while doing netlink_release(). Of course we can place the struts in code to handle the case when we have a released socket with the attached callback, but it is more correct (IMHO) not to allow to attach the callbacks to dead sockets. That is why I've asked why such approach is used but not freeing callback in error (well, no-dump name is better to describe that path) path, and more generally, why callback is attached, but not freed in the function, but instead is freed next time dump started. -- Evgeniy Polyakov - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [NETLINK] Don't attach callback to a going-away netlink socket
Evgeniy Polyakov wrote: On Wed, Apr 18, 2007 at 10:50:42AM +0200, Patrick McHardy ([EMAIL PROTECTED]) wrote: I thought that with releasing a socket, which will have a callback attached only results in a leak of the callback? In that case we can just free it in dump() just like it is done in no-error path already. Or do I miss something additional? That would only work if there is nothing to dump (cb-dump returns 0). Otherwise it is not freed. That is what I referred to as error path. Btw, with positive return value we end up in subsequent call to input which will free callback under lock as expected. No, nothing is going to call netlink_dump after the initial call since the socket is gone. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
ESP interfamily tunnel bug?
Hi, we have discovered a routing related problem in ESP tunnel and beet mode. We don't know whether it is a bug in the XFRM, or just in the way the virtual addresses and the corresponding routes are set-up. We set up a dummy0 device for the virtual addresses: [EMAIL PROTECTED]:~# ip addr show dummy0 5: dummy0: BROADCAST,NOARP,UP,1 mtu 1500 qdisc noqueue link/ether 92:09:fe:11:81:1b brd ff:ff:ff:ff:ff:ff inet6 2001:72:e6d3:1cf3:e11d:5bb0:b99:e85e/28 scope global valid_lft forever preferred_lft forever inet6 2001:74:32e0:df36:e862:3963:523e:dd7d/28 scope global valid_lft forever preferred_lft forever inet6 2001:73:d3a8:8723:d572:7549:7f2c:e590/28 scope global valid_lft forever preferred_lft forever inet6 2001:75:a2e6:aad6:e901:dd1c:ba95:e300/28 scope global valid_lft forever preferred_lft forever inet6 fe80::9009:feff:fe11:811b/64 scope link valid_lft forever preferred_lft forever And then we have routes for the virtual addresses: [EMAIL PROTECTED]:~# ip -6 route 2001:72:e6d3:1cf3:e11d:5bb0:b99:e85e dev dummy0 metric 1024 expires 21334305sec mtu 1500 advmss 1440 metric 10 4294967295 2001:73:d3a8:8723:d572:7549:7f2c:e590 dev dummy0 metric 1024 expires 21334305sec mtu 1500 advmss 1440 metric 10 4294967295 2001:74:32e0:df36:e862:3963:523e:dd7d dev dummy0 metric 1024 expires 21334305sec mtu 1500 advmss 1440 metric 10 4294967295 2001:75:a2e6:aad6:e901:dd1c:ba95:e300 dev dummy0 metric 1024 expires 21334305sec mtu 1500 advmss 1440 metric 10 4294967295 2001:70::/28 dev dummy0 metric 256 expires 21334305sec mtu 1500 advmss 1440 metric 10 4294967295 fe80::/64 dev dummy0 metric 256 expires 21334305sec mtu 1500 advmss 1440 metric 10 4294967295 ff00::/8 dev eth0 metric 256 expires 21325454sec mtu 1500 advmss 1440 metric 10 4294967295 ff00::/8 dev dummy0 metric 256 expires 21334305sec mtu 1500 advmss 1440 metric 10 4294967295 unreachable default dev lo proto none metric -1 error -101 metric 10 255 ...and set-up policies and associations. 
The virtual IPv6 addresses are inner and IPv4 addresses are outer addresses: [EMAIL PROTECTED]:~/projects/hipl--userspace--2.6# ip xfrm policy show src 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15/128 dst 2001:74:32e0:df36:e862:3963:523e:dd7d/128 dir in priority 0 tmpl src c1a7:bb82:: dst c0a8:65:: proto esp reqid 0 mode beet src 2001:74:32e0:df36:e862:3963:523e:dd7d/128 dst 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15/128 dir out priority 0 tmpl src c0a8:65:: dst c1a7:bb82:: proto esp reqid 0 mode beet [EMAIL PROTECTED]:~/projects/hipl--userspace--2.6# ip xfrm state show src 193.167.187.130 dst 192.168.0.101 proto esp spi 0xf556c7c7 reqid 0 mode beet replay-window 0 auth sha1 0xab327b944011c94a0c54a097b4752e23f377ff34 enc aes 0x882a334830b1cd14b9e411ec37a4242f encap type espinudp-nonike sport 50500 dport 50500 addr 193.167.187.130 sel src 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15/0 dst 2001:74:32e0:df36:e862:3963:523e:dd7d/0 src 192.168.0.101 dst 193.167.187.130 proto esp spi 0x1663f3a4 reqid 0 mode beet replay-window 0 auth sha1 0x9f07dabce4abf2ebfe45e247ede2cf15f9156a13 enc aes 0xfc50593b9af6d296b042a16ca00bad20 encap type espinudp-nonike sport 50500 dport 50500 addr 192.168.0.101 sel src 2001:74:32e0:df36:e862:3963:523e:dd7d/0 dst 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15/0 And then we try to ping6 the virtual address: [EMAIL PROTECTED]:~/projects/hipl--userspace--2.6# ping6 -I 2001:0074:32e0:df36:e862:3963:523e:dd7d 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15 PING 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15(2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15) from 2001:74:32e0:df36:e862:3963:523e:dd7d : 56 data bytes ping: sendmsg: Network is unreachable ping: sendmsg: Network is unreachable Tcpdump shows no traffic at the host. We can repeat the problem both with tunnel and beet modes in 2.6.21-rc6 (and also in 2.6.17.14). I have tried also ip rule stuff but it seems that it does not rule with IPv6 :) It does help either to reduce the number of virtual addresses to a single one. 
It is weird that the ESP actually works with some combinations of virtual addresses (4 of 16) in both directions, or works unidirectionally on some and does not work at all on the rest. I verified the unidirectional property using a simple UDP based application: sender xmits UDP packet, receiver gets it ok, but cannot respond. So, the problem is in the transmission of packets. I traced the ENETUNREACH in the kernel side to here: net/ipv4/route.c:ip_route_output_slow: if (fib_lookup(fl, res)) { if (dev_out) dev_put(dev_out); err = -ENETUNREACH; FIB lookup is returning an error net/ipv4/fib_rules: int fib_lookup(const struct flowi
Re: [PATCH 2.6] WE-22 : prevent information leak on 64 bit
Jean, First, I'm the current active maintainer of the wext-over-netlink interface, and nobody bothered to even 'inform' me about its removal, let alone consult with me. I definitely should have copied you on the feature-removal schedule patch for wext-over-netlink and then the actual removal in wireless-dev; please accept my apologies for not doing that, it was not done in bad faith. It was never my intention to demote you to a second class citizen, I'm sorry you feel that way. I have previously (and multiple times) given technical justification for removing this code (even recorded in the kernel changelog now) and I contest your allegation that it is a political issue. Others in this thread have pointed out the technical issues with wext and wext/nl so I will not repeat them. I hope that despite my mistakes in handling the wext/nl removal we will be able to work together in the future to have wext fully supported with clear semantics for backwards compatibility while the kernel internally migrates towards cfg80211. johannes signature.asc Description: This is a digitally signed message part
[RFC][PATCH -mm take4 0/6] proposal for dynamic configurable netconsole
From: Keiichi KII [EMAIL PROTECTED] The netconsole is a very useful module for collecting kernel messages under certain circumstances (e.g. disk logging fails, serial port is unavailable). But current netconsole is not flexible. For example, if you want to change ip address for logging agent, in the case of built-in netconsole, you can't change config except for changing boot parameter and rebooting your system, or in the case of module netconsole, you need to remove it and reload with different parameters. By adopting my patches, the current netconsole becomes a little more complex. But the kernel messages (especially panic messages) are significant information to solve bugs and troubles promptly and we have been losing serial console port with PCs and Servers. I think that we need the environment in which we can collect kernel messages flexibly. So, I propose the following extended features for netconsole. 1) support for multiple logging agents. 2) add interface to access each parameter of netconsole using sysfs. [changes since take3] -changing kernel base from 2.6.21-rc3-mm2 to 2.6.21-rc6-mm1. -introducing CONFIG_NETCONSOLE_DYNCON. -cleanup Your comments are very welcome. Signed-off-by: Keiichi KII [EMAIL PROTECTED] Signed-off-by: Takayoshi Kochi [EMAIL PROTECTED] --- - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC][PATCH -mm take4 1/6] marking __init
From: Keiichi KII [EMAIL PROTECTED] This patch contains the following cleanups. - add __init for initialization functions(option_setup() and init_netconsole()). Acked-by: Matt Mackall [EMAIL PROTECTED] Signed-off-by: Keiichi KII [EMAIL PROTECTED] Signed-off-by: Takayoshi Kochi [EMAIL PROTECTED] --- Index: linux-mm/drivers/net/netconsole.c === --- linux-mm.orig/drivers/net/netconsole.c +++ linux-mm/drivers/net/netconsole.c @@ -91,7 +91,7 @@ static struct console netconsole = { .write = write_msg }; -static int option_setup(char *opt) +static int __init option_setup(char *opt) { configured = !netpoll_parse_options(np, opt); return 1; @@ -99,7 +99,7 @@ static int option_setup(char *opt) __setup(netconsole=, option_setup); -static int init_netconsole(void) +static int __init init_netconsole(void) { int err; -- - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC][PATCH -mm take4 3/6] add interface for netconsole using sysfs
From: Keiichi KII [EMAIL PROTECTED] This patch contains the following changes. create a sysfs entry for netconsole in /sys/class/misc. This entry has elements related to netconsole as follows. You can change configuration of netconsole(writable attributes such as IP address, port number and so on) and check current configuration of netconsole. -+- /sys/class/misc/ |-+- netconsole/ |-+- port1/ | |--- id [r--r--r--] unique port id | |--- local_ip[rw-r--r--] source IP to use, writable | |--- local_mac [r--r--r--] source MAC address | |--- local_port [rw-r--r--] source port number for UDP packets, writable | |--- remote_ip [rw-r--r--] port number for logging agent, writable | |--- remote_mac [rw-r--r--] MAC address for logging agent, writable | remote_port [rw-r--r--] IP address for logging agent, writable |--- port2/ |--- port3/ ... Signed-off-by: Keiichi KII [EMAIL PROTECTED] Signed-off-by: Takayoshi Kochi [EMAIL PROTECTED] --- Index: mm/drivers/net/netconsole.c === --- mm.orig/drivers/net/netconsole.c +++ mm/drivers/net/netconsole.c @@ -45,6 +45,8 @@ #include linux/sysrq.h #include linux/smp.h #include linux/netpoll.h +#include linux/miscdevice.h +#include linux/inet.h MODULE_AUTHOR(Maintainer: Matt Mackall [EMAIL PROTECTED]); MODULE_DESCRIPTION(Console driver for network interfaces); @@ -67,6 +69,7 @@ static struct netpoll np = { #ifdef CONFIG_NETCONSOLE_DYNCON struct netconsole_target { struct list_head list; + struct kobject obj; int id; struct netpoll np; }; @@ -77,6 +80,207 @@ static DEFINE_SPINLOCK(target_list_lock) static int add_target(char* target_config); static void remove_target(struct netconsole_target *nt); static void cleanup_netconsole(void); +static int setup_target_sysfs(struct netconsole_target *nt); + +static int miscdev_configured; + +static ssize_t show_id(struct netconsole_target *nt, char *buf) +{ + return sprintf(buf, %d\n, nt-id); +} + +static ssize_t show_local_port(struct netconsole_target *nt, char *buf) +{ + return sprintf(buf, %d\n, 
nt-np.local_port); +} + +static ssize_t show_remote_port(struct netconsole_target *nt, char *buf) +{ + return sprintf(buf, %d\n, nt-np.remote_port); +} + +static ssize_t show_local_ip(struct netconsole_target *nt, char *buf) +{ + return sprintf(buf, %d.%d.%d.%d\n, HIPQUAD(nt-np.local_ip)); +} + +static ssize_t show_remote_ip(struct netconsole_target *nt, char *buf) +{ + return sprintf(buf, %d.%d.%d.%d\n, HIPQUAD(nt-np.remote_ip)); +} + +static ssize_t show_local_mac(struct netconsole_target *nt, char *buf) +{ + return sprintf(buf, %02x:%02x:%02x:%02x:%02x:%02x\n, + nt-np.local_mac[0], nt-np.local_mac[1], + nt-np.local_mac[2], nt-np.local_mac[3], + nt-np.local_mac[4], nt-np.local_mac[5]); +} + +static ssize_t show_remote_mac(struct netconsole_target *nt, char *buf) +{ + return sprintf(buf, %02x:%02x:%02x:%02x:%02x:%02x\n, + nt-np.remote_mac[0], nt-np.remote_mac[1], + nt-np.remote_mac[2], nt-np.remote_mac[3], + nt-np.remote_mac[4], nt-np.remote_mac[5]); +} + +static ssize_t store_local_port(struct netconsole_target *nt, const char *buf, + size_t count) +{ + spin_lock(target_list_lock); + nt-np.local_port = simple_strtol(buf, NULL, 10); + spin_unlock(target_list_lock); + + return count; +} + +static ssize_t store_remote_port(struct netconsole_target *nt, const char *buf, + size_t count) +{ + spin_lock(target_list_lock); + nt-np.remote_port = simple_strtol(buf, NULL, 10); + spin_unlock(target_list_lock); + + return count; +} + +static ssize_t store_local_ip(struct netconsole_target *nt, const char *buf, + size_t count) +{ + spin_lock(target_list_lock); + nt-np.local_ip = ntohl(in_aton(buf)); + spin_unlock(target_list_lock); + + return count; +} + +static ssize_t store_remote_ip(struct netconsole_target *nt, const char *buf, + size_t count) +{ + spin_lock(target_list_lock); + nt-np.remote_ip = ntohl(in_aton(buf)); + spin_unlock(target_list_lock); + + return count; +} + +static ssize_t store_remote_mac(struct netconsole_target *nt, const char *buf, + size_t count) +{ + 
unsigned char input_mac[ETH_ALEN] = + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + const char *cur = buf; + int i = 0; + + input_mac[i++] = simple_strtol(cur, NULL, 16); + while ((cur = strchr(cur, ':')) != NULL) { + cur++; + input_mac[i++] = simple_strtol(cur, NULL, 16); + } + if (i != ETH_ALEN) + return -EINVAL; +
[RFC][PATCH -mm take4 2/6] support multiple logging
From: Keiichi KII [EMAIL PROTECTED] This patch contains the following changes for supporting multiple logging agents. 1. extend netconsole to multiple netpolls To send kernel messages to multiple logging agents, extend netcosnole to be able to use multiple netpolls. Each netpoll sends kernel messages to its own logging agent. 2. change config parameter format We change config parameter format from single configuration to multiple configurations separated by ';'. ex) sending kernel messages to destination1 and destination2 using eth0. modprobe netconsole \ netconsole=@/eth0,@[destination1]/;@/eth0,@[destination2]/ 3. introduce CONFIG_NETCONSOLE_DYNCON config to change between existing netconsole and netconsole applying the above function. Signed-off-by: Keiichi KII [EMAIL PROTECTED] Signed-off-by: Takayoshi Kochi [EMAIL PROTECTED] --- Index: mm/drivers/net/netconsole.c === --- mm.orig/drivers/net/netconsole.c +++ mm/drivers/net/netconsole.c @@ -61,15 +61,102 @@ static struct netpoll np = { .remote_port = , .remote_mac = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, }; -static int configured = 0; #define MAX_PRINT_CHUNK 1000 +#ifdef CONFIG_NETCONSOLE_DYNCON +struct netconsole_target { + struct list_head list; + int id; + struct netpoll np; +}; + +static LIST_HEAD(target_list); +static DEFINE_SPINLOCK(target_list_lock); + +static int add_target(char* target_config); +static void remove_target(struct netconsole_target *nt); +static void cleanup_netconsole(void); + +static int add_target(char* target_config) +{ + int retval = 0; + static atomic_t target_count = ATOMIC_INIT(0); + struct netconsole_target *new_target; + + new_target = kzalloc(sizeof(*new_target), GFP_KERNEL); + if (!new_target) { + printk(KERN_ERR netconsole: kmalloc() failed!\n); + retval = -ENOMEM; + goto out; + } + + new_target-np = np; + if (netpoll_parse_options(new_target-np, target_config)) { + printk(KERN_ERR netconsole: can't parse config:%s\n, + target_config); + kfree(new_target); + retval = -EINVAL; + 
goto out; + } + if (netpoll_setup(new_target-np)) { + printk(KERN_ERR netconsole: can't setup netpoll:%s\n, + target_config); + kfree(new_target); + retval = -EINVAL; + goto out; + } + + new_target-id = atomic_inc_return(target_count); + + printk(KERN_INFO netconsole: add target: + remote ip_addr=%d.%d.%d.%d remote port=%d\n, + HIPQUAD(new_target-np.remote_ip), new_target-np.remote_port); + + spin_lock(target_list_lock); + list_add(new_target-list, target_list); + spin_unlock(target_list_lock); + + out: + return retval; +} + +static void remove_target(struct netconsole_target *nt) +{ + spin_lock(target_list_lock); + list_del(nt-list); + if (list_empty(target_list)) + netpoll_cleanup(nt-np); + spin_unlock(target_list_lock); + kfree(nt); +} +#endif /* CONFIG_NETCONSOLE_DYNCON */ + static void write_msg(struct console *con, const char *msg, unsigned int len) { int frag, left; unsigned long flags; +#ifdef CONFIG_NETCONSOLE_DYNCON + struct netconsole_target *target; + + if (list_empty(target_list)) + return; + local_irq_save(flags); + spin_lock(target_list_lock); + + for(left = len; left; ) { + frag = min(left, MAX_PRINT_CHUNK); + list_for_each_entry(target, target_list, list) { + netpoll_send_udp(target-np, msg, frag); + } + msg += frag; + left -= frag; + } + + spin_unlock(target_list_lock); + local_irq_restore(flags); +#else if (!np.dev) return; @@ -83,6 +170,7 @@ static void write_msg(struct console *co } local_irq_restore(flags); +#endif /* CONFIG_NETCONSOLE_DYNCON */ } static struct console netconsole = { @@ -91,39 +179,60 @@ static struct console netconsole = { .write = write_msg }; +#ifndef MODULE static int __init option_setup(char *opt) { - configured = !netpoll_parse_options(np, opt); + strncpy(config, opt, 256); return 1; } __setup(netconsole=, option_setup); +#endif static int __init init_netconsole(void) { - int err; + char *tmp = config; +#ifdef CONFIG_NETCONSOLE_DYNCON + char *p; - if(strlen(config)) - option_setup(config); - - if(!configured) { - 
printk(netconsole: not configured, aborting\n); + register_console(netconsole); + if(!strlen(config)) { + printk(KERN_ERR
[RFC][PATCH -mm take4 4/6] using symlink for the net_device
From: Keiichi KII [EMAIL PROTECTED] We use symbolic link for net_device. The link in sysfs represents the corresponding network etherdevice. -+- /sys/class/misc/ |-+- netconsole/ |-+- port1/ | |--- id [r--r--r--] id | |--- net:net_dev [rw-r--r--] net_dev: eth0,eth1,... | ... |--- port2/ ... Signed-off-by: Keiichi KII [EMAIL PROTECTED] Signed-off-by: Takayoshi Kochi [EMAIL PROTECTED] --- Index: mm/drivers/net/netconsole.c === --- mm.orig/drivers/net/netconsole.c +++ mm/drivers/net/netconsole.c @@ -81,6 +81,9 @@ static int add_target(char* target_confi static void remove_target(struct netconsole_target *nt); static void cleanup_netconsole(void); static int setup_target_sysfs(struct netconsole_target *nt); +static char *make_netdev_class_name(char *netdev_name); +static int netconsole_event(struct notifier_block *this, unsigned long event, + void *ptr); static int miscdev_configured; @@ -274,12 +277,77 @@ static struct miscdevice netconsole_misc .name = netconsole, }; +static struct notifier_block netconsole_notifier = { + .notifier_call = netconsole_event, +}; + static int setup_target_sysfs(struct netconsole_target *nt) { + int retval = 0; + char *name; + kobject_set_name(nt-obj, port%d, nt-id); nt-obj.parent = netconsole_miscdev.this_device-kobj; nt-obj.ktype = target_ktype; - return kobject_register(nt-obj); + retval = kobject_register(nt-obj); + name = make_netdev_class_name(nt-np.dev_name); + if (IS_ERR(name)) + return PTR_ERR(name); + retval = sysfs_create_link(nt-obj, nt-np.dev-dev.kobj, name); + kfree(name); + + return retval; +} + +static char *make_netdev_class_name(char *netdev_name) +{ + int size; + char *name; + char *netdev_class_prefix = net:; + + size = strlen(netdev_class_prefix) + strlen(netdev_name) + 1; + name = kmalloc(size, GFP_KERNEL); + if (!name) { + printk(KERN_ERR netconsole: kmalloc() failed!\n); + return ERR_PTR(-ENOMEM); + } + strcpy(name, netdev_class_prefix); + strcat(name, netdev_name); + + return name; +} + +static int 
netconsole_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + int error = 0; + char *old_link_name = NULL, *new_link_name = NULL; + struct netconsole_target *nt; + struct net_device *dev = ptr; + + if (event == NETDEV_CHANGENAME) { + spin_lock(target_list_lock); + list_for_each_entry(nt, target_list, list) { + if (nt-np.dev != dev) + continue; + new_link_name = make_netdev_class_name(dev-name); + old_link_name = + make_netdev_class_name(nt-np.dev_name); + sysfs_remove_link(nt-obj, old_link_name); + error = sysfs_create_link(nt-obj, + nt-np.dev-dev.kobj, + new_link_name); + if (error) + printk(KERN_ERR can't create link: %s\n, + new_link_name); + strcpy(nt-np.dev_name, dev-name); + kfree(new_link_name); + kfree(old_link_name); + } + spin_unlock(target_list_lock); + } + + return NOTIFY_DONE; } static int add_target(char* target_config) @@ -409,6 +477,7 @@ static int __init init_netconsole(void) } else miscdev_configured = 1; + register_netdevice_notifier(netconsole_notifier); register_console(netconsole); if(!strlen(config)) { printk(KERN_ERR netconsole: not configured\n); @@ -443,6 +512,7 @@ static void cleanup_netconsole(void) list_for_each_entry_safe(nt, tmp, target_list, list) { kobject_unregister(nt-obj); } + unregister_netdevice_notifier(netconsole_notifier); if (miscdev_configured) misc_deregister(netconsole_miscdev); #else -- - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC][PATCH -mm take4 5/6] switch function of netpoll
From: Keiichi KII [EMAIL PROTECTED] This patch contains switch function of netpoll. If enabled attribute of certain port is '1', this port is used and the configurations of this port are uable to change. If enabled attribute of certain port is '0', this port isn't used and the configurations of this port are able to change. -+- /sys/class/misc/ |-+- netconsole/ |-+- port1/ | |--- id [r--r--r--] id | |--- enabled [rw-r--r--] 0: disable 1: enable, writable | ... |--- port2/ ... Signed-off-by: Keiichi KII [EMAIL PROTECTED] Signed-off-by: Takayoshi Kochi [EMAIL PROTECTED] --- Index: mm/drivers/net/netconsole.c === --- mm.orig/drivers/net/netconsole.c +++ mm/drivers/net/netconsole.c @@ -71,6 +71,7 @@ struct netconsole_target { struct list_head list; struct kobject obj; int id; + int enabled; struct netpoll np; }; @@ -128,10 +129,19 @@ static ssize_t show_remote_mac(struct ne nt-np.remote_mac[4], nt-np.remote_mac[5]); } +static ssize_t show_enabled(struct netconsole_target *nt, char *buf) +{ + return sprintf(buf, %d\n, nt-enabled); +} + static ssize_t store_local_port(struct netconsole_target *nt, const char *buf, size_t count) { spin_lock(target_list_lock); + if (nt-enabled) { + spin_unlock(target_list_lock); + return -EINVAL; + } nt-np.local_port = simple_strtol(buf, NULL, 10); spin_unlock(target_list_lock); @@ -142,6 +152,10 @@ static ssize_t store_remote_port(struct size_t count) { spin_lock(target_list_lock); + if (nt-enabled) { + spin_unlock(target_list_lock); + return -EINVAL; + } nt-np.remote_port = simple_strtol(buf, NULL, 10); spin_unlock(target_list_lock); @@ -152,6 +166,10 @@ static ssize_t store_local_ip(struct net size_t count) { spin_lock(target_list_lock); + if (nt-enabled) { + spin_unlock(target_list_lock); + return -EINVAL; + } nt-np.local_ip = ntohl(in_aton(buf)); spin_unlock(target_list_lock); @@ -162,6 +180,10 @@ static ssize_t store_remote_ip(struct ne size_t count) { spin_lock(target_list_lock); + if (nt-enabled) { + spin_unlock(target_list_lock); 
+ return -EINVAL; + } nt-np.remote_ip = ntohl(in_aton(buf)); spin_unlock(target_list_lock); @@ -184,12 +206,39 @@ static ssize_t store_remote_mac(struct n if (i != ETH_ALEN) return -EINVAL; spin_lock(target_list_lock); + if (nt-enabled) { + spin_unlock(target_list_lock); + return -EINVAL; + } memcpy(nt-np.remote_mac, input_mac, ETH_ALEN); spin_unlock(target_list_lock); return count; } +static ssize_t store_enabled(struct netconsole_target *nt, const char *buf, + size_t count) +{ + int enabled = 0; + + if (count = 2 (count != 2 || buf[count - 1] != '\n')) { + printk(KERN_ERR netconsole: invalid argument: %s\n, buf); + return -EINVAL; + } else if (buf[0] == '1') { + enabled = 1; + } else if(buf[0] == '0') { + enabled = 0; + } else { + printk(KERN_ERR netconsole: invalid argument: %s\n, buf); + return -EINVAL; + } + spin_lock(target_list_lock); + nt-enabled = enabled; + spin_unlock(target_list_lock); + + return count; +} + struct target_attr { struct attribute attr; ssize_t (*show)(struct netconsole_target*, char*); @@ -213,6 +262,8 @@ static NETCON_CLASS_ATTR(remote_ip, S_IR static NETCON_CLASS_ATTR(local_mac, S_IRUGO, show_local_mac, NULL); static NETCON_CLASS_ATTR(remote_mac, S_IRUGO | S_IWUSR, show_remote_mac, store_remote_mac); +static NETCON_CLASS_ATTR(enabled, S_IRUGO | S_IWUSR, +show_enabled, store_enabled); static struct attribute *target_attrs[] = { target_attr_id.attr, @@ -222,6 +273,7 @@ static struct attribute *target_attrs[] target_attr_remote_ip.attr, target_attr_local_mac.attr, target_attr_remote_mac.attr, + target_attr_enabled.attr, NULL }; @@ -380,6 +432,7 @@ static int add_target(char* target_confi } new_target-id = atomic_inc_return(target_count); + new_target-enabled = 1; printk(KERN_INFO netconsole: add target: remote ip_addr=%d.%d.%d.%d remote port=%d\n, @@ -421,7 +474,8 @@ static void write_msg(struct console *co for(left = len; left; ) { frag = min(left, MAX_PRINT_CHUNK);
[RFC][PATCH -mm take4 6/6] add ioctls for adding/removing target
From: Keiichi KII [EMAIL PROTECTED] We add ioctls for adding/removing target. If we use NETCONSOLE_ADD_TARGET ioctl, we can dynamically add netconsole target. If we use NETCONSOLE_REMOVE_TARGET ioctl, we can dynamically remoe netconsole target. We attach a sample program for ioctl. Signed-off-by: Keiichi KII [EMAIL PROTECTED] Signed-off-by: Takayoshi Kochi [EMAIL PROTECTED] --- /* * This software is a sample program for ioctl of netconsole. * You can add/remove netconsole port by using this software. * * Keiichi KII [EMAIL PROTECTED] * Copyright (C) 2007 by Keiichi KII * This software is under GPL version 2 of the license. */ #include stdio.h #include unistd.h #include string.h #include stdlib.h #include stropts.h #include fcntl.h #include arpa/inet.h #include net/if.h #include linux/if_ether.h #include linux/netconsole.h #define NETCONSOLE_DEV_NAME /dev/netconsole #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) struct command { char *name; char *options; int (*handle_command)(struct command* command, int argc, char* argv[]); void (*usage)(char *msg); }; extern char *optarg; extern int opterr, optind, errno; static void generic_usage(char *msg) { fprintf(stderr, Usage : netconfig command [option] [args]\n); fprintf(stderr, command: add remove help\n); exit(-1); } static int handle_command_add(struct command* command, int argc, char** argv) { int i, fd, ch; unsigned int address; unsigned char mac[ETH_ALEN]; struct netconsole_request req = { .netdev_name = eth0, .local_port = 6665, .remote_port = , .remote_mac = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, }; while ((ch = getopt(argc, argv, command-options)) != -1) { switch (ch) { case 'p': req.local_port = atoi(optarg); break; case 's': address = inet_addr(optarg); if (address == -1) (*command-usage)(invlid IP address!\n); req.local_ip = address; break; case 'h': default: (*command-usage)(NULL); } } argc -= optind; argv += optind; if (argc 3 || argc 4) (*command-usage)(NULL); memcpy(req.netdev_name, argv[0], IFNAMSIZ); 
address = inet_addr(argv[1]); if (address == -1) (*command-usage)(invlid IP address!\n); req.remote_ip = address; req.remote_port = atoi(argv[2]); if (argc == 4) { i = 0; mac[i++] = strtol(argv[3], NULL, 16); while ((argv[3] = strchr(argv[3], ':')) != NULL) { argv[3]++; mac[i++] = strtol(argv[3], NULL, 16); } if (i != ETH_ALEN) (*command-usage)(Invalid MAC address!\n); memcpy(req.remote_mac, mac, ETH_ALEN); } fd = open(NETCONSOLE_DEV_NAME, O_RDWR); if (fd 0) { fprintf(stderr, cannot open device NETCONSOLE_DEV_NAME \n); return -1; } if(ioctl(fd, NETCON_ADD_TARGET, req) != 0) perror(add); close(fd); return 0; } static void usage_add(char *msg) { if (msg != NULL) fprintf(stderr, %s, msg); fprintf(stderr, Usage : netconfig add [-options] dev_name remote_ip remote_port [remote_mac]\n); fprintf(stderr, options:\n); fprintf(stderr, -p local_port :local port number\n); fprintf(stderr, -s local_up :local IP address\n); exit(-1); } static int handle_command_remove(struct command *command, int argc, char** argv) { int fd, id, ch; while ((ch = getopt(argc, argv, command-options)) != -1) { switch (ch) { case 'h': default: (*command-usage)(NULL); } } argc -= optind; argv += optind; if (argc != 1) (*command-usage)(NULL); id = atoi(argv[0]); fd = open(NETCONSOLE_DEV_NAME, O_RDWR); if (fd 0) { fprintf(stderr, can't open device NETCONSOLE_DEV_NAME \n); return -1; } if(ioctl(fd, NETCON_REMOVE_TARGET, id) != 0) perror(remove); close(fd); return 0; } static void usage_remove(char *msg) { fprintf(stderr, Usage : netconfig remove id\n); exit(-1); } static int handle_command_help(struct command *command, int argc, char** argv) { (*command-usage)(NULL); return 0; } static
Re: kernel BUG at net/core/skbuff.c in linux-2.6.21-rc6
Hi, I didn't analyse this bug report but probably it is nearly connected with one of the bugs visible in a log from this submit: http://bugzilla.kernel.org/show_bug.cgi?id=8132 On 15-04-2007 02:50, Paul Mackerras wrote: David Miller writes: Here is Patrick McHardy's patch: So this doesn't change process_input_packet(), which treats the case where the first byte is 0xff (PPP_ALLSTATIONS) but the second byte is 0x03 (PPP_UI) as indicating a packet with a PPP protocol number of 0xff. Arguably that's wrong since PPP protocol 0xff is reserved, and the RFC does envision the possibility of receiving frames where the control field has values other than 0x03. Therefore I think this patch is probably better. Could people try it out and let me know if it fixes the problem? Paul. diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c index 933e2f3..caabbc4 100644 --- a/drivers/net/ppp_async.c +++ b/drivers/net/ppp_async.c @@ -802,9 +802,9 @@ process_input_packet(struct asyncppp *ap) /* check for address/control and protocol compression */ p = skb-data; - if (p[0] == PPP_ALLSTATIONS p[1] == PPP_UI) { + if (p[0] == PPP_ALLSTATIONS) { /* chop off address/control */ - if (skb-len 3) + if (p[1] != PPP_UI || skb-len 3) goto err; p = skb_pull(skb, 2); } Let's look farther: proto = p[0]; if (proto 1) { /* protocol is compressed */ skb_push(skb, 1)[0] = 0; BTW - about Patrick's patch: skb_push seems to be dependent here on the 1-st char of skb-data, if above (p[0] != PPP_ALLSTATIONS), but on the 3-rd char otherwise (after skb_pull). But, Patrick's patch reserves the place for this, looking always at 1-st char (buf[0]) independently of PPP_ALLSTATIONS char presence, or otherwise - always treating this char as protocol char. It looks safe because of PPP_ALLSTATION current value, but isn't too understandable. On the other hand, without any reservation in the ppp_async_input for the (buf[0] == PPP_ALLSTATIONS) case, probably 4-byte alignement isn't achieved as planned. 
} else { if (skb-len 2) goto err; proto = (proto 8) + p[1]; if (proto == PPP_LCP) async_lcp_peek(ap, p, skb-len, 1); } /* queue the frame to be processed */ skb-cb[0] = ap-state; skb_queue_tail(ap-rqueue, skb); ap-rpkt = NULL; ap-state = 0; return; err: /* frame had an error, remember that, reset SC_TOSS SC_ESCAPE */ ap-state = SC_PREV_ERROR; if (skb) { /* make skb appear as freshly allocated */ Probably this isn't always true and here the problem started... skb_trim(skb, 0); skb_reserve(skb, - skb_headroom(skb)); Isn't here lost e.g. NET_SKB_PAD probably reserved by dev_alloc_skb? On the other hand - this kind of pad can very good hide similar reservation problems in many other places - maybe it should be omitted or somehow counted in WARNs when some debugging options are active? Regards, Jarek P. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Bugme-new] [Bug 8320] New: replacing route in kernel doesn't send netlink message
On Tue, 2007-04-17 at 14:58 +0200, Patrick McHardy wrote: David Miller wrote: From: Patrick McHardy [EMAIL PROTECTED] Date: Mon, 16 Apr 2007 06:59:06 +0200 RTM_DELROUTE + RTM_NEWROUTE seem to be safer, although you're correct that it might cause userspace to perform some action upon receiving the DELROUTE message since the update is non-atomic. So I really don't know, I'm in favour of having notifications for replacements, but I fear we might break something. We can cry foul about a broken application if an application following the API correctly would interpret the new messages correctly. I think it doesn't make sense to do a delete then a newroute for the atomicity issues, and therefore the replace makes the most sense as long as existing correct uses of the API would not explode on this. They shouldn't, worst case is that they ignore NLM_F_REPLACE and treat it as a completely new route, which is at least half way correct and not really worse than today. Milan, could you cook up another patch which uses NLM_F_REPLACE? I can try it. Output is in patch below. Review carefully. I don't know if it's best approach. 
It's tested and working without problem (probably :-)) --- net/ipv4.old/fib_hash.c 2007-04-18 12:50:11.0 +0200 +++ net/ipv4/fib_hash.c 2007-04-18 12:39:49.081369320 +0200 @@ -443,7 +443,6 @@ if (cfg-fc_nlflags NLM_F_REPLACE) { struct fib_info *fi_drop; u8 state; - write_lock_bh(fib_hash_lock); fi_drop = fa-fa_info; fa-fa_info = fi; @@ -457,6 +456,8 @@ fib_release_info(fi_drop); if (state FA_S_ACCESSED) rt_cache_flush(-1); + rtmsg_fib(RTM_NEWROUTE, key, fa, cfg-fc_dst_len, tb-tb_id, + cfg-fc_nlinfo, NLM_F_REPLACE); return 0; } @@ -524,7 +525,7 @@ rt_cache_flush(-1); rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg-fc_dst_len, tb-tb_id, - cfg-fc_nlinfo); + cfg-fc_nlinfo, 0); return 0; out_free_new_fa: @@ -590,7 +591,7 @@ fa = fa_to_delete; rtmsg_fib(RTM_DELROUTE, key, fa, cfg-fc_dst_len, - tb-tb_id, cfg-fc_nlinfo); + tb-tb_id, cfg-fc_nlinfo, 0); kill_fn = 0; write_lock_bh(fib_hash_lock); --- net/ipv4.old/fib_trie.c 2007-04-18 12:50:11.0 +0200 +++ net/ipv4/fib_trie.c 2007-04-18 12:42:29.423993536 +0200 @@ -1205,6 +1205,9 @@ fib_release_info(fi_drop); if (state FA_S_ACCESSED) rt_cache_flush(-1); + rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb-tb_id, + cfg-fc_nlinfo, NLM_F_REPLACE); + goto succeeded; } /* Error if we find a perfect match which @@ -1256,7 +1259,7 @@ rt_cache_flush(-1); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb-tb_id, - cfg-fc_nlinfo); + cfg-fc_nlinfo, 0); succeeded: return 0; @@ -1599,7 +1602,7 @@ fa = fa_to_delete; rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb-tb_id, - cfg-fc_nlinfo); + cfg-fc_nlinfo, 0); l = fib_find_node(t, key); li = find_leaf_info(l, plen); --- net/ipv4.old/fib_semantics.c2007-04-18 12:50:11.0 +0200 +++ net/ipv4/fib_semantics.c2007-04-18 12:40:54.807377448 +0200 @@ -301,7 +301,7 @@ } void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, - int dst_len, u32 tb_id, struct nl_info *info) + int dst_len, u32 tb_id, struct nl_info *info, unsigned int nlm_flags) { struct sk_buff *skb; u32 seq = info-nlh ? 
info-nlh-nlmsg_seq : 0; @@ -313,7 +313,7 @@ err = fib_dump_info(skb, info-pid, seq, event, tb_id, fa-fa_type, fa-fa_scope, key, dst_len, - fa-fa_tos, fa-fa_info, 0); + fa-fa_tos, fa-fa_info, nlm_flags); /* failure implies BUG in fib_nlmsg_size() */ BUG_ON(err 0); --- net/ipv4.old/fib_lookup.h 2007-04-18 12:50:11.0 +0200 +++ net/ipv4/fib_lookup.h 2007-04-18 12:43:42.377902856 +0200 @@ -30,7 +30,7 @@ int dst_len, u8 tos, struct fib_info *fi, unsigned int); extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, - int dst_len, u32 tb_id, struct nl_info *info); + int dst_len, u32 tb_id, struct nl_info *info, unsigned int nlm_flags); extern struct fib_alias *fib_find_alias(struct list_head *fah,
[PATCH] fix comments for register_netdev()
Correct the function name in the comments supplied with register_netdev() Signed-off-by: Borislav Petkov [EMAIL PROTECTED] Index: 21-rc7/net/core/dev.c === --- 21-rc7.orig/net/core/dev.c +++ 21-rc7/net/core/dev.c @@ -3002,7 +3002,7 @@ out: * chain. 0 is returned on success. A negative errno code is returned * on a failure to set up the device, or if the name is a duplicate. * - * This is a wrapper around register_netdev that takes the rtnl semaphore + * This is a wrapper around register_netdevice that takes the rtnl semaphore * and expands the device name if you passed a format string to * alloc_netdev. */ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Bugme-new] [Bug 8320] New: replacing route in kernel doesn't send netlink message
Milan Kocián wrote: On Tue, 2007-04-17 at 14:58 +0200, Patrick McHardy wrote: Milan, could you cook up another patch which uses NLM_F_REPLACE? I can try it. Output is in patch below. Review carefully. I don't know if it's best approach. It's tested and working without problem (probably :-)) Looks good, but your mailer corrupted long lines. Please resend as attachment and sign off the patch. --- net/ipv4.old/fib_hash.c 2007-04-18 12:50:11.0 +0200 +++ net/ipv4/fib_hash.c 2007-04-18 12:39:49.081369320 +0200 @@ -443,7 +443,6 @@ if (cfg-fc_nlflags NLM_F_REPLACE) { struct fib_info *fi_drop; u8 state; - And please drop this unrelated whitespace change. --- net/ipv4.old/fib_semantics.c 2007-04-18 12:50:11.0 +0200 +++ net/ipv4/fib_semantics.c 2007-04-18 12:40:54.807377448 +0200 @@ -301,7 +301,7 @@ } void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, -int dst_len, u32 tb_id, struct nl_info *info) +int dst_len, u32 tb_id, struct nl_info *info, unsigned int nlm_flags) This should go on a new line since it exceeds 80 characters. --- net/ipv4.old/fib_lookup.h 2007-04-18 12:50:11.0 +0200 +++ net/ipv4/fib_lookup.h 2007-04-18 12:43:42.377902856 +0200 @@ -30,7 +30,7 @@ int dst_len, u8 tos, struct fib_info *fi, unsigned int); extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, - int dst_len, u32 tb_id, struct nl_info *info); + int dst_len, u32 tb_id, struct nl_info *info, unsigned int nlm_flags); Same here. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC][PATCH -mm take4 2/6] support multiple logging
On Wed, 18 Apr 2007 21:06:41 +0900 Keiichi KII [EMAIL PROTECTED] wrote: From: Keiichi KII [EMAIL PROTECTED] This patch contains the following changes for supporting multiple logging agents. 1. extend netconsole to multiple netpolls To send kernel messages to multiple logging agents, extend netconsole to be able to use multiple netpolls. Each netpoll sends kernel messages to its own logging agent. 2. change config parameter format We change config parameter format from single configuration to multiple configurations separated by ';'. ex) sending kernel messages to destination1 and destination2 using eth0. modprobe netconsole \ netconsole=@/eth0,@[destination1]/;@/eth0,@[destination2]/ Please include an update to Documentation/networking/netconsole.txt - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [BRIDGE] Unaligned access on IA64 when comparing ethernet addresses
On Wed, 18 Apr 2007 01:28:04 -0700 (PDT) David Miller [EMAIL PROTECTED] wrote: From: Pavel Emelianov [EMAIL PROTECTED] Date: Wed, 18 Apr 2007 10:43:56 +0400 [snip] --- linux-2.6.orig/net/bridge/br_private.h2007-04-17 13:26:48.0 -0700 +++ linux-2.6/net/bridge/br_private.h 2007-04-17 13:30:29.0 -0700 @@ -36,7 +36,7 @@ { unsigned char prio[2]; unsigned char addr[6]; -}; +} __attribute__((aligned(8))); Why 8? Mustn't it be 16? Address is to be 2-bytes aligned... Actually it could be made 2, the aligned() attribute is in bytes, not bits. It could be 2 but 8 might allow a compiler on a 64 bit platform to be smarter in comparisons and assignments. For 2.6.22, I'll make a nicer version similar to ktime_t. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] CONFIG_PACKET_MMAP should depend on MMU
Aubrey Li [EMAIL PROTECTED] wrote: Here, in the attachment I wrote a small test app. Please correct if there is anything wrong, and feel free to improve it. Okay... I have that working... probably. I don't know what output it's supposed to produce, but I see this: # /packet-mmap/sample_packet_mmap 00-00-00-01-00-00-00-8a-00-00-00-8a-00-42-00-50- 38-43-13-a0-00-07-ff-3c-00-00-00-00-00-00-00-00- 00-11-08-00-00-00-00-01-00-01-00-06-00-d0-b7-de- 32-7b-00-00-00-00-00-00-00-00-00-00-00-00-00-00- 00-00-00-90-cc-a2-75-6b-00-d0-b7-de-32-7b-08-00- 45-00-00-7c-00-00-40-00-40-11-b4-13-c0-a8-02-80- c0-a8-02-8d-08-01-03-20-00-68-8e-65-7f-5b-7e-03- 00-00-00-01-00-00-00-00-00-00-00-00-00-00-00-00- 00-00-00-00-00-00-00-00-00-00-00-01-00-00-81-a4- 00-00-00-01-00-00-00-00-00-00-00-00-00-1d-b8-86- 00-00-10-00-ff-ff-ff-ff-00-00-0e-f0-00-00-09-02- 01-cb-03-16-46-26-38-0d-00-00-00-00-46-26-38-1e- 00-00-00-00-46-26-38-1e-00-00-00-00-00-00-00-00- 00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00- [repeated] Does that look reasonable? I've attached the preliminary patch. Note four things about it: (1) I've had to add the get_unmapped_area() op to the proto_ops struct, but I've only done it for CONFIG_MMU=n as making it available for CONFIG_MMU=y could cause problems. (2) There's a race between packet_get_unmapped_area() being called and packet_mmap() being called. (3) I've added an extra check into packet_set_ring() to make sure the caller isn't asking for a combination of buffer size and count that will exceed ULONG_MAX. This protects a multiply done elsewhere. (4) The entire data buffer is allocated as one contiguous lump in NOMMU-mode. David --- [PATCH] NOMMU: Support mmap() on AF_PACKET sockets From: David Howells [EMAIL PROTECTED] Support mmap() on AF_PACKET sockets in NOMMU-mode kernels. 
Signed-Off-By: David Howells [EMAIL PROTECTED] --- include/linux/net.h|7 +++ include/net/sock.h |8 +++ net/core/sock.c| 10 net/packet/af_packet.c | 118 net/socket.c | 77 +++ 5 files changed, 219 insertions(+), 1 deletions(-) diff --git a/include/linux/net.h b/include/linux/net.h index 4db21e6..9e77cf6 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -161,6 +161,11 @@ struct proto_ops { int (*recvmsg) (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len, int flags); +#ifndef CONFIG_MMU + unsigned long (*get_unmapped_area)(struct file *file, struct socket *sock, +unsigned long addr, unsigned long len, +unsigned long pgoff, unsigned long flags); +#endif int (*mmap) (struct file *file, struct socket *sock, struct vm_area_struct * vma); ssize_t (*sendpage) (struct socket *sock, struct page *page, @@ -191,6 +196,8 @@ extern int sock_sendmsg(struct socket *sock, struct msghdr *msg, extern int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags); extern int sock_map_fd(struct socket *sock); +extern void sock_make_mappable(struct socket *sock, + unsigned long prot); extern struct socket *sockfd_lookup(int fd, int *err); #define sockfd_put(sock) fput(sock-file) extern int net_ratelimit(void); diff --git a/include/net/sock.h b/include/net/sock.h index 2c7d60c..d91edea 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -841,6 +841,14 @@ extern int sock_no_sendmsg(struct kiocb *, struct socket *, struct msghdr *, size_t); extern int sock_no_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t, int); +#ifndef CONFIG_MMU +extern unsigned long sock_no_get_unmapped_area(struct file *, + struct socket *, + unsigned long, + unsigned long, + unsigned long, + unsigned long); +#endif extern int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma); diff --git a/net/core/sock.c b/net/core/sock.c index 27c4f62..b288799
Re: [PATCH 2.6] WE-22 : prevent information leak on 64 bit
On Wed, Apr 18, 2007 at 01:34:50AM +0200, Michael Buesch wrote: I'd say nobody but you does fully understand WEXT. Not true. If tomorrow I was run over by an ICE, you could ask Jouni, Dan or Pavel to take over. Have fun... Jean - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [SCTP] Do not interleave non-fragments when in partial delivery
Hi David This is a bug fix, but done on top of 2.6.22 tree. I am trying to minimize the amount of conflict this would cause during merge by doing it this way. However, if you would rather keep all the bugfixes in net-2.6, I can do that too, but that _will_ give you conflicts. -vlad --- [SCTP] Do not interleave non-fragments when in partial delivery The way partial delivery is currently implemented, it is possible to interleave a message (either from another stream, or unordered) that is not part of partial delivery process. The only way to this is for a message to not be a fragment and be 'in order' or unordered for a given stream. This will result in bypassing the reassembly/ordering queues where things live during partial delivery, and the message will be delivered to the socket in the middle of partial delivery. This is a two-fold problem, in that: 1. the app now must check the stream-id and flags which it may not be doing. 2. this clears partial delivery state from the association and results in app communication hanging. This patch is a band-aid over a much bigger problem in that we don't do stream interleave. Signed-off-by: Vlad Yasevich [EMAIL PROTECTED] --- net/sctp/ulpqueue.c |9 - 1 files changed, 8 insertions(+), 1 deletions(-) diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index ae374a9..fb2ec63 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -224,7 +224,14 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) queue = sk-sk_receive_queue; } else { if (ulpq-pd_mode) { - if (event-msg_flags MSG_NOTIFICATION) + /* If the association is in partial delivery, we +* need to finish delivering the partially processed +* packet before passing any other data. This is +* because we don't truly support stream interleaving. 
+*/ + if ((event-msg_flags MSG_NOTIFICATION) || + (SCTP_DATA_NOT_FRAG == + (event-msg_flags SCTP_DATA_FRAG_MASK))) queue = sctp_sk(sk)-pd_lobby; else { clear_pd = event-msg_flags MSG_EOR; -- 1.5.0.3.438.gc49b2 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [BRIDGE] Unaligned access on IA64 when comparing ethernet addresses
From: Stephen Hemminger [EMAIL PROTECTED] Date: Wed, 18 Apr 2007 07:44:39 -0700 On Wed, 18 Apr 2007 01:28:04 -0700 (PDT) David Miller [EMAIL PROTECTED] wrote: From: Pavel Emelianov [EMAIL PROTECTED] Date: Wed, 18 Apr 2007 10:43:56 +0400 [snip] --- linux-2.6.orig/net/bridge/br_private.h 2007-04-17 13:26:48.0 -0700 +++ linux-2.6/net/bridge/br_private.h 2007-04-17 13:30:29.0 -0700 @@ -36,7 +36,7 @@ { unsigned char prio[2]; unsigned char addr[6]; -}; +} __attribute__((aligned(8))); Why 8? Mustn't it be 16? Address is to be 2-bytes aligned... Actually it could be made 2, the aligned() attribute is in bytes, not bits. It could be 2 but 8 might allow a compiler on a 64 bit platform to be smarter in comparisons and assignments. Absolutely. Although I don't think gcc does anything fancy since we don't use memcmp(). It's a tradeoff, we'd like to use unsigned long comparisons when both objects are aligned correctly but we also don't want it to use any more than one potentially mispredicted branch. We could add some alignment tests to the ethernet address comparison code, but it's probably more trouble than it's worth. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [SCTP] Do not interleave non-fragments when in partial delivery
From: Vlad Yasevich [EMAIL PROTECTED] Date: Wed, 18 Apr 2007 14:38:15 -0400 This is a bug fix, but done on top of 2.6.22 tree. I am trying to minimize the amount of conflict this would cause during merge by doing it this way. However, if you would rather keep all the bugfixes in net-2.6, I can do that too, but that _will_ give you conflicts. I've already determined that I'm destined to deal with tons of conflicts anyways :-) I'll backport this to net-2.6 unless you beat me to it :-) Thanks! - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [SCTP] Do not interleave non-fragments when in partial delivery
David Miller wrote: From: Vlad Yasevich [EMAIL PROTECTED] Date: Wed, 18 Apr 2007 14:38:15 -0400 This is a bug fix, but done on top of 2.6.22 tree. I am trying to minimize the amount of conflict this would cause during merge by doing it this way. However, if you would rather keep all the bugfixes in net-2.6, I can do that too, but that _will_ give you conflicts. I've already determined that I'm destined to deal with tons of conflicts anyways :-) I'll backport this to net-2.6 unless you beat me to it :-) attached :) -vlad From ebf07dc09049969873b50f594640fe475e1ee294 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich [EMAIL PROTECTED] Date: Wed, 18 Apr 2007 16:48:24 -0400 Subject: [PATCH] [SCTP] Do not interleave non-fragments when in partial delivery The way partial delivery is currently implemented, it is possible to interleave a message (either from another stream, or unordered) that is not part of partial delivery process. The only way to this is for a message to not be a fragment and be 'in order' or unordered for a given stream. This will result in bypassing the reassembly/ordering queues where things live during partial delivery, and the message will be delivered to the socket in the middle of partial delivery. This is a two-fold problem, in that: 1. the app now must check the stream-id and flags which it may not be doing. 2. this clears partial delivery state from the association and results in ulp hanging. This patch is a band-aid over a much bigger problem in that we don't do stream interleave. 
Signed-off-by: Vlad Yasevich [EMAIL PROTECTED] --- net/sctp/ulpqueue.c |9 - 1 files changed, 8 insertions(+), 1 deletions(-) diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index bfb197e..b29e3e4 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -190,7 +190,14 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) if (!sctp_sk(sk)-pd_mode) { queue = sk-sk_receive_queue; } else if (ulpq-pd_mode) { - if (event-msg_flags MSG_NOTIFICATION) + /* If the association is in partial delivery, we +* need to finish delivering the partially processed +* packet before passing any other data. This is +* because we don't truly support stream interleaving. +*/ + if ((event-msg_flags MSG_NOTIFICATION) || + (SCTP_DATA_NOT_FRAG == + (event-msg_flags SCTP_DATA_FRAG_MASK))) queue = sctp_sk(sk)-pd_lobby; else { clear_pd = event-msg_flags MSG_EOR; -- 1.5.0.3.438.gc49b2
Re: [PATCH] [SCTP] Do not interleave non-fragments when in partial delivery
From: Vlad Yasevich [EMAIL PROTECTED] Date: Wed, 18 Apr 2007 16:52:26 -0400 David Miller wrote: From: Vlad Yasevich [EMAIL PROTECTED] Date: Wed, 18 Apr 2007 14:38:15 -0400 This is a bug fix, but done on top of 2.6.22 tree. I am trying to minimize the amount of conflict this would cause during merge by doing it this way. However, if you would rather keep all the bugfixes in net-2.6, I can do that too, but that _will_ give you conflicts. I've already determined that I'm destined to deal with tons of conflicts anyways :-) I'll backport this to net-2.6 unless you beat me to it :-) attached :) Thanks a lot, applied. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: kernel BUG at net/core/skbuff.c in linux-2.6.21-rc6
Hi Paul: Paul Mackerras [EMAIL PROTECTED] wrote: So this doesn't change process_input_packet(), which treats the case where the first byte is 0xff (PPP_ALLSTATIONS) but the second byte is 0x03 (PPP_UI) as indicating a packet with a PPP protocol number of 0xff. Arguably that's wrong since PPP protocol 0xff is reserved, and the RFC does envision the possibility of receiving frames where the control field has values other than 0x03. Your fix is probably needed too. However, I think the issue that Patrick was trying to fix is the case where p[0] != PPP_ALLSTATIONS and therefore we'd still have a problem there. Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [net-2.6] IrDA: Correctly handling socket error
From: Olaf Kirch [EMAIL PROTECTED] This patch fixes an oops first reported in mid 2006 - see http://lkml.org/lkml/2006/8/29/358 The cause of this bug report is that when an error is signalled on the socket, irda_recvmsg_stream returns without removing a local wait_queue variable from the socket's sk_sleep queue. This causes havoc further down the road. In response to this problem, a patch was made that invoked sock_orphan on the socket when receiving a disconnect indication. This is not a good fix, as this sets sk_sleep to NULL, causing applications sleeping in recvmsg (and other places) to oops. This is against the latest net-2.6 and should be considered for -stable inclusion. Signed-off-by: Olaf Kirch [EMAIL PROTECTED] Signed-off-by: Samuel Ortiz [EMAIL PROTECTED] --- net/irda/af_irda.c |3 +-- 1 files changed, 1 insertions(+), 2 deletions(-) diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index f9b15d4..bf994c8 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -138,7 +138,6 @@ static void irda_disconnect_indication(void *instance, void *sap, sk-sk_shutdown |= SEND_SHUTDOWN; sk-sk_state_change(sk); - sock_orphan(sk); release_sock(sk); /* Close our TSAP. @@ -1445,7 +1444,7 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, */ ret = sock_error(sk); if (ret) - break; + ; else if (sk-sk_shutdown RCV_SHUTDOWN) ; else if (noblock) -- 1.5.1 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/7] [IrDA] af_irda: irda_recvmsg_stream cleanup
From: Olaf Kirch [EMAIL PROTECTED] This patch cleans up some code in irda_recvmsg_stream, replacing some homebrew code with prepare_to_wait/finish_wait, and by making the code honor sock_rcvtimeo. Signed-off-by: Olaf Kirch [EMAIL PROTECTED] Signed-off-by: Samuel Ortiz [EMAIL PROTECTED] --- net/irda/af_irda.c | 31 +-- 1 files changed, 13 insertions(+), 18 deletions(-) Index: net-2.6.22-quilt/net/irda/af_irda.c === --- net-2.6.22-quilt.orig/net/irda/af_irda.c2007-04-18 01:40:14.0 +0300 +++ net-2.6.22-quilt/net/irda/af_irda.c 2007-04-18 01:40:28.0 +0300 @@ -1403,8 +1403,8 @@ struct irda_sock *self = irda_sk(sk); int noblock = flags MSG_DONTWAIT; size_t copied = 0; - int target = 1; - DECLARE_WAITQUEUE(waitq, current); + int target; + long timeo; IRDA_DEBUG(3, %s()\n, __FUNCTION__); @@ -1417,8 +1417,8 @@ if (flags MSG_OOB) return -EOPNOTSUPP; - if (flags MSG_WAITALL) - target = size; + target = sock_rcvlowat(sk, flags MSG_WAITALL, size); + timeo = sock_rcvtimeo(sk, noblock); msg-msg_namelen = 0; @@ -1426,19 +1426,14 @@ int chunk; struct sk_buff *skb = skb_dequeue(sk-sk_receive_queue); - if (skb==NULL) { + if (skb == NULL) { + DEFINE_WAIT(wait); int ret = 0; if (copied = target) break; - /* The following code is a cut'n'paste of the -* wait_event_interruptible() macro. -* We don't us the macro because the test condition -* is messy. - Jean II */ - set_bit(SOCK_ASYNC_WAITDATA, sk-sk_socket-flags); - add_wait_queue(sk-sk_sleep, waitq); - set_current_state(TASK_INTERRUPTIBLE); + prepare_to_wait_exclusive(sk-sk_sleep, wait, TASK_INTERRUPTIBLE); /* * POSIX 1003.1g mandates this order. 
@@ -1451,17 +1446,17 @@ else if (noblock) ret = -EAGAIN; else if (signal_pending(current)) - ret = -ERESTARTSYS; + ret = sock_intr_errno(timeo); + else if (sk-sk_state != TCP_ESTABLISHED) + ret = -ENOTCONN; else if (skb_peek(sk-sk_receive_queue) == NULL) /* Wait process until data arrives */ schedule(); - current-state = TASK_RUNNING; - remove_wait_queue(sk-sk_sleep, waitq); - clear_bit(SOCK_ASYNC_WAITDATA, sk-sk_socket-flags); + finish_wait(sk-sk_sleep, wait); - if(ret) - return(ret); + if (ret) + return ret; if (sk-sk_shutdown RCV_SHUTDOWN) break; -- - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/7] [IrDA] af_irda: irda_accept cleanup
This patch removes a cut'n'paste copy of wait_event_interruptible from irda_accept. Signed-off-by: Samuel Ortiz [EMAIL PROTECTED] Acked-by: Olaf Kirch [EMAIL PROTECTED] --- net/irda/af_irda.c | 34 -- 1 files changed, 8 insertions(+), 26 deletions(-) Index: net-2.6.22-quilt/net/irda/af_irda.c === --- net-2.6.22-quilt.orig/net/irda/af_irda.c2007-04-18 02:16:43.0 +0300 +++ net-2.6.22-quilt/net/irda/af_irda.c 2007-04-18 02:16:43.0 +0300 @@ -873,37 +873,19 @@ * calling us, the data is waiting for us ;-) * Jean II */ - skb = skb_dequeue(sk-sk_receive_queue); - if (skb == NULL) { - int ret = 0; - DECLARE_WAITQUEUE(waitq, current); + while (1) { + skb = skb_dequeue(sk-sk_receive_queue); + if (skb) + break; /* Non blocking operation */ if (flags O_NONBLOCK) return -EWOULDBLOCK; - /* The following code is a cut'n'paste of the -* wait_event_interruptible() macro. -* We don't us the macro because the condition has -* side effects : we want to make sure that only one -* skb get dequeued - Jean II */ - add_wait_queue(sk-sk_sleep, waitq); - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - skb = skb_dequeue(sk-sk_receive_queue); - if (skb != NULL) - break; - if (!signal_pending(current)) { - schedule(); - continue; - } - ret = -ERESTARTSYS; - break; - } - current-state = TASK_RUNNING; - remove_wait_queue(sk-sk_sleep, waitq); - if(ret) - return -ERESTARTSYS; + err = wait_event_interruptible(*(sk-sk_sleep), + skb_peek(sk-sk_receive_queue)); + if (err) + return err; } newsk = newsock-sk; -- - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/7] [IrDA] af_irda: Silence kernel message in irda_recvmsg_stream
From: Olaf Kirch [EMAIL PROTECTED] This patch silences an IRDA_ASSERT in irda_recvmsg_stream, as described in http://bugzilla.kernel.org/show_bug.cgi?id=7512 irda_disconnect_indication would set sk-sk_err to ECONNRESET, and a subsequent call to recvmsg would print an irritating kernel message and return -1. When a connected socket is closed by the peer, recvmsg should return 0 rather than an error. This patch fixes this. Signed-off-by: Olaf Kirch [EMAIL PROTECTED] Signed-off-by: Samuel Ortiz [EMAIL PROTECTED] --- net/irda/af_irda.c | 18 +++--- 1 files changed, 11 insertions(+), 7 deletions(-) Index: net-2.6.22-quilt/net/irda/af_irda.c === --- net-2.6.22-quilt.orig/net/irda/af_irda.c2007-04-18 02:10:56.0 +0300 +++ net-2.6.22-quilt/net/irda/af_irda.c 2007-04-18 02:14:24.0 +0300 @@ -131,15 +131,13 @@ } /* Prevent race conditions with irda_release() and irda_shutdown() */ + bh_lock_sock(sk); if (!sock_flag(sk, SOCK_DEAD) sk-sk_state != TCP_CLOSE) { - lock_sock(sk); sk-sk_state = TCP_CLOSE; - sk-sk_err = ECONNRESET; sk-sk_shutdown |= SEND_SHUTDOWN; sk-sk_state_change(sk); sock_orphan(sk); - release_sock(sk); /* Close our TSAP. * If we leave it open, IrLMP put it back into the list of @@ -159,6 +157,7 @@ self-tsap = NULL; } } + bh_unlock_sock(sk); /* Note : once we are there, there is not much you want to do * with the socket anymore, apart from closing it. @@ -1062,7 +1061,8 @@ if (sk-sk_state != TCP_ESTABLISHED) { sock-state = SS_UNCONNECTED; - return sock_error(sk); /* Always set at this point */ + err = sock_error(sk); + return err? 
err : -ECONNRESET; } sock-state = SS_CONNECTED; @@ -1356,7 +1356,9 @@ IRDA_DEBUG(4, %s()\n, __FUNCTION__); IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(!sock_error(sk), return -1;); + + if ((err = sock_error(sk)) 0) + return err; skb = skb_recv_datagram(sk, flags ~MSG_DONTWAIT, flags MSG_DONTWAIT, err); @@ -1403,13 +1405,15 @@ struct irda_sock *self = irda_sk(sk); int noblock = flags MSG_DONTWAIT; size_t copied = 0; - int target; + int target, err; long timeo; IRDA_DEBUG(3, %s()\n, __FUNCTION__); IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(!sock_error(sk), return -1;); + + if ((err = sock_error(sk)) 0) + return err; if (sock-flags __SO_ACCEPTCON) return(-EINVAL); -- - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 5/7] [IrDA] IrDA monitor mode
Through a protocol specific ioctl, one can disable IrDA TX in order to monitor an IrDA link. Signed-off-by: Samuel Ortiz [EMAIL PROTECTED] --- include/linux/irda.h |7 + include/net/irda/irlap.h |2 + net/irda/af_irda.c | 58 +- net/irda/irlap_frame.c |8 ++ 4 files changed, 74 insertions(+), 1 deletions(-) Index: net-2.6.22-quilt/include/linux/irda.h === --- net-2.6.22-quilt.orig/include/linux/irda.h 2007-04-18 01:57:48.0 +0300 +++ net-2.6.22-quilt/include/linux/irda.h 2007-04-18 02:16:43.0 +0300 @@ -172,6 +172,12 @@ #define SIOCSDTRRTS(SIOCDEVPRIVATE + 8) #define SIOCGQOS (SIOCDEVPRIVATE + 9) +/* Protocol private ioctls */ +#define SIOCIRDASETMODE (SIOCPROTOPRIVATE + 0) +#define SIOCIRDAGETMODE (SIOCPROTOPRIVATE + 1) + +#define IRDA_MODE_MONITOR 0x1 + /* No reason to include linux/if.h just because of this one ;-) */ #define IRNAMSIZ 16 @@ -209,6 +215,7 @@ } ifr_ifru; }; +#define ifr_name ifr_ifrn.ifrn_name #define ifr_baudrate ifr_ifru.ifru_qos.baudrate #define ifr_receiving ifr_ifru.ifru_receiving #define ifr_dongleifr_ifru.ifru_dongle Index: net-2.6.22-quilt/include/net/irda/irlap.h === --- net-2.6.22-quilt.orig/include/net/irda/irlap.h 2007-04-18 01:57:48.0 +0300 +++ net-2.6.22-quilt/include/net/irda/irlap.h 2007-04-18 02:16:43.0 +0300 @@ -208,6 +208,8 @@ intxbofs_delay; /* Nr of XBOF's used to MTT */ intbofs_count;/* Negotiated extra BOFs */ intnext_bofs; /* Negotiated extra BOFs after next frame */ + + intmode; /* 1 is for monitor mode (TX disabled) */ }; /* Index: net-2.6.22-quilt/net/irda/af_irda.c === --- net-2.6.22-quilt.orig/net/irda/af_irda.c2007-04-18 02:16:43.0 +0300 +++ net-2.6.22-quilt/net/irda/af_irda.c 2007-04-18 02:16:43.0 +0300 @@ -49,7 +49,6 @@ #include linux/sockios.h #include linux/init.h #include linux/net.h -#include linux/irda.h #include linux/poll.h #include asm/ioctls.h/* TIOCOUTQ, TIOCINQ */ @@ -1745,6 +1744,7 @@ static int irda_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock-sk; + void __user 
*argp = (void __user *)arg; IRDA_DEBUG(4, %s(), cmd=%#x\n, __FUNCTION__, cmd); @@ -1786,6 +1786,62 @@ case SIOCGIFMETRIC: case SIOCSIFMETRIC: return -EINVAL; + + case SIOCIRDASETMODE: { + struct if_irda_req if_irda; + struct net_device * dev; + struct irlap_cb * irlap; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (copy_from_user(if_irda, argp, sizeof(struct if_irda_req))) + return -EFAULT; + + dev = dev_get_by_name(if_irda.ifr_name); + if (!dev) + return -ENODEV; + + irlap = (struct irlap_cb *)dev-atalk_ptr; + if (!irlap) + return -ENODEV; + + IRDA_DEBUG(4, %s(): Setting %s to 0x%x\n, __FUNCTION__, + dev-name, if_irda.ifr_mode); + + irlap-mode = if_irda.ifr_mode; + + dev_put(dev); + + break; + } + case SIOCIRDAGETMODE: { + struct if_irda_req if_irda; + struct net_device * dev; + struct irlap_cb * irlap; + + if (copy_from_user(if_irda, argp, sizeof(struct if_irda_req))) + return -EFAULT; + + dev = dev_get_by_name(if_irda.ifr_name); + if (!dev) + return -ENODEV; + + irlap = (struct irlap_cb *)dev-atalk_ptr; + if (!irlap) + return -ENODEV; + + if_irda.ifr_mode = irlap-mode; + + dev_put(dev); + + IRDA_DEBUG(4, %s(): %s mode is 0x%x\n, __FUNCTION__, + dev-name, if_irda.ifr_mode); + + if (copy_to_user(argp, if_irda, sizeof(struct if_irda_req))) + return -EFAULT; + } + break; default: IRDA_DEBUG(1, %s(), doing device ioctl!\n, __FUNCTION__); return -ENOIOCTLCMD; Index: net-2.6.22-quilt/net/irda/irlap_frame.c === --- net-2.6.22-quilt.orig/net/irda/irlap_frame.c2007-04-18 01:57:48.0 +0300 +++ net-2.6.22-quilt/net/irda/irlap_frame.c 2007-04-18 02:16:43.0 +0300 @@ -101,6 +101,14 @@ irlap_insert_info(self, skb); + if (unlikely(self-mode IRDA_MODE_MONITOR)) { +
[PATCH 7/7] [IrDA] Misc spelling corrections.
From: Guennadi Liakhovetski [EMAIL PROTECTED] Spelling corrections, from to to too. Signed-off-by: G. Liakhovetski [EMAIL PROTECTED] Signed-off-by: Samuel Ortiz [EMAIL PROTECTED] --- net/irda/irlap_event.c |2 +- net/irda/irlap_frame.c | 14 +++--- net/irda/irttp.c |4 ++-- net/irda/parameters.c |8 4 files changed, 14 insertions(+), 14 deletions(-) Index: net-2.6.22-quilt/net/irda/irlap_event.c === --- net-2.6.22-quilt.orig/net/irda/irlap_event.c2007-04-18 01:57:48.0 +0300 +++ net-2.6.22-quilt/net/irda/irlap_event.c 2007-04-18 02:16:44.0 +0300 @@ -590,7 +590,7 @@ if (!self-discovery_log) { IRDA_WARNING(%s: discovery log is gone! maybe the discovery timeout has been set - to short?\n, __FUNCTION__); + too short?\n, __FUNCTION__); break; } hashbin_insert(self-discovery_log, Index: net-2.6.22-quilt/net/irda/irlap_frame.c === --- net-2.6.22-quilt.orig/net/irda/irlap_frame.c2007-04-18 02:16:43.0 +0300 +++ net-2.6.22-quilt/net/irda/irlap_frame.c 2007-04-18 02:16:44.0 +0300 @@ -421,7 +421,7 @@ IRDA_ASSERT(self-magic == LAP_MAGIC, return;); if (!pskb_may_pull(skb, sizeof(struct xid_frame))) { - IRDA_ERROR(%s: frame to short!\n, __FUNCTION__); + IRDA_ERROR(%s: frame too short!\n, __FUNCTION__); return; } @@ -492,7 +492,7 @@ char *text; if (!pskb_may_pull(skb, sizeof(struct xid_frame))) { - IRDA_ERROR(%s: frame to short!\n, __FUNCTION__); + IRDA_ERROR(%s: frame too short!\n, __FUNCTION__); return; } @@ -536,7 +536,7 @@ /* Check if things are sane at this point... 
*/ if((discovery_info == NULL) || !pskb_may_pull(skb, 3)) { - IRDA_ERROR(%s: discovery frame to short!\n, + IRDA_ERROR(%s: discovery frame too short!\n, __FUNCTION__); return; } @@ -1181,7 +1181,7 @@ IRDA_ASSERT(info != NULL, return;); if (!pskb_may_pull(skb, 4)) { - IRDA_ERROR(%s: frame to short!\n, __FUNCTION__); + IRDA_ERROR(%s: frame too short!\n, __FUNCTION__); return; } @@ -1270,7 +1270,7 @@ IRDA_DEBUG(2, %s()\n, __FUNCTION__); if (!pskb_may_pull(skb, sizeof(*frame))) { - IRDA_ERROR(%s: frame to short!\n, __FUNCTION__); + IRDA_ERROR(%s: frame too short!\n, __FUNCTION__); return; } frame = (struct test_frame *) skb-data; @@ -1278,7 +1278,7 @@ /* Broadcast frames must carry saddr and daddr fields */ if (info-caddr == CBROADCAST) { if (skb-len sizeof(struct test_frame)) { - IRDA_DEBUG(0, %s() test frame to short!\n, + IRDA_DEBUG(0, %s() test frame too short!\n, __FUNCTION__); return; } @@ -1344,7 +1344,7 @@ /* Check if frame is large enough for parsing */ if (!pskb_may_pull(skb, 2)) { - IRDA_ERROR(%s: frame to short!\n, __FUNCTION__); + IRDA_ERROR(%s: frame too short!\n, __FUNCTION__); dev_kfree_skb(skb); return -1; } Index: net-2.6.22-quilt/net/irda/irttp.c === --- net-2.6.22-quilt.orig/net/irda/irttp.c 2007-04-18 01:57:48.0 +0300 +++ net-2.6.22-quilt/net/irda/irttp.c 2007-04-18 02:16:44.0 +0300 @@ -551,7 +551,7 @@ } if (skb-len self-max_seg_size) { - IRDA_DEBUG(1, %s(), UData is to large for IrLAP!\n, + IRDA_DEBUG(1, %s(), UData is too large for IrLAP!\n, __FUNCTION__); goto err; } @@ -598,7 +598,7 @@ * inside an IrLAP frame */ if ((self-tx_max_sdu_size == 0) (skb-len self-max_seg_size)) { - IRDA_ERROR(%s: SAR disabled, and data is to large for IrLAP!\n, + IRDA_ERROR(%s: SAR disabled, and data is too large for IrLAP!\n, __FUNCTION__); ret = -EMSGSIZE; goto err; Index: net-2.6.22-quilt/net/irda/parameters.c === --- net-2.6.22-quilt.orig/net/irda/parameters.c 2007-04-18 01:57:48.0 +0300 +++ net-2.6.22-quilt/net/irda/parameters.c 2007-04-18 02:16:44.0
[PATCH 4/7] [IrDA] af_irda: IRDA_ASSERT cleanups
In af_irda.c, the multiple IRDA_ASSERT() are either hiding bugs, useless, or returning the wrong value. Let's clean that up. Signed-off-by: Samuel Ortiz [EMAIL PROTECTED] --- net/irda/af_irda.c | 32 +--- 1 files changed, 5 insertions(+), 27 deletions(-) Index: net-2.6.22-quilt/net/irda/af_irda.c === --- net-2.6.22-quilt.orig/net/irda/af_irda.c2007-04-18 02:16:43.0 +0300 +++ net-2.6.22-quilt/net/irda/af_irda.c 2007-04-18 02:16:43.0 +0300 @@ -89,7 +89,6 @@ self = instance; sk = instance; - IRDA_ASSERT(sk != NULL, return -1;); err = sock_queue_rcv_skb(sk, skb); if (err) { @@ -306,8 +305,6 @@ IRDA_DEBUG(2, %s()\n, __FUNCTION__); - IRDA_ASSERT(self != NULL, return;); - skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, GFP_ATOMIC); if (skb == NULL) { @@ -337,7 +334,7 @@ self = instance; sk = instance; - IRDA_ASSERT(sk != NULL, return;); + BUG_ON(sk == NULL); switch (flow) { case FLOW_STOP: @@ -449,7 +446,7 @@ IRDA_DEBUG(2, %s()\n, __FUNCTION__); self = (struct irda_sock *) priv; - IRDA_ASSERT(self != NULL, return;); + BUG_ON(self == NULL); /* Nothing for the caller */ self-cachelog = NULL; @@ -546,8 +543,6 @@ { IRDA_DEBUG(2, %s(%p, %s)\n, __FUNCTION__, self, name); - IRDA_ASSERT(self != NULL, return -1;); - if (self-iriap) { IRDA_WARNING(%s(): busy with a previous query\n, __FUNCTION__); @@ -635,8 +630,6 @@ IRDA_DEBUG(2, %s(), name=%s\n, __FUNCTION__, name); - IRDA_ASSERT(self != NULL, return -1;); - /* Ask lmp for the current discovery log * Note : we have to use irlmp_get_discoveries(), as opposed * to play with the cachelog directly, because while we are @@ -784,8 +777,6 @@ struct irda_sock *self = irda_sk(sk); int err; - IRDA_ASSERT(self != NULL, return -1;); - IRDA_DEBUG(2, %s(%p)\n, __FUNCTION__, self); if (addr_len != sizeof(struct sockaddr_irda)) @@ -841,8 +832,6 @@ IRDA_DEBUG(2, %s()\n, __FUNCTION__); - IRDA_ASSERT(self != NULL, return -1;); - err = irda_create(newsock, sk-sk_protocol); if (err) return err; @@ -889,10 +878,12 @@ } newsk = newsock-sk; + if (newsk 
== NULL) + return -EIO; + newsk-sk_state = TCP_ESTABLISHED; new = irda_sk(newsk); - IRDA_ASSERT(new != NULL, return -1;); /* Now attach up the new socket */ new-tsap = irttp_dup(self-tsap, new); @@ -1154,8 +1145,6 @@ { IRDA_DEBUG(2, %s(%p)\n, __FUNCTION__, self); - IRDA_ASSERT(self != NULL, return;); - /* Unregister with IrLMP */ irlmp_unregister_client(self-ckey); irlmp_unregister_service(self-skey); @@ -1274,7 +1263,6 @@ return -ENOTCONN; self = irda_sk(sk); - IRDA_ASSERT(self != NULL, return -1;); /* Check if IrTTP is wants us to slow down */ @@ -1337,8 +1325,6 @@ IRDA_DEBUG(4, %s()\n, __FUNCTION__); - IRDA_ASSERT(self != NULL, return -1;); - if ((err = sock_error(sk)) 0) return err; @@ -1392,8 +1378,6 @@ IRDA_DEBUG(3, %s()\n, __FUNCTION__); - IRDA_ASSERT(self != NULL, return -1;); - if ((err = sock_error(sk)) 0) return err; @@ -1527,7 +1511,6 @@ return -ENOTCONN; self = irda_sk(sk); - IRDA_ASSERT(self != NULL, return -1;); /* * Check that we don't send out too big frames. This is an unreliable @@ -1596,7 +1579,6 @@ } self = irda_sk(sk); - IRDA_ASSERT(self != NULL, return -1;); /* Check if an address was specified with sendto. Jean II */ if (msg-msg_name) { @@ -1670,8 +1652,6 @@ struct sock *sk = sock-sk; struct irda_sock *self = irda_sk(sk); - IRDA_ASSERT(self != NULL, return -1;); - IRDA_DEBUG(1, %s(%p)\n, __FUNCTION__, self); sk-sk_state = TCP_CLOSE; @@ -1844,8 +1824,6 @@ struct ias_attrib * ias_attr; /* Attribute in IAS object */ int opt; - IRDA_ASSERT(self != NULL, return -1;); - IRDA_DEBUG(2, %s(%p)\n, __FUNCTION__, self); if (level != SOL_IRLMP) -- - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/7] [IrDA] net-2.6.22 fixes
Hi Dave, Here go 7 patches for IrDA, against your latest net-2.6.22. They are mainly af_irda.c fixes/cleanup. Also, I just sent another af_irda.c oops fix, against net-2.6 (IrDA: Correctly handling socket error). If you want it against net-2.6.22 as well, just let me know. Cheers, Samuel. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [net-2.6] IrDA: Correctly handling socket error
From: Samuel Ortiz [EMAIL PROTECTED] Date: Thu, 19 Apr 2007 00:45:26 +0300 From: Olaf Kirch [EMAIL PROTECTED] This patch fixes an oops first reported in mid 2006 - see http://lkml.org/lkml/2006/8/29/358 The cause of this bug report is that when an error is signalled on the socket, irda_recvmsg_stream returns without removing a local wait_queue variable from the socket's sk_sleep queue. This causes havoc further down the road. In response to this problem, a patch was made that invoked sock_orphan on the socket when receiving a disconnect indication. This is not a good fix, as this sets sk_sleep to NULL, causing applications sleeping in recvmsg (and other places) to oops. This is against the latest net-2.6 and should be considered for -stable inclusion. Signed-off-by: Olaf Kirch [EMAIL PROTECTED] Signed-off-by: Samuel Ortiz [EMAIL PROTECTED] Applied and I'll push this to -stable too, thanks! - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 6/7] [IrDA] Adding carriage returns to mcs7780 debug statements
Signed-off-by: Samuel Ortiz [EMAIL PROTECTED] --- drivers/net/irda/mcs7780.c | 26 +- 1 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/irda/mcs7780.c b/drivers/net/irda/mcs7780.c index 54d1d54..0de8672 100644 --- a/drivers/net/irda/mcs7780.c +++ b/drivers/net/irda/mcs7780.c @@ -200,14 +200,14 @@ static inline int mcs_setup_transceiver_vishay(struct mcs_cb *mcs) /* Setup a communication between mcs7780 and agilent chip. */ static inline int mcs_setup_transceiver_agilent(struct mcs_cb *mcs) { - IRDA_WARNING(This transceiver type is not supported yet.); + IRDA_WARNING(This transceiver type is not supported yet.\n); return 1; } /* Setup a communication between mcs7780 and sharp chip. */ static inline int mcs_setup_transceiver_sharp(struct mcs_cb *mcs) { - IRDA_WARNING(This transceiver type is not supported yet.); + IRDA_WARNING(This transceiver type is not supported yet.\n); return 1; } @@ -279,7 +279,7 @@ static inline int mcs_setup_transceiver(struct mcs_cb *mcs) break; default: - IRDA_WARNING(Unknown transceiver type: %d, + IRDA_WARNING(Unknown transceiver type: %d\n, mcs-transceiver_type); ret = 1; } @@ -318,7 +318,7 @@ static inline int mcs_setup_transceiver(struct mcs_cb *mcs) return ret; error: - IRDA_ERROR(%s, msg); + IRDA_ERROR(%s\n, msg); return ret; } @@ -587,7 +587,7 @@ static int mcs_speed_change(struct mcs_cb *mcs) } while(cnt++ 100 (rval MCS_IRINTX)); if(cnt = 100) { - IRDA_ERROR(unable to change speed); + IRDA_ERROR(unable to change speed\n); ret = -EIO; goto error; } @@ -638,7 +638,7 @@ static int mcs_speed_change(struct mcs_cb *mcs) default: ret = 1; - IRDA_WARNING(Unknown transceiver type: %d, + IRDA_WARNING(Unknown transceiver type: %d\n, mcs-transceiver_type); } if (unlikely(ret)) @@ -733,7 +733,7 @@ static int mcs_net_open(struct net_device *netdev) sprintf(hwname, usb#%d, mcs-usbdev-devnum); mcs-irlap = irlap_open(netdev, mcs-qos, hwname); if (!mcs-irlap) { - IRDA_ERROR(mcs7780: irlap_open failed); + IRDA_ERROR(mcs7780: 
irlap_open failed\n); goto error2; } @@ -862,7 +862,7 @@ static int mcs_hard_xmit(struct sk_buff *skb, struct net_device *ndev) mcs-out_buf, wraplen, mcs_send_irq, mcs); if ((ret = usb_submit_urb(mcs-tx_urb, GFP_ATOMIC))) { - IRDA_ERROR(failed tx_urb: %d, ret); + IRDA_ERROR(failed tx_urb: %d\n, ret); switch (ret) { case -ENODEV: case -EPIPE: @@ -897,7 +897,7 @@ static int mcs_probe(struct usb_interface *intf, if (!ndev) goto error1; - IRDA_DEBUG(1, MCS7780 USB-IrDA bridge found at %d., udev-devnum); + IRDA_DEBUG(1, MCS7780 USB-IrDA bridge found at %d.\n, udev-devnum); /* what is it realy for? */ SET_MODULE_OWNER(ndev); @@ -905,7 +905,7 @@ static int mcs_probe(struct usb_interface *intf, ret = usb_reset_configuration(udev); if (ret != 0) { - IRDA_ERROR(mcs7780: usb reset configuration failed); + IRDA_ERROR(mcs7780: usb reset configuration failed\n); goto error2; } @@ -950,7 +950,7 @@ static int mcs_probe(struct usb_interface *intf, if (ret != 0) goto error2; - IRDA_DEBUG(1, IrDA: Registered MosChip MCS7780 device as %s, + IRDA_DEBUG(1, IrDA: Registered MosChip MCS7780 device as %s\n, ndev-name); mcs-transceiver_type = transceiver_type; @@ -981,7 +981,7 @@ static void mcs_disconnect(struct usb_interface *intf) free_netdev(mcs-netdev); usb_set_intfdata(intf, NULL); - IRDA_DEBUG(0, MCS7780 now disconnected.); + IRDA_DEBUG(0, MCS7780 now disconnected.\n); } /* Module insertion */ @@ -992,7 +992,7 @@ static int __init mcs_init(void) /* register this driver with the USB subsystem */ result = usb_register(mcs_driver); if (result) - IRDA_ERROR(usb_register failed. Error number %d, result); + IRDA_ERROR(usb_register failed. Error number %d\n, result); return result; } -- 1.5.1 -- - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Make dev_kfree_skb_any check if the skb is valid
If dev_kfree_skb_any is called and it then calls dev_kfree_skb_irq, that call will dereference the skb. If the skb is invalid, down the drain we go. This one-liner checks to see if the skb is valid as part of the determination of whether to call dev_kfree_skb_irq. Signed-off-by: Erik Hovland [EMAIL PROTECTED] --- net/core/dev.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 4dc93cc..85f4a4c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1131,7 +1131,7 @@ EXPORT_SYMBOL(__netif_rx_schedule); void dev_kfree_skb_any(struct sk_buff *skb) { - if (in_irq() || irqs_disabled()) + if (skb && (in_irq() || irqs_disabled())) dev_kfree_skb_irq(skb); else dev_kfree_skb(skb); - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Make dev_kfree_skb_any check if the skb is valid
From: Erik Hovland [EMAIL PROTECTED] Date: Wed, 18 Apr 2007 15:33:44 -0700 If dev_kfree_skb_any is called and it then calls dev_kfree_skb_irq. That call will dereference the skb. If the skb is invalid, down the drain we go. This one-liner checks to see if the skb is valid as part of the determination of whether to call dev_kfree_skb_irq. Signed-off-by: Erik Hovland [EMAIL PROTECTED] This should never be invoked with a NULL skb argument. Who is doing that? - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
AF_PACKET how to get the original netdev from a packet received from a bonded master
When using a PF_PACKET socket with bonded interfaces, there is no way to get the slave interface (physical interface) the packet was actually received on. It looks like there isn't a way to pass the original device {see packet_rcv() in ./net/packet/af_packet.c} to the socket reader. When an interface is bonded, dev, by the time it reaches this function, points to the bonding interface, not the Ethernet interface the packet was received on. sockaddr_ll does not contain any fields that could be used to pass the original device in. As such, there is no way for an application that is interested in the physical interface the packet was received on to get the information when running a bond (one that uses the same MAC address for all of its slaves). Observation: if one uses tcpdump on a bonded slave, only egress packets are tapped. If one uses tcpdump on a bonded master, ingress packets are tapped and egress packets are tapped. From the above, it would make sense to have the same behavior on ingress/egress for both slave and master. The latter would require the tapped packet be duplicated at packet_rcv() and passed up to the socket layer as two copies - one associated with the master dev - one associated with the slave dev. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Make dev_kfree_skb_any check if the skb is valid
On Wed, Apr 18, 2007 at 03:44:16PM -0700, David Miller wrote: From: Erik Hovland [EMAIL PROTECTED] Date: Wed, 18 Apr 2007 15:33:44 -0700 If dev_kfree_skb_any is called and it then calls dev_kfree_skb_irq. That call will dereference the skb. If the skb is invalid, down the drain we go. This one-liner checks to see if the skb is valid as part of the determination of whether to call dev_kfree_skb_irq. Signed-off-by: Erik Hovland [EMAIL PROTECTED] This should never be invoked with a NULL skb argument. Who is doing that? Heh, the reason I came up with this patch is that the code in drivers/usb/gadget/ether.c at about line 1653 will attempt to allocate an skb. If it fails then it uses a goto to jump to line 1672 where it will call dev_kfree_skb_any (skb) on a potentially null skb. I put a validity check there and sent it off to the USB gadget maintainer. He asked me to instead make the dev_kfree_skb_any call more robust and see how that went over on the netdev list. Like a lead balloon? E -- Erik Hovland mail: [EMAIL PROTECTED] web: http://hovland.org/ PGP/GPG public key available on request - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: AF_PACKET how to get the original netdev from a packet received from a bonded master
From: Laurent Chavey [EMAIL PROTECTED] Date: Wed, 18 Apr 2007 16:05:27 -0700 When using PF_PACKET socket with bonded interfaces, there is no way to get the slave interface (physical interface) the packet was actually received on. That's right. There isn't a real good solution to this problem either. Decapsulation can happen to arbitrary levels, how many devices should we remember and how to do the reference counting on that correctly? As such, I don't think this will ever be provided. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Make dev_kfree_skb_any check if the skb is valid
From: Erik Hovland [EMAIL PROTECTED] Date: Wed, 18 Apr 2007 16:18:15 -0700 Heh, the reason I came up with this patch is that the code in drivers/usb/gadget/ether.c at about line 1653 will attempt to allocate an skb. If it fails then it uses a goto to jump to line 1672 where it will call dev_kfree_skb_any (skb) on a potentially null skb. I put a validity check there and sent it off to the USB gadget maintainer. He asked me to instead make the dev_kfree_skb_any call more robust and see how that went over on the netdev list. Like a lead balloon? Yep, like a lead balloon. The USB gadget driver maintainer should accept your patch to check for NULL in the gadget driver as that is the one and only case in the entire tree where that can happen and we're not eating a conditional and a return just for that one esoteric case. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [NETLINK] Don't attach callback to a going-away netlink socket
From: Pavel Emelianov [EMAIL PROTECTED] Date: Wed, 18 Apr 2007 12:16:18 +0400 The proposal is to call sock_orphan before detaching the callback in netlink_release() and to check for the sock to be SOCK_DEAD in netlink_dump_start() before setting a new callback. As discussed in this thread there might be other ways to approach this, but this fix is good for now. Patch applied, thank you. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: AF_PACKET how to get the original netdev from a packet received from a bonded master
On 4/18/07, David Miller [EMAIL PROTECTED] wrote: Ok, it will give you one level of decapsulation. What do we tell people who want 2 devices previous? :-) I can tell you that the intent of PJs patch was to provide the ifindex of the physical interface that a packet entered the system on, regardless of how many layers of encapsulation are involved. Of course it may not actually do that ... - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] CONFIG_PACKET_MMAP should depend on MMU
On 4/18/07, David Howells [EMAIL PROTECTED] wrote: Aubrey Li [EMAIL PROTECTED] wrote: Here, in the attachment I wrote a small test app. Please correct if there is anything wrong, and feel free to improve it. Okay... I have that working... probably. I don't know what output it's supposed to produce, but I see this: # /packet-mmap/sample_packet_mmap 00-00-00-01-00-00-00-8a-00-00-00-8a-00-42-00-50- 38-43-13-a0-00-07-ff-3c-00-00-00-00-00-00-00-00- 00-11-08-00-00-00-00-01-00-01-00-06-00-d0-b7-de- 32-7b-00-00-00-00-00-00-00-00-00-00-00-00-00-00- 00-00-00-90-cc-a2-75-6b-00-d0-b7-de-32-7b-08-00- 45-00-00-7c-00-00-40-00-40-11-b4-13-c0-a8-02-80- c0-a8-02-8d-08-01-03-20-00-68-8e-65-7f-5b-7e-03- 00-00-00-01-00-00-00-00-00-00-00-00-00-00-00-00- 00-00-00-00-00-00-00-00-00-00-00-01-00-00-81-a4- 00-00-00-01-00-00-00-00-00-00-00-00-00-1d-b8-86- 00-00-10-00-ff-ff-ff-ff-00-00-0e-f0-00-00-09-02- 01-cb-03-16-46-26-38-0d-00-00-00-00-46-26-38-1e- 00-00-00-00-46-26-38-1e-00-00-00-00-00-00-00-00- 00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00- [repeated] Does that look reasonable? Yes, it's reasonable for me, as long as your host IP is 192.168.2.128 and target IP is 192.168.2.141 See below 00-90-cc-a2-75-6b-|___ MAC Address 00-d0-b7-de-32-7b-| 08-00Type: IP 45-00Ver, IHL, TOS 00-7cIP.total.length 00-00- 40-00- 40TTL 11UDP protocol b4-13Checksum c0-a8-02-80---Source IP: 192.168.2.128 c0-a8-02-8d---Dest IP: 192.168.2.141 snip-- I've attached the preliminary patch. Thanks, I'll take a look and try to see if I can give some feedback. -Aubrey Note four things about it: (1) I've had to add the get_unmapped_area() op to the proto_ops struct, but I've only done it for CONFIG_MMU=n as making it available for CONFIG_MMU=y could cause problems. (2) There's a race between packet_get_unmapped_area() being called and packet_mmap() being called. (3) I've added an extra check into packet_set_ring() to make sure the caller isn't asking for a combination of buffer size and count that will exceed ULONG_MAX. 
This protects a multiply done elsewhere. (4) The entire data buffer is allocated as one contiguous lump in NOMMU-mode. David --- [PATCH] NOMMU: Support mmap() on AF_PACKET sockets From: David Howells [EMAIL PROTECTED] Support mmap() on AF_PACKET sockets in NOMMU-mode kernels. Signed-Off-By: David Howells [EMAIL PROTECTED] --- include/linux/net.h|7 +++ include/net/sock.h |8 +++ net/core/sock.c| 10 net/packet/af_packet.c | 118 net/socket.c | 77 +++ 5 files changed, 219 insertions(+), 1 deletions(-) diff --git a/include/linux/net.h b/include/linux/net.h index 4db21e6..9e77cf6 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -161,6 +161,11 @@ struct proto_ops { int (*recvmsg) (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len, int flags); +#ifndef CONFIG_MMU + unsigned long (*get_unmapped_area)(struct file *file, struct socket *sock, +unsigned long addr, unsigned long len, +unsigned long pgoff, unsigned long flags); +#endif int (*mmap) (struct file *file, struct socket *sock, struct vm_area_struct * vma); ssize_t (*sendpage) (struct socket *sock, struct page *page, @@ -191,6 +196,8 @@ extern int sock_sendmsg(struct socket *sock, struct msghdr *msg, extern int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags); extern int sock_map_fd(struct socket *sock); +extern void sock_make_mappable(struct socket *sock, + unsigned long prot); extern struct socket *sockfd_lookup(int fd, int *err); #define sockfd_put(sock) fput(sock-file) extern int net_ratelimit(void); diff --git a/include/net/sock.h b/include/net/sock.h index 2c7d60c..d91edea 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -841,6 +841,14 @@ extern int sock_no_sendmsg(struct kiocb *, struct socket *, struct msghdr *, size_t); extern int sock_no_recvmsg(struct kiocb *, struct socket *,
Re: AF_PACKET how to get the original netdev from a packet received from a bonded master
From: Chris Leech [EMAIL PROTECTED] Date: Wed, 18 Apr 2007 17:17:45 -0700 On 4/18/07, David Miller [EMAIL PROTECTED] wrote: Ok, it will give you one level of decapsulation. What do we tell people who want 2 devices previous? :-) I can tell you that the intent of PJs patch was to provide the ifindex of the physical interface that a packet entered the system on, regardless of how many layers of encapsulation are involved. Of course it may not actually do that ... Ok, I'll try to remember to high-priority reviewing PJ's patch on my next rebase of the net-2.6.22 tree which should be tonight or tomorrow sometime. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/0] Re-try changes for PMTUDISC_PROBE
This backs out the transport layer MTU checks that don't work. As a consequence, I had to back out the PMTUDISC_PROBE patch as well. These patches should fix the problem with ipv6 that the transport layer change tried to address, and re-implement PMTUDISC_PROBE. I think this approach is nicer than the last one, since it doesn't require a bit in struct sk_buff. Thanks, -John - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Revert [NET] Do pmtu check in transport layer
This reverts commit 87e927a0583bd4a8ba9e97cd75b58d8aa1c76e37. This idea does not work, as pointed at by Patrick McHardy. Signed-off-by: John Heffner [EMAIL PROTECTED] --- net/ipv4/ip_output.c |4 +--- net/ipv4/raw.c|8 +++- net/ipv6/ip6_output.c | 11 +-- net/ipv6/raw.c|7 ++- 4 files changed, 11 insertions(+), 19 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 79e71ee..34606ef 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -810,9 +810,7 @@ int ip_append_data(struct sock *sk, fragheaderlen = sizeof(struct iphdr) + (opt ? opt-optlen : 0); maxfraglen = ((mtu - fragheaderlen) ~7) + fragheaderlen; - if (inet-cork.length + length 0x - fragheaderlen || - (inet-pmtudisc = IP_PMTUDISC_DO -inet-cork.length + length mtu)) { + if (inet-cork.length + length 0x - fragheaderlen) { ip_local_error(sk, EMSGSIZE, rt-rt_dst, inet-dport, mtu-exthdrlen); return -EMSGSIZE; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index c60aadf..24d7c9f 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -271,12 +271,10 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, struct iphdr *iph; struct sk_buff *skb; int err; - int mtu; - mtu = inet-pmtudisc == IP_PMTUDISC_DO ? dst_mtu(rt-u.dst) : -rt-u.dst.dev-mtu; - if (length mtu) { - ip_local_error(sk, EMSGSIZE, rt-rt_dst, inet-dport, mtu); + if (length rt-u.dst.dev-mtu) { + ip_local_error(sk, EMSGSIZE, rt-rt_dst, inet-dport, + rt-u.dst.dev-mtu); return -EMSGSIZE; } if (flagsMSG_PROBE) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index b8e307a..4cfdad4 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1079,12 +1079,11 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, fragheaderlen = sizeof(struct ipv6hdr) + rt-u.dst.nfheader_len + (opt ? 
opt-opt_nflen : 0); maxfraglen = ((mtu - fragheaderlen) ~7) + fragheaderlen - sizeof(struct frag_hdr); - if ((mtu = sizeof(struct ipv6hdr) + IPV6_MAXPLEN -inet-cork.length + length sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) || - (np-pmtudisc = IPV6_PMTUDISC_DO -inet-cork.length + length mtu)) { - ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen); - return -EMSGSIZE; + if (mtu = sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { + if (inet-cork.length + length sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { + ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen); + return -EMSGSIZE; + } } /* diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index f4cd90b..f65fcd7 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -558,12 +558,9 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, struct sk_buff *skb; unsigned int hh_len; int err; - int mtu; - mtu = np-pmtudisc == IPV6_PMTUDISC_DO ? dst_mtu(rt-u.dst) : -rt-u.dst.dev-mtu; - if (length mtu) { - ipv6_local_error(sk, EMSGSIZE, fl, mtu); + if (length rt-u.dst.dev-mtu) { + ipv6_local_error(sk, EMSGSIZE, fl, rt-u.dst.dev-mtu); return -EMSGSIZE; } if (flagsMSG_PROBE) -- 1.5.1.rc3.30.ga8f4-dirty - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [NET] MTU discovery check in ip6_fragment()
Adds a check in ip6_fragment() mirroring ip_fragment() for packets that we can't fragment, and sends an ICMP Packet Too Big message in response. Signed-off-by: John Heffner [EMAIL PROTECTED] --- net/ipv6/ip6_output.c | 13 + 1 files changed, 13 insertions(+), 0 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 4cfdad4..5a5b7d4 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -567,6 +567,19 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) nexthdr = *prevhdr; mtu = dst_mtu(rt-u.dst); + + /* We must not fragment if the socket is set to force MTU discovery +* or if the skb it not generated by a local socket. (This last +* check should be redundant, but it's free.) +*/ + if (!np || np-pmtudisc = IPV6_PMTUDISC_DO) { + skb-dev = skb-dst-dev; + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb-dev); + IP6_INC_STATS(ip6_dst_idev(skb-dst), IPSTATS_MIB_FRAGFAILS); + kfree_skb(skb); + return -EMSGSIZE; + } + if (np np-frag_size mtu) { if (np-frag_size) mtu = np-frag_size; -- 1.5.1.rc3.30.ga8f4-dirty - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Revert [NET] Add IP(V6)_PMTUDISC_PROBE
This reverts commit d21d2a90b879c0cf159df5944847e6d9833816eb. Must be backed out because commit 87e927a0583bd4a8ba9e97cd75b58d8aa1c76e37 does not work. Signed-off-by: John Heffner [EMAIL PROTECTED] --- include/linux/in.h |1 - include/linux/in6.h |1 - include/linux/skbuff.h |3 +-- include/net/ip.h |2 +- net/core/skbuff.c|2 -- net/ipv4/ip_output.c | 14 -- net/ipv4/ip_sockglue.c |2 +- net/ipv4/raw.c |3 --- net/ipv6/ip6_output.c| 12 net/ipv6/ipv6_sockglue.c |2 +- net/ipv6/raw.c |3 --- 11 files changed, 12 insertions(+), 33 deletions(-) diff --git a/include/linux/in.h b/include/linux/in.h index 2dc1f8a..1912e7c 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -83,7 +83,6 @@ struct in_addr { #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ #define IP_PMTUDISC_WANT 1 /* Use per route hints */ #define IP_PMTUDISC_DO 2 /* Always DF*/ -#define IP_PMTUDISC_PROBE 3 /* Ignore dst pmtu */ #define IP_MULTICAST_IF32 #define IP_MULTICAST_TTL 33 diff --git a/include/linux/in6.h b/include/linux/in6.h index d559fac..4e8350a 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -179,7 +179,6 @@ struct in6_flowlabel_req #define IPV6_PMTUDISC_DONT 0 #define IPV6_PMTUDISC_WANT 1 #define IPV6_PMTUDISC_DO 2 -#define IPV6_PMTUDISC_PROBE3 /* Flowlabel */ #define IPV6_FLOWLABEL_MGR 32 diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8bf9b9f..7f17cfc 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -277,8 +277,7 @@ struct sk_buff { nfctinfo:3; __u8pkt_type:3, fclone:2, - ipvs_property:1, - ign_dst_mtu:1; + ipvs_property:1; __be16 protocol; void(*destructor)(struct sk_buff *skb); diff --git a/include/net/ip.h b/include/net/ip.h index 6a08b65..75f226d 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -206,7 +206,7 @@ int ip_decrease_ttl(struct iphdr *iph) static inline int ip_dont_fragment(struct sock *sk, struct dst_entry *dst) { - return (inet_sk(sk)-pmtudisc = IP_PMTUDISC_DO || + return (inet_sk(sk)-pmtudisc == IP_PMTUDISC_DO 
|| (inet_sk(sk)-pmtudisc == IP_PMTUDISC_WANT !(dst_metric(dst, RTAX_LOCK)(1RTAX_MTU; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2391cdf..f0d986a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -479,7 +479,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) C(ipvs_property); #endif - C(ign_dst_mtu); C(protocol); n-destructor = NULL; C(mark); @@ -543,7 +542,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) new-ipvs_property = old-ipvs_property; #endif - new-ign_dst_mtu= old-ign_dst_mtu; #ifdef CONFIG_NET_SCHED #ifdef CONFIG_NET_CLS_ACT new-tc_verd = old-tc_verd; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 704bc44..79e71ee 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -198,8 +198,7 @@ static inline int ip_finish_output(struct sk_buff *skb) return dst_output(skb); } #endif - if (skb-len dst_mtu(skb-dst) - !skb-ign_dst_mtu !skb_is_gso(skb)) + if (skb-len dst_mtu(skb-dst) !skb_is_gso(skb)) return ip_fragment(skb, ip_finish_output2); else return ip_finish_output2(skb); @@ -788,9 +787,7 @@ int ip_append_data(struct sock *sk, inet-cork.addr = ipc-addr; } dst_hold(rt-u.dst); - inet-cork.fragsize = mtu = inet-pmtudisc == IP_PMTUDISC_PROBE ? - rt-u.dst.dev-mtu : - dst_mtu(rt-u.dst.path); + inet-cork.fragsize = mtu = dst_mtu(rt-u.dst.path); inet-cork.rt = rt; inet-cork.length = 0; sk-sk_sndmsg_page = NULL; @@ -1208,16 +1205,13 @@ int ip_push_pending_frames(struct sock *sk) * to fragment the frame generated here. No matter, what transforms * how transforms change size of the packet, it will come out. */ - if (inet-pmtudisc IP_PMTUDISC_DO) + if (inet-pmtudisc != IP_PMTUDISC_DO) skb-local_df = 1; - if (inet-pmtudisc == IP_PMTUDISC_PROBE) - skb-ign_dst_mtu = 1; -
[PATCH] [NET] Add IP(V6)_PMTUDISC_PROBE
Add IP(V6)_PMTUDISC_PROBE value for IP(V6)_MTU_DISCOVER. This option forces us not to fragment, but does not make use of the kernel path MTU discovery. That is, it allows for user-mode MTU probing (or, packetization-layer path MTU discovery). This is particularly useful for diagnostic utilities, like traceroute/tracepath. Signed-off-by: John Heffner [EMAIL PROTECTED] --- include/linux/in.h |1 + include/linux/in6.h |1 + net/ipv4/ip_output.c | 20 +++- net/ipv4/ip_sockglue.c |2 +- net/ipv6/ip6_output.c| 15 --- net/ipv6/ipv6_sockglue.c |2 +- 6 files changed, 31 insertions(+), 10 deletions(-) diff --git a/include/linux/in.h b/include/linux/in.h index 1912e7c..3975cbf 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -83,6 +83,7 @@ struct in_addr { #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ #define IP_PMTUDISC_WANT 1 /* Use per route hints */ #define IP_PMTUDISC_DO 2 /* Always DF*/ +#define IP_PMTUDISC_PROBE 3 /* Ignore dst pmtu */ #define IP_MULTICAST_IF32 #define IP_MULTICAST_TTL 33 diff --git a/include/linux/in6.h b/include/linux/in6.h index 4e8350a..d559fac 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -179,6 +179,7 @@ struct in6_flowlabel_req #define IPV6_PMTUDISC_DONT 0 #define IPV6_PMTUDISC_WANT 1 #define IPV6_PMTUDISC_DO 2 +#define IPV6_PMTUDISC_PROBE3 /* Flowlabel */ #define IPV6_FLOWLABEL_MGR 32 diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 34606ef..66e2c3a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -189,6 +189,14 @@ static inline int ip_finish_output2(struct sk_buff *skb) return -EINVAL; } +static inline int ip_skb_dst_mtu(struct sk_buff *skb) +{ + struct inet_sock *inet = skb-sk ? inet_sk(skb-sk) : NULL; + + return (inet inet-pmtudisc == IP_PMTUDISC_PROBE) ? 
+ skb-dst-dev-mtu : dst_mtu(skb-dst); +} + static inline int ip_finish_output(struct sk_buff *skb) { #if defined(CONFIG_NETFILTER) defined(CONFIG_XFRM) @@ -198,7 +206,7 @@ static inline int ip_finish_output(struct sk_buff *skb) return dst_output(skb); } #endif - if (skb-len dst_mtu(skb-dst) !skb_is_gso(skb)) + if (skb-len ip_skb_dst_mtu(skb) !skb_is_gso(skb)) return ip_fragment(skb, ip_finish_output2); else return ip_finish_output2(skb); @@ -422,7 +430,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) if (unlikely((iph-frag_off htons(IP_DF)) !skb-local_df)) { IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(dst_mtu(rt-u.dst))); + htonl(ip_skb_dst_mtu(skb))); kfree_skb(skb); return -EMSGSIZE; } @@ -787,7 +795,9 @@ int ip_append_data(struct sock *sk, inet-cork.addr = ipc-addr; } dst_hold(rt-u.dst); - inet-cork.fragsize = mtu = dst_mtu(rt-u.dst.path); + inet-cork.fragsize = mtu = inet-pmtudisc == IP_PMTUDISC_PROBE ? + rt-u.dst.dev-mtu : + dst_mtu(rt-u.dst.path); inet-cork.rt = rt; inet-cork.length = 0; sk-sk_sndmsg_page = NULL; @@ -1203,13 +1213,13 @@ int ip_push_pending_frames(struct sock *sk) * to fragment the frame generated here. No matter, what transforms * how transforms change size of the packet, it will come out. */ - if (inet-pmtudisc != IP_PMTUDISC_DO) + if (inet-pmtudisc IP_PMTUDISC_DO) skb-local_df = 1; /* DF bit is set when we want to see DF on outgoing frames. * If local_df is set too, we still allow to fragment this frame * locally. */ - if (inet-pmtudisc == IP_PMTUDISC_DO || + if (inet-pmtudisc = IP_PMTUDISC_DO || (skb-len = dst_mtu(rt-u.dst) ip_dont_fragment(sk, rt-u.dst))) df = htons(IP_DF); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index c199d23..4d54457 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -542,7 +542,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, inet-hdrincl = val ? 
1 : 0; break; case IP_MTU_DISCOVER: - if (val0 || val2) + if (val0 || val3) goto e_inval; inet-pmtudisc = val; break; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5a5b7d4..f508171 100644 --- a/net/ipv6/ip6_output.c +++
[PATCH 2/4] Revert [NET] Do pmtu check in transport layer
This reverts commit 87e927a0583bd4a8ba9e97cd75b58d8aa1c76e37. This idea does not work, as pointed at by Patrick McHardy. Signed-off-by: John Heffner [EMAIL PROTECTED] --- net/ipv4/ip_output.c |4 +--- net/ipv4/raw.c|8 +++- net/ipv6/ip6_output.c | 11 +-- net/ipv6/raw.c|7 ++- 4 files changed, 11 insertions(+), 19 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 79e71ee..34606ef 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -810,9 +810,7 @@ int ip_append_data(struct sock *sk, fragheaderlen = sizeof(struct iphdr) + (opt ? opt-optlen : 0); maxfraglen = ((mtu - fragheaderlen) ~7) + fragheaderlen; - if (inet-cork.length + length 0x - fragheaderlen || - (inet-pmtudisc = IP_PMTUDISC_DO -inet-cork.length + length mtu)) { + if (inet-cork.length + length 0x - fragheaderlen) { ip_local_error(sk, EMSGSIZE, rt-rt_dst, inet-dport, mtu-exthdrlen); return -EMSGSIZE; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index c60aadf..24d7c9f 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -271,12 +271,10 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, struct iphdr *iph; struct sk_buff *skb; int err; - int mtu; - mtu = inet-pmtudisc == IP_PMTUDISC_DO ? dst_mtu(rt-u.dst) : -rt-u.dst.dev-mtu; - if (length mtu) { - ip_local_error(sk, EMSGSIZE, rt-rt_dst, inet-dport, mtu); + if (length rt-u.dst.dev-mtu) { + ip_local_error(sk, EMSGSIZE, rt-rt_dst, inet-dport, + rt-u.dst.dev-mtu); return -EMSGSIZE; } if (flagsMSG_PROBE) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index b8e307a..4cfdad4 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1079,12 +1079,11 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, fragheaderlen = sizeof(struct ipv6hdr) + rt-u.dst.nfheader_len + (opt ? 
opt-opt_nflen : 0); maxfraglen = ((mtu - fragheaderlen) ~7) + fragheaderlen - sizeof(struct frag_hdr); - if ((mtu = sizeof(struct ipv6hdr) + IPV6_MAXPLEN -inet-cork.length + length sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) || - (np-pmtudisc = IPV6_PMTUDISC_DO -inet-cork.length + length mtu)) { - ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen); - return -EMSGSIZE; + if (mtu = sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { + if (inet-cork.length + length sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { + ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen); + return -EMSGSIZE; + } } /* diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index f4cd90b..f65fcd7 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -558,12 +558,9 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, struct sk_buff *skb; unsigned int hh_len; int err; - int mtu; - mtu = np-pmtudisc == IPV6_PMTUDISC_DO ? dst_mtu(rt-u.dst) : -rt-u.dst.dev-mtu; - if (length mtu) { - ipv6_local_error(sk, EMSGSIZE, fl, mtu); + if (length rt-u.dst.dev-mtu) { + ipv6_local_error(sk, EMSGSIZE, fl, rt-u.dst.dev-mtu); return -EMSGSIZE; } if (flagsMSG_PROBE) -- 1.5.1.rc3.30.ga8f4-dirty - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/4] Revert [NET] Add IP(V6)_PMTUDISC_PROBE
This reverts commit d21d2a90b879c0cf159df5944847e6d9833816eb. Must be backed out because commit 87e927a0583bd4a8ba9e97cd75b58d8aa1c76e37 does not work. Signed-off-by: John Heffner [EMAIL PROTECTED] --- include/linux/in.h |1 - include/linux/in6.h |1 - include/linux/skbuff.h |3 +-- include/net/ip.h |2 +- net/core/skbuff.c|2 -- net/ipv4/ip_output.c | 14 -- net/ipv4/ip_sockglue.c |2 +- net/ipv4/raw.c |3 --- net/ipv6/ip6_output.c| 12 net/ipv6/ipv6_sockglue.c |2 +- net/ipv6/raw.c |3 --- 11 files changed, 12 insertions(+), 33 deletions(-) diff --git a/include/linux/in.h b/include/linux/in.h index 2dc1f8a..1912e7c 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -83,7 +83,6 @@ struct in_addr { #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ #define IP_PMTUDISC_WANT 1 /* Use per route hints */ #define IP_PMTUDISC_DO 2 /* Always DF*/ -#define IP_PMTUDISC_PROBE 3 /* Ignore dst pmtu */ #define IP_MULTICAST_IF32 #define IP_MULTICAST_TTL 33 diff --git a/include/linux/in6.h b/include/linux/in6.h index d559fac..4e8350a 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -179,7 +179,6 @@ struct in6_flowlabel_req #define IPV6_PMTUDISC_DONT 0 #define IPV6_PMTUDISC_WANT 1 #define IPV6_PMTUDISC_DO 2 -#define IPV6_PMTUDISC_PROBE3 /* Flowlabel */ #define IPV6_FLOWLABEL_MGR 32 diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8bf9b9f..7f17cfc 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -277,8 +277,7 @@ struct sk_buff { nfctinfo:3; __u8pkt_type:3, fclone:2, - ipvs_property:1, - ign_dst_mtu:1; + ipvs_property:1; __be16 protocol; void(*destructor)(struct sk_buff *skb); diff --git a/include/net/ip.h b/include/net/ip.h index 6a08b65..75f226d 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -206,7 +206,7 @@ int ip_decrease_ttl(struct iphdr *iph) static inline int ip_dont_fragment(struct sock *sk, struct dst_entry *dst) { - return (inet_sk(sk)-pmtudisc = IP_PMTUDISC_DO || + return (inet_sk(sk)-pmtudisc == IP_PMTUDISC_DO 
|| (inet_sk(sk)-pmtudisc == IP_PMTUDISC_WANT !(dst_metric(dst, RTAX_LOCK)(1RTAX_MTU; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2391cdf..f0d986a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -479,7 +479,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) C(ipvs_property); #endif - C(ign_dst_mtu); C(protocol); n-destructor = NULL; C(mark); @@ -543,7 +542,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) new-ipvs_property = old-ipvs_property; #endif - new-ign_dst_mtu= old-ign_dst_mtu; #ifdef CONFIG_NET_SCHED #ifdef CONFIG_NET_CLS_ACT new-tc_verd = old-tc_verd; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 704bc44..79e71ee 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -198,8 +198,7 @@ static inline int ip_finish_output(struct sk_buff *skb) return dst_output(skb); } #endif - if (skb-len dst_mtu(skb-dst) - !skb-ign_dst_mtu !skb_is_gso(skb)) + if (skb-len dst_mtu(skb-dst) !skb_is_gso(skb)) return ip_fragment(skb, ip_finish_output2); else return ip_finish_output2(skb); @@ -788,9 +787,7 @@ int ip_append_data(struct sock *sk, inet-cork.addr = ipc-addr; } dst_hold(rt-u.dst); - inet-cork.fragsize = mtu = inet-pmtudisc == IP_PMTUDISC_PROBE ? - rt-u.dst.dev-mtu : - dst_mtu(rt-u.dst.path); + inet-cork.fragsize = mtu = dst_mtu(rt-u.dst.path); inet-cork.rt = rt; inet-cork.length = 0; sk-sk_sndmsg_page = NULL; @@ -1208,16 +1205,13 @@ int ip_push_pending_frames(struct sock *sk) * to fragment the frame generated here. No matter, what transforms * how transforms change size of the packet, it will come out. */ - if (inet-pmtudisc IP_PMTUDISC_DO) + if (inet-pmtudisc != IP_PMTUDISC_DO) skb-local_df = 1; - if (inet-pmtudisc == IP_PMTUDISC_PROBE) - skb-ign_dst_mtu = 1; -
[PATCH 4/4] [NET] Add IP(V6)_PMTUDISC_PROBE
Add IP(V6)_PMTUDISC_PROBE value for IP(V6)_MTU_DISCOVER. This option forces us not to fragment, but does not make use of the kernel path MTU discovery. That is, it allows for user-mode MTU probing (or, packetization-layer path MTU discovery). This is particularly useful for diagnostic utilities, like traceroute/tracepath. Signed-off-by: John Heffner [EMAIL PROTECTED] --- include/linux/in.h |1 + include/linux/in6.h |1 + net/ipv4/ip_output.c | 20 +++- net/ipv4/ip_sockglue.c |2 +- net/ipv6/ip6_output.c| 15 --- net/ipv6/ipv6_sockglue.c |2 +- 6 files changed, 31 insertions(+), 10 deletions(-) diff --git a/include/linux/in.h b/include/linux/in.h index 1912e7c..3975cbf 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -83,6 +83,7 @@ struct in_addr { #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ #define IP_PMTUDISC_WANT 1 /* Use per route hints */ #define IP_PMTUDISC_DO 2 /* Always DF*/ +#define IP_PMTUDISC_PROBE 3 /* Ignore dst pmtu */ #define IP_MULTICAST_IF32 #define IP_MULTICAST_TTL 33 diff --git a/include/linux/in6.h b/include/linux/in6.h index 4e8350a..d559fac 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -179,6 +179,7 @@ struct in6_flowlabel_req #define IPV6_PMTUDISC_DONT 0 #define IPV6_PMTUDISC_WANT 1 #define IPV6_PMTUDISC_DO 2 +#define IPV6_PMTUDISC_PROBE3 /* Flowlabel */ #define IPV6_FLOWLABEL_MGR 32 diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 34606ef..66e2c3a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -189,6 +189,14 @@ static inline int ip_finish_output2(struct sk_buff *skb) return -EINVAL; } +static inline int ip_skb_dst_mtu(struct sk_buff *skb) +{ + struct inet_sock *inet = skb-sk ? inet_sk(skb-sk) : NULL; + + return (inet inet-pmtudisc == IP_PMTUDISC_PROBE) ? 
+ skb-dst-dev-mtu : dst_mtu(skb-dst); +} + static inline int ip_finish_output(struct sk_buff *skb) { #if defined(CONFIG_NETFILTER) defined(CONFIG_XFRM) @@ -198,7 +206,7 @@ static inline int ip_finish_output(struct sk_buff *skb) return dst_output(skb); } #endif - if (skb-len dst_mtu(skb-dst) !skb_is_gso(skb)) + if (skb-len ip_skb_dst_mtu(skb) !skb_is_gso(skb)) return ip_fragment(skb, ip_finish_output2); else return ip_finish_output2(skb); @@ -422,7 +430,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) if (unlikely((iph-frag_off htons(IP_DF)) !skb-local_df)) { IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(dst_mtu(rt-u.dst))); + htonl(ip_skb_dst_mtu(skb))); kfree_skb(skb); return -EMSGSIZE; } @@ -787,7 +795,9 @@ int ip_append_data(struct sock *sk, inet-cork.addr = ipc-addr; } dst_hold(rt-u.dst); - inet-cork.fragsize = mtu = dst_mtu(rt-u.dst.path); + inet-cork.fragsize = mtu = inet-pmtudisc == IP_PMTUDISC_PROBE ? + rt-u.dst.dev-mtu : + dst_mtu(rt-u.dst.path); inet-cork.rt = rt; inet-cork.length = 0; sk-sk_sndmsg_page = NULL; @@ -1203,13 +1213,13 @@ int ip_push_pending_frames(struct sock *sk) * to fragment the frame generated here. No matter, what transforms * how transforms change size of the packet, it will come out. */ - if (inet-pmtudisc != IP_PMTUDISC_DO) + if (inet-pmtudisc IP_PMTUDISC_DO) skb-local_df = 1; /* DF bit is set when we want to see DF on outgoing frames. * If local_df is set too, we still allow to fragment this frame * locally. */ - if (inet-pmtudisc == IP_PMTUDISC_DO || + if (inet-pmtudisc = IP_PMTUDISC_DO || (skb-len = dst_mtu(rt-u.dst) ip_dont_fragment(sk, rt-u.dst))) df = htons(IP_DF); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index c199d23..4d54457 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -542,7 +542,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, inet-hdrincl = val ? 
1 : 0; break; case IP_MTU_DISCOVER: - if (val0 || val2) + if (val0 || val3) goto e_inval; inet-pmtudisc = val; break; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5a5b7d4..f508171 100644 --- a/net/ipv6/ip6_output.c +++
[PATCH 3/4] [NET] MTU discovery check in ip6_fragment()
Adds a check in ip6_fragment() mirroring ip_fragment() for packets that we can't fragment, and sends an ICMP Packet Too Big message in response. Signed-off-by: John Heffner [EMAIL PROTECTED] --- net/ipv6/ip6_output.c | 13 + 1 files changed, 13 insertions(+), 0 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 4cfdad4..5a5b7d4 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -567,6 +567,19 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) nexthdr = *prevhdr; mtu = dst_mtu(rt-u.dst); + + /* We must not fragment if the socket is set to force MTU discovery +* or if the skb it not generated by a local socket. (This last +* check should be redundant, but it's free.) +*/ + if (!np || np-pmtudisc = IPV6_PMTUDISC_DO) { + skb-dev = skb-dst-dev; + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb-dev); + IP6_INC_STATS(ip6_dst_idev(skb-dst), IPSTATS_MIB_FRAGFAILS); + kfree_skb(skb); + return -EMSGSIZE; + } + if (np np-frag_size mtu) { if (np-frag_size) mtu = np-frag_size; -- 1.5.1.rc3.30.ga8f4-dirty - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [NET] Add IP(V6)_PMTUDISC_PROBE
Sorry, forgot the -n flag on git-format-patch. Patches resent with correct sequence numbers. Thanks, -John - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [NET] Add IP(V6)_PMTUDISC_PROBE
From: John Heffner [EMAIL PROTECTED] Date: Wed, 18 Apr 2007 21:11:26 -0400 Sorry, forgot the -n flag on git-format-patch. Patches resent with correct sequence numbers. Thanks for fixing that. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: AF_PACKET how to get the original netdev from a packet received from a bonded master
On 4/18/07, David Miller [EMAIL PROTECTED] wrote: Ok, I'll try to remember to high-priority reviewing PJ's patch on my next rebase of the net-2.6.22 tree which should be tonight or tomorrow sometime. Thanks Dave, PJ is offline this week so I'm trying to keep an eye out for discussions related to his various patches :-) Just to give you an idea of our motivation around this, we're looking at layer 2 configuration protocols implemented from user space. As an example Link Layer Discovery Protocol could be used to detect trunking misconfiguration, but only if you can track that information for the underlying interfaces of a bond. Things like 802.1x authenticated links in a bond would have a similar issue of needing to configure each underlying interface before bringing up the bond, but with LLDP there's the added fun of being able to receive updated notifications of configuration changes from the link partner at any time. - Chris - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [NETLINK] Don't attach callback to a going-away netlink socket
David Miller [EMAIL PROTECTED] wrote: As discussed in this thread there might be other ways to a approach this, but this fix is good for now. Patch applied, thank you. Actually I was going to suggest something like this: [NETLINK]: Kill CB only when socket is unused Since we can still receive packets until all references to the socket are gone, we don't need to kill the CB until that happens. This also aligns ourselves with the receive queue purging which happens at that point. Original patch by Pavel Emelianov who noticed this race condition. Signed-off-by: Herbert Xu [EMAIL PROTECTED] Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 0be19b7..914884c 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -139,6 +139,15 @@ static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid) static void netlink_sock_destruct(struct sock *sk) { + struct netlink_sock *nlk = nlk_sk(sk); + + WARN_ON(mutex_is_locked(nlk_sk(sk)-cb_mutex)); + if (nlk-cb) { + if (nlk-cb-done) + nlk-cb-done(nlk-cb); + netlink_destroy_callback(nlk-cb); + } + skb_queue_purge(sk-sk_receive_queue); if (!sock_flag(sk, SOCK_DEAD)) { @@ -147,7 +156,6 @@ static void netlink_sock_destruct(struct sock *sk) } BUG_TRAP(!atomic_read(sk-sk_rmem_alloc)); BUG_TRAP(!atomic_read(sk-sk_wmem_alloc)); - BUG_TRAP(!nlk_sk(sk)-cb); BUG_TRAP(!nlk_sk(sk)-groups); } @@ -450,17 +458,7 @@ static int netlink_release(struct socket *sock) netlink_remove(sk); nlk = nlk_sk(sk); - mutex_lock(nlk-cb_mutex); - if (nlk-cb) { - if (nlk-cb-done) - nlk-cb-done(nlk-cb); - netlink_destroy_callback(nlk-cb); - nlk-cb = NULL; - } - mutex_unlock(nlk-cb_mutex); - - /* OK. Socket is unlinked, and, therefore, - no new packets will arrive */ + /* OK. Socket is unlinked. 
*/ sock_orphan(sk); sock-sk = NULL; - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: ESP interfamily tunnel bug?
Hello Diego, I tried to reproduce the bug. But I got a panic of the kernel :- I'm using current net-2.6. I suspect that some special routing for loopback is related because I checked with kdb and got the backtrace like fib_sync_down ipv6_rcv netif_receive_skb __mod_timer net_rx_action __do_softirq do_softirq local_bh_enable dev_queue_xmit neigh_resolve_output ip_output xfrm4_output_finish xfrm4_output ip_generic_getfrag ip6_push_pending_frames I think ip_rcv or some IPv4 function should be called between netif_receive_skb and ipv6_rcv. Anyway I could not classify the way to make a panic. I'll trace it. Thank you, Diego Beltrami wrote: Hi, we have discovered a routing related problem in ESP tunnel and beet mode. We don't know whether it is a bug in the XFRM, or just in the way the virtual addresses and the corresponding routes are set-up. We set up a dummy0 device for the virtual addresses: [EMAIL PROTECTED]:~# ip addr show dummy0 5: dummy0: BROADCAST,NOARP,UP,1 mtu 1500 qdisc noqueue link/ether 92:09:fe:11:81:1b brd ff:ff:ff:ff:ff:ff inet6 2001:72:e6d3:1cf3:e11d:5bb0:b99:e85e/28 scope global valid_lft forever preferred_lft forever inet6 2001:74:32e0:df36:e862:3963:523e:dd7d/28 scope global valid_lft forever preferred_lft forever inet6 2001:73:d3a8:8723:d572:7549:7f2c:e590/28 scope global valid_lft forever preferred_lft forever inet6 2001:75:a2e6:aad6:e901:dd1c:ba95:e300/28 scope global valid_lft forever preferred_lft forever inet6 fe80::9009:feff:fe11:811b/64 scope link valid_lft forever preferred_lft forever And then we have routes for the virtual addresses: [EMAIL PROTECTED]:~# ip -6 route 2001:72:e6d3:1cf3:e11d:5bb0:b99:e85e dev dummy0 metric 1024 expires 21334305sec mtu 1500 advmss 1440 metric 10 4294967295 2001:73:d3a8:8723:d572:7549:7f2c:e590 dev dummy0 metric 1024 expires 21334305sec mtu 1500 advmss 1440 metric 10 4294967295 2001:74:32e0:df36:e862:3963:523e:dd7d dev dummy0 metric 1024 expires 21334305sec mtu 1500 advmss 1440 metric 10 4294967295 
2001:75:a2e6:aad6:e901:dd1c:ba95:e300 dev dummy0 metric 1024 expires 21334305sec mtu 1500 advmss 1440 metric 10 4294967295 2001:70::/28 dev dummy0 metric 256 expires 21334305sec mtu 1500 advmss 1440 metric 10 4294967295 fe80::/64 dev dummy0 metric 256 expires 21334305sec mtu 1500 advmss 1440 metric 10 4294967295 ff00::/8 dev eth0 metric 256 expires 21325454sec mtu 1500 advmss 1440 metric 10 4294967295 ff00::/8 dev dummy0 metric 256 expires 21334305sec mtu 1500 advmss 1440 metric 10 4294967295 unreachable default dev lo proto none metric -1 error -101 metric 10 255 ...and set-up policies and associations. The virtual IPv6 addresses are inner and IPv4 addresses are outer addresses: [EMAIL PROTECTED]:~/projects/hipl--userspace--2.6# ip xfrm policy show src 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15/128 dst 2001:74:32e0:df36:e862:3963:523e:dd7d/128 dir in priority 0 tmpl src c1a7:bb82:: dst c0a8:65:: proto esp reqid 0 mode beet src 2001:74:32e0:df36:e862:3963:523e:dd7d/128 dst 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15/128 dir out priority 0 tmpl src c0a8:65:: dst c1a7:bb82:: proto esp reqid 0 mode beet [EMAIL PROTECTED]:~/projects/hipl--userspace--2.6# ip xfrm state show src 193.167.187.130 dst 192.168.0.101 proto esp spi 0xf556c7c7 reqid 0 mode beet replay-window 0 auth sha1 0xab327b944011c94a0c54a097b4752e23f377ff34 enc aes 0x882a334830b1cd14b9e411ec37a4242f encap type espinudp-nonike sport 50500 dport 50500 addr 193.167.187.130 sel src 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15/0 dst 2001:74:32e0:df36:e862:3963:523e:dd7d/0 src 192.168.0.101 dst 193.167.187.130 proto esp spi 0x1663f3a4 reqid 0 mode beet replay-window 0 auth sha1 0x9f07dabce4abf2ebfe45e247ede2cf15f9156a13 enc aes 0xfc50593b9af6d296b042a16ca00bad20 encap type espinudp-nonike sport 50500 dport 50500 addr 192.168.0.101 sel src 2001:74:32e0:df36:e862:3963:523e:dd7d/0 dst 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15/0 And then we try to ping6 the virtual address: [EMAIL PROTECTED]:~/projects/hipl--userspace--2.6# ping6 -I 
2001:0074:32e0:df36:e862:3963:523e:dd7d 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15 PING 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15(2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15) from 2001:74:32e0:df36:e862:3963:523e:dd7d : 56 data bytes ping: sendmsg: Network is unreachable ping: sendmsg: Network is unreachable Tcpdump shows no traffic at the host. We can repeat the problem both with tunnel and beet modes in 2.6.21-rc6 (and also in 2.6.17.14). I have tried also ip rule stuff but it seems that it does not rule with IPv6 :) It does help either to
Re: ESP interfamily tunnel bug?
Hi Kazunori, thanks for reply. In your backtrace I see that there are both input and output functions calls. Is it the right way? One more thing, were your two hosts you used located on the same network? In fact it seems that if the machines are on the same network, this bug doesn't manifest. Thanks, Diego Hello Diego, I tried to reproduce the bug. But I got a panic of the kernel :- I'm using current net-2.6. I suspect that some special routing for loopback is related because I checked with kdb and got the backtrace like fib_sync_down ipv6_rcv netif_receive_skb __mod_timer net_rx_action __do_softirq do_softirq local_bh_enable dev_queue_xmit neigh_resolve_output ip_output xfrm4_output_finish xfrm4_output ip_generic_getfrag ip6_push_pending_frames I think ip_rcv or some IPv4 function should be called between netif_receive_skb and ipv6_rcv. Anyway I could not classify the way to make a panic. I'll trace it. Thank you, Diego Beltrami wrote: Hi, we have discovered a routing related problem in ESP tunnel and beet mode. We don't know whether it is a bug in the XFRM, or just in the way the virtual addresses and the corresponding routes are set-up. 
We set up a dummy0 device for the virtual addresses: [EMAIL PROTECTED]:~# ip addr show dummy0 5: dummy0: BROADCAST,NOARP,UP,1 mtu 1500 qdisc noqueue link/ether 92:09:fe:11:81:1b brd ff:ff:ff:ff:ff:ff inet6 2001:72:e6d3:1cf3:e11d:5bb0:b99:e85e/28 scope global valid_lft forever preferred_lft forever inet6 2001:74:32e0:df36:e862:3963:523e:dd7d/28 scope global valid_lft forever preferred_lft forever inet6 2001:73:d3a8:8723:d572:7549:7f2c:e590/28 scope global valid_lft forever preferred_lft forever inet6 2001:75:a2e6:aad6:e901:dd1c:ba95:e300/28 scope global valid_lft forever preferred_lft forever inet6 fe80::9009:feff:fe11:811b/64 scope link valid_lft forever preferred_lft forever And then we have routes for the virtual addresses: [EMAIL PROTECTED]:~# ip -6 route 2001:72:e6d3:1cf3:e11d:5bb0:b99:e85e dev dummy0 metric 1024 expires 21334305sec mtu 1500 advmss 1440 metric 10 4294967295 2001:73:d3a8:8723:d572:7549:7f2c:e590 dev dummy0 metric 1024 expires 21334305sec mtu 1500 advmss 1440 metric 10 4294967295 2001:74:32e0:df36:e862:3963:523e:dd7d dev dummy0 metric 1024 expires 21334305sec mtu 1500 advmss 1440 metric 10 4294967295 2001:75:a2e6:aad6:e901:dd1c:ba95:e300 dev dummy0 metric 1024 expires 21334305sec mtu 1500 advmss 1440 metric 10 4294967295 2001:70::/28 dev dummy0 metric 256 expires 21334305sec mtu 1500 advmss 1440 metric 10 4294967295 fe80::/64 dev dummy0 metric 256 expires 21334305sec mtu 1500 advmss 1440 metric 10 4294967295 ff00::/8 dev eth0 metric 256 expires 21325454sec mtu 1500 advmss 1440 metric 10 4294967295 ff00::/8 dev dummy0 metric 256 expires 21334305sec mtu 1500 advmss 1440 metric 10 4294967295 unreachable default dev lo proto none metric -1 error -101 metric 10 255 ...and set-up policies and associations. 
The virtual IPv6 addresses are inner and IPv4 addresses are outer addresses: [EMAIL PROTECTED]:~/projects/hipl--userspace--2.6# ip xfrm policy show src 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15/128 dst 2001:74:32e0:df36:e862:3963:523e:dd7d/128 dir in priority 0 tmpl src c1a7:bb82:: dst c0a8:65:: proto esp reqid 0 mode beet src 2001:74:32e0:df36:e862:3963:523e:dd7d/128 dst 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15/128 dir out priority 0 tmpl src c0a8:65:: dst c1a7:bb82:: proto esp reqid 0 mode beet [EMAIL PROTECTED]:~/projects/hipl--userspace--2.6# ip xfrm state show src 193.167.187.130 dst 192.168.0.101 proto esp spi 0xf556c7c7 reqid 0 mode beet replay-window 0 auth sha1 0xab327b944011c94a0c54a097b4752e23f377ff34 enc aes 0x882a334830b1cd14b9e411ec37a4242f encap type espinudp-nonike sport 50500 dport 50500 addr 193.167.187.130 sel src 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15/0 dst 2001:74:32e0:df36:e862:3963:523e:dd7d/0 src 192.168.0.101 dst 193.167.187.130 proto esp spi 0x1663f3a4 reqid 0 mode beet replay-window 0 auth sha1 0x9f07dabce4abf2ebfe45e247ede2cf15f9156a13 enc aes 0xfc50593b9af6d296b042a16ca00bad20 encap type espinudp-nonike sport 50500 dport 50500 addr 192.168.0.101 sel src 2001:74:32e0:df36:e862:3963:523e:dd7d/0 dst 2001:76:7d5a:88d7:51af:cdd1:6bf5:3d15/0 And then we try to ping6 the virtual address: [EMAIL PROTECTED]:~/projects/hipl--userspace--2.6# ping6 -I 2001:0074:32e0:df36:e862:3963:523e:dd7d
Re: + ppp_generic-fix-lockdep-warning.patch added to -mm tree
On Tue, Apr 17, 2007 at 09:37:44AM +0200, Jarek Poplawski wrote: ... Yuriy - thanks for testing my patch ...(pause) Not! It seems this patch is not visible in this version - probably ... Sorry! It was only something with my eyes. (Probably too much of Pamela!). Jarek P. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html