[PATCH] drivers/net: chelsio/cxgb*: Convert timers to use timer_setup()
In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Santosh RaspaturCc: Ganesh Goudar Cc: Casey Leedom Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook --- drivers/net/ethernet/chelsio/cxgb3/sge.c | 12 ++-- drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 7 +++ drivers/net/ethernet/chelsio/cxgb4/sge.c | 12 ++-- drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 12 ++-- 4 files changed, 21 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c index e2d342647b19..e3d28ae75360 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c @@ -2853,9 +2853,9 @@ void t3_sge_err_intr_handler(struct adapter *adapter) * bother cleaning them up here. * */ -static void sge_timer_tx(unsigned long data) +static void sge_timer_tx(struct timer_list *t) { - struct sge_qset *qs = (struct sge_qset *)data; + struct sge_qset *qs = from_timer(qs, t, tx_reclaim_timer); struct port_info *pi = netdev_priv(qs->netdev); struct adapter *adap = pi->adapter; unsigned int tbd[SGE_TXQ_PER_SET] = {0, 0}; @@ -2893,10 +2893,10 @@ static void sge_timer_tx(unsigned long data) * starved. 
* */ -static void sge_timer_rx(unsigned long data) +static void sge_timer_rx(struct timer_list *t) { spinlock_t *lock; - struct sge_qset *qs = (struct sge_qset *)data; + struct sge_qset *qs = from_timer(qs, t, rx_reclaim_timer); struct port_info *pi = netdev_priv(qs->netdev); struct adapter *adap = pi->adapter; u32 status; @@ -2976,8 +2976,8 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, struct sge_qset *q = >sge.qs[id]; init_qset_cntxt(q, id); - setup_timer(>tx_reclaim_timer, sge_timer_tx, (unsigned long)q); - setup_timer(>rx_reclaim_timer, sge_timer_rx, (unsigned long)q); + timer_setup(>tx_reclaim_timer, sge_timer_tx, 0); + timer_setup(>rx_reclaim_timer, sge_timer_rx, 0); q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size, sizeof(struct rx_desc), diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 92a311767381..0c154c663748 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -366,9 +366,9 @@ int cxgb4_tc_flower_destroy(struct net_device *dev, return ret; } -static void ch_flower_stats_cb(unsigned long data) +static void ch_flower_stats_cb(struct timer_list *t) { - struct adapter *adap = (struct adapter *)data; + struct adapter *adap = from_timer(adap, t, flower_stats_timer); struct ch_tc_flower_entry *flower_entry; struct ch_tc_flower_stats *ofld_stats; unsigned int i; @@ -440,8 +440,7 @@ int cxgb4_tc_flower_stats(struct net_device *dev, void cxgb4_init_tc_flower(struct adapter *adap) { hash_init(adap->flower_anymatch_tbl); - setup_timer(>flower_stats_timer, ch_flower_stats_cb, - (unsigned long)adap); + timer_setup(>flower_stats_timer, ch_flower_stats_cb, 0); mod_timer(>flower_stats_timer, jiffies + STATS_CHECK_PERIOD); } diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 4ef68f69b58c..486b01fe23bd 100644 --- 
a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2583,11 +2583,11 @@ irq_handler_t t4_intr_handler(struct adapter *adap) return t4_intr_intx; } -static void sge_rx_timer_cb(unsigned long data) +static void sge_rx_timer_cb(struct timer_list *t) { unsigned long m; unsigned int i; - struct adapter *adap = (struct adapter *)data; + struct adapter *adap = from_timer(adap, t, sge.rx_timer); struct sge *s = >sge; for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++) @@ -2620,11 +2620,11 @@ static void sge_rx_timer_cb(unsigned long data) mod_timer(>rx_timer, jiffies + RX_QCHECK_PERIOD); } -static void sge_tx_timer_cb(unsigned long data) +static void sge_tx_timer_cb(struct timer_list *t) { unsigned long m; unsigned int i, budget; - struct adapter *adap = (struct adapter *)data; + struct adapter *adap = from_timer(adap, t, sge.tx_timer); struct sge *s = >sge; for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++) @@ -3458,8 +3458,8 @@ int t4_sge_init(struct adapter *adap) /* Set up timers used for recuring callbacks to process RX and TX * administrative tasks. */
[PATCH] drivers/net: appletalk/cops: Convert timers to use timer_setup()
In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Allen Pais Cc: "David S. Miller" Cc: David Howells Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook --- drivers/net/appletalk/cops.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c index caf04284711a..bb49f6e40a19 100644 --- a/drivers/net/appletalk/cops.c +++ b/drivers/net/appletalk/cops.c @@ -155,6 +155,7 @@ static int cops_irqlist[] = { }; static struct timer_list cops_timer; +static struct net_device *cops_timer_dev; /* use 0 for production, 1 for verification, 2 for debug, 3 for verbose debug */ #ifndef COPS_DEBUG @@ -187,7 +188,7 @@ static void cops_load (struct net_device *dev); static int cops_nodeid (struct net_device *dev, int nodeid); static irqreturn_t cops_interrupt (int irq, void *dev_id); -static void cops_poll (unsigned long ltdev); +static void cops_poll(struct timer_list *t); static void cops_timeout(struct net_device *dev); static void cops_rx (struct net_device *dev); static netdev_tx_t cops_send_packet (struct sk_buff *skb, @@ -424,7 +425,8 @@ static int cops_open(struct net_device *dev) */ if(lp->board==TANGENT) /* Poll 20 times per second */ { - setup_timer(&cops_timer, cops_poll, (unsigned long)dev); + cops_timer_dev = dev; + timer_setup(&cops_timer, cops_poll, 0); cops_timer.expires = jiffies + HZ/20; add_timer(&cops_timer); } @@ -671,12 +673,11 @@ static int cops_nodeid (struct net_device *dev, int nodeid) * Poll the Tangent type cards to see if we have work. */ -static void cops_poll(unsigned long ltdev) +static void cops_poll(struct timer_list *unused) { int ioaddr, status; int boguscount = 0; - - struct net_device *dev = (struct net_device *)ltdev; + struct net_device *dev = cops_timer_dev; del_timer(&cops_timer); -- 2.7.4 -- Kees Cook Pixel Security
[PATCH] drivers/net: amd: Convert timers to use timer_setup()
In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Tom LendackyCc: "David S. Miller" Cc: Allen Pais Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook --- drivers/net/ethernet/amd/a2065.c | 13 ++--- drivers/net/ethernet/amd/am79c961a.c | 9 + drivers/net/ethernet/amd/am79c961a.h | 1 + drivers/net/ethernet/amd/declance.c | 10 ++ drivers/net/ethernet/amd/pcnet32.c | 10 +- drivers/net/ethernet/amd/sunlance.c | 8 drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 14 ++ 7 files changed, 37 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/amd/a2065.c b/drivers/net/ethernet/amd/a2065.c index 998d30e050a6..212fe72a190b 100644 --- a/drivers/net/ethernet/amd/a2065.c +++ b/drivers/net/ethernet/amd/a2065.c @@ -123,6 +123,7 @@ struct lance_private { int burst_sizes; /* ledma SBus burst sizes */ #endif struct timer_list multicast_timer; + struct net_device *dev; }; #define LANCE_ADDR(x) ((int)(x) & ~0xff00) @@ -638,6 +639,13 @@ static void lance_set_multicast(struct net_device *dev) netif_wake_queue(dev); } +static void lance_set_multicast_retry(struct timer_list *t) +{ + struct lance_private *lp = from_timer(lp, t, multicast_timer); + + lance_set_multicast(lp->dev); +} + static int a2065_init_one(struct zorro_dev *z, const struct zorro_device_id *ent); static void a2065_remove_one(struct zorro_dev *z); @@ -728,14 +736,13 @@ static int a2065_init_one(struct zorro_dev *z, priv->lance_log_tx_bufs = LANCE_LOG_TX_BUFFERS; priv->rx_ring_mod_mask = RX_RING_MOD_MASK; priv->tx_ring_mod_mask = TX_RING_MOD_MASK; + priv->dev = dev; dev->netdev_ops = _netdev_ops; dev->watchdog_timeo = 5*HZ; dev->dma = 0; - setup_timer(>multicast_timer, - (void(*)(unsigned long))lance_set_multicast, - (unsigned long)dev); + timer_setup(>multicast_timer, lance_set_multicast_retry, 0); err = register_netdev(dev); if (err) { diff --git 
a/drivers/net/ethernet/amd/am79c961a.c b/drivers/net/ethernet/amd/am79c961a.c index 0612dbee00d2..01d132c02ff9 100644 --- a/drivers/net/ethernet/amd/am79c961a.c +++ b/drivers/net/ethernet/amd/am79c961a.c @@ -302,10 +302,10 @@ am79c961_init_for_open(struct net_device *dev) write_rreg (dev->base_addr, CSR0, CSR0_IENA|CSR0_STRT); } -static void am79c961_timer(unsigned long data) +static void am79c961_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct dev_priv *priv = netdev_priv(dev); + struct dev_priv *priv = from_timer(priv, t, timer); + struct net_device *dev = priv->dev; unsigned int lnkstat, carrier; unsigned long flags; @@ -728,7 +728,8 @@ static int am79c961_probe(struct platform_device *pdev) am79c961_banner(); spin_lock_init(>chip_lock); - setup_timer(>timer, am79c961_timer, (unsigned long)dev); + priv->dev = dev; + timer_setup(>timer, am79c961_timer, 0); if (am79c961_hw_init(dev)) goto release; diff --git a/drivers/net/ethernet/amd/am79c961a.h b/drivers/net/ethernet/amd/am79c961a.h index 9f384b79507b..fc5088c70731 100644 --- a/drivers/net/ethernet/amd/am79c961a.h +++ b/drivers/net/ethernet/amd/am79c961a.h @@ -140,6 +140,7 @@ struct dev_priv { unsigned long txhdr; spinlock_t chip_lock; struct timer_list timer; +struct net_device *dev; }; #endif diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index 9bdf81c2cd00..116997a8b593 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -260,6 +260,7 @@ struct lance_private { unsigned short busmaster_regval; struct timer_list multicast_timer; + struct net_device *dev; /* Pointers to the ring buffers as seen from the CPU */ char *rx_buf_ptr_cpu[RX_RING_SIZE]; @@ -1000,9 +1001,10 @@ static void lance_set_multicast(struct net_device *dev) netif_wake_queue(dev); } -static void lance_set_multicast_retry(unsigned long _opaque) +static void lance_set_multicast_retry(struct timer_list *t) { - struct 
net_device *dev = (struct net_device *) _opaque; + struct lance_private *lp = from_timer(lp, t, multicast_timer); + struct net_device *dev = lp->dev; lance_set_multicast(dev); } @@ -1246,8 +1248,8 @@ static int dec_lance_probe(struct device *bdev, const int type) * can occur from interrupts (ex. IPv6). So we * use a timer to try
[PATCH] drivers/net: korina: Convert timers to use timer_setup()
In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller" Cc: Roman Yeryomin Cc: Florian Fainelli Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook --- drivers/net/ethernet/korina.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index 7cecd9dbc111..ae195f8adff5 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -653,10 +653,10 @@ static void korina_check_media(struct net_device *dev, unsigned int init_media) &lp->eth_regs->ethmac2); } -static void korina_poll_media(unsigned long data) +static void korina_poll_media(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct korina_private *lp = netdev_priv(dev); + struct korina_private *lp = from_timer(lp, t, media_check_timer); + struct net_device *dev = lp->dev; korina_check_media(dev, 0); mod_timer(&lp->media_check_timer, jiffies + HZ); @@ -1103,7 +1103,7 @@ static int korina_probe(struct platform_device *pdev) ": cannot register net device: %d\n", rc); goto probe_err_register; } - setup_timer(&lp->media_check_timer, korina_poll_media, (unsigned long) dev); + timer_setup(&lp->media_check_timer, korina_poll_media, 0); INIT_WORK(&lp->restart_task, korina_restart_task); -- 2.7.4 -- Kees Cook Pixel Security
[PATCH] drivers/net: fealnx: Convert timers to use timer_setup()
In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller"Cc: "yuval.sh...@oracle.com" Cc: Allen Pais Cc: Stephen Hemminger Cc: Philippe Reynes Cc: Johannes Berg Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook --- drivers/net/ethernet/fealnx.c | 20 ++-- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c index c8982313d850..23053919ebf5 100644 --- a/drivers/net/ethernet/fealnx.c +++ b/drivers/net/ethernet/fealnx.c @@ -426,8 +426,8 @@ static void mdio_write(struct net_device *dev, int phy_id, int location, int val static int netdev_open(struct net_device *dev); static void getlinktype(struct net_device *dev); static void getlinkstatus(struct net_device *dev); -static void netdev_timer(unsigned long data); -static void reset_timer(unsigned long data); +static void netdev_timer(struct timer_list *t); +static void reset_timer(struct timer_list *t); static void fealnx_tx_timeout(struct net_device *dev); static void init_ring(struct net_device *dev); static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); @@ -909,13 +909,13 @@ static int netdev_open(struct net_device *dev) printk(KERN_DEBUG "%s: Done netdev_open().\n", dev->name); /* Set the timer to check for link beat. 
*/ - setup_timer(>timer, netdev_timer, (unsigned long)dev); + timer_setup(>timer, netdev_timer, 0); np->timer.expires = RUN_AT(3 * HZ); /* timer handler */ add_timer(>timer); - setup_timer(>reset_timer, reset_timer, (unsigned long)dev); + timer_setup(>reset_timer, reset_timer, 0); np->reset_timer_armed = 0; return rc; } @@ -1078,10 +1078,10 @@ static void allocate_rx_buffers(struct net_device *dev) } -static void netdev_timer(unsigned long data) +static void netdev_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct netdev_private *np = netdev_priv(dev); + struct netdev_private *np = from_timer(np, t, timer); + struct net_device *dev = np->mii.dev; void __iomem *ioaddr = np->mem; int old_crvalue = np->crvalue; unsigned int old_linkok = np->linkok; @@ -1167,10 +1167,10 @@ static void enable_rxtx(struct net_device *dev) } -static void reset_timer(unsigned long data) +static void reset_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct netdev_private *np = netdev_priv(dev); + struct netdev_private *np = from_timer(np, t, reset_timer); + struct net_device *dev = np->mii.dev; unsigned long flags; printk(KERN_WARNING "%s: resetting tx and rx machinery\n", dev->name); -- 2.7.4 -- Kees Cook Pixel Security
[PATCH] drivers/net: natsemi: Convert timers to use timer_setup()
In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller"Cc: Allen Pais Cc: Eric Dumazet Cc: Philippe Reynes Cc: Wei Yongjun Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook --- drivers/net/ethernet/natsemi/natsemi.c | 10 +- drivers/net/ethernet/natsemi/ns83820.c | 8 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index dedeacd0bbca..b9a1a9f999ea 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -610,7 +610,7 @@ static int netdev_open(struct net_device *dev); static void do_cable_magic(struct net_device *dev); static void undo_cable_magic(struct net_device *dev); static void check_link(struct net_device *dev); -static void netdev_timer(unsigned long data); +static void netdev_timer(struct timer_list *t); static void dump_ring(struct net_device *dev); static void ns_tx_timeout(struct net_device *dev); static int alloc_ring(struct net_device *dev); @@ -1571,7 +1571,7 @@ static int netdev_open(struct net_device *dev) dev->name, (int)readl(ioaddr + ChipCmd)); /* Set the timer to check for link beat. */ - setup_timer(>timer, netdev_timer, (unsigned long)dev); + timer_setup(>timer, netdev_timer, 0); np->timer.expires = round_jiffies(jiffies + NATSEMI_TIMER_FREQ); add_timer(>timer); @@ -1787,10 +1787,10 @@ static void init_registers(struct net_device *dev) *this check via dspcfg_workaround sysfs option. 
* 3) check of death of the RX path due to OOM */ -static void netdev_timer(unsigned long data) +static void netdev_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct netdev_private *np = netdev_priv(dev); + struct netdev_private *np = from_timer(np, t, timer); + struct net_device *dev = np->dev; void __iomem * ioaddr = ns_ioaddr(dev); int next_tick = NATSEMI_TIMER_FREQ; const int irq = np->pci_dev->irq; diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 99d3c7884a4a..958fced4dacf 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -1600,10 +1600,10 @@ static void ns83820_tx_timeout(struct net_device *ndev) spin_unlock_irqrestore(>tx_lock, flags); } -static void ns83820_tx_watch(unsigned long data) +static void ns83820_tx_watch(struct timer_list *t) { - struct net_device *ndev = (void *)data; - struct ns83820 *dev = PRIV(ndev); + struct ns83820 *dev = from_timer(dev, t, tx_watchdog); + struct net_device *ndev = dev->ndev; #if defined(DEBUG) printk("ns83820_tx_watch: %u %u %d\n", @@ -1652,7 +1652,7 @@ static int ns83820_open(struct net_device *ndev) writel(0, dev->base + TXDP_HI); writel(desc, dev->base + TXDP); - setup_timer(>tx_watchdog, ns83820_tx_watch, (unsigned long)ndev); + timer_setup(>tx_watchdog, ns83820_tx_watch, 0); mod_timer(>tx_watchdog, jiffies + 2*HZ); netif_start_queue(ndev);/* FIXME: wait for phy to come up */ -- 2.7.4 -- Kees Cook Pixel Security
[PATCH] drivers/net: packetengines: Convert timers to use timer_setup()
In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller"Cc: Allen Pais Cc: yuan linyu Cc: Philippe Reynes Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook --- drivers/net/ethernet/packetengines/hamachi.c | 14 +++--- drivers/net/ethernet/packetengines/yellowfin.c | 10 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c index 77bc7cca8980..c9529c29a0a7 100644 --- a/drivers/net/ethernet/packetengines/hamachi.c +++ b/drivers/net/ethernet/packetengines/hamachi.c @@ -413,13 +413,13 @@ that case. /* The rest of these values should never change. */ -static void hamachi_timer(unsigned long data); +static void hamachi_timer(struct timer_list *t); enum capability_flags {CanHaveMII=1, }; static const struct chip_info { u16 vendor_id, device_id, device_id_mask, pad; const char *name; - void (*media_timer)(unsigned long data); + void (*media_timer)(struct timer_list *t); int flags; } chip_tbl[] = { {0x1318, 0x0911, 0x, 0, "Hamachi GNIC-II", hamachi_timer, 0}, @@ -547,7 +547,7 @@ static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *dev, int phy_id, int location, int value); static int hamachi_open(struct net_device *dev); static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); -static void hamachi_timer(unsigned long data); +static void hamachi_timer(struct timer_list *t); static void hamachi_tx_timeout(struct net_device *dev); static void hamachi_init_ring(struct net_device *dev); static netdev_tx_t hamachi_start_xmit(struct sk_buff *skb, @@ -979,7 +979,7 @@ static int hamachi_open(struct net_device *dev) dev->name, readw(ioaddr + RxStatus), readw(ioaddr + TxStatus)); } /* Set the timer to check for link beat. 
*/ - setup_timer(>timer, hamachi_timer, (unsigned long)dev); + timer_setup(>timer, hamachi_timer, 0); hmp->timer.expires = RUN_AT((24*HZ)/10);/* 2.4 sec. */ add_timer(>timer); @@ -1017,10 +1017,10 @@ static inline int hamachi_tx(struct net_device *dev) return 0; } -static void hamachi_timer(unsigned long data) +static void hamachi_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct hamachi_private *hmp = netdev_priv(dev); + struct hamachi_private *hmp = from_timer(hmp, t, timer); + struct net_device *dev = hmp->mii_if.dev; void __iomem *ioaddr = hmp->base; int next_tick = 10*HZ; diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c index 33c241f52a71..54224d1822e3 100644 --- a/drivers/net/ethernet/packetengines/yellowfin.c +++ b/drivers/net/ethernet/packetengines/yellowfin.c @@ -343,7 +343,7 @@ static int mdio_read(void __iomem *ioaddr, int phy_id, int location); static void mdio_write(void __iomem *ioaddr, int phy_id, int location, int value); static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static int yellowfin_open(struct net_device *dev); -static void yellowfin_timer(unsigned long data); +static void yellowfin_timer(struct timer_list *t); static void yellowfin_tx_timeout(struct net_device *dev); static int yellowfin_init_ring(struct net_device *dev); static netdev_tx_t yellowfin_start_xmit(struct sk_buff *skb, @@ -632,7 +632,7 @@ static int yellowfin_open(struct net_device *dev) } /* Set the timer to check for link beat. 
*/ - setup_timer(>timer, yellowfin_timer, (unsigned long)dev); + timer_setup(>timer, yellowfin_timer, 0); yp->timer.expires = jiffies + 3*HZ; add_timer(>timer); out: @@ -643,10 +643,10 @@ static int yellowfin_open(struct net_device *dev) goto out; } -static void yellowfin_timer(unsigned long data) +static void yellowfin_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct yellowfin_private *yp = netdev_priv(dev); + struct yellowfin_private *yp = from_timer(yp, t, timer); + struct net_device *dev = pci_get_drvdata(yp->pci_dev); void __iomem *ioaddr = yp->base; int next_tick = 60*HZ; -- 2.7.4 -- Kees Cook Pixel Security
[PATCH] drivers/net: mellanox: Convert timers to use timer_setup()
In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Saeed Mahameed Cc: Matan Barak Cc: Leon Romanovsky Cc: netdev@vger.kernel.org Cc: linux-r...@vger.kernel.org Signed-off-by: Kees Cook --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index a89a68ce53ad..185dcac0abe7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -285,9 +285,9 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev) spin_unlock_irqrestore(&health->wq_lock, flags); } -static void poll_health(unsigned long data) +static void poll_health(struct timer_list *t) { - struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data; + struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer); struct mlx5_core_health *health = &dev->priv.health; u32 count; @@ -320,7 +320,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; - setup_timer(&health->timer, poll_health, (unsigned long)dev); + timer_setup(&health->timer, poll_health, 0); health->sick = 0; clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); -- 2.7.4 -- Kees Cook Pixel Security
[PATCH] drivers/net: smsc: Convert timers to use timer_setup()
In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller"Cc: "yuval.sh...@oracle.com" Cc: Eric Dumazet Cc: Philippe Reynes Cc: Allen Pais Cc: Tobias Klauser Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook --- drivers/net/ethernet/smsc/epic100.c | 10 +- drivers/net/ethernet/smsc/smc91c92_cs.c | 10 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index 2a9724898fcf..949aaef390b6 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -290,7 +290,7 @@ static int read_eeprom(struct epic_private *, int); static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *dev, int phy_id, int loc, int val); static void epic_restart(struct net_device *dev); -static void epic_timer(unsigned long data); +static void epic_timer(struct timer_list *t); static void epic_tx_timeout(struct net_device *dev); static void epic_init_ring(struct net_device *dev); static netdev_tx_t epic_start_xmit(struct sk_buff *skb, @@ -739,7 +739,7 @@ static int epic_open(struct net_device *dev) /* Set the timer to switch to check for link beat and perhaps switch to an alternate media type. 
*/ - setup_timer(>timer, epic_timer, (unsigned long)dev); + timer_setup(>timer, epic_timer, 0); ep->timer.expires = jiffies + 3*HZ; add_timer(>timer); @@ -843,10 +843,10 @@ static void check_media(struct net_device *dev) } } -static void epic_timer(unsigned long data) +static void epic_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct epic_private *ep = netdev_priv(dev); + struct epic_private *ep = from_timer(ep, t, timer); + struct net_device *dev = ep->mii.dev; void __iomem *ioaddr = ep->ioaddr; int next_tick = 5*HZ; diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c index 92c927aec66d..a55f430f6a7b 100644 --- a/drivers/net/ethernet/smsc/smc91c92_cs.c +++ b/drivers/net/ethernet/smsc/smc91c92_cs.c @@ -280,7 +280,7 @@ static void set_rx_mode(struct net_device *dev); static int s9k_config(struct net_device *dev, struct ifmap *map); static void smc_set_xcvr(struct net_device *dev, int if_port); static void smc_reset(struct net_device *dev); -static void media_check(u_long arg); +static void media_check(struct timer_list *t); static void mdio_sync(unsigned int addr); static int mdio_read(struct net_device *dev, int phy_id, int loc); static void mdio_write(struct net_device *dev, int phy_id, int loc, int value); @@ -1070,7 +1070,7 @@ static int smc_open(struct net_device *dev) smc->packets_waiting = 0; smc_reset(dev); -setup_timer(>media, media_check, (u_long)dev); +timer_setup(>media, media_check, 0); mod_timer(>media, jiffies + HZ); return 0; @@ -1708,10 +1708,10 @@ static void smc_reset(struct net_device *dev) ==*/ -static void media_check(u_long arg) +static void media_check(struct timer_list *t) { -struct net_device *dev = (struct net_device *) arg; -struct smc_private *smc = netdev_priv(dev); +struct smc_private *smc = from_timer(smc, t, media); +struct net_device *dev = smc->mii_if.dev; unsigned int ioaddr = dev->base_addr; u_short i, media, saved_bank; u_short link; -- 2.7.4 
-- Kees Cook Pixel Security
[PATCH] netfilter: ipvs: Convert timers to use timer_setup()
In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Wensong ZhangCc: Simon Horman Cc: Julian Anastasov Cc: Pablo Neira Ayuso Cc: Jozsef Kadlecsik Cc: Florian Westphal Cc: "David S. Miller" Cc: netdev@vger.kernel.org Cc: lvs-de...@vger.kernel.org Cc: netfilter-de...@vger.kernel.org Cc: coret...@netfilter.org Signed-off-by: Kees Cook --- net/netfilter/ipvs/ip_vs_conn.c | 10 +- net/netfilter/ipvs/ip_vs_ctl.c | 7 +++ net/netfilter/ipvs/ip_vs_est.c | 6 +++--- net/netfilter/ipvs/ip_vs_lblc.c | 11 ++- net/netfilter/ipvs/ip_vs_lblcr.c | 11 ++- 5 files changed, 23 insertions(+), 22 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 3d2ac71a83ec..3a43b3470331 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -104,7 +104,7 @@ static inline void ct_write_unlock_bh(unsigned int key) spin_unlock_bh(&__ip_vs_conntbl_lock_array[key_LOCKARRAY_MASK].l); } -static void ip_vs_conn_expire(unsigned long data); +static void ip_vs_conn_expire(struct timer_list *t); /* * Returns hash value for IPVS connection entry @@ -457,7 +457,7 @@ EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto); static void __ip_vs_conn_put_notimer(struct ip_vs_conn *cp) { __ip_vs_conn_put(cp); - ip_vs_conn_expire((unsigned long)cp); + ip_vs_conn_expire(>timer); } /* @@ -817,9 +817,9 @@ static void ip_vs_conn_rcu_free(struct rcu_head *head) kmem_cache_free(ip_vs_conn_cachep, cp); } -static void ip_vs_conn_expire(unsigned long data) +static void ip_vs_conn_expire(struct timer_list *t) { - struct ip_vs_conn *cp = (struct ip_vs_conn *)data; + struct ip_vs_conn *cp = from_timer(cp, t, timer); struct netns_ipvs *ipvs = cp->ipvs; /* @@ -909,7 +909,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, } INIT_HLIST_NODE(>c_list); - setup_timer(>timer, ip_vs_conn_expire, (unsigned long)cp); + 
timer_setup(>timer, ip_vs_conn_expire, 0); cp->ipvs = ipvs; cp->af = p->af; cp->daf= dest_af; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 4f940d7eb2f7..b47e266c6eca 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1146,9 +1146,9 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) return 0; } -static void ip_vs_dest_trash_expire(unsigned long data) +static void ip_vs_dest_trash_expire(struct timer_list *t) { - struct netns_ipvs *ipvs = (struct netns_ipvs *)data; + struct netns_ipvs *ipvs = from_timer(ipvs, t, dest_trash_timer); struct ip_vs_dest *dest, *next; unsigned long now = jiffies; @@ -4019,8 +4019,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) INIT_LIST_HEAD(>dest_trash); spin_lock_init(>dest_trash_lock); - setup_timer(>dest_trash_timer, ip_vs_dest_trash_expire, - (unsigned long) ipvs); + timer_setup(>dest_trash_timer, ip_vs_dest_trash_expire, 0); atomic_set(>ftpsvc_counter, 0); atomic_set(>nullsvc_counter, 0); atomic_set(>conn_out_counter, 0); diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 457c6c193e13..489055091a9b 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -97,12 +97,12 @@ static void ip_vs_read_cpu_stats(struct ip_vs_kstats *sum, } -static void estimation_timer(unsigned long arg) +static void estimation_timer(struct timer_list *t) { struct ip_vs_estimator *e; struct ip_vs_stats *s; u64 rate; - struct netns_ipvs *ipvs = (struct netns_ipvs *)arg; + struct netns_ipvs *ipvs = from_timer(ipvs, t, est_timer); spin_lock(>est_lock); list_for_each_entry(e, >est_list, list) { @@ -192,7 +192,7 @@ int __net_init ip_vs_estimator_net_init(struct netns_ipvs *ipvs) { INIT_LIST_HEAD(>est_list); spin_lock_init(>est_lock); - setup_timer(>est_timer, estimation_timer, (unsigned long)ipvs); + timer_setup(>est_timer, estimation_timer, 0); mod_timer(>est_timer, jiffies + 2 * HZ); 
return 0; } diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index b6aa4a970c6e..d625179de485 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -106,6 +106,7 @@ struct ip_vs_lblc_table { struct rcu_head rcu_head; struct hlist_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */ struct
[PATCH] drivers/net: dlink: Convert timers to use timer_setup()
In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Denis KirjanovCc: netdev@vger.kernel.org Signed-off-by: Kees Cook --- drivers/net/ethernet/dlink/sundance.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index 6ca9e981ad57..1a27176381fb 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -431,7 +431,7 @@ static void mdio_write(struct net_device *dev, int phy_id, int location, int val static int mdio_wait_link(struct net_device *dev, int wait); static int netdev_open(struct net_device *dev); static void check_duplex(struct net_device *dev); -static void netdev_timer(unsigned long data); +static void netdev_timer(struct timer_list *t); static void tx_timeout(struct net_device *dev); static void init_ring(struct net_device *dev); static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); @@ -913,7 +913,7 @@ static int netdev_open(struct net_device *dev) ioread16(ioaddr + MACCtrl1), ioread16(ioaddr + MACCtrl0)); /* Set the timer to check for link beat. */ - setup_timer(>timer, netdev_timer, (unsigned long)dev); + timer_setup(>timer, netdev_timer, 0); np->timer.expires = jiffies + 3*HZ; add_timer(>timer); @@ -951,10 +951,10 @@ static void check_duplex(struct net_device *dev) } } -static void netdev_timer(unsigned long data) +static void netdev_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct netdev_private *np = netdev_priv(dev); + struct netdev_private *np = from_timer(np, t, timer); + struct net_device *dev = np->mii_if.dev; void __iomem *ioaddr = np->base; int next_tick = 10*HZ; -- 2.7.4 -- Kees Cook Pixel Security
[PATCH] drivers/net: 8390: Convert timers to use timer_setup()
In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook--- drivers/net/ethernet/8390/axnet_cs.c | 10 +- drivers/net/ethernet/8390/pcnet_cs.c | 10 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c index 3da1fc539ef9..7bddb8efb6d5 100644 --- a/drivers/net/ethernet/8390/axnet_cs.c +++ b/drivers/net/ethernet/8390/axnet_cs.c @@ -85,7 +85,7 @@ static struct net_device_stats *get_stats(struct net_device *dev); static void set_multicast_list(struct net_device *dev); static void axnet_tx_timeout(struct net_device *dev); static irqreturn_t ei_irq_wrapper(int irq, void *dev_id); -static void ei_watchdog(u_long arg); +static void ei_watchdog(struct timer_list *t); static void axnet_reset_8390(struct net_device *dev); static int mdio_read(unsigned int addr, int phy_id, int loc); @@ -483,7 +483,7 @@ static int axnet_open(struct net_device *dev) link->open++; info->link_status = 0x00; -setup_timer(>watchdog, ei_watchdog, (u_long)dev); +timer_setup(>watchdog, ei_watchdog, 0); mod_timer(>watchdog, jiffies + HZ); return ax_open(dev); @@ -547,10 +547,10 @@ static irqreturn_t ei_irq_wrapper(int irq, void *dev_id) return ax_interrupt(irq, dev_id); } -static void ei_watchdog(u_long arg) +static void ei_watchdog(struct timer_list *t) { -struct net_device *dev = (struct net_device *)(arg); -struct axnet_dev *info = PRIV(dev); +struct axnet_dev *info = from_timer(info, t, watchdog); +struct net_device *dev = info->p_dev->priv; unsigned int nic_base = dev->base_addr; unsigned int mii_addr = nic_base + AXNET_MII_EEP; u_short link; diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c index bd0a2a14b649..eae9827035dc 100644 --- a/drivers/net/ethernet/8390/pcnet_cs.c +++ 
b/drivers/net/ethernet/8390/pcnet_cs.c @@ -99,7 +99,7 @@ static int pcnet_open(struct net_device *dev); static int pcnet_close(struct net_device *dev); static int ei_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static irqreturn_t ei_irq_wrapper(int irq, void *dev_id); -static void ei_watchdog(u_long arg); +static void ei_watchdog(struct timer_list *t); static void pcnet_reset_8390(struct net_device *dev); static int set_config(struct net_device *dev, struct ifmap *map); static int setup_shmem_window(struct pcmcia_device *link, int start_pg, @@ -917,7 +917,7 @@ static int pcnet_open(struct net_device *dev) info->phy_id = info->eth_phy; info->link_status = 0x00; -setup_timer(>watchdog, ei_watchdog, (u_long)dev); +timer_setup(>watchdog, ei_watchdog, 0); mod_timer(>watchdog, jiffies + HZ); return ei_open(dev); @@ -1006,10 +1006,10 @@ static irqreturn_t ei_irq_wrapper(int irq, void *dev_id) return ret; } -static void ei_watchdog(u_long arg) +static void ei_watchdog(struct timer_list *t) { -struct net_device *dev = (struct net_device *)arg; -struct pcnet_dev *info = PRIV(dev); +struct pcnet_dev *info = from_timer(info, t, watchdog); +struct net_device *dev = info->p_dev->priv; unsigned int nic_base = dev->base_addr; unsigned int mii_addr = nic_base + DLINK_GPIO; u_short link; -- 2.7.4 -- Kees Cook Pixel Security
[PATCH v3 1/2] net: netrom: nr_route: refactor code in nr_add_node
Code refactoring in order to make the code easier to read and maintain. Signed-off-by: Gustavo A. R. Silva--- Changes in v2: Make use of the swap macro and remove inline keyword. Changes in v3: Update subject. net/netrom/nr_route.c | 59 ++- 1 file changed, 16 insertions(+), 43 deletions(-) diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 0c59354..fba4b4c 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -80,6 +80,19 @@ static struct nr_neigh *nr_neigh_get_dev(ax25_address *callsign, static void nr_remove_neigh(struct nr_neigh *); +/* re-sort the routes in quality order.*/ +static void re_sort_routes(struct nr_node *nr_node, int x, int y) +{ + if (nr_node->routes[y].quality > nr_node->routes[x].quality) { + if (nr_node->which == x) + nr_node->which = y; + else if (nr_node->which == y) + nr_node->which = x; + + swap(nr_node->routes[x], nr_node->routes[y]); + } +} + /* * Add a new route to a node, and in the process add the node and the * neighbour if it is new. 
@@ -90,7 +103,6 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, { struct nr_node *nr_node; struct nr_neigh *nr_neigh; - struct nr_route nr_route; int i, found; struct net_device *odev; @@ -251,49 +263,10 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, /* Now re-sort the routes in quality order */ switch (nr_node->count) { case 3: - if (nr_node->routes[1].quality > nr_node->routes[0].quality) { - switch (nr_node->which) { - case 0: - nr_node->which = 1; - break; - case 1: - nr_node->which = 0; - break; - } - nr_route = nr_node->routes[0]; - nr_node->routes[0] = nr_node->routes[1]; - nr_node->routes[1] = nr_route; - } - if (nr_node->routes[2].quality > nr_node->routes[1].quality) { - switch (nr_node->which) { - case 1: nr_node->which = 2; - break; - - case 2: nr_node->which = 1; - break; - - default: - break; - } - nr_route = nr_node->routes[1]; - nr_node->routes[1] = nr_node->routes[2]; - nr_node->routes[2] = nr_route; - } + re_sort_routes(nr_node, 0, 1); + re_sort_routes(nr_node, 1, 2); case 2: - if (nr_node->routes[1].quality > nr_node->routes[0].quality) { - switch (nr_node->which) { - case 0: nr_node->which = 1; - break; - - case 1: nr_node->which = 0; - break; - - default: break; - } - nr_route = nr_node->routes[0]; - nr_node->routes[0] = nr_node->routes[1]; - nr_node->routes[1] = nr_route; - } + re_sort_routes(nr_node, 0, 1); case 1: break; } -- 2.7.4
[PATCH v3 2/2] net: netrom: nr_route: mark expected switch fall-throughs
In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Signed-off-by: Gustavo A. R. Silva--- Changes in v2: None. Changes in v3: Update subject. net/netrom/nr_route.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index fba4b4c..75e6ba9 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -265,6 +265,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, case 3: re_sort_routes(nr_node, 0, 1); re_sort_routes(nr_node, 1, 2); + /* fall through */ case 2: re_sort_routes(nr_node, 0, 1); case 1: @@ -357,6 +358,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n switch (i) { case 0: nr_node->routes[0] = nr_node->routes[1]; + /* fall through */ case 1: nr_node->routes[1] = nr_node->routes[2]; case 2: @@ -526,6 +528,7 @@ void nr_rt_device_down(struct net_device *dev) switch (i) { case 0: t->routes[0] = t->routes[1]; + /* fall through */ case 1: t->routes[1] = t->routes[2]; case 2: -- 2.7.4
Re: v6/sit tunnels and VRFs
Thanks, David. I corrected the static route, applied the patch, and set the link/output dev on the tunnel and it works now. Is it required to set the link/output dev? I was thinking that this should not be required for cases where the outgoing device is not known, for instance on a router or device with multiple interfaces. Also, what is the expected behavior of loopback addresses in a VRF context? For instance, if an application were being run under "ip vrf exec" and it tried to use these addresses. jeff@VM2:~$ ping -I myvrf 127.0.0.1 PING 127.0.0.1 (127.0.0.1) from 127.0.0.1 myvrf: 56(84) bytes of data. ^C --- 127.0.0.1 ping statistics --- 3 packets transmitted, 0 received, 100% packet loss, time 2033ms jeff@VM2:~$ ping -I myvrf ::1 connect: Network is unreachable Thanks, Jeff On Thu, Oct 26, 2017 at 1:24 PM, David Ahern wrote: > On 10/25/17 9:28 PM, Jeff Barnhill wrote: >> Thanks, David. >> >> VM1: >> sudo ip addr add 192.168.200.1/24 dev enp0s8 broadcast 192.168.200.255 >> sudo ip link set enp0s8 up >> sudo ip route add 192.168.210.0/24 nexthop via 192.168.200.3 dev enp0s8 >> sudo ip tunnel add jtun mode sit remote 192.168.210.2 local 192.168.200.1 >> sudo ip -6 addr add 2001::1/64 dev jtun >> sudo ip link set jtun up >> >> VM2: >> sudo ip addr add 192.168.210.2/24 dev enp0s8 broadcast 192.168.210.255 >> sudo ip link set enp0s8 up >> sudo ip route add 192.168.200.0/24 nexthop via 192.168.210.3 dev enp0s8 >> sudo ip link add dev myvrf type vrf table 256 >> sudo ip link set myvrf up >> sudo ip link set enp0s8 vrf myvrf > > You lost the static route by doing the enslaving here. When the device > is added to or removed from a VRF it is cycled specifically to dump > routes and neighbor entries associated with the prior vrf. 
Always create > the vrf and enslave first, then add routes: > > sudo ip link add dev myvrf type vrf table 256 > sudo ip link set myvrf up > sudo ip link set enp0s8 vrf myvrf > > sudo ip addr add 192.168.210.2/24 dev enp0s8 broadcast 192.168.210.255 > sudo ip link set enp0s8 up > sudo ip route add 192.168.200.0/24 nexthop via 192.168.210.3 dev enp0s8 > > That said, the above works for the wrong reason -- it is not really > doing VRF based routing. For that to happen, the static route should be > added to the vrf table: > > sudo ip route add vrf myvrf 192.168.200.0/24 nexthop via 192.168.210.3 > dev enp0s8 > > And ... > >> sudo ip tunnel add jtun mode sit remote 192.168.200.1 local 192.168.210.2 > > you need to specify the link on the tunnel create: > > sudo ip tunnel add jtun mode sit remote 192.168.200.1 local > 192.168.210.2 dev enp0s8. > > And ... > > The tunnel lookup needs to account for the VRF device switch: > > (whitespace damaged on paste) > > diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c > index a799f5258614..cf0512054fa7 100644 > --- a/net/ipv6/sit.c > +++ b/net/ipv6/sit.c > @@ -632,11 +632,18 @@ static bool packet_is_spoofed(struct sk_buff *skb, > static int ipip6_rcv(struct sk_buff *skb) > { > const struct iphdr *iph = ip_hdr(skb); > + struct net_device *dev = skb->dev; > + struct net *net = dev_net(dev); > struct ip_tunnel *tunnel; > int err; > > - tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, > -iph->saddr, iph->daddr); > + if (netif_is_l3_master(dev)) { > + dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); > + if (!dev) > + goto out; > + } > + > + tunnel = ipip6_tunnel_lookup(net, dev, iph->saddr, iph->daddr); > if (tunnel) { > struct pcpu_sw_netstats *tstats; >
Re: [PATCH net-next] net: dsa: Simplify dsa_slave_phy_setup()
On 2017-10-26 18:45, Florian Fainelli wrote: On 10/26/2017 04:07 AM, Martin Hundebøll wrote: On 2017-10-26 02:32, Florian Fainelli wrote: Remove the code that tried to identify if a PHY designated by Device Tree required diversion through the DSA-created MDIO bus. This was created mainly for the bcm_sf2.c driver back when it did not have its own MDIO bus driver, which it now has since 461cd1b03e32 ("net: dsa: bcm_sf2: Register our slave MDIO bus"). Signed-off-by: Florian Fainelli Tested-by: Martin Hundebøll Thanks Martin, does that correctly fix the problem you reported a week ago on 639X? It does indeed. Thanks for the work on this. // Martin
Re: [PATCH net-next] tcp: add tracepoint trace_tcp_retransmit_synack()
> On Oct 26, 2017, at 7:01 PM, Cong Wangwrote: > > On Thu, Oct 26, 2017 at 4:50 PM, Song Liu wrote: >> In this case, we are putting CONFIG_IPV6 in TRACE_EVENT macro, which >> generates >> warnings like: >> >> ./include/trace/events/tcp.h:274:1: error: directive in argument list >> ./include/trace/events/tcp.h:281:1: error: directive in argument list >> >> Seems these warning cannot be easily avoided. This is also the same pattern >> we >> have been using in include/trace/events/tcp.h. > > Hmm, we use the same so why it only complains about this one?\ sparse reports same warning for all the lines in tcp.h. Don't know why kbuild test bot only complains about this patch. > >> >> Any suggestions on how shall we proceed from here? >> > > I think this warning is harmless, so perhaps not worthy time to > shut it up, unless sparse provides a simple way to do so.
About CFG80211_REQUIRE_SIGNED_REGDB in Kconfig
Hi, I have a question about CFG80211_REQUIRE_SIGNED_REGDB behavior in the latest net-next.git. Since my environment disables CONFIG_EXPERT, CFG80211_CERTIFICATION_ONUS is also disabled. In this case, menuconfig doesn't show me the config because net/wireless/Kconfig has: config CFG80211_REQUIRE_SIGNED_REGDB bool "require regdb signature" if CFG80211_CERTIFICATION_ONUS default y select SYSTEM_DATA_VERIFICATION Does this mean that non-expert users should enable CFG80211_REQUIRE_SIGNED_REGDB anyway? Or is there some other special reason for this? Best regards, Yoshihiro Shimoda
Re: [PATCH v2] ipv6: esp6: use BUG_ON instead of if condition followed by BUG
Quoting Herbert Xu: On Thu, Oct 26, 2017 at 07:51:06AM -0500, Gustavo A. R. Silva wrote: Use BUG_ON instead of if condition followed by BUG in esp_remove_trailer. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Acked-by: Herbert Xu Thanks! -- Gustavo A. R. Silva
Re: [PATCH] net: tipc: Convert timers to use timer_setup()
On Tue, Oct 24, 2017 at 11:44 AM, Jon Maloy wrote: > NAK. It doesn't sound like a good idea to send this to net. Especially since > one of these timers has already been refactored in net-next. Hi! I'm not sure what you mean about the one timer issue. I don't see any use of timer_setup() in net/tipc (and no recent conversions to the older setup_timer() API). What's the preferred path for landing this API conversion in net/tipc/? And, just to note, these changes are almost entirely mechanical. The only "special" case is in tipc_sk_timeout() where the argument needs to be slightly adjusted to fetch the tsk from the sk again. Thanks! -Kees -- Kees Cook Pixel Security
[PATCH v2 net-next 13/15] tcp: Namespace-ify sysctl_tcp_app_win
Signed-off-by: Eric Dumazet--- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++--- net/ipv4/tcp_input.c | 8 net/ipv4/tcp_ipv4.c| 1 + 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 956957a77db96ad3d231cc018c13503d615d8d2e..63f91d52cbc0ad35d8e04a8da0d9f57aa960bcb0 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -139,6 +139,7 @@ struct netns_ipv4 { int sysctl_tcp_fack; int sysctl_tcp_max_reordering; int sysctl_tcp_dsack; + int sysctl_tcp_app_win; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 8b2ae3e8d79f223d4637226fc7278fe751d0b5d7..7aa3d65062a14a98358f8868fa2c0dbb2c74a0ce 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -247,7 +247,6 @@ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; -extern int sysctl_tcp_app_win; extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_frto; extern int sysctl_tcp_nometrics_save; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 7652a9c2a65d3f1cfa0a75d1198e1d9d56761c35..e057788834a99cf99e141a602ddbe19b8e6fce3c 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -437,13 +437,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = , }, - { - .procname = "tcp_app_win", - .data = _tcp_app_win, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_adv_win_scale", .data = _tcp_adv_win_scale, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_app_win", + .data = _net.ipv4.sysctl_tcp_app_win, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git 
a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fd77037ac800a1153ec0ef904fcf00b93c061fa1..6af4b58ac6d5de54bdbb418f41a0b18eee38ca50 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -79,7 +79,6 @@ #include #include -int sysctl_tcp_app_win __read_mostly = 31; int sysctl_tcp_adv_win_scale __read_mostly = 1; EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); @@ -428,6 +427,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk) */ void tcp_init_buffer_space(struct sock *sk) { + int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win; struct tcp_sock *tp = tcp_sk(sk); int maxwin; @@ -446,14 +446,14 @@ void tcp_init_buffer_space(struct sock *sk) if (tp->window_clamp >= maxwin) { tp->window_clamp = maxwin; - if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss) + if (tcp_app_win && maxwin > 4 * tp->advmss) tp->window_clamp = max(maxwin - - (maxwin >> sysctl_tcp_app_win), + (maxwin >> tcp_app_win), 4 * tp->advmss); } /* Force reservation of one segment. */ - if (sysctl_tcp_app_win && + if (tcp_app_win && tp->window_clamp > 2 * tp->advmss && tp->window_clamp + tp->advmss > maxwin) tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d9d4d191e8f3c962a6ee68015ffe5a6e7fb8e9c1..189664ebd28e4cda7ef40a47591c3bd8cac3574b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2490,6 +2490,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_retrans_collapse = 1; net->ipv4.sysctl_tcp_max_reordering = 300; net->ipv4.sysctl_tcp_dsack = 1; + net->ipv4.sysctl_tcp_app_win = 31; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(>ipv4.tcp_fastopen_ctx_lock); -- 2.15.0.rc2.357.g7e34df9404-goog
[PATCH v2 net-next 05/15] tcp: Namespace-ify sysctl_tcp_retrans_collapse
Signed-off-by: Eric Dumazet--- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++--- net/ipv4/tcp_ipv4.c| 2 +- net/ipv4/tcp_output.c | 5 + 5 files changed, 10 insertions(+), 13 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 8662692686b3af98a94a176230b9ed147881d87a..b28c172b10e497f235b51aae0fc2d3bbf7cc51f3 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -132,6 +132,7 @@ struct netns_ipv4 { int sysctl_tcp_recovery; int sysctl_tcp_thin_linear_timeouts; int sysctl_tcp_slow_start_after_idle; + int sysctl_tcp_retrans_collapse; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index cc2ab522eb5cf7cb08b6918cdfd5c5500cfbf057..33cc86355b8ff9b506d21ad46cfc01b3916f5b61 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -243,7 +243,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ -extern int sysctl_tcp_retrans_collapse; extern int sysctl_tcp_stdurg; extern int sysctl_tcp_rfc1337; extern int sysctl_tcp_abort_on_overflow; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 40d69af8b363bc236e23879973872d8f9346d85e..533b92ad39dd0cada542028fe2f276d9eebcd2c8 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -386,13 +386,6 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl, } static struct ctl_table ipv4_table[] = { - { - .procname = "tcp_retrans_collapse", - .data = _tcp_retrans_collapse, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_max_orphans", .data = _tcp_max_orphans, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_retrans_collapse", + .data = _net.ipv4.sysctl_tcp_retrans_collapse, + .maxlen = sizeof(int), + .mode = 0644, + 
.proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cea63a4b59655823def7a423d27191003c7f084c..2bc6ba2059d32aa848dbc415b4b0e194b61b0268 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2487,7 +2487,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_early_retrans = 3; net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION; net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior. */ - + net->ipv4.sysctl_tcp_retrans_collapse = 1; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(>ipv4.tcp_fastopen_ctx_lock); net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bdc288a06f941add38a5cde434081c63ee94ed42..55a0aa4b96dfc7cd8f703ad42b932bae23ea5660 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -45,9 +45,6 @@ #include -/* People can turn this off for buggy TCP's found in printers etc. */ -int sysctl_tcp_retrans_collapse __read_mostly = 1; - /* People can turn this on to work with those rare, broken TCPs that * interpret the window field as a signed quantity. */ @@ -2804,7 +2801,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, struct sk_buff *skb = to, *tmp; bool first = true; - if (!sysctl_tcp_retrans_collapse) + if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse) return; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) return; -- 2.15.0.rc2.357.g7e34df9404-goog
[PATCH v2 net-next 10/15] tcp: remove stale sysctl_tcp_reordering
This extern is no longer used. Signed-off-by: Eric Dumazet--- include/net/tcp.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index e7b15e9f6e288908bf58a28fe24554630c1e0710..fc134ba74c7d38d08304b5be36506946784538f2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -244,7 +244,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ extern int sysctl_tcp_max_orphans; -extern int sysctl_tcp_reordering; extern int sysctl_tcp_max_reordering; extern int sysctl_tcp_dsack; extern long sysctl_tcp_mem[3]; -- 2.15.0.rc2.357.g7e34df9404-goog
[PATCH v2 net-next 08/15] tcp: Namespace-ify sysctl_tcp_abort_on_overflow
Signed-off-by: Eric Dumazet--- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++--- net/ipv4/tcp_minisocks.c | 4 +--- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 968edce38eb5d3399724b3142277eab44f19f2fb..3875fdf6b18653477408beb25176eac849e65ba4 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -135,6 +135,7 @@ struct netns_ipv4 { int sysctl_tcp_retrans_collapse; int sysctl_tcp_stdurg; int sysctl_tcp_rfc1337; + int sysctl_tcp_abort_on_overflow; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 2aea2b3373b38dde9aabf869931448e9ecd38649..7331281a229289f130ad7b5c5ddec1eba1ea2747 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -243,7 +243,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ -extern int sysctl_tcp_abort_on_overflow; extern int sysctl_tcp_max_orphans; extern int sysctl_tcp_fack; extern int sysctl_tcp_reordering; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 832e554235df37770809541ad8f9f1ca2f201739..ffd1fd769bba7c3524aa6dfac734e1de0cad1506 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -393,13 +393,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_abort_on_overflow", - .data = _tcp_abort_on_overflow, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "inet_peer_threshold", .data = _peer_threshold, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_abort_on_overflow", + .data = _net.ipv4.sysctl_tcp_abort_on_overflow, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git 
a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 11836667763cf9a32c673086b6dc2d759833c856..3674d63170b293778d32abd34aa32043c001aa82 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -29,8 +29,6 @@ #include #include -int sysctl_tcp_abort_on_overflow __read_mostly; - static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) { if (seq == s_win) @@ -783,7 +781,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, return inet_csk_complete_hashdance(sk, child, req, own_req); listen_overflow: - if (!sysctl_tcp_abort_on_overflow) { + if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) { inet_rsk(req)->acked = 1; return NULL; } -- 2.15.0.rc2.357.g7e34df9404-goog
[PATCH v2 net-next 15/15] tcp: Namespace-ify sysctl_tcp_frto
Signed-off-by: Eric Dumazet--- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++--- net/ipv4/tcp_input.c | 3 +-- net/ipv4/tcp_ipv4.c| 1 + 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 9dbb07d4eff465428817831e55c6a4922b7208fb..f4622e28db3a1484553f51709b144ee769766a28 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -141,6 +141,7 @@ struct netns_ipv4 { int sysctl_tcp_dsack; int sysctl_tcp_app_win; int sysctl_tcp_adv_win_scale; + int sysctl_tcp_frto; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 0dc27cd248997bf6a0463477db38db483c312fb0..18f047501f53be3780bd41a5c8234adf9683cebf 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -247,7 +247,6 @@ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; -extern int sysctl_tcp_frto; extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a95123e1e7da706c88bf5553b7d8ef6c2653ab50..f1bcb9b7e082c6688fad12e15be9b872ebed8151 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -437,13 +437,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = , }, - { - .procname = "tcp_frto", - .data = _tcp_frto, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_min_rtt_wlen", .data = _tcp_min_rtt_wlen, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = _adv_win_scale_min, .extra2 = _adv_win_scale_max, }, + { + .procname = "tcp_frto", + .data = _net.ipv4.sysctl_tcp_frto, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8ee2c84b0bc67f943dbaea95d9433e82b9a7d082..90d76f1c8f96bc89618ddc59ae237a34cd25db7c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -83,7 +83,6 @@ int sysctl_tcp_challenge_ack_limit = 1000; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; -int sysctl_tcp_frto __read_mostly = 2; int sysctl_tcp_min_rtt_wlen __read_mostly = 300; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; @@ -2026,7 +2025,7 @@ void tcp_enter_loss(struct sock *sk) * falsely raise the receive window, which results in repeated * timeouts and stop-and-go behavior. */ - tp->frto = sysctl_tcp_frto && + tp->frto = net->ipv4.sysctl_tcp_frto && (new_recovery || icsk->icsk_retransmits) && !inet_csk(sk)->icsk_mtup.probe_size; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1fe30fb99308b3e3fd07509b509b0e3727cc5d44..49757c7582c6d2cf413415be2c1b58482659 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2492,6 +2492,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_dsack = 1; net->ipv4.sysctl_tcp_app_win = 31; net->ipv4.sysctl_tcp_adv_win_scale = 1; + net->ipv4.sysctl_tcp_frto = 2; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(>ipv4.tcp_fastopen_ctx_lock); -- 2.15.0.rc2.357.g7e34df9404-goog
[PATCH v2 net-next 09/15] tcp: Namespace-ify sysctl_tcp_fack
Signed-off-by: Eric Dumazet--- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++--- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 3 +-- net/ipv4/tcp_minisocks.c | 2 +- 6 files changed, 11 insertions(+), 12 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 3875fdf6b18653477408beb25176eac849e65ba4..f0e792beeea974b0850090d7624a3d7490124067 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -136,6 +136,7 @@ struct netns_ipv4 { int sysctl_tcp_stdurg; int sysctl_tcp_rfc1337; int sysctl_tcp_abort_on_overflow; + int sysctl_tcp_fack; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 7331281a229289f130ad7b5c5ddec1eba1ea2747..e7b15e9f6e288908bf58a28fe24554630c1e0710 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -244,7 +244,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ extern int sysctl_tcp_max_orphans; -extern int sysctl_tcp_fack; extern int sysctl_tcp_reordering; extern int sysctl_tcp_max_reordering; extern int sysctl_tcp_dsack; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ffd1fd769bba7c3524aa6dfac734e1de0cad1506..1f23be13ce7be8b2a12b82aada36c6351fdfb70a 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -414,13 +414,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .procname = "tcp_fack", - .data = _tcp_fack, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_max_reordering", .data = _tcp_max_reordering, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_fack", + .data = _net.ipv4.sysctl_tcp_fack, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { 
} }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f6e1c00e300eeedcfe2ff0f4f2a4e1d997cd315d..c7c983f0f817c639e68f6fb1a70916cb604de90b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2517,7 +2517,7 @@ static int tcp_repair_options_est(struct sock *sk, return -EINVAL; tp->rx_opt.sack_ok |= TCP_SACK_SEEN; - if (sysctl_tcp_fack) + if (sock_net(sk)->ipv4.sysctl_tcp_fack) tcp_enable_fack(tp); break; case TCPOPT_TIMESTAMP: diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 64fde81b0eb70feccffd18a703e2b604e306ea65..c5b94460793f9693719b38978c123209e2b6ec0f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -79,7 +79,6 @@ #include #include -int sysctl_tcp_fack __read_mostly; int sysctl_tcp_max_reordering __read_mostly = 300; int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; @@ -5720,7 +5719,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tp->tcp_header_len = sizeof(struct tcphdr); } - if (tcp_is_sack(tp) && sysctl_tcp_fack) + if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_fack) tcp_enable_fack(tp); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 3674d63170b293778d32abd34aa32043c001aa82..3270ab8416ce8691cbb1c3a25533142fe1029bed 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -510,7 +510,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) { - if (sysctl_tcp_fack) + if (sock_net(sk)->ipv4.sysctl_tcp_fack) tcp_enable_fack(newtp); } newtp->window_clamp = req->rsk_window_clamp; -- 2.15.0.rc2.357.g7e34df9404-goog
[PATCH v2 net-next 12/15] tcp: Namespace-ify sysctl_tcp_dsack
Signed-off-by: Eric Dumazet--- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++--- net/ipv4/tcp_input.c | 5 ++--- net/ipv4/tcp_ipv4.c| 1 + 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 3f6844665a2fbe66fc0c91bd13e057ac2e03007a..956957a77db96ad3d231cc018c13503d615d8d2e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -138,6 +138,7 @@ struct netns_ipv4 { int sysctl_tcp_abort_on_overflow; int sysctl_tcp_fack; int sysctl_tcp_max_reordering; + int sysctl_tcp_dsack; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 8cd286226a1eca27d97b9f182d1a951b072e4575..8b2ae3e8d79f223d4637226fc7278fe751d0b5d7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -244,7 +244,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ extern int sysctl_tcp_max_orphans; -extern int sysctl_tcp_dsack; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 18cd228a20690541936dd6b3d9bb02cb283a9740..7652a9c2a65d3f1cfa0a75d1198e1d9d56761c35 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -414,13 +414,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .procname = "tcp_dsack", - .data = _tcp_dsack, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_mem", .maxlen = sizeof(sysctl_tcp_mem), @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_dsack", + .data = _net.ipv4.sysctl_tcp_dsack, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git 
a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c118657f06ee390053e38c35f03bea5b82845513..fd77037ac800a1153ec0ef904fcf00b93c061fa1 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -79,7 +79,6 @@ #include #include -int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; int sysctl_tcp_adv_win_scale __read_mostly = 1; EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); @@ -4150,7 +4149,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) { struct tcp_sock *tp = tcp_sk(sk); - if (tcp_is_sack(tp) && sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { int mib_idx; if (before(seq, tp->rcv_nxt)) @@ -4185,7 +4184,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk); - if (tcp_is_sack(tp) && sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c379a242abb3546044da9a3ef032f6f68acafe88..d9d4d191e8f3c962a6ee68015ffe5a6e7fb8e9c1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2489,6 +2489,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior. */ net->ipv4.sysctl_tcp_retrans_collapse = 1; net->ipv4.sysctl_tcp_max_reordering = 300; + net->ipv4.sysctl_tcp_dsack = 1; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(>ipv4.tcp_fastopen_ctx_lock); -- 2.15.0.rc2.357.g7e34df9404-goog
[PATCH v2 net-next 06/15] tcp: Namespace-ify sysctl_tcp_stdurg
Signed-off-by: Eric Dumazet--- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++--- net/ipv4/tcp_input.c | 3 +-- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index b28c172b10e497f235b51aae0fc2d3bbf7cc51f3..ffa2cf3dc747ca9443df3927dc7928c18357f872 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -133,6 +133,7 @@ struct netns_ipv4 { int sysctl_tcp_thin_linear_timeouts; int sysctl_tcp_slow_start_after_idle; int sysctl_tcp_retrans_collapse; + int sysctl_tcp_stdurg; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 33cc86355b8ff9b506d21ad46cfc01b3916f5b61..cf3fac7008d791f2a01e4df9178164769a861c60 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -243,7 +243,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ -extern int sysctl_tcp_stdurg; extern int sysctl_tcp_rfc1337; extern int sysctl_tcp_abort_on_overflow; extern int sysctl_tcp_max_orphans; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 533b92ad39dd0cada542028fe2f276d9eebcd2c8..a34bb75815c15afc077ba7ff36939b5abc9229f6 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -400,13 +400,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_stdurg", - .data = _tcp_stdurg, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_rfc1337", .data = _tcp_rfc1337, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_stdurg", + .data = _net.ipv4.sysctl_tcp_stdurg, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 
5b2272dbf6a9a507d62d8ee594fab53284b22a6d..14b06963c102dc8c747050448e504fc2e75a4eb4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -89,7 +89,6 @@ EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); /* rfc5961 challenge ack rate limiting */ int sysctl_tcp_challenge_ack_limit = 1000; -int sysctl_tcp_stdurg __read_mostly; int sysctl_tcp_rfc1337 __read_mostly; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; int sysctl_tcp_frto __read_mostly = 2; @@ -5123,7 +5122,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) struct tcp_sock *tp = tcp_sk(sk); u32 ptr = ntohs(th->urg_ptr); - if (ptr && !sysctl_tcp_stdurg) + if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg) ptr--; ptr += ntohl(th->seq); -- 2.15.0.rc2.357.g7e34df9404-goog
[PATCH v2 net-next 14/15] tcp: Namespace-ify sysctl_tcp_adv_win_scale
Signed-off-by: Eric Dumazet--- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 9 - net/ipv4/sysctl_net_ipv4.c | 18 +- net/ipv4/tcp_input.c | 13 + net/ipv4/tcp_ipv4.c| 1 + 5 files changed, 20 insertions(+), 22 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 63f91d52cbc0ad35d8e04a8da0d9f57aa960bcb0..9dbb07d4eff465428817831e55c6a4922b7208fb 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -140,6 +140,7 @@ struct netns_ipv4 { int sysctl_tcp_max_reordering; int sysctl_tcp_dsack; int sysctl_tcp_app_win; + int sysctl_tcp_adv_win_scale; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 7aa3d65062a14a98358f8868fa2c0dbb2c74a0ce..0dc27cd248997bf6a0463477db38db483c312fb0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -247,7 +247,6 @@ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; -extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_frto; extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; @@ -1311,9 +1310,9 @@ void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, int wscale_ok, __u8 *rcv_wscale, __u32 init_rcv_wnd); -static inline int tcp_win_from_space(int space) +static inline int tcp_win_from_space(const struct sock *sk, int space) { - int tcp_adv_win_scale = sysctl_tcp_adv_win_scale; + int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale; return tcp_adv_win_scale <= 0 ? 
(space>>(-tcp_adv_win_scale)) : @@ -1323,13 +1322,13 @@ static inline int tcp_win_from_space(int space) /* Note: caller must be prepared to deal with negative returns */ static inline int tcp_space(const struct sock *sk) { - return tcp_win_from_space(sk->sk_rcvbuf - + return tcp_win_from_space(sk, sk->sk_rcvbuf - atomic_read(>sk_rmem_alloc)); } static inline int tcp_full_space(const struct sock *sk) { - return tcp_win_from_space(sk->sk_rcvbuf); + return tcp_win_from_space(sk, sk->sk_rcvbuf); } extern void tcp_openreq_init_rwin(struct request_sock *req, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e057788834a99cf99e141a602ddbe19b8e6fce3c..a95123e1e7da706c88bf5553b7d8ef6c2653ab50 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -437,15 +437,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = , }, - { - .procname = "tcp_adv_win_scale", - .data = _tcp_adv_win_scale, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = _adv_win_scale_min, - .extra2 = _adv_win_scale_max, - }, { .procname = "tcp_frto", .data = _tcp_frto, @@ -1145,6 +1136,15 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_adv_win_scale", + .data = _net.ipv4.sysctl_tcp_adv_win_scale, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = _adv_win_scale_min, + .extra2 = _adv_win_scale_max, + }, { } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6af4b58ac6d5de54bdbb418f41a0b18eee38ca50..8ee2c84b0bc67f943dbaea95d9433e82b9a7d082 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -79,9 +79,6 @@ #include #include -int sysctl_tcp_adv_win_scale __read_mostly = 1; -EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); - /* rfc5961 challenge ack rate limiting */ int sysctl_tcp_challenge_ack_limit = 1000; @@ -363,8 +360,8 @@ static int 
__tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); /* Optimize this! */ - int truesize = tcp_win_from_space(skb->truesize) >> 1; - int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1; + int truesize = tcp_win_from_space(sk, skb->truesize) >> 1; + int window = tcp_win_from_space(sk, sysctl_tcp_rmem[2]) >> 1; while (tp->rcv_ssthresh <= window) { if (truesize <= skb->len) @@ -389,7 +386,7 @@ static void tcp_grow_window(struct
[PATCH v2 net-next 11/15] tcp: Namespace-ify sysctl_tcp_max_reordering
Signed-off-by: Eric Dumazet--- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++--- net/ipv4/tcp_input.c | 3 +-- net/ipv4/tcp_ipv4.c| 2 ++ 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index f0e792beeea974b0850090d7624a3d7490124067..3f6844665a2fbe66fc0c91bd13e057ac2e03007a 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -137,6 +137,7 @@ struct netns_ipv4 { int sysctl_tcp_rfc1337; int sysctl_tcp_abort_on_overflow; int sysctl_tcp_fack; + int sysctl_tcp_max_reordering; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index fc134ba74c7d38d08304b5be36506946784538f2..8cd286226a1eca27d97b9f182d1a951b072e4575 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -244,7 +244,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ extern int sysctl_tcp_max_orphans; -extern int sysctl_tcp_max_reordering; extern int sysctl_tcp_dsack; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1f23be13ce7be8b2a12b82aada36c6351fdfb70a..18cd228a20690541936dd6b3d9bb02cb283a9740 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -414,13 +414,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .procname = "tcp_max_reordering", - .data = _tcp_max_reordering, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_dsack", .data = _tcp_dsack, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_max_reordering", + .data = _net.ipv4.sysctl_tcp_max_reordering, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, 
{ } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c5b94460793f9693719b38978c123209e2b6ec0f..c118657f06ee390053e38c35f03bea5b82845513 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -79,7 +79,6 @@ #include #include -int sysctl_tcp_max_reordering __read_mostly = 300; int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; int sysctl_tcp_adv_win_scale __read_mostly = 1; @@ -889,7 +888,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric, return; if (metric > tp->reordering) { - tp->reordering = min(sysctl_tcp_max_reordering, metric); + tp->reordering = min(sock_net(sk)->ipv4.sysctl_tcp_max_reordering, metric); #if FASTRETRANS_DEBUG > 1 pr_debug("Disorder%d %d %u f%u s%u rr%d\n", diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2bc6ba2059d32aa848dbc415b4b0e194b61b0268..c379a242abb3546044da9a3ef032f6f68acafe88 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2488,6 +2488,8 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION; net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior. */ net->ipv4.sysctl_tcp_retrans_collapse = 1; + net->ipv4.sysctl_tcp_max_reordering = 300; + net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(>ipv4.tcp_fastopen_ctx_lock); net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; -- 2.15.0.rc2.357.g7e34df9404-goog
[PATCH v2 net-next 01/15] tcp: Namespace-ify sysctl_tcp_early_retrans
Signed-off-by: Eric Dumazet--- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 18 +- net/ipv4/tcp_input.c | 1 - net/ipv4/tcp_ipv4.c| 1 + net/ipv4/tcp_output.c | 4 +++- 6 files changed, 14 insertions(+), 12 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2c4222a5d1025f5928665e10edb70fad65352dba..a7f39e3ea666a835b6042e4008c86ccaadd14b46 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -128,6 +128,7 @@ struct netns_ipv4 { int sysctl_tcp_sack; int sysctl_tcp_window_scaling; int sysctl_tcp_timestamps; + int sysctl_tcp_early_retrans; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 285bc82dea410b22ac585ee65daff5cbac7c3fc7..a12b71d4118baa6b939bdeba7380cb3830d46ff0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -265,7 +265,6 @@ extern int sysctl_tcp_workaround_signed_windows; extern int sysctl_tcp_slow_start_after_idle; extern int sysctl_tcp_thin_linear_timeouts; extern int sysctl_tcp_thin_dupack; -extern int sysctl_tcp_early_retrans; extern int sysctl_tcp_recovery; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 81d218346cf7a0f340f964c434a21cace5c41fa0..f0f650f020afd535f41943c6c9fb1483be7cfb8d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -634,15 +634,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_early_retrans", - .data = _tcp_early_retrans, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = , - .extra2 = , - }, { .procname = "tcp_min_tso_segs", .data = _tcp_min_tso_segs, @@ -1145,6 +1136,15 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_early_retrans", + .data 
= _net.ipv4.sysctl_tcp_early_retrans, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = , + .extra2 = , + }, { } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 337f6011528a7d4c3ab7fdcc0623496cfefafc71..7656b1e6d5046297b4c5e6cf5591266b9be40095 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -95,7 +95,6 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE; int sysctl_tcp_frto __read_mostly = 2; int sysctl_tcp_min_rtt_wlen __read_mostly = 300; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; -int sysctl_tcp_early_retrans __read_mostly = 3; int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 23a8100af5ad399d4fa2568f4cac19192a008055..7ab313f6768e234173d78f17cfb1f664b230e958 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2484,6 +2484,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_sack = 1; net->ipv4.sysctl_tcp_window_scaling = 1; net->ipv4.sysctl_tcp_timestamps = 1; + net->ipv4.sysctl_tcp_early_retrans = 3; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(>ipv4.tcp_fastopen_ctx_lock); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c8fc512e0bbb48f7d36e159e8aae56ec70a24498..21713836d46af9d48de10e8ec0e7410572ed7eeb 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2435,6 +2435,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); u32 timeout, rto_delta_us; + int early_retrans; /* Don't do any loss probe on a Fast Open connection before 3WHS * finishes. 
@@ -2442,10 +2443,11 @@ bool tcp_schedule_loss_probe(struct sock *sk) if (tp->fastopen_rsk) return false; + early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans; /* Schedule a loss probe in 2*RTT for SACK capable connections * in Open state, that are either limited by cwnd or application. */ - if ((sysctl_tcp_early_retrans != 3 && sysctl_tcp_early_retrans != 4) || + if ((early_retrans != 3
Re: [PATCH net-next 00/15] tcp: move 14 sysctls to namespaces
On Fri, 2017-10-27 at 13:46 +0900, David Miller wrote: > From: Eric Dumazet > Date: Thu, 26 Oct 2017 16:35:11 -0700 > > > Ideally all TCP sysctls should be per netns. > > This patch series takes care of 14 of sysctls. > > More to come later. > > The tcp-fack patch doesn't apply cleanly, please respin. > > Thank you. Sure, I did a git rebase that went well. v2 is coming right away, thanks.
[PATCH v2 net-next 07/15] tcp: Namespace-ify sysctl_tcp_rfc1337
Signed-off-by: Eric Dumazet--- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++--- net/ipv4/tcp_input.c | 1 - net/ipv4/tcp_minisocks.c | 2 +- 5 files changed, 9 insertions(+), 10 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index ffa2cf3dc747ca9443df3927dc7928c18357f872..968edce38eb5d3399724b3142277eab44f19f2fb 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -134,6 +134,7 @@ struct netns_ipv4 { int sysctl_tcp_slow_start_after_idle; int sysctl_tcp_retrans_collapse; int sysctl_tcp_stdurg; + int sysctl_tcp_rfc1337; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index cf3fac7008d791f2a01e4df9178164769a861c60..2aea2b3373b38dde9aabf869931448e9ecd38649 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -243,7 +243,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ -extern int sysctl_tcp_rfc1337; extern int sysctl_tcp_abort_on_overflow; extern int sysctl_tcp_max_orphans; extern int sysctl_tcp_fack; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a34bb75815c15afc077ba7ff36939b5abc9229f6..832e554235df37770809541ad8f9f1ca2f201739 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -400,13 +400,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_rfc1337", - .data = _tcp_rfc1337, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "inet_peer_threshold", .data = _peer_threshold, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_rfc1337", + .data = _net.ipv4.sysctl_tcp_rfc1337, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git 
a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 14b06963c102dc8c747050448e504fc2e75a4eb4..64fde81b0eb70feccffd18a703e2b604e306ea65 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -89,7 +89,6 @@ EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); /* rfc5961 challenge ack rate limiting */ int sysctl_tcp_challenge_ack_limit = 1000; -int sysctl_tcp_rfc1337 __read_mostly; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; int sysctl_tcp_frto __read_mostly = 2; int sysctl_tcp_min_rtt_wlen __read_mostly = 300; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 056009f1c14f13ac4af987d0a7451f32dbde0023..11836667763cf9a32c673086b6dc2d759833c856 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -181,7 +181,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, * Oh well... nobody has a sufficient solution to this * protocol bug yet. */ - if (sysctl_tcp_rfc1337 == 0) { + if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) { kill: inet_twsk_deschedule_put(tw); return TCP_TW_SUCCESS; -- 2.15.0.rc2.357.g7e34df9404-goog
[PATCH v2 net-next 00/15] tcp: move 14 sysctls to namespaces
Ideally all TCP sysctls should be per netns. This patch series takes care of 14 of them. More to come later. Eric Dumazet (15): tcp: Namespace-ify sysctl_tcp_early_retrans tcp: Namespace-ify sysctl_tcp_recovery tcp: Namespace-ify sysctl_tcp_thin_linear_timeouts tcp: Namespace-ify sysctl_tcp_slow_start_after_idle tcp: Namespace-ify sysctl_tcp_retrans_collapse tcp: Namespace-ify sysctl_tcp_stdurg tcp: Namespace-ify sysctl_tcp_rfc1337 tcp: Namespace-ify sysctl_tcp_abort_on_overflow tcp: Namespace-ify sysctl_tcp_fack tcp: remove stale sysctl_tcp_reordering tcp: Namespace-ify sysctl_tcp_max_reordering tcp: Namespace-ify sysctl_tcp_dsack tcp: Namespace-ify sysctl_tcp_app_win tcp: Namespace-ify sysctl_tcp_adv_win_scale tcp: Namespace-ify sysctl_tcp_frto include/net/netns/ipv4.h | 14 include/net/tcp.h | 27 ++ net/ipv4/sysctl_net_ipv4.c | 204 ++--- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 42 -- net/ipv4/tcp_ipv4.c | 9 ++ net/ipv4/tcp_minisocks.c | 8 +- net/ipv4/tcp_output.c | 14 ++-- net/ipv4/tcp_recovery.c | 2 - net/ipv4/tcp_timer.c | 4 +- 10 files changed, 157 insertions(+), 169 deletions(-) -- 2.15.0.rc2.357.g7e34df9404-goog
[PATCH v2 net-next 03/15] tcp: Namespace-ify sysctl_tcp_thin_linear_timeouts
Note that sysctl_tcp_thin_dupack was not used, I deleted it. Signed-off-by: Eric Dumazet--- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 2 -- net/ipv4/sysctl_net_ipv4.c | 14 +++--- net/ipv4/tcp_timer.c | 4 +--- 4 files changed, 9 insertions(+), 12 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index d6ed718075d44cfc0e60995c1e938d588ad261a8..2a9f37b39c45fe451e45025790a4e5c45ece5cbc 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -130,6 +130,7 @@ struct netns_ipv4 { int sysctl_tcp_timestamps; int sysctl_tcp_early_retrans; int sysctl_tcp_recovery; + int sysctl_tcp_thin_linear_timeouts; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index c7f51534fc44c61a95225e0adc0a1200ea5c0c1c..063a7a48b7fe23092023d053e26a967389628cdc 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -263,8 +263,6 @@ extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; extern int sysctl_tcp_workaround_signed_windows; extern int sysctl_tcp_slow_start_after_idle; -extern int sysctl_tcp_thin_linear_timeouts; -extern int sysctl_tcp_thin_dupack; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 78019adcae875a438264ee47723670f6b54cacf9..12003214f4d80b38d5f754ddd91be8a990168ade 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -620,13 +620,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_allowed_congestion_control, }, - { - .procname = "tcp_thin_linear_timeouts", - .data = _tcp_thin_linear_timeouts, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_min_tso_segs", .data = _tcp_min_tso_segs, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname 
= "tcp_thin_linear_timeouts", + .data = _net.ipv4.sysctl_tcp_thin_linear_timeouts, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 804a8d34ce86cc17472c918c00c25de88b85184f..035a1ef1f2d8462c1d19f364b599ffac538ef688 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -22,8 +22,6 @@ #include #include -int sysctl_tcp_thin_linear_timeouts __read_mostly; - /** * tcp_write_err() - close socket and save error info * @sk: The socket the error has appeared on. @@ -522,7 +520,7 @@ void tcp_retransmit_timer(struct sock *sk) * linear-timeout retransmissions into a black hole */ if (sk->sk_state == TCP_ESTABLISHED && - (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) && + (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) && tcp_stream_is_thin(tp) && icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { icsk->icsk_backoff = 0; -- 2.15.0.rc2.357.g7e34df9404-goog
[PATCH v2 net-next 02/15] tcp: Namespace-ify sysctl_tcp_recovery
Signed-off-by: Eric Dumazet--- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 2 +- net/ipv4/sysctl_net_ipv4.c | 14 +++--- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_ipv4.c| 1 + net/ipv4/tcp_recovery.c| 2 -- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index a7f39e3ea666a835b6042e4008c86ccaadd14b46..d6ed718075d44cfc0e60995c1e938d588ad261a8 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -129,6 +129,7 @@ struct netns_ipv4 { int sysctl_tcp_window_scaling; int sysctl_tcp_timestamps; int sysctl_tcp_early_retrans; + int sysctl_tcp_recovery; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index a12b71d4118baa6b939bdeba7380cb3830d46ff0..c7f51534fc44c61a95225e0adc0a1200ea5c0c1c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -265,7 +265,7 @@ extern int sysctl_tcp_workaround_signed_windows; extern int sysctl_tcp_slow_start_after_idle; extern int sysctl_tcp_thin_linear_timeouts; extern int sysctl_tcp_thin_dupack; -extern int sysctl_tcp_recovery; + #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ extern int sysctl_tcp_limit_output_bytes; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index f0f650f020afd535f41943c6c9fb1483be7cfb8d..78019adcae875a438264ee47723670f6b54cacf9 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -449,13 +449,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_recovery", - .data = _tcp_recovery, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "tcp_max_reordering", .data = _tcp_max_reordering, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = , .extra2 = , }, + { + .procname = "tcp_recovery", + .data = _net.ipv4.sysctl_tcp_recovery, + .maxlen = 
sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7656b1e6d5046297b4c5e6cf5591266b9be40095..5b2272dbf6a9a507d62d8ee594fab53284b22a6d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2788,7 +2788,7 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag) struct tcp_sock *tp = tcp_sk(sk); /* Use RACK to detect loss */ - if (sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) { + if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) { u32 prior_retrans = tp->retrans_out; tcp_rack_mark_lost(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7ab313f6768e234173d78f17cfb1f664b230e958..517ff1948a71287b06ea0859e1f25a15119a3dd9 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2485,6 +2485,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_window_scaling = 1; net->ipv4.sysctl_tcp_timestamps = 1; net->ipv4.sysctl_tcp_early_retrans = 3; + net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(>ipv4.tcp_fastopen_ctx_lock); diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index cda6074a429a24b7be600065d95600f4f9810ee4..d3603a9e24eae8649edd12d3f0678015b09b2037 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -1,8 +1,6 @@ #include #include -int sysctl_tcp_recovery __read_mostly = TCP_RACK_LOSS_DETECTION; - static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); -- 2.15.0.rc2.357.g7e34df9404-goog
[PATCH v2 net-next 04/15] tcp: Namespace-ify sysctl_tcp_slow_start_after_idle
Signed-off-by: Eric Dumazet--- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 3 +-- net/ipv4/sysctl_net_ipv4.c | 14 +++--- net/ipv4/tcp_ipv4.c| 1 + net/ipv4/tcp_output.c | 5 + 5 files changed, 11 insertions(+), 13 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2a9f37b39c45fe451e45025790a4e5c45ece5cbc..8662692686b3af98a94a176230b9ed147881d87a 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -131,6 +131,7 @@ struct netns_ipv4 { int sysctl_tcp_early_retrans; int sysctl_tcp_recovery; int sysctl_tcp_thin_linear_timeouts; + int sysctl_tcp_slow_start_after_idle; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 063a7a48b7fe23092023d053e26a967389628cdc..cc2ab522eb5cf7cb08b6918cdfd5c5500cfbf057 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -262,7 +262,6 @@ extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; extern int sysctl_tcp_workaround_signed_windows; -extern int sysctl_tcp_slow_start_after_idle; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ @@ -1308,7 +1307,7 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); s32 delta; - if (!sysctl_tcp_slow_start_after_idle || tp->packets_out || + if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out || ca_ops->cong_control) return; delta = tcp_jiffies32 - tp->lsndtime; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 12003214f4d80b38d5f754ddd91be8a990168ade..40d69af8b363bc236e23879973872d8f9346d85e 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -571,13 +571,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_slow_start_after_idle", - .data = _tcp_slow_start_after_idle, - .maxlen 
= sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, #ifdef CONFIG_NETLABEL { .procname = "cipso_cache_enable", @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_slow_start_after_idle", + .data = _net.ipv4.sysctl_tcp_slow_start_after_idle, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 517ff1948a71287b06ea0859e1f25a15119a3dd9..cea63a4b59655823def7a423d27191003c7f084c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2486,6 +2486,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_timestamps = 1; net->ipv4.sysctl_tcp_early_retrans = 3; net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION; + net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior. */ net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(>ipv4.tcp_fastopen_ctx_lock); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 21713836d46af9d48de10e8ec0e7410572ed7eeb..bdc288a06f941add38a5cde434081c63ee94ed42 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -62,9 +62,6 @@ int sysctl_tcp_limit_output_bytes __read_mostly = 262144; */ int sysctl_tcp_tso_win_divisor __read_mostly = 3; -/* By default, RFC2861 behavior. */ -int sysctl_tcp_slow_start_after_idle __read_mostly = 1; - static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, int push_one, gfp_t gfp); @@ -1690,7 +1687,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) if (tp->packets_out > tp->snd_cwnd_used) tp->snd_cwnd_used = tp->packets_out; - if (sysctl_tcp_slow_start_after_idle && + if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle && (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto && !ca_ops->cong_control) tcp_cwnd_application_limited(sk); -- 2.15.0.rc2.357.g7e34df9404-goog
Re: pull-request: mac80211 2017-10-25
From: Johannes Berg Date: Wed, 25 Oct 2017 16:03:42 +0200 > Here are a few more fixes for net, we started comprehensive testing > for the security issues and found that the problem wasn't addressed > in TKIP, so that's included, along with a handful other fixes. > > Please pull and let me know if there's any problem. Pulled, thanks Johannes.
Re: [PATCH 1/1] l2tp: cleanup l2tp_tunnel_delete calls
From: Jiri Slaby Date: Wed, 25 Oct 2017 15:57:55 +0200 > l2tp_tunnel_delete does not return anything since commit 62b982eeb458 > ("l2tp: fix race condition in l2tp_tunnel_delete"). But call sites of > l2tp_tunnel_delete still do casts to void to avoid unused return value > warnings. > > Kill these now useless casts. > > Signed-off-by: Jiri Slaby Applied to net-next, thanks.
Re: [PATCH net-next 00/15] tcp: move 14 sysctls to namespaces
From: Eric Dumazet Date: Thu, 26 Oct 2017 16:35:11 -0700 > Ideally all TCP sysctls should be per netns. > This patch series takes care of 14 of sysctls. > More to come later. The tcp-fack patch doesn't apply cleanly, please respin. Thank you.
Re: [Patch net 01/16] net_sched: introduce a workqueue for RCU callbacks of tc filter
On Thu, 2017-10-26 at 21:28 -0700, Cong Wang wrote: > On Thu, Oct 26, 2017 at 9:05 PM, Eric Dumazetwrote: > > On Thu, 2017-10-26 at 18:24 -0700, Cong Wang wrote: > >> ... > > > >> On the other hand, this makes tcf_block_put() ugly and > >> harder to understand. Since David and Eric strongly dislike > >> adding synchronize_rcu(), this is probably the only > >> solution that could make everyone happy. > > > > > > ... > > > >> +static void tcf_block_put_deferred(struct work_struct *work) > >> +{ > >> + struct tcf_block *block = container_of(work, struct tcf_block, work); > >> + struct tcf_chain *chain; > >> > >> + rtnl_lock(); > >> /* Hold a refcnt for all chains, except 0, in case they are gone. */ > >> list_for_each_entry(chain, >chain_list, list) > >> if (chain->index) > >> @@ -292,13 +308,27 @@ void tcf_block_put(struct tcf_block *block) > >> list_for_each_entry(chain, >chain_list, list) > >> tcf_chain_flush(chain); > >> > >> - /* Wait for RCU callbacks to release the reference count. */ > >> + INIT_WORK(>work, tcf_block_put_final); > >> + /* Wait for RCU callbacks to release the reference count and make > >> + * sure their works have been queued before this. > >> + */ > >> rcu_barrier(); > >> + tcf_queue_work(>work); > >> + rtnl_unlock(); > >> +} > > > > > > On a loaded server, rcu_barrier() typically takes 4 ms. > > > > Way better than synchronize_rcu() (about 90 ms) but still an issue when > > holding RTNL. > > > > We have thousands of filters, and management daemon restarts and rebuild > > TC hierarchy from scratch. > > > > Simply getting rid of 1000 old filters might block RTNL for a while, or > > maybe I misunderstood your patches. > > > > Paul pointed out the same. > > As I replied, this rcu_barrier() is NOT added by this patchset, it is already > there in current master branch. You added the rtnl_lock() rtnl_unlock()... I really do not care if hundreds of tasks (not owning rtnl) call rcu_barrier()... 
Also, we are still using a 4.3-based kernel, and no rcu_barrier() is used in filter dismantle (unregister_tcf_proto_ops() is not used in our workloads). Somehow something went very wrong in net/sched in recent kernels.
Re: [Patch net 01/16] net_sched: introduce a workqueue for RCU callbacks of tc filter
On Thu, Oct 26, 2017 at 9:05 PM, Eric Dumazetwrote: > On Thu, 2017-10-26 at 18:24 -0700, Cong Wang wrote: >> ... > >> On the other hand, this makes tcf_block_put() ugly and >> harder to understand. Since David and Eric strongly dislike >> adding synchronize_rcu(), this is probably the only >> solution that could make everyone happy. > > > ... > >> +static void tcf_block_put_deferred(struct work_struct *work) >> +{ >> + struct tcf_block *block = container_of(work, struct tcf_block, work); >> + struct tcf_chain *chain; >> >> + rtnl_lock(); >> /* Hold a refcnt for all chains, except 0, in case they are gone. */ >> list_for_each_entry(chain, >chain_list, list) >> if (chain->index) >> @@ -292,13 +308,27 @@ void tcf_block_put(struct tcf_block *block) >> list_for_each_entry(chain, >chain_list, list) >> tcf_chain_flush(chain); >> >> - /* Wait for RCU callbacks to release the reference count. */ >> + INIT_WORK(>work, tcf_block_put_final); >> + /* Wait for RCU callbacks to release the reference count and make >> + * sure their works have been queued before this. >> + */ >> rcu_barrier(); >> + tcf_queue_work(>work); >> + rtnl_unlock(); >> +} > > > On a loaded server, rcu_barrier() typically takes 4 ms. > > Way better than synchronize_rcu() (about 90 ms) but still an issue when > holding RTNL. > > We have thousands of filters, and management daemon restarts and rebuild > TC hierarchy from scratch. > > Simply getting rid of 1000 old filters might block RTNL for a while, or > maybe I misunderstood your patches. > Paul pointed out the same. As I replied, this rcu_barrier() is NOT added by this patchset, it is already there in current master branch.
[PATCH net] tcp: refresh tp timestamp before tcp_mtu_probe()
From: Eric Dumazet In the unlikely event tcp_mtu_probe() is sending a packet, we want tp->tcp_mstamp to be as accurate as possible. This means we need to call tcp_mstamp_refresh() a bit earlier in tcp_write_xmit(). Fixes: 385e20706fac ("tcp: use tp->tcp_mstamp in output path") Signed-off-by: Eric Dumazet --- net/ipv4/tcp_output.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1151870018e345592853b035a0902121c41e268d..ae60dd3faed0adc71731bc686f878afd4c628d32 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2239,6 +2239,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, sent_pkts = 0; + tcp_mstamp_refresh(tp); if (!push_one) { /* Do MTU probing. */ result = tcp_mtu_probe(sk); @@ -2250,7 +2251,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, } max_segs = tcp_tso_segs(sk, mss_now); - tcp_mstamp_refresh(tp); while ((skb = tcp_send_head(sk))) { unsigned int limit;
[PATCH] ipv6: exthdrs: use swap macro in ipv6_dest_hao
Make use of the swap macro and remove the unnecessary variable tmp_addr. This makes the code easier to read and maintain. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva --- net/ipv6/exthdrs.c | 5 + 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 7835dea..9f918a7 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -187,7 +187,6 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) struct ipv6_destopt_hao *hao; struct inet6_skb_parm *opt = IP6CB(skb); struct ipv6hdr *ipv6h = ipv6_hdr(skb); - struct in6_addr tmp_addr; int ret; if (opt->dsthao) { @@ -229,9 +228,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; - tmp_addr = ipv6h->saddr; - ipv6h->saddr = hao->addr; - hao->addr = tmp_addr; + swap(ipv6h->saddr, hao->addr); if (skb->tstamp == 0) __net_timestamp(skb); -- 2.7.4
Re: [Patch net 01/16] net_sched: introduce a workqueue for RCU callbacks of tc filter
On Thu, 2017-10-26 at 18:24 -0700, Cong Wang wrote: > ... > On the other hand, this makes tcf_block_put() ugly and > harder to understand. Since David and Eric strongly dislike > adding synchronize_rcu(), this is probably the only > solution that could make everyone happy. ... > +static void tcf_block_put_deferred(struct work_struct *work) > +{ > + struct tcf_block *block = container_of(work, struct tcf_block, work); > + struct tcf_chain *chain; > > + rtnl_lock(); > /* Hold a refcnt for all chains, except 0, in case they are gone. */ > list_for_each_entry(chain, >chain_list, list) > if (chain->index) > @@ -292,13 +308,27 @@ void tcf_block_put(struct tcf_block *block) > list_for_each_entry(chain, >chain_list, list) > tcf_chain_flush(chain); > > - /* Wait for RCU callbacks to release the reference count. */ > + INIT_WORK(>work, tcf_block_put_final); > + /* Wait for RCU callbacks to release the reference count and make > + * sure their works have been queued before this. > + */ > rcu_barrier(); > + tcf_queue_work(>work); > + rtnl_unlock(); > +} On a loaded server, rcu_barrier() typically takes 4 ms. Way better than synchronize_rcu() (about 90 ms) but still an issue when holding RTNL. We have thousands of filters, and management daemon restarts and rebuild TC hierarchy from scratch. Simply getting rid of 1000 old filters might block RTNL for a while, or maybe I misunderstood your patches. Thanks.
breakage due to commit 6e617de84e ("net: avoid a full fib lookup when rp_filter is disabled")
Hi Paolo: Your commit: commit 6e617de84e87d626d1e976fc30e1322239fd4d2d Author: Paolo AbeniDate: Wed Sep 20 18:26:53 2017 +0200 net: avoid a full fib lookup when rp_filter is disabled. breaks a test case that uses a veth pair in the same network namespace but separate VRFs. This setup: vrf add vrf0 table 1001 vrf add vrf1 table 1002 ip link add virt01 type veth peer name virt10 ip link set virt01 master vrf0 ip link set virt10 master vrf1 ip addr add 172.16.20.20/24 dev virt01 ip link set virt01 up ip addr add 172.16.20.21/24 dev virt10 ip link set virt10 up ping -c 1 -I vrf0 172.16.20.21 fails due to: if (inet_lookup_ifaddr_rcu(net, src)) return -EINVAL; in fib_validate_source. David #!/bin/sh # vrf0 sends out packets with mpls labels # vrf1 receives the labelled packets, pops the labels, and forwards to vrf2 # vrf2 receives the unlabelled packets and replies to vrf0 vrf add vrf0 table 1001 vrf add vrf1 table 1002 vrf add vrf2 table 1003 ip link add virt01 type veth peer name virt10 ip link set virt01 master vrf0 ip link set virt10 master vrf1 ip link add virt12 type veth peer name virt21 ip link set virt12 master vrf1 ip link set virt21 master vrf2 ip addr add 172.16.20.20/24 dev virt01 ip link set virt01 up ip addr add 172.16.20.21/24 dev virt10 ip link set virt10 up ip addr add 172.16.21.21/24 dev virt12 ip link set virt12 up ip addr add 172.16.21.22/24 dev virt21 ip link set virt21 up modprobe mpls_iptunnel ip route add vrf vrf0 10.10.10.10/32 encap mpls 100 via inet 172.16.20.21 ip route add vrf vrf0 172.16.21.0/24 via 172.16.20.21 sysctl -w net.mpls.conf.virt10.input=1 sysctl -w net.mpls.platform_labels=1000 ip -f mpls route add 100 via inet 172.16.21.22 dev virt12 ip addr add 10.10.10.10/32 dev vrf2 ip route add vrf vrf2 172.16.20.0/24 via 172.16.21.21 ping -c 1 -I vrf0 10.10.10.10 netserver cat <
Re: [PATCH] drivers/net: 3com/3c515: Convert timers to use timer_setup()
From: Kees CookDate: Wed, 25 Oct 2017 03:51:03 -0700 > In preparation for unconditionally passing the struct timer_list pointer to > all timer callbacks, switch to using the new timer_setup() and from_timer() > to pass the timer pointer explicitly. > > Cc: "David S. Miller" > Cc: Thomas Gleixner > Cc: Stephen Hemminger > Cc: Johannes Berg > Cc: netdev@vger.kernel.org > Signed-off-by: Kees Cook Applied.
Re: [PATCH] drivers/net: can: Convert timers to use timer_setup()
From: Kees CookDate: Wed, 25 Oct 2017 03:51:14 -0700 > In preparation for unconditionally passing the struct timer_list pointer to > all timer callbacks, switch to using the new timer_setup() and from_timer() > to pass the timer pointer explicitly. > > Cc: Wolfgang Grandegger > Cc: Marc Kleine-Budde > Cc: "David S. Miller" > Cc: Allen Pais > Cc: linux-...@vger.kernel.org > Cc: netdev@vger.kernel.org > Signed-off-by: Kees Cook Applied.
Re: [PATCH] drivers/net: netronome: Convert timers to use timer_setup()
From: Kees CookDate: Wed, 25 Oct 2017 03:51:38 -0700 > In preparation for unconditionally passing the struct timer_list pointer to > all timer callbacks, switch to using the new timer_setup() and from_timer() > to pass the timer pointer explicitly. > > Cc: Jakub Kicinski > Cc: "David S. Miller" > Cc: Jiri Pirko > Cc: Jamal Hadi Salim > Cc: Simon Horman > Cc: oss-driv...@netronome.com > Cc: netdev@vger.kernel.org > Signed-off-by: Kees Cook Applied.
Re: [PATCH] drivers/net: hamradio/yam: Convert timers to use timer_setup()
From: Kees CookDate: Wed, 25 Oct 2017 03:51:20 -0700 > In preparation for unconditionally passing the struct timer_list pointer to > all timer callbacks, switch to using the new timer_setup() and from_timer() > to pass the timer pointer explicitly. Initialization was entirely missing. > > Cc: Jean-Paul Roubelat > Cc: linux-h...@vger.kernel.org > Cc: netdev@vger.kernel.org > Signed-off-by: Kees Cook Applied.
Re: [PATCH] drivers/net: nuvoton: Convert timers to use timer_setup()
From: Kees CookDate: Wed, 25 Oct 2017 03:51:58 -0700 > In preparation for unconditionally passing the struct timer_list pointer to > all timer callbacks, switch to using the new timer_setup() and from_timer() > to pass the timer pointer explicitly. > > Cc: Wan ZongShun > Cc: linux-arm-ker...@lists.infradead.org > Cc: netdev@vger.kernel.org > Signed-off-by: Kees Cook Applied.
Re: [PATCH] drivers/net: realtek: Convert timers to use timer_setup()
From: Kees CookDate: Wed, 25 Oct 2017 03:53:12 -0700 > In preparation for unconditionally passing the struct timer_list pointer to > all timer callbacks, switch to using the new timer_setup() and from_timer() > to pass the timer pointer explicitly. > > Cc: Realtek linux nic maintainers > Cc: "David S. Miller" > Cc: David Howells > Cc: Jay Vosburgh > Cc: Allen Pais > Cc: Eric Dumazet > Cc: Tobias Klauser > Cc: netdev@vger.kernel.org > Signed-off-by: Kees Cook Applied.
Re: [PATCH] drivers/net: wan/dscc4: Remove unused timer
From: Kees CookDate: Wed, 25 Oct 2017 03:53:42 -0700 > This removes an entirely unused timer, which avoids needing to convert it > to timer_setup(). > > Cc: Francois Romieu > Cc: netdev@vger.kernel.org > Signed-off-by: Kees Cook Applied.
Re: [PATCH] drivers/net: wan/lmc: Convert timers to use timer_setup()
From: Kees CookDate: Wed, 25 Oct 2017 03:53:53 -0700 > In preparation for unconditionally passing the struct timer_list pointer to > all timer callbacks, switch to using the new timer_setup() and from_timer() > to pass the timer pointer explicitly. > > Cc: Allen Pais > Cc: "David S. Miller" > Cc: netdev@vger.kernel.org > Signed-off-by: Kees Cook Applied.
Re: [PATCH] drivers/net: sxgbe: Convert timers to use timer_setup()
From: Kees CookDate: Wed, 25 Oct 2017 03:53:20 -0700 > In preparation for unconditionally passing the struct timer_list pointer to > all timer callbacks, switch to using the new timer_setup() and from_timer() > to pass the timer pointer explicitly. > > Cc: Byungho An > Cc: Girish K S > Cc: Vipul Pandya > Cc: netdev@vger.kernel.org > Signed-off-by: Kees Cook Applied.
Re: [PATCH] drivers/net: wan/sdla: Convert timers to use timer_setup()
From: Kees CookDate: Wed, 25 Oct 2017 03:53:59 -0700 > In preparation for unconditionally passing the struct timer_list pointer to > all timer callbacks, switch to using the new timer_setup() and from_timer() > to pass the timer pointer explicitly. > > Cc: Allen Pais > Cc: "David S. Miller" > Cc: Tobias Klauser > Cc: netdev@vger.kernel.org > Signed-off-by: Kees Cook Applied.
Re: [PATCH] drivers/net: arcnet: Convert timers to use timer_setup()
From: Kees CookDate: Wed, 25 Oct 2017 03:54:06 -0700 > In preparation for unconditionally passing the struct timer_list pointer to > all timer callbacks, switch to using the new timer_setup() and from_timer() > to pass the timer pointer explicitly. > > Cc: Michael Grzeschik > Cc: netdev@vger.kernel.org > Signed-off-by: Kees Cook Applied.
Re: [PATCH] drivers/net: hippi: Convert timers to use timer_setup()
From: Kees CookDate: Wed, 25 Oct 2017 03:51:29 -0700 > In preparation for unconditionally passing the struct timer_list pointer to > all timer callbacks, switch to using the new timer_setup() and from_timer() > to pass the timer pointer explicitly. > > Cc: Jes Sorensen > Cc: linux-hi...@sunsite.dk > Cc: netdev@vger.kernel.org > Signed-off-by: Kees Cook Applied.
[PATCH V2 net] tuntap: properly align skb->head before building skb
An unaligned alloc_frag->offset caused by a previous allocation will result in an unaligned skb->head. This will lead to an unaligned skb_shared_info and then an unaligned dataref, which is required to be aligned for access on some architectures. Fix this by aligning alloc_frag->offset before the frag refilling. Fixes: 0bbd7dad34f8 ("tun: make tun_build_skb() thread safe") Cc: Eric Dumazet Cc: Willem de Bruijn Cc: Wei Wei Cc: Dmitry Vyukov Cc: Mark Rutland Reported-by: Wei Wei Signed-off-by: Jason Wang --- - The patch is needed for -stable. - Wei, can you try this patch to see if it solves your issue? --- drivers/net/tun.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b9973fb..5550f56 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1286,6 +1286,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, buflen += SKB_DATA_ALIGN(len + pad); rcu_read_unlock(); + alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES); if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL))) return ERR_PTR(-ENOMEM); -- 2.7.4
Re: [PATCH net] tuntap: properly align skb->head before building skb
On 2017年10月26日 22:11, Eric Dumazet wrote: On Thu, Oct 26, 2017 at 5:15 AM, Jason Wangwrote: An unaligned alloc_frag->offset caused by previous allocation will result an unaligned skb->head. This will lead unaligned skb_shared_info and then unaligned dataref which requires to be aligned for accessing on some architecture. Fix this by aligning alloc_frag->offset before the frag refilling. Fixes: 0bbd7dad34f8 ("tun: make tun_build_skb() thread safe") Cc: Eric Dumazet Cc: Willem de Bruijn Cc: Wei Wei Cc: Dmitry Vyukov Cc: Mark Rutland Reported-by: Wei Wei Signed-off-by: Jason Wang --- - The patch is needed for -stable. - Wei, can you try this patch to see if it solves your issue? --- drivers/net/tun.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b9973fb..60e44f2 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1286,6 +1286,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, buflen += SKB_DATA_ALIGN(len + pad); rcu_read_unlock(); + alloc_frag->offset = ALIGN((u64)alloc_frag->offset, TUN_RX_PAD); You have to align to one cache line (SMP_CACHE_BYTES), or SKB_DATA_ALIGN(1) Oh right. Then eventually use skb_reserve() for NET_IP_ALIGN, but I guess it is already done. Yes. Thanks
Re: [PATCH] thunderbolt: Drop sequence number check from tb_xdomain_match()
From: Mika WesterbergDate: Wed, 25 Oct 2017 12:27:34 +0300 > Commit 9a03c3d398c1 ("thunderbolt: Fix a couple right shifting to zero > bugs") revealed an issue that was previously hidden because we never > actually compared received XDomain message sequence numbers properly. > The idea with these sequence numbers is that the responding host uses > the same sequence number that was in the request packet which we can > then check at the requesting host. > > However, testing against macOS it looks like it does not follow this but > instead uses some other logic. Windows driver on the other hand handles > it the same way than Linux. > > In order to be able to talk to macOS again, fix this so that we drop the > whole sequence number check. This effectively works exactly the same > than it worked before the aforementioned commit. This also follows the > logic the original P2P networking code used. > > Signed-off-by: Mika Westerberg > --- > This applies on top of net-next.git/master. Applied, thank you.
Re: [PATCH v9 00/10] net: stmmac: dwmac-sun8i: Handle integrated PHY
From: Corentin LabbeDate: Tue, 24 Oct 2017 19:57:04 +0200 > The first 7 patch should go via the sunxi tree, the last three via > the net tree. I've applied the last 3 patches to net-next.
Re: [PATCH net-next] net: updating dst lastusage is an unlikely event.
From: Paolo AbeniDate: Tue, 24 Oct 2017 12:41:01 +0200 > Since commit 0da4af00b2ed ("ipv6: only update __use and lastusetime > once per jiffy at most"), updating the dst lastuse field is an > unlikely action: it happens at most once per jiffy, out of > potentially millions of calls per second. > > Mark explicitly the code as such, and let the compiler generate > better code. > > Note: gcc 7.2 and several older versions do actually generate > different - better - code when the unlikely() hint is in place, > avoid jump in the fast path and keeping better code locality. > > Signed-off-by: Paolo Abeni Applied, thanks.
[PATCH net-next] stmmac: copy unicast mac address to MAC registers
Currently the stmmac driver does not copy the valid Ethernet MAC address to the MAC registers. This patch takes care of updating the MAC registers with the MAC address. Signed-off-by: Bhadram Varka --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 16 +++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 0e1b0a3..e0e6348 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3748,6 +3748,20 @@ static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) return ret; } +static int stmmac_set_mac_address(struct net_device *ndev, void *addr) +{ + struct stmmac_priv *priv = netdev_priv(ndev); + int ret = 0; + + ret = eth_mac_addr(ndev, addr); + if (ret) + return ret; + + priv->hw->mac->set_umac_addr(priv->hw, ndev->dev_addr, 0); + + return ret; +} + #ifdef CONFIG_DEBUG_FS static struct dentry *stmmac_fs_dir; @@ -3975,7 +3989,7 @@ static const struct net_device_ops stmmac_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = stmmac_poll_controller, #endif - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = stmmac_set_mac_address, }; /** -- 2.7.4
Re: WARNING in refcount_sub_and_test
Maybe I have just made some mistakes on understanding the reproduction methods, will try it again. Thanks, - ChunYu On Thu, Oct 26, 2017 at 10:49 PM, Dmitry Vyukovwrote: > On Thu, Oct 26, 2017 at 10:53 AM, ChunYu Wang wrote: >> Hi all, >> >> I am failed to reproduce it on target kernel with the reproducer file >> or replaying the target syzkaller description log file, do I made >> something wrong or there exists more subjects then the line in >> repro.txt: >> >> #{Threaded:true Collide:true Repeat:false Procs:1 Sandbox:namespace >> Fault:false FaultCall:-1 FaultNth:0 EnableTun:false UseTmpDir:true >> HandleSegv:false WaitRepeat:false Debug:false Repro:false} > > > Hi ChunYu, > > I've just re-tested the C repro and was able to trigger the bug in a second. > I've checked out 49ca1943a7adb429b11b8e05d81bc821694b76c7, copied the > provided config, run make olddefconfig, built with gcc-7 (you can get > the exact one here > https://storage.googleapis.com/syzkaller/gcc-7.tar.gz). Then run in > qemu (most of the flags are probably irrelevant): > > qemu-system-x86_64 -hda wheezy.img -net > user,host=10.0.2.10,hostfwd=tcp::10022-:22 -net nic -nographic -kernel > arch/x86/boot/bzImage -append "kvm-intel.nested=1 > kvm-intel.unrestricted_guest=1 kvm-intel.ept=1 > kvm-intel.flexpriority=1 kvm-intel.vpid=1 > kvm-intel.emulate_invalid_guest_state=1 kvm-intel.eptad=1 > kvm-intel.enable_shadow_vmcs=1 kvm-intel.pml=1 > kvm-intel.enable_apicv=1 console=ttyS0 root=/dev/sda > earlyprintk=serial slub_debug=UZ vsyscall=native rodata=n oops=panic > panic_on_warn=1 panic=86400" -enable-kvm -pidfile vm_pid -m 2G -smp 4 > -cpu host -usb -usbdevice mouse -usbdevice tablet -soundhw all > > And running the provided C program instantly spewed the following. > > Is there anything you did differently? 
I would like to understand > common reasons why syzbot reproducers don't work and outline them > here: > https://github.com/google/syzkaller/blob/master/docs/syzbot.md > > Thanks > > > [ 588.444300] refcount_t: underflow; use-after-free. > [ 588.445812] [ cut here ] > [ 588.447026] WARNING: CPU: 1 PID: 3086 at lib/refcount.c:186 > refcount_sub_and_test+0x167/0x1b0 > [ 588.449082] Kernel panic - not syncing: panic_on_warn set ... > [ 588.449082] > [ 588.450737] CPU: 1 PID: 3086 Comm: a.out Not tainted 4.14.0-rc5+ #9 > [ 588.452160] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), > BIOS Bochs 01/01/2011 > [ 588.454059] Call Trace: > [ 588.454658] dump_stack+0x194/0x257 > [ 588.455538] ? arch_local_irq_restore+0x53/0x53 > [ 588.456630] panic+0x1e4/0x417 > [ 588.457367] ? __warn+0x1d9/0x1d9 > [ 588.458171] ? show_regs_print_info+0x65/0x65 > [ 588.459234] ? refcount_sub_and_test+0x167/0x1b0 > [ 588.460262] __warn+0x1c4/0x1d9 > [ 588.460958] ? refcount_sub_and_test+0x167/0x1b0 > [ 588.461965] report_bug+0x211/0x2d0 > [ 588.462756] fixup_bug+0x40/0x90 > [ 588.463597] do_trap+0x260/0x390 > [ 588.464304] do_error_trap+0x120/0x390 > [ 588.465105] ? vprintk_emit+0x49b/0x590 > [ 588.465929] ? do_trap+0x390/0x390 > [ 588.41] ? refcount_sub_and_test+0x167/0x1b0 > [ 588.467646] ? vprintk_emit+0x3ea/0x590 > [ 588.468475] ? trace_hardirqs_off_thunk+0x1a/0x1c > [ 588.469482] do_invalid_op+0x1b/0x20 > [ 588.470262] invalid_op+0x18/0x20 > [ 588.470988] RIP: 0010:refcount_sub_and_test+0x167/0x1b0 > [ 588.472080] RSP: 0018:88006550e9c8 EFLAGS: 00010282 > [ 588.473224] RAX: 0026 RBX: 0001 RCX: > > [ 588.474643] RDX: 0026 RSI: 11000caa1cf9 RDI: > ed000caa1d2d > [ 588.476091] RBP: 88006550ea58 R08: R09: > 11000caa1ccb > [ 588.477520] R10: 88006550e7f8 R11: 85b2cb78 R12: > 11000caa1d3a > [ 588.478967] R13: ff01 R14: 0100 R15: > 88006a7f4a7c > [ 588.480413] ? refcount_sub_and_test+0x167/0x1b0 > [ 588.481337] ? refcount_inc+0x50/0x50 > [ 588.482081] ? 
__sctp_outq_teardown+0xa5b/0x1230 > [ 588.483004] ? sctp_association_free+0x2d0/0x930 > [ 588.484291] ? sctp_do_sm+0x271b/0x6a30 > [ 588.485247] ? sctp_primitive_SHUTDOWN+0xa0/0xd0 > [ 588.486295] ? sctp_close+0x3c6/0x980 > [ 588.487058] ? inet_release+0xed/0x1c0 > [ 588.488370] ? sock_release+0x8d/0x1e0 > [ 588.489080] ? sock_close+0x16/0x20 > [ 588.489759] sctp_wfree+0x183/0x620 > [ 588.490430] ? entry_SYSCALL_64_fastpath+0xbc/0xbe > [ 588.491323] ? __sctp_write_space+0x910/0x910 > [ 588.492177] skb_release_head_state+0x124/0x200 > [ 588.493078] skb_release_all+0x15/0x60 > [ 588.493938] consume_skb+0x153/0x490 > [ 588.494605] ? sctp_chunk_put+0x99/0x420 > [ 588.495388] ? alloc_skb_with_frags+0x750/0x750 > [ 588.496119] ? sctp_chunk_hold+0x20/0x20 > [ 588.496757] ? sctp_sched_dequeue_common+0x2aa/0x5d0 > [ 588.497554] ? refcount_sub_and_test+0x115/0x1b0 > [ 588.498296] ? refcount_inc+0x50/0x50 > [
Re: [PATCH net-next] tcp: add tracepoint trace_tcp_retransmit_synack()
On Thu, Oct 26, 2017 at 4:50 PM, Song Liu wrote: > In this case, we are putting CONFIG_IPV6 in TRACE_EVENT macro, which generates > warnings like: > > ./include/trace/events/tcp.h:274:1: error: directive in argument list > ./include/trace/events/tcp.h:281:1: error: directive in argument list > > Seems these warning cannot be easily avoided. This is also the same pattern we > have been using in include/trace/events/tcp.h. Hmm, we use the same pattern, so why does it only complain about this one? > > Any suggestions on how shall we proceed from here? > I think this warning is harmless, so it is perhaps not worth the time to shut it up, unless sparse provides a simple way to do so.
Re: [PATCH net-next v5 2/2] bridge: vlan: signal if anything changed on vlan add
On 2017/10/26 22:41, Nikolay Aleksandrov wrote: > Before this patch there was no way to tell if the vlan add operation > actually changed anything, thus we would always generate a notification > on adds. Let's make the notifications more precise and generate them > only if anything changed, so use the new bool parameter to signal that the > vlan was updated. We cannot return an error because there are valid use > cases that will be broken (e.g. overlapping range add) and also we can't > risk masking errors due to calls into drivers for vlan add which can > potentially return anything. > > Signed-off-by: Nikolay Aleksandrov> --- > v5: fix br_vlan_add return (v1 leftover) spotted by Toshiaki Makita > v4: set changed always to false in the non-vlan config case > v3: fix non-vlan config functions reported by kbuild bot > v2: pass changed down to vlan add functions instead of using a specific > error that needs to be masked > > net/bridge/br_netlink.c | 9 -- > net/bridge/br_private.h | 14 ++--- > net/bridge/br_vlan.c| 76 > +++-- > 3 files changed, 71 insertions(+), 28 deletions(-) > > diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c > index d0290ede9342..e732403669c6 100644 > --- a/net/bridge/br_netlink.c > +++ b/net/bridge/br_netlink.c > @@ -508,6 +508,7 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, > static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, > int cmd, struct bridge_vlan_info *vinfo, bool *changed) > { > + bool curr_change; > int err = 0; Just a question. Why are you defining another variable here? Is it impossible to pass "changed" down to [br|nbp]_vlan_add() like other functions you modified in patch 1/2? -- Toshiaki Makita
[Patch net 03/16] net_sched: use tcf_queue_work() in bpf filter
Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris MiCc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang --- net/sched/cls_bpf.c | 19 +-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 520c5027646a..037a3ae86829 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -49,7 +49,10 @@ struct cls_bpf_prog { struct sock_filter *bpf_ops; const char *bpf_name; struct tcf_proto *tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { @@ -257,9 +260,21 @@ static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) kfree(prog); } +static void cls_bpf_delete_prog_work(struct work_struct *work) +{ + struct cls_bpf_prog *prog = container_of(work, struct cls_bpf_prog, work); + + rtnl_lock(); + __cls_bpf_delete_prog(prog); + rtnl_unlock(); +} + static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu) { - __cls_bpf_delete_prog(container_of(rcu, struct cls_bpf_prog, rcu)); + struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu); + + INIT_WORK(>work, cls_bpf_delete_prog_work); + tcf_queue_work(>work); } static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog) -- 2.13.0
[Patch net 07/16] net_sched: use tcf_queue_work() in fw filter
Defer the tcf_exts_destroy() call in the RCU callback to the tc filter workqueue, where it can take the RTNL lock. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang --- net/sched/cls_fw.c | 19 --- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 941245ad07fd..99183b8621ec 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -46,7 +46,10 @@ struct fw_filter { #endif /* CONFIG_NET_CLS_IND */ struct tcf_exts exts; struct tcf_proto *tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static u32 fw_hash(u32 handle) @@ -119,12 +122,22 @@ static int fw_init(struct tcf_proto *tp) return 0; } -static void fw_delete_filter(struct rcu_head *head) +static void fw_delete_filter_work(struct work_struct *work) { - struct fw_filter *f = container_of(head, struct fw_filter, rcu); + struct fw_filter *f = container_of(work, struct fw_filter, work); + rtnl_lock(); tcf_exts_destroy(&f->exts); kfree(f); + rtnl_unlock(); +} + +static void fw_delete_filter(struct rcu_head *head) +{ + struct fw_filter *f = container_of(head, struct fw_filter, rcu); + + INIT_WORK(&f->work, fw_delete_filter_work); + tcf_queue_work(&f->work); } static void fw_destroy(struct tcf_proto *tp) -- 2.13.0
[Patch net 14/16] net_sched: fix call_rcu() race on act_sample module removal
Similar to commit c78e1746d3ad ("net: sched: fix call_rcu() race on classifier module unloads"), we need to wait for any in-flight RCU callback tcf_sample_cleanup_rcu(). Cc: Yotam Gigi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang --- net/sched/act_sample.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index ec986ae52808..a9f9a2ccc664 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -264,6 +264,7 @@ static int __init sample_init_module(void) static void __exit sample_cleanup_module(void) { + rcu_barrier(); tcf_unregister_action(&act_sample_ops, &sample_net_ops); } -- 2.13.0
[Patch net 13/16] net_sched: add rtnl assertion to tcf_exts_destroy()
After the previous patches, it is now safe to claim that tcf_exts_destroy() is always called with the RTNL lock held. Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang --- net/sched/cls_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 045d13679ad6..231181c602ed 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -909,6 +909,7 @@ void tcf_exts_destroy(struct tcf_exts *exts) #ifdef CONFIG_NET_CLS_ACT LIST_HEAD(actions); + ASSERT_RTNL(); tcf_exts_to_list(exts, &actions); tcf_action_destroy(&actions, TCA_ACT_UNBIND); kfree(exts->actions); -- 2.13.0
[Patch net 02/16] net_sched: use tcf_queue_work() in basic filter
Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris MiCc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang --- net/sched/cls_basic.c | 20 +--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index d89ebafd2239..f177649a2419 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -34,7 +34,10 @@ struct basic_filter { struct tcf_result res; struct tcf_proto*tp; struct list_headlink; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp, @@ -82,15 +85,26 @@ static int basic_init(struct tcf_proto *tp) return 0; } -static void basic_delete_filter(struct rcu_head *head) +static void basic_delete_filter_work(struct work_struct *work) { - struct basic_filter *f = container_of(head, struct basic_filter, rcu); + struct basic_filter *f = container_of(work, struct basic_filter, work); + rtnl_lock(); tcf_exts_destroy(>exts); tcf_em_tree_destroy(>ematches); + rtnl_unlock(); + kfree(f); } +static void basic_delete_filter(struct rcu_head *head) +{ + struct basic_filter *f = container_of(head, struct basic_filter, rcu); + + INIT_WORK(>work, basic_delete_filter_work); + tcf_queue_work(>work); +} + static void basic_destroy(struct tcf_proto *tp) { struct basic_head *head = rtnl_dereference(tp->root); -- 2.13.0
[Patch net 12/16] net_sched: use tcf_queue_work() in tcindex filter
Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris MiCc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang --- net/sched/cls_tcindex.c | 38 +- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 14a7e08b2fa9..beaa95e09c25 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -27,14 +27,20 @@ struct tcindex_filter_result { struct tcf_exts exts; struct tcf_result res; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; struct tcindex_filter { u16 key; struct tcindex_filter_result result; struct tcindex_filter __rcu *next; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; @@ -133,12 +139,34 @@ static int tcindex_init(struct tcf_proto *tp) return 0; } +static void tcindex_destroy_rexts_work(struct work_struct *work) +{ + struct tcindex_filter_result *r; + + r = container_of(work, struct tcindex_filter_result, work); + rtnl_lock(); + tcf_exts_destroy(>exts); + rtnl_unlock(); +} + static void tcindex_destroy_rexts(struct rcu_head *head) { struct tcindex_filter_result *r; r = container_of(head, struct tcindex_filter_result, rcu); - tcf_exts_destroy(>exts); + INIT_WORK(>work, tcindex_destroy_rexts_work); + tcf_queue_work(>work); +} + +static void tcindex_destroy_fexts_work(struct work_struct *work) +{ + struct tcindex_filter *f = container_of(work, struct tcindex_filter, + work); + + rtnl_lock(); + tcf_exts_destroy(>result.exts); + kfree(f); + rtnl_unlock(); } static void tcindex_destroy_fexts(struct rcu_head *head) @@ -146,8 +174,8 @@ static void tcindex_destroy_fexts(struct rcu_head *head) struct tcindex_filter *f = container_of(head, struct tcindex_filter, rcu); - tcf_exts_destroy(>result.exts); - kfree(f); + INIT_WORK(>work, tcindex_destroy_fexts_work); + tcf_queue_work(>work); } static 
int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last) -- 2.13.0
[Patch net 04/16] net_sched: use tcf_queue_work() in cgroup filter
Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris MiCc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang --- net/sched/cls_cgroup.c | 22 ++ 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index d48452f87975..a97e069bee89 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -23,7 +23,10 @@ struct cls_cgroup_head { struct tcf_exts exts; struct tcf_ematch_tree ematches; struct tcf_proto*tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp, @@ -57,15 +60,26 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, }; +static void cls_cgroup_destroy_work(struct work_struct *work) +{ + struct cls_cgroup_head *head = container_of(work, + struct cls_cgroup_head, + work); + rtnl_lock(); + tcf_exts_destroy(>exts); + tcf_em_tree_destroy(>ematches); + kfree(head); + rtnl_unlock(); +} + static void cls_cgroup_destroy_rcu(struct rcu_head *root) { struct cls_cgroup_head *head = container_of(root, struct cls_cgroup_head, rcu); - tcf_exts_destroy(>exts); - tcf_em_tree_destroy(>ematches); - kfree(head); + INIT_WORK(>work, cls_cgroup_destroy_work); + tcf_queue_work(>work); } static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, -- 2.13.0
[Patch net 10/16] net_sched: use tcf_queue_work() in route filter
Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris MiCc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang --- net/sched/cls_route.c | 19 --- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 9ddde65915d2..4b14ccd8b8f2 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -57,7 +57,10 @@ struct route4_filter { u32 handle; struct route4_bucket*bkt; struct tcf_proto*tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; #define ROUTE4_FAILURE ((struct route4_filter *)(-1L)) @@ -254,12 +257,22 @@ static int route4_init(struct tcf_proto *tp) return 0; } -static void route4_delete_filter(struct rcu_head *head) +static void route4_delete_filter_work(struct work_struct *work) { - struct route4_filter *f = container_of(head, struct route4_filter, rcu); + struct route4_filter *f = container_of(work, struct route4_filter, work); + rtnl_lock(); tcf_exts_destroy(>exts); kfree(f); + rtnl_unlock(); +} + +static void route4_delete_filter(struct rcu_head *head) +{ + struct route4_filter *f = container_of(head, struct route4_filter, rcu); + + INIT_WORK(>work, route4_delete_filter_work); + tcf_queue_work(>work); } static void route4_destroy(struct tcf_proto *tp) -- 2.13.0
[Patch net 11/16] net_sched: use tcf_queue_work() in rsvp filter
Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris MiCc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang --- net/sched/cls_rsvp.h | 19 --- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index b1f6ed48bc72..bdbc541787f8 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -97,7 +97,10 @@ struct rsvp_filter { u32 handle; struct rsvp_session *sess; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid) @@ -282,12 +285,22 @@ static int rsvp_init(struct tcf_proto *tp) return -ENOBUFS; } -static void rsvp_delete_filter_rcu(struct rcu_head *head) +static void rsvp_delete_filter_work(struct work_struct *work) { - struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu); + struct rsvp_filter *f = container_of(work, struct rsvp_filter, work); + rtnl_lock(); tcf_exts_destroy(>exts); kfree(f); + rtnl_unlock(); +} + +static void rsvp_delete_filter_rcu(struct rcu_head *head) +{ + struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu); + + INIT_WORK(>work, rsvp_delete_filter_work); + tcf_queue_work(>work); } static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) -- 2.13.0
[Patch net 15/16] selftests: Introduce a new script to generate tc batch file
From: Chris Mi# ./tdc_batch.py -h usage: tdc_batch.py [-h] [-n NUMBER] [-o] [-s] [-p] device file TC batch file generator positional arguments: devicedevice name file batch file name optional arguments: -h, --helpshow this help message and exit -n NUMBER, --number NUMBER how many lines in batch file -o, --skip_sw skip_sw (offload), by default skip_hw -s, --share_actionall filters share the same action -p, --prioall filters have different prio Acked-by: Jamal Hadi Salim Acked-by: Lucas Bates Signed-off-by: Chris Mi Signed-off-by: Cong Wang --- tools/testing/selftests/tc-testing/tdc_batch.py | 62 + 1 file changed, 62 insertions(+) create mode 100755 tools/testing/selftests/tc-testing/tdc_batch.py diff --git a/tools/testing/selftests/tc-testing/tdc_batch.py b/tools/testing/selftests/tc-testing/tdc_batch.py new file mode 100755 index ..707c6bfef689 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tdc_batch.py @@ -0,0 +1,62 @@ +#!/usr/bin/python3 + +""" +tdc_batch.py - a script to generate TC batch file + +Copyright (C) 2017 Chris Mi +""" + +import argparse + +parser = argparse.ArgumentParser(description='TC batch file generator') +parser.add_argument("device", help="device name") +parser.add_argument("file", help="batch file name") +parser.add_argument("-n", "--number", type=int, +help="how many lines in batch file") +parser.add_argument("-o", "--skip_sw", +help="skip_sw (offload), by default skip_hw", +action="store_true") +parser.add_argument("-s", "--share_action", +help="all filters share the same action", +action="store_true") +parser.add_argument("-p", "--prio", +help="all filters have different prio", +action="store_true") +args = parser.parse_args() + +device = args.device +file = open(args.file, 'w') + +number = 1 +if args.number: +number = args.number + +skip = "skip_hw" +if args.skip_sw: +skip = "skip_sw" + +share_action = "" +if args.share_action: +share_action = "index 1" + +prio = "prio 1" +if args.prio: +prio = "" +if number > 0x4000: +number = 
0x4000 + +index = 0 +for i in range(0x100): +for j in range(0x100): +for k in range(0x100): +mac = ("%02x:%02x:%02x" % (i, j, k)) +src_mac = "e4:11:00:" + mac +dst_mac = "e4:12:00:" + mac +cmd = ("filter add dev %s %s protocol ip parent : flower %s " + "src_mac %s dst_mac %s action drop %s" % + (device, prio, skip, src_mac, dst_mac, share_action)) +file.write("%s\n" % cmd) +index += 1 +if index >= number: +file.close() +exit(0) -- 2.13.0
[Patch net 05/16] net_sched: use tcf_queue_work() in flow filter
Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris MiCc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang --- net/sched/cls_flow.c | 19 --- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 2a3a60ec5b86..67f3a2af6aab 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -57,7 +57,10 @@ struct flow_filter { u32 divisor; u32 baseclass; u32 hashrnd; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static inline u32 addr_fold(void *addr) @@ -369,14 +372,24 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_PERTURB] = { .type = NLA_U32 }, }; -static void flow_destroy_filter(struct rcu_head *head) +static void flow_destroy_filter_work(struct work_struct *work) { - struct flow_filter *f = container_of(head, struct flow_filter, rcu); + struct flow_filter *f = container_of(work, struct flow_filter, work); + rtnl_lock(); del_timer_sync(>perturb_timer); tcf_exts_destroy(>exts); tcf_em_tree_destroy(>ematches); kfree(f); + rtnl_unlock(); +} + +static void flow_destroy_filter(struct rcu_head *head) +{ + struct flow_filter *f = container_of(head, struct flow_filter, rcu); + + INIT_WORK(>work, flow_destroy_filter_work); + tcf_queue_work(>work); } static int flow_change(struct net *net, struct sk_buff *in_skb, -- 2.13.0
[Patch net 09/16] net_sched: use tcf_queue_work() in u32 filter
Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris MiCc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang --- net/sched/cls_u32.c | 29 ++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 10b8d851fc6b..dadd1b344497 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -68,7 +68,10 @@ struct tc_u_knode { u32 __percpu*pcpu_success; #endif struct tcf_proto*tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; /* The 'sel' field MUST be the last field in structure to allow for * tc_u32_keys allocated at end of structure. */ @@ -418,11 +421,21 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, * this the u32_delete_key_rcu variant does not free the percpu * statistics. */ +static void u32_delete_key_work(struct work_struct *work) +{ + struct tc_u_knode *key = container_of(work, struct tc_u_knode, work); + + rtnl_lock(); + u32_destroy_key(key->tp, key, false); + rtnl_unlock(); +} + static void u32_delete_key_rcu(struct rcu_head *rcu) { struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); - u32_destroy_key(key->tp, key, false); + INIT_WORK(>work, u32_delete_key_work); + tcf_queue_work(>work); } /* u32_delete_key_freepf_rcu is the rcu callback variant @@ -432,11 +445,21 @@ static void u32_delete_key_rcu(struct rcu_head *rcu) * for the variant that should be used with keys return from * u32_init_knode() */ +static void u32_delete_key_freepf_work(struct work_struct *work) +{ + struct tc_u_knode *key = container_of(work, struct tc_u_knode, work); + + rtnl_lock(); + u32_destroy_key(key->tp, key, true); + rtnl_unlock(); +} + static void u32_delete_key_freepf_rcu(struct rcu_head *rcu) { struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); - u32_destroy_key(key->tp, key, true); + INIT_WORK(>work, 
u32_delete_key_freepf_work); + tcf_queue_work(>work); } static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) -- 2.13.0
[Patch net 01/16] net_sched: introduce a workqueue for RCU callbacks of tc filter
This patch introduces a dedicated workqueue for tc filters so that each tc filter's RCU callback could defer their action destroy work to this workqueue. The helper tcf_queue_work() is introduced for them to use. Because we hold RTNL lock when calling tcf_block_put(), we can not simply flush works inside it, therefore we have to defer it again to this workqueue and make sure all flying RCU callbacks have already queued their work before this one, in other words, to ensure this is the last one to execute to prevent any use-after-free. On the other hand, this makes tcf_block_put() ugly and harder to understand. Since David and Eric strongly dislike adding synchronize_rcu(), this is probably the only solution that could make everyone happy. Please also see the code comments below. Reported-by: Chris MiCc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang --- include/net/pkt_cls.h | 3 +++ include/net/sch_generic.h | 2 ++ net/sched/cls_api.c | 68 +++ 3 files changed, 56 insertions(+), 17 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index e80edd8879ef..3009547f3c66 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -2,6 +2,7 @@ #define __NET_PKT_CLS_H #include +#include #include #include @@ -17,6 +18,8 @@ struct tcf_walker { int register_tcf_proto_ops(struct tcf_proto_ops *ops); int unregister_tcf_proto_ops(struct tcf_proto_ops *ops); +bool tcf_queue_work(struct work_struct *work); + #ifdef CONFIG_NET_CLS struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, bool create); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 135f5a2dd931..0dec8a23be57 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -271,6 +272,7 @@ struct tcf_chain { struct tcf_block { struct list_head chain_list; + struct work_struct work; }; static inline 
void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 0b2219adf520..045d13679ad6 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -77,6 +77,8 @@ int register_tcf_proto_ops(struct tcf_proto_ops *ops) } EXPORT_SYMBOL(register_tcf_proto_ops); +static struct workqueue_struct *tc_filter_wq; + int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) { struct tcf_proto_ops *t; @@ -86,6 +88,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) * tcf_proto_ops's destroy() handler. */ rcu_barrier(); + flush_workqueue(tc_filter_wq); write_lock(_mod_lock); list_for_each_entry(t, _proto_base, head) { @@ -100,6 +103,12 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) } EXPORT_SYMBOL(unregister_tcf_proto_ops); +bool tcf_queue_work(struct work_struct *work) +{ + return queue_work(tc_filter_wq, work); +} +EXPORT_SYMBOL(tcf_queue_work); + /* Select new prio value from the range, managed by kernel. */ static inline u32 tcf_auto_prio(struct tcf_proto *tp) @@ -266,23 +275,30 @@ int tcf_block_get(struct tcf_block **p_block, } EXPORT_SYMBOL(tcf_block_get); -void tcf_block_put(struct tcf_block *block) +static void tcf_block_put_final(struct work_struct *work) { + struct tcf_block *block = container_of(work, struct tcf_block, work); struct tcf_chain *chain, *tmp; - if (!block) - return; - - /* XXX: Standalone actions are not allowed to jump to any chain, and -* bound actions should be all removed after flushing. However, -* filters are destroyed in RCU callbacks, we have to hold the chains -* first, otherwise we would always race with RCU callbacks on this list -* without proper locking. -*/ + /* At this point, all the chains should have refcnt == 1. */ + rtnl_lock(); + list_for_each_entry_safe(chain, tmp, >chain_list, list) + tcf_chain_put(chain); + rtnl_unlock(); + kfree(block); +} - /* Wait for existing RCU callbacks to cool down. 
*/ - rcu_barrier(); +/* XXX: Standalone actions are not allowed to jump to any chain, and bound + * actions should be all removed after flushing. However, filters are destroyed + * in RCU callbacks, we have to hold the chains first, otherwise we would + * always race with RCU callbacks on this list without proper locking. + */ +static void tcf_block_put_deferred(struct work_struct *work) +{ + struct tcf_block *block = container_of(work, struct tcf_block, work); + struct tcf_chain *chain; +
[Patch net 06/16] net_sched: use tcf_queue_work() in flower filter
Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris MiCc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang --- net/sched/cls_flower.c | 19 --- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index b480d7c792ba..5b5722c8b32c 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -87,7 +87,10 @@ struct cls_fl_filter { struct list_head list; u32 handle; u32 flags; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; struct net_device *hw_dev; }; @@ -215,12 +218,22 @@ static int fl_init(struct tcf_proto *tp) return 0; } -static void fl_destroy_filter(struct rcu_head *head) +static void fl_destroy_filter_work(struct work_struct *work) { - struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu); + struct cls_fl_filter *f = container_of(work, struct cls_fl_filter, work); + rtnl_lock(); tcf_exts_destroy(>exts); kfree(f); + rtnl_unlock(); +} + +static void fl_destroy_filter(struct rcu_head *head) +{ + struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu); + + INIT_WORK(>work, fl_destroy_filter_work); + tcf_queue_work(>work); } static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) -- 2.13.0
[Patch net 16/16] selftests: Introduce a new test case to tc testsuite
From: Chris MiIn this patchset, we fixed a tc bug. This patch adds the test case that reproduces the bug. To run this test case, user should specify an existing NIC device: # sudo ./tdc.py -d enp4s0f0 This test case belongs to category "flower". If user doesn't specify a NIC device, the test cases belong to "flower" will not be run. In this test case, we create 1M filters and all filters share the same action. When destroying all filters, kernel should not panic. It takes about 18s to run it. Acked-by: Jamal Hadi Salim Acked-by: Lucas Bates Signed-off-by: Chris Mi Signed-off-by: Cong Wang --- .../tc-testing/tc-tests/filters/tests.json | 23 +- tools/testing/selftests/tc-testing/tdc.py | 20 +++ tools/testing/selftests/tc-testing/tdc_config.py | 2 ++ 3 files changed, 40 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json index c727b96a59b0..5fa02d86b35f 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json @@ -17,5 +17,26 @@ "teardown": [ "$TC qdisc del dev $DEV1 ingress" ] +}, +{ +"id": "d052", +"name": "Add 1M filters with the same action", +"category": [ +"filter", +"flower" +], +"setup": [ +"$TC qdisc add dev $DEV2 ingress", +"./tdc_batch.py $DEV2 $BATCH_FILE --share_action -n 100" +], +"cmdUnderTest": "$TC -b $BATCH_FILE", +"expExitCode": "0", +"verifyCmd": "$TC actions list action gact", +"matchPattern": "action order 0: gact action drop.*index 1 ref 100 bind 100", +"matchCount": "1", +"teardown": [ +"$TC qdisc del dev $DEV2 ingress", +"/bin/rm $BATCH_FILE" +] } -] \ No newline at end of file +] diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index cd61b7844c0d..5f11f5d7456e 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -88,7 +88,7 @@ USE_NS = 
True exit(1) -def test_runner(filtered_tests): +def test_runner(filtered_tests, args): """ Driver function for the unit tests. @@ -105,6 +105,8 @@ USE_NS = True for tidx in testlist: result = True tresult = "" +if "flower" in tidx["category"] and args.device == None: +continue print("Test " + tidx["id"] + ": " + tidx["name"]) prepare_env(tidx["setup"]) (p, procout) = exec_cmd(tidx["cmdUnderTest"]) @@ -152,6 +154,10 @@ USE_NS = True exec_cmd(cmd, False) cmd = 'ip -s $NS link set $DEV1 up' exec_cmd(cmd, False) +cmd = 'ip link set $DEV2 netns $NS' +exec_cmd(cmd, False) +cmd = 'ip -s $NS link set $DEV2 up' +exec_cmd(cmd, False) def ns_destroy(): @@ -211,7 +217,8 @@ USE_NS = True help='Execute the single test case with specified ID') parser.add_argument('-i', '--id', action='store_true', dest='gen_id', help='Generate ID numbers for new test cases') -return parser +parser.add_argument('-d', '--device', +help='Execute the test case in flower category') return parser @@ -225,6 +232,8 @@ USE_NS = True if args.path != None: NAMES['TC'] = args.path +if args.device != None: + NAMES['DEV2'] = args.device if not os.path.isfile(NAMES['TC']): print("The specified tc path " + NAMES['TC'] + " does not exist.") exit(1) @@ -381,14 +390,17 @@ USE_NS = True if (len(alltests) == 0): print("Cannot find a test case with ID matching " + target_id) exit(1) -catresults = test_runner(alltests) +catresults = test_runner(alltests, args) print("All test results: " + "\n\n" + catresults) elif (len(target_category) > 0): +if (target_category == "flower") and args.device == None: +print("Please specify a NIC device (-d) to run category flower") +exit(1) if (target_category not in ucat): print("Specified category is not present in this file.") exit(1) else: -catresults = test_runner(testcases[target_category]) +catresults = test_runner(testcases[target_category], args) print("Category " + target_category + "\n\n" + catresults) ns_destroy() diff --git a/tools/testing/selftests/tc-testing/tdc_config.py 
b/tools/testing/selftests/tc-testing/tdc_config.py index
[Patch net 08/16] net_sched: use tcf_queue_work() in matchall filter
Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris MiCc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang --- net/sched/cls_matchall.c | 19 --- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index eeac606c95ab..c33f711b9019 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -21,7 +21,10 @@ struct cls_mall_head { struct tcf_result res; u32 handle; u32 flags; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp, @@ -41,13 +44,23 @@ static int mall_init(struct tcf_proto *tp) return 0; } +static void mall_destroy_work(struct work_struct *work) +{ + struct cls_mall_head *head = container_of(work, struct cls_mall_head, + work); + rtnl_lock(); + tcf_exts_destroy(>exts); + kfree(head); + rtnl_unlock(); +} + static void mall_destroy_rcu(struct rcu_head *rcu) { struct cls_mall_head *head = container_of(rcu, struct cls_mall_head, rcu); - tcf_exts_destroy(>exts); - kfree(head); + INIT_WORK(>work, mall_destroy_work); + tcf_queue_work(>work); } static int mall_replace_hw_filter(struct tcf_proto *tp, -- 2.13.0
[Patch net 00/16] net_sched: fix races with RCU callbacks
Recently, the RCU callbacks used in TC filters and TC actions keep drawing my attention; they have introduced at least 4 race condition bugs: 1. A simple one fixed by Daniel: commit c78e1746d3ad7d548bdf3fe491898cc453911a49 Author: Daniel Borkmann Date: Wed May 20 17:13:33 2015 +0200 net: sched: fix call_rcu() race on classifier module unloads 2. A very nasty one fixed by me: commit 1697c4bb5245649a23f06a144cc38c06715e1b65 Author: Cong Wang Date: Mon Sep 11 16:33:32 2017 -0700 net_sched: carefully handle tcf_block_put() 3. Two more bugs found by Chris: https://patchwork.ozlabs.org/patch/826696/ https://patchwork.ozlabs.org/patch/826695/ Usually RCU callbacks are simple, however for TC filters and actions, they are complex because at least TC actions could be destroyed together with the TC filter in one callback. And RCU callbacks are invoked in BH context, without locking they are parallel too. All of these contribute to the cause of these nasty bugs. Alternatively, we could also: a) Introduce a spinlock to serialize these RCU callbacks. But as I said in commit 1697c4bb5245 ("net_sched: carefully handle tcf_block_put()"), it is very hard to do because of tcf_chain_dump(). Potentially we need to do a lot of work to make it possible (if not impossible). b) Just get rid of these RCU callbacks, because they are not necessary at all, callers of these call_rcu() are all on slow paths and holding RTNL lock, so blocking is allowed in their contexts. However, David and Eric dislike adding synchronize_rcu() here. As suggested by Paul, we could defer the work to a workqueue and gain the permission of holding RTNL again without any performance impact, however, in tcf_block_put() we could have a deadlock when flushing workqueue while holding RTNL lock, the trick here is to defer the work itself in workqueue and make it queued after all other works so that we keep the same ordering to avoid any use-after-free. Please see the first patch for details.
Patch 1 introduces the infrastructure, patch 2~12 move each tc filter to the new tc filter workqueue, patch 13 adds an assertion to catch potential bugs like this, patch 14 closes another rcu callback race, patch 15 and patch 16 add new test cases. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Chris Mi (2): selftests: Introduce a new script to generate tc batch file selftests: Introduce a new test case to tc testsuite Cong Wang (14): net_sched: introduce a workqueue for RCU callbacks of tc filter net_sched: use tcf_queue_work() in basic filter net_sched: use tcf_queue_work() in bpf filter net_sched: use tcf_queue_work() in cgroup filter net_sched: use tcf_queue_work() in flow filter net_sched: use tcf_queue_work() in flower filter net_sched: use tcf_queue_work() in fw filter net_sched: use tcf_queue_work() in matchall filter net_sched: use tcf_queue_work() in u32 filter net_sched: use tcf_queue_work() in route filter net_sched: use tcf_queue_work() in rsvp filter net_sched: use tcf_queue_work() in tcindex filter net_sched: add rtnl assertion to tcf_exts_destroy() net_sched: fix call_rcu() race on act_sample module removal include/net/pkt_cls.h | 3 + include/net/sch_generic.h | 2 + net/sched/act_sample.c | 1 + net/sched/cls_api.c| 69 -- net/sched/cls_basic.c | 20 ++- net/sched/cls_bpf.c| 19 +- net/sched/cls_cgroup.c | 22 +-- net/sched/cls_flow.c | 19 +- net/sched/cls_flower.c | 19 +- net/sched/cls_fw.c | 19 +- net/sched/cls_matchall.c | 19 +- net/sched/cls_route.c | 19 +- net/sched/cls_rsvp.h | 19 +- net/sched/cls_tcindex.c| 38 ++-- net/sched/cls_u32.c| 29 - .../tc-testing/tc-tests/filters/tests.json | 23 +++- tools/testing/selftests/tc-testing/tdc.py | 20 +-- tools/testing/selftests/tc-testing/tdc_batch.py| 62 +++ tools/testing/selftests/tc-testing/tdc_config.py | 2 + 19 files changed, 367 insertions(+), 57 deletions(-) create mode 100755 
tools/testing/selftests/tc-testing/tdc_batch.py -- 2.13.0
[PATCH net-next] nfp: inform the VF driver needs to be restarted after changing the MAC
From: Pablo CascónAdd message to inform the VF MAC was changed and the need to restart the VF driver for the changes to be effective. Signed-off-by: Pablo Cascón Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c index e6d2e06b050c..8b1b962cf1d1 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c @@ -112,7 +112,13 @@ int nfp_app_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) writew(get_unaligned_be16(mac + 4), app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_MAC_LO); - return nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_MAC, "MAC"); + err = nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_MAC, "MAC"); + if (!err) + nfp_info(app->pf->cpp, +"MAC %pM set on VF %d, reload the VF driver to make this change effective.\n", +mac, vf); + + return err; } int nfp_app_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos, -- 2.14.1
RE: removing bridge in vlan_filtering mode requests delete of attached ports main MAC address
> -Original Message- > From: Keller, Jacob E > Sent: Thursday, October 26, 2017 1:33 PM > To: Keller, Jacob E; vyase...@redhat.com; > netdev@vger.kernel.org > Cc: Malek, Patryk > Subject: RE: removing bridge in vlan_filtering mode requests delete of > attached > ports main MAC address > > > -Original Message- > > From: netdev-ow...@vger.kernel.org [mailto:netdev- > ow...@vger.kernel.org] > > On Behalf Of Keller, Jacob E > > Sent: Thursday, October 26, 2017 1:27 PM > > To: vyase...@redhat.com; netdev@vger.kernel.org > > Cc: Malek, Patryk > > Subject: RE: removing bridge in vlan_filtering mode requests delete of > > attached > > ports main MAC address > > > > > -Original Message- > > > From: Vlad Yasevich [mailto:vyase...@redhat.com] > > > Sent: Thursday, October 26, 2017 3:22 AM > > > To: Keller, Jacob E ; netdev@vger.kernel.org > > > Cc: Malek, Patryk > > > Subject: Re: removing bridge in vlan_filtering mode requests delete of > attached > > > ports main MAC address > > > > > > Hi Jake > > > > > > I think adding a !fdb->local should work. local fdb contain the address > > > of > > assigned > > > to > > > the ports of the bridge and those shouldn't be directly removed. > > > > > > If that works, that looks like the right solution. > > > > > > -vlad > > > > > > > So this does prevent us from removing the port's address. However, if I add > two > > devices to the bridge, then after removing the bridge, each device now keeps > > both permanent addresses in their list, which isn't what we want is it? > > > > Do we even want to assign the local fdb addresses to every port? > > > > Obviously, I don't fully understand this code, so I think I'm missing > > something > > here. > > > > Regards, > > Jake > > > > Ok, I tried this again, and it didn't end up crossing the local device > addresses to > each port. I'm not sure how that happened the first time yet, so maybe it is > correct to skip removing local addresses... 
but if we skip removing them, > wouldn't > we want to skip adding them too? > > Thanks, > Jake I'm still digging into this. It turns out adding two devices, enabling vlan filtering, and deleting the bridge sometimes (but not always, not sure what condition triggers it) causes the hw address of one of the devices to be assigned to the other device. I'm still unsure whether sync_static should be assigning local addresses to each device, but it appears like it should. I'm really unsure how to handle this case properly. If we add local addresses, we need to delete the ones that aren't specific to that device so that after removing the bridge we end up in the original configuration.. but I'm not really sure how best to do this. Using !fdb->is_local in unsync_static works to resolve my issue, but I believe it papers over other issues, since it means that we'll never delete static addresses when deleting the ports or exiting promiscuous mode. I think checking fdb->dst might work, but that would break if we manually add a new address and tag it as permanent, see line 806 of br_fdb.c... In this case, we'd never delete this address even though it was not originally on the device. I checked other drivers, and it turns out that at least one (ixgbe) doesn't have this problem because the hw address is special and isn't actually stored in a hardware MAC filter list. In i40e we keep the hardware address in the same list as all the other MAC filters. We could "fix" this in i40e by treating the hw permanent address separately and essentially ignoring it from the dev_uc_del() calls.. but I still feel like this papers over the issues in the bridge code. Any thoughts or suggestions? I haven't checked other drivers to see how they handle addresses in the unicast table (whether they treat the hw address as special or not, like ixgbe ultimately does). Thanks, Jake
Re: [PATCH net-next 5/9] net: dsa: use dsa_is_user_port everywhere
On 10/26/2017 08:22 AM, Vivien Didelot wrote: > Most of the DSA code still check ds->enabled_port_mask directly to > inspect a given port type instead of using the provided dsa_is_user_port > helper. Change this. > > Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli -- Florian
[iproute2 PATCH] tc/mqprio: Offload mode and shaper options in mqprio
This patch was previously submitted as RFC. Submitting this as non-RFC now that the tc/mqprio changes are accepted in net-next. Adds new mqprio options for 'mode' and 'shaper'. The mode option can take values for offload modes such as 'dcb' (default), 'channel' with the 'hw' option set to 1. The new 'channel' mode supports offloading TCs and other queue configurations. The 'shaper' option is to support HW shapers ('dcb' default) and takes the value 'bw_rlimit' for bandwidth rate limiting. The parameters to the bw_rlimit shaper are minimum and maximum bandwidth rates. New HW shapers in future can be supported through the shaper attribute. # tc qdisc add dev eth0 root mqprio num_tc 2 map 0 0 0 0 1 1 1 1\ queues 4@0 4@4 hw 1 mode channel shaper bw_rlimit\ min_rate 1Gbit 2Gbit max_rate 4Gbit 5Gbit # tc qdisc show dev eth0 qdisc mqprio 804a: root tc 2 map 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 queues:(0:3) (4:7) mode:channel shaper:bw_rlimit min_rate:1Gbit 2Gbit max_rate:4Gbit 5Gbit Signed-off-by: Amritha Nambiar--- include/uapi/linux/pkt_sched.h | 32 +++ tc/q_mqprio.c | 192 +++- 2 files changed, 217 insertions(+), 7 deletions(-) diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 099bf55..e95b5c9 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -625,6 +625,22 @@ enum { #define TC_MQPRIO_HW_OFFLOAD_MAX (__TC_MQPRIO_HW_OFFLOAD_MAX - 1) +enum { + TC_MQPRIO_MODE_DCB, + TC_MQPRIO_MODE_CHANNEL, + __TC_MQPRIO_MODE_MAX +}; + +#define __TC_MQPRIO_MODE_MAX (__TC_MQPRIO_MODE_MAX - 1) + +enum { + TC_MQPRIO_SHAPER_DCB, + TC_MQPRIO_SHAPER_BW_RATE, /* Add new shapers below */ + __TC_MQPRIO_SHAPER_MAX +}; + +#define __TC_MQPRIO_SHAPER_MAX (__TC_MQPRIO_SHAPER_MAX - 1) + struct tc_mqprio_qopt { __u8num_tc; __u8prio_tc_map[TC_QOPT_BITMASK + 1]; @@ -633,6 +649,22 @@ struct tc_mqprio_qopt { __u16 offset[TC_QOPT_MAX_QUEUE]; }; +#define TC_MQPRIO_F_MODE 0x1 +#define TC_MQPRIO_F_SHAPER 0x2 +#define TC_MQPRIO_F_MIN_RATE 0x4 +#define 
TC_MQPRIO_F_MAX_RATE 0x8 + +enum { + TCA_MQPRIO_UNSPEC, + TCA_MQPRIO_MODE, + TCA_MQPRIO_SHAPER, + TCA_MQPRIO_MIN_RATE64, + TCA_MQPRIO_MAX_RATE64, + __TCA_MQPRIO_MAX, +}; + +#define TCA_MQPRIO_MAX (__TCA_MQPRIO_MAX - 1) + /* SFB */ enum { diff --git a/tc/q_mqprio.c b/tc/q_mqprio.c index d6718fb..cd305b7 100644 --- a/tc/q_mqprio.c +++ b/tc/q_mqprio.c @@ -27,6 +27,10 @@ static void explain(void) fprintf(stderr, "Usage: ... mqprio [num_tc NUMBER] [map P0 P1 ...]\n"); fprintf(stderr, " [queues count1@offset1 count2@offset2 ...] "); fprintf(stderr, "[hw 1|0]\n"); + fprintf(stderr, " [mode dcb|channel]\n"); + fprintf(stderr, " [shaper bw_rlimit SHAPER_PARAMS]\n" + "Where: SHAPER_PARAMS := { min_rate MIN_RATE1 MIN_RATE2 ...|\n" + " max_rate MAX_RATE1 MAX_RATE2 ... }\n"); } static int mqprio_parse_opt(struct qdisc_util *qu, int argc, @@ -40,6 +44,12 @@ static int mqprio_parse_opt(struct qdisc_util *qu, int argc, .count = { }, .offset = { }, }; + __u64 min_rate64[TC_QOPT_MAX_QUEUE] = {0}; + __u64 max_rate64[TC_QOPT_MAX_QUEUE] = {0}; + __u16 shaper = TC_MQPRIO_SHAPER_DCB; + __u16 mode = TC_MQPRIO_MODE_DCB; + struct rtattr *tail; + __u32 flags = 0; while (argc > 0) { idx = 0; @@ -92,6 +102,68 @@ static int mqprio_parse_opt(struct qdisc_util *qu, int argc, return -1; } idx++; + } else if (opt.hw && strcmp(*argv, "mode") == 0) { + NEXT_ARG(); + if (matches(*argv, "dcb") == 0) { + mode = TC_MQPRIO_MODE_DCB; + } else if (matches(*argv, "channel") == 0) { + mode = TC_MQPRIO_MODE_CHANNEL; + } else { + fprintf(stderr, "Illegal mode (%s)\n", + *argv); + return -1; + } + if (mode != TC_MQPRIO_MODE_DCB) + flags |= TC_MQPRIO_F_MODE; + idx++; + } else if (opt.hw && strcmp(*argv, "shaper") == 0) { + NEXT_ARG(); + if (matches(*argv, "dcb") == 0) { + shaper = TC_MQPRIO_SHAPER_DCB; + } else if (matches(*argv, "bw_rlimit") == 0) { + shaper =
Re: [PATCH net-next] tcp: add tracepoint trace_tcp_retransmit_synack()
> On Oct 25, 2017, at 8:13 PM, kbuild test robot <l...@intel.com> wrote: > > Hi Song, > > [auto build test WARNING on net-next/master] > > url: > https://github.com/0day-ci/linux/commits/Song-Liu/tcp-add-tracepoint-trace_tcp_retransmit_synack/20171026-010651 > reproduce: ># apt-get install sparse >make ARCH=x86_64 allmodconfig >make C=1 CF=-D__CHECK_ENDIAN__ > > > sparse warnings: (new ones prefixed by >>) > > > vim +281 include/trace/events/tcp.h > > 241 > 242 TP_PROTO(const struct sock *sk, const struct request_sock *req), > 243 > 244 TP_ARGS(sk, req), > 245 > 246 TP_STRUCT__entry( > 247 __field(const void *, skaddr) > 248 __field(const void *, req) > 249 __field(__u16, sport) > 250 __field(__u16, dport) > 251 __array(__u8, saddr, 4) > 252 __array(__u8, daddr, 4) > 253 __array(__u8, saddr_v6, 16) > 254 __array(__u8, daddr_v6, 16) > 255 ), > 256 > 257 TP_fast_assign( > 258 struct inet_request_sock *ireq = inet_rsk(req); > 259 struct in6_addr *pin6; > 260 __be32 *p32; > 261 > 262 __entry->skaddr = sk; > 263 __entry->req = req; > 264 > 265 __entry->sport = ireq->ir_num; > 266 __entry->dport = ntohs(ireq->ir_rmt_port); > 267 > 268 p32 = (__be32 *) __entry->saddr; > 269 *p32 = ireq->ir_loc_addr; > 270 > 271 p32 = (__be32 *) __entry->daddr; > 272 *p32 = ireq->ir_rmt_addr; > 273 >> 274 #if IS_ENABLED(CONFIG_IPV6) > 275 if (sk->sk_family == AF_INET6) { > 276 pin6 = (struct in6_addr *)__entry->saddr_v6; > 277 *pin6 = ireq->ir_v6_loc_addr; > 278 pin6 = (struct in6_addr *)__entry->daddr_v6; > 279 *pin6 = ireq->ir_v6_rmt_addr; > 280 } else >> 281 #endif In this case, we are putting CONFIG_IPV6 in TRACE_EVENT macro, which generates warnings like: ./include/trace/events/tcp.h:274:1: error: directive in argument list ./include/trace/events/tcp.h:281:1: error: directive in argument list Seems these warning cannot be easily avoided. This is also the same pattern we have been using in include/trace/events/tcp.h. Any suggestions on how shall we proceed from here? Thanks, Song
[PATCH net-next] liquidio: fix kernel panic in VF driver
Doing ifconfig down on VF driver in the middle of receiving line rate traffic causes a kernel panic: LiquidIO_VF :02:00.3: should not come here should not get rx when poll mode = 0 for vf BUG: unable to handle kernel NULL pointer dereference at (null) . . . Call Trace: ? tasklet_action+0x102/0x120 __do_softirq+0x91/0x292 irq_exit+0xb6/0xc0 do_IRQ+0x4f/0xd0 common_interrupt+0x93/0x93 RIP: 0010:cpuidle_enter_state+0x142/0x2f0 RSP: 0018:a6403e20 EFLAGS: 0246 ORIG_RAX: ff59 RAX: RBX: 0003 RCX: 001f RDX: RSI: 2ab7519f RDI: RBP: a6403e58 R08: 0084 R09: 0018 R10: a6403df0 R11: 03c7 R12: 0003 R13: d27ebd806800 R14: a64d40d8 R15: 007be072823f cpuidle_enter+0x17/0x20 call_cpuidle+0x23/0x40 do_idle+0x18c/0x1f0 cpu_startup_entry+0x64/0x70 rest_init+0xa5/0xb0 start_kernel+0x45e/0x46b x86_64_start_reservations+0x24/0x26 x86_64_start_kernel+0x6f/0x72 secondary_startup_64+0xa5/0xa5 Code: Bad RIP value. RIP: (null) RSP: 9246ed003f28 CR2: ---[ end trace 92731e80f31b7d7d ]--- Kernel panic - not syncing: Fatal exception in interrupt Kernel Offset: 0x2400 from 0x8100 (relocation range: 0x8000-0xbfff) ---[ end Kernel panic - not syncing: Fatal exception in interrupt Reason is: in the function assigned to net_device_ops->ndo_stop, the steps for bringing down the interface are done in the wrong order. The step that notifies the NIC firmware to stop forwarding packets to host is done too late. Fix it by moving that step to the beginning. 
Signed-off-by: Felix ManlunasSigned-off-by: Raghu Vatsavayi --- drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index 4c3b568..ed1f073 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -1288,6 +1288,9 @@ static int liquidio_stop(struct net_device *netdev) struct octeon_device *oct = lio->oct_dev; struct napi_struct *napi, *n; + /* tell Octeon to stop forwarding packets to host */ + send_rx_ctrl_cmd(lio, 0); + if (oct->props[lio->ifidx].napi_enabled) { list_for_each_entry_safe(napi, n, >napi_list, dev_list) napi_disable(napi); @@ -1305,9 +1308,6 @@ static int liquidio_stop(struct net_device *netdev) netif_carrier_off(netdev); lio->link_changes++; - /* tell Octeon to stop forwarding packets to host */ - send_rx_ctrl_cmd(lio, 0); - ifstate_reset(lio, LIO_IFSTATE_RUNNING); txqs_stop(netdev);
[PATCH net-next 07/15] tcp: Namespace-ify sysctl_tcp_rfc1337
Signed-off-by: Eric Dumazet--- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++--- net/ipv4/tcp_input.c | 1 - net/ipv4/tcp_minisocks.c | 2 +- 5 files changed, 9 insertions(+), 10 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index ffa2cf3dc747ca9443df3927dc7928c18357f872..968edce38eb5d3399724b3142277eab44f19f2fb 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -134,6 +134,7 @@ struct netns_ipv4 { int sysctl_tcp_slow_start_after_idle; int sysctl_tcp_retrans_collapse; int sysctl_tcp_stdurg; + int sysctl_tcp_rfc1337; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 0cf9de8506916c67369ce78833207ba648f34a10..7f88987bc62dd76206c15eb91f2990d4469e5421 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -240,7 +240,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ -extern int sysctl_tcp_rfc1337; extern int sysctl_tcp_abort_on_overflow; extern int sysctl_tcp_max_orphans; extern int sysctl_tcp_fack; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a34bb75815c15afc077ba7ff36939b5abc9229f6..832e554235df37770809541ad8f9f1ca2f201739 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -400,13 +400,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_rfc1337", - .data = _tcp_rfc1337, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "inet_peer_threshold", .data = _peer_threshold, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_rfc1337", + .data = _net.ipv4.sysctl_tcp_rfc1337, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git 
a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6c3655b538f6b2315af7dc611acc574f7489bde6..d2d3f62387a98d7f955f8c9e27320b9722035b2a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -87,7 +87,6 @@ EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); /* rfc5961 challenge ack rate limiting */ int sysctl_tcp_challenge_ack_limit = 1000; -int sysctl_tcp_rfc1337 __read_mostly; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; int sysctl_tcp_frto __read_mostly = 2; int sysctl_tcp_min_rtt_wlen __read_mostly = 300; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index a952357054f4ddfbb98746ebcf323d1c45f7e951..2abaa4c1fe0108f2645d8e783ae6b48e87a82fb3 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -180,7 +180,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, * Oh well... nobody has a sufficient solution to this * protocol bug yet. */ - if (sysctl_tcp_rfc1337 == 0) { + if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) { kill: inet_twsk_deschedule_put(tw); return TCP_TW_SUCCESS; -- 2.15.0.rc2.357.g7e34df9404-goog
[PATCH net-next 06/15] tcp: Namespace-ify sysctl_tcp_stdurg
Signed-off-by: Eric Dumazet--- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++--- net/ipv4/tcp_input.c | 3 +-- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index b28c172b10e497f235b51aae0fc2d3bbf7cc51f3..ffa2cf3dc747ca9443df3927dc7928c18357f872 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -133,6 +133,7 @@ struct netns_ipv4 { int sysctl_tcp_thin_linear_timeouts; int sysctl_tcp_slow_start_after_idle; int sysctl_tcp_retrans_collapse; + int sysctl_tcp_stdurg; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 085848e4de38058bb09f025387c713ade32b263e..0cf9de8506916c67369ce78833207ba648f34a10 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -240,7 +240,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ -extern int sysctl_tcp_stdurg; extern int sysctl_tcp_rfc1337; extern int sysctl_tcp_abort_on_overflow; extern int sysctl_tcp_max_orphans; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 533b92ad39dd0cada542028fe2f276d9eebcd2c8..a34bb75815c15afc077ba7ff36939b5abc9229f6 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -400,13 +400,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_stdurg", - .data = _tcp_stdurg, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_rfc1337", .data = _tcp_rfc1337, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_stdurg", + .data = _net.ipv4.sysctl_tcp_stdurg, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 
2cc56fd57b751dd6b457c15067aa9309683a04a8..6c3655b538f6b2315af7dc611acc574f7489bde6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -87,7 +87,6 @@ EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); /* rfc5961 challenge ack rate limiting */ int sysctl_tcp_challenge_ack_limit = 1000; -int sysctl_tcp_stdurg __read_mostly; int sysctl_tcp_rfc1337 __read_mostly; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; int sysctl_tcp_frto __read_mostly = 2; @@ -5103,7 +5102,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) struct tcp_sock *tp = tcp_sk(sk); u32 ptr = ntohs(th->urg_ptr); - if (ptr && !sysctl_tcp_stdurg) + if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg) ptr--; ptr += ntohl(th->seq); -- 2.15.0.rc2.357.g7e34df9404-goog
[PATCH net-next 05/15] tcp: Namespace-ify sysctl_tcp_retrans_collapse
Signed-off-by: Eric Dumazet--- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++--- net/ipv4/tcp_ipv4.c| 2 +- net/ipv4/tcp_output.c | 5 + 5 files changed, 10 insertions(+), 13 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 8662692686b3af98a94a176230b9ed147881d87a..b28c172b10e497f235b51aae0fc2d3bbf7cc51f3 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -132,6 +132,7 @@ struct netns_ipv4 { int sysctl_tcp_recovery; int sysctl_tcp_thin_linear_timeouts; int sysctl_tcp_slow_start_after_idle; + int sysctl_tcp_retrans_collapse; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 09c79705a0742aa8c22b3b7795d01b6c685d32e2..085848e4de38058bb09f025387c713ade32b263e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -240,7 +240,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ -extern int sysctl_tcp_retrans_collapse; extern int sysctl_tcp_stdurg; extern int sysctl_tcp_rfc1337; extern int sysctl_tcp_abort_on_overflow; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 40d69af8b363bc236e23879973872d8f9346d85e..533b92ad39dd0cada542028fe2f276d9eebcd2c8 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -386,13 +386,6 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl, } static struct ctl_table ipv4_table[] = { - { - .procname = "tcp_retrans_collapse", - .data = _tcp_retrans_collapse, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_max_orphans", .data = _tcp_max_orphans, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_retrans_collapse", + .data = _net.ipv4.sysctl_tcp_retrans_collapse, + .maxlen = sizeof(int), + .mode = 0644, + 
.proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cea63a4b59655823def7a423d27191003c7f084c..2bc6ba2059d32aa848dbc415b4b0e194b61b0268 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2487,7 +2487,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_early_retrans = 3; net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION; net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior. */ - + net->ipv4.sysctl_tcp_retrans_collapse = 1; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(>ipv4.tcp_fastopen_ctx_lock); net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bc93a346e6e91289c2aeb2f2d2522b809da12dd6..735fff44aaca3d5afbb0ac55ebdb9898b6c44ae6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -44,9 +44,6 @@ #include -/* People can turn this off for buggy TCP's found in printers etc. */ -int sysctl_tcp_retrans_collapse __read_mostly = 1; - /* People can turn this on to work with those rare, broken TCPs that * interpret the window field as a signed quantity. */ @@ -2747,7 +2744,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, struct sk_buff *skb = to, *tmp; bool first = true; - if (!sysctl_tcp_retrans_collapse) + if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse) return; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) return; -- 2.15.0.rc2.357.g7e34df9404-goog
[PATCH net-next 13/15] tcp: Namespace-ify sysctl_tcp_app_win
Signed-off-by: Eric Dumazet--- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++--- net/ipv4/tcp_input.c | 8 net/ipv4/tcp_ipv4.c| 1 + 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 956957a77db96ad3d231cc018c13503d615d8d2e..63f91d52cbc0ad35d8e04a8da0d9f57aa960bcb0 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -139,6 +139,7 @@ struct netns_ipv4 { int sysctl_tcp_fack; int sysctl_tcp_max_reordering; int sysctl_tcp_dsack; + int sysctl_tcp_app_win; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index aba20c4828ee912d9f0f3ef49f3de5376729c022..c6bee85a3dec0dea6d4402d89184ade02a637a2e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -244,7 +244,6 @@ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; -extern int sysctl_tcp_app_win; extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_frto; extern int sysctl_tcp_nometrics_save; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 7652a9c2a65d3f1cfa0a75d1198e1d9d56761c35..e057788834a99cf99e141a602ddbe19b8e6fce3c 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -437,13 +437,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = , }, - { - .procname = "tcp_app_win", - .data = _tcp_app_win, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_adv_win_scale", .data = _tcp_adv_win_scale, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_app_win", + .data = _net.ipv4.sysctl_tcp_app_win, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git 
a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index db4798f99bb093b6d5a3e0fdd76efb83b88da49e..06a8c27e1a690e3b26cb6773320bafa31b06d3b3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -77,7 +77,6 @@ #include #include -int sysctl_tcp_app_win __read_mostly = 31; int sysctl_tcp_adv_win_scale __read_mostly = 1; EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); @@ -426,6 +425,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk) */ void tcp_init_buffer_space(struct sock *sk) { + int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win; struct tcp_sock *tp = tcp_sk(sk); int maxwin; @@ -444,14 +444,14 @@ void tcp_init_buffer_space(struct sock *sk) if (tp->window_clamp >= maxwin) { tp->window_clamp = maxwin; - if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss) + if (tcp_app_win && maxwin > 4 * tp->advmss) tp->window_clamp = max(maxwin - - (maxwin >> sysctl_tcp_app_win), + (maxwin >> tcp_app_win), 4 * tp->advmss); } /* Force reservation of one segment. */ - if (sysctl_tcp_app_win && + if (tcp_app_win && tp->window_clamp > 2 * tp->advmss && tp->window_clamp + tp->advmss > maxwin) tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d9d4d191e8f3c962a6ee68015ffe5a6e7fb8e9c1..189664ebd28e4cda7ef40a47591c3bd8cac3574b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2490,6 +2490,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_retrans_collapse = 1; net->ipv4.sysctl_tcp_max_reordering = 300; net->ipv4.sysctl_tcp_dsack = 1; + net->ipv4.sysctl_tcp_app_win = 31; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(>ipv4.tcp_fastopen_ctx_lock); -- 2.15.0.rc2.357.g7e34df9404-goog
[PATCH net-next 15/15] tcp: Namespace-ify sysctl_tcp_frto
Signed-off-by: Eric Dumazet--- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++--- net/ipv4/tcp_input.c | 3 +-- net/ipv4/tcp_ipv4.c| 1 + 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 9dbb07d4eff465428817831e55c6a4922b7208fb..f4622e28db3a1484553f51709b144ee769766a28 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -141,6 +141,7 @@ struct netns_ipv4 { int sysctl_tcp_dsack; int sysctl_tcp_app_win; int sysctl_tcp_adv_win_scale; + int sysctl_tcp_frto; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 2572b57682987dd5f3700ed47d63e7238946b9a8..19006a5d073c202995ba63199ab8cde814d6d869 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -244,7 +244,6 @@ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; -extern int sysctl_tcp_frto; extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a95123e1e7da706c88bf5553b7d8ef6c2653ab50..f1bcb9b7e082c6688fad12e15be9b872ebed8151 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -437,13 +437,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = , }, - { - .procname = "tcp_frto", - .data = _tcp_frto, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_min_rtt_wlen", .data = _tcp_min_rtt_wlen, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = _adv_win_scale_min, .extra2 = _adv_win_scale_max, }, + { + .procname = "tcp_frto", + .data = _net.ipv4.sysctl_tcp_frto, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d01f390da23dcd4100271b150bd8bc143f7328cf..24950ea3094288cad8d9cd9eb0e0698d6f50e989 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -81,7 +81,6 @@ int sysctl_tcp_challenge_ack_limit = 1000; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; -int sysctl_tcp_frto __read_mostly = 2; int sysctl_tcp_min_rtt_wlen __read_mostly = 300; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; @@ -2024,7 +2023,7 @@ void tcp_enter_loss(struct sock *sk) * falsely raise the receive window, which results in repeated * timeouts and stop-and-go behavior. */ - tp->frto = sysctl_tcp_frto && + tp->frto = net->ipv4.sysctl_tcp_frto && (new_recovery || icsk->icsk_retransmits) && !inet_csk(sk)->icsk_mtup.probe_size; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1fe30fb99308b3e3fd07509b509b0e3727cc5d44..49757c7582c6d2cf413415be2c1b58482659 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2492,6 +2492,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_dsack = 1; net->ipv4.sysctl_tcp_app_win = 31; net->ipv4.sysctl_tcp_adv_win_scale = 1; + net->ipv4.sysctl_tcp_frto = 2; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(>ipv4.tcp_fastopen_ctx_lock); -- 2.15.0.rc2.357.g7e34df9404-goog
[PATCH net-next 09/15] tcp: Namespace-ify sysctl_tcp_fack
Signed-off-by: Eric Dumazet--- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++--- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 3 +-- net/ipv4/tcp_minisocks.c | 2 +- 6 files changed, 11 insertions(+), 12 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 3875fdf6b18653477408beb25176eac849e65ba4..f0e792beeea974b0850090d7624a3d7490124067 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -136,6 +136,7 @@ struct netns_ipv4 { int sysctl_tcp_stdurg; int sysctl_tcp_rfc1337; int sysctl_tcp_abort_on_overflow; + int sysctl_tcp_fack; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index e28471ce52bd815346676931075588d59306a441..38504d5ab109454219ac9570c3b11e02733384c1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -241,7 +241,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ extern int sysctl_tcp_max_orphans; -extern int sysctl_tcp_fack; extern int sysctl_tcp_reordering; extern int sysctl_tcp_max_reordering; extern int sysctl_tcp_dsack; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ffd1fd769bba7c3524aa6dfac734e1de0cad1506..1f23be13ce7be8b2a12b82aada36c6351fdfb70a 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -414,13 +414,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .procname = "tcp_fack", - .data = _tcp_fack, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_max_reordering", .data = _tcp_max_reordering, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_fack", + .data = _net.ipv4.sysctl_tcp_fack, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { 
} }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8f36277e82e9dbea750ce66b73018a81b30b5156..4a777ba113b9afe118e3020da65878d85848e1cb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2511,7 +2511,7 @@ static int tcp_repair_options_est(struct sock *sk, return -EINVAL; tp->rx_opt.sack_ok |= TCP_SACK_SEEN; - if (sysctl_tcp_fack) + if (sock_net(sk)->ipv4.sysctl_tcp_fack) tcp_enable_fack(tp); break; case TCPOPT_TIMESTAMP: diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d2d3f62387a98d7f955f8c9e27320b9722035b2a..8941fc32072b69fedcb01afbe837f4d7791dd28d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -77,7 +77,6 @@ #include #include -int sysctl_tcp_fack __read_mostly; int sysctl_tcp_max_reordering __read_mostly = 300; int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; @@ -5690,7 +5689,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tp->tcp_header_len = sizeof(struct tcphdr); } - if (tcp_is_sack(tp) && sysctl_tcp_fack) + if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_fack) tcp_enable_fack(tp); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index a79a89fdb558a5d66ec5241fdb8bfcab196c744d..eba61f77bc36e4f49580d15840c15af565b8b479 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -491,7 +491,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) { - if (sysctl_tcp_fack) + if (sock_net(sk)->ipv4.sysctl_tcp_fack) tcp_enable_fack(newtp); } newtp->window_clamp = req->rsk_window_clamp; -- 2.15.0.rc2.357.g7e34df9404-goog
[PATCH net-next 14/15] tcp: Namespace-ify sysctl_tcp_adv_win_scale
Signed-off-by: Eric Dumazet--- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 9 - net/ipv4/sysctl_net_ipv4.c | 18 +- net/ipv4/tcp_input.c | 13 + net/ipv4/tcp_ipv4.c| 1 + 5 files changed, 20 insertions(+), 22 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 63f91d52cbc0ad35d8e04a8da0d9f57aa960bcb0..9dbb07d4eff465428817831e55c6a4922b7208fb 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -140,6 +140,7 @@ struct netns_ipv4 { int sysctl_tcp_max_reordering; int sysctl_tcp_dsack; int sysctl_tcp_app_win; + int sysctl_tcp_adv_win_scale; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index c6bee85a3dec0dea6d4402d89184ade02a637a2e..2572b57682987dd5f3700ed47d63e7238946b9a8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -244,7 +244,6 @@ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; -extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_frto; extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; @@ -1308,9 +1307,9 @@ void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, int wscale_ok, __u8 *rcv_wscale, __u32 init_rcv_wnd); -static inline int tcp_win_from_space(int space) +static inline int tcp_win_from_space(const struct sock *sk, int space) { - int tcp_adv_win_scale = sysctl_tcp_adv_win_scale; + int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale; return tcp_adv_win_scale <= 0 ? 
(space>>(-tcp_adv_win_scale)) : @@ -1320,13 +1319,13 @@ static inline int tcp_win_from_space(int space) /* Note: caller must be prepared to deal with negative returns */ static inline int tcp_space(const struct sock *sk) { - return tcp_win_from_space(sk->sk_rcvbuf - + return tcp_win_from_space(sk, sk->sk_rcvbuf - atomic_read(>sk_rmem_alloc)); } static inline int tcp_full_space(const struct sock *sk) { - return tcp_win_from_space(sk->sk_rcvbuf); + return tcp_win_from_space(sk, sk->sk_rcvbuf); } extern void tcp_openreq_init_rwin(struct request_sock *req, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e057788834a99cf99e141a602ddbe19b8e6fce3c..a95123e1e7da706c88bf5553b7d8ef6c2653ab50 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -437,15 +437,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = , }, - { - .procname = "tcp_adv_win_scale", - .data = _tcp_adv_win_scale, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = _adv_win_scale_min, - .extra2 = _adv_win_scale_max, - }, { .procname = "tcp_frto", .data = _tcp_frto, @@ -1145,6 +1136,15 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_adv_win_scale", + .data = _net.ipv4.sysctl_tcp_adv_win_scale, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = _adv_win_scale_min, + .extra2 = _adv_win_scale_max, + }, { } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 06a8c27e1a690e3b26cb6773320bafa31b06d3b3..d01f390da23dcd4100271b150bd8bc143f7328cf 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -77,9 +77,6 @@ #include #include -int sysctl_tcp_adv_win_scale __read_mostly = 1; -EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); - /* rfc5961 challenge ack rate limiting */ int sysctl_tcp_challenge_ack_limit = 1000; @@ -361,8 +358,8 @@ static int 
__tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); /* Optimize this! */ - int truesize = tcp_win_from_space(skb->truesize) >> 1; - int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1; + int truesize = tcp_win_from_space(sk, skb->truesize) >> 1; + int window = tcp_win_from_space(sk, sysctl_tcp_rmem[2]) >> 1; while (tp->rcv_ssthresh <= window) { if (truesize <= skb->len) @@ -387,7 +384,7 @@ static void tcp_grow_window(struct
[PATCH net-next 11/15] tcp: Namespace-ify sysctl_tcp_max_reordering
Signed-off-by: Eric Dumazet--- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++--- net/ipv4/tcp_input.c | 3 +-- net/ipv4/tcp_ipv4.c| 2 ++ 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index f0e792beeea974b0850090d7624a3d7490124067..3f6844665a2fbe66fc0c91bd13e057ac2e03007a 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -137,6 +137,7 @@ struct netns_ipv4 { int sysctl_tcp_rfc1337; int sysctl_tcp_abort_on_overflow; int sysctl_tcp_fack; + int sysctl_tcp_max_reordering; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index c912d63839e7cd4b6ad009344e8017de4c0b1483..2b559c7bf16c70864e77a34c78479d01f538d6cd 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -241,7 +241,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ extern int sysctl_tcp_max_orphans; -extern int sysctl_tcp_max_reordering; extern int sysctl_tcp_dsack; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1f23be13ce7be8b2a12b82aada36c6351fdfb70a..18cd228a20690541936dd6b3d9bb02cb283a9740 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -414,13 +414,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .procname = "tcp_max_reordering", - .data = _tcp_max_reordering, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_dsack", .data = _tcp_dsack, @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_max_reordering", + .data = _net.ipv4.sysctl_tcp_max_reordering, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, 
{ } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8941fc32072b69fedcb01afbe837f4d7791dd28d..bd6abf9a6d5a0f7a85384a259d61a34c4170eb50 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -77,7 +77,6 @@ #include #include -int sysctl_tcp_max_reordering __read_mostly = 300; int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; int sysctl_tcp_adv_win_scale __read_mostly = 1; @@ -887,7 +886,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric, return; if (metric > tp->reordering) { - tp->reordering = min(sysctl_tcp_max_reordering, metric); + tp->reordering = min(sock_net(sk)->ipv4.sysctl_tcp_max_reordering, metric); #if FASTRETRANS_DEBUG > 1 pr_debug("Disorder%d %d %u f%u s%u rr%d\n", diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2bc6ba2059d32aa848dbc415b4b0e194b61b0268..c379a242abb3546044da9a3ef032f6f68acafe88 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2488,6 +2488,8 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION; net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior. */ net->ipv4.sysctl_tcp_retrans_collapse = 1; + net->ipv4.sysctl_tcp_max_reordering = 300; + net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(>ipv4.tcp_fastopen_ctx_lock); net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; -- 2.15.0.rc2.357.g7e34df9404-goog
[PATCH net-next 10/15] tcp: remove stale sysctl_tcp_reordering
This extern is no longer used. Signed-off-by: Eric Dumazet --- include/net/tcp.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 38504d5ab109454219ac9570c3b11e02733384c1..c912d63839e7cd4b6ad009344e8017de4c0b1483 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -241,7 +241,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ extern int sysctl_tcp_max_orphans; -extern int sysctl_tcp_reordering; extern int sysctl_tcp_max_reordering; extern int sysctl_tcp_dsack; extern long sysctl_tcp_mem[3]; -- 2.15.0.rc2.357.g7e34df9404-goog
[PATCH net-next 12/15] tcp: Namespace-ify sysctl_tcp_dsack
Signed-off-by: Eric Dumazet--- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++--- net/ipv4/tcp_input.c | 5 ++--- net/ipv4/tcp_ipv4.c| 1 + 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 3f6844665a2fbe66fc0c91bd13e057ac2e03007a..956957a77db96ad3d231cc018c13503d615d8d2e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -138,6 +138,7 @@ struct netns_ipv4 { int sysctl_tcp_abort_on_overflow; int sysctl_tcp_fack; int sysctl_tcp_max_reordering; + int sysctl_tcp_dsack; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 2b559c7bf16c70864e77a34c78479d01f538d6cd..aba20c4828ee912d9f0f3ef49f3de5376729c022 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -241,7 +241,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ extern int sysctl_tcp_max_orphans; -extern int sysctl_tcp_dsack; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 18cd228a20690541936dd6b3d9bb02cb283a9740..7652a9c2a65d3f1cfa0a75d1198e1d9d56761c35 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -414,13 +414,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .procname = "tcp_dsack", - .data = _tcp_dsack, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_mem", .maxlen = sizeof(sysctl_tcp_mem), @@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_dsack", + .data = _net.ipv4.sysctl_tcp_dsack, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git 
a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bd6abf9a6d5a0f7a85384a259d61a34c4170eb50..db4798f99bb093b6d5a3e0fdd76efb83b88da49e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -77,7 +77,6 @@ #include #include -int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; int sysctl_tcp_adv_win_scale __read_mostly = 1; EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); @@ -4130,7 +4129,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) { struct tcp_sock *tp = tcp_sk(sk); - if (tcp_is_sack(tp) && sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { int mib_idx; if (before(seq, tp->rcv_nxt)) @@ -4165,7 +4164,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk); - if (tcp_is_sack(tp) && sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c379a242abb3546044da9a3ef032f6f68acafe88..d9d4d191e8f3c962a6ee68015ffe5a6e7fb8e9c1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2489,6 +2489,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior. */ net->ipv4.sysctl_tcp_retrans_collapse = 1; net->ipv4.sysctl_tcp_max_reordering = 300; + net->ipv4.sysctl_tcp_dsack = 1; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(>ipv4.tcp_fastopen_ctx_lock); -- 2.15.0.rc2.357.g7e34df9404-goog