date:20171026

[PATCH] drivers/net: chelsio/cxgb*: Convert timers to use timer_setup()

2017-10-26 Thread Kees Cook

In preparation for unconditionally passing the struct timer_list pointer to
all timer callbacks, switch to using the new timer_setup() and from_timer()
to pass the timer pointer explicitly.

Cc: Santosh Raspatur 
Cc: Ganesh Goudar 
Cc: Casey Leedom 
Cc: netdev@vger.kernel.org
Signed-off-by: Kees Cook 
---
 drivers/net/ethernet/chelsio/cxgb3/sge.c | 12 ++--
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c |  7 +++
 drivers/net/ethernet/chelsio/cxgb4/sge.c | 12 ++--
 drivers/net/ethernet/chelsio/cxgb4vf/sge.c   | 12 ++--
 4 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c 
b/drivers/net/ethernet/chelsio/cxgb3/sge.c
index e2d342647b19..e3d28ae75360 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c
@@ -2853,9 +2853,9 @@ void t3_sge_err_intr_handler(struct adapter *adapter)
  * bother cleaning them up here.
  *
  */
-static void sge_timer_tx(unsigned long data)
+static void sge_timer_tx(struct timer_list *t)
 {
-   struct sge_qset *qs = (struct sge_qset *)data;
+   struct sge_qset *qs = from_timer(qs, t, tx_reclaim_timer);
struct port_info *pi = netdev_priv(qs->netdev);
struct adapter *adap = pi->adapter;
unsigned int tbd[SGE_TXQ_PER_SET] = {0, 0};
@@ -2893,10 +2893,10 @@ static void sge_timer_tx(unsigned long data)
  * starved.
  *
  */
-static void sge_timer_rx(unsigned long data)
+static void sge_timer_rx(struct timer_list *t)
 {
spinlock_t *lock;
-   struct sge_qset *qs = (struct sge_qset *)data;
+   struct sge_qset *qs = from_timer(qs, t, rx_reclaim_timer);
struct port_info *pi = netdev_priv(qs->netdev);
struct adapter *adap = pi->adapter;
u32 status;
@@ -2976,8 +2976,8 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned 
int id, int nports,
struct sge_qset *q = &adapter->sge.qs[id];
 
init_qset_cntxt(q, id);
-   setup_timer(&q->tx_reclaim_timer, sge_timer_tx, (unsigned long)q);
-   setup_timer(&q->rx_reclaim_timer, sge_timer_rx, (unsigned long)q);
+   timer_setup(&q->tx_reclaim_timer, sge_timer_tx, 0);
+   timer_setup(&q->rx_reclaim_timer, sge_timer_rx, 0);
 
q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
   sizeof(struct rx_desc),
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
index 92a311767381..0c154c663748 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
@@ -366,9 +366,9 @@ int cxgb4_tc_flower_destroy(struct net_device *dev,
return ret;
 }
 
-static void ch_flower_stats_cb(unsigned long data)
+static void ch_flower_stats_cb(struct timer_list *t)
 {
-   struct adapter *adap = (struct adapter *)data;
+   struct adapter *adap = from_timer(adap, t, flower_stats_timer);
struct ch_tc_flower_entry *flower_entry;
struct ch_tc_flower_stats *ofld_stats;
unsigned int i;
@@ -440,8 +440,7 @@ int cxgb4_tc_flower_stats(struct net_device *dev,
 void cxgb4_init_tc_flower(struct adapter *adap)
 {
hash_init(adap->flower_anymatch_tbl);
-   setup_timer(&adap->flower_stats_timer, ch_flower_stats_cb,
-   (unsigned long)adap);
+   timer_setup(&adap->flower_stats_timer, ch_flower_stats_cb, 0);
mod_timer(&adap->flower_stats_timer, jiffies + STATS_CHECK_PERIOD);
 }
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c 
b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 4ef68f69b58c..486b01fe23bd 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -2583,11 +2583,11 @@ irq_handler_t t4_intr_handler(struct adapter *adap)
return t4_intr_intx;
 }
 
-static void sge_rx_timer_cb(unsigned long data)
+static void sge_rx_timer_cb(struct timer_list *t)
 {
unsigned long m;
unsigned int i;
-   struct adapter *adap = (struct adapter *)data;
+   struct adapter *adap = from_timer(adap, t, sge.rx_timer);
struct sge *s = &adap->sge;
 
for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
@@ -2620,11 +2620,11 @@ static void sge_rx_timer_cb(unsigned long data)
mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD);
 }
 
-static void sge_tx_timer_cb(unsigned long data)
+static void sge_tx_timer_cb(struct timer_list *t)
 {
unsigned long m;
unsigned int i, budget;
-   struct adapter *adap = (struct adapter *)data;
+   struct adapter *adap = from_timer(adap, t, sge.tx_timer);
struct sge *s = &adap->sge;
 
for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
@@ -3458,8 +3458,8 @@ int t4_sge_init(struct adapter *adap)
/* Set up timers used for recuring callbacks to process RX and TX
 * administrative tasks.
 */

[PATCH] drivers/net: appletalk/cops: Convert timers to use timer_setup()

2017-10-26 Thread Kees Cook

In preparation for unconditionally passing the struct timer_list pointer to
all timer callbacks, switch to using the new timer_setup() and from_timer()
to pass the timer pointer explicitly.

Cc: Allen Pais 
Cc: "David S. Miller" 
Cc: David Howells 
Cc: netdev@vger.kernel.org
Signed-off-by: Kees Cook 
---
 drivers/net/appletalk/cops.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c
index caf04284711a..bb49f6e40a19 100644
--- a/drivers/net/appletalk/cops.c
+++ b/drivers/net/appletalk/cops.c
@@ -155,6 +155,7 @@ static int cops_irqlist[] = {
 };
 
 static struct timer_list cops_timer;
+static struct net_device *cops_timer_dev;
 
 /* use 0 for production, 1 for verification, 2 for debug, 3 for verbose debug 
*/
 #ifndef COPS_DEBUG
@@ -187,7 +188,7 @@ static void cops_load (struct net_device *dev);
 static int  cops_nodeid (struct net_device *dev, int nodeid);
 
 static irqreturn_t cops_interrupt (int irq, void *dev_id);
-static void cops_poll (unsigned long ltdev);
+static void cops_poll(struct timer_list *t);
 static void cops_timeout(struct net_device *dev);
 static void cops_rx (struct net_device *dev);
 static netdev_tx_t  cops_send_packet (struct sk_buff *skb,
@@ -424,7 +425,8 @@ static int cops_open(struct net_device *dev)
 */
if(lp->board==TANGENT)  /* Poll 20 times per second */
{
-   setup_timer(&cops_timer, cops_poll, (unsigned long)dev);
+   cops_timer_dev = dev;
+   timer_setup(&cops_timer, cops_poll, 0);
cops_timer.expires  = jiffies + HZ/20;
add_timer(&cops_timer);
} 
@@ -671,12 +673,11 @@ static int cops_nodeid (struct net_device *dev, int 
nodeid)
  * Poll the Tangent type cards to see if we have work.
  */
  
-static void cops_poll(unsigned long ltdev)
+static void cops_poll(struct timer_list *unused)
 {
int ioaddr, status;
int boguscount = 0;
-
-   struct net_device *dev = (struct net_device *)ltdev;
+   struct net_device *dev = cops_timer_dev;
 
del_timer(&cops_timer);
 
-- 
2.7.4


-- 
Kees Cook
Pixel Security

[PATCH] drivers/net: amd: Convert timers to use timer_setup()

2017-10-26 Thread Kees Cook

In preparation for unconditionally passing the struct timer_list pointer to
all timer callbacks, switch to using the new timer_setup() and from_timer()
to pass the timer pointer explicitly.

Cc: Tom Lendacky 
Cc: "David S. Miller" 
Cc: Allen Pais 
Cc: netdev@vger.kernel.org
Signed-off-by: Kees Cook 
---
 drivers/net/ethernet/amd/a2065.c | 13 ++---
 drivers/net/ethernet/amd/am79c961a.c |  9 +
 drivers/net/ethernet/amd/am79c961a.h |  1 +
 drivers/net/ethernet/amd/declance.c  | 10 ++
 drivers/net/ethernet/amd/pcnet32.c   | 10 +-
 drivers/net/ethernet/amd/sunlance.c  |  8 
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 14 ++
 7 files changed, 37 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/amd/a2065.c b/drivers/net/ethernet/amd/a2065.c
index 998d30e050a6..212fe72a190b 100644
--- a/drivers/net/ethernet/amd/a2065.c
+++ b/drivers/net/ethernet/amd/a2065.c
@@ -123,6 +123,7 @@ struct lance_private {
int burst_sizes;  /* ledma SBus burst sizes */
 #endif
struct timer_list multicast_timer;
+   struct net_device *dev;
 };
 
 #define LANCE_ADDR(x) ((int)(x) & ~0xff00)
@@ -638,6 +639,13 @@ static void lance_set_multicast(struct net_device *dev)
netif_wake_queue(dev);
 }
 
+static void lance_set_multicast_retry(struct timer_list *t)
+{
+   struct lance_private *lp = from_timer(lp, t, multicast_timer);
+
+   lance_set_multicast(lp->dev);
+}
+
 static int a2065_init_one(struct zorro_dev *z,
  const struct zorro_device_id *ent);
 static void a2065_remove_one(struct zorro_dev *z);
@@ -728,14 +736,13 @@ static int a2065_init_one(struct zorro_dev *z,
priv->lance_log_tx_bufs = LANCE_LOG_TX_BUFFERS;
priv->rx_ring_mod_mask = RX_RING_MOD_MASK;
priv->tx_ring_mod_mask = TX_RING_MOD_MASK;
+   priv->dev = dev;
 
dev->netdev_ops = &lance_netdev_ops;
dev->watchdog_timeo = 5*HZ;
dev->dma = 0;
 
-   setup_timer(&priv->multicast_timer,
-   (void(*)(unsigned long))lance_set_multicast,
-   (unsigned long)dev);
+   timer_setup(&priv->multicast_timer, lance_set_multicast_retry, 0);
 
err = register_netdev(dev);
if (err) {
diff --git a/drivers/net/ethernet/amd/am79c961a.c 
b/drivers/net/ethernet/amd/am79c961a.c
index 0612dbee00d2..01d132c02ff9 100644
--- a/drivers/net/ethernet/amd/am79c961a.c
+++ b/drivers/net/ethernet/amd/am79c961a.c
@@ -302,10 +302,10 @@ am79c961_init_for_open(struct net_device *dev)
write_rreg (dev->base_addr, CSR0, CSR0_IENA|CSR0_STRT);
 }
 
-static void am79c961_timer(unsigned long data)
+static void am79c961_timer(struct timer_list *t)
 {
-   struct net_device *dev = (struct net_device *)data;
-   struct dev_priv *priv = netdev_priv(dev);
+   struct dev_priv *priv = from_timer(priv, t, timer);
+   struct net_device *dev = priv->dev;
unsigned int lnkstat, carrier;
unsigned long flags;
 
@@ -728,7 +728,8 @@ static int am79c961_probe(struct platform_device *pdev)
am79c961_banner();
 
spin_lock_init(&priv->chip_lock);
-   setup_timer(&priv->timer, am79c961_timer, (unsigned long)dev);
+   priv->dev = dev;
+   timer_setup(&priv->timer, am79c961_timer, 0);
 
if (am79c961_hw_init(dev))
goto release;
diff --git a/drivers/net/ethernet/amd/am79c961a.h 
b/drivers/net/ethernet/amd/am79c961a.h
index 9f384b79507b..fc5088c70731 100644
--- a/drivers/net/ethernet/amd/am79c961a.h
+++ b/drivers/net/ethernet/amd/am79c961a.h
@@ -140,6 +140,7 @@ struct dev_priv {
 unsigned long  txhdr;
 spinlock_t chip_lock;
 struct timer_list  timer;
+struct net_device   *dev;
 };
 
 #endif
diff --git a/drivers/net/ethernet/amd/declance.c 
b/drivers/net/ethernet/amd/declance.c
index 9bdf81c2cd00..116997a8b593 100644
--- a/drivers/net/ethernet/amd/declance.c
+++ b/drivers/net/ethernet/amd/declance.c
@@ -260,6 +260,7 @@ struct lance_private {
unsigned short busmaster_regval;
 
struct timer_list   multicast_timer;
+   struct net_device   *dev;
 
/* Pointers to the ring buffers as seen from the CPU */
char *rx_buf_ptr_cpu[RX_RING_SIZE];
@@ -1000,9 +1001,10 @@ static void lance_set_multicast(struct net_device *dev)
netif_wake_queue(dev);
 }
 
-static void lance_set_multicast_retry(unsigned long _opaque)
+static void lance_set_multicast_retry(struct timer_list *t)
 {
-   struct net_device *dev = (struct net_device *) _opaque;
+   struct lance_private *lp = from_timer(lp, t, multicast_timer);
+   struct net_device *dev = lp->dev;
 
lance_set_multicast(dev);
 }
@@ -1246,8 +1248,8 @@ static int dec_lance_probe(struct device *bdev, const int 
type)
 * can occur from interrupts (ex. IPv6).  So we
 * use a timer to try

[PATCH] drivers/net: korina: Convert timers to use timer_setup()

2017-10-26 Thread Kees Cook

In preparation for unconditionally passing the struct timer_list pointer to
all timer callbacks, switch to using the new timer_setup() and from_timer()
to pass the timer pointer explicitly.

Cc: "David S. Miller" 
Cc: Roman Yeryomin 
Cc: Florian Fainelli 
Cc: netdev@vger.kernel.org
Signed-off-by: Kees Cook 
---
 drivers/net/ethernet/korina.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index 7cecd9dbc111..ae195f8adff5 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -653,10 +653,10 @@ static void korina_check_media(struct net_device *dev, 
unsigned int init_media)
&lp->eth_regs->ethmac2);
 }
 
-static void korina_poll_media(unsigned long data)
+static void korina_poll_media(struct timer_list *t)
 {
-   struct net_device *dev = (struct net_device *) data;
-   struct korina_private *lp = netdev_priv(dev);
+   struct korina_private *lp = from_timer(lp, t, media_check_timer);
+   struct net_device *dev = lp->dev;
 
korina_check_media(dev, 0);
mod_timer(&lp->media_check_timer, jiffies + HZ);
@@ -1103,7 +1103,7 @@ static int korina_probe(struct platform_device *pdev)
": cannot register net device: %d\n", rc);
goto probe_err_register;
}
-   setup_timer(&lp->media_check_timer, korina_poll_media, (unsigned long) 
dev);
+   timer_setup(&lp->media_check_timer, korina_poll_media, 0);
 
INIT_WORK(&lp->restart_task, korina_restart_task);
 
-- 
2.7.4


-- 
Kees Cook
Pixel Security

[PATCH] drivers/net: fealnx: Convert timers to use timer_setup()

2017-10-26 Thread Kees Cook

In preparation for unconditionally passing the struct timer_list pointer to
all timer callbacks, switch to using the new timer_setup() and from_timer()
to pass the timer pointer explicitly.

Cc: "David S. Miller" 
Cc: "yuval.sh...@oracle.com" 
Cc: Allen Pais 
Cc: Stephen Hemminger 
Cc: Philippe Reynes 
Cc: Johannes Berg 
Cc: netdev@vger.kernel.org
Signed-off-by: Kees Cook 
---
 drivers/net/ethernet/fealnx.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c
index c8982313d850..23053919ebf5 100644
--- a/drivers/net/ethernet/fealnx.c
+++ b/drivers/net/ethernet/fealnx.c
@@ -426,8 +426,8 @@ static void mdio_write(struct net_device *dev, int phy_id, 
int location, int val
 static int netdev_open(struct net_device *dev);
 static void getlinktype(struct net_device *dev);
 static void getlinkstatus(struct net_device *dev);
-static void netdev_timer(unsigned long data);
-static void reset_timer(unsigned long data);
+static void netdev_timer(struct timer_list *t);
+static void reset_timer(struct timer_list *t);
 static void fealnx_tx_timeout(struct net_device *dev);
 static void init_ring(struct net_device *dev);
 static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev);
@@ -909,13 +909,13 @@ static int netdev_open(struct net_device *dev)
printk(KERN_DEBUG "%s: Done netdev_open().\n", dev->name);
 
/* Set the timer to check for link beat. */
-   setup_timer(&np->timer, netdev_timer, (unsigned long)dev);
+   timer_setup(&np->timer, netdev_timer, 0);
np->timer.expires = RUN_AT(3 * HZ);
 
/* timer handler */
add_timer(&np->timer);
 
-   setup_timer(&np->reset_timer, reset_timer, (unsigned long)dev);
+   timer_setup(&np->reset_timer, reset_timer, 0);
np->reset_timer_armed = 0;
return rc;
 }
@@ -1078,10 +1078,10 @@ static void allocate_rx_buffers(struct net_device *dev)
 }
 
 
-static void netdev_timer(unsigned long data)
+static void netdev_timer(struct timer_list *t)
 {
-   struct net_device *dev = (struct net_device *) data;
-   struct netdev_private *np = netdev_priv(dev);
+   struct netdev_private *np = from_timer(np, t, timer);
+   struct net_device *dev = np->mii.dev;
void __iomem *ioaddr = np->mem;
int old_crvalue = np->crvalue;
unsigned int old_linkok = np->linkok;
@@ -1167,10 +1167,10 @@ static void enable_rxtx(struct net_device *dev)
 }
 
 
-static void reset_timer(unsigned long data)
+static void reset_timer(struct timer_list *t)
 {
-   struct net_device *dev = (struct net_device *) data;
-   struct netdev_private *np = netdev_priv(dev);
+   struct netdev_private *np = from_timer(np, t, reset_timer);
+   struct net_device *dev = np->mii.dev;
unsigned long flags;
 
printk(KERN_WARNING "%s: resetting tx and rx machinery\n", dev->name);
-- 
2.7.4


-- 
Kees Cook
Pixel Security

[PATCH] drivers/net: natsemi: Convert timers to use timer_setup()

2017-10-26 Thread Kees Cook

In preparation for unconditionally passing the struct timer_list pointer to
all timer callbacks, switch to using the new timer_setup() and from_timer()
to pass the timer pointer explicitly.

Cc: "David S. Miller" 
Cc: Allen Pais 
Cc: Eric Dumazet 
Cc: Philippe Reynes 
Cc: Wei Yongjun 
Cc: netdev@vger.kernel.org
Signed-off-by: Kees Cook 
---
 drivers/net/ethernet/natsemi/natsemi.c | 10 +-
 drivers/net/ethernet/natsemi/ns83820.c |  8 
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/natsemi/natsemi.c 
b/drivers/net/ethernet/natsemi/natsemi.c
index dedeacd0bbca..b9a1a9f999ea 100644
--- a/drivers/net/ethernet/natsemi/natsemi.c
+++ b/drivers/net/ethernet/natsemi/natsemi.c
@@ -610,7 +610,7 @@ static int netdev_open(struct net_device *dev);
 static void do_cable_magic(struct net_device *dev);
 static void undo_cable_magic(struct net_device *dev);
 static void check_link(struct net_device *dev);
-static void netdev_timer(unsigned long data);
+static void netdev_timer(struct timer_list *t);
 static void dump_ring(struct net_device *dev);
 static void ns_tx_timeout(struct net_device *dev);
 static int alloc_ring(struct net_device *dev);
@@ -1571,7 +1571,7 @@ static int netdev_open(struct net_device *dev)
dev->name, (int)readl(ioaddr + ChipCmd));
 
/* Set the timer to check for link beat. */
-   setup_timer(&np->timer, netdev_timer, (unsigned long)dev);
+   timer_setup(&np->timer, netdev_timer, 0);
np->timer.expires = round_jiffies(jiffies + NATSEMI_TIMER_FREQ);
add_timer(&np->timer);
 
@@ -1787,10 +1787,10 @@ static void init_registers(struct net_device *dev)
  *this check via dspcfg_workaround sysfs option.
  * 3) check of death of the RX path due to OOM
  */
-static void netdev_timer(unsigned long data)
+static void netdev_timer(struct timer_list *t)
 {
-   struct net_device *dev = (struct net_device *)data;
-   struct netdev_private *np = netdev_priv(dev);
+   struct netdev_private *np = from_timer(np, t, timer);
+   struct net_device *dev = np->dev;
void __iomem * ioaddr = ns_ioaddr(dev);
int next_tick = NATSEMI_TIMER_FREQ;
const int irq = np->pci_dev->irq;
diff --git a/drivers/net/ethernet/natsemi/ns83820.c 
b/drivers/net/ethernet/natsemi/ns83820.c
index 99d3c7884a4a..958fced4dacf 100644
--- a/drivers/net/ethernet/natsemi/ns83820.c
+++ b/drivers/net/ethernet/natsemi/ns83820.c
@@ -1600,10 +1600,10 @@ static void ns83820_tx_timeout(struct net_device *ndev)
spin_unlock_irqrestore(&dev->tx_lock, flags);
 }
 
-static void ns83820_tx_watch(unsigned long data)
+static void ns83820_tx_watch(struct timer_list *t)
 {
-   struct net_device *ndev = (void *)data;
-   struct ns83820 *dev = PRIV(ndev);
+   struct ns83820 *dev = from_timer(dev, t, tx_watchdog);
+   struct net_device *ndev = dev->ndev;
 
 #if defined(DEBUG)
printk("ns83820_tx_watch: %u %u %d\n",
@@ -1652,7 +1652,7 @@ static int ns83820_open(struct net_device *ndev)
writel(0, dev->base + TXDP_HI);
writel(desc, dev->base + TXDP);
 
-   setup_timer(&dev->tx_watchdog, ns83820_tx_watch, (unsigned long)ndev);
+   timer_setup(&dev->tx_watchdog, ns83820_tx_watch, 0);
mod_timer(&dev->tx_watchdog, jiffies + 2*HZ);
 
netif_start_queue(ndev);/* FIXME: wait for phy to come up */
-- 
2.7.4


-- 
Kees Cook
Pixel Security

[PATCH] drivers/net: packetengines: Convert timers to use timer_setup()

2017-10-26 Thread Kees Cook

In preparation for unconditionally passing the struct timer_list pointer to
all timer callbacks, switch to using the new timer_setup() and from_timer()
to pass the timer pointer explicitly.

Cc: "David S. Miller" 
Cc: Allen Pais 
Cc: yuan linyu 
Cc: Philippe Reynes 
Cc: netdev@vger.kernel.org
Signed-off-by: Kees Cook 
---
 drivers/net/ethernet/packetengines/hamachi.c   | 14 +++---
 drivers/net/ethernet/packetengines/yellowfin.c | 10 +-
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/packetengines/hamachi.c 
b/drivers/net/ethernet/packetengines/hamachi.c
index 77bc7cca8980..c9529c29a0a7 100644
--- a/drivers/net/ethernet/packetengines/hamachi.c
+++ b/drivers/net/ethernet/packetengines/hamachi.c
@@ -413,13 +413,13 @@ that case.
 
 /* The rest of these values should never change. */
 
-static void hamachi_timer(unsigned long data);
+static void hamachi_timer(struct timer_list *t);
 
 enum capability_flags {CanHaveMII=1, };
 static const struct chip_info {
u16 vendor_id, device_id, device_id_mask, pad;
const char *name;
-   void (*media_timer)(unsigned long data);
+   void (*media_timer)(struct timer_list *t);
int flags;
 } chip_tbl[] = {
{0x1318, 0x0911, 0xffff, 0, "Hamachi GNIC-II", hamachi_timer, 0},
@@ -547,7 +547,7 @@ static int mdio_read(struct net_device *dev, int phy_id, 
int location);
 static void mdio_write(struct net_device *dev, int phy_id, int location, int 
value);
 static int hamachi_open(struct net_device *dev);
 static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
-static void hamachi_timer(unsigned long data);
+static void hamachi_timer(struct timer_list *t);
 static void hamachi_tx_timeout(struct net_device *dev);
 static void hamachi_init_ring(struct net_device *dev);
 static netdev_tx_t hamachi_start_xmit(struct sk_buff *skb,
@@ -979,7 +979,7 @@ static int hamachi_open(struct net_device *dev)
   dev->name, readw(ioaddr + RxStatus), readw(ioaddr + 
TxStatus));
}
/* Set the timer to check for link beat. */
-   setup_timer(&hmp->timer, hamachi_timer, (unsigned long)dev);
+   timer_setup(&hmp->timer, hamachi_timer, 0);
hmp->timer.expires = RUN_AT((24*HZ)/10);/* 2.4 
sec. */
add_timer(&hmp->timer);
 
@@ -1017,10 +1017,10 @@ static inline int hamachi_tx(struct net_device *dev)
return 0;
 }
 
-static void hamachi_timer(unsigned long data)
+static void hamachi_timer(struct timer_list *t)
 {
-   struct net_device *dev = (struct net_device *)data;
-   struct hamachi_private *hmp = netdev_priv(dev);
+   struct hamachi_private *hmp = from_timer(hmp, t, timer);
+   struct net_device *dev = hmp->mii_if.dev;
void __iomem *ioaddr = hmp->base;
int next_tick = 10*HZ;
 
diff --git a/drivers/net/ethernet/packetengines/yellowfin.c 
b/drivers/net/ethernet/packetengines/yellowfin.c
index 33c241f52a71..54224d1822e3 100644
--- a/drivers/net/ethernet/packetengines/yellowfin.c
+++ b/drivers/net/ethernet/packetengines/yellowfin.c
@@ -343,7 +343,7 @@ static int mdio_read(void __iomem *ioaddr, int phy_id, int 
location);
 static void mdio_write(void __iomem *ioaddr, int phy_id, int location, int 
value);
 static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
 static int yellowfin_open(struct net_device *dev);
-static void yellowfin_timer(unsigned long data);
+static void yellowfin_timer(struct timer_list *t);
 static void yellowfin_tx_timeout(struct net_device *dev);
 static int yellowfin_init_ring(struct net_device *dev);
 static netdev_tx_t yellowfin_start_xmit(struct sk_buff *skb,
@@ -632,7 +632,7 @@ static int yellowfin_open(struct net_device *dev)
}
 
/* Set the timer to check for link beat. */
-   setup_timer(&yp->timer, yellowfin_timer, (unsigned long)dev);
+   timer_setup(&yp->timer, yellowfin_timer, 0);
yp->timer.expires = jiffies + 3*HZ;
add_timer(&yp->timer);
 out:
@@ -643,10 +643,10 @@ static int yellowfin_open(struct net_device *dev)
goto out;
 }
 
-static void yellowfin_timer(unsigned long data)
+static void yellowfin_timer(struct timer_list *t)
 {
-   struct net_device *dev = (struct net_device *)data;
-   struct yellowfin_private *yp = netdev_priv(dev);
+   struct yellowfin_private *yp = from_timer(yp, t, timer);
+   struct net_device *dev = pci_get_drvdata(yp->pci_dev);
void __iomem *ioaddr = yp->base;
int next_tick = 60*HZ;
 
-- 
2.7.4


-- 
Kees Cook
Pixel Security

[PATCH] drivers/net: mellanox: Convert timers to use timer_setup()

2017-10-26 Thread Kees Cook

In preparation for unconditionally passing the struct timer_list pointer to
all timer callbacks, switch to using the new timer_setup() and from_timer()
to pass the timer pointer explicitly.

Cc: Saeed Mahameed 
Cc: Matan Barak 
Cc: Leon Romanovsky 
Cc: netdev@vger.kernel.org
Cc: linux-r...@vger.kernel.org
Signed-off-by: Kees Cook 
---
 drivers/net/ethernet/mellanox/mlx5/core/health.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c 
b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index a89a68ce53ad..185dcac0abe7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -285,9 +285,9 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev)
spin_unlock_irqrestore(&health->wq_lock, flags);
 }
 
-static void poll_health(unsigned long data)
+static void poll_health(struct timer_list *t)
 {
-   struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data;
+   struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer);
struct mlx5_core_health *health = &dev->priv.health;
u32 count;
 
@@ -320,7 +320,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
 {
struct mlx5_core_health *health = &dev->priv.health;
 
-   setup_timer(&health->timer, poll_health, (unsigned long)dev);
+   timer_setup(&health->timer, poll_health, 0);
health->sick = 0;
clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
-- 
2.7.4


-- 
Kees Cook
Pixel Security

[PATCH] drivers/net: smsc: Convert timers to use timer_setup()

2017-10-26 Thread Kees Cook

In preparation for unconditionally passing the struct timer_list pointer to
all timer callbacks, switch to using the new timer_setup() and from_timer()
to pass the timer pointer explicitly.

Cc: "David S. Miller" 
Cc: "yuval.sh...@oracle.com" 
Cc: Eric Dumazet 
Cc: Philippe Reynes 
Cc: Allen Pais 
Cc: Tobias Klauser 
Cc: netdev@vger.kernel.org
Signed-off-by: Kees Cook 
---
 drivers/net/ethernet/smsc/epic100.c | 10 +-
 drivers/net/ethernet/smsc/smc91c92_cs.c | 10 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/smsc/epic100.c 
b/drivers/net/ethernet/smsc/epic100.c
index 2a9724898fcf..949aaef390b6 100644
--- a/drivers/net/ethernet/smsc/epic100.c
+++ b/drivers/net/ethernet/smsc/epic100.c
@@ -290,7 +290,7 @@ static int read_eeprom(struct epic_private *, int);
 static int mdio_read(struct net_device *dev, int phy_id, int location);
 static void mdio_write(struct net_device *dev, int phy_id, int loc, int val);
 static void epic_restart(struct net_device *dev);
-static void epic_timer(unsigned long data);
+static void epic_timer(struct timer_list *t);
 static void epic_tx_timeout(struct net_device *dev);
 static void epic_init_ring(struct net_device *dev);
 static netdev_tx_t epic_start_xmit(struct sk_buff *skb,
@@ -739,7 +739,7 @@ static int epic_open(struct net_device *dev)
 
/* Set the timer to switch to check for link beat and perhaps switch
   to an alternate media type. */
-   setup_timer(&ep->timer, epic_timer, (unsigned long)dev);
+   timer_setup(&ep->timer, epic_timer, 0);
ep->timer.expires = jiffies + 3*HZ;
add_timer(&ep->timer);
 
@@ -843,10 +843,10 @@ static void check_media(struct net_device *dev)
}
 }
 
-static void epic_timer(unsigned long data)
+static void epic_timer(struct timer_list *t)
 {
-   struct net_device *dev = (struct net_device *)data;
-   struct epic_private *ep = netdev_priv(dev);
+   struct epic_private *ep = from_timer(ep, t, timer);
+   struct net_device *dev = ep->mii.dev;
void __iomem *ioaddr = ep->ioaddr;
int next_tick = 5*HZ;
 
diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c 
b/drivers/net/ethernet/smsc/smc91c92_cs.c
index 92c927aec66d..a55f430f6a7b 100644
--- a/drivers/net/ethernet/smsc/smc91c92_cs.c
+++ b/drivers/net/ethernet/smsc/smc91c92_cs.c
@@ -280,7 +280,7 @@ static void set_rx_mode(struct net_device *dev);
 static int s9k_config(struct net_device *dev, struct ifmap *map);
 static void smc_set_xcvr(struct net_device *dev, int if_port);
 static void smc_reset(struct net_device *dev);
-static void media_check(u_long arg);
+static void media_check(struct timer_list *t);
 static void mdio_sync(unsigned int addr);
 static int mdio_read(struct net_device *dev, int phy_id, int loc);
 static void mdio_write(struct net_device *dev, int phy_id, int loc, int value);
@@ -1070,7 +1070,7 @@ static int smc_open(struct net_device *dev)
 smc->packets_waiting = 0;
 
 smc_reset(dev);
-setup_timer(&smc->media, media_check, (u_long)dev);
+timer_setup(&smc->media, media_check, 0);
 mod_timer(&smc->media, jiffies + HZ);
 
 return 0;
@@ -1708,10 +1708,10 @@ static void smc_reset(struct net_device *dev)
 
 ==*/
 
-static void media_check(u_long arg)
+static void media_check(struct timer_list *t)
 {
-struct net_device *dev = (struct net_device *) arg;
-struct smc_private *smc = netdev_priv(dev);
+struct smc_private *smc = from_timer(smc, t, media);
+struct net_device *dev = smc->mii_if.dev;
 unsigned int ioaddr = dev->base_addr;
 u_short i, media, saved_bank;
 u_short link;
-- 
2.7.4


-- 
Kees Cook
Pixel Security

[PATCH] netfilter: ipvs: Convert timers to use timer_setup()

2017-10-26 Thread Kees Cook

In preparation for unconditionally passing the struct timer_list pointer to
all timer callbacks, switch to using the new timer_setup() and from_timer()
to pass the timer pointer explicitly.

Cc: Wensong Zhang 
Cc: Simon Horman 
Cc: Julian Anastasov 
Cc: Pablo Neira Ayuso 
Cc: Jozsef Kadlecsik 
Cc: Florian Westphal 
Cc: "David S. Miller" 
Cc: netdev@vger.kernel.org
Cc: lvs-de...@vger.kernel.org
Cc: netfilter-de...@vger.kernel.org
Cc: coret...@netfilter.org
Signed-off-by: Kees Cook 
---
 net/netfilter/ipvs/ip_vs_conn.c  | 10 +-
 net/netfilter/ipvs/ip_vs_ctl.c   |  7 +++
 net/netfilter/ipvs/ip_vs_est.c   |  6 +++---
 net/netfilter/ipvs/ip_vs_lblc.c  | 11 ++-
 net/netfilter/ipvs/ip_vs_lblcr.c | 11 ++-
 5 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 3d2ac71a83ec..3a43b3470331 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -104,7 +104,7 @@ static inline void ct_write_unlock_bh(unsigned int key)
spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
 }
 
-static void ip_vs_conn_expire(unsigned long data);
+static void ip_vs_conn_expire(struct timer_list *t);
 
 /*
  * Returns hash value for IPVS connection entry
@@ -457,7 +457,7 @@ EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto);
 static void __ip_vs_conn_put_notimer(struct ip_vs_conn *cp)
 {
__ip_vs_conn_put(cp);
-   ip_vs_conn_expire((unsigned long)cp);
+   ip_vs_conn_expire(&cp->timer);
 }
 
 /*
@@ -817,9 +817,9 @@ static void ip_vs_conn_rcu_free(struct rcu_head *head)
kmem_cache_free(ip_vs_conn_cachep, cp);
 }
 
-static void ip_vs_conn_expire(unsigned long data)
+static void ip_vs_conn_expire(struct timer_list *t)
 {
-   struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
+   struct ip_vs_conn *cp = from_timer(cp, t, timer);
struct netns_ipvs *ipvs = cp->ipvs;
 
/*
@@ -909,7 +909,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int 
dest_af,
}
 
INIT_HLIST_NODE(&cp->c_list);
-   setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
+   timer_setup(&cp->timer, ip_vs_conn_expire, 0);
cp->ipvs   = ipvs;
cp->af = p->af;
cp->daf= dest_af;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 4f940d7eb2f7..b47e266c6eca 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1146,9 +1146,9 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct 
ip_vs_dest_user_kern *udest)
return 0;
 }
 
-static void ip_vs_dest_trash_expire(unsigned long data)
+static void ip_vs_dest_trash_expire(struct timer_list *t)
 {
-   struct netns_ipvs *ipvs = (struct netns_ipvs *)data;
+   struct netns_ipvs *ipvs = from_timer(ipvs, t, dest_trash_timer);
struct ip_vs_dest *dest, *next;
unsigned long now = jiffies;
 
@@ -4019,8 +4019,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs 
*ipvs)
 
INIT_LIST_HEAD(&ipvs->dest_trash);
spin_lock_init(&ipvs->dest_trash_lock);
-   setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire,
-   (unsigned long) ipvs);
+   timer_setup(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 0);
atomic_set(&ipvs->ftpsvc_counter, 0);
atomic_set(&ipvs->nullsvc_counter, 0);
atomic_set(&ipvs->conn_out_counter, 0);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 457c6c193e13..489055091a9b 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -97,12 +97,12 @@ static void ip_vs_read_cpu_stats(struct ip_vs_kstats *sum,
 }
 
 
-static void estimation_timer(unsigned long arg)
+static void estimation_timer(struct timer_list *t)
 {
struct ip_vs_estimator *e;
struct ip_vs_stats *s;
u64 rate;
-   struct netns_ipvs *ipvs = (struct netns_ipvs *)arg;
+   struct netns_ipvs *ipvs = from_timer(ipvs, t, est_timer);
 
spin_lock(&ipvs->est_lock);
list_for_each_entry(e, &ipvs->est_list, list) {
@@ -192,7 +192,7 @@ int __net_init ip_vs_estimator_net_init(struct netns_ipvs 
*ipvs)
 {
INIT_LIST_HEAD(&ipvs->est_list);
spin_lock_init(&ipvs->est_lock);
-   setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)ipvs);
+   timer_setup(&ipvs->est_timer, estimation_timer, 0);
mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);
return 0;
 }
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index b6aa4a970c6e..d625179de485 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -106,6 +106,7 @@ struct ip_vs_lblc_table {
struct rcu_head rcu_head;
struct hlist_head   bucket[IP_VS_LBLC_TAB_SIZE];  /* hash bucket */
struct

[PATCH] drivers/net: dlink: Convert timers to use timer_setup()

2017-10-26 Thread Kees Cook

In preparation for unconditionally passing the struct timer_list pointer to
all timer callbacks, switch to using the new timer_setup() and from_timer()
to pass the timer pointer explicitly.

Cc: Denis Kirjanov 
Cc: netdev@vger.kernel.org
Signed-off-by: Kees Cook 
---
 drivers/net/ethernet/dlink/sundance.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/dlink/sundance.c 
b/drivers/net/ethernet/dlink/sundance.c
index 6ca9e981ad57..1a27176381fb 100644
--- a/drivers/net/ethernet/dlink/sundance.c
+++ b/drivers/net/ethernet/dlink/sundance.c
@@ -431,7 +431,7 @@ static void mdio_write(struct net_device *dev, int phy_id, 
int location, int val
 static int  mdio_wait_link(struct net_device *dev, int wait);
 static int  netdev_open(struct net_device *dev);
 static void check_duplex(struct net_device *dev);
-static void netdev_timer(unsigned long data);
+static void netdev_timer(struct timer_list *t);
 static void tx_timeout(struct net_device *dev);
 static void init_ring(struct net_device *dev);
 static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev);
@@ -913,7 +913,7 @@ static int netdev_open(struct net_device *dev)
   ioread16(ioaddr + MACCtrl1), ioread16(ioaddr + 
MACCtrl0));
 
/* Set the timer to check for link beat. */
-   setup_timer(&np->timer, netdev_timer, (unsigned long)dev);
+   timer_setup(&np->timer, netdev_timer, 0);
np->timer.expires = jiffies + 3*HZ;
add_timer(&np->timer);
 
@@ -951,10 +951,10 @@ static void check_duplex(struct net_device *dev)
}
 }
 
-static void netdev_timer(unsigned long data)
+static void netdev_timer(struct timer_list *t)
 {
-   struct net_device *dev = (struct net_device *)data;
-   struct netdev_private *np = netdev_priv(dev);
+   struct netdev_private *np = from_timer(np, t, timer);
+   struct net_device *dev = np->mii_if.dev;
void __iomem *ioaddr = np->base;
int next_tick = 10*HZ;
 
-- 
2.7.4


-- 
Kees Cook
Pixel Security

[PATCH] drivers/net: 8390: Convert timers to use timer_setup()

2017-10-26 Thread Kees Cook

In preparation for unconditionally passing the struct timer_list pointer to
all timer callbacks, switch to using the new timer_setup() and from_timer()
to pass the timer pointer explicitly.

Cc: netdev@vger.kernel.org
Signed-off-by: Kees Cook 
---
 drivers/net/ethernet/8390/axnet_cs.c | 10 +-
 drivers/net/ethernet/8390/pcnet_cs.c | 10 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/8390/axnet_cs.c 
b/drivers/net/ethernet/8390/axnet_cs.c
index 3da1fc539ef9..7bddb8efb6d5 100644
--- a/drivers/net/ethernet/8390/axnet_cs.c
+++ b/drivers/net/ethernet/8390/axnet_cs.c
@@ -85,7 +85,7 @@ static struct net_device_stats *get_stats(struct net_device 
*dev);
 static void set_multicast_list(struct net_device *dev);
 static void axnet_tx_timeout(struct net_device *dev);
 static irqreturn_t ei_irq_wrapper(int irq, void *dev_id);
-static void ei_watchdog(u_long arg);
+static void ei_watchdog(struct timer_list *t);
 static void axnet_reset_8390(struct net_device *dev);
 
 static int mdio_read(unsigned int addr, int phy_id, int loc);
@@ -483,7 +483,7 @@ static int axnet_open(struct net_device *dev)
 link->open++;
 
 info->link_status = 0x00;
-setup_timer(&info->watchdog, ei_watchdog, (u_long)dev);
+timer_setup(&info->watchdog, ei_watchdog, 0);
 mod_timer(&info->watchdog, jiffies + HZ);
 
 return ax_open(dev);
@@ -547,10 +547,10 @@ static irqreturn_t ei_irq_wrapper(int irq, void *dev_id)
 return ax_interrupt(irq, dev_id);
 }
 
-static void ei_watchdog(u_long arg)
+static void ei_watchdog(struct timer_list *t)
 {
-struct net_device *dev = (struct net_device *)(arg);
-struct axnet_dev *info = PRIV(dev);
+struct axnet_dev *info = from_timer(info, t, watchdog);
+struct net_device *dev = info->p_dev->priv;
 unsigned int nic_base = dev->base_addr;
 unsigned int mii_addr = nic_base + AXNET_MII_EEP;
 u_short link;
diff --git a/drivers/net/ethernet/8390/pcnet_cs.c 
b/drivers/net/ethernet/8390/pcnet_cs.c
index bd0a2a14b649..eae9827035dc 100644
--- a/drivers/net/ethernet/8390/pcnet_cs.c
+++ b/drivers/net/ethernet/8390/pcnet_cs.c
@@ -99,7 +99,7 @@ static int pcnet_open(struct net_device *dev);
 static int pcnet_close(struct net_device *dev);
 static int ei_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
 static irqreturn_t ei_irq_wrapper(int irq, void *dev_id);
-static void ei_watchdog(u_long arg);
+static void ei_watchdog(struct timer_list *t);
 static void pcnet_reset_8390(struct net_device *dev);
 static int set_config(struct net_device *dev, struct ifmap *map);
 static int setup_shmem_window(struct pcmcia_device *link, int start_pg,
@@ -917,7 +917,7 @@ static int pcnet_open(struct net_device *dev)
 
 info->phy_id = info->eth_phy;
 info->link_status = 0x00;
-setup_timer(&info->watchdog, ei_watchdog, (u_long)dev);
+timer_setup(&info->watchdog, ei_watchdog, 0);
 mod_timer(&info->watchdog, jiffies + HZ);
 
 return ei_open(dev);
@@ -1006,10 +1006,10 @@ static irqreturn_t ei_irq_wrapper(int irq, void *dev_id)
 return ret;
 }
 
-static void ei_watchdog(u_long arg)
+static void ei_watchdog(struct timer_list *t)
 {
-struct net_device *dev = (struct net_device *)arg;
-struct pcnet_dev *info = PRIV(dev);
+struct pcnet_dev *info = from_timer(info, t, watchdog);
+struct net_device *dev = info->p_dev->priv;
 unsigned int nic_base = dev->base_addr;
 unsigned int mii_addr = nic_base + DLINK_GPIO;
 u_short link;
-- 
2.7.4


-- 
Kees Cook
Pixel Security

[PATCH v3 1/2] net: netrom: nr_route: refactor code in nr_add_node

2017-10-26 Thread Gustavo A. R. Silva

Code refactoring in order to make the code easier to read and maintain.

Signed-off-by: Gustavo A. R. Silva 
---
Changes in v2:
 Make use of the swap macro and remove inline keyword.

Changes in v3:
 Update subject.

 net/netrom/nr_route.c | 59 ++-
 1 file changed, 16 insertions(+), 43 deletions(-)

diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 0c59354..fba4b4c 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -80,6 +80,19 @@ static struct nr_neigh *nr_neigh_get_dev(ax25_address 
*callsign,
 
 static void nr_remove_neigh(struct nr_neigh *);
 
+/*  re-sort the routes in quality order.*/
+static void re_sort_routes(struct nr_node *nr_node, int x, int y)
+{
+   if (nr_node->routes[y].quality > nr_node->routes[x].quality) {
+   if (nr_node->which == x)
+   nr_node->which = y;
+   else if (nr_node->which == y)
+   nr_node->which = x;
+
+   swap(nr_node->routes[x], nr_node->routes[y]);
+   }
+}
+
 /*
  * Add a new route to a node, and in the process add the node and the
  * neighbour if it is new.
@@ -90,7 +103,6 @@ static int __must_check nr_add_node(ax25_address *nr, const 
char *mnemonic,
 {
struct nr_node  *nr_node;
struct nr_neigh *nr_neigh;
-   struct nr_route nr_route;
int i, found;
struct net_device *odev;
 
@@ -251,49 +263,10 @@ static int __must_check nr_add_node(ax25_address *nr, 
const char *mnemonic,
/* Now re-sort the routes in quality order */
switch (nr_node->count) {
case 3:
-   if (nr_node->routes[1].quality > nr_node->routes[0].quality) {
-   switch (nr_node->which) {
-   case 0:
-   nr_node->which = 1;
-   break;
-   case 1:
-   nr_node->which = 0;
-   break;
-   }
-   nr_route   = nr_node->routes[0];
-   nr_node->routes[0] = nr_node->routes[1];
-   nr_node->routes[1] = nr_route;
-   }
-   if (nr_node->routes[2].quality > nr_node->routes[1].quality) {
-   switch (nr_node->which) {
-   case 1:  nr_node->which = 2;
-   break;
-
-   case 2:  nr_node->which = 1;
-   break;
-
-   default:
-   break;
-   }
-   nr_route   = nr_node->routes[1];
-   nr_node->routes[1] = nr_node->routes[2];
-   nr_node->routes[2] = nr_route;
-   }
+   re_sort_routes(nr_node, 0, 1);
+   re_sort_routes(nr_node, 1, 2);
case 2:
-   if (nr_node->routes[1].quality > nr_node->routes[0].quality) {
-   switch (nr_node->which) {
-   case 0:  nr_node->which = 1;
-   break;
-
-   case 1:  nr_node->which = 0;
-   break;
-
-   default: break;
-   }
-   nr_route   = nr_node->routes[0];
-   nr_node->routes[0] = nr_node->routes[1];
-   nr_node->routes[1] = nr_route;
-   }
+   re_sort_routes(nr_node, 0, 1);
case 1:
break;
}
-- 
2.7.4

[PATCH v3 2/2] net: netrom: nr_route: mark expected switch fall-throughs

2017-10-26 Thread Gustavo A. R. Silva

In preparation for enabling -Wimplicit-fallthrough, mark switch cases
where we are expecting to fall through.

Signed-off-by: Gustavo A. R. Silva 
---
Changes in v2:
 None.

Changes in v3:
 Update subject.

 net/netrom/nr_route.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index fba4b4c..75e6ba9 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -265,6 +265,7 @@ static int __must_check nr_add_node(ax25_address *nr, const 
char *mnemonic,
case 3:
re_sort_routes(nr_node, 0, 1);
re_sort_routes(nr_node, 1, 2);
+   /* fall through */
case 2:
re_sort_routes(nr_node, 0, 1);
case 1:
@@ -357,6 +358,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address 
*neighbour, struct n
switch (i) {
case 0:
nr_node->routes[0] = nr_node->routes[1];
+   /* fall through */
case 1:
nr_node->routes[1] = nr_node->routes[2];
case 2:
@@ -526,6 +528,7 @@ void nr_rt_device_down(struct net_device *dev)
switch (i) {
case 0:
t->routes[0] = 
t->routes[1];
+   /* fall through */
case 1:
t->routes[1] = 
t->routes[2];
case 2:
-- 
2.7.4

Re: v6/sit tunnels and VRFs

2017-10-26 Thread Jeff Barnhill

Thanks, David.

I corrected the static route, applied the patch, and set the
link/output dev on the tunnel and it works now.  Is it required to set
the link/output dev?  I was thinking that this should not be required
for cases where the outgoing device is not known, for instance on a
router or device with multiple interfaces.

Also, what is the expected behavior of loopback addresses in a VRF
context?  For instance, if an application were being run under "ip vrf
exec" and it tried to use these addresses.

jeff@VM2:~$ ping -I myvrf 127.0.0.1
PING 127.0.0.1 (127.0.0.1) from 127.0.0.1 myvrf: 56(84) bytes of data.
^C
--- 127.0.0.1 ping statistics ---
3 packets transmitted, 0 received, 100% packet loss, time 2033ms

jeff@VM2:~$ ping -I myvrf ::1
connect: Network is unreachable

Thanks,
Jeff


On Thu, Oct 26, 2017 at 1:24 PM, David Ahern  wrote:
> On 10/25/17 9:28 PM, Jeff Barnhill wrote:
>> Thanks, David.
>>
>> VM1:
>> sudo ip addr add 192.168.200.1/24 dev enp0s8 broadcast 192.168.200.255
>> sudo ip link set enp0s8 up
>> sudo ip route add 192.168.210.0/24 nexthop via 192.168.200.3 dev enp0s8
>> sudo ip tunnel add jtun mode sit remote 192.168.210.2 local 192.168.200.1
>> sudo ip -6 addr add 2001::1/64 dev jtun
>> sudo ip link set jtun up
>>
>> VM2:
>> sudo ip addr add 192.168.210.2/24 dev enp0s8 broadcast 192.168.210.255
>> sudo ip link set enp0s8 up
>> sudo ip route add 192.168.200.0/24 nexthop via 192.168.210.3 dev enp0s8
>> sudo ip link add dev myvrf type vrf table 256
>> sudo ip link set myvrf up
>> sudo ip link set enp0s8 vrf myvrf
>
> You lost the static route by doing the enslaving here. When the device
> is added to or removed from a VRF it is cycled specifically to dump
> routes and neighbor entries associated with the prior vrf. Always create
> the vrf and enslave first, then add routes:
>
> sudo ip link add dev myvrf type vrf table 256
> sudo ip link set myvrf up
> sudo ip link set enp0s8 vrf myvrf
>
> sudo ip addr add 192.168.210.2/24 dev enp0s8 broadcast 192.168.210.255
> sudo ip link set enp0s8 up
> sudo ip route add 192.168.200.0/24 nexthop via 192.168.210.3 dev enp0s8
>
> That said, the above works for the wrong reason -- it is not really
> doing VRF based routing. For that to happen, the static route should be
> added to the vrf table:
>
> sudo ip route add vrf myvrf 192.168.200.0/24 nexthop via 192.168.210.3
> dev enp0s8
>
> And ...
>
>> sudo ip tunnel add jtun mode sit remote 192.168.200.1 local 192.168.210.2
>
> you need to specify the link on the tunnel create:
>
> sudo ip tunnel add jtun mode sit remote 192.168.200.1 local
> 192.168.210.2 dev enp0s8.
>
> And ...
>
> The tunnel lookup needs to account for the VRF device switch:
>
> (whitespace damaged on paste)
>
> diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
> index a799f5258614..cf0512054fa7 100644
> --- a/net/ipv6/sit.c
> +++ b/net/ipv6/sit.c
> @@ -632,11 +632,18 @@ static bool packet_is_spoofed(struct sk_buff *skb,
>  static int ipip6_rcv(struct sk_buff *skb)
>  {
> const struct iphdr *iph = ip_hdr(skb);
> +   struct net_device *dev = skb->dev;
> +   struct net *net = dev_net(dev);
> struct ip_tunnel *tunnel;
> int err;
>
> -   tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
> -iph->saddr, iph->daddr);
> +   if (netif_is_l3_master(dev)) {
> +   dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
> +   if (!dev)
> +   goto out;
> +   }
> +
> +   tunnel = ipip6_tunnel_lookup(net, dev, iph->saddr, iph->daddr);
> if (tunnel) {
> struct pcpu_sw_netstats *tstats;
>

Re: [PATCH net-next] net: dsa: Simplify dsa_slave_phy_setup()

2017-10-26 Thread Martin Hundebøll


On 2017-10-26 18:45, Florian Fainelli wrote:

On 10/26/2017 04:07 AM, Martin Hundebøll wrote:

On 2017-10-26 02:32, Florian Fainelli wrote:

Remove the code that tried to identify if a PHY designated by Device
Tree required diversion through the DSA-created MDIO bus. This was
created mainly for the bcm_sf2.c driver back when it did not have its
own MDIO bus driver, which it now has since 461cd1b03e32 ("net: dsa:
bcm_sf2: Register our slave MDIO bus").

Signed-off-by: Florian Fainelli

Tested-by: Martin Hundebøll

Thanks Martin, does that correctly fix the problem you reported a week
ago on 639X?


It does indeed. Thanks for the work on this.

// Martin

Re: [PATCH net-next] tcp: add tracepoint trace_tcp_retransmit_synack()

2017-10-26 Thread Song Liu


> On Oct 26, 2017, at 7:01 PM, Cong Wang  wrote:
> 
> On Thu, Oct 26, 2017 at 4:50 PM, Song Liu  wrote:
>> In this case, we are putting CONFIG_IPV6 in the TRACE_EVENT macro, which
>> generates warnings like:
>> 
>> ./include/trace/events/tcp.h:274:1: error: directive in argument list
>> ./include/trace/events/tcp.h:281:1: error: directive in argument list
>> 
>> Seems these warning cannot be easily avoided. This is also the same pattern 
>> we
>> have been using in include/trace/events/tcp.h.
> 
> Hmm, we use the same pattern, so why does it only complain about this one?

sparse reports the same warning for all the lines in tcp.h. I don't know why
the kbuild test bot only complains about this patch.

> 
>> 
>> Any suggestions on how shall we proceed from here?
>> 
> 
> I think this warning is harmless, so it is perhaps not worth the time to
> shut it up, unless sparse provides a simple way to do so.

About CFG80211_REQUIRE_SIGNED_REGDB in Kconfig

2017-10-26 Thread Yoshihiro Shimoda

Hi,

I have a question about CFG80211_REQUIRE_SIGNED_REGDB behavior in the latest 
net-next.git.

Since my environment disables CONFIG_EXPERT, CFG80211_CERTIFICATION_ONUS is 
also disabled.
In this case, menuconfig doesn't show me the config because the 
net/wireless/Kconfig has:

config CFG80211_REQUIRE_SIGNED_REGDB
bool "require regdb signature" if CFG80211_CERTIFICATION_ONUS
default y
select SYSTEM_DATA_VERIFICATION

Does this mean that non expert users should enable 
CFG80211_REQUIRE_SIGNED_REGDB anyway?
Or is there some other special reason for this?

Best regards,
Yoshihiro Shimoda

Re: [PATCH v2] ipv6: esp6: use BUG_ON instead of if condition followed by BUG

2017-10-26 Thread Gustavo A. R. Silva



Quoting Herbert Xu :


On Thu, Oct 26, 2017 at 07:51:06AM -0500, Gustavo A. R. Silva wrote:

Use BUG_ON instead of if condition followed by BUG in esp_remove_trailer.

This issue was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva 


Acked-by: Herbert Xu 


Thanks!

--
Gustavo A. R. Silva

Re: [PATCH] net: tipc: Convert timers to use timer_setup()

2017-10-26 Thread Kees Cook

On Tue, Oct 24, 2017 at 11:44 AM, Jon Maloy  wrote:
> NAK. It doesn't sound like a good idea to send this to net. Especially since 
> one of these timers has already been refactored in net-next.

Hi! I'm not sure what you mean about the one timer issue. I don't see
any use of timer_setup() in net/tipc (and no recent conversions to the
older setup_timer() API). What's the preferred path for landing this
API conversion in net/tipc/?

And, just to note, these changes are almost entirely mechanical. The
only "special" case is in tipc_sk_timeout() where the argument needs
to be slightly adjusted to fetch the tsk from the sk again.

Thanks!

-Kees

-- 
Kees Cook
Pixel Security

[PATCH v2 net-next 13/15] tcp: Namespace-ify sysctl_tcp_app_win

2017-10-26 Thread Eric Dumazet

Signed-off-by: Eric Dumazet 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  1 -
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp_input.c   |  8 
 net/ipv4/tcp_ipv4.c|  1 +
 5 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 
956957a77db96ad3d231cc018c13503d615d8d2e..63f91d52cbc0ad35d8e04a8da0d9f57aa960bcb0
 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -139,6 +139,7 @@ struct netns_ipv4 {
int sysctl_tcp_fack;
int sysctl_tcp_max_reordering;
int sysctl_tcp_dsack;
+   int sysctl_tcp_app_win;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
8b2ae3e8d79f223d4637226fc7278fe751d0b5d7..7aa3d65062a14a98358f8868fa2c0dbb2c74a0ce
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -247,7 +247,6 @@ extern int sysctl_tcp_max_orphans;
 extern long sysctl_tcp_mem[3];
 extern int sysctl_tcp_wmem[3];
 extern int sysctl_tcp_rmem[3];
-extern int sysctl_tcp_app_win;
 extern int sysctl_tcp_adv_win_scale;
 extern int sysctl_tcp_frto;
 extern int sysctl_tcp_nometrics_save;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 
7652a9c2a65d3f1cfa0a75d1198e1d9d56761c35..e057788834a99cf99e141a602ddbe19b8e6fce3c
 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -437,13 +437,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler   = proc_dointvec_minmax,
.extra1 = ,
},
-   {
-   .procname   = "tcp_app_win",
-   .data   = &sysctl_tcp_app_win,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec
-   },
{
.procname   = "tcp_adv_win_scale",
.data   = _tcp_adv_win_scale,
@@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
+   {
+   .procname   = "tcp_app_win",
+   .data   = &init_net.ipv4.sysctl_tcp_app_win,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec
+   },
{ }
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 
fd77037ac800a1153ec0ef904fcf00b93c061fa1..6af4b58ac6d5de54bdbb418f41a0b18eee38ca50
 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -79,7 +79,6 @@
 #include 
 #include 
 
-int sysctl_tcp_app_win __read_mostly = 31;
 int sysctl_tcp_adv_win_scale __read_mostly = 1;
 EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
 
@@ -428,6 +427,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
  */
 void tcp_init_buffer_space(struct sock *sk)
 {
+   int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
struct tcp_sock *tp = tcp_sk(sk);
int maxwin;
 
@@ -446,14 +446,14 @@ void tcp_init_buffer_space(struct sock *sk)
if (tp->window_clamp >= maxwin) {
tp->window_clamp = maxwin;
 
-   if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
+   if (tcp_app_win && maxwin > 4 * tp->advmss)
tp->window_clamp = max(maxwin -
-  (maxwin >> sysctl_tcp_app_win),
+  (maxwin >> tcp_app_win),
   4 * tp->advmss);
}
 
/* Force reservation of one segment. */
-   if (sysctl_tcp_app_win &&
+   if (tcp_app_win &&
tp->window_clamp > 2 * tp->advmss &&
tp->window_clamp + tp->advmss > maxwin)
tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 
d9d4d191e8f3c962a6ee68015ffe5a6e7fb8e9c1..189664ebd28e4cda7ef40a47591c3bd8cac3574b
 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2490,6 +2490,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_retrans_collapse = 1;
net->ipv4.sysctl_tcp_max_reordering = 300;
net->ipv4.sysctl_tcp_dsack = 1;
+   net->ipv4.sysctl_tcp_app_win = 31;
 
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
-- 
2.15.0.rc2.357.g7e34df9404-goog

[PATCH v2 net-next 05/15] tcp: Namespace-ify sysctl_tcp_retrans_collapse

2017-10-26 Thread Eric Dumazet

Signed-off-by: Eric Dumazet 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  1 -
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp_ipv4.c|  2 +-
 net/ipv4/tcp_output.c  |  5 +
 5 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 
8662692686b3af98a94a176230b9ed147881d87a..b28c172b10e497f235b51aae0fc2d3bbf7cc51f3
 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -132,6 +132,7 @@ struct netns_ipv4 {
int sysctl_tcp_recovery;
int sysctl_tcp_thin_linear_timeouts;
int sysctl_tcp_slow_start_after_idle;
+   int sysctl_tcp_retrans_collapse;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
cc2ab522eb5cf7cb08b6918cdfd5c5500cfbf057..33cc86355b8ff9b506d21ad46cfc01b3916f5b61
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -243,7 +243,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 
 /* sysctl variables for tcp */
-extern int sysctl_tcp_retrans_collapse;
 extern int sysctl_tcp_stdurg;
 extern int sysctl_tcp_rfc1337;
 extern int sysctl_tcp_abort_on_overflow;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 
40d69af8b363bc236e23879973872d8f9346d85e..533b92ad39dd0cada542028fe2f276d9eebcd2c8
 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -386,13 +386,6 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
 }
 
 static struct ctl_table ipv4_table[] = {
-   {
-   .procname   = "tcp_retrans_collapse",
-   .data   = &sysctl_tcp_retrans_collapse,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec
-   },
{
.procname   = "tcp_max_orphans",
.data   = _tcp_max_orphans,
@@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
+   {
+   .procname   = "tcp_retrans_collapse",
+   .data   = &init_net.ipv4.sysctl_tcp_retrans_collapse,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec
+   },
{ }
 };
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 
cea63a4b59655823def7a423d27191003c7f084c..2bc6ba2059d32aa848dbc415b4b0e194b61b0268
 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2487,7 +2487,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_early_retrans = 3;
net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 
behavior.  */
-
+   net->ipv4.sysctl_tcp_retrans_collapse = 1;
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 
bdc288a06f941add38a5cde434081c63ee94ed42..55a0aa4b96dfc7cd8f703ad42b932bae23ea5660
 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -45,9 +45,6 @@
 
 #include 
 
-/* People can turn this off for buggy TCP's found in printers etc. */
-int sysctl_tcp_retrans_collapse __read_mostly = 1;
-
 /* People can turn this on to work with those rare, broken TCPs that
  * interpret the window field as a signed quantity.
  */
@@ -2804,7 +2801,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, 
struct sk_buff *to,
struct sk_buff *skb = to, *tmp;
bool first = true;
 
-   if (!sysctl_tcp_retrans_collapse)
+   if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
return;
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
return;
-- 
2.15.0.rc2.357.g7e34df9404-goog

[PATCH v2 net-next 10/15] tcp: remove stale sysctl_tcp_reordering

2017-10-26 Thread Eric Dumazet

This extern is no longer used.

Signed-off-by: Eric Dumazet 
---
 include/net/tcp.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
e7b15e9f6e288908bf58a28fe24554630c1e0710..fc134ba74c7d38d08304b5be36506946784538f2
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -244,7 +244,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 /* sysctl variables for tcp */
 extern int sysctl_tcp_max_orphans;
-extern int sysctl_tcp_reordering;
 extern int sysctl_tcp_max_reordering;
 extern int sysctl_tcp_dsack;
 extern long sysctl_tcp_mem[3];
-- 
2.15.0.rc2.357.g7e34df9404-goog

[PATCH v2 net-next 08/15] tcp: Namespace-ify sysctl_tcp_abort_on_overflow

2017-10-26 Thread Eric Dumazet

Signed-off-by: Eric Dumazet 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  1 -
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp_minisocks.c   |  4 +---
 4 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 
968edce38eb5d3399724b3142277eab44f19f2fb..3875fdf6b18653477408beb25176eac849e65ba4
 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -135,6 +135,7 @@ struct netns_ipv4 {
int sysctl_tcp_retrans_collapse;
int sysctl_tcp_stdurg;
int sysctl_tcp_rfc1337;
+   int sysctl_tcp_abort_on_overflow;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
2aea2b3373b38dde9aabf869931448e9ecd38649..7331281a229289f130ad7b5c5ddec1eba1ea2747
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -243,7 +243,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 
 /* sysctl variables for tcp */
-extern int sysctl_tcp_abort_on_overflow;
 extern int sysctl_tcp_max_orphans;
 extern int sysctl_tcp_fack;
 extern int sysctl_tcp_reordering;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 
832e554235df37770809541ad8f9f1ca2f201739..ffd1fd769bba7c3524aa6dfac734e1de0cad1506
 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -393,13 +393,6 @@ static struct ctl_table ipv4_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
-   {
-   .procname   = "tcp_abort_on_overflow",
-   .data   = &sysctl_tcp_abort_on_overflow,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec
-   },
{
.procname   = "inet_peer_threshold",
.data   = _peer_threshold,
@@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
+   {
+   .procname   = "tcp_abort_on_overflow",
+   .data   = &init_net.ipv4.sysctl_tcp_abort_on_overflow,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec
+   },
{ }
 };
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 
11836667763cf9a32c673086b6dc2d759833c856..3674d63170b293778d32abd34aa32043c001aa82
 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -29,8 +29,6 @@
 #include 
 #include 
 
-int sysctl_tcp_abort_on_overflow __read_mostly;
-
 static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
 {
if (seq == s_win)
@@ -783,7 +781,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff 
*skb,
return inet_csk_complete_hashdance(sk, child, req, own_req);
 
 listen_overflow:
-   if (!sysctl_tcp_abort_on_overflow) {
+   if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) {
inet_rsk(req)->acked = 1;
return NULL;
}
-- 
2.15.0.rc2.357.g7e34df9404-goog

[PATCH v2 net-next 15/15] tcp: Namespace-ify sysctl_tcp_frto

2017-10-26 Thread Eric Dumazet

Signed-off-by: Eric Dumazet 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  1 -
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp_input.c   |  3 +--
 net/ipv4/tcp_ipv4.c|  1 +
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 
9dbb07d4eff465428817831e55c6a4922b7208fb..f4622e28db3a1484553f51709b144ee769766a28
 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -141,6 +141,7 @@ struct netns_ipv4 {
int sysctl_tcp_dsack;
int sysctl_tcp_app_win;
int sysctl_tcp_adv_win_scale;
+   int sysctl_tcp_frto;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
0dc27cd248997bf6a0463477db38db483c312fb0..18f047501f53be3780bd41a5c8234adf9683cebf
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -247,7 +247,6 @@ extern int sysctl_tcp_max_orphans;
 extern long sysctl_tcp_mem[3];
 extern int sysctl_tcp_wmem[3];
 extern int sysctl_tcp_rmem[3];
-extern int sysctl_tcp_frto;
 extern int sysctl_tcp_nometrics_save;
 extern int sysctl_tcp_moderate_rcvbuf;
 extern int sysctl_tcp_tso_win_divisor;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 
a95123e1e7da706c88bf5553b7d8ef6c2653ab50..f1bcb9b7e082c6688fad12e15be9b872ebed8151
 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -437,13 +437,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler   = proc_dointvec_minmax,
.extra1 = ,
},
-   {
-   .procname   = "tcp_frto",
-   .data   = &sysctl_tcp_frto,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec
-   },
{
.procname   = "tcp_min_rtt_wlen",
.data   = _tcp_min_rtt_wlen,
@@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = _adv_win_scale_min,
.extra2 = _adv_win_scale_max,
},
+   {
+   .procname   = "tcp_frto",
+   .data   = &init_net.ipv4.sysctl_tcp_frto,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec
+   },
{ }
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 
8ee2c84b0bc67f943dbaea95d9433e82b9a7d082..90d76f1c8f96bc89618ddc59ae237a34cd25db7c
 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -83,7 +83,6 @@
 int sysctl_tcp_challenge_ack_limit = 1000;
 
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
-int sysctl_tcp_frto __read_mostly = 2;
 int sysctl_tcp_min_rtt_wlen __read_mostly = 300;
 int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
 int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
@@ -2026,7 +2025,7 @@ void tcp_enter_loss(struct sock *sk)
 * falsely raise the receive window, which results in repeated
 * timeouts and stop-and-go behavior.
 */
-   tp->frto = sysctl_tcp_frto &&
+   tp->frto = net->ipv4.sysctl_tcp_frto &&
   (new_recovery || icsk->icsk_retransmits) &&
   !inet_csk(sk)->icsk_mtup.probe_size;
 }
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 
1fe30fb99308b3e3fd07509b509b0e3727cc5d44..49757c7582c6d2cf413415be2c1b58482659
 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2492,6 +2492,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_dsack = 1;
net->ipv4.sysctl_tcp_app_win = 31;
net->ipv4.sysctl_tcp_adv_win_scale = 1;
+   net->ipv4.sysctl_tcp_frto = 2;
 
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
-- 
2.15.0.rc2.357.g7e34df9404-goog

[PATCH v2 net-next 09/15] tcp: Namespace-ify sysctl_tcp_fack

2017-10-26 Thread Eric Dumazet

Signed-off-by: Eric Dumazet 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  1 -
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp.c |  2 +-
 net/ipv4/tcp_input.c   |  3 +--
 net/ipv4/tcp_minisocks.c   |  2 +-
 6 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 
3875fdf6b18653477408beb25176eac849e65ba4..f0e792beeea974b0850090d7624a3d7490124067
 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -136,6 +136,7 @@ struct netns_ipv4 {
int sysctl_tcp_stdurg;
int sysctl_tcp_rfc1337;
int sysctl_tcp_abort_on_overflow;
+   int sysctl_tcp_fack;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
7331281a229289f130ad7b5c5ddec1eba1ea2747..e7b15e9f6e288908bf58a28fe24554630c1e0710
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -244,7 +244,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 /* sysctl variables for tcp */
 extern int sysctl_tcp_max_orphans;
-extern int sysctl_tcp_fack;
 extern int sysctl_tcp_reordering;
 extern int sysctl_tcp_max_reordering;
 extern int sysctl_tcp_dsack;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 
ffd1fd769bba7c3524aa6dfac734e1de0cad1506..1f23be13ce7be8b2a12b82aada36c6351fdfb70a
 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -414,13 +414,6 @@ static struct ctl_table ipv4_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec_jiffies,
},
-   {
-   .procname   = "tcp_fack",
-   .data   = &sysctl_tcp_fack,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec
-   },
{
.procname   = "tcp_max_reordering",
.data   = &sysctl_tcp_max_reordering,
@@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
+   {
+   .procname   = "tcp_fack",
+   .data   = &init_net.ipv4.sysctl_tcp_fack,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec
+   },
{ }
 };
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 
f6e1c00e300eeedcfe2ff0f4f2a4e1d997cd315d..c7c983f0f817c639e68f6fb1a70916cb604de90b
 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2517,7 +2517,7 @@ static int tcp_repair_options_est(struct sock *sk,
return -EINVAL;
 
tp->rx_opt.sack_ok |= TCP_SACK_SEEN;
-   if (sysctl_tcp_fack)
+   if (sock_net(sk)->ipv4.sysctl_tcp_fack)
tcp_enable_fack(tp);
break;
case TCPOPT_TIMESTAMP:
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 
64fde81b0eb70feccffd18a703e2b604e306ea65..c5b94460793f9693719b38978c123209e2b6ec0f
 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -79,7 +79,6 @@
 #include 
 #include 
 
-int sysctl_tcp_fack __read_mostly;
 int sysctl_tcp_max_reordering __read_mostly = 300;
 int sysctl_tcp_dsack __read_mostly = 1;
 int sysctl_tcp_app_win __read_mostly = 31;
@@ -5720,7 +5719,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, 
struct sk_buff *skb,
tp->tcp_header_len = sizeof(struct tcphdr);
}
 
-   if (tcp_is_sack(tp) && sysctl_tcp_fack)
+   if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_fack)
tcp_enable_fack(tp);
 
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 
3674d63170b293778d32abd34aa32043c001aa82..3270ab8416ce8691cbb1c3a25533142fe1029bed
 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -510,7 +510,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 
newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
-   if (sysctl_tcp_fack)
+   if (sock_net(sk)->ipv4.sysctl_tcp_fack)
tcp_enable_fack(newtp);
}
newtp->window_clamp = req->rsk_window_clamp;
-- 
2.15.0.rc2.357.g7e34df9404-goog

[PATCH v2 net-next 12/15] tcp: Namespace-ify sysctl_tcp_dsack

2017-10-26 Thread Eric Dumazet

Signed-off-by: Eric Dumazet 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  1 -
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp_input.c   |  5 ++---
 net/ipv4/tcp_ipv4.c|  1 +
 5 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 
3f6844665a2fbe66fc0c91bd13e057ac2e03007a..956957a77db96ad3d231cc018c13503d615d8d2e
 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -138,6 +138,7 @@ struct netns_ipv4 {
int sysctl_tcp_abort_on_overflow;
int sysctl_tcp_fack;
int sysctl_tcp_max_reordering;
+   int sysctl_tcp_dsack;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
8cd286226a1eca27d97b9f182d1a951b072e4575..8b2ae3e8d79f223d4637226fc7278fe751d0b5d7
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -244,7 +244,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 /* sysctl variables for tcp */
 extern int sysctl_tcp_max_orphans;
-extern int sysctl_tcp_dsack;
 extern long sysctl_tcp_mem[3];
 extern int sysctl_tcp_wmem[3];
 extern int sysctl_tcp_rmem[3];
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 
18cd228a20690541936dd6b3d9bb02cb283a9740..7652a9c2a65d3f1cfa0a75d1198e1d9d56761c35
 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -414,13 +414,6 @@ static struct ctl_table ipv4_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec_jiffies,
},
-   {
-   .procname   = "tcp_dsack",
-   .data   = &sysctl_tcp_dsack,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec
-   },
{
.procname   = "tcp_mem",
.maxlen = sizeof(sysctl_tcp_mem),
@@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
+   {
+   .procname   = "tcp_dsack",
+   .data   = &init_net.ipv4.sysctl_tcp_dsack,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec
+   },
{ }
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 
c118657f06ee390053e38c35f03bea5b82845513..fd77037ac800a1153ec0ef904fcf00b93c061fa1
 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -79,7 +79,6 @@
 #include 
 #include 
 
-int sysctl_tcp_dsack __read_mostly = 1;
 int sysctl_tcp_app_win __read_mostly = 31;
 int sysctl_tcp_adv_win_scale __read_mostly = 1;
 EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
@@ -4150,7 +4149,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 
end_seq)
 {
struct tcp_sock *tp = tcp_sk(sk);
 
-   if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+   if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
int mib_idx;
 
if (before(seq, tp->rcv_nxt))
@@ -4185,7 +4184,7 @@ static void tcp_send_dupack(struct sock *sk, const struct 
sk_buff *skb)
NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_enter_quickack_mode(sk);
 
-   if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+   if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
u32 end_seq = TCP_SKB_CB(skb)->end_seq;
 
if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 
c379a242abb3546044da9a3ef032f6f68acafe88..d9d4d191e8f3c962a6ee68015ffe5a6e7fb8e9c1
 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2489,6 +2489,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 
behavior.  */
net->ipv4.sysctl_tcp_retrans_collapse = 1;
net->ipv4.sysctl_tcp_max_reordering = 300;
+   net->ipv4.sysctl_tcp_dsack = 1;
 
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
-- 
2.15.0.rc2.357.g7e34df9404-goog

[PATCH v2 net-next 06/15] tcp: Namespace-ify sysctl_tcp_stdurg

2017-10-26 Thread Eric Dumazet

Signed-off-by: Eric Dumazet 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  1 -
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp_input.c   |  3 +--
 4 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 
b28c172b10e497f235b51aae0fc2d3bbf7cc51f3..ffa2cf3dc747ca9443df3927dc7928c18357f872
 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -133,6 +133,7 @@ struct netns_ipv4 {
int sysctl_tcp_thin_linear_timeouts;
int sysctl_tcp_slow_start_after_idle;
int sysctl_tcp_retrans_collapse;
+   int sysctl_tcp_stdurg;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
33cc86355b8ff9b506d21ad46cfc01b3916f5b61..cf3fac7008d791f2a01e4df9178164769a861c60
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -243,7 +243,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 
 /* sysctl variables for tcp */
-extern int sysctl_tcp_stdurg;
 extern int sysctl_tcp_rfc1337;
 extern int sysctl_tcp_abort_on_overflow;
 extern int sysctl_tcp_max_orphans;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 
533b92ad39dd0cada542028fe2f276d9eebcd2c8..a34bb75815c15afc077ba7ff36939b5abc9229f6
 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -400,13 +400,6 @@ static struct ctl_table ipv4_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
-   {
-   .procname   = "tcp_stdurg",
-   .data   = &sysctl_tcp_stdurg,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec
-   },
{
.procname   = "tcp_rfc1337",
.data   = &sysctl_tcp_rfc1337,
@@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
+   {
+   .procname   = "tcp_stdurg",
+   .data   = &init_net.ipv4.sysctl_tcp_stdurg,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec
+   },
{ }
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 
5b2272dbf6a9a507d62d8ee594fab53284b22a6d..14b06963c102dc8c747050448e504fc2e75a4eb4
 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -89,7 +89,6 @@ EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
 /* rfc5961 challenge ack rate limiting */
 int sysctl_tcp_challenge_ack_limit = 1000;
 
-int sysctl_tcp_stdurg __read_mostly;
 int sysctl_tcp_rfc1337 __read_mostly;
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 int sysctl_tcp_frto __read_mostly = 2;
@@ -5123,7 +5122,7 @@ static void tcp_check_urg(struct sock *sk, const struct 
tcphdr *th)
struct tcp_sock *tp = tcp_sk(sk);
u32 ptr = ntohs(th->urg_ptr);
 
-   if (ptr && !sysctl_tcp_stdurg)
+   if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
ptr--;
ptr += ntohl(th->seq);
 
-- 
2.15.0.rc2.357.g7e34df9404-goog

[PATCH v2 net-next 14/15] tcp: Namespace-ify sysctl_tcp_adv_win_scale

2017-10-26 Thread Eric Dumazet

Signed-off-by: Eric Dumazet 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  9 -
 net/ipv4/sysctl_net_ipv4.c | 18 +-
 net/ipv4/tcp_input.c   | 13 +
 net/ipv4/tcp_ipv4.c|  1 +
 5 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 
63f91d52cbc0ad35d8e04a8da0d9f57aa960bcb0..9dbb07d4eff465428817831e55c6a4922b7208fb
 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -140,6 +140,7 @@ struct netns_ipv4 {
int sysctl_tcp_max_reordering;
int sysctl_tcp_dsack;
int sysctl_tcp_app_win;
+   int sysctl_tcp_adv_win_scale;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
7aa3d65062a14a98358f8868fa2c0dbb2c74a0ce..0dc27cd248997bf6a0463477db38db483c312fb0
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -247,7 +247,6 @@ extern int sysctl_tcp_max_orphans;
 extern long sysctl_tcp_mem[3];
 extern int sysctl_tcp_wmem[3];
 extern int sysctl_tcp_rmem[3];
-extern int sysctl_tcp_adv_win_scale;
 extern int sysctl_tcp_frto;
 extern int sysctl_tcp_nometrics_save;
 extern int sysctl_tcp_moderate_rcvbuf;
@@ -1311,9 +1310,9 @@ void tcp_select_initial_window(int __space, __u32 mss, 
__u32 *rcv_wnd,
   __u32 *window_clamp, int wscale_ok,
   __u8 *rcv_wscale, __u32 init_rcv_wnd);
 
-static inline int tcp_win_from_space(int space)
+static inline int tcp_win_from_space(const struct sock *sk, int space)
 {
-   int tcp_adv_win_scale = sysctl_tcp_adv_win_scale;
+   int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;
 
return tcp_adv_win_scale <= 0 ?
(space>>(-tcp_adv_win_scale)) :
@@ -1323,13 +1322,13 @@ static inline int tcp_win_from_space(int space)
 /* Note: caller must be prepared to deal with negative returns */
 static inline int tcp_space(const struct sock *sk)
 {
-   return tcp_win_from_space(sk->sk_rcvbuf -
+   return tcp_win_from_space(sk, sk->sk_rcvbuf -
  atomic_read(&sk->sk_rmem_alloc));
 }
 
 static inline int tcp_full_space(const struct sock *sk)
 {
-   return tcp_win_from_space(sk->sk_rcvbuf);
+   return tcp_win_from_space(sk, sk->sk_rcvbuf);
 }
 
 extern void tcp_openreq_init_rwin(struct request_sock *req,
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 
e057788834a99cf99e141a602ddbe19b8e6fce3c..a95123e1e7da706c88bf5553b7d8ef6c2653ab50
 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -437,15 +437,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler   = proc_dointvec_minmax,
.extra1 = ,
},
-   {
-   .procname   = "tcp_adv_win_scale",
-   .data   = &sysctl_tcp_adv_win_scale,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec_minmax,
-   .extra1 = &tcp_adv_win_scale_min,
-   .extra2 = &tcp_adv_win_scale_max,
-   },
{
.procname   = "tcp_frto",
.data   = &sysctl_tcp_frto,
@@ -1145,6 +1136,15 @@ static struct ctl_table ipv4_net_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
+   {
+   .procname   = "tcp_adv_win_scale",
+   .data   = &init_net.ipv4.sysctl_tcp_adv_win_scale,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec_minmax,
+   .extra1 = &tcp_adv_win_scale_min,
+   .extra2 = &tcp_adv_win_scale_max,
+   },
{ }
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 
6af4b58ac6d5de54bdbb418f41a0b18eee38ca50..8ee2c84b0bc67f943dbaea95d9433e82b9a7d082
 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -79,9 +79,6 @@
 #include 
 #include 
 
-int sysctl_tcp_adv_win_scale __read_mostly = 1;
-EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
-
 /* rfc5961 challenge ack rate limiting */
 int sysctl_tcp_challenge_ack_limit = 1000;
 
@@ -363,8 +360,8 @@ static int __tcp_grow_window(const struct sock *sk, const 
struct sk_buff *skb)
 {
struct tcp_sock *tp = tcp_sk(sk);
/* Optimize this! */
-   int truesize = tcp_win_from_space(skb->truesize) >> 1;
-   int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
+   int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
+   int window = tcp_win_from_space(sk, sysctl_tcp_rmem[2]) >> 1;
 
while (tp->rcv_ssthresh <= window) {
if (truesize <= skb->len)
@@ -389,7 +386,7 @@ static void tcp_grow_window(struct

[PATCH v2 net-next 11/15] tcp: Namespace-ify sysctl_tcp_max_reordering

2017-10-26 Thread Eric Dumazet

Signed-off-by: Eric Dumazet 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  1 -
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp_input.c   |  3 +--
 net/ipv4/tcp_ipv4.c|  2 ++
 5 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 
f0e792beeea974b0850090d7624a3d7490124067..3f6844665a2fbe66fc0c91bd13e057ac2e03007a
 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -137,6 +137,7 @@ struct netns_ipv4 {
int sysctl_tcp_rfc1337;
int sysctl_tcp_abort_on_overflow;
int sysctl_tcp_fack;
+   int sysctl_tcp_max_reordering;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
fc134ba74c7d38d08304b5be36506946784538f2..8cd286226a1eca27d97b9f182d1a951b072e4575
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -244,7 +244,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 /* sysctl variables for tcp */
 extern int sysctl_tcp_max_orphans;
-extern int sysctl_tcp_max_reordering;
 extern int sysctl_tcp_dsack;
 extern long sysctl_tcp_mem[3];
 extern int sysctl_tcp_wmem[3];
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 
1f23be13ce7be8b2a12b82aada36c6351fdfb70a..18cd228a20690541936dd6b3d9bb02cb283a9740
 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -414,13 +414,6 @@ static struct ctl_table ipv4_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec_jiffies,
},
-   {
-   .procname   = "tcp_max_reordering",
-   .data   = &sysctl_tcp_max_reordering,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec
-   },
{
.procname   = "tcp_dsack",
.data   = &sysctl_tcp_dsack,
@@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
+   {
+   .procname   = "tcp_max_reordering",
+   .data   = &init_net.ipv4.sysctl_tcp_max_reordering,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec
+   },
{ }
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 
c5b94460793f9693719b38978c123209e2b6ec0f..c118657f06ee390053e38c35f03bea5b82845513
 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -79,7 +79,6 @@
 #include 
 #include 
 
-int sysctl_tcp_max_reordering __read_mostly = 300;
 int sysctl_tcp_dsack __read_mostly = 1;
 int sysctl_tcp_app_win __read_mostly = 31;
 int sysctl_tcp_adv_win_scale __read_mostly = 1;
@@ -889,7 +888,7 @@ static void tcp_update_reordering(struct sock *sk, const 
int metric,
return;
 
if (metric > tp->reordering) {
-   tp->reordering = min(sysctl_tcp_max_reordering, metric);
+   tp->reordering = min(sock_net(sk)->ipv4.sysctl_tcp_max_reordering, metric);
 
 #if FASTRETRANS_DEBUG > 1
pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 
2bc6ba2059d32aa848dbc415b4b0e194b61b0268..c379a242abb3546044da9a3ef032f6f68acafe88
 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2488,6 +2488,8 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 
behavior.  */
net->ipv4.sysctl_tcp_retrans_collapse = 1;
+   net->ipv4.sysctl_tcp_max_reordering = 300;
+
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
-- 
2.15.0.rc2.357.g7e34df9404-goog

[PATCH v2 net-next 01/15] tcp: Namespace-ify sysctl_tcp_early_retrans

2017-10-26 Thread Eric Dumazet

Signed-off-by: Eric Dumazet 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  1 -
 net/ipv4/sysctl_net_ipv4.c | 18 +-
 net/ipv4/tcp_input.c   |  1 -
 net/ipv4/tcp_ipv4.c|  1 +
 net/ipv4/tcp_output.c  |  4 +++-
 6 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 
2c4222a5d1025f5928665e10edb70fad65352dba..a7f39e3ea666a835b6042e4008c86ccaadd14b46
 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -128,6 +128,7 @@ struct netns_ipv4 {
int sysctl_tcp_sack;
int sysctl_tcp_window_scaling;
int sysctl_tcp_timestamps;
+   int sysctl_tcp_early_retrans;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
285bc82dea410b22ac585ee65daff5cbac7c3fc7..a12b71d4118baa6b939bdeba7380cb3830d46ff0
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -265,7 +265,6 @@ extern int sysctl_tcp_workaround_signed_windows;
 extern int sysctl_tcp_slow_start_after_idle;
 extern int sysctl_tcp_thin_linear_timeouts;
 extern int sysctl_tcp_thin_dupack;
-extern int sysctl_tcp_early_retrans;
 extern int sysctl_tcp_recovery;
 #define TCP_RACK_LOSS_DETECTION  0x1 /* Use RACK to detect losses */
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 
81d218346cf7a0f340f964c434a21cace5c41fa0..f0f650f020afd535f41943c6c9fb1483be7cfb8d
 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -634,15 +634,6 @@ static struct ctl_table ipv4_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
-   {
-   .procname   = "tcp_early_retrans",
-   .data   = &sysctl_tcp_early_retrans,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec_minmax,
-   .extra1 = &zero,
-   .extra2 = &four,
-   },
{
.procname   = "tcp_min_tso_segs",
.data   = &sysctl_tcp_min_tso_segs,
@@ -1145,6 +1136,15 @@ static struct ctl_table ipv4_net_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
+   {
+   .procname   = "tcp_early_retrans",
+   .data   = &init_net.ipv4.sysctl_tcp_early_retrans,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec_minmax,
+   .extra1 = &zero,
+   .extra2 = &four,
+   },
{ }
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 
337f6011528a7d4c3ab7fdcc0623496cfefafc71..7656b1e6d5046297b4c5e6cf5591266b9be40095
 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -95,7 +95,6 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 int sysctl_tcp_frto __read_mostly = 2;
 int sysctl_tcp_min_rtt_wlen __read_mostly = 300;
 int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
-int sysctl_tcp_early_retrans __read_mostly = 3;
 int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
 
 #define FLAG_DATA  0x01 /* Incoming frame contained data.  
*/
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 
23a8100af5ad399d4fa2568f4cac19192a008055..7ab313f6768e234173d78f17cfb1f664b230e958
 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2484,6 +2484,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_sack = 1;
net->ipv4.sysctl_tcp_window_scaling = 1;
net->ipv4.sysctl_tcp_timestamps = 1;
+   net->ipv4.sysctl_tcp_early_retrans = 3;
 
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 
c8fc512e0bbb48f7d36e159e8aae56ec70a24498..21713836d46af9d48de10e8ec0e7410572ed7eeb
 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2435,6 +2435,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
u32 timeout, rto_delta_us;
+   int early_retrans;
 
/* Don't do any loss probe on a Fast Open connection before 3WHS
 * finishes.
@@ -2442,10 +2443,11 @@ bool tcp_schedule_loss_probe(struct sock *sk)
if (tp->fastopen_rsk)
return false;
 
+   early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
/* Schedule a loss probe in 2*RTT for SACK capable connections
 * in Open state, that are either limited by cwnd or application.
 */
-   if ((sysctl_tcp_early_retrans != 3 && sysctl_tcp_early_retrans != 4) ||
+   if ((early_retrans != 3

Re: [PATCH net-next 00/15] tcp: move 14 sysctls to namespaces

2017-10-26 Thread Eric Dumazet

On Fri, 2017-10-27 at 13:46 +0900, David Miller wrote:
> From: Eric Dumazet 
> Date: Thu, 26 Oct 2017 16:35:11 -0700
> 
> > Ideally all TCP sysctls should be per netns.
> > This patch series takes care of 14 sysctls.
> > More to come later.
> 
> The tcp-fack patch doesn't apply cleanly, please respin.
> 
> Thank you.

Sure, I did a git rebase that went well.

v2 is coming right away, thanks.

[PATCH v2 net-next 07/15] tcp: Namespace-ify sysctl_tcp_rfc1337

2017-10-26 Thread Eric Dumazet

Signed-off-by: Eric Dumazet 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  1 -
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp_input.c   |  1 -
 net/ipv4/tcp_minisocks.c   |  2 +-
 5 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 
ffa2cf3dc747ca9443df3927dc7928c18357f872..968edce38eb5d3399724b3142277eab44f19f2fb
 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -134,6 +134,7 @@ struct netns_ipv4 {
int sysctl_tcp_slow_start_after_idle;
int sysctl_tcp_retrans_collapse;
int sysctl_tcp_stdurg;
+   int sysctl_tcp_rfc1337;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
cf3fac7008d791f2a01e4df9178164769a861c60..2aea2b3373b38dde9aabf869931448e9ecd38649
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -243,7 +243,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 
 /* sysctl variables for tcp */
-extern int sysctl_tcp_rfc1337;
 extern int sysctl_tcp_abort_on_overflow;
 extern int sysctl_tcp_max_orphans;
 extern int sysctl_tcp_fack;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 
a34bb75815c15afc077ba7ff36939b5abc9229f6..832e554235df37770809541ad8f9f1ca2f201739
 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -400,13 +400,6 @@ static struct ctl_table ipv4_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
-   {
-   .procname   = "tcp_rfc1337",
-   .data   = &sysctl_tcp_rfc1337,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec
-   },
{
.procname   = "inet_peer_threshold",
.data   = &inet_peer_threshold,
@@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
+   {
+   .procname   = "tcp_rfc1337",
+   .data   = &init_net.ipv4.sysctl_tcp_rfc1337,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec
+   },
{ }
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 
14b06963c102dc8c747050448e504fc2e75a4eb4..64fde81b0eb70feccffd18a703e2b604e306ea65
 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -89,7 +89,6 @@ EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
 /* rfc5961 challenge ack rate limiting */
 int sysctl_tcp_challenge_ack_limit = 1000;
 
-int sysctl_tcp_rfc1337 __read_mostly;
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 int sysctl_tcp_frto __read_mostly = 2;
 int sysctl_tcp_min_rtt_wlen __read_mostly = 300;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 
056009f1c14f13ac4af987d0a7451f32dbde0023..11836667763cf9a32c673086b6dc2d759833c856
 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -181,7 +181,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, 
struct sk_buff *skb,
 * Oh well... nobody has a sufficient solution to this
 * protocol bug yet.
 */
-   if (sysctl_tcp_rfc1337 == 0) {
+   if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) {
 kill:
inet_twsk_deschedule_put(tw);
return TCP_TW_SUCCESS;
-- 
2.15.0.rc2.357.g7e34df9404-goog

[PATCH v2 net-next 00/15] tcp: move 14 sysctls to namespaces

2017-10-26 Thread Eric Dumazet

Ideally all TCP sysctls should be per netns.
This patch series takes care of 14 sysctls.
More to come later.

Eric Dumazet (15):
  tcp: Namespace-ify sysctl_tcp_early_retrans
  tcp: Namespace-ify sysctl_tcp_recovery
  tcp: Namespace-ify sysctl_tcp_thin_linear_timeouts
  tcp: Namespace-ify sysctl_tcp_slow_start_after_idle
  tcp: Namespace-ify sysctl_tcp_retrans_collapse
  tcp: Namespace-ify sysctl_tcp_stdurg
  tcp: Namespace-ify sysctl_tcp_rfc1337
  tcp: Namespace-ify sysctl_tcp_abort_on_overflow
  tcp: Namespace-ify sysctl_tcp_fack
  tcp: remove stale sysctl_tcp_reordering
  tcp: Namespace-ify sysctl_tcp_max_reordering
  tcp: Namespace-ify sysctl_tcp_dsack
  tcp: Namespace-ify sysctl_tcp_app_win
  tcp: Namespace-ify sysctl_tcp_adv_win_scale
  tcp: Namespace-ify sysctl_tcp_frto

 include/net/netns/ipv4.h   |  14 
 include/net/tcp.h  |  27 ++
 net/ipv4/sysctl_net_ipv4.c | 204 ++---
 net/ipv4/tcp.c |   2 +-
 net/ipv4/tcp_input.c   |  42 --
 net/ipv4/tcp_ipv4.c|   9 ++
 net/ipv4/tcp_minisocks.c   |   8 +-
 net/ipv4/tcp_output.c  |  14 ++--
 net/ipv4/tcp_recovery.c|   2 -
 net/ipv4/tcp_timer.c   |   4 +-
 10 files changed, 157 insertions(+), 169 deletions(-)

-- 
2.15.0.rc2.357.g7e34df9404-goog

[PATCH v2 net-next 03/15] tcp: Namespace-ify sysctl_tcp_thin_linear_timeouts

2017-10-26 Thread Eric Dumazet

Note that sysctl_tcp_thin_dupack was not used, I deleted it.

Signed-off-by: Eric Dumazet 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  2 --
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp_timer.c   |  4 +---
 4 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 
d6ed718075d44cfc0e60995c1e938d588ad261a8..2a9f37b39c45fe451e45025790a4e5c45ece5cbc
 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -130,6 +130,7 @@ struct netns_ipv4 {
int sysctl_tcp_timestamps;
int sysctl_tcp_early_retrans;
int sysctl_tcp_recovery;
+   int sysctl_tcp_thin_linear_timeouts;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
c7f51534fc44c61a95225e0adc0a1200ea5c0c1c..063a7a48b7fe23092023d053e26a967389628cdc
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -263,8 +263,6 @@ extern int sysctl_tcp_moderate_rcvbuf;
 extern int sysctl_tcp_tso_win_divisor;
 extern int sysctl_tcp_workaround_signed_windows;
 extern int sysctl_tcp_slow_start_after_idle;
-extern int sysctl_tcp_thin_linear_timeouts;
-extern int sysctl_tcp_thin_dupack;
 
 #define TCP_RACK_LOSS_DETECTION  0x1 /* Use RACK to detect losses */
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 
78019adcae875a438264ee47723670f6b54cacf9..12003214f4d80b38d5f754ddd91be8a990168ade
 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -620,13 +620,6 @@ static struct ctl_table ipv4_table[] = {
.mode   = 0644,
.proc_handler   = proc_allowed_congestion_control,
},
-   {
-   .procname   = "tcp_thin_linear_timeouts",
-   .data   = &sysctl_tcp_thin_linear_timeouts,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec
-   },
{
.procname   = "tcp_min_tso_segs",
.data   = &sysctl_tcp_min_tso_segs,
@@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec,
},
+   {
+   .procname   = "tcp_thin_linear_timeouts",
+   .data   = &init_net.ipv4.sysctl_tcp_thin_linear_timeouts,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec
+   },
{ }
 };
 
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 
804a8d34ce86cc17472c918c00c25de88b85184f..035a1ef1f2d8462c1d19f364b599ffac538ef688
 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -22,8 +22,6 @@
 #include 
 #include 
 
-int sysctl_tcp_thin_linear_timeouts __read_mostly;
-
 /**
  *  tcp_write_err() - close socket and save error info
  *  @sk:  The socket the error has appeared on.
@@ -522,7 +520,7 @@ void tcp_retransmit_timer(struct sock *sk)
 * linear-timeout retransmissions into a black hole
 */
if (sk->sk_state == TCP_ESTABLISHED &&
-   (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) &&
+   (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) &&
tcp_stream_is_thin(tp) &&
icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
icsk->icsk_backoff = 0;
-- 
2.15.0.rc2.357.g7e34df9404-goog

[PATCH v2 net-next 02/15] tcp: Namespace-ify sysctl_tcp_recovery

2017-10-26 Thread Eric Dumazet

Signed-off-by: Eric Dumazet 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  2 +-
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp_input.c   |  2 +-
 net/ipv4/tcp_ipv4.c|  1 +
 net/ipv4/tcp_recovery.c|  2 --
 6 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 
a7f39e3ea666a835b6042e4008c86ccaadd14b46..d6ed718075d44cfc0e60995c1e938d588ad261a8
 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -129,6 +129,7 @@ struct netns_ipv4 {
int sysctl_tcp_window_scaling;
int sysctl_tcp_timestamps;
int sysctl_tcp_early_retrans;
+   int sysctl_tcp_recovery;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
a12b71d4118baa6b939bdeba7380cb3830d46ff0..c7f51534fc44c61a95225e0adc0a1200ea5c0c1c
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -265,7 +265,7 @@ extern int sysctl_tcp_workaround_signed_windows;
 extern int sysctl_tcp_slow_start_after_idle;
 extern int sysctl_tcp_thin_linear_timeouts;
 extern int sysctl_tcp_thin_dupack;
-extern int sysctl_tcp_recovery;
+
 #define TCP_RACK_LOSS_DETECTION  0x1 /* Use RACK to detect losses */
 
 extern int sysctl_tcp_limit_output_bytes;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 
f0f650f020afd535f41943c6c9fb1483be7cfb8d..78019adcae875a438264ee47723670f6b54cacf9
 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -449,13 +449,6 @@ static struct ctl_table ipv4_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
-   {
-   .procname   = "tcp_recovery",
-   .data   = &sysctl_tcp_recovery,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec,
-   },
{
.procname   = "tcp_max_reordering",
.data   = &sysctl_tcp_max_reordering,
@@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = ,
.extra2 = ,
},
+   {
+   .procname   = "tcp_recovery",
+   .data   = &init_net.ipv4.sysctl_tcp_recovery,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec,
+   },
{ }
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 
7656b1e6d5046297b4c5e6cf5591266b9be40095..5b2272dbf6a9a507d62d8ee594fab53284b22a6d
 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2788,7 +2788,7 @@ static void tcp_rack_identify_loss(struct sock *sk, int 
*ack_flag)
struct tcp_sock *tp = tcp_sk(sk);
 
/* Use RACK to detect loss */
-   if (sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
+   if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
u32 prior_retrans = tp->retrans_out;
 
tcp_rack_mark_lost(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 
7ab313f6768e234173d78f17cfb1f664b230e958..517ff1948a71287b06ea0859e1f25a15119a3dd9
 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2485,6 +2485,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_window_scaling = 1;
net->ipv4.sysctl_tcp_timestamps = 1;
net->ipv4.sysctl_tcp_early_retrans = 3;
+   net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
 
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index 
cda6074a429a24b7be600065d95600f4f9810ee4..d3603a9e24eae8649edd12d3f0678015b09b2037
 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -1,8 +1,6 @@
 #include 
 #include 
 
-int sysctl_tcp_recovery __read_mostly = TCP_RACK_LOSS_DETECTION;
-
 static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
 {
struct tcp_sock *tp = tcp_sk(sk);
-- 
2.15.0.rc2.357.g7e34df9404-goog

[PATCH v2 net-next 04/15] tcp: Namespace-ify sysctl_tcp_slow_start_after_idle

2017-10-26 Thread Eric Dumazet

Signed-off-by: Eric Dumazet 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  3 +--
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp_ipv4.c|  1 +
 net/ipv4/tcp_output.c  |  5 +
 5 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 
2a9f37b39c45fe451e45025790a4e5c45ece5cbc..8662692686b3af98a94a176230b9ed147881d87a
 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -131,6 +131,7 @@ struct netns_ipv4 {
int sysctl_tcp_early_retrans;
int sysctl_tcp_recovery;
int sysctl_tcp_thin_linear_timeouts;
+   int sysctl_tcp_slow_start_after_idle;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
063a7a48b7fe23092023d053e26a967389628cdc..cc2ab522eb5cf7cb08b6918cdfd5c5500cfbf057
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -262,7 +262,6 @@ extern int sysctl_tcp_nometrics_save;
 extern int sysctl_tcp_moderate_rcvbuf;
 extern int sysctl_tcp_tso_win_divisor;
 extern int sysctl_tcp_workaround_signed_windows;
-extern int sysctl_tcp_slow_start_after_idle;
 
 #define TCP_RACK_LOSS_DETECTION  0x1 /* Use RACK to detect losses */
 
@@ -1308,7 +1307,7 @@ static inline void tcp_slow_start_after_idle_check(struct 
sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
s32 delta;
 
-   if (!sysctl_tcp_slow_start_after_idle || tp->packets_out ||
+   if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || 
tp->packets_out ||
ca_ops->cong_control)
return;
delta = tcp_jiffies32 - tp->lsndtime;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 
12003214f4d80b38d5f754ddd91be8a990168ade..40d69af8b363bc236e23879973872d8f9346d85e
 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -571,13 +571,6 @@ static struct ctl_table ipv4_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
-   {
-   .procname   = "tcp_slow_start_after_idle",
-   .data   = &sysctl_tcp_slow_start_after_idle,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec
-   },
 #ifdef CONFIG_NETLABEL
{
.procname   = "cipso_cache_enable",
@@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
+   {
+   .procname   = "tcp_slow_start_after_idle",
+   .data   = 
&init_net.ipv4.sysctl_tcp_slow_start_after_idle,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec
+   },
{ }
 };
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 
517ff1948a71287b06ea0859e1f25a15119a3dd9..cea63a4b59655823def7a423d27191003c7f084c
 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2486,6 +2486,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_timestamps = 1;
net->ipv4.sysctl_tcp_early_retrans = 3;
net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
+   net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 
behavior.  */
 
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 
21713836d46af9d48de10e8ec0e7410572ed7eeb..bdc288a06f941add38a5cde434081c63ee94ed42
 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -62,9 +62,6 @@ int sysctl_tcp_limit_output_bytes __read_mostly = 262144;
  */
 int sysctl_tcp_tso_win_divisor __read_mostly = 3;
 
-/* By default, RFC2861 behavior.  */
-int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
-
 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
   int push_one, gfp_t gfp);
 
@@ -1690,7 +1687,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool 
is_cwnd_limited)
if (tp->packets_out > tp->snd_cwnd_used)
tp->snd_cwnd_used = tp->packets_out;
 
-   if (sysctl_tcp_slow_start_after_idle &&
+   if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
(s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= 
inet_csk(sk)->icsk_rto &&
!ca_ops->cong_control)
tcp_cwnd_application_limited(sk);
-- 
2.15.0.rc2.357.g7e34df9404-goog

Re: pull-request: mac80211 2017-10-25

2017-10-26 Thread David Miller

From: Johannes Berg 
Date: Wed, 25 Oct 2017 16:03:42 +0200

> Here are a few more fixes for net, we started comprehensive testing
> for the security issues and found that the problem wasn't addressed
> in TKIP, so that's included, along with a handful other fixes.
> 
> Please pull and let me know if there's any problem.

Pulled, thanks Johannes.

Re: [PATCH 1/1] l2tp: cleanup l2tp_tunnel_delete calls

2017-10-26 Thread David Miller

From: Jiri Slaby 
Date: Wed, 25 Oct 2017 15:57:55 +0200

> l2tp_tunnel_delete does not return anything since commit 62b982eeb458
> ("l2tp: fix race condition in l2tp_tunnel_delete").  But call sites of
> l2tp_tunnel_delete still do casts to void to avoid unused return value
> warnings.
> 
> Kill these now useless casts.
> 
> Signed-off-by: Jiri Slaby 

Applied to net-next, thanks.

Re: [PATCH net-next 00/15] tcp: move 14 sysctls to namespaces

2017-10-26 Thread David Miller

From: Eric Dumazet 
Date: Thu, 26 Oct 2017 16:35:11 -0700

> Ideally all TCP sysctls should be per netns.
> This patch series takes care of 14 of sysctls.
> More to come later.

The tcp-fack patch doesn't apply cleanly, please respin.

Thank you.

Re: [Patch net 01/16] net_sched: introduce a workqueue for RCU callbacks of tc filter

2017-10-26 Thread Eric Dumazet

On Thu, 2017-10-26 at 21:28 -0700, Cong Wang wrote:
> On Thu, Oct 26, 2017 at 9:05 PM, Eric Dumazet  wrote:
> > On Thu, 2017-10-26 at 18:24 -0700, Cong Wang wrote:
> >> ...
> >
> >> On the other hand, this makes tcf_block_put() ugly and
> >> harder to understand. Since David and Eric strongly dislike
> >> adding synchronize_rcu(), this is probably the only
> >> solution that could make everyone happy.
> >
> >
> > ...
> >
> >> +static void tcf_block_put_deferred(struct work_struct *work)
> >> +{
> >> + struct tcf_block *block = container_of(work, struct tcf_block, work);
> >> + struct tcf_chain *chain;
> >>
> >> + rtnl_lock();
> >>   /* Hold a refcnt for all chains, except 0, in case they are gone. */
> >>   list_for_each_entry(chain, &block->chain_list, list)
> >>   if (chain->index)
> >> @@ -292,13 +308,27 @@ void tcf_block_put(struct tcf_block *block)
> >>   list_for_each_entry(chain, &block->chain_list, list)
> >>   tcf_chain_flush(chain);
> >>
> >> - /* Wait for RCU callbacks to release the reference count. */
> >> + INIT_WORK(&block->work, tcf_block_put_final);
> >> + /* Wait for RCU callbacks to release the reference count and make
> >> +  * sure their works have been queued before this.
> >> +  */
> >>   rcu_barrier();
> >> + tcf_queue_work(&block->work);
> >> + rtnl_unlock();
> >> +}
> >
> >
> > On a loaded server, rcu_barrier() typically takes 4 ms.
> >
> > Way better than synchronize_rcu() (about 90 ms) but still an issue when
> > holding RTNL.
> >
> > We have thousands of filters, and the management daemon restarts and
> > rebuilds the TC hierarchy from scratch.
> >
> > Simply getting rid of 1000 old filters might block RTNL for a while, or
> > maybe I misunderstood your patches.
> >
> 
> Paul pointed out the same.
> 
> As I replied, this rcu_barrier() is NOT added by this patchset, it is already
> there in current master branch.

You added the rtnl_lock()  rtnl_unlock()...

I really do not care if hundreds of tasks (not owning rtnl) call
rcu_barrier()...

Also we are still using a 4.3 based kernel, and no rcu_barrier() is used
in filters dismantle ( unregister_tcf_proto_ops() is not used in our
workloads )

Somehow something went very wrong in net/sched in recent kernels.

Re: [Patch net 01/16] net_sched: introduce a workqueue for RCU callbacks of tc filter

2017-10-26 Thread Cong Wang

On Thu, Oct 26, 2017 at 9:05 PM, Eric Dumazet  wrote:
> On Thu, 2017-10-26 at 18:24 -0700, Cong Wang wrote:
>> ...
>
>> On the other hand, this makes tcf_block_put() ugly and
>> harder to understand. Since David and Eric strongly dislike
>> adding synchronize_rcu(), this is probably the only
>> solution that could make everyone happy.
>
>
> ...
>
>> +static void tcf_block_put_deferred(struct work_struct *work)
>> +{
>> + struct tcf_block *block = container_of(work, struct tcf_block, work);
>> + struct tcf_chain *chain;
>>
>> + rtnl_lock();
>>   /* Hold a refcnt for all chains, except 0, in case they are gone. */
>>   list_for_each_entry(chain, &block->chain_list, list)
>>   if (chain->index)
>> @@ -292,13 +308,27 @@ void tcf_block_put(struct tcf_block *block)
>>   list_for_each_entry(chain, &block->chain_list, list)
>>   tcf_chain_flush(chain);
>>
>> - /* Wait for RCU callbacks to release the reference count. */
>> + INIT_WORK(&block->work, tcf_block_put_final);
>> + /* Wait for RCU callbacks to release the reference count and make
>> +  * sure their works have been queued before this.
>> +  */
>>   rcu_barrier();
>> + tcf_queue_work(&block->work);
>> + rtnl_unlock();
>> +}
>
>
> On a loaded server, rcu_barrier() typically takes 4 ms.
>
> Way better than synchronize_rcu() (about 90 ms) but still an issue when
> holding RTNL.
>
> We have thousands of filters, and the management daemon restarts and
> rebuilds the TC hierarchy from scratch.
>
> Simply getting rid of 1000 old filters might block RTNL for a while, or
> maybe I misunderstood your patches.
>

Paul pointed out the same.

As I replied, this rcu_barrier() is NOT added by this patchset, it is already
there in current master branch.

[PATCH net] tcp: refresh tp timestamp before tcp_mtu_probe()

2017-10-26 Thread Eric Dumazet

From: Eric Dumazet 

In the unlikely event tcp_mtu_probe() is sending a packet, we
want tp->tcp_mstamp to be as accurate as possible.

This means we need to call tcp_mstamp_refresh() a bit earlier in
tcp_write_xmit().

Fixes: 385e20706fac ("tcp: use tp->tcp_mstamp in output path")
Signed-off-by: Eric Dumazet 
---
 net/ipv4/tcp_output.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 
1151870018e345592853b035a0902121c41e268d..ae60dd3faed0adc71731bc686f878afd4c628d32
 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2239,6 +2239,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int 
mss_now, int nonagle,
 
sent_pkts = 0;
 
+   tcp_mstamp_refresh(tp);
if (!push_one) {
/* Do MTU probing. */
result = tcp_mtu_probe(sk);
@@ -2250,7 +2251,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int 
mss_now, int nonagle,
}
 
max_segs = tcp_tso_segs(sk, mss_now);
-   tcp_mstamp_refresh(tp);
while ((skb = tcp_send_head(sk))) {
unsigned int limit;

[PATCH] ipv6: exthdrs: use swap macro in ipv6_dest_hao

2017-10-26 Thread Gustavo A. R. Silva

Make use of the swap macro and remove unnecessary variable tmp_addr.
This makes the code easier to read and maintain.

This code was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva 
---
 net/ipv6/exthdrs.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 7835dea..9f918a7 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -187,7 +187,6 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
struct ipv6_destopt_hao *hao;
struct inet6_skb_parm *opt = IP6CB(skb);
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
-   struct in6_addr tmp_addr;
int ret;
 
if (opt->dsthao) {
@@ -229,9 +228,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->ip_summed = CHECKSUM_NONE;
 
-   tmp_addr = ipv6h->saddr;
-   ipv6h->saddr = hao->addr;
-   hao->addr = tmp_addr;
+   swap(ipv6h->saddr, hao->addr);
 
if (skb->tstamp == 0)
__net_timestamp(skb);
-- 
2.7.4

Re: [Patch net 01/16] net_sched: introduce a workqueue for RCU callbacks of tc filter

2017-10-26 Thread Eric Dumazet

On Thu, 2017-10-26 at 18:24 -0700, Cong Wang wrote:
> ...

> On the other hand, this makes tcf_block_put() ugly and
> harder to understand. Since David and Eric strongly dislike
> adding synchronize_rcu(), this is probably the only
> solution that could make everyone happy.


...

> +static void tcf_block_put_deferred(struct work_struct *work)
> +{
> + struct tcf_block *block = container_of(work, struct tcf_block, work);
> + struct tcf_chain *chain;
>  
> + rtnl_lock();
>   /* Hold a refcnt for all chains, except 0, in case they are gone. */
>   list_for_each_entry(chain, &block->chain_list, list)
>   if (chain->index)
> @@ -292,13 +308,27 @@ void tcf_block_put(struct tcf_block *block)
>   list_for_each_entry(chain, &block->chain_list, list)
>   tcf_chain_flush(chain);
>  
> - /* Wait for RCU callbacks to release the reference count. */
> + INIT_WORK(&block->work, tcf_block_put_final);
> + /* Wait for RCU callbacks to release the reference count and make
> +  * sure their works have been queued before this.
> +  */
>   rcu_barrier();
> + tcf_queue_work(&block->work);
> + rtnl_unlock();
> +}


On a loaded server, rcu_barrier() typically takes 4 ms.

Way better than synchronize_rcu() (about 90 ms) but still an issue when
holding RTNL.

We have thousands of filters, and the management daemon restarts and
rebuilds the TC hierarchy from scratch.

Simply getting rid of 1000 old filters might block RTNL for a while, or
maybe I misunderstood your patches.

Thanks.

breakage due to commit 6e617de84e ("net: avoid a full fib lookup when rp_filter is disabled")

2017-10-26 Thread David Ahern

Hi Paolo:

Your commit:

commit 6e617de84e87d626d1e976fc30e1322239fd4d2d
Author: Paolo Abeni 
Date:   Wed Sep 20 18:26:53 2017 +0200

net: avoid a full fib lookup when rp_filter is disabled.

breaks a test case that uses a veth pair in the same network namespace
but separate VRFs. This setup:

vrf add vrf0 table 1001
vrf add vrf1 table 1002

ip link add virt01 type veth peer name virt10
ip link set virt01 master vrf0
ip link set virt10 master vrf1

ip addr add 172.16.20.20/24 dev virt01
ip link set virt01 up

ip addr add 172.16.20.21/24 dev virt10
ip link set virt10 up

ping -c 1 -I vrf0 172.16.20.21

fails due to:

if (inet_lookup_ifaddr_rcu(net, src))
   return -EINVAL;

in fib_validate_source.

David
#!/bin/sh

# vrf0 sends out packets with mpls labels
# vrf1 receives the labelled packets, pops the labels, and forwards to vrf2
# vrf2 receives the unlabelled packets and replies to vrf0

vrf add vrf0 table 1001
vrf add vrf1 table 1002
vrf add vrf2 table 1003

ip link add virt01 type veth peer name virt10
ip link set virt01 master vrf0
ip link set virt10 master vrf1

ip link add virt12 type veth peer name virt21
ip link set virt12 master vrf1
ip link set virt21 master vrf2

ip addr add 172.16.20.20/24 dev virt01
ip link set virt01 up

ip addr add 172.16.20.21/24 dev virt10
ip link set virt10 up
ip addr add 172.16.21.21/24 dev virt12
ip link set virt12 up

ip addr add 172.16.21.22/24 dev virt21
ip link set virt21 up

modprobe mpls_iptunnel

ip route add vrf vrf0 10.10.10.10/32 encap mpls 100 via inet 172.16.20.21 
ip route add vrf vrf0 172.16.21.0/24 via 172.16.20.21 

sysctl -w net.mpls.conf.virt10.input=1
sysctl -w net.mpls.platform_labels=1000
ip -f mpls route add 100 via inet 172.16.21.22 dev virt12

ip addr add 10.10.10.10/32 dev vrf2
ip route add vrf vrf2 172.16.20.0/24 via 172.16.21.21

ping -c 1 -I vrf0 10.10.10.10

netserver

cat <

Re: [PATCH] drivers/net: 3com/3c515: Convert timers to use timer_setup()

2017-10-26 Thread David Miller

From: Kees Cook 
Date: Wed, 25 Oct 2017 03:51:03 -0700

> In preparation for unconditionally passing the struct timer_list pointer to
> all timer callbacks, switch to using the new timer_setup() and from_timer()
> to pass the timer pointer explicitly.
> 
> Cc: "David S. Miller" 
> Cc: Thomas Gleixner 
> Cc: Stephen Hemminger 
> Cc: Johannes Berg 
> Cc: netdev@vger.kernel.org
> Signed-off-by: Kees Cook 

Applied.

Re: [PATCH] drivers/net: can: Convert timers to use timer_setup()

2017-10-26 Thread David Miller

From: Kees Cook 
Date: Wed, 25 Oct 2017 03:51:14 -0700

> In preparation for unconditionally passing the struct timer_list pointer to
> all timer callbacks, switch to using the new timer_setup() and from_timer()
> to pass the timer pointer explicitly.
> 
> Cc: Wolfgang Grandegger 
> Cc: Marc Kleine-Budde 
> Cc: "David S. Miller" 
> Cc: Allen Pais 
> Cc: linux-...@vger.kernel.org
> Cc: netdev@vger.kernel.org
> Signed-off-by: Kees Cook 

Applied.

Re: [PATCH] drivers/net: netronome: Convert timers to use timer_setup()

2017-10-26 Thread David Miller

From: Kees Cook 
Date: Wed, 25 Oct 2017 03:51:38 -0700

> In preparation for unconditionally passing the struct timer_list pointer to
> all timer callbacks, switch to using the new timer_setup() and from_timer()
> to pass the timer pointer explicitly.
> 
> Cc: Jakub Kicinski 
> Cc: "David S. Miller" 
> Cc: Jiri Pirko 
> Cc: Jamal Hadi Salim 
> Cc: Simon Horman 
> Cc: oss-driv...@netronome.com
> Cc: netdev@vger.kernel.org
> Signed-off-by: Kees Cook 

Applied.

Re: [PATCH] drivers/net: hamradio/yam: Convert timers to use timer_setup()

2017-10-26 Thread David Miller

From: Kees Cook 
Date: Wed, 25 Oct 2017 03:51:20 -0700

> In preparation for unconditionally passing the struct timer_list pointer to
> all timer callbacks, switch to using the new timer_setup() and from_timer()
> to pass the timer pointer explicitly. Initialization was entirely missing.
> 
> Cc: Jean-Paul Roubelat 
> Cc: linux-h...@vger.kernel.org
> Cc: netdev@vger.kernel.org
> Signed-off-by: Kees Cook 

Applied.

Re: [PATCH] drivers/net: nuvoton: Convert timers to use timer_setup()

2017-10-26 Thread David Miller

From: Kees Cook 
Date: Wed, 25 Oct 2017 03:51:58 -0700

> In preparation for unconditionally passing the struct timer_list pointer to
> all timer callbacks, switch to using the new timer_setup() and from_timer()
> to pass the timer pointer explicitly.
> 
> Cc: Wan ZongShun 
> Cc: linux-arm-ker...@lists.infradead.org
> Cc: netdev@vger.kernel.org
> Signed-off-by: Kees Cook 

Applied.

Re: [PATCH] drivers/net: realtek: Convert timers to use timer_setup()

2017-10-26 Thread David Miller

From: Kees Cook 
Date: Wed, 25 Oct 2017 03:53:12 -0700

> In preparation for unconditionally passing the struct timer_list pointer to
> all timer callbacks, switch to using the new timer_setup() and from_timer()
> to pass the timer pointer explicitly.
> 
> Cc: Realtek linux nic maintainers 
> Cc: "David S. Miller" 
> Cc: David Howells 
> Cc: Jay Vosburgh 
> Cc: Allen Pais 
> Cc: Eric Dumazet 
> Cc: Tobias Klauser 
> Cc: netdev@vger.kernel.org
> Signed-off-by: Kees Cook 

Applied.

Re: [PATCH] drivers/net: wan/dscc4: Remove unused timer

2017-10-26 Thread David Miller

From: Kees Cook 
Date: Wed, 25 Oct 2017 03:53:42 -0700

> This removes an entirely unused timer, which avoids needing to convert it
> to timer_setup().
> 
> Cc: Francois Romieu 
> Cc: netdev@vger.kernel.org
> Signed-off-by: Kees Cook 

Applied.

Re: [PATCH] drivers/net: wan/lmc: Convert timers to use timer_setup()

2017-10-26 Thread David Miller

From: Kees Cook 
Date: Wed, 25 Oct 2017 03:53:53 -0700

> In preparation for unconditionally passing the struct timer_list pointer to
> all timer callbacks, switch to using the new timer_setup() and from_timer()
> to pass the timer pointer explicitly.
> 
> Cc: Allen Pais 
> Cc: "David S. Miller" 
> Cc: netdev@vger.kernel.org
> Signed-off-by: Kees Cook 

Applied.

Re: [PATCH] drivers/net: sxgbe: Convert timers to use timer_setup()

2017-10-26 Thread David Miller

From: Kees Cook 
Date: Wed, 25 Oct 2017 03:53:20 -0700

> In preparation for unconditionally passing the struct timer_list pointer to
> all timer callbacks, switch to using the new timer_setup() and from_timer()
> to pass the timer pointer explicitly.
> 
> Cc: Byungho An 
> Cc: Girish K S 
> Cc: Vipul Pandya 
> Cc: netdev@vger.kernel.org
> Signed-off-by: Kees Cook 

Applied.

Re: [PATCH] drivers/net: wan/sdla: Convert timers to use timer_setup()

2017-10-26 Thread David Miller

From: Kees Cook 
Date: Wed, 25 Oct 2017 03:53:59 -0700

> In preparation for unconditionally passing the struct timer_list pointer to
> all timer callbacks, switch to using the new timer_setup() and from_timer()
> to pass the timer pointer explicitly.
> 
> Cc: Allen Pais 
> Cc: "David S. Miller" 
> Cc: Tobias Klauser 
> Cc: netdev@vger.kernel.org
> Signed-off-by: Kees Cook 

Applied.

Re: [PATCH] drivers/net: arcnet: Convert timers to use timer_setup()

2017-10-26 Thread David Miller

From: Kees Cook 
Date: Wed, 25 Oct 2017 03:54:06 -0700

> In preparation for unconditionally passing the struct timer_list pointer to
> all timer callbacks, switch to using the new timer_setup() and from_timer()
> to pass the timer pointer explicitly.
> 
> Cc: Michael Grzeschik 
> Cc: netdev@vger.kernel.org
> Signed-off-by: Kees Cook 

Applied.

Re: [PATCH] drivers/net: hippi: Convert timers to use timer_setup()

2017-10-26 Thread David Miller

From: Kees Cook 
Date: Wed, 25 Oct 2017 03:51:29 -0700

> In preparation for unconditionally passing the struct timer_list pointer to
> all timer callbacks, switch to using the new timer_setup() and from_timer()
> to pass the timer pointer explicitly.
> 
> Cc: Jes Sorensen 
> Cc: linux-hi...@sunsite.dk
> Cc: netdev@vger.kernel.org
> Signed-off-by: Kees Cook 

Applied.

[PATCH V2 net] tuntap: properly align skb->head before building skb

2017-10-26 Thread Jason Wang

An unaligned alloc_frag->offset caused by a previous allocation will
result in an unaligned skb->head. This leads to an unaligned
skb_shared_info and then an unaligned dataref, which must be
aligned for access on some architectures. Fix this by aligning
alloc_frag->offset before the frag refilling.

Fixes: 0bbd7dad34f8 ("tun: make tun_build_skb() thread safe")
Cc: Eric Dumazet 
Cc: Willem de Bruijn 
Cc: Wei Wei 
Cc: Dmitry Vyukov 
Cc: Mark Rutland 
Reported-by: Wei Wei 
Signed-off-by: Jason Wang 
---
- The patch is needed for -stable.
- Wei, can you try this patch to see if it solves your issue?
---
 drivers/net/tun.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index b9973fb..5550f56 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1286,6 +1286,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct 
*tun,
buflen += SKB_DATA_ALIGN(len + pad);
rcu_read_unlock();
 
+   alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES);
if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL)))
return ERR_PTR(-ENOMEM);
 
-- 
2.7.4

Re: [PATCH net] tuntap: properly align skb->head before building skb

2017-10-26 Thread Jason Wang




On 2017年10月26日 22:11, Eric Dumazet wrote:

On Thu, Oct 26, 2017 at 5:15 AM, Jason Wang  wrote:

An unaligned alloc_frag->offset caused by a previous allocation will
result in an unaligned skb->head. This leads to an unaligned
skb_shared_info and then an unaligned dataref, which must be
aligned for access on some architectures. Fix this by aligning
alloc_frag->offset before the frag refilling.

Fixes: 0bbd7dad34f8 ("tun: make tun_build_skb() thread safe")
Cc: Eric Dumazet 
Cc: Willem de Bruijn 
Cc: Wei Wei 
Cc: Dmitry Vyukov 
Cc: Mark Rutland 
Reported-by: Wei Wei 
Signed-off-by: Jason Wang 
---
- The patch is needed for -stable.
- Wei, can you try this patch to see if it solves your issue?
---
  drivers/net/tun.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index b9973fb..60e44f2 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1286,6 +1286,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct 
*tun,
 buflen += SKB_DATA_ALIGN(len + pad);
 rcu_read_unlock();

+   alloc_frag->offset = ALIGN((u64)alloc_frag->offset, TUN_RX_PAD);

You have to align to one cache line (SMP_CACHE_BYTES), or SKB_DATA_ALIGN(1)


Oh right.


Then eventually use skb_reserve() for NET_IP_ALIGN, but I guess it is
already done.


Yes.

Thanks

Re: [PATCH] thunderbolt: Drop sequence number check from tb_xdomain_match()

2017-10-26 Thread David Miller

From: Mika Westerberg 
Date: Wed, 25 Oct 2017 12:27:34 +0300

> Commit 9a03c3d398c1 ("thunderbolt: Fix a couple right shifting to zero
> bugs") revealed an issue that was previously hidden because we never
> actually compared received XDomain message sequence numbers properly.
> The idea with these sequence numbers is that the responding host uses
> the same sequence number that was in the request packet which we can
> then check at the requesting host.
> 
> However, testing against macOS it looks like it does not follow this but
> instead uses some other logic. The Windows driver, on the other hand,
> handles it the same way as Linux.
> 
> In order to be able to talk to macOS again, fix this so that we drop the
> whole sequence number check. This effectively works exactly the same
> as it worked before the aforementioned commit. This also follows the
> logic the original P2P networking code used.
> 
> Signed-off-by: Mika Westerberg 
> ---
> This applies on top of net-next.git/master.

Applied, thank you.

Re: [PATCH v9 00/10] net: stmmac: dwmac-sun8i: Handle integrated PHY

2017-10-26 Thread David Miller

From: Corentin Labbe 
Date: Tue, 24 Oct 2017 19:57:04 +0200

> The first 7 patch should go via the sunxi tree, the last three via
> the net tree.

I've applied the last 3 patches to net-next.

Re: [PATCH net-next] net: updating dst lastusage is an unlikely event.

2017-10-26 Thread David Miller

From: Paolo Abeni 
Date: Tue, 24 Oct 2017 12:41:01 +0200

> Since commit 0da4af00b2ed ("ipv6: only update __use and lastusetime
> once per jiffy at most"), updating the dst lastuse field is an
> unlikely action: it happens at most once per jiffy, out of
> potentially millions of calls per second.
> 
> Mark explicitly the code as such, and let the compiler generate
> better code.
> 
> Note: gcc 7.2 and several older versions do actually generate
> different - better - code when the unlikely() hint is in place,
> avoid jump in the fast path and keeping better code locality.
> 
> Signed-off-by: Paolo Abeni 

Applied, thanks.

[PATCH net-next] stmmac: copy unicast mac address to MAC registers

2017-10-26 Thread Bhadram Varka

Currently the stmmac driver does not copy the valid Ethernet
MAC address to the MAC registers. This patch takes care
of updating the MAC registers with the MAC address.

Signed-off-by: Bhadram Varka 
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 0e1b0a3..e0e6348 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3748,6 +3748,20 @@ static int stmmac_ioctl(struct net_device *dev, struct 
ifreq *rq, int cmd)
return ret;
 }
 
+static int stmmac_set_mac_address(struct net_device *ndev, void *addr)
+{
+   struct stmmac_priv *priv = netdev_priv(ndev);
+   int ret = 0;
+
+   ret = eth_mac_addr(ndev, addr);
+   if (ret)
+   return ret;
+
+   priv->hw->mac->set_umac_addr(priv->hw, ndev->dev_addr, 0);
+
+   return ret;
+}
+
 #ifdef CONFIG_DEBUG_FS
 static struct dentry *stmmac_fs_dir;
 
@@ -3975,7 +3989,7 @@ static const struct net_device_ops stmmac_netdev_ops = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = stmmac_poll_controller,
 #endif
-   .ndo_set_mac_address = eth_mac_addr,
+   .ndo_set_mac_address = stmmac_set_mac_address,
 };
 
 /**
-- 
2.7.4

Re: WARNING in refcount_sub_and_test

2017-10-26 Thread ChunYu Wang

Maybe I have just made some mistakes in understanding the reproduction
method; I will try it again.

Thanks,
- ChunYu

On Thu, Oct 26, 2017 at 10:49 PM, Dmitry Vyukov  wrote:
> On Thu, Oct 26, 2017 at 10:53 AM, ChunYu Wang  wrote:
>> Hi all,
>>
>> I failed to reproduce it on the target kernel with the reproducer file
>> or by replaying the target syzkaller description log file. Did I do
>> something wrong, or are there more subjects than the line in
>> repro.txt:
>>
>> #{Threaded:true Collide:true Repeat:false Procs:1 Sandbox:namespace
>> Fault:false FaultCall:-1 FaultNth:0 EnableTun:false UseTmpDir:true
>> HandleSegv:false WaitRepeat:false Debug:false Repro:false}
>
>
> Hi ChunYu,
>
> I've just re-tested the C repro and was able to trigger the bug in a second.
> I've checked out 49ca1943a7adb429b11b8e05d81bc821694b76c7, copied the
> provided config, run make olddefconfig, built with gcc-7 (you can get
> the exact one here
> https://storage.googleapis.com/syzkaller/gcc-7.tar.gz). Then run in
> qemu (most of the flags are probably irrelevant):
>
> qemu-system-x86_64 -hda wheezy.img -net
> user,host=10.0.2.10,hostfwd=tcp::10022-:22 -net nic -nographic -kernel
> arch/x86/boot/bzImage -append "kvm-intel.nested=1
> kvm-intel.unrestricted_guest=1 kvm-intel.ept=1
> kvm-intel.flexpriority=1 kvm-intel.vpid=1
> kvm-intel.emulate_invalid_guest_state=1 kvm-intel.eptad=1
> kvm-intel.enable_shadow_vmcs=1 kvm-intel.pml=1
> kvm-intel.enable_apicv=1 console=ttyS0 root=/dev/sda
> earlyprintk=serial slub_debug=UZ vsyscall=native rodata=n oops=panic
> panic_on_warn=1 panic=86400" -enable-kvm -pidfile vm_pid -m 2G -smp 4
> -cpu host -usb -usbdevice mouse -usbdevice tablet -soundhw all
>
> And running the provided C program instantly spewed the following.
>
> Is there anything you did differently? I would like to understand
> common reasons why syzbot reproducers don't work and outline them
> here:
> https://github.com/google/syzkaller/blob/master/docs/syzbot.md
>
> Thanks
>
>
> [  588.444300] refcount_t: underflow; use-after-free.
> [  588.445812] ------------[ cut here ]------------
> [  588.447026] WARNING: CPU: 1 PID: 3086 at lib/refcount.c:186
> refcount_sub_and_test+0x167/0x1b0
> [  588.449082] Kernel panic - not syncing: panic_on_warn set ...
> [  588.449082]
> [  588.450737] CPU: 1 PID: 3086 Comm: a.out Not tainted 4.14.0-rc5+ #9
> [  588.452160] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
> BIOS Bochs 01/01/2011
> [  588.454059] Call Trace:
> [  588.454658]  dump_stack+0x194/0x257
> [  588.455538]  ? arch_local_irq_restore+0x53/0x53
> [  588.456630]  panic+0x1e4/0x417
> [  588.457367]  ? __warn+0x1d9/0x1d9
> [  588.458171]  ? show_regs_print_info+0x65/0x65
> [  588.459234]  ? refcount_sub_and_test+0x167/0x1b0
> [  588.460262]  __warn+0x1c4/0x1d9
> [  588.460958]  ? refcount_sub_and_test+0x167/0x1b0
> [  588.461965]  report_bug+0x211/0x2d0
> [  588.462756]  fixup_bug+0x40/0x90
> [  588.463597]  do_trap+0x260/0x390
> [  588.464304]  do_error_trap+0x120/0x390
> [  588.465105]  ? vprintk_emit+0x49b/0x590
> [  588.465929]  ? do_trap+0x390/0x390
> [  588.41]  ? refcount_sub_and_test+0x167/0x1b0
> [  588.467646]  ? vprintk_emit+0x3ea/0x590
> [  588.468475]  ? trace_hardirqs_off_thunk+0x1a/0x1c
> [  588.469482]  do_invalid_op+0x1b/0x20
> [  588.470262]  invalid_op+0x18/0x20
> [  588.470988] RIP: 0010:refcount_sub_and_test+0x167/0x1b0
> [  588.472080] RSP: 0018:88006550e9c8 EFLAGS: 00010282
> [  588.473224] RAX: 0026 RBX: 0001 RCX: 
> 
> [  588.474643] RDX: 0026 RSI: 11000caa1cf9 RDI: 
> ed000caa1d2d
> [  588.476091] RBP: 88006550ea58 R08:  R09: 
> 11000caa1ccb
> [  588.477520] R10: 88006550e7f8 R11: 85b2cb78 R12: 
> 11000caa1d3a
> [  588.478967] R13: ff01 R14: 0100 R15: 
> 88006a7f4a7c
> [  588.480413]  ? refcount_sub_and_test+0x167/0x1b0
> [  588.481337]  ? refcount_inc+0x50/0x50
> [  588.482081]  ? __sctp_outq_teardown+0xa5b/0x1230
> [  588.483004]  ? sctp_association_free+0x2d0/0x930
> [  588.484291]  ? sctp_do_sm+0x271b/0x6a30
> [  588.485247]  ? sctp_primitive_SHUTDOWN+0xa0/0xd0
> [  588.486295]  ? sctp_close+0x3c6/0x980
> [  588.487058]  ? inet_release+0xed/0x1c0
> [  588.488370]  ? sock_release+0x8d/0x1e0
> [  588.489080]  ? sock_close+0x16/0x20
> [  588.489759]  sctp_wfree+0x183/0x620
> [  588.490430]  ? entry_SYSCALL_64_fastpath+0xbc/0xbe
> [  588.491323]  ? __sctp_write_space+0x910/0x910
> [  588.492177]  skb_release_head_state+0x124/0x200
> [  588.493078]  skb_release_all+0x15/0x60
> [  588.493938]  consume_skb+0x153/0x490
> [  588.494605]  ? sctp_chunk_put+0x99/0x420
> [  588.495388]  ? alloc_skb_with_frags+0x750/0x750
> [  588.496119]  ? sctp_chunk_hold+0x20/0x20
> [  588.496757]  ? sctp_sched_dequeue_common+0x2aa/0x5d0
> [  588.497554]  ? refcount_sub_and_test+0x115/0x1b0
> [  588.498296]  ? refcount_inc+0x50/0x50
> [

Re: [PATCH net-next] tcp: add tracepoint trace_tcp_retransmit_synack()

2017-10-26 Thread Cong Wang

On Thu, Oct 26, 2017 at 4:50 PM, Song Liu  wrote:
> In this case, we are putting CONFIG_IPV6 in TRACE_EVENT macro, which generates
> warnings like:
>
> ./include/trace/events/tcp.h:274:1: error: directive in argument list
> ./include/trace/events/tcp.h:281:1: error: directive in argument list
>
> It seems these warnings cannot be easily avoided. This is also the same pattern we
> have been using in include/trace/events/tcp.h.

Hmm, we use the same pattern, so why does it only complain about this one?

>
> Any suggestions on how shall we proceed from here?
>

I think this warning is harmless, so it is perhaps not worth the time to
shut it up, unless sparse provides a simple way to do so.

Re: [PATCH net-next v5 2/2] bridge: vlan: signal if anything changed on vlan add

2017-10-26 Thread Toshiaki Makita

On 2017/10/26 22:41, Nikolay Aleksandrov wrote:
> Before this patch there was no way to tell if the vlan add operation
> actually changed anything, thus we would always generate a notification
> on adds. Let's make the notifications more precise and generate them
> only if anything changed, so use the new bool parameter to signal that the
> vlan was updated. We cannot return an error because there are valid use
> cases that will be broken (e.g. overlapping range add) and also we can't
> risk masking errors due to calls into drivers for vlan add which can
> potentially return anything.
> 
> Signed-off-by: Nikolay Aleksandrov 
> ---
> v5: fix br_vlan_add return (v1 leftover) spotted by Toshiaki Makita
> v4: set changed always to false in the non-vlan config case
> v3: fix non-vlan config functions reported by kbuild bot
> v2: pass changed down to vlan add functions instead of using a specific
> error that needs to be masked
> 
>  net/bridge/br_netlink.c |  9 --
>  net/bridge/br_private.h | 14 ++---
>  net/bridge/br_vlan.c| 76 
> +++--
>  3 files changed, 71 insertions(+), 28 deletions(-)
> 
> diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
> index d0290ede9342..e732403669c6 100644
> --- a/net/bridge/br_netlink.c
> +++ b/net/bridge/br_netlink.c
> @@ -508,6 +508,7 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
>  static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
>   int cmd, struct bridge_vlan_info *vinfo, bool *changed)
>  {
> + bool curr_change;
>   int err = 0;

Just a question.
Why are you defining another variable here?
Is it impossible to pass "changed" down to [br|nbp]_vlan_add() like
other functions you modified in patch 1/2?

-- 
Toshiaki Makita

[Patch net 03/16] net_sched: use tcf_queue_work() in bpf filter

2017-10-26 Thread Cong Wang

Defer the tcf_exts_destroy() in RCU callback to
tc filter workqueue and get RTNL lock.

Reported-by: Chris Mi 
Cc: Daniel Borkmann 
Cc: Jiri Pirko 
Cc: John Fastabend 
Cc: Jamal Hadi Salim 
Cc: "Paul E. McKenney" 
Signed-off-by: Cong Wang 
---
 net/sched/cls_bpf.c | 19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 520c5027646a..037a3ae86829 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -49,7 +49,10 @@ struct cls_bpf_prog {
struct sock_filter *bpf_ops;
const char *bpf_name;
struct tcf_proto *tp;
-   struct rcu_head rcu;
+   union {
+   struct work_struct work;
+   struct rcu_head rcu;
+   };
 };
 
 static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
@@ -257,9 +260,21 @@ static void __cls_bpf_delete_prog(struct cls_bpf_prog 
*prog)
kfree(prog);
 }
 
+static void cls_bpf_delete_prog_work(struct work_struct *work)
+{
+   struct cls_bpf_prog *prog = container_of(work, struct cls_bpf_prog, 
work);
+
+   rtnl_lock();
+   __cls_bpf_delete_prog(prog);
+   rtnl_unlock();
+}
+
 static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu)
 {
-   __cls_bpf_delete_prog(container_of(rcu, struct cls_bpf_prog, rcu));
+   struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu);
+
+   INIT_WORK(>work, cls_bpf_delete_prog_work);
+   tcf_queue_work(>work);
 }
 
 static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
-- 
2.13.0

[Patch net 07/16] net_sched: use tcf_queue_work() in fw filter

2017-10-26 Thread Cong Wang

Defer the tcf_exts_destroy() in RCU callback to
tc filter workqueue and get RTNL lock.

Reported-by: Chris Mi 
Cc: Daniel Borkmann 
Cc: Jiri Pirko 
Cc: John Fastabend 
Cc: Jamal Hadi Salim 
Cc: "Paul E. McKenney" 
Signed-off-by: Cong Wang 
---
 net/sched/cls_fw.c | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 941245ad07fd..99183b8621ec 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -46,7 +46,10 @@ struct fw_filter {
 #endif /* CONFIG_NET_CLS_IND */
struct tcf_exts exts;
struct tcf_proto*tp;
-   struct rcu_head rcu;
+   union {
+   struct work_struct  work;
+   struct rcu_head rcu;
+   };
 };
 
 static u32 fw_hash(u32 handle)
@@ -119,12 +122,22 @@ static int fw_init(struct tcf_proto *tp)
return 0;
 }
 
-static void fw_delete_filter(struct rcu_head *head)
+static void fw_delete_filter_work(struct work_struct *work)
 {
-   struct fw_filter *f = container_of(head, struct fw_filter, rcu);
+   struct fw_filter *f = container_of(work, struct fw_filter, work);
 
+   rtnl_lock();
tcf_exts_destroy(>exts);
kfree(f);
+   rtnl_unlock();
+}
+
+static void fw_delete_filter(struct rcu_head *head)
+{
+   struct fw_filter *f = container_of(head, struct fw_filter, rcu);
+
+   INIT_WORK(>work, fw_delete_filter_work);
+   tcf_queue_work(>work);
 }
 
 static void fw_destroy(struct tcf_proto *tp)
-- 
2.13.0

[Patch net 14/16] net_sched: fix call_rcu() race on act_sample module removal

2017-10-26 Thread Cong Wang

Similar to commit c78e1746d3ad
("net: sched: fix call_rcu() race on classifier module unloads"),
we need to wait for flying RCU callback tcf_sample_cleanup_rcu().

Cc: Yotam Gigi 
Cc: Daniel Borkmann 
Cc: Jiri Pirko 
Cc: Jamal Hadi Salim 
Cc: "Paul E. McKenney" 
Signed-off-by: Cong Wang 
---
 net/sched/act_sample.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index ec986ae52808..a9f9a2ccc664 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -264,6 +264,7 @@ static int __init sample_init_module(void)
 
 static void __exit sample_cleanup_module(void)
 {
+   rcu_barrier();
tcf_unregister_action(_sample_ops, _net_ops);
 }
 
-- 
2.13.0

[Patch net 13/16] net_sched: add rtnl assertion to tcf_exts_destroy()

2017-10-26 Thread Cong Wang

After previous patches, it is now safe to claim that
tcf_exts_destroy() is always called with RTNL lock.

Cc: Daniel Borkmann 
Cc: Jiri Pirko 
Cc: John Fastabend 
Cc: Jamal Hadi Salim 
Cc: "Paul E. McKenney" 
Signed-off-by: Cong Wang 
---
 net/sched/cls_api.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 045d13679ad6..231181c602ed 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -909,6 +909,7 @@ void tcf_exts_destroy(struct tcf_exts *exts)
 #ifdef CONFIG_NET_CLS_ACT
LIST_HEAD(actions);
 
+   ASSERT_RTNL();
tcf_exts_to_list(exts, );
tcf_action_destroy(, TCA_ACT_UNBIND);
kfree(exts->actions);
-- 
2.13.0

[Patch net 02/16] net_sched: use tcf_queue_work() in basic filter

2017-10-26 Thread Cong Wang

Defer the tcf_exts_destroy() in RCU callback to
tc filter workqueue and get RTNL lock.

Reported-by: Chris Mi 
Cc: Daniel Borkmann 
Cc: Jiri Pirko 
Cc: John Fastabend 
Cc: Jamal Hadi Salim 
Cc: "Paul E. McKenney" 
Signed-off-by: Cong Wang 
---
 net/sched/cls_basic.c | 20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index d89ebafd2239..f177649a2419 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -34,7 +34,10 @@ struct basic_filter {
struct tcf_result   res;
struct tcf_proto*tp;
struct list_headlink;
-   struct rcu_head rcu;
+   union {
+   struct work_struct  work;
+   struct rcu_head rcu;
+   };
 };
 
 static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -82,15 +85,26 @@ static int basic_init(struct tcf_proto *tp)
return 0;
 }
 
-static void basic_delete_filter(struct rcu_head *head)
+static void basic_delete_filter_work(struct work_struct *work)
 {
-   struct basic_filter *f = container_of(head, struct basic_filter, rcu);
+   struct basic_filter *f = container_of(work, struct basic_filter, work);
 
+   rtnl_lock();
tcf_exts_destroy(>exts);
tcf_em_tree_destroy(>ematches);
+   rtnl_unlock();
+
kfree(f);
 }
 
+static void basic_delete_filter(struct rcu_head *head)
+{
+   struct basic_filter *f = container_of(head, struct basic_filter, rcu);
+
+   INIT_WORK(>work, basic_delete_filter_work);
+   tcf_queue_work(>work);
+}
+
 static void basic_destroy(struct tcf_proto *tp)
 {
struct basic_head *head = rtnl_dereference(tp->root);
-- 
2.13.0

[Patch net 12/16] net_sched: use tcf_queue_work() in tcindex filter

2017-10-26 Thread Cong Wang

Defer the tcf_exts_destroy() in RCU callback to
tc filter workqueue and get RTNL lock.

Reported-by: Chris Mi 
Cc: Daniel Borkmann 
Cc: Jiri Pirko 
Cc: John Fastabend 
Cc: Jamal Hadi Salim 
Cc: "Paul E. McKenney" 
Signed-off-by: Cong Wang 
---
 net/sched/cls_tcindex.c | 38 +-
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 14a7e08b2fa9..beaa95e09c25 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -27,14 +27,20 @@
 struct tcindex_filter_result {
struct tcf_exts exts;
struct tcf_result   res;
-   struct rcu_head rcu;
+   union {
+   struct work_struct  work;
+   struct rcu_head rcu;
+   };
 };
 
 struct tcindex_filter {
u16 key;
struct tcindex_filter_result result;
struct tcindex_filter __rcu *next;
-   struct rcu_head rcu;
+   union {
+   struct work_struct work;
+   struct rcu_head rcu;
+   };
 };
 
 
@@ -133,12 +139,34 @@ static int tcindex_init(struct tcf_proto *tp)
return 0;
 }
 
+static void tcindex_destroy_rexts_work(struct work_struct *work)
+{
+   struct tcindex_filter_result *r;
+
+   r = container_of(work, struct tcindex_filter_result, work);
+   rtnl_lock();
+   tcf_exts_destroy(>exts);
+   rtnl_unlock();
+}
+
 static void tcindex_destroy_rexts(struct rcu_head *head)
 {
struct tcindex_filter_result *r;
 
r = container_of(head, struct tcindex_filter_result, rcu);
-   tcf_exts_destroy(>exts);
+   INIT_WORK(>work, tcindex_destroy_rexts_work);
+   tcf_queue_work(>work);
+}
+
+static void tcindex_destroy_fexts_work(struct work_struct *work)
+{
+   struct tcindex_filter *f = container_of(work, struct tcindex_filter,
+   work);
+
+   rtnl_lock();
+   tcf_exts_destroy(>result.exts);
+   kfree(f);
+   rtnl_unlock();
 }
 
 static void tcindex_destroy_fexts(struct rcu_head *head)
@@ -146,8 +174,8 @@ static void tcindex_destroy_fexts(struct rcu_head *head)
struct tcindex_filter *f = container_of(head, struct tcindex_filter,
rcu);
 
-   tcf_exts_destroy(>result.exts);
-   kfree(f);
+   INIT_WORK(>work, tcindex_destroy_fexts_work);
+   tcf_queue_work(>work);
 }
 
 static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last)
-- 
2.13.0

[Patch net 04/16] net_sched: use tcf_queue_work() in cgroup filter

2017-10-26 Thread Cong Wang

Defer the tcf_exts_destroy() in RCU callback to
tc filter workqueue and get RTNL lock.

Reported-by: Chris Mi 
Cc: Daniel Borkmann 
Cc: Jiri Pirko 
Cc: John Fastabend 
Cc: Jamal Hadi Salim 
Cc: "Paul E. McKenney" 
Signed-off-by: Cong Wang 
---
 net/sched/cls_cgroup.c | 22 ++
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index d48452f87975..a97e069bee89 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -23,7 +23,10 @@ struct cls_cgroup_head {
struct tcf_exts exts;
struct tcf_ematch_tree  ematches;
struct tcf_proto*tp;
-   struct rcu_head rcu;
+   union {
+   struct work_struct  work;
+   struct rcu_head rcu;
+   };
 };
 
 static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -57,15 +60,26 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX 
+ 1] = {
[TCA_CGROUP_EMATCHES]   = { .type = NLA_NESTED },
 };
 
+static void cls_cgroup_destroy_work(struct work_struct *work)
+{
+   struct cls_cgroup_head *head = container_of(work,
+   struct cls_cgroup_head,
+   work);
+   rtnl_lock();
+   tcf_exts_destroy(>exts);
+   tcf_em_tree_destroy(>ematches);
+   kfree(head);
+   rtnl_unlock();
+}
+
 static void cls_cgroup_destroy_rcu(struct rcu_head *root)
 {
struct cls_cgroup_head *head = container_of(root,
struct cls_cgroup_head,
rcu);
 
-   tcf_exts_destroy(>exts);
-   tcf_em_tree_destroy(>ematches);
-   kfree(head);
+   INIT_WORK(>work, cls_cgroup_destroy_work);
+   tcf_queue_work(>work);
 }
 
 static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
-- 
2.13.0

[Patch net 10/16] net_sched: use tcf_queue_work() in route filter

2017-10-26 Thread Cong Wang

Defer the tcf_exts_destroy() in RCU callback to
tc filter workqueue and get RTNL lock.

Reported-by: Chris Mi 
Cc: Daniel Borkmann 
Cc: Jiri Pirko 
Cc: John Fastabend 
Cc: Jamal Hadi Salim 
Cc: "Paul E. McKenney" 
Signed-off-by: Cong Wang 
---
 net/sched/cls_route.c | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 9ddde65915d2..4b14ccd8b8f2 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -57,7 +57,10 @@ struct route4_filter {
u32 handle;
struct route4_bucket*bkt;
struct tcf_proto*tp;
-   struct rcu_head rcu;
+   union {
+   struct work_struct  work;
+   struct rcu_head rcu;
+   };
 };
 
 #define ROUTE4_FAILURE ((struct route4_filter *)(-1L))
@@ -254,12 +257,22 @@ static int route4_init(struct tcf_proto *tp)
return 0;
 }
 
-static void route4_delete_filter(struct rcu_head *head)
+static void route4_delete_filter_work(struct work_struct *work)
 {
-   struct route4_filter *f = container_of(head, struct route4_filter, rcu);
+   struct route4_filter *f = container_of(work, struct route4_filter, 
work);
 
+   rtnl_lock();
tcf_exts_destroy(>exts);
kfree(f);
+   rtnl_unlock();
+}
+
+static void route4_delete_filter(struct rcu_head *head)
+{
+   struct route4_filter *f = container_of(head, struct route4_filter, rcu);
+
+   INIT_WORK(>work, route4_delete_filter_work);
+   tcf_queue_work(>work);
 }
 
 static void route4_destroy(struct tcf_proto *tp)
-- 
2.13.0

[Patch net 11/16] net_sched: use tcf_queue_work() in rsvp filter

2017-10-26 Thread Cong Wang

Defer the tcf_exts_destroy() in RCU callback to
tc filter workqueue and get RTNL lock.

Reported-by: Chris Mi 
Cc: Daniel Borkmann 
Cc: Jiri Pirko 
Cc: John Fastabend 
Cc: Jamal Hadi Salim 
Cc: "Paul E. McKenney" 
Signed-off-by: Cong Wang 
---
 net/sched/cls_rsvp.h | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index b1f6ed48bc72..bdbc541787f8 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -97,7 +97,10 @@ struct rsvp_filter {
 
u32 handle;
struct rsvp_session *sess;
-   struct rcu_head rcu;
+   union {
+   struct work_struct  work;
+   struct rcu_head rcu;
+   };
 };
 
 static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
@@ -282,12 +285,22 @@ static int rsvp_init(struct tcf_proto *tp)
return -ENOBUFS;
 }
 
-static void rsvp_delete_filter_rcu(struct rcu_head *head)
+static void rsvp_delete_filter_work(struct work_struct *work)
 {
-   struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu);
+   struct rsvp_filter *f = container_of(work, struct rsvp_filter, work);
 
+   rtnl_lock();
tcf_exts_destroy(>exts);
kfree(f);
+   rtnl_unlock();
+}
+
+static void rsvp_delete_filter_rcu(struct rcu_head *head)
+{
+   struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu);
+
+   INIT_WORK(>work, rsvp_delete_filter_work);
+   tcf_queue_work(>work);
 }
 
 static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
-- 
2.13.0

[Patch net 15/16] selftests: Introduce a new script to generate tc batch file

2017-10-26 Thread Cong Wang

From: Chris Mi 

  # ./tdc_batch.py -h
  usage: tdc_batch.py [-h] [-n NUMBER] [-o] [-s] [-p] device file

  TC batch file generator

  positional arguments:
devicedevice name
file  batch file name

  optional arguments:
-h, --helpshow this help message and exit
-n NUMBER, --number NUMBER
  how many lines in batch file
-o, --skip_sw skip_sw (offload), by default skip_hw
-s, --share_actionall filters share the same action
-p, --prioall filters have different prio

Acked-by: Jamal Hadi Salim 
Acked-by: Lucas Bates 
Signed-off-by: Chris Mi 
Signed-off-by: Cong Wang 
---
 tools/testing/selftests/tc-testing/tdc_batch.py | 62 +
 1 file changed, 62 insertions(+)
 create mode 100755 tools/testing/selftests/tc-testing/tdc_batch.py

diff --git a/tools/testing/selftests/tc-testing/tdc_batch.py 
b/tools/testing/selftests/tc-testing/tdc_batch.py
new file mode 100755
index ..707c6bfef689
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tdc_batch.py
@@ -0,0 +1,62 @@
+#!/usr/bin/python3
+
+"""
+tdc_batch.py - a script to generate TC batch file
+
+Copyright (C) 2017 Chris Mi 
+"""
+
+import argparse
+
+parser = argparse.ArgumentParser(description='TC batch file generator')
+parser.add_argument("device", help="device name")
+parser.add_argument("file", help="batch file name")
+parser.add_argument("-n", "--number", type=int,
+help="how many lines in batch file")
+parser.add_argument("-o", "--skip_sw",
+help="skip_sw (offload), by default skip_hw",
+action="store_true")
+parser.add_argument("-s", "--share_action",
+help="all filters share the same action",
+action="store_true")
+parser.add_argument("-p", "--prio",
+help="all filters have different prio",
+action="store_true")
+args = parser.parse_args()
+
+device = args.device
+file = open(args.file, 'w')
+
+number = 1
+if args.number:
+number = args.number
+
+skip = "skip_hw"
+if args.skip_sw:
+skip = "skip_sw"
+
+share_action = ""
+if args.share_action:
+share_action = "index 1"
+
+prio = "prio 1"
+if args.prio:
+prio = ""
+if number > 0x4000:
+number = 0x4000
+
+index = 0
+for i in range(0x100):
+for j in range(0x100):
+for k in range(0x100):
+mac = ("%02x:%02x:%02x" % (i, j, k))
+src_mac = "e4:11:00:" + mac
+dst_mac = "e4:12:00:" + mac
+cmd = ("filter add dev %s %s protocol ip parent : flower %s "
+   "src_mac %s dst_mac %s action drop %s" %
+   (device, prio, skip, src_mac, dst_mac, share_action))
+file.write("%s\n" % cmd)
+index += 1
+if index >= number:
+file.close()
+exit(0)
-- 
2.13.0

[Patch net 05/16] net_sched: use tcf_queue_work() in flow filter

2017-10-26 Thread Cong Wang

Defer the tcf_exts_destroy() in RCU callback to
tc filter workqueue and get RTNL lock.

Reported-by: Chris Mi 
Cc: Daniel Borkmann 
Cc: Jiri Pirko 
Cc: John Fastabend 
Cc: Jamal Hadi Salim 
Cc: "Paul E. McKenney" 
Signed-off-by: Cong Wang 
---
 net/sched/cls_flow.c | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 2a3a60ec5b86..67f3a2af6aab 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -57,7 +57,10 @@ struct flow_filter {
u32 divisor;
u32 baseclass;
u32 hashrnd;
-   struct rcu_head rcu;
+   union {
+   struct work_struct  work;
+   struct rcu_head rcu;
+   };
 };
 
 static inline u32 addr_fold(void *addr)
@@ -369,14 +372,24 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 
1] = {
[TCA_FLOW_PERTURB]  = { .type = NLA_U32 },
 };
 
-static void flow_destroy_filter(struct rcu_head *head)
+static void flow_destroy_filter_work(struct work_struct *work)
 {
-   struct flow_filter *f = container_of(head, struct flow_filter, rcu);
+   struct flow_filter *f = container_of(work, struct flow_filter, work);
 
+   rtnl_lock();
del_timer_sync(>perturb_timer);
tcf_exts_destroy(>exts);
tcf_em_tree_destroy(>ematches);
kfree(f);
+   rtnl_unlock();
+}
+
+static void flow_destroy_filter(struct rcu_head *head)
+{
+   struct flow_filter *f = container_of(head, struct flow_filter, rcu);
+
+   INIT_WORK(>work, flow_destroy_filter_work);
+   tcf_queue_work(>work);
 }
 
 static int flow_change(struct net *net, struct sk_buff *in_skb,
-- 
2.13.0

[Patch net 09/16] net_sched: use tcf_queue_work() in u32 filter

2017-10-26 Thread Cong Wang

Defer the tcf_exts_destroy() in RCU callback to
tc filter workqueue and get RTNL lock.

Reported-by: Chris Mi 
Cc: Daniel Borkmann 
Cc: Jiri Pirko 
Cc: John Fastabend 
Cc: Jamal Hadi Salim 
Cc: "Paul E. McKenney" 
Signed-off-by: Cong Wang 
---
 net/sched/cls_u32.c | 29 ++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 10b8d851fc6b..dadd1b344497 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -68,7 +68,10 @@ struct tc_u_knode {
u32 __percpu*pcpu_success;
 #endif
struct tcf_proto*tp;
-   struct rcu_head rcu;
+   union {
+   struct work_struct  work;
+   struct rcu_head rcu;
+   };
/* The 'sel' field MUST be the last field in structure to allow for
 * tc_u32_keys allocated at end of structure.
 */
@@ -418,11 +421,21 @@ static int u32_destroy_key(struct tcf_proto *tp, struct 
tc_u_knode *n,
  * this the u32_delete_key_rcu variant does not free the percpu
  * statistics.
  */
+static void u32_delete_key_work(struct work_struct *work)
+{
+   struct tc_u_knode *key = container_of(work, struct tc_u_knode, work);
+
+   rtnl_lock();
+   u32_destroy_key(key->tp, key, false);
+   rtnl_unlock();
+}
+
 static void u32_delete_key_rcu(struct rcu_head *rcu)
 {
struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
 
-   u32_destroy_key(key->tp, key, false);
+   INIT_WORK(>work, u32_delete_key_work);
+   tcf_queue_work(>work);
 }
 
 /* u32_delete_key_freepf_rcu is the rcu callback variant
@@ -432,11 +445,21 @@ static void u32_delete_key_rcu(struct rcu_head *rcu)
  * for the variant that should be used with keys return from
  * u32_init_knode()
  */
+static void u32_delete_key_freepf_work(struct work_struct *work)
+{
+   struct tc_u_knode *key = container_of(work, struct tc_u_knode, work);
+
+   rtnl_lock();
+   u32_destroy_key(key->tp, key, true);
+   rtnl_unlock();
+}
+
 static void u32_delete_key_freepf_rcu(struct rcu_head *rcu)
 {
struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
 
-   u32_destroy_key(key->tp, key, true);
+   INIT_WORK(>work, u32_delete_key_freepf_work);
+   tcf_queue_work(>work);
 }
 
 static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
-- 
2.13.0

[Patch net 01/16] net_sched: introduce a workqueue for RCU callbacks of tc filter

2017-10-26 Thread Cong Wang

This patch introduces a dedicated workqueue for tc filters
so that each tc filter's RCU callback can defer its
action destroy work to this workqueue. The helper
tcf_queue_work() is introduced for them to use.

Because we hold RTNL lock when calling tcf_block_put(), we
can not simply flush works inside it, therefore we have to
defer it again to this workqueue and make sure all flying RCU
callbacks have already queued their work before this one, in
other words, to ensure this is the last one to execute to
prevent any use-after-free.

On the other hand, this makes tcf_block_put() ugly and
harder to understand. Since David and Eric strongly dislike
adding synchronize_rcu(), this is probably the only
solution that could make everyone happy.

Please also see the code comments below.

Reported-by: Chris Mi 
Cc: Daniel Borkmann 
Cc: Jiri Pirko 
Cc: John Fastabend 
Cc: Jamal Hadi Salim 
Cc: "Paul E. McKenney" 
Signed-off-by: Cong Wang 
---
 include/net/pkt_cls.h |  3 +++
 include/net/sch_generic.h |  2 ++
 net/sched/cls_api.c   | 68 +++
 3 files changed, 56 insertions(+), 17 deletions(-)

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index e80edd8879ef..3009547f3c66 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -2,6 +2,7 @@
 #define __NET_PKT_CLS_H
 
 #include 
+#include 
 #include 
 #include 
 
@@ -17,6 +18,8 @@ struct tcf_walker {
 int register_tcf_proto_ops(struct tcf_proto_ops *ops);
 int unregister_tcf_proto_ops(struct tcf_proto_ops *ops);
 
+bool tcf_queue_work(struct work_struct *work);
+
 #ifdef CONFIG_NET_CLS
 struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
bool create);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 135f5a2dd931..0dec8a23be57 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -271,6 +272,7 @@ struct tcf_chain {
 
 struct tcf_block {
struct list_head chain_list;
+   struct work_struct work;
 };
 
 static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 0b2219adf520..045d13679ad6 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -77,6 +77,8 @@ int register_tcf_proto_ops(struct tcf_proto_ops *ops)
 }
 EXPORT_SYMBOL(register_tcf_proto_ops);
 
+static struct workqueue_struct *tc_filter_wq;
+
 int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
 {
struct tcf_proto_ops *t;
@@ -86,6 +88,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
 * tcf_proto_ops's destroy() handler.
 */
rcu_barrier();
+   flush_workqueue(tc_filter_wq);
 
write_lock(_mod_lock);
list_for_each_entry(t, _proto_base, head) {
@@ -100,6 +103,12 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
 }
 EXPORT_SYMBOL(unregister_tcf_proto_ops);
 
+bool tcf_queue_work(struct work_struct *work)
+{
+   return queue_work(tc_filter_wq, work);
+}
+EXPORT_SYMBOL(tcf_queue_work);
+
 /* Select new prio value from the range, managed by kernel. */
 
 static inline u32 tcf_auto_prio(struct tcf_proto *tp)
@@ -266,23 +275,30 @@ int tcf_block_get(struct tcf_block **p_block,
 }
 EXPORT_SYMBOL(tcf_block_get);
 
-void tcf_block_put(struct tcf_block *block)
+static void tcf_block_put_final(struct work_struct *work)
 {
+   struct tcf_block *block = container_of(work, struct tcf_block, work);
struct tcf_chain *chain, *tmp;
 
-   if (!block)
-   return;
-
-   /* XXX: Standalone actions are not allowed to jump to any chain, and
-* bound actions should be all removed after flushing. However,
-* filters are destroyed in RCU callbacks, we have to hold the chains
-* first, otherwise we would always race with RCU callbacks on this list
-* without proper locking.
-*/
+   /* At this point, all the chains should have refcnt == 1. */
+   rtnl_lock();
+   list_for_each_entry_safe(chain, tmp, >chain_list, list)
+   tcf_chain_put(chain);
+   rtnl_unlock();
+   kfree(block);
+}
 
-   /* Wait for existing RCU callbacks to cool down. */
-   rcu_barrier();
+/* XXX: Standalone actions are not allowed to jump to any chain, and bound
+ * actions should be all removed after flushing. However, filters are destroyed
+ * in RCU callbacks, we have to hold the chains first, otherwise we would
+ * always race with RCU callbacks on this list without proper locking.
+ */
+static void tcf_block_put_deferred(struct work_struct *work)
+{
+   struct tcf_block *block = container_of(work, struct tcf_block, work);
+   struct tcf_chain *chain;
 
+

[Patch net 06/16] net_sched: use tcf_queue_work() in flower filter

2017-10-26 Thread Cong Wang

Defer the tcf_exts_destroy() in RCU callback to
tc filter workqueue and get RTNL lock.

Reported-by: Chris Mi 
Cc: Daniel Borkmann 
Cc: Jiri Pirko 
Cc: John Fastabend 
Cc: Jamal Hadi Salim 
Cc: "Paul E. McKenney" 
Signed-off-by: Cong Wang 
---
 net/sched/cls_flower.c | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index b480d7c792ba..5b5722c8b32c 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -87,7 +87,10 @@ struct cls_fl_filter {
struct list_head list;
u32 handle;
u32 flags;
-   struct rcu_head rcu;
+   union {
+   struct work_struct work;
+   struct rcu_head rcu;
+   };
struct net_device *hw_dev;
 };
 
@@ -215,12 +218,22 @@ static int fl_init(struct tcf_proto *tp)
return 0;
 }
 
-static void fl_destroy_filter(struct rcu_head *head)
+static void fl_destroy_filter_work(struct work_struct *work)
 {
-   struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);
+   struct cls_fl_filter *f = container_of(work, struct cls_fl_filter, 
work);
 
+   rtnl_lock();
tcf_exts_destroy(>exts);
kfree(f);
+   rtnl_unlock();
+}
+
+static void fl_destroy_filter(struct rcu_head *head)
+{
+   struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);
+
+   INIT_WORK(>work, fl_destroy_filter_work);
+   tcf_queue_work(>work);
 }
 
 static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
-- 
2.13.0

[Patch net 16/16] selftests: Introduce a new test case to tc testsuite

2017-10-26 Thread Cong Wang

From: Chris Mi 

In this patchset, we fixed a tc bug. This patch adds the test case
that reproduces the bug. To run this test case, user should specify
an existing NIC device:
  # sudo ./tdc.py -d enp4s0f0

This test case belongs to category "flower". If user doesn't specify
a NIC device, the test cases belonging to "flower" will not be run.

In this test case, we create 1M filters and all filters share the same
action. When destroying all filters, kernel should not panic. It takes
about 18s to run it.

Acked-by: Jamal Hadi Salim 
Acked-by: Lucas Bates 
Signed-off-by: Chris Mi 
Signed-off-by: Cong Wang 
---
 .../tc-testing/tc-tests/filters/tests.json | 23 +-
 tools/testing/selftests/tc-testing/tdc.py  | 20 +++
 tools/testing/selftests/tc-testing/tdc_config.py   |  2 ++
 3 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json 
b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
index c727b96a59b0..5fa02d86b35f 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
@@ -17,5 +17,26 @@
 "teardown": [
 "$TC qdisc del dev $DEV1 ingress"
 ]
+},
+{
+"id": "d052",
+"name": "Add 1M filters with the same action",
+"category": [
+"filter",
+"flower"
+],
+"setup": [
+"$TC qdisc add dev $DEV2 ingress",
+"./tdc_batch.py $DEV2 $BATCH_FILE --share_action -n 100"
+],
+"cmdUnderTest": "$TC -b $BATCH_FILE",
+"expExitCode": "0",
+"verifyCmd": "$TC actions list action gact",
+"matchPattern": "action order 0: gact action drop.*index 1 ref 100 
bind 100",
+"matchCount": "1",
+"teardown": [
+"$TC qdisc del dev $DEV2 ingress",
+"/bin/rm $BATCH_FILE"
+]
 }
-]
\ No newline at end of file
+]
diff --git a/tools/testing/selftests/tc-testing/tdc.py 
b/tools/testing/selftests/tc-testing/tdc.py
index cd61b7844c0d..5f11f5d7456e 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -88,7 +88,7 @@ USE_NS = True
 exit(1)
 
 
-def test_runner(filtered_tests):
+def test_runner(filtered_tests, args):
 """
 Driver function for the unit tests.
 
@@ -105,6 +105,8 @@ USE_NS = True
 for tidx in testlist:
 result = True
 tresult = ""
+if "flower" in tidx["category"] and args.device == None:
+continue
 print("Test " + tidx["id"] + ": " + tidx["name"])
 prepare_env(tidx["setup"])
 (p, procout) = exec_cmd(tidx["cmdUnderTest"])
@@ -152,6 +154,10 @@ USE_NS = True
 exec_cmd(cmd, False)
 cmd = 'ip -s $NS link set $DEV1 up'
 exec_cmd(cmd, False)
+cmd = 'ip link set $DEV2 netns $NS'
+exec_cmd(cmd, False)
+cmd = 'ip -s $NS link set $DEV2 up'
+exec_cmd(cmd, False)
 
 
 def ns_destroy():
@@ -211,7 +217,8 @@ USE_NS = True
 help='Execute the single test case with specified ID')
 parser.add_argument('-i', '--id', action='store_true', dest='gen_id',
 help='Generate ID numbers for new test cases')
-return parser
+parser.add_argument('-d', '--device',
+help='Execute the test case in flower category')
 return parser
 
 
@@ -225,6 +232,8 @@ USE_NS = True
 
 if args.path != None:
  NAMES['TC'] = args.path
+if args.device != None:
+ NAMES['DEV2'] = args.device
 if not os.path.isfile(NAMES['TC']):
 print("The specified tc path " + NAMES['TC'] + " does not exist.")
 exit(1)
@@ -381,14 +390,17 @@ USE_NS = True
 if (len(alltests) == 0):
 print("Cannot find a test case with ID matching " + target_id)
 exit(1)
-catresults = test_runner(alltests)
+catresults = test_runner(alltests, args)
 print("All test results: " + "\n\n" + catresults)
 elif (len(target_category) > 0):
+if (target_category == "flower") and args.device == None:
+print("Please specify a NIC device (-d) to run category flower")
+exit(1)
 if (target_category not in ucat):
 print("Specified category is not present in this file.")
 exit(1)
 else:
-catresults = test_runner(testcases[target_category])
+catresults = test_runner(testcases[target_category], args)
 print("Category " + target_category + "\n\n" + catresults)
 
 ns_destroy()
diff --git a/tools/testing/selftests/tc-testing/tdc_config.py 
b/tools/testing/selftests/tc-testing/tdc_config.py
index

[Patch net 08/16] net_sched: use tcf_queue_work() in matchall filter

2017-10-26 Thread Cong Wang

Defer the tcf_exts_destroy() in RCU callback to
tc filter workqueue and get RTNL lock.

Reported-by: Chris Mi 
Cc: Daniel Borkmann 
Cc: Jiri Pirko 
Cc: John Fastabend 
Cc: Jamal Hadi Salim 
Cc: "Paul E. McKenney" 
Signed-off-by: Cong Wang 
---
 net/sched/cls_matchall.c | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index eeac606c95ab..c33f711b9019 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -21,7 +21,10 @@ struct cls_mall_head {
struct tcf_result res;
u32 handle;
u32 flags;
-   struct rcu_head rcu;
+   union {
+   struct work_struct work;
+   struct rcu_head rcu;
+   };
 };
 
 static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -41,13 +44,23 @@ static int mall_init(struct tcf_proto *tp)
return 0;
 }
 
+static void mall_destroy_work(struct work_struct *work)
+{
+   struct cls_mall_head *head = container_of(work, struct cls_mall_head,
+ work);
+   rtnl_lock();
+   tcf_exts_destroy(>exts);
+   kfree(head);
+   rtnl_unlock();
+}
+
 static void mall_destroy_rcu(struct rcu_head *rcu)
 {
struct cls_mall_head *head = container_of(rcu, struct cls_mall_head,
  rcu);
 
-   tcf_exts_destroy(>exts);
-   kfree(head);
+   INIT_WORK(>work, mall_destroy_work);
+   tcf_queue_work(>work);
 }
 
 static int mall_replace_hw_filter(struct tcf_proto *tp,
-- 
2.13.0

[Patch net 00/16] net_sched: fix races with RCU callbacks

2017-10-26 Thread Cong Wang

Recently, the RCU callbacks used in TC filters and TC actions keep
drawing my attention, they introduce at least 4 race condition bugs:

1. A simple one fixed by Daniel:

commit c78e1746d3ad7d548bdf3fe491898cc453911a49
Author: Daniel Borkmann 
Date:   Wed May 20 17:13:33 2015 +0200

net: sched: fix call_rcu() race on classifier module unloads

2. A very nasty one fixed by me:

commit 1697c4bb5245649a23f06a144cc38c06715e1b65
Author: Cong Wang 
Date:   Mon Sep 11 16:33:32 2017 -0700

net_sched: carefully handle tcf_block_put()

3. Two more bugs found by Chris:
https://patchwork.ozlabs.org/patch/826696/
https://patchwork.ozlabs.org/patch/826695/

Usually RCU callbacks are simple, however for TC filters and actions,
they are complex because at least TC actions could be destroyed
together with the TC filter in one callback. And RCU callbacks are
invoked in BH context, without locking they are parallel too. All of
these contribute to the cause of these nasty bugs.

Alternatively, we could also:

a) Introduce a spinlock to serialize these RCU callbacks. But as I
said in commit 1697c4bb5245 ("net_sched: carefully handle
tcf_block_put()"), it is very hard to do because of tcf_chain_dump().
Potentially we need to do a lot of work to make it possible (if not
impossible).

b) Just get rid of these RCU callbacks, because they are not
necessary at all, callers of these call_rcu() are all on slow paths
and holding RTNL lock, so blocking is allowed in their contexts.
However, David and Eric dislike adding synchronize_rcu() here.

As suggested by Paul, we could defer the work to a workqueue and
gain the permission of holding RTNL again without any performance
impact, however, in tcf_block_put() we could have a deadlock when
flushing workqueue while holding RTNL lock, the trick here is to
defer the work itself in workqueue and make it queued after all
other works so that we keep the same ordering to avoid any
use-after-free. Please see the first patch for details.

Patch 1 introduces the infrastructure, patch 2~12 move each
tc filter to the new tc filter workqueue, patch 13 adds
an assertion to catch potential bugs like this, patch 14
closes another rcu callback race, patch 15 and patch 16 add
new test cases.

Reported-by: Chris Mi 
Cc: Daniel Borkmann 
Cc: Jiri Pirko 
Cc: John Fastabend 
Cc: Jamal Hadi Salim 
Cc: "Paul E. McKenney" 
Signed-off-by: Cong Wang 

Chris Mi (2):
  selftests: Introduce a new script to generate tc batch file
  selftests: Introduce a new test case to tc testsuite

Cong Wang (14):
  net_sched: introduce a workqueue for RCU callbacks of tc filter
  net_sched: use tcf_queue_work() in basic filter
  net_sched: use tcf_queue_work() in bpf filter
  net_sched: use tcf_queue_work() in cgroup filter
  net_sched: use tcf_queue_work() in flow filter
  net_sched: use tcf_queue_work() in flower filter
  net_sched: use tcf_queue_work() in fw filter
  net_sched: use tcf_queue_work() in matchall filter
  net_sched: use tcf_queue_work() in u32 filter
  net_sched: use tcf_queue_work() in route filter
  net_sched: use tcf_queue_work() in rsvp filter
  net_sched: use tcf_queue_work() in tcindex filter
  net_sched: add rtnl assertion to tcf_exts_destroy()
  net_sched: fix call_rcu() race on act_sample module removal

 include/net/pkt_cls.h  |  3 +
 include/net/sch_generic.h  |  2 +
 net/sched/act_sample.c |  1 +
 net/sched/cls_api.c| 69 --
 net/sched/cls_basic.c  | 20 ++-
 net/sched/cls_bpf.c| 19 +-
 net/sched/cls_cgroup.c | 22 +--
 net/sched/cls_flow.c   | 19 +-
 net/sched/cls_flower.c | 19 +-
 net/sched/cls_fw.c | 19 +-
 net/sched/cls_matchall.c   | 19 +-
 net/sched/cls_route.c  | 19 +-
 net/sched/cls_rsvp.h   | 19 +-
 net/sched/cls_tcindex.c| 38 ++--
 net/sched/cls_u32.c| 29 -
 .../tc-testing/tc-tests/filters/tests.json | 23 +++-
 tools/testing/selftests/tc-testing/tdc.py  | 20 +--
 tools/testing/selftests/tc-testing/tdc_batch.py| 62 +++
 tools/testing/selftests/tc-testing/tdc_config.py   |  2 +
 19 files changed, 367 insertions(+), 57 deletions(-)
 create mode 100755 tools/testing/selftests/tc-testing/tdc_batch.py

-- 
2.13.0

[PATCH net-next] nfp: inform the VF driver needs to be restarted after changing the MAC

2017-10-26 Thread Jakub Kicinski

From: Pablo Cascón 

Add message to inform the VF MAC was changed and the need to restart
the VF driver for the changes to be effective.

Signed-off-by: Pablo Cascón 
Signed-off-by: Jakub Kicinski 
---
 drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c 
b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c
index e6d2e06b050c..8b1b962cf1d1 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c
@@ -112,7 +112,13 @@ int nfp_app_set_vf_mac(struct net_device *netdev, int vf, 
u8 *mac)
writew(get_unaligned_be16(mac + 4),
   app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_MAC_LO);
 
-   return nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_MAC, "MAC");
+   err = nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_MAC, "MAC");
+   if (!err)
+   nfp_info(app->pf->cpp,
+"MAC %pM set on VF %d, reload the VF driver to make 
this change effective.\n",
+mac, vf);
+
+   return err;
 }
 
 int nfp_app_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
-- 
2.14.1

RE: removing bridge in vlan_filtering mode requests delete of attached ports main MAC address

2017-10-26 Thread Keller, Jacob E

> -Original Message-
> From: Keller, Jacob E
> Sent: Thursday, October 26, 2017 1:33 PM
> To: Keller, Jacob E ; vyase...@redhat.com;
> netdev@vger.kernel.org
> Cc: Malek, Patryk 
> Subject: RE: removing bridge in vlan_filtering mode requests delete of 
> attached
> ports main MAC address
> 
> > -Original Message-
> > From: netdev-ow...@vger.kernel.org [mailto:netdev-
> ow...@vger.kernel.org]
> > On Behalf Of Keller, Jacob E
> > Sent: Thursday, October 26, 2017 1:27 PM
> > To: vyase...@redhat.com; netdev@vger.kernel.org
> > Cc: Malek, Patryk 
> > Subject: RE: removing bridge in vlan_filtering mode requests delete of 
> > attached
> > ports main MAC address
> >
> > > -Original Message-
> > > From: Vlad Yasevich [mailto:vyase...@redhat.com]
> > > Sent: Thursday, October 26, 2017 3:22 AM
> > > To: Keller, Jacob E ; netdev@vger.kernel.org
> > > Cc: Malek, Patryk 
> > > Subject: Re: removing bridge in vlan_filtering mode requests delete of
> attached
> > > ports main MAC address
> > >
> > > Hi Jake
> > >
> > > I think adding a !fdb->local should work.  local fdb contain the address 
> > > of
> > assigned
> > > to
> > > the ports of the bridge and those shouldn't be directly removed.
> > >
> > > If that works,  that looks like the right solution.
> > >
> > > -vlad
> > >
> >
> > So this does prevent us from removing the port's address. However, if I add
> two
> > devices to the bridge, then after removing the bridge, each device now keeps
> > both permanent addresses in their list, which isn't what we want is it?
> >
> > Do we even want to assign the local fdb addresses to every port?
> >
> > Obviously, I don't fully understand this code, so I think I'm missing 
> > something
> > here.
> >
> > Regards,
> > Jake
> >
> 
> Ok, I tried this again, and it didn't end up crossing the local device 
> addresses to
> each port. I'm not sure how that happened the first time yet, so maybe it is
> correct to skip removing local addresses... but if we skip removing them, 
> wouldn't
> we want to skip adding them too?
> 
> Thanks,
> Jake

I'm still digging into this. It turns out adding two devices, enabling vlan 
filtering, and deleting the bridge sometimes (but not always, not sure what 
condition triggers it) causes the hw address of one of the devices to be 
assigned to the other device.

I'm still unsure whether sync_static should be assigning local addresses to 
each device, but it appears like it should. In this case, I'm really unsure how 
to handle this case properly.

If we add local addresses, we need to delete the ones that aren't specific to 
that device so that after removing the bridge we end up in the original 
configuration.. but I'm not really sure how best to do this.

Using !fdb->is_local in unsync_static works to resolve my issue, but I believe 
it papers over other issues, since it means that we'll never delete static 
addresses when deleting the ports or exiting promiscuous mode.

I think checking fdb->dst might work, but that would break if we manually add a 
new address and tag it as permanent, see line 806 of br_fdb.c... In this case, 
we'd never delete this address even though it was not originally on the device.

I checked other drivers, and it turns out that at least one (ixgbe) doesn't 
have this problem because the hw address is special and isn't actually stored 
in a hardware MAC filter list. In i40e we keep the hardware address in the same 
list as all the other MAC filters.

We could "fix" this in i40e by treating the hw permanent address separately and 
essentially ignoring it from the dev_uc_del() calls.. but I still feel like 
this papers over the issues in the bridge code.

Any thoughts or suggestions? I haven't checked other drivers to see how they 
handle addresses in the unicast table (whether they treat the hw address as 
special or not, like ixgbe ultimately does).

Thanks,
Jake

Re: [PATCH net-next 5/9] net: dsa: use dsa_is_user_port everywhere

2017-10-26 Thread Florian Fainelli

On 10/26/2017 08:22 AM, Vivien Didelot wrote:
> Most of the DSA code still check ds->enabled_port_mask directly to
> inspect a given port type instead of using the provided dsa_is_user_port
> helper. Change this.
> 
> Signed-off-by: Vivien Didelot 

Reviewed-by: Florian Fainelli 
-- 
Florian

[iproute2 PATCH] tc/mqprio: Offload mode and shaper options in mqprio

2017-10-26 Thread Amritha Nambiar

This patch was previously submitted as RFC. Submitting this as
non-RFC now that the tc/mqprio changes are accepted in net-next.

Adds new mqprio options for 'mode' and 'shaper'. The mode
option can take values for offload modes such as 'dcb' (default),
'channel' with the 'hw' option set to 1. The new 'channel' mode
supports offloading TCs and other queue configurations. The
'shaper' option is to support HW shapers ('dcb' default) and
takes the value 'bw_rlimit' for bandwidth rate limiting. The
parameters to the bw_rlimit shaper are minimum and maximum
bandwidth rates. New HW shapers in future can be supported
through the shaper attribute.

# tc qdisc add dev eth0 root mqprio num_tc 2  map 0 0 0 0 1 1 1 1\
  queues 4@0 4@4 hw 1 mode channel shaper bw_rlimit\
  min_rate 1Gbit 2Gbit max_rate 4Gbit 5Gbit

# tc qdisc show dev eth0

qdisc mqprio 804a: root  tc 2 map 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0
 queues:(0:3) (4:7)
 mode:channel
 shaper:bw_rlimit   min_rate:1Gbit 2Gbit   max_rate:4Gbit 5Gbit

Signed-off-by: Amritha Nambiar 
---
 include/uapi/linux/pkt_sched.h |   32 +++
 tc/q_mqprio.c  |  192 +++-
 2 files changed, 217 insertions(+), 7 deletions(-)

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 099bf55..e95b5c9 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -625,6 +625,22 @@ enum {
 
 #define TC_MQPRIO_HW_OFFLOAD_MAX (__TC_MQPRIO_HW_OFFLOAD_MAX - 1)
 
+enum {
+   TC_MQPRIO_MODE_DCB,
+   TC_MQPRIO_MODE_CHANNEL,
+   __TC_MQPRIO_MODE_MAX
+};
+
+#define __TC_MQPRIO_MODE_MAX (__TC_MQPRIO_MODE_MAX - 1)
+
+enum {
+   TC_MQPRIO_SHAPER_DCB,
+   TC_MQPRIO_SHAPER_BW_RATE,   /* Add new shapers below */
+   __TC_MQPRIO_SHAPER_MAX
+};
+
+#define __TC_MQPRIO_SHAPER_MAX (__TC_MQPRIO_SHAPER_MAX - 1)
+
 struct tc_mqprio_qopt {
__u8num_tc;
__u8prio_tc_map[TC_QOPT_BITMASK + 1];
@@ -633,6 +649,22 @@ struct tc_mqprio_qopt {
__u16   offset[TC_QOPT_MAX_QUEUE];
 };
 
+#define TC_MQPRIO_F_MODE   0x1
+#define TC_MQPRIO_F_SHAPER 0x2
+#define TC_MQPRIO_F_MIN_RATE   0x4
+#define TC_MQPRIO_F_MAX_RATE   0x8
+
+enum {
+   TCA_MQPRIO_UNSPEC,
+   TCA_MQPRIO_MODE,
+   TCA_MQPRIO_SHAPER,
+   TCA_MQPRIO_MIN_RATE64,
+   TCA_MQPRIO_MAX_RATE64,
+   __TCA_MQPRIO_MAX,
+};
+
+#define TCA_MQPRIO_MAX (__TCA_MQPRIO_MAX - 1)
+
 /* SFB */
 
 enum {
diff --git a/tc/q_mqprio.c b/tc/q_mqprio.c
index d6718fb..cd305b7 100644
--- a/tc/q_mqprio.c
+++ b/tc/q_mqprio.c
@@ -27,6 +27,10 @@ static void explain(void)
fprintf(stderr, "Usage: ... mqprio [num_tc NUMBER] [map P0 P1 ...]\n");
fprintf(stderr, "  [queues count1@offset1 
count2@offset2 ...] ");
fprintf(stderr, "[hw 1|0]\n");
+   fprintf(stderr, "  [mode dcb|channel]\n");
+   fprintf(stderr, "  [shaper bw_rlimit SHAPER_PARAMS]\n"
+   "Where: SHAPER_PARAMS := { min_rate MIN_RATE1 MIN_RATE2 ...|\n"
+   "  max_rate MAX_RATE1 MAX_RATE2 ... 
}\n");
 }
 
 static int mqprio_parse_opt(struct qdisc_util *qu, int argc,
@@ -40,6 +44,12 @@ static int mqprio_parse_opt(struct qdisc_util *qu, int argc,
.count = { },
.offset = { },
};
+   __u64 min_rate64[TC_QOPT_MAX_QUEUE] = {0};
+   __u64 max_rate64[TC_QOPT_MAX_QUEUE] = {0};
+   __u16 shaper = TC_MQPRIO_SHAPER_DCB;
+   __u16 mode = TC_MQPRIO_MODE_DCB;
+   struct rtattr *tail;
+   __u32 flags = 0;
 
while (argc > 0) {
idx = 0;
@@ -92,6 +102,68 @@ static int mqprio_parse_opt(struct qdisc_util *qu, int argc,
return -1;
}
idx++;
+   } else if (opt.hw && strcmp(*argv, "mode") == 0) {
+   NEXT_ARG();
+   if (matches(*argv, "dcb") == 0) {
+   mode = TC_MQPRIO_MODE_DCB;
+   } else if (matches(*argv, "channel") == 0) {
+   mode = TC_MQPRIO_MODE_CHANNEL;
+   }  else {
+   fprintf(stderr, "Illegal mode (%s)\n",
+   *argv);
+   return -1;
+   }
+   if (mode != TC_MQPRIO_MODE_DCB)
+   flags |= TC_MQPRIO_F_MODE;
+   idx++;
+   } else if (opt.hw && strcmp(*argv, "shaper") == 0) {
+   NEXT_ARG();
+   if (matches(*argv, "dcb") == 0) {
+   shaper = TC_MQPRIO_SHAPER_DCB;
+   } else if (matches(*argv, "bw_rlimit") == 0) {
+   shaper =

Re: [PATCH net-next] tcp: add tracepoint trace_tcp_retransmit_synack()

2017-10-26 Thread Song Liu


> On Oct 25, 2017, at 8:13 PM, kbuild test robot <l...@intel.com> wrote:
> 
> Hi Song,
> 
> [auto build test WARNING on net-next/master]
> 
> url:
> https://github.com/0day-ci/linux/commits/Song-Liu/tcp-add-tracepoint-trace_tcp_retransmit_synack/20171026-010651
> reproduce:
># apt-get install sparse
>make ARCH=x86_64 allmodconfig
>make C=1 CF=-D__CHECK_ENDIAN__
> 
> 
> sparse warnings: (new ones prefixed by >>)
> 
> 
> vim +281 include/trace/events/tcp.h
> 
>   241 
>   242 TP_PROTO(const struct sock *sk, const struct request_sock *req),
>   243 
>   244 TP_ARGS(sk, req),
>   245 
>   246 TP_STRUCT__entry(
>   247 __field(const void *, skaddr)
>   248 __field(const void *, req)
>   249 __field(__u16, sport)
>   250 __field(__u16, dport)
>   251 __array(__u8, saddr, 4)
>   252 __array(__u8, daddr, 4)
>   253 __array(__u8, saddr_v6, 16)
>   254 __array(__u8, daddr_v6, 16)
>   255 ),
>   256 
>   257 TP_fast_assign(
>   258 struct inet_request_sock *ireq = inet_rsk(req);
>   259 struct in6_addr *pin6;
>   260 __be32 *p32;
>   261 
>   262 __entry->skaddr = sk;
>   263 __entry->req = req;
>   264 
>   265 __entry->sport = ireq->ir_num;
>   266 __entry->dport = ntohs(ireq->ir_rmt_port);
>   267 
>   268 p32 = (__be32 *) __entry->saddr;
>   269 *p32 = ireq->ir_loc_addr;
>   270 
>   271 p32 = (__be32 *) __entry->daddr;
>   272 *p32 = ireq->ir_rmt_addr;
>   273 
>> 274  #if IS_ENABLED(CONFIG_IPV6)
>   275 if (sk->sk_family == AF_INET6) {
>   276 pin6 = (struct in6_addr *)__entry->saddr_v6;
>   277 *pin6 = ireq->ir_v6_loc_addr;
>   278 pin6 = (struct in6_addr *)__entry->daddr_v6;
>   279 *pin6 = ireq->ir_v6_rmt_addr;
>   280 } else
>> 281  #endif

In this case, we are putting CONFIG_IPV6 in TRACE_EVENT macro, which generates
warnings like:

./include/trace/events/tcp.h:274:1: error: directive in argument list
./include/trace/events/tcp.h:281:1: error: directive in argument list

Seems these warnings cannot be easily avoided. This is also the same pattern we 
have been using in include/trace/events/tcp.h. 

Any suggestions on how we should proceed from here?

Thanks,
Song

[PATCH net-next] liquidio: fix kernel panic in VF driver

2017-10-26 Thread Felix Manlunas

Doing ifconfig down on VF driver in the middle of receiving line rate
traffic causes a kernel panic:

LiquidIO_VF :02:00.3: should not come here should not get rx when poll 
mode = 0 for vf
BUG: unable to handle kernel NULL pointer dereference at   (null)
.
.
.
Call Trace:
 
 ? tasklet_action+0x102/0x120
 __do_softirq+0x91/0x292
 irq_exit+0xb6/0xc0
 do_IRQ+0x4f/0xd0
 common_interrupt+0x93/0x93
 
RIP: 0010:cpuidle_enter_state+0x142/0x2f0
RSP: 0018:a6403e20 EFLAGS: 0246 ORIG_RAX: ff59
RAX:  RBX: 0003 RCX: 001f
RDX:  RSI: 2ab7519f RDI: 
RBP: a6403e58 R08: 0084 R09: 0018
R10: a6403df0 R11: 03c7 R12: 0003
R13: d27ebd806800 R14: a64d40d8 R15: 007be072823f
 cpuidle_enter+0x17/0x20
 call_cpuidle+0x23/0x40
 do_idle+0x18c/0x1f0
 cpu_startup_entry+0x64/0x70
 rest_init+0xa5/0xb0
 start_kernel+0x45e/0x46b
 x86_64_start_reservations+0x24/0x26
 x86_64_start_kernel+0x6f/0x72
 secondary_startup_64+0xa5/0xa5
Code:  Bad RIP value.
RIP:   (null) RSP: 9246ed003f28
CR2: 
---[ end trace 92731e80f31b7d7d ]---
Kernel panic - not syncing: Fatal exception in interrupt
Kernel Offset: 0x2400 from 0x8100 (relocation range: 
0x8000-0xbfff)
---[ end Kernel panic - not syncing: Fatal exception in interrupt

Reason is:  in the function assigned to net_device_ops->ndo_stop, the steps
for bringing down the interface are done in the wrong order.  The step that
notifies the NIC firmware to stop forwarding packets to host is done too
late.  Fix it by moving that step to the beginning.

Signed-off-by: Felix Manlunas 
Signed-off-by: Raghu Vatsavayi 
---
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c 
b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 4c3b568..ed1f073 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -1288,6 +1288,9 @@ static int liquidio_stop(struct net_device *netdev)
struct octeon_device *oct = lio->oct_dev;
struct napi_struct *napi, *n;
 
+   /* tell Octeon to stop forwarding packets to host */
+   send_rx_ctrl_cmd(lio, 0);
+
if (oct->props[lio->ifidx].napi_enabled) {
list_for_each_entry_safe(napi, n, >napi_list, dev_list)
napi_disable(napi);
@@ -1305,9 +1308,6 @@ static int liquidio_stop(struct net_device *netdev)
netif_carrier_off(netdev);
lio->link_changes++;
 
-   /* tell Octeon to stop forwarding packets to host */
-   send_rx_ctrl_cmd(lio, 0);
-
ifstate_reset(lio, LIO_IFSTATE_RUNNING);
 
txqs_stop(netdev);

[PATCH net-next 07/15] tcp: Namespace-ify sysctl_tcp_rfc1337

2017-10-26 Thread Eric Dumazet

Signed-off-by: Eric Dumazet 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  1 -
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp_input.c   |  1 -
 net/ipv4/tcp_minisocks.c   |  2 +-
 5 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 
ffa2cf3dc747ca9443df3927dc7928c18357f872..968edce38eb5d3399724b3142277eab44f19f2fb
 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -134,6 +134,7 @@ struct netns_ipv4 {
int sysctl_tcp_slow_start_after_idle;
int sysctl_tcp_retrans_collapse;
int sysctl_tcp_stdurg;
+   int sysctl_tcp_rfc1337;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
0cf9de8506916c67369ce78833207ba648f34a10..7f88987bc62dd76206c15eb91f2990d4469e5421
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -240,7 +240,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 
 /* sysctl variables for tcp */
-extern int sysctl_tcp_rfc1337;
 extern int sysctl_tcp_abort_on_overflow;
 extern int sysctl_tcp_max_orphans;
 extern int sysctl_tcp_fack;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 
a34bb75815c15afc077ba7ff36939b5abc9229f6..832e554235df37770809541ad8f9f1ca2f201739
 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -400,13 +400,6 @@ static struct ctl_table ipv4_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
-   {
-   .procname   = "tcp_rfc1337",
-   .data   = _tcp_rfc1337,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec
-   },
{
.procname   = "inet_peer_threshold",
.data   = _peer_threshold,
@@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
+   {
+   .procname   = "tcp_rfc1337",
+   .data   = _net.ipv4.sysctl_tcp_rfc1337,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec
+   },
{ }
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 
6c3655b538f6b2315af7dc611acc574f7489bde6..d2d3f62387a98d7f955f8c9e27320b9722035b2a
 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -87,7 +87,6 @@ EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
 /* rfc5961 challenge ack rate limiting */
 int sysctl_tcp_challenge_ack_limit = 1000;
 
-int sysctl_tcp_rfc1337 __read_mostly;
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 int sysctl_tcp_frto __read_mostly = 2;
 int sysctl_tcp_min_rtt_wlen __read_mostly = 300;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 
a952357054f4ddfbb98746ebcf323d1c45f7e951..2abaa4c1fe0108f2645d8e783ae6b48e87a82fb3
 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -180,7 +180,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, 
struct sk_buff *skb,
 * Oh well... nobody has a sufficient solution to this
 * protocol bug yet.
 */
-   if (sysctl_tcp_rfc1337 == 0) {
+   if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) {
 kill:
inet_twsk_deschedule_put(tw);
return TCP_TW_SUCCESS;
-- 
2.15.0.rc2.357.g7e34df9404-goog

[PATCH net-next 06/15] tcp: Namespace-ify sysctl_tcp_stdurg

2017-10-26 Thread Eric Dumazet

Signed-off-by: Eric Dumazet 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  1 -
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp_input.c   |  3 +--
 4 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 
b28c172b10e497f235b51aae0fc2d3bbf7cc51f3..ffa2cf3dc747ca9443df3927dc7928c18357f872
 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -133,6 +133,7 @@ struct netns_ipv4 {
int sysctl_tcp_thin_linear_timeouts;
int sysctl_tcp_slow_start_after_idle;
int sysctl_tcp_retrans_collapse;
+   int sysctl_tcp_stdurg;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
085848e4de38058bb09f025387c713ade32b263e..0cf9de8506916c67369ce78833207ba648f34a10
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -240,7 +240,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 
 /* sysctl variables for tcp */
-extern int sysctl_tcp_stdurg;
 extern int sysctl_tcp_rfc1337;
 extern int sysctl_tcp_abort_on_overflow;
 extern int sysctl_tcp_max_orphans;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 
533b92ad39dd0cada542028fe2f276d9eebcd2c8..a34bb75815c15afc077ba7ff36939b5abc9229f6
 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -400,13 +400,6 @@ static struct ctl_table ipv4_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
-   {
-   .procname   = "tcp_stdurg",
-   .data   = &sysctl_tcp_stdurg,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec
-   },
{
.procname   = "tcp_rfc1337",
.data   = &sysctl_tcp_rfc1337,
@@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
+   {
+   .procname   = "tcp_stdurg",
+   .data   = &init_net.ipv4.sysctl_tcp_stdurg,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec
+   },
{ }
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 
2cc56fd57b751dd6b457c15067aa9309683a04a8..6c3655b538f6b2315af7dc611acc574f7489bde6
 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -87,7 +87,6 @@ EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
 /* rfc5961 challenge ack rate limiting */
 int sysctl_tcp_challenge_ack_limit = 1000;
 
-int sysctl_tcp_stdurg __read_mostly;
 int sysctl_tcp_rfc1337 __read_mostly;
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 int sysctl_tcp_frto __read_mostly = 2;
@@ -5103,7 +5102,7 @@ static void tcp_check_urg(struct sock *sk, const struct 
tcphdr *th)
struct tcp_sock *tp = tcp_sk(sk);
u32 ptr = ntohs(th->urg_ptr);
 
-   if (ptr && !sysctl_tcp_stdurg)
+   if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
ptr--;
ptr += ntohl(th->seq);
 
-- 
2.15.0.rc2.357.g7e34df9404-goog

[PATCH net-next 05/15] tcp: Namespace-ify sysctl_tcp_retrans_collapse

2017-10-26 Thread Eric Dumazet

Signed-off-by: Eric Dumazet 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  1 -
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp_ipv4.c|  2 +-
 net/ipv4/tcp_output.c  |  5 +
 5 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 
8662692686b3af98a94a176230b9ed147881d87a..b28c172b10e497f235b51aae0fc2d3bbf7cc51f3
 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -132,6 +132,7 @@ struct netns_ipv4 {
int sysctl_tcp_recovery;
int sysctl_tcp_thin_linear_timeouts;
int sysctl_tcp_slow_start_after_idle;
+   int sysctl_tcp_retrans_collapse;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
09c79705a0742aa8c22b3b7795d01b6c685d32e2..085848e4de38058bb09f025387c713ade32b263e
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -240,7 +240,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 
 /* sysctl variables for tcp */
-extern int sysctl_tcp_retrans_collapse;
 extern int sysctl_tcp_stdurg;
 extern int sysctl_tcp_rfc1337;
 extern int sysctl_tcp_abort_on_overflow;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 
40d69af8b363bc236e23879973872d8f9346d85e..533b92ad39dd0cada542028fe2f276d9eebcd2c8
 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -386,13 +386,6 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
 }
 
 static struct ctl_table ipv4_table[] = {
-   {
-   .procname   = "tcp_retrans_collapse",
-   .data   = &sysctl_tcp_retrans_collapse,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec
-   },
{
.procname   = "tcp_max_orphans",
.data   = &sysctl_tcp_max_orphans,
@@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
+   {
+   .procname   = "tcp_retrans_collapse",
+   .data   = &init_net.ipv4.sysctl_tcp_retrans_collapse,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec
+   },
{ }
 };
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 
cea63a4b59655823def7a423d27191003c7f084c..2bc6ba2059d32aa848dbc415b4b0e194b61b0268
 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2487,7 +2487,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_early_retrans = 3;
net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 
behavior.  */
-
+   net->ipv4.sysctl_tcp_retrans_collapse = 1;
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 
bc93a346e6e91289c2aeb2f2d2522b809da12dd6..735fff44aaca3d5afbb0ac55ebdb9898b6c44ae6
 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -44,9 +44,6 @@
 
 #include 
 
-/* People can turn this off for buggy TCP's found in printers etc. */
-int sysctl_tcp_retrans_collapse __read_mostly = 1;
-
 /* People can turn this on to work with those rare, broken TCPs that
  * interpret the window field as a signed quantity.
  */
@@ -2747,7 +2744,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, 
struct sk_buff *to,
struct sk_buff *skb = to, *tmp;
bool first = true;
 
-   if (!sysctl_tcp_retrans_collapse)
+   if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
return;
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
return;
-- 
2.15.0.rc2.357.g7e34df9404-goog

[PATCH net-next 13/15] tcp: Namespace-ify sysctl_tcp_app_win

2017-10-26 Thread Eric Dumazet

Signed-off-by: Eric Dumazet 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  1 -
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp_input.c   |  8 
 net/ipv4/tcp_ipv4.c|  1 +
 5 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 
956957a77db96ad3d231cc018c13503d615d8d2e..63f91d52cbc0ad35d8e04a8da0d9f57aa960bcb0
 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -139,6 +139,7 @@ struct netns_ipv4 {
int sysctl_tcp_fack;
int sysctl_tcp_max_reordering;
int sysctl_tcp_dsack;
+   int sysctl_tcp_app_win;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
aba20c4828ee912d9f0f3ef49f3de5376729c022..c6bee85a3dec0dea6d4402d89184ade02a637a2e
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -244,7 +244,6 @@ extern int sysctl_tcp_max_orphans;
 extern long sysctl_tcp_mem[3];
 extern int sysctl_tcp_wmem[3];
 extern int sysctl_tcp_rmem[3];
-extern int sysctl_tcp_app_win;
 extern int sysctl_tcp_adv_win_scale;
 extern int sysctl_tcp_frto;
 extern int sysctl_tcp_nometrics_save;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 
7652a9c2a65d3f1cfa0a75d1198e1d9d56761c35..e057788834a99cf99e141a602ddbe19b8e6fce3c
 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -437,13 +437,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler   = proc_dointvec_minmax,
.extra1 = &one,
},
-   {
-   .procname   = "tcp_app_win",
-   .data   = &sysctl_tcp_app_win,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec
-   },
{
.procname   = "tcp_adv_win_scale",
.data   = &sysctl_tcp_adv_win_scale,
@@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
+   {
+   .procname   = "tcp_app_win",
+   .data   = &init_net.ipv4.sysctl_tcp_app_win,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec
+   },
{ }
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 
db4798f99bb093b6d5a3e0fdd76efb83b88da49e..06a8c27e1a690e3b26cb6773320bafa31b06d3b3
 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -77,7 +77,6 @@
 #include 
 #include 
 
-int sysctl_tcp_app_win __read_mostly = 31;
 int sysctl_tcp_adv_win_scale __read_mostly = 1;
 EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
 
@@ -426,6 +425,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
  */
 void tcp_init_buffer_space(struct sock *sk)
 {
+   int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
struct tcp_sock *tp = tcp_sk(sk);
int maxwin;
 
@@ -444,14 +444,14 @@ void tcp_init_buffer_space(struct sock *sk)
if (tp->window_clamp >= maxwin) {
tp->window_clamp = maxwin;
 
-   if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
+   if (tcp_app_win && maxwin > 4 * tp->advmss)
tp->window_clamp = max(maxwin -
-  (maxwin >> sysctl_tcp_app_win),
+  (maxwin >> tcp_app_win),
   4 * tp->advmss);
}
 
/* Force reservation of one segment. */
-   if (sysctl_tcp_app_win &&
+   if (tcp_app_win &&
tp->window_clamp > 2 * tp->advmss &&
tp->window_clamp + tp->advmss > maxwin)
tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 
d9d4d191e8f3c962a6ee68015ffe5a6e7fb8e9c1..189664ebd28e4cda7ef40a47591c3bd8cac3574b
 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2490,6 +2490,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_retrans_collapse = 1;
net->ipv4.sysctl_tcp_max_reordering = 300;
net->ipv4.sysctl_tcp_dsack = 1;
+   net->ipv4.sysctl_tcp_app_win = 31;
 
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
-- 
2.15.0.rc2.357.g7e34df9404-goog

[PATCH net-next 15/15] tcp: Namespace-ify sysctl_tcp_frto

2017-10-26 Thread Eric Dumazet

Signed-off-by: Eric Dumazet 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  1 -
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp_input.c   |  3 +--
 net/ipv4/tcp_ipv4.c|  1 +
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 
9dbb07d4eff465428817831e55c6a4922b7208fb..f4622e28db3a1484553f51709b144ee769766a28
 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -141,6 +141,7 @@ struct netns_ipv4 {
int sysctl_tcp_dsack;
int sysctl_tcp_app_win;
int sysctl_tcp_adv_win_scale;
+   int sysctl_tcp_frto;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
2572b57682987dd5f3700ed47d63e7238946b9a8..19006a5d073c202995ba63199ab8cde814d6d869
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -244,7 +244,6 @@ extern int sysctl_tcp_max_orphans;
 extern long sysctl_tcp_mem[3];
 extern int sysctl_tcp_wmem[3];
 extern int sysctl_tcp_rmem[3];
-extern int sysctl_tcp_frto;
 extern int sysctl_tcp_nometrics_save;
 extern int sysctl_tcp_moderate_rcvbuf;
 extern int sysctl_tcp_tso_win_divisor;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 
a95123e1e7da706c88bf5553b7d8ef6c2653ab50..f1bcb9b7e082c6688fad12e15be9b872ebed8151
 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -437,13 +437,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler   = proc_dointvec_minmax,
.extra1 = &one,
},
-   {
-   .procname   = "tcp_frto",
-   .data   = &sysctl_tcp_frto,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec
-   },
{
.procname   = "tcp_min_rtt_wlen",
.data   = &sysctl_tcp_min_rtt_wlen,
@@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = &tcp_adv_win_scale_min,
.extra2 = &tcp_adv_win_scale_max,
},
+   {
+   .procname   = "tcp_frto",
+   .data   = &init_net.ipv4.sysctl_tcp_frto,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec
+   },
{ }
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 
d01f390da23dcd4100271b150bd8bc143f7328cf..24950ea3094288cad8d9cd9eb0e0698d6f50e989
 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -81,7 +81,6 @@
 int sysctl_tcp_challenge_ack_limit = 1000;
 
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
-int sysctl_tcp_frto __read_mostly = 2;
 int sysctl_tcp_min_rtt_wlen __read_mostly = 300;
 int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
 int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
@@ -2024,7 +2023,7 @@ void tcp_enter_loss(struct sock *sk)
 * falsely raise the receive window, which results in repeated
 * timeouts and stop-and-go behavior.
 */
-   tp->frto = sysctl_tcp_frto &&
+   tp->frto = net->ipv4.sysctl_tcp_frto &&
   (new_recovery || icsk->icsk_retransmits) &&
   !inet_csk(sk)->icsk_mtup.probe_size;
 }
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 
1fe30fb99308b3e3fd07509b509b0e3727cc5d44..49757c7582c6d2cf413415be2c1b58482659
 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2492,6 +2492,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_dsack = 1;
net->ipv4.sysctl_tcp_app_win = 31;
net->ipv4.sysctl_tcp_adv_win_scale = 1;
+   net->ipv4.sysctl_tcp_frto = 2;
 
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
-- 
2.15.0.rc2.357.g7e34df9404-goog

[PATCH net-next 09/15] tcp: Namespace-ify sysctl_tcp_fack

2017-10-26 Thread Eric Dumazet

Signed-off-by: Eric Dumazet 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  1 -
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp.c |  2 +-
 net/ipv4/tcp_input.c   |  3 +--
 net/ipv4/tcp_minisocks.c   |  2 +-
 6 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 
3875fdf6b18653477408beb25176eac849e65ba4..f0e792beeea974b0850090d7624a3d7490124067
 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -136,6 +136,7 @@ struct netns_ipv4 {
int sysctl_tcp_stdurg;
int sysctl_tcp_rfc1337;
int sysctl_tcp_abort_on_overflow;
+   int sysctl_tcp_fack;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
e28471ce52bd815346676931075588d59306a441..38504d5ab109454219ac9570c3b11e02733384c1
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -241,7 +241,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 /* sysctl variables for tcp */
 extern int sysctl_tcp_max_orphans;
-extern int sysctl_tcp_fack;
 extern int sysctl_tcp_reordering;
 extern int sysctl_tcp_max_reordering;
 extern int sysctl_tcp_dsack;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 
ffd1fd769bba7c3524aa6dfac734e1de0cad1506..1f23be13ce7be8b2a12b82aada36c6351fdfb70a
 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -414,13 +414,6 @@ static struct ctl_table ipv4_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec_jiffies,
},
-   {
-   .procname   = "tcp_fack",
-   .data   = &sysctl_tcp_fack,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec
-   },
{
.procname   = "tcp_max_reordering",
.data   = &sysctl_tcp_max_reordering,
@@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
+   {
+   .procname   = "tcp_fack",
+   .data   = &init_net.ipv4.sysctl_tcp_fack,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec
+   },
{ }
 };
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 
8f36277e82e9dbea750ce66b73018a81b30b5156..4a777ba113b9afe118e3020da65878d85848e1cb
 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2511,7 +2511,7 @@ static int tcp_repair_options_est(struct sock *sk,
return -EINVAL;
 
tp->rx_opt.sack_ok |= TCP_SACK_SEEN;
-   if (sysctl_tcp_fack)
+   if (sock_net(sk)->ipv4.sysctl_tcp_fack)
tcp_enable_fack(tp);
break;
case TCPOPT_TIMESTAMP:
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 
d2d3f62387a98d7f955f8c9e27320b9722035b2a..8941fc32072b69fedcb01afbe837f4d7791dd28d
 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -77,7 +77,6 @@
 #include 
 #include 
 
-int sysctl_tcp_fack __read_mostly;
 int sysctl_tcp_max_reordering __read_mostly = 300;
 int sysctl_tcp_dsack __read_mostly = 1;
 int sysctl_tcp_app_win __read_mostly = 31;
@@ -5690,7 +5689,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, 
struct sk_buff *skb,
tp->tcp_header_len = sizeof(struct tcphdr);
}
 
-   if (tcp_is_sack(tp) && sysctl_tcp_fack)
+   if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_fack)
tcp_enable_fack(tp);
 
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 
a79a89fdb558a5d66ec5241fdb8bfcab196c744d..eba61f77bc36e4f49580d15840c15af565b8b479
 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -491,7 +491,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 
newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
-   if (sysctl_tcp_fack)
+   if (sock_net(sk)->ipv4.sysctl_tcp_fack)
tcp_enable_fack(newtp);
}
newtp->window_clamp = req->rsk_window_clamp;
-- 
2.15.0.rc2.357.g7e34df9404-goog

[PATCH net-next 14/15] tcp: Namespace-ify sysctl_tcp_adv_win_scale

2017-10-26 Thread Eric Dumazet

Signed-off-by: Eric Dumazet 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  9 -
 net/ipv4/sysctl_net_ipv4.c | 18 +-
 net/ipv4/tcp_input.c   | 13 +
 net/ipv4/tcp_ipv4.c|  1 +
 5 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 
63f91d52cbc0ad35d8e04a8da0d9f57aa960bcb0..9dbb07d4eff465428817831e55c6a4922b7208fb
 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -140,6 +140,7 @@ struct netns_ipv4 {
int sysctl_tcp_max_reordering;
int sysctl_tcp_dsack;
int sysctl_tcp_app_win;
+   int sysctl_tcp_adv_win_scale;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
c6bee85a3dec0dea6d4402d89184ade02a637a2e..2572b57682987dd5f3700ed47d63e7238946b9a8
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -244,7 +244,6 @@ extern int sysctl_tcp_max_orphans;
 extern long sysctl_tcp_mem[3];
 extern int sysctl_tcp_wmem[3];
 extern int sysctl_tcp_rmem[3];
-extern int sysctl_tcp_adv_win_scale;
 extern int sysctl_tcp_frto;
 extern int sysctl_tcp_nometrics_save;
 extern int sysctl_tcp_moderate_rcvbuf;
@@ -1308,9 +1307,9 @@ void tcp_select_initial_window(int __space, __u32 mss, 
__u32 *rcv_wnd,
   __u32 *window_clamp, int wscale_ok,
   __u8 *rcv_wscale, __u32 init_rcv_wnd);
 
-static inline int tcp_win_from_space(int space)
+static inline int tcp_win_from_space(const struct sock *sk, int space)
 {
-   int tcp_adv_win_scale = sysctl_tcp_adv_win_scale;
+   int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;
 
return tcp_adv_win_scale <= 0 ?
(space>>(-tcp_adv_win_scale)) :
@@ -1320,13 +1319,13 @@ static inline int tcp_win_from_space(int space)
 /* Note: caller must be prepared to deal with negative returns */
 static inline int tcp_space(const struct sock *sk)
 {
-   return tcp_win_from_space(sk->sk_rcvbuf -
+   return tcp_win_from_space(sk, sk->sk_rcvbuf -
  atomic_read(&sk->sk_rmem_alloc));
 }
 
 static inline int tcp_full_space(const struct sock *sk)
 {
-   return tcp_win_from_space(sk->sk_rcvbuf);
+   return tcp_win_from_space(sk, sk->sk_rcvbuf);
 }
 
 extern void tcp_openreq_init_rwin(struct request_sock *req,
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 
e057788834a99cf99e141a602ddbe19b8e6fce3c..a95123e1e7da706c88bf5553b7d8ef6c2653ab50
 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -437,15 +437,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler   = proc_dointvec_minmax,
.extra1 = &one,
},
-   {
-   .procname   = "tcp_adv_win_scale",
-   .data   = &sysctl_tcp_adv_win_scale,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec_minmax,
-   .extra1 = &tcp_adv_win_scale_min,
-   .extra2 = &tcp_adv_win_scale_max,
-   },
{
.procname   = "tcp_frto",
.data   = &sysctl_tcp_frto,
@@ -1145,6 +1136,15 @@ static struct ctl_table ipv4_net_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
+   {
+   .procname   = "tcp_adv_win_scale",
+   .data   = &init_net.ipv4.sysctl_tcp_adv_win_scale,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec_minmax,
+   .extra1 = &tcp_adv_win_scale_min,
+   .extra2 = &tcp_adv_win_scale_max,
+   },
{ }
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 
06a8c27e1a690e3b26cb6773320bafa31b06d3b3..d01f390da23dcd4100271b150bd8bc143f7328cf
 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -77,9 +77,6 @@
 #include 
 #include 
 
-int sysctl_tcp_adv_win_scale __read_mostly = 1;
-EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
-
 /* rfc5961 challenge ack rate limiting */
 int sysctl_tcp_challenge_ack_limit = 1000;
 
@@ -361,8 +358,8 @@ static int __tcp_grow_window(const struct sock *sk, const 
struct sk_buff *skb)
 {
struct tcp_sock *tp = tcp_sk(sk);
/* Optimize this! */
-   int truesize = tcp_win_from_space(skb->truesize) >> 1;
-   int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
+   int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
+   int window = tcp_win_from_space(sk, sysctl_tcp_rmem[2]) >> 1;
 
while (tp->rcv_ssthresh <= window) {
if (truesize <= skb->len)
@@ -387,7 +384,7 @@ static void tcp_grow_window(struct

[PATCH net-next 11/15] tcp: Namespace-ify sysctl_tcp_max_reordering

2017-10-26 Thread Eric Dumazet

Signed-off-by: Eric Dumazet 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  1 -
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp_input.c   |  3 +--
 net/ipv4/tcp_ipv4.c|  2 ++
 5 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 
f0e792beeea974b0850090d7624a3d7490124067..3f6844665a2fbe66fc0c91bd13e057ac2e03007a
 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -137,6 +137,7 @@ struct netns_ipv4 {
int sysctl_tcp_rfc1337;
int sysctl_tcp_abort_on_overflow;
int sysctl_tcp_fack;
+   int sysctl_tcp_max_reordering;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
c912d63839e7cd4b6ad009344e8017de4c0b1483..2b559c7bf16c70864e77a34c78479d01f538d6cd
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -241,7 +241,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 /* sysctl variables for tcp */
 extern int sysctl_tcp_max_orphans;
-extern int sysctl_tcp_max_reordering;
 extern int sysctl_tcp_dsack;
 extern long sysctl_tcp_mem[3];
 extern int sysctl_tcp_wmem[3];
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 
1f23be13ce7be8b2a12b82aada36c6351fdfb70a..18cd228a20690541936dd6b3d9bb02cb283a9740
 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -414,13 +414,6 @@ static struct ctl_table ipv4_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec_jiffies,
},
-   {
-   .procname   = "tcp_max_reordering",
-   .data   = &sysctl_tcp_max_reordering,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec
-   },
{
.procname   = "tcp_dsack",
.data   = &sysctl_tcp_dsack,
@@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
+   {
+   .procname   = "tcp_max_reordering",
+   .data   = &init_net.ipv4.sysctl_tcp_max_reordering,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec
+   },
{ }
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 
8941fc32072b69fedcb01afbe837f4d7791dd28d..bd6abf9a6d5a0f7a85384a259d61a34c4170eb50
 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -77,7 +77,6 @@
 #include 
 #include 
 
-int sysctl_tcp_max_reordering __read_mostly = 300;
 int sysctl_tcp_dsack __read_mostly = 1;
 int sysctl_tcp_app_win __read_mostly = 31;
 int sysctl_tcp_adv_win_scale __read_mostly = 1;
@@ -887,7 +886,7 @@ static void tcp_update_reordering(struct sock *sk, const 
int metric,
return;
 
if (metric > tp->reordering) {
-   tp->reordering = min(sysctl_tcp_max_reordering, metric);
+   tp->reordering = 
min(sock_net(sk)->ipv4.sysctl_tcp_max_reordering, metric);
 
 #if FASTRETRANS_DEBUG > 1
pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 
2bc6ba2059d32aa848dbc415b4b0e194b61b0268..c379a242abb3546044da9a3ef032f6f68acafe88
 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2488,6 +2488,8 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 
behavior.  */
net->ipv4.sysctl_tcp_retrans_collapse = 1;
+   net->ipv4.sysctl_tcp_max_reordering = 300;
+
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
-- 
2.15.0.rc2.357.g7e34df9404-goog

[PATCH net-next 10/15] tcp: remove stale sysctl_tcp_reordering

2017-10-26 Thread Eric Dumazet

This extern is no longer used.

Signed-off-by: Eric Dumazet 
---
 include/net/tcp.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
38504d5ab109454219ac9570c3b11e02733384c1..c912d63839e7cd4b6ad009344e8017de4c0b1483
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -241,7 +241,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 /* sysctl variables for tcp */
 extern int sysctl_tcp_max_orphans;
-extern int sysctl_tcp_reordering;
 extern int sysctl_tcp_max_reordering;
 extern int sysctl_tcp_dsack;
 extern long sysctl_tcp_mem[3];
-- 
2.15.0.rc2.357.g7e34df9404-goog

[PATCH net-next 12/15] tcp: Namespace-ify sysctl_tcp_dsack

2017-10-26 Thread Eric Dumazet

Signed-off-by: Eric Dumazet 
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  1 -
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp_input.c   |  5 ++---
 net/ipv4/tcp_ipv4.c|  1 +
 5 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 
3f6844665a2fbe66fc0c91bd13e057ac2e03007a..956957a77db96ad3d231cc018c13503d615d8d2e
 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -138,6 +138,7 @@ struct netns_ipv4 {
int sysctl_tcp_abort_on_overflow;
int sysctl_tcp_fack;
int sysctl_tcp_max_reordering;
+   int sysctl_tcp_dsack;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 
2b559c7bf16c70864e77a34c78479d01f538d6cd..aba20c4828ee912d9f0f3ef49f3de5376729c022
 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -241,7 +241,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 /* sysctl variables for tcp */
 extern int sysctl_tcp_max_orphans;
-extern int sysctl_tcp_dsack;
 extern long sysctl_tcp_mem[3];
 extern int sysctl_tcp_wmem[3];
 extern int sysctl_tcp_rmem[3];
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 
18cd228a20690541936dd6b3d9bb02cb283a9740..7652a9c2a65d3f1cfa0a75d1198e1d9d56761c35
 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -414,13 +414,6 @@ static struct ctl_table ipv4_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec_jiffies,
},
-   {
-   .procname   = "tcp_dsack",
-   .data   = &sysctl_tcp_dsack,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec
-   },
{
.procname   = "tcp_mem",
.maxlen = sizeof(sysctl_tcp_mem),
@@ -1145,6 +1138,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec
},
+   {
+   .procname   = "tcp_dsack",
+   .data   = &init_net.ipv4.sysctl_tcp_dsack,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec
+   },
{ }
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 
bd6abf9a6d5a0f7a85384a259d61a34c4170eb50..db4798f99bb093b6d5a3e0fdd76efb83b88da49e
 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -77,7 +77,6 @@
 #include 
 #include 
 
-int sysctl_tcp_dsack __read_mostly = 1;
 int sysctl_tcp_app_win __read_mostly = 31;
 int sysctl_tcp_adv_win_scale __read_mostly = 1;
 EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
@@ -4130,7 +4129,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 
end_seq)
 {
struct tcp_sock *tp = tcp_sk(sk);
 
-   if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+   if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
int mib_idx;
 
if (before(seq, tp->rcv_nxt))
@@ -4165,7 +4164,7 @@ static void tcp_send_dupack(struct sock *sk, const struct 
sk_buff *skb)
NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_enter_quickack_mode(sk);
 
-   if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+   if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
u32 end_seq = TCP_SKB_CB(skb)->end_seq;
 
if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 
c379a242abb3546044da9a3ef032f6f68acafe88..d9d4d191e8f3c962a6ee68015ffe5a6e7fb8e9c1
 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2489,6 +2489,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 
behavior.  */
net->ipv4.sysctl_tcp_retrans_collapse = 1;
net->ipv4.sysctl_tcp_max_reordering = 300;
+   net->ipv4.sysctl_tcp_dsack = 1;
 
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
-- 
2.15.0.rc2.357.g7e34df9404-goog

1 2 3 4 >

1 - 100 of 364 matches

Mail list logo