[PATCH net-next 05/15] nfp: document expected locking in the core

2017-03-21 Thread Jakub Kicinski
Document which fields of nfp_cpp are protected by which locks.

Signed-off-by: Jakub Kicinski 
---
 .../ethernet/netronome/nfp/nfpcore/nfp_cppcore.c   | 33 ++
 1 file changed, 27 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c 
b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
index 62aa7bcee93d..4e08362d8c97 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
@@ -65,28 +65,49 @@ struct nfp_cpp_resource {
u64 end;
 };
 
+/**
+ * struct nfp_cpp - main nfpcore device structure
+ * Following fields are read-only after probe() exits or netdevs are spawned.
+ * @dev:   embedded device structure
+ * @op:low-level implementation ops
+ * @priv:  private data of the low-level implementation
+ * @model: chip model
+ * @interface: chip interface id we are using to reach it
+ * @serial:chip serial number
+ * @imb_cat_table: CPP Mapping Table
+ *
+ * Following fields can be used only in probe() or with rtnl held:
+ * @hwinfo:HWInfo database fetched from the device
+ * @rtsym: firmware run time symbols
+ *
+ * Following fields use explicit locking:
+ * @resource_list: NFP CPP resource list
+ * @resource_lock: protects @resource_list
+ *
+ * @area_cache_list:   cached areas for cpp/xpb read/write speed up
+ * @area_cache_mutex:  protects @area_cache_list
+ *
+ * @waitq: area wait queue
+ */
 struct nfp_cpp {
struct device dev;
 
-   void *priv; /* Private data of the low-level implementation */
+   void *priv;
 
u32 model;
u16 interface;
u8 serial[NFP_SERIAL_LEN];
 
const struct nfp_cpp_operations *op;
-   struct list_head resource_list; /* NFP CPP resource list */
+   struct list_head resource_list;
rwlock_t resource_lock;
wait_queue_head_t waitq;
 
-   /* NFP6000 CPP Mapping Table */
u32 imb_cat_table[16];
 
-   /* Cached areas for cpp/xpb readl/writel speedups */
-   struct mutex area_cache_mutex;  /* Lock for the area cache */
+   struct mutex area_cache_mutex;
struct list_head area_cache_list;
 
-   /* Cached information */
void *hwinfo;
void *rtsym;
 };
-- 
2.11.0



[PATCH v2 net-next 3/4] drivers: net: xgene-v2: Fix port reset

2017-03-21 Thread Iyappan Subramanian
Fixed port reset sequence by adding ECC init.

Signed-off-by: Iyappan Subramanian 
---
 drivers/net/ethernet/apm/xgene-v2/enet.c | 24 ++--
 drivers/net/ethernet/apm/xgene-v2/enet.h |  2 ++
 drivers/net/ethernet/apm/xgene-v2/mac.h  |  1 -
 3 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/apm/xgene-v2/enet.c 
b/drivers/net/ethernet/apm/xgene-v2/enet.c
index b49edee..5998da0 100644
--- a/drivers/net/ethernet/apm/xgene-v2/enet.c
+++ b/drivers/net/ethernet/apm/xgene-v2/enet.c
@@ -38,10 +38,24 @@ u32 xge_rd_csr(struct xge_pdata *pdata, u32 offset)
 int xge_port_reset(struct net_device *ndev)
 {
struct xge_pdata *pdata = netdev_priv(ndev);
+   struct device *dev = &pdata->pdev->dev;
+   u32 data, wait = 10;
 
-   xge_wr_csr(pdata, ENET_SRST, 0x3);
-   xge_wr_csr(pdata, ENET_SRST, 0x2);
-   xge_wr_csr(pdata, ENET_SRST, 0x0);
+   xge_wr_csr(pdata, ENET_CLKEN, 0x3);
+   xge_wr_csr(pdata, ENET_SRST, 0xf);
+   xge_wr_csr(pdata, ENET_SRST, 0);
+   xge_wr_csr(pdata, CFG_MEM_RAM_SHUTDOWN, 1);
+   xge_wr_csr(pdata, CFG_MEM_RAM_SHUTDOWN, 0);
+
+   do {
+   usleep_range(100, 110);
+   data = xge_rd_csr(pdata, BLOCK_MEM_RDY);
+   } while (data != MEM_RDY && wait--);
+
+   if (data != MEM_RDY) {
+   dev_err(dev, "ECC init failed: %x\n", data);
+   return -ETIMEDOUT;
+   }
 
xge_wr_csr(pdata, ENET_SHIM, DEVM_ARAUX_COH | DEVM_AWAUX_COH);
 
@@ -59,13 +73,11 @@ static void xge_traffic_resume(struct net_device *ndev)
xge_wr_csr(pdata, RX_DV_GATE_REG, 1);
 }
 
-int xge_port_init(struct net_device *ndev)
+void xge_port_init(struct net_device *ndev)
 {
struct xge_pdata *pdata = netdev_priv(ndev);
 
pdata->phy_speed = SPEED_1000;
xge_mac_init(pdata);
xge_traffic_resume(ndev);
-
-   return 0;
 }
diff --git a/drivers/net/ethernet/apm/xgene-v2/enet.h 
b/drivers/net/ethernet/apm/xgene-v2/enet.h
index 40371cf..3fd36dc6 100644
--- a/drivers/net/ethernet/apm/xgene-v2/enet.h
+++ b/drivers/net/ethernet/apm/xgene-v2/enet.h
@@ -28,6 +28,7 @@
 #define CFG_MEM_RAM_SHUTDOWN   0xd070
 #define BLOCK_MEM_RDY  0xd074
 
+#define MEM_RDY                0xffffffff
 #define DEVM_ARAUX_COH BIT(19)
 #define DEVM_AWAUX_COH BIT(3)
 
@@ -39,5 +40,6 @@
 void xge_wr_csr(struct xge_pdata *pdata, u32 offset, u32 val);
 u32 xge_rd_csr(struct xge_pdata *pdata, u32 offset);
 int xge_port_reset(struct net_device *ndev);
+void xge_port_init(struct net_device *ndev);
 
 #endif  /* __XGENE_ENET_V2_ENET__H__ */
diff --git a/drivers/net/ethernet/apm/xgene-v2/mac.h 
b/drivers/net/ethernet/apm/xgene-v2/mac.h
index 74397c9..18a9c9d 100644
--- a/drivers/net/ethernet/apm/xgene-v2/mac.h
+++ b/drivers/net/ethernet/apm/xgene-v2/mac.h
@@ -105,7 +105,6 @@ static inline u32 xgene_get_reg_bits(u32 var, int pos, int 
len)
 void xge_mac_enable(struct xge_pdata *pdata);
 void xge_mac_disable(struct xge_pdata *pdata);
 void xge_mac_init(struct xge_pdata *pdata);
-int xge_port_init(struct net_device *ndev);
 void xge_mac_set_station_addr(struct xge_pdata *pdata);
 
 #endif /* __XGENE_ENET_V2_MAC_H__ */
-- 
1.9.1



[PATCH v2 net-next 2/4] drivers: net: xgene-v2: Add ethtool support

2017-03-21 Thread Iyappan Subramanian
Added basic ethtool support.

Signed-off-by: Iyappan Subramanian 
---
 drivers/net/ethernet/apm/xgene-v2/Makefile  |   2 +-
 drivers/net/ethernet/apm/xgene-v2/ethtool.c | 121 
 drivers/net/ethernet/apm/xgene-v2/main.c|   1 +
 drivers/net/ethernet/apm/xgene-v2/main.h|   1 +
 4 files changed, 124 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/apm/xgene-v2/ethtool.c

diff --git a/drivers/net/ethernet/apm/xgene-v2/Makefile 
b/drivers/net/ethernet/apm/xgene-v2/Makefile
index 0fa5975..f16a2b3 100644
--- a/drivers/net/ethernet/apm/xgene-v2/Makefile
+++ b/drivers/net/ethernet/apm/xgene-v2/Makefile
@@ -2,5 +2,5 @@
 # Makefile for APM X-Gene Ethernet v2 driver
 #
 
-xgene-enet-v2-objs := main.o mac.o enet.o ring.o mdio.o
+xgene-enet-v2-objs := main.o mac.o enet.o ring.o mdio.o ethtool.o
 obj-$(CONFIG_NET_XGENE_V2) += xgene-enet-v2.o
diff --git a/drivers/net/ethernet/apm/xgene-v2/ethtool.c 
b/drivers/net/ethernet/apm/xgene-v2/ethtool.c
new file mode 100644
index 0000000..0c426f5
--- /dev/null
+++ b/drivers/net/ethernet/apm/xgene-v2/ethtool.c
@@ -0,0 +1,121 @@
+/*
+ * Applied Micro X-Gene SoC Ethernet v2 Driver
+ *
+ * Copyright (c) 2017, Applied Micro Circuits Corporation
+ * Author(s): Iyappan Subramanian 
+ *   Keyur Chudgar 
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "main.h"
+
+struct xge_gstrings_stats {
+   char name[ETH_GSTRING_LEN];
+   int offset;
+};
+
+#define XGE_STAT(m){ #m, offsetof(struct xge_pdata, stats.m) }
+
+static const struct xge_gstrings_stats gstrings_stats[] = {
+   XGE_STAT(rx_packets),
+   XGE_STAT(tx_packets),
+   XGE_STAT(rx_bytes),
+   XGE_STAT(tx_bytes),
+   XGE_STAT(rx_errors)
+};
+
+#define XGE_STATS_LEN  ARRAY_SIZE(gstrings_stats)
+
+static void xge_get_drvinfo(struct net_device *ndev,
+   struct ethtool_drvinfo *info)
+{
+   struct xge_pdata *pdata = netdev_priv(ndev);
+   struct platform_device *pdev = pdata->pdev;
+
+   strcpy(info->driver, "xgene-enet-v2");
+   strcpy(info->version, XGENE_ENET_V2_VERSION);
+   snprintf(info->fw_version, ETHTOOL_FWVERS_LEN, "N/A");
+   sprintf(info->bus_info, "%s", pdev->name);
+}
+
+static void xge_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
+{
+   u8 *p = data;
+   int i;
+
+   if (stringset != ETH_SS_STATS)
+   return;
+
+   for (i = 0; i < XGE_STATS_LEN; i++) {
+   memcpy(p, gstrings_stats[i].name, ETH_GSTRING_LEN);
+   p += ETH_GSTRING_LEN;
+   }
+}
+
+static int xge_get_sset_count(struct net_device *ndev, int sset)
+{
+   if (sset != ETH_SS_STATS)
+   return -EINVAL;
+
+   return XGE_STATS_LEN;
+}
+
+static void xge_get_ethtool_stats(struct net_device *ndev,
+ struct ethtool_stats *dummy,
+ u64 *data)
+{
+   void *pdata = netdev_priv(ndev);
+   int i;
+
+   for (i = 0; i < XGE_STATS_LEN; i++)
+   *data++ = *(u64 *)(pdata + gstrings_stats[i].offset);
+}
+
+static int xge_get_link_ksettings(struct net_device *ndev,
+ struct ethtool_link_ksettings *cmd)
+{
+   struct phy_device *phydev = ndev->phydev;
+
+   if (!phydev)
+   return -ENODEV;
+
+   return phy_ethtool_ksettings_get(phydev, cmd);
+}
+
+static int xge_set_link_ksettings(struct net_device *ndev,
+ const struct ethtool_link_ksettings *cmd)
+{
+   struct phy_device *phydev = ndev->phydev;
+
+   if (!phydev)
+   return -ENODEV;
+
+   return phy_ethtool_ksettings_set(phydev, cmd);
+}
+
+static const struct ethtool_ops xge_ethtool_ops = {
+   .get_drvinfo = xge_get_drvinfo,
+   .get_link = ethtool_op_get_link,
+   .get_strings = xge_get_strings,
+   .get_sset_count = xge_get_sset_count,
+   .get_ethtool_stats = xge_get_ethtool_stats,
+   .get_link_ksettings = xge_get_link_ksettings,
+   .set_link_ksettings = xge_set_link_ksettings,
+};
+
+void xge_set_ethtool_ops(struct net_device *ndev)
+{
+   ndev->ethtool_ops = &xge_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/apm/xgene-v2/main.c 
b/drivers/net/ethernet/apm/xgene-v2/main.c
index 82ac5b4..e764e58 100644
--- a/drivers/net/eth

[PATCH v2 net-next 1/4] drivers: net: xgene-v2: Add MDIO support

2017-03-21 Thread Iyappan Subramanian
Added phy management support by using phy abstraction layer APIs.

Signed-off-by: Iyappan Subramanian 
---
 drivers/net/ethernet/apm/xgene-v2/Makefile |   2 +-
 drivers/net/ethernet/apm/xgene-v2/mac.c|   2 +-
 drivers/net/ethernet/apm/xgene-v2/mac.h|   1 +
 drivers/net/ethernet/apm/xgene-v2/main.c   |  11 +-
 drivers/net/ethernet/apm/xgene-v2/main.h   |   4 +
 drivers/net/ethernet/apm/xgene-v2/mdio.c   | 167 +
 6 files changed, 182 insertions(+), 5 deletions(-)
 create mode 100644 drivers/net/ethernet/apm/xgene-v2/mdio.c

diff --git a/drivers/net/ethernet/apm/xgene-v2/Makefile 
b/drivers/net/ethernet/apm/xgene-v2/Makefile
index 735309c..0fa5975 100644
--- a/drivers/net/ethernet/apm/xgene-v2/Makefile
+++ b/drivers/net/ethernet/apm/xgene-v2/Makefile
@@ -2,5 +2,5 @@
 # Makefile for APM X-Gene Ethernet v2 driver
 #
 
-xgene-enet-v2-objs := main.o mac.o enet.o ring.o
+xgene-enet-v2-objs := main.o mac.o enet.o ring.o mdio.o
 obj-$(CONFIG_NET_XGENE_V2) += xgene-enet-v2.o
diff --git a/drivers/net/ethernet/apm/xgene-v2/mac.c 
b/drivers/net/ethernet/apm/xgene-v2/mac.c
index c3189de..ee431e3 100644
--- a/drivers/net/ethernet/apm/xgene-v2/mac.c
+++ b/drivers/net/ethernet/apm/xgene-v2/mac.c
@@ -27,7 +27,7 @@ void xge_mac_reset(struct xge_pdata *pdata)
xge_wr_csr(pdata, MAC_CONFIG_1, 0);
 }
 
-static void xge_mac_set_speed(struct xge_pdata *pdata)
+void xge_mac_set_speed(struct xge_pdata *pdata)
 {
u32 icm0, icm2, ecm0, mc2;
u32 intf_ctrl, rgmii;
diff --git a/drivers/net/ethernet/apm/xgene-v2/mac.h 
b/drivers/net/ethernet/apm/xgene-v2/mac.h
index 0fce6ae..74397c9 100644
--- a/drivers/net/ethernet/apm/xgene-v2/mac.h
+++ b/drivers/net/ethernet/apm/xgene-v2/mac.h
@@ -101,6 +101,7 @@ static inline u32 xgene_get_reg_bits(u32 var, int pos, int 
len)
 struct xge_pdata;
 
 void xge_mac_reset(struct xge_pdata *pdata);
+void xge_mac_set_speed(struct xge_pdata *pdata);
 void xge_mac_enable(struct xge_pdata *pdata);
 void xge_mac_disable(struct xge_pdata *pdata);
 void xge_mac_init(struct xge_pdata *pdata);
diff --git a/drivers/net/ethernet/apm/xgene-v2/main.c 
b/drivers/net/ethernet/apm/xgene-v2/main.c
index ae76977..82ac5b4 100644
--- a/drivers/net/ethernet/apm/xgene-v2/main.c
+++ b/drivers/net/ethernet/apm/xgene-v2/main.c
@@ -500,9 +500,10 @@ static int xge_open(struct net_device *ndev)
 
xge_intr_enable(pdata);
xge_wr_csr(pdata, DMARXCTRL, 1);
+
+   phy_start(ndev->phydev);
xge_mac_enable(pdata);
netif_start_queue(ndev);
-   netif_carrier_on(ndev);
 
return 0;
 }
@@ -511,9 +512,9 @@ static int xge_close(struct net_device *ndev)
 {
struct xge_pdata *pdata = netdev_priv(ndev);
 
-   netif_carrier_off(ndev);
netif_stop_queue(ndev);
xge_mac_disable(pdata);
+   phy_stop(ndev->phydev);
 
xge_intr_disable(pdata);
xge_free_irq(ndev);
@@ -683,9 +684,12 @@ static int xge_probe(struct platform_device *pdev)
if (ret)
goto err;
 
+   ret = xge_mdio_config(ndev);
+   if (ret)
+   goto err;
+
netif_napi_add(ndev, &pdata->napi, xge_napi, NAPI_POLL_WEIGHT);
 
-   netif_carrier_off(ndev);
ret = register_netdev(ndev);
if (ret) {
netdev_err(ndev, "Failed to register netdev\n");
@@ -713,6 +717,7 @@ static int xge_remove(struct platform_device *pdev)
dev_close(ndev);
rtnl_unlock();
 
+   xge_mdio_remove(ndev);
unregister_netdev(ndev);
free_netdev(ndev);
 
diff --git a/drivers/net/ethernet/apm/xgene-v2/main.h 
b/drivers/net/ethernet/apm/xgene-v2/main.h
index ada7b0e..777f254 100644
--- a/drivers/net/ethernet/apm/xgene-v2/main.h
+++ b/drivers/net/ethernet/apm/xgene-v2/main.h
@@ -65,6 +65,7 @@ struct xge_pdata {
struct xge_desc_ring *rx_ring;
struct platform_device *pdev;
char irq_name[IRQ_ID_SIZE];
+   struct mii_bus *mdio_bus;
struct net_device *ndev;
struct napi_struct napi;
struct xge_stats stats;
@@ -72,4 +73,7 @@ struct xge_pdata {
u8 nbufs;
 };
 
+int xge_mdio_config(struct net_device *ndev);
+void xge_mdio_remove(struct net_device *ndev);
+
 #endif /* __XGENE_ENET_V2_MAIN_H__ */
diff --git a/drivers/net/ethernet/apm/xgene-v2/mdio.c 
b/drivers/net/ethernet/apm/xgene-v2/mdio.c
new file mode 100644
index 0000000..a583c6a
--- /dev/null
+++ b/drivers/net/ethernet/apm/xgene-v2/mdio.c
@@ -0,0 +1,167 @@
+/*
+ * Applied Micro X-Gene SoC Ethernet v2 Driver
+ *
+ * Copyright (c) 2017, Applied Micro Circuits Corporation
+ * Author(s): Iyappan Subramanian 
+ *   Keyur Chudgar 
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,

[PATCH v2 net-next 4/4] drivers: net: xgene-v2: misc fixes

2017-03-21 Thread Iyappan Subramanian
Fixed review comments from the previous patch-set.

- changed return value check of platform_get_irq() to < 0
- replaced devm_request(free)_irq() calls by request(free)_irq() since
  they are called from open() and close()
- changed sizeof(struct mystruct) to sizeof(*mystruct)
- reduced indentation on tx_timeout()

Signed-off-by: Iyappan Subramanian 
---
 drivers/net/ethernet/apm/xgene-v2/main.c | 55 +++-
 1 file changed, 26 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/apm/xgene-v2/main.c 
b/drivers/net/ethernet/apm/xgene-v2/main.c
index e764e58..0f2ad50 100644
--- a/drivers/net/ethernet/apm/xgene-v2/main.c
+++ b/drivers/net/ethernet/apm/xgene-v2/main.c
@@ -66,9 +66,8 @@ static int xge_get_resources(struct xge_pdata *pdata)
}
 
ret = platform_get_irq(pdev, 0);
-   if (ret <= 0) {
-   dev_err(dev, "Unable to get ENET IRQ\n");
-   ret = ret ? : -ENXIO;
+   if (ret < 0) {
+   dev_err(dev, "Unable to get irq\n");
return ret;
}
pdata->resources.irq = ret;
@@ -156,13 +155,12 @@ static irqreturn_t xge_irq(const int irq, void *data)
 static int xge_request_irq(struct net_device *ndev)
 {
struct xge_pdata *pdata = netdev_priv(ndev);
-   struct device *dev = &pdata->pdev->dev;
int ret;
 
snprintf(pdata->irq_name, IRQ_ID_SIZE, "%s", ndev->name);
 
-   ret = devm_request_irq(dev, pdata->resources.irq, xge_irq,
-  0, pdata->irq_name, pdata);
+   ret = request_irq(pdata->resources.irq, xge_irq, 0, pdata->irq_name,
+ pdata);
if (ret)
netdev_err(ndev, "Failed to request irq %s\n", pdata->irq_name);
 
@@ -172,9 +170,8 @@ static int xge_request_irq(struct net_device *ndev)
 static void xge_free_irq(struct net_device *ndev)
 {
struct xge_pdata *pdata = netdev_priv(ndev);
-   struct device *dev = &pdata->pdev->dev;
 
-   devm_free_irq(dev, pdata->resources.irq, pdata);
+   free_irq(pdata->resources.irq, pdata);
 }
 
 static bool is_tx_slot_available(struct xge_raw_desc *raw_desc)
@@ -424,7 +421,7 @@ static struct xge_desc_ring *xge_create_desc_ring(struct 
net_device *ndev)
struct xge_desc_ring *ring;
u16 size;
 
-   ring = kzalloc(sizeof(struct xge_desc_ring), GFP_KERNEL);
+   ring = kzalloc(sizeof(*ring), GFP_KERNEL);
if (!ring)
return NULL;
 
@@ -436,7 +433,7 @@ static struct xge_desc_ring *xge_create_desc_ring(struct 
net_device *ndev)
if (!ring->desc_addr)
goto err;
 
-   ring->pkt_info = kcalloc(XGENE_ENET_NUM_DESC, sizeof(struct pkt_info),
+   ring->pkt_info = kcalloc(XGENE_ENET_NUM_DESC, sizeof(*ring->pkt_info),
 GFP_KERNEL);
if (!ring->pkt_info)
goto err;
@@ -598,28 +595,28 @@ static void xge_timeout(struct net_device *ndev)
 
rtnl_lock();
 
-   if (netif_running(ndev)) {
-   netif_carrier_off(ndev);
-   netif_stop_queue(ndev);
-   xge_intr_disable(pdata);
-   napi_disable(&pdata->napi);
+   if (!netif_running(ndev))
+   goto out;
 
-   xge_wr_csr(pdata, DMATXCTRL, 0);
-   xge_txc_poll(ndev);
-   xge_free_pending_skb(ndev);
-   xge_wr_csr(pdata, DMATXSTATUS, ~0U);
+   netif_stop_queue(ndev);
+   xge_intr_disable(pdata);
+   napi_disable(&pdata->napi);
 
-   xge_setup_desc(pdata->tx_ring);
-   xge_update_tx_desc_addr(pdata);
-   xge_mac_init(pdata);
+   xge_wr_csr(pdata, DMATXCTRL, 0);
+   xge_txc_poll(ndev);
+   xge_free_pending_skb(ndev);
+   xge_wr_csr(pdata, DMATXSTATUS, ~0U);
 
-   napi_enable(&pdata->napi);
-   xge_intr_enable(pdata);
-   xge_mac_enable(pdata);
-   netif_start_queue(ndev);
-   netif_carrier_on(ndev);
-   }
+   xge_setup_desc(pdata->tx_ring);
+   xge_update_tx_desc_addr(pdata);
+   xge_mac_init(pdata);
+
+   napi_enable(&pdata->napi);
+   xge_intr_enable(pdata);
+   xge_mac_enable(pdata);
+   netif_start_queue(ndev);
 
+out:
rtnl_unlock();
 }
 
@@ -653,7 +650,7 @@ static int xge_probe(struct platform_device *pdev)
struct xge_pdata *pdata;
int ret;
 
-   ndev = alloc_etherdev(sizeof(struct xge_pdata));
+   ndev = alloc_etherdev(sizeof(*pdata));
if (!ndev)
return -ENOMEM;
 
-- 
1.9.1



[PATCH v2 net-next 0/4] drivers: net: xgene-v2: Add MDIO and ethtool support

2017-03-21 Thread Iyappan Subramanian
This patch set,

- adds phy management and ethtool support
- fixes ethernet reset
- addresses review comments from previous patch set

Signed-off-by: Iyappan Subramanian 
---
v2: Address review comments from v1
- removed mdio_lock, since there is a top level lock in mdio_bus.c

v1:
- Initial version
---

Iyappan Subramanian (4):
  drivers: net: xgene-v2: Add MDIO support
  drivers: net: xgene-v2: Add ethtool support
  drivers: net: xgene-v2: Fix port reset
  drivers: net: xgene-v2: misc fixes

 drivers/net/ethernet/apm/xgene-v2/Makefile  |   2 +-
 drivers/net/ethernet/apm/xgene-v2/enet.c|  24 +++-
 drivers/net/ethernet/apm/xgene-v2/enet.h|   2 +
 drivers/net/ethernet/apm/xgene-v2/ethtool.c | 121 
 drivers/net/ethernet/apm/xgene-v2/mac.c |   2 +-
 drivers/net/ethernet/apm/xgene-v2/mac.h |   2 +-
 drivers/net/ethernet/apm/xgene-v2/main.c|  67 +--
 drivers/net/ethernet/apm/xgene-v2/main.h|   5 +
 drivers/net/ethernet/apm/xgene-v2/mdio.c| 167 
 9 files changed, 351 insertions(+), 41 deletions(-)
 create mode 100644 drivers/net/ethernet/apm/xgene-v2/ethtool.c
 create mode 100644 drivers/net/ethernet/apm/xgene-v2/mdio.c

-- 
1.9.1



Re: [PATCH net] sctp: remove temporary variable confirm from sctp_packet_transmit

2017-03-21 Thread David Miller
From: Xin Long 
Date: Sat, 18 Mar 2017 19:12:22 +0800

> Commit c86a773c7802 ("sctp: add dst_pending_confirm flag") introduced
> a temporary variable "confirm" in sctp_packet_transmit.
> 
> But it broke the rule that longer lines should be above shorter ones.
> Besides, this variable is not necessary, so this patch is to just
> remove it and use tp->dst_pending_confirm directly.
> 
> Fixes: c86a773c7802 ("sctp: add dst_pending_confirm flag")
> Signed-off-by: Xin Long 

Applied.


Re: [PATCH net] sctp: define dst_pending_confirm as a bit in sctp_transport

2017-03-21 Thread David Miller
From: Xin Long 
Date: Sat, 18 Mar 2017 19:27:23 +0800

> As tp->dst_pending_confirm's value can only be set 0 or 1, this
> patch is to change to define it as a bit instead of __u32.
> 
> Signed-off-by: Xin Long 

Applied.


Re: [PATCH net] sctp: out_qlen should be updated when pruning unsent queue

2017-03-21 Thread David Miller
From: Xin Long 
Date: Sat, 18 Mar 2017 20:03:59 +0800

> This patch is to fix the issue that sctp_prsctp_prune_sent forgot
> to update q->out_qlen when removing a chunk from unsent queue.
> 
> Fixes: 8dbdf1f5b09c ("sctp: implement prsctp PRIO policy")
> Signed-off-by: Xin Long 

Applied, thanks.


Re: [PATCH net 1/2] tcp: fix SCM_TIMESTAMPING_OPT_STATS for normal skbs

2017-03-21 Thread David Miller
From: Soheil Hassas Yeganeh 
Date: Sat, 18 Mar 2017 17:02:59 -0400

> From: Soheil Hassas Yeganeh 
> 
> __sock_recv_timestamp can be called for both normal skbs (for
> receive timestamps) and for skbs on the error queue (for transmit
> timestamps).
> 
> Commit 1c885808e456
> (tcp: SOF_TIMESTAMPING_OPT_STATS option for SO_TIMESTAMPING)
> assumes any skb passed to __sock_recv_timestamp are from
> the error queue, containing OPT_STATS in the content of the skb.
> This results in accessing invalid memory or generating junk
> data.
> 
> To fix this, set skb->pkt_type to PACKET_OUTGOING for packets
> on the error queue. This is safe because on the receive path
> on local sockets skb->pkt_type is never set to PACKET_OUTGOING.
> With that, copy OPT_STATS from a packet, only if its pkt_type
> is PACKET_OUTGOING.
> 
> Fixes: 1c885808e456 ("tcp: SOF_TIMESTAMPING_OPT_STATS option for 
> SO_TIMESTAMPING")
> Reported-by: JongHwan Kim 
> Signed-off-by: Soheil Hassas Yeganeh 
> Signed-off-by: Eric Dumazet 
> Signed-off-by: Willem de Bruijn 

Applied and queued up for -stable.


Re: [PATCH net 2/2] tcp: mark skbs with SCM_TIMESTAMPING_OPT_STATS

2017-03-21 Thread David Miller
From: Soheil Hassas Yeganeh 
Date: Sat, 18 Mar 2017 17:03:00 -0400

> From: Soheil Hassas Yeganeh 
> 
> SOF_TIMESTAMPING_OPT_STATS can be enabled and disabled
> while packets are collected on the error queue.
> So, checking SOF_TIMESTAMPING_OPT_STATS in sk->sk_tsflags
> is not enough to safely assume that the skb contains
> OPT_STATS data.
> 
> Add a bit in sock_exterr_skb to indicate whether the
> skb contains opt_stats data.
> 
> Fixes: 1c885808e456 ("tcp: SOF_TIMESTAMPING_OPT_STATS option for 
> SO_TIMESTAMPING")
> Reported-by: JongHwan Kim 
> Signed-off-by: Soheil Hassas Yeganeh 
> Signed-off-by: Eric Dumazet 
> Signed-off-by: Willem de Bruijn 

Also applied and queued up for -stable.


Re: [PATCH v2 net] selftests/bpf: fix broken build, take 2

2017-03-21 Thread David Miller
From: Shuah Khan 
Date: Mon, 20 Mar 2017 10:37:26 -0600

> On 03/20/2017 09:45 AM, Alexei Starovoitov wrote:
>> On Mon, Mar 20, 2017 at 04:31:28PM +0100, Daniel Borkmann wrote:
>>> On 03/20/2017 07:03 AM, Zi Shen Lim wrote:
>>>> Merge of 'linux-kselftest-4.11-rc1':
>>>>
>>>> 1. Partially removed use of 'test_objs' target, breaking force rebuild of
>>>> BPFOBJ, introduced in commit d498f8719a09 ("bpf: Rebuild bpf.o for any
>>>> dependency update").
>>>>
>>>>   Update target so dependency on BPFOBJ is restored.
>>>>
>>>> 2. Introduced commit 2047f1d8ba28 ("selftests: Fix the .c linking rule")
>>>> which fixes order of LDLIBS.
>>>>
>>>>   Commit d02d8986a768 ("bpf: Always test unprivileged programs") added
>>>> libcap dependency into CFLAGS. Use LDLIBS instead to fix linking of
>>>> test_verifier.
>>>>
>>>> 3. Introduced commit d83c3ba0b926 ("selftests: Fix selftests build to
>>>> just build, not run tests").
>>>>
>>>>   Reordering the Makefile allows us to remove the 'all' target.
>>>>
>>>> Tested both:
>>>> selftests/bpf$ make
>>>> and
>>>> selftests$ make TARGETS=bpf
>>>> on Ubuntu 16.04.2.
>>>>
>>>> Signed-off-by: Zi Shen Lim 
>>>
>>> Looks reasonable to me as follow up to 1da8ac7c49fb ("selftests/bpf:
>>> fix broken build"), thanks for fixing Zi!
>>>
>>> Acked-by: Daniel Borkmann 
>>> Tested-by: Daniel Borkmann 
>> 
>> worked for me as well:
>> Acked-by: Alexei Starovoitov 
>> Tested-by: Alexei Starovoitov 
>> 
>> 
>> 
> 
> David,
> 
> Could you please apply it to your tree. I think you already applied
> the first fix.
> 
> Acked-by: Shuah Khan 

Done.


Re: [PATCH net-next 0/9] qed: IOV related cleanups

2017-03-21 Thread David Miller
From: Yuval Mintz 
Date: Sun, 19 Mar 2017 13:08:11 +0200

> This patch series targets IOV functionality [on both PF and VF].
> 
> Patches #2, #3 and #5 fix flows relating to malicious VFs, either by
> upgrading and aligning current safe-guards or by correcting racy flows.
> 
> Patches #1 and #8 make some malicious/dysfunctional VFs logging appear
> by default in logs.
> 
> The rest of the patches either cleanup the existing code or else correct
> some possible [yet fairly insignificant] issues in VF behavior.

Series applied, thank you.


Re: [PATCH 07/17] net: convert sock.sk_refcnt from atomic_t to refcount_t

2017-03-21 Thread Eric Dumazet
On Tue, 2017-03-21 at 16:51 -0700, Kees Cook wrote:

> Am I understanding you correctly that you'd want something like:
> 
> refcount.h:
> #ifdef UNPROTECTED_REFCOUNT
> #define refcount_inc(x)   atomic_inc(x)
> ...
> #else
> void refcount_inc(...
> ...
> #endif
> 
> some/net.c:
> #define UNPROTECTED_REFCOUNT
> #include <linux/refcount.h>
> 
> or similar?

At first, it could be something simple like that yes.

Note that we might define two refcount_inc()  : One that does whole
tests, and refcount_inc_relaxed() that might translate to atomic_inc()
on non debug kernels.

Then later, maybe provide a dynamic infrastructure so that we can
dynamically force the full checks even for refcount_inc_relaxed() on say
1% of the hosts, to get better debug coverage ?





Re: [PATCH] net: qmi_wwan: Add USB IDs for MDM6600 modem on Motorola Droid 4

2017-03-21 Thread David Miller
From: Tony Lindgren 
Date: Sun, 19 Mar 2017 09:19:57 -0700

> This gets qmicli working with the MDM6600 modem.
> 
> Cc: Bjørn Mork 
> Reviewed-by: Sebastian Reichel 
> Tested-by: Sebastian Reichel 
> Signed-off-by: Tony Lindgren 

Applied, thanks.


[PATCH 2/2] netfilter: ipset: warn users of list:set that parameter 'size' is ignored

2017-03-21 Thread Vishwanath Pai
Revision 4 warns the users that the parameter 'size' is ignored. The
kernel module doesn't need any changes, it will work with both the
revisions.

Note that this will not restore old behavior before commit 00590fdd5be0
("netfilter: ipset: Introduce RCU locking in list type") for users of
the older revision. It will be a much bigger change if that is
what we need.

Reviewed-by: Josh Hunt 
Signed-off-by: Vishwanath Pai 
---
 net/netfilter/ipset/ip_set_list_set.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/ipset/ip_set_list_set.c 
b/net/netfilter/ipset/ip_set_list_set.c
index 178d4eb..d4f820a 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -19,7 +19,8 @@
 #define IPSET_TYPE_REV_MIN 0
 /* 1Counters support added */
 /* 2Comments support added */
-#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
+/* 3skbinfo support added */
+#define IPSET_TYPE_REV_MAX 4 /* size argument is ignored */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik ");
-- 
1.9.1



Re: [PATCH 3/4] flowcache: make struct flow_cache_percpu::hash_rnd_recalc bool

2017-03-21 Thread David Miller
From: Alexey Dobriyan 
Date: Mon, 20 Mar 2017 01:27:43 +0300

> ->hash_rnd_recalc is only used in boolean context.
> 
> Space savings on x86_64 come from the fact that "MOV rm8, imm8" is
> shorter than "MOV rm32, imm32" by at least 3 bytes.
> 
>   add/remove: 0/0 grow/shrink: 0/3 up/down: 0/-10 (-10)
>   function old new   delta
>   flow_cache_new_hashrnd   166 163  -3
>   flow_cache_cpu_up_prep   171 168  -3
>   flow_cache_lookup   11481144  -4
>   Total: Before=170822872, After=170822862, chg -0.00%
> 
> Signed-off-by: Alexey Dobriyan 

I agree with Eric Dumazet that we might have atomicity issues in the
future because of this change.

Why don't you drop this and resubmit just the other 3 patches which
seem to be much less controversial?

Thanks.


[PATCH 1/2] netfilter: ipset: warn users of list:set that parameter 'size' is ignored

2017-03-21 Thread Vishwanath Pai
Since kernel commit 00590fdd5be0 ("netfilter: ipset: Introduce RCU
locking in list type"), the parameter 'size' has not been in use and
is ignored by the kernel. This is not very apparent to the user. This
commit makes 'size' optional and also warns the user if they try to
specify it. We also don't print it out on 'ipset l'.

I created revision 4 to make this change, revision 3 should work with
older kernels just like before.

Reviewed-by: Josh Hunt 
Signed-off-by: Vishwanath Pai 
---
 lib/ipset_list_set.c | 92 
 1 file changed, 92 insertions(+)

diff --git a/lib/ipset_list_set.c b/lib/ipset_list_set.c
index 45934e7..2d8bc7a 100644
--- a/lib/ipset_list_set.c
+++ b/lib/ipset_list_set.c
@@ -322,6 +322,31 @@ static const struct ipset_arg list_set_create_args3[] = {
{ },
 };
 
+/* Parse commandline arguments */
+static const struct ipset_arg list_set_create_args4[] = {
+   { .name = { "size", NULL },
+ .has_arg = IPSET_OPTIONAL_ARG,.opt = IPSET_OPT_SIZE,
+ .parse = ipset_parse_ignored,
+   },
+   { .name = { "timeout", NULL },
+ .has_arg = IPSET_MANDATORY_ARG,   .opt = IPSET_OPT_TIMEOUT,
+ .parse = ipset_parse_timeout, .print = ipset_print_number,
+   },
+   { .name = { "counters", NULL },
+ .has_arg = IPSET_NO_ARG,  .opt = IPSET_OPT_COUNTERS,
+ .parse = ipset_parse_flag,.print = ipset_print_flag,
+   },
+   { .name = { "comment", NULL },
+ .has_arg = IPSET_NO_ARG,  .opt = IPSET_OPT_CREATE_COMMENT,
+ .parse = ipset_parse_flag,.print = ipset_print_flag,
+   },
+   { .name = { "skbinfo", NULL },
+ .has_arg = IPSET_NO_ARG,  .opt = IPSET_OPT_SKBINFO,
+ .parse = ipset_parse_flag,.print = ipset_print_flag,
+   },
+   { },
+};
+
 static const struct ipset_arg list_set_adt_args3[] = {
{ .name = { "timeout", NULL },
  .has_arg = IPSET_MANDATORY_ARG,   .opt = IPSET_OPT_TIMEOUT,
@@ -426,6 +451,72 @@ static struct ipset_type ipset_list_set3 = {
.usage = list_set_usage3,
.description = "skbinfo support",
 };
+
+static const char list_set_usage4[] =
+"create SETNAME list:set\n"
+"   [timeout VALUE] [counters] [comment]\n"
+"  [skbinfo]\n"
+"addSETNAME NAME [before|after NAME] [timeout VALUE]\n"
+"   [packets VALUE] [bytes VALUE] [comment STRING]\n"
+"  [skbmark VALUE] [skbprio VALUE] [skbqueue VALUE]\n"
+"delSETNAME NAME [before|after NAME]\n"
+"test   SETNAME NAME [before|after NAME]\n\n"
+"where NAME are existing set names.\n";
+
+static struct ipset_type ipset_list_set4 = {
+   .name = "list:set",
+   .alias = { "setlist", NULL },
+   .revision = 4,
+   .family = NFPROTO_UNSPEC,
+   .dimension = IPSET_DIM_ONE,
+   .elem = {
+   [IPSET_DIM_ONE - 1] = {
+   .parse = ipset_parse_setname,
+   .print = ipset_print_name,
+   .opt = IPSET_OPT_NAME
+   },
+   },
+   .compat_parse_elem = ipset_parse_name_compat,
+   .args = {
+   [IPSET_CREATE] = list_set_create_args4,
+   [IPSET_ADD] = list_set_adt_args3,
+   [IPSET_DEL] = list_set_adt_args2,
+   [IPSET_TEST] = list_set_adt_args2,
+   },
+   .mandatory = {
+   [IPSET_CREATE] = 0,
+   [IPSET_ADD] = IPSET_FLAG(IPSET_OPT_NAME),
+   [IPSET_DEL] = IPSET_FLAG(IPSET_OPT_NAME),
+   [IPSET_TEST] = IPSET_FLAG(IPSET_OPT_NAME),
+   },
+   .full = {
+   [IPSET_CREATE] = IPSET_FLAG(IPSET_OPT_SIZE)
+   | IPSET_FLAG(IPSET_OPT_TIMEOUT)
+   | IPSET_FLAG(IPSET_OPT_COUNTERS)
+   | IPSET_FLAG(IPSET_OPT_CREATE_COMMENT)
+   | IPSET_FLAG(IPSET_OPT_SKBINFO),
+   [IPSET_ADD] = IPSET_FLAG(IPSET_OPT_NAME)
+   | IPSET_FLAG(IPSET_OPT_BEFORE)
+   | IPSET_FLAG(IPSET_OPT_NAMEREF)
+   | IPSET_FLAG(IPSET_OPT_TIMEOUT)
+   | IPSET_FLAG(IPSET_OPT_PACKETS)
+   | IPSET_FLAG(IPSET_OPT_BYTES)
+   | IPSET_FLAG(IPSET_OPT_ADT_COMMENT)
+   | IPSET_FLAG(IPSET_OPT_SKBMARK)
+   | IPSET_FLAG(IPSET_OPT_SKBPRIO)
+   | IPSET_FLAG(IPSET_OPT_SKBQUEUE),
+   [IPSET_DEL] = IPSET_FLAG(IPSET_OPT_NAME)
+   | IPSET_FLAG(IPSET_OPT_BEFORE)
+   | IPSET_FLAG(IPSET_OPT_NAMEREF),
+   [IPSET_TEST] = IPSET_FLAG(IPSET_OPT_NAME)
+   | IPSET_FLAG(IPSET_OPT_BEFORE)
+   | IPSET_FLAG(IPSET_OPT_NAMEREF),
+   },
+
+   .usage = list_set_usage4,
+   .description = "ignore and 

[PATCH] netfilter: ipset: print out warnings generated by commands

2017-03-21 Thread Vishwanath Pai
Warnings are only printed out for IPSET_CMD_TEST. The user won't see
warnings from other commands.

Reviewed-by: Josh Hunt 
Signed-off-by: Vishwanath Pai 
---
 src/ipset.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ipset.c b/src/ipset.c
index 2c4fa10..b0bef7b 100644
--- a/src/ipset.c
+++ b/src/ipset.c
@@ -812,8 +812,8 @@ parse_commandline(int argc, char *argv[])
"Unknown argument %s", argv[1]);
ret = ipset_cmd(session, cmd, restore_line);
D("ret %d", ret);
-   /* Special case for TEST and non-quiet mode */
-   if (cmd == IPSET_CMD_TEST && ipset_session_warning(session)) {
+
+   if (ipset_session_warning(session)) {
if (!ipset_envopt_test(session, IPSET_ENV_QUIET))
fprintf(stderr, "%s", ipset_session_warning(session));
ipset_session_report_reset(session);
-- 
1.9.1



[PATCH net] bpf: fix hashmap extra_elems logic

2017-03-21 Thread Alexei Starovoitov
In both kmalloc and prealloc mode the bpf_map_update_elem() is using
per-cpu extra_elems to do atomic update when the map is full.
There are two issues with it. The logic can be misused, since it allows
max_entries+num_cpus elements to be present in the map. And alloc_extra_elems()
at map creation time can fail percpu alloc for large map values with a warn:
WARNING: CPU: 3 PID: 2752 at ../mm/percpu.c:892 pcpu_alloc+0x119/0xa60
illegal size (32824) or align (8) for percpu allocation

The fixes for both of these issues are different for kmalloc and prealloc modes.
For prealloc mode allocate extra num_possible_cpus elements and store
their pointers into extra_elems array instead of actual elements.
Hence we can use these hidden(spare) elements not only when the map is full
but during bpf_map_update_elem() that replaces existing element too.
That also improves performance, since pcpu_freelist_pop/push is avoided.
Unfortunately this approach cannot be used for kmalloc mode which needs
to kfree elements after rcu grace period. Therefore switch it back to normal
kmalloc even when full and old element exists like it was prior to
commit 6c9059817432 ("bpf: pre-allocate hash map elements").

Add tests to check for over max_entries and large map values.

Reported-by: Dave Jones 
Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements")
Signed-off-by: Alexei Starovoitov 
Acked-by: Daniel Borkmann 
Acked-by: Martin KaFai Lau 
---
 kernel/bpf/hashtab.c| 144 
 tools/testing/selftests/bpf/test_maps.c |  29 ++-
 2 files changed, 97 insertions(+), 76 deletions(-)

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index afe5bab376c9..361a69dfe543 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -30,18 +30,12 @@ struct bpf_htab {
struct pcpu_freelist freelist;
struct bpf_lru lru;
};
-   void __percpu *extra_elems;
+   struct htab_elem *__percpu *extra_elems;
atomic_t count; /* number of elements in this hashtable */
u32 n_buckets;  /* number of hash buckets */
u32 elem_size;  /* size of each element in bytes */
 };
 
-enum extra_elem_state {
-   HTAB_NOT_AN_EXTRA_ELEM = 0,
-   HTAB_EXTRA_ELEM_FREE,
-   HTAB_EXTRA_ELEM_USED
-};
-
 /* each htab element is struct htab_elem + key + value */
 struct htab_elem {
union {
@@ -56,7 +50,6 @@ struct htab_elem {
};
union {
struct rcu_head rcu;
-   enum extra_elem_state state;
struct bpf_lru_node lru_node;
};
u32 hash;
@@ -77,6 +70,11 @@ static bool htab_is_percpu(const struct bpf_htab *htab)
htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
 }
 
+static bool htab_is_prealloc(const struct bpf_htab *htab)
+{
+   return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
+}
+
 static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
 void __percpu *pptr)
 {
@@ -128,17 +126,20 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab 
*htab, void *key,
 
 static int prealloc_init(struct bpf_htab *htab)
 {
+   u32 num_entries = htab->map.max_entries;
int err = -ENOMEM, i;
 
-   htab->elems = bpf_map_area_alloc(htab->elem_size *
-htab->map.max_entries);
+   if (!htab_is_percpu(htab) && !htab_is_lru(htab))
+   num_entries += num_possible_cpus();
+
+   htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries);
if (!htab->elems)
return -ENOMEM;
 
if (!htab_is_percpu(htab))
goto skip_percpu_elems;
 
-   for (i = 0; i < htab->map.max_entries; i++) {
+   for (i = 0; i < num_entries; i++) {
u32 size = round_up(htab->map.value_size, 8);
void __percpu *pptr;
 
@@ -166,11 +167,11 @@ static int prealloc_init(struct bpf_htab *htab)
if (htab_is_lru(htab))
bpf_lru_populate(&htab->lru, htab->elems,
 offsetof(struct htab_elem, lru_node),
-htab->elem_size, htab->map.max_entries);
+htab->elem_size, num_entries);
else
pcpu_freelist_populate(&htab->freelist,
   htab->elems + offsetof(struct htab_elem, 
fnode),
-  htab->elem_size, htab->map.max_entries);
+  htab->elem_size, num_entries);
 
return 0;
 
@@ -191,16 +192,22 @@ static void prealloc_destroy(struct bpf_htab *htab)
 
 static int alloc_extra_elems(struct bpf_htab *htab)
 {
-   void __percpu *pptr;
+   struct htab_elem *__percpu *pptr, *l_new;
+   struct pcpu_freelist_node *l;
int cpu;
 
-   pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
+   pptr = __alloc_percpu_gfp(sizeof

[PATCH net] ipv4: provide stronger user input validation in nl_fib_input()

2017-03-21 Thread Eric Dumazet
From: Eric Dumazet 

Alexander reported a KMSAN splat caused by reads of an uninitialized
field (tb_id_in) from a user-provided struct fib_result_nl.

It turns out the nl_fib_input() sanity tests on user input are a bit
wrong:

User can pretend nlh->nlmsg_len is big enough, but provide
at sendmsg() time a too small buffer.

Reported-by: Alexander Potapenko 
Signed-off-by: Eric Dumazet 
---
 net/ipv4/fib_frontend.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 
42bfd08109dd78ab509493e8d2205d72845bb3eb..8f2133ffc2ff1b94871408a5f934cb938d3462b5
 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1083,7 +1083,8 @@ static void nl_fib_input(struct sk_buff *skb)
 
net = sock_net(skb->sk);
nlh = nlmsg_hdr(skb);
-   if (skb->len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len ||
+   if (skb->len < nlmsg_total_size(sizeof(*frn)) ||
+   skb->len < nlh->nlmsg_len ||
nlmsg_len(nlh) < sizeof(*frn))
return;
 




Re: [PATCH net-next 1/8] ptr_ring: introduce batch dequeuing

2017-03-21 Thread Jason Wang



On 2017年03月21日 18:25, Sergei Shtylyov wrote:

Hello!

On 3/21/2017 7:04 AM, Jason Wang wrote:


Signed-off-by: Jason Wang 
---
 include/linux/ptr_ring.h | 65 


 1 file changed, 65 insertions(+)

diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 6c70444..4771ded 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -247,6 +247,22 @@ static inline void *__ptr_ring_consume(struct 
ptr_ring *r)

 return ptr;
 }

+static inline int __ptr_ring_consume_batched(struct ptr_ring *r,
+ void **array, int n)
+{
+void *ptr;
+int i = 0;
+
+while (i < n) {


   Hm, why not *for*?


Yes, it may be better. If there are other comments on the series, I will
change it in the next version.


Thanks



[PATCH net-next v3] net: Add sysctl to toggle early demux for tcp and udp

2017-03-21 Thread Subash Abhinov Kasiviswanathan
Certain systems process a significant unconnected UDP workload.
It would be preferable to disable UDP early demux for those systems
and enable it for TCP only.

By disabling UDP demux, we see these slight gains on an ARM64 system-
782 -> 788Mbps unconnected single stream UDPv4
633 -> 654Mbps unconnected UDPv4 different sources

The performance impact can change based on CPU architecture and cache
sizes. There will not be much difference seen if the entire UDP hash table
is in cache.

Both sysctls are enabled by default to preserve existing behavior.

v1->v2: Change function pointer instead of adding conditional as
suggested by Stephen.

v2->v3: Read once in callers to avoid issues due to compiler
optimizations. Also update commit message with the tests.

Signed-off-by: Subash Abhinov Kasiviswanathan 
Suggested-by: Eric Dumazet 
Cc: Stephen Hemminger 
Cc: Tom Herbert 
---
 Documentation/networking/ip-sysctl.txt | 11 +++-
 include/net/netns/ipv4.h   |  2 ++
 include/net/tcp.h  |  2 ++
 include/net/udp.h  |  3 +++
 net/ipv4/af_inet.c | 22 ++--
 net/ipv4/ip_input.c|  2 +-
 net/ipv4/sysctl_net_ipv4.c | 48 ++
 net/ipv6/ip6_input.c   |  2 +-
 net/ipv6/tcp_ipv6.c| 10 ++-
 net/ipv6/udp.c | 10 ++-
 10 files changed, 105 insertions(+), 7 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt 
b/Documentation/networking/ip-sysctl.txt
index ed3d079..6b921a1 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -848,12 +848,21 @@ ip_dynaddr - BOOLEAN
 ip_early_demux - BOOLEAN
Optimize input packet processing down to one demux for
certain kinds of local sockets.  Currently we only do this
-   for established TCP sockets.
+   for established TCP and connected UDP sockets.
 
It may add an additional cost for pure routing workloads that
reduces overall throughput, in such case you should disable it.
Default: 1
 
+tcp_early_demux - BOOLEAN
+   Enable early demux for established TCP sockets.
+   Default: 1
+
+udp_early_demux - BOOLEAN
+   Enable early demux for connected UDP sockets. Disable this if
+   your system could experience more unconnected load.
+   Default: 1
+
 icmp_echo_ignore_all - BOOLEAN
If set non-zero, then the kernel will ignore all ICMP ECHO
requests sent to it.
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 2e9d649..a489b76 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -95,6 +95,8 @@ struct netns_ipv4 {
/* Shall we try to damage output packets if routing dev changes? */
int sysctl_ip_dynaddr;
int sysctl_ip_early_demux;
+   int sysctl_tcp_early_demux;
+   int sysctl_udp_early_demux;
 
int sysctl_fwmark_reflect;
int sysctl_tcp_fwmark_accept;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e614ad4..edc1df4 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1932,4 +1932,6 @@ static inline void tcp_listendrop(const struct sock *sk)
__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
 }
 
+void tcp_v4_early_demux_configure(int enable);
+void tcp_v6_early_demux_configure(int enable);
 #endif /* _TCP_H */
diff --git a/include/net/udp.h b/include/net/udp.h
index c9d8b8e..33198fa 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -372,4 +372,7 @@ struct udp_iter_state {
 #if IS_ENABLED(CONFIG_IPV6)
 void udpv6_encap_enable(void);
 #endif
+
+void udp_v4_early_demux_configure(int enable);
+void udp_v6_early_demux_configure(int enable);
 #endif /* _UDP_H */
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6b1fc6e..d286750 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1599,7 +1599,7 @@ u64 snmp_fold_field64(void __percpu *mib, int offt, 
size_t syncp_offset)
 };
 #endif
 
-static const struct net_protocol tcp_protocol = {
+static struct net_protocol tcp_protocol = {
.early_demux=   tcp_v4_early_demux,
.handler=   tcp_v4_rcv,
.err_handler=   tcp_v4_err,
@@ -1608,7 +1608,7 @@ u64 snmp_fold_field64(void __percpu *mib, int offt, 
size_t syncp_offset)
.icmp_strict_tag_validation = 1,
 };
 
-static const struct net_protocol udp_protocol = {
+static struct net_protocol udp_protocol = {
.early_demux =  udp_v4_early_demux,
.handler =  udp_rcv,
.err_handler =  udp_err,
@@ -1616,6 +1616,22 @@ u64 snmp_fold_field64(void __percpu *mib, int offt, 
size_t syncp_offset)
.netns_ok = 1,
 };
 
+void tcp_v4_early_demux_configure(int enable)
+{
+   if (enable)
+   tcp_protocol.early_demux = tcp_v4_early_demux;
+   else
+   tcp_protocol.early_demux = NULL;
+}
+
+void udp_v4_early_demux_configure(int enable)
+{
+  

Re: [patch -next] net: dwc-xlgmac: fix an error code in xlgmac_alloc_pages()

2017-03-21 Thread Jie Deng
On 2017/3/22 4:42, Dan Carpenter wrote:

> The dma_mapping_error() returns true if there is an error but we want
> to return -ENOMEM and not 1.
>
> Fixes: 65e0ace2c5cd ("net: dwc-xlgmac: Initial driver for DesignWare 
> Enterprise Ethernet")
> Signed-off-by: Dan Carpenter 
>
> diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c 
> b/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c
> index 55c796ed7d26..39b5cb967bba 100644
> --- a/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c
> +++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c
> @@ -335,7 +335,6 @@ static int xlgmac_alloc_pages(struct xlgmac_pdata *pdata,
>  {
>   struct page *pages = NULL;
>   dma_addr_t pages_dma;
> - int ret;
>  
>   /* Try to obtain pages, decreasing order if necessary */
>   gfp |= __GFP_COLD | __GFP_COMP | __GFP_NOWARN;
> @@ -352,10 +351,9 @@ static int xlgmac_alloc_pages(struct xlgmac_pdata *pdata,
>   /* Map the pages */
>   pages_dma = dma_map_page(pdata->dev, pages, 0,
>PAGE_SIZE << order, DMA_FROM_DEVICE);
> - ret = dma_mapping_error(pdata->dev, pages_dma);
> - if (ret) {
> + if (dma_mapping_error(pdata->dev, pages_dma)) {
>   put_page(pages);
> - return ret;
> + return -ENOMEM;
>   }
>  
>   pa->pages = pages;
Thanks for the fix.
Reviewed-by: Jie Deng 


Re: [net-next sample action optimization v4 1/4] openvswitch: Deferred fifo API change.

2017-03-21 Thread Pravin Shelar
On Mon, Mar 20, 2017 at 4:32 PM, Andy Zhou  wrote:
> add_deferred_actions() API currently requires actions to be passed in
> as a fully encoded netlink message. So far both 'sample' and 'recirc'
> actions happen to carry actions as fully encoded netlink messages.
> However, this requirement is more restrictive than necessary; a future
> patch will need to pass in action lists that are not fully encoded
> by themselves.
>
> Signed-off-by: Andy Zhou 
> Acked-by: Joe Stringer 
Acked-by: Pravin B Shelar 


Re: [net-next sample action optimization v4 3/4] openvswitch: Optimize sample action for the clone use cases

2017-03-21 Thread Pravin Shelar
On Mon, Mar 20, 2017 at 4:32 PM, Andy Zhou  wrote:
> With the introduction of open flow 'clone' action, the OVS user space
> can now translate the 'clone' action into kernel datapath 'sample'
> action, with 100% probability, to ensure that the clone semantics,
> which is that the packet seen by the clone action is the same as the
> packet seen by the action after clone, is faithfully carried out
> in the datapath.
>
> While the sample action in the datapath has the matching semantics,
> its implementation is only optimized for its original use.
> Specifically, there are two limitations: First, there is a 3-level
> nesting restriction, enforced at the flow downloading time. This
> limit turns out to be too restrictive for the 'clone' use case.
> Second, the implementation avoids recursive calls only if the sample
> action list has a single userspace action.
>
> The main optimization implemented in this series removes the static
> nesting limit check and instead implements a run-time recursion limit
> check and recursion avoidance similar to that of the 'recirc' action.
> This optimization solves both issues #1 and #2 above.
>
> One related optimization attempts to avoid copying the flow key as
> long as the enclosed actions do not change the flow key. The
> detection is performed only once at the flow downloading time.
>
> Another related optimization is to rewrite the action list
> at flow downloading time in order to save the fast path from parsing
> the sample action list in its original form repeatedly.
>
> Signed-off-by: Andy Zhou 

Acked-by: Pravin B Shelar 


Re: [net-next sample action optimization v4 4/4] Openvswitch: Refactor sample and recirc actions implementation

2017-03-21 Thread Pravin Shelar
On Mon, Mar 20, 2017 at 4:32 PM, Andy Zhou  wrote:
> Added clone_execute() that both the sample and the recirc
> action implementations can use.
>
> Signed-off-by: Andy Zhou 

Acked-by: Pravin B Shelar 

Thanks for working on this.


<    1   2   3