date:20150803

[PATCH v2 net-next 4/4] bnx2fc: Read npiv table from nvram and create vports.

2015-08-03 Thread Yuval Mintz

From: Joe Carnuccio 

Signed-off-by: Joe Carnuccio 
Signed-off-by: Chad Dupuis 
Signed-off-by: Yuval Mintz 
---
 drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 66 +++
 1 file changed, 66 insertions(+)

diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c 
b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index 98d06d1..d5cdc47 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
@@ -2051,9 +2051,49 @@ static int bnx2fc_disable(struct net_device *netdev)
return rc;
 }
 
+static uint bnx2fc_npiv_create_vports(struct fc_lport *lport,
+ struct cnic_fc_npiv_tbl *npiv_tbl)
+{
+   struct fc_vport_identifiers vpid;
+   uint i, created = 0;
+
+   if (npiv_tbl->count > MAX_NPIV_ENTRIES) {
+   BNX2FC_HBA_DBG(lport, "Exceeded count max of npiv table\n");
+   goto done;
+   }
+
+   /* Sanity check the first entry to make sure it's not 0 */
+   if (wwn_to_u64(npiv_tbl->wwnn[0]) == 0 &&
+   wwn_to_u64(npiv_tbl->wwpn[0]) == 0) {
+   BNX2FC_HBA_DBG(lport, "First NPIV table entries invalid.\n");
+   goto done;
+   }
+
+   vpid.roles = FC_PORT_ROLE_FCP_INITIATOR;
+   vpid.vport_type = FC_PORTTYPE_NPIV;
+   vpid.disable = false;
+
+   for (i = 0; i < npiv_tbl->count; i++) {
+   vpid.node_name = wwn_to_u64(npiv_tbl->wwnn[i]);
+   vpid.port_name = wwn_to_u64(npiv_tbl->wwpn[i]);
+   scnprintf(vpid.symbolic_name, sizeof(vpid.symbolic_name),
+   "NPIV[%u]:%016llx-%016llx",
+   created, vpid.port_name, vpid.node_name);
+   if (fc_vport_create(lport->host, 0, &vpid))
+   created++;
+   else
+   BNX2FC_HBA_DBG(lport, "Failed to create vport\n");
+   }
+done:
+   return created;
+}
+
 static int __bnx2fc_enable(struct fcoe_ctlr *ctlr)
 {
struct bnx2fc_interface *interface = fcoe_ctlr_priv(ctlr);
+   struct bnx2fc_hba *hba;
+   struct cnic_fc_npiv_tbl npiv_tbl;
+   struct fc_lport *lport;
 
if (interface->enabled == false) {
if (!ctlr->lp) {
@@ -2064,6 +2104,32 @@ static int __bnx2fc_enable(struct fcoe_ctlr *ctlr)
interface->enabled = true;
}
}
+
+   /* Create static NPIV ports if any are contained in NVRAM */
+   hba = interface->hba;
+   lport = ctlr->lp;
+
+   if (!hba)
+   goto done;
+
+   if (!hba->cnic)
+   goto done;
+
+   if (!lport)
+   goto done;
+
+   if (!lport->host)
+   goto done;
+
+   if (!hba->cnic->get_fc_npiv_tbl)
+   goto done;
+
+   memset(&npiv_tbl, 0, sizeof(npiv_tbl));
+   if (hba->cnic->get_fc_npiv_tbl(hba->cnic, &npiv_tbl))
+   goto done;
+
+   bnx2fc_npiv_create_vports(lport, &npiv_tbl);
+done:
return 0;
 }
 
-- 
1.7.12.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v2 net-next 2/4] cnic: Add the interfaces to get FC-NPIV table.

2015-08-03 Thread Yuval Mintz

From: Adheer Chandravanshi 

Signed-off-by: Adheer Chandravanshi 
Signed-off-by: Chad Dupuis 
Signed-off-by: Yuval Mintz 
---
 drivers/net/ethernet/broadcom/cnic.c| 18 ++
 drivers/net/ethernet/broadcom/cnic_if.h | 16 ++--
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/cnic.c 
b/drivers/net/ethernet/broadcom/cnic.c
index d9e3567..b69dc58 100644
--- a/drivers/net/ethernet/broadcom/cnic.c
+++ b/drivers/net/ethernet/broadcom/cnic.c
@@ -5445,6 +5445,23 @@ static void cnic_free_dev(struct cnic_dev *dev)
kfree(dev);
 }
 
+static int cnic_get_fc_npiv_tbl(struct cnic_dev *dev,
+   struct cnic_fc_npiv_tbl *npiv_tbl)
+{
+   struct cnic_local *cp = dev->cnic_priv;
+   struct bnx2x *bp = netdev_priv(dev->netdev);
+   int ret;
+
+   if (!test_bit(CNIC_F_CNIC_UP, &dev->flags))
+   return -EAGAIN; /* bnx2x is down */
+
+   if (!BNX2X_CHIP_IS_E2_PLUS(bp))
+   return -EINVAL;
+
+   ret = cp->ethdev->drv_get_fc_npiv_tbl(dev->netdev, npiv_tbl);
+   return ret;
+}
+
 static struct cnic_dev *cnic_alloc_dev(struct net_device *dev,
   struct pci_dev *pdev)
 {
@@ -5463,6 +5480,7 @@ static struct cnic_dev *cnic_alloc_dev(struct net_device 
*dev,
cdev->register_device = cnic_register_device;
cdev->unregister_device = cnic_unregister_device;
cdev->iscsi_nl_msg_recv = cnic_iscsi_nl_msg_recv;
+   cdev->get_fc_npiv_tbl = cnic_get_fc_npiv_tbl;
 
cp = cdev->cnic_priv;
cp->dev = cdev;
diff --git a/drivers/net/ethernet/broadcom/cnic_if.h 
b/drivers/net/ethernet/broadcom/cnic_if.h
index d0cf006..789e5c7 100644
--- a/drivers/net/ethernet/broadcom/cnic_if.h
+++ b/drivers/net/ethernet/broadcom/cnic_if.h
@@ -15,8 +15,8 @@
 
 #include "bnx2x/bnx2x_mfw_req.h"
 
-#define CNIC_MODULE_VERSION"2.5.21"
-#define CNIC_MODULE_RELDATE"January 29, 2015"
+#define CNIC_MODULE_VERSION"2.5.22"
+#define CNIC_MODULE_RELDATE"July 20, 2015"
 
 #define CNIC_ULP_RDMA  0
 #define CNIC_ULP_ISCSI 1
@@ -166,6 +166,15 @@ struct drv_ctl_info {
} data;
 };
 
+#define MAX_NPIV_ENTRIES 64
+#define FC_NPIV_WWN_SIZE 8
+
+struct cnic_fc_npiv_tbl {
+   u8 wwpn[MAX_NPIV_ENTRIES][FC_NPIV_WWN_SIZE];
+   u8 wwnn[MAX_NPIV_ENTRIES][FC_NPIV_WWN_SIZE];
+   u32 count;
+};
+
 struct cnic_ops {
struct module   *cnic_owner;
/* Calls to these functions are protected by RCU.  When
@@ -231,6 +240,8 @@ struct cnic_eth_dev {
int (*drv_submit_kwqes_16)(struct net_device *,
   struct kwqe_16 *[], u32);
int (*drv_ctl)(struct net_device *, struct drv_ctl_info *);
+   int (*drv_get_fc_npiv_tbl)(struct net_device *,
+  struct cnic_fc_npiv_tbl *);
unsigned long   reserved1[2];
union drv_info_to_mcp   *addr_drv_info_to_mcp;
 };
@@ -319,6 +330,7 @@ struct cnic_dev {
struct cnic_dev *(*cm_select_dev)(struct sockaddr_in *, int ulp_type);
int (*iscsi_nl_msg_recv)(struct cnic_dev *dev, u32 msg_type,
 char *data, u16 data_size);
+   int (*get_fc_npiv_tbl)(struct cnic_dev *, struct cnic_fc_npiv_tbl *);
unsigned long   flags;
 #define CNIC_F_CNIC_UP 1
 #define CNIC_F_BNX2_CLASS  3
-- 
1.7.12.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v2 net-next 3/4] bnx2x: Add BD support for storage

2015-08-03 Thread Yuval Mintz

Commit 230d00eb4bfe ("bnx2x: new Multi-function mode - BD") adds support
for the new mode in bnx2x. This expands this support by implementing
APIs required by our storage drivers to support that mode.

Signed-off-by: Yuval Mintz 
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h| 12 +++
 .../net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c|  4 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h| 19 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c   | 85 ++
 4 files changed, 118 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h 
b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index fa7c532..e18a0e4 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -1386,4 +1386,16 @@ void bnx2x_schedule_sp_rtnl(struct bnx2x*, enum 
sp_rtnl_flag,
  * @state: OS_DRIVER_STATE_* value reflecting current driver state
  */
 void bnx2x_set_os_driver_state(struct bnx2x *bp, u32 state);
+
+/**
+ * bnx2x_nvram_read - reads data from nvram [might sleep]
+ *
+ * @bp:driver handle
+ * @offset:byte offset in nvram
+ * @ret_buf:   pointer to buffer where data is to be stored
+ * @buf_size:   Length of 'ret_buf' in bytes
+ */
+int bnx2x_nvram_read(struct bnx2x *bp, u32 offset, u8 *ret_buf,
+int buf_size);
+
 #endif /* BNX2X_CMN_H */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c 
b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index 6b2050a..6f90907 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -1348,8 +1348,8 @@ static int bnx2x_nvram_read_dword(struct bnx2x *bp, u32 
offset, __be32 *ret_val,
return rc;
 }
 
-static int bnx2x_nvram_read(struct bnx2x *bp, u32 offset, u8 *ret_buf,
-   int buf_size)
+int bnx2x_nvram_read(struct bnx2x *bp, u32 offset, u8 *ret_buf,
+int buf_size)
 {
int rc;
u32 cmd_flags;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h 
b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
index 08a08fa..cafd5de 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
@@ -2075,6 +2075,25 @@ enum curr_cfg_method_e {
CURR_CFG_MET_VENDOR_SPEC = 2,/* e.g. Option ROM, NPAR, O/S Cfg Utils */
 };
 
+#define FC_NPIV_WWPN_SIZE 8
+#define FC_NPIV_WWNN_SIZE 8
+struct bdn_npiv_settings {
+   u8 npiv_wwpn[FC_NPIV_WWPN_SIZE];
+   u8 npiv_wwnn[FC_NPIV_WWNN_SIZE];
+};
+
+struct bdn_fc_npiv_cfg {
+   /* hdr used internally by the MFW */
+   u32 hdr;
+   u32 num_of_npiv;
+};
+
+#define MAX_NUMBER_NPIV 64
+struct bdn_fc_npiv_tbl {
+   struct bdn_fc_npiv_cfg fc_npiv_cfg;
+   struct bdn_npiv_settings settings[MAX_NUMBER_NPIV];
+};
+
 struct mdump_driver_info {
u32 epoc;
u32 drv_ver;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c 
b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 31c63aa..ad73a60 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -14653,6 +14653,90 @@ static int bnx2x_drv_ctl(struct net_device *dev, 
struct drv_ctl_info *ctl)
rc = -EINVAL;
}
 
+   /* For storage-only interfaces, change driver state */
+   if (IS_MF_SD_STORAGE_PERSONALITY_ONLY(bp)) {
+   switch (ctl->drv_state) {
+   case DRV_NOP:
+   break;
+   case DRV_ACTIVE:
+   bnx2x_set_os_driver_state(bp,
+ OS_DRIVER_STATE_ACTIVE);
+   break;
+   case DRV_INACTIVE:
+   bnx2x_set_os_driver_state(bp,
+ OS_DRIVER_STATE_DISABLED);
+   break;
+   case DRV_UNLOADED:
+   bnx2x_set_os_driver_state(bp,
+ OS_DRIVER_STATE_NOT_LOADED);
+   break;
+   default:
+   BNX2X_ERR("Unknown cnic driver state: %d\n", ctl->drv_state);
+   }
+   }
+
+   return rc;
+}
+
+static int bnx2x_get_fc_npiv(struct net_device *dev,
+struct cnic_fc_npiv_tbl *cnic_tbl)
+{
+   struct bnx2x *bp = netdev_priv(dev);
+   struct bdn_fc_npiv_tbl *tbl = NULL;
+   u32 offset, entries;
+   int rc = -EINVAL;
+   int i;
+
+   if (!SHMEM2_HAS(bp, fc_npiv_nvram_tbl_addr[0]))
+   goto out;
+
+   DP(BNX2X_MSG_MCP, "About to read the FC-NPIV table\n");
+
+   tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
+   if (!tbl) {
+   BNX2X_ERR("Failed to allocate fc_npiv table\n");
+   goto out;
+   }
+
+   offset = SHMEM2_RD(bp, fc_npiv_nvram_tbl_addr[BP_PORT(b

[PATCH v2 net-next 0/4] bnx2x, cnic, bnx2fc: add support for BD

2015-08-03 Thread Yuval Mintz

Commit 230d00eb4bfe ("bnx2x: new Multi-function mode - BD") added support
for a new multi-function mode, but it added only the support required by
bnx2x for L2 interfaces.

This adds the required changes to support the new multi-function mode in
the offloaded storage protocols.

Dave,

Please consider applying this series to `net-next'.

Do notice that this involves non-networking driver changes -
but sending this as a single series seemed like the best approach as
we had to have bnx2x changes to support the new functionality.
If this is problematic, please tell us what's the preferred solution here.

Changes from previous versions
--

 - From v1 - no actual changes; v1 failed to reach netdev so in order to
   keep things in line I've termed this one v2.

Thanks,
Yuval
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v2 net-next 1/4] cnic: Populate upper layer driver state in MFW

2015-08-03 Thread Yuval Mintz

From: Tej Parkash 

Signed-off-by: Tej Parkash 
Signed-off-by: Yuval Mintz 
---
 drivers/net/ethernet/broadcom/cnic.c| 18 +++---
 drivers/net/ethernet/broadcom/cnic_if.h |  5 +
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/cnic.c 
b/drivers/net/ethernet/broadcom/cnic.c
index 17c145f..d9e3567 100644
--- a/drivers/net/ethernet/broadcom/cnic.c
+++ b/drivers/net/ethernet/broadcom/cnic.c
@@ -192,6 +192,7 @@ static void cnic_ctx_wr(struct cnic_dev *dev, u32 cid_addr, 
u32 off, u32 val)
struct drv_ctl_info info;
struct drv_ctl_io *io = &info.data.io;
 
+   memset(&info, 0, sizeof(struct drv_ctl_info));
info.cmd = DRV_CTL_CTX_WR_CMD;
io->cid_addr = cid_addr;
io->offset = off;
@@ -206,6 +207,7 @@ static void cnic_ctx_tbl_wr(struct cnic_dev *dev, u32 off, 
dma_addr_t addr)
struct drv_ctl_info info;
struct drv_ctl_io *io = &info.data.io;
 
+   memset(&info, 0, sizeof(struct drv_ctl_info));
info.cmd = DRV_CTL_CTXTBL_WR_CMD;
io->offset = off;
io->dma_addr = addr;
@@ -219,6 +221,7 @@ static void cnic_ring_ctl(struct cnic_dev *dev, u32 cid, 
u32 cl_id, int start)
struct drv_ctl_info info;
struct drv_ctl_l2_ring *ring = &info.data.ring;
 
+   memset(&info, 0, sizeof(struct drv_ctl_info));
if (start)
info.cmd = DRV_CTL_START_L2_CMD;
else
@@ -236,6 +239,7 @@ static void cnic_reg_wr_ind(struct cnic_dev *dev, u32 off, 
u32 val)
struct drv_ctl_info info;
struct drv_ctl_io *io = &info.data.io;
 
+   memset(&info, 0, sizeof(struct drv_ctl_info));
info.cmd = DRV_CTL_IO_WR_CMD;
io->offset = off;
io->data = val;
@@ -249,13 +253,14 @@ static u32 cnic_reg_rd_ind(struct cnic_dev *dev, u32 off)
struct drv_ctl_info info;
struct drv_ctl_io *io = &info.data.io;
 
+   memset(&info, 0, sizeof(struct drv_ctl_info));
info.cmd = DRV_CTL_IO_RD_CMD;
io->offset = off;
ethdev->drv_ctl(dev->netdev, &info);
return io->data;
 }
 
-static void cnic_ulp_ctl(struct cnic_dev *dev, int ulp_type, bool reg)
+static void cnic_ulp_ctl(struct cnic_dev *dev, int ulp_type, bool reg, int 
state)
 {
struct cnic_local *cp = dev->cnic_priv;
struct cnic_eth_dev *ethdev = cp->ethdev;
@@ -263,6 +268,7 @@ static void cnic_ulp_ctl(struct cnic_dev *dev, int 
ulp_type, bool reg)
struct fcoe_capabilities *fcoe_cap =
&info.data.register_data.fcoe_features;
 
+   memset(&info, 0, sizeof(struct drv_ctl_info));
if (reg) {
info.cmd = DRV_CTL_ULP_REGISTER_CMD;
if (ulp_type == CNIC_ULP_FCOE && dev->fcoe_cap)
@@ -272,6 +278,7 @@ static void cnic_ulp_ctl(struct cnic_dev *dev, int 
ulp_type, bool reg)
}
 
info.data.ulp_type = ulp_type;
+   info.drv_state = state;
ethdev->drv_ctl(dev->netdev, &info);
 }
 
@@ -286,6 +293,7 @@ static void cnic_spq_completion(struct cnic_dev *dev, int 
cmd, u32 count)
struct cnic_eth_dev *ethdev = cp->ethdev;
struct drv_ctl_info info;
 
+   memset(&info, 0, sizeof(struct drv_ctl_info));
info.cmd = cmd;
info.data.credit.credit_count = count;
ethdev->drv_ctl(dev->netdev, &info);
@@ -591,7 +599,7 @@ static int cnic_register_device(struct cnic_dev *dev, int 
ulp_type,
 
mutex_unlock(&cnic_lock);
 
-   cnic_ulp_ctl(dev, ulp_type, true);
+   cnic_ulp_ctl(dev, ulp_type, true, DRV_ACTIVE);
 
return 0;
 
@@ -636,7 +644,10 @@ static int cnic_unregister_device(struct cnic_dev *dev, 
int ulp_type)
if (test_bit(ULP_F_CALL_PENDING, &cp->ulp_flags[ulp_type]))
netdev_warn(dev->netdev, "Failed waiting for ULP up call to 
complete\n");
 
-   cnic_ulp_ctl(dev, ulp_type, false);
+   if (test_bit(ULP_F_INIT, &cp->ulp_flags[ulp_type]))
+   cnic_ulp_ctl(dev, ulp_type, false, DRV_UNLOADED);
+   else
+   cnic_ulp_ctl(dev, ulp_type, false, DRV_INACTIVE);
 
return 0;
 }
@@ -4267,6 +4278,7 @@ static void cnic_delete_task(struct work_struct *work)
 
cnic_ulp_stop_one(cp, CNIC_ULP_ISCSI);
 
+   memset(&info, 0, sizeof(struct drv_ctl_info));
info.cmd = DRV_CTL_ISCSI_STOPPED_CMD;
cp->ethdev->drv_ctl(dev->netdev, &info);
}
diff --git a/drivers/net/ethernet/broadcom/cnic_if.h 
b/drivers/net/ethernet/broadcom/cnic_if.h
index ef6125b..d0cf006 100644
--- a/drivers/net/ethernet/broadcom/cnic_if.h
+++ b/drivers/net/ethernet/broadcom/cnic_if.h
@@ -151,6 +151,11 @@ struct drv_ctl_register_data {
 
 struct drv_ctl_info {
int cmd;
+   int drv_state;
+#define DRV_NOP0
+#define DRV_ACTIVE 1
+#define DRV_INACTIVE   2
+#define DRV_UNLOADED   3
union {
struct drv_ctl_spq_credit credit;
struct drv_ctl_io io;
-- 
1.

[PATCH net] net: dsa: fix EDSA frame from hwaccel frame

2015-08-03 Thread Vivien Didelot

If the underlying network device features NETIF_F_HW_VLAN_CTAG_TX,
an EDSA frame is prepended with a 802.1q header once queued.

To fix this, push the VLAN tag to the payload if present, before
checking the frame protocol.

[note: we may prefer to access directly VLAN TCI from hwaccel frames,
but this approach is simpler.]

Signed-off-by: Vivien Didelot 
---
 net/dsa/tag_edsa.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 2288c80..3ada4eb 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -9,6 +9,7 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include "dsa_priv.h"
@@ -21,6 +22,10 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct 
net_device *dev)
struct dsa_slave_priv *p = netdev_priv(dev);
u8 *edsa_header;
 
+   skb = vlan_hwaccel_push_inside(skb);
+   if (unlikely(!skb))
+   return NULL;
+
/*
 * Convert the outermost 802.1q tag to a DSA tag and prepend
 * a DSA ethertype field is the packet is tagged, or insert
-- 
2.4.6

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH net-next 4/4] net: dsa: mv88e6xxx: refactor FDB routines

2015-08-03 Thread Vivien Didelot

Refactor mv88e6xxx_port_fdb_{add,del,getnext} to respect the new DSA
switch driver FDB access routines.

The Marvell 88E6xxx switches support up to 4094 FIDs (from 1 to 0xfff;
FID 0 means that multiple address databases are not being used). So
replace the fid_mask with a fid_bitmap of 4096 bits.

FIDs 1 to num_ports will be reserved for non-bridged ports and bridge
groups (a bridge group gets the FID of its first member). The remaining
bits will then be used for VLANs.

Also do not consider an address (yet) if it is trunk mapped.

This change is needed to welcome the future support for hardware VLANs.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/mv88e6171.c |   3 +
 drivers/net/dsa/mv88e6352.c |   3 +
 drivers/net/dsa/mv88e6xxx.c | 205 +++-
 drivers/net/dsa/mv88e6xxx.h |  31 +--
 4 files changed, 172 insertions(+), 70 deletions(-)

diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c
index cfa21ed..735f04c 100644
--- a/drivers/net/dsa/mv88e6171.c
+++ b/drivers/net/dsa/mv88e6171.c
@@ -116,6 +116,9 @@ struct dsa_switch_driver mv88e6171_switch_driver = {
.port_join_bridge   = mv88e6xxx_join_bridge,
.port_leave_bridge  = mv88e6xxx_leave_bridge,
.port_stp_update= mv88e6xxx_port_stp_update,
+   .port_fdb_add   = mv88e6xxx_port_fdb_add,
+   .port_fdb_del   = mv88e6xxx_port_fdb_del,
+   .port_fdb_getnext   = mv88e6xxx_port_fdb_getnext,
 };
 
 MODULE_ALIAS("platform:mv88e6171");
diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c
index eb4630f..191fb25 100644
--- a/drivers/net/dsa/mv88e6352.c
+++ b/drivers/net/dsa/mv88e6352.c
@@ -341,6 +341,9 @@ struct dsa_switch_driver mv88e6352_switch_driver = {
.port_join_bridge   = mv88e6xxx_join_bridge,
.port_leave_bridge  = mv88e6xxx_leave_bridge,
.port_stp_update= mv88e6xxx_port_stp_update,
+   .port_fdb_add   = mv88e6xxx_port_fdb_add,
+   .port_fdb_del   = mv88e6xxx_port_fdb_del,
+   .port_fdb_getnext   = mv88e6xxx_port_fdb_getnext,
 };
 
 MODULE_ALIAS("platform:mv88e6172");
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 438c73e..f576a39 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -964,7 +965,7 @@ static int _mv88e6xxx_atu_cmd(struct dsa_switch *ds, int 
fid, u16 cmd)
 {
int ret;
 
-   ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x01, fid);
+   ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_ATU_FID, fid);
if (ret < 0)
return ret;
 
@@ -1091,7 +1092,7 @@ int mv88e6xxx_join_bridge(struct dsa_switch *ds, int 
port, u32 br_port_mask)
ps->bridge_mask[fid] = br_port_mask;
 
if (fid != ps->fid[port]) {
-   ps->fid_mask |= 1 << ps->fid[port];
+   clear_bit(ps->fid[port], ps->fid_bitmap);
ps->fid[port] = fid;
ret = _mv88e6xxx_update_bridge_config(ds, fid);
}
@@ -1125,9 +1126,16 @@ int mv88e6xxx_leave_bridge(struct dsa_switch *ds, int 
port, u32 br_port_mask)
 
mutex_lock(&ps->smi_mutex);
 
-   newfid = __ffs(ps->fid_mask);
+   newfid = find_next_zero_bit(ps->fid_bitmap, VLAN_N_VID, 1);
+   if (unlikely(newfid > ps->num_ports)) {
+   netdev_err(ds->ports[port], "all first %d FIDs are used\n",
+  ps->num_ports);
+   ret = -ENOSPC;
+   goto unlock;
+   }
+
ps->fid[port] = newfid;
-   ps->fid_mask &= ~(1 << newfid);
+   set_bit(newfid, ps->fid_bitmap);
ps->bridge_mask[fid] &= ~(1 << port);
ps->bridge_mask[newfid] = 1 << port;
 
@@ -1135,6 +1143,7 @@ int mv88e6xxx_leave_bridge(struct dsa_switch *ds, int 
port, u32 br_port_mask)
if (!ret)
ret = _mv88e6xxx_update_bridge_config(ds, newfid);
 
+unlock:
mutex_unlock(&ps->smi_mutex);
 
return ret;
@@ -1174,8 +1183,8 @@ int mv88e6xxx_port_stp_update(struct dsa_switch *ds, int 
port, u8 state)
return 0;
 }
 
-static int __mv88e6xxx_write_addr(struct dsa_switch *ds,
- const unsigned char *addr)
+static int _mv88e6xxx_atu_mac_write(struct dsa_switch *ds,
+   const u8 addr[ETH_ALEN])
 {
int i, ret;
 
@@ -1190,7 +1199,7 @@ static int __mv88e6xxx_write_addr(struct dsa_switch *ds,
return 0;
 }
 
-static int __mv88e6xxx_read_addr(struct dsa_switch *ds, unsigned char *addr)
+static int _mv88e6xxx_atu_mac_read(struct dsa_switch *ds, u8 addr[ETH_ALEN])
 {
int i, ret;
 
@@ -1206,109 +1215,184 @@ static int __mv88e6xxx_read_addr(struct dsa_switch 
*ds, unsigned char *addr)
return 0;
 }
 
-static int __mv88e6xxx_port_fdb_cmd(struct dsa_switch *ds, int port,
-   const un

[PATCH net-next 0/4] net: dsa: support switchdev FDB objects

2015-08-03 Thread Vivien Didelot

This patchset refactors the DSA and mv88e6xxx code to use the switchdev FDB
objects.

The first two patches add minor but necessary changes to switchdev, the third
one implements the switchdev glue in DSA for FDB routines, and the fourth one
refactors the FDB access functions in the mv88e6xxx code.

Below is an example (ports 0-2 belongs to br0, ports 3-4 belongs to br1):

# bridge fdb add 3c:97:0e:11:30:6e dev swp2
# bridge fdb add 3c:97:0e:11:40:78 dev swp3
# bridge fdb add 3c:97:0e:11:50:86 dev swp4
# bridge fdb del 3c:97:0e:11:40:78 dev swp3
# bridge fdb
01:00:5e:00:00:01 dev eth0 self permanent
01:00:5e:00:00:01 dev eth1 self permanent
00:50:d2:10:78:15 dev swp0 master br0 permanent
3c:97:0e:11:30:6e dev swp2 self static
00:50:d2:10:78:15 dev swp3 master br1 permanent
3c:97:0e:11:50:86 dev swp4 self static
# cat /sys/kernel/debug/dsa0/atu
# DB   T/P  Vec State Addr
# 001  Port 004   e   3c:97:0e:11:30:6e
# 004  Port 010   e   3c:97:0e:11:50:86

For the 88E6xxx switches, FIDs 1 to num_ports will be reserved for non-bridged
ports and bridge groups, and the remaining will be later used by VLANs.

This change is necessary to welcome the support for hardware VLANs (which will
follow soon).

Cheers,
-v

Vivien Didelot (4):
  net: switchdev: change fdb addr for a byte array
  net: switchdev: support static FDB addresses
  net: dsa: add support for switchdev FDB objects
  net: dsa: mv88e6xxx: refactor FDB routines

 drivers/net/dsa/mv88e6171.c  |   6 +-
 drivers/net/dsa/mv88e6352.c  |   6 +-
 drivers/net/dsa/mv88e6xxx.c  | 205 ++--
 drivers/net/dsa/mv88e6xxx.h  |  31 +++--
 drivers/net/ethernet/rocker/rocker.c |   2 +-
 include/net/dsa.h|  16 ++-
 include/net/switchdev.h  |   3 +-
 net/bridge/br_fdb.c  |   2 +-
 net/dsa/slave.c  | 221 +++
 net/switchdev/switchdev.c|   6 +-
 10 files changed, 308 insertions(+), 190 deletions(-)

-- 
2.4.6

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH net-next 2/4] net: switchdev: support static FDB addresses

2015-08-03 Thread Vivien Didelot

This patch adds an is_static boolean to the switchdev_obj_fdb structure,
in order to set the ndm_state to either NUD_NOARP or NUD_REACHABLE.

Signed-off-by: Vivien Didelot 
---
 include/net/switchdev.h   | 1 +
 net/switchdev/switchdev.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index e90e1a0..0e296b8 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -72,6 +72,7 @@ struct switchdev_obj {
struct switchdev_obj_fdb {  /* PORT_FDB */
u8 addr[ETH_ALEN];
u16 vid;
+   bool is_static;
} fdb;
} u;
 };
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 28786e8..b75897c 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -810,7 +810,7 @@ static int switchdev_port_fdb_dump_cb(struct net_device 
*dev,
ndm->ndm_flags   = NTF_SELF;
ndm->ndm_type= 0;
ndm->ndm_ifindex = dev->ifindex;
-   ndm->ndm_state   = NUD_REACHABLE;
+   ndm->ndm_state   = obj->u.fdb.is_static ? NUD_NOARP : NUD_REACHABLE;
 
if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr))
goto nla_put_failure;
-- 
2.4.6

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH net-next 3/4] net: dsa: add support for switchdev FDB objects

2015-08-03 Thread Vivien Didelot

Remove the fdb_{add,del,getnext} function pointer in favor of new
port_fdb_{add,del,getnext}.

Implement the switchdev_port_obj_{add,del,dump} functions in DSA to
support the SWITCHDEV_OBJ_PORT_FDB objects.

These functions are called from switchdev_port_bridge_{get,set,del}link.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/mv88e6171.c |   3 -
 drivers/net/dsa/mv88e6352.c |   3 -
 include/net/dsa.h   |  16 ++--
 net/dsa/slave.c | 221 
 4 files changed, 129 insertions(+), 114 deletions(-)

diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c
index 1c78084..cfa21ed 100644
--- a/drivers/net/dsa/mv88e6171.c
+++ b/drivers/net/dsa/mv88e6171.c
@@ -116,9 +116,6 @@ struct dsa_switch_driver mv88e6171_switch_driver = {
.port_join_bridge   = mv88e6xxx_join_bridge,
.port_leave_bridge  = mv88e6xxx_leave_bridge,
.port_stp_update= mv88e6xxx_port_stp_update,
-   .fdb_add= mv88e6xxx_port_fdb_add,
-   .fdb_del= mv88e6xxx_port_fdb_del,
-   .fdb_getnext= mv88e6xxx_port_fdb_getnext,
 };
 
 MODULE_ALIAS("platform:mv88e6171");
diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c
index af210ef..eb4630f 100644
--- a/drivers/net/dsa/mv88e6352.c
+++ b/drivers/net/dsa/mv88e6352.c
@@ -341,9 +341,6 @@ struct dsa_switch_driver mv88e6352_switch_driver = {
.port_join_bridge   = mv88e6xxx_join_bridge,
.port_leave_bridge  = mv88e6xxx_leave_bridge,
.port_stp_update= mv88e6xxx_port_stp_update,
-   .fdb_add= mv88e6xxx_port_fdb_add,
-   .fdb_del= mv88e6xxx_port_fdb_del,
-   .fdb_getnext= mv88e6xxx_port_fdb_getnext,
 };
 
 MODULE_ALIAS("platform:mv88e6172");
diff --git a/include/net/dsa.h b/include/net/dsa.h
index fbca63b..a090c8a 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -296,12 +296,16 @@ struct dsa_switch_driver {
 u32 br_port_mask);
int (*port_stp_update)(struct dsa_switch *ds, int port,
   u8 state);
-   int (*fdb_add)(struct dsa_switch *ds, int port,
-  const unsigned char *addr, u16 vid);
-   int (*fdb_del)(struct dsa_switch *ds, int port,
-  const unsigned char *addr, u16 vid);
-   int (*fdb_getnext)(struct dsa_switch *ds, int port,
-  unsigned char *addr, bool *is_static);
+
+   /*
+* Forwarding database
+*/
+   int (*port_fdb_add)(struct dsa_switch *ds, int port, u16 vid,
+   u8 addr[ETH_ALEN]);
+   int (*port_fdb_del)(struct dsa_switch *ds, int port, u16 vid,
+   u8 addr[ETH_ALEN]);
+   int (*port_fdb_getnext)(struct dsa_switch *ds, int port, u16 *vid,
+   u8 addr[ETH_ALEN], bool *is_static);
 };
 
 void register_switch_driver(struct dsa_switch_driver *type);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 0010c69..0f99a17 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "dsa_priv.h"
 
 /* slave mii_bus handling ***/
@@ -200,105 +201,6 @@ out:
return 0;
 }
 
-static int dsa_slave_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
-struct net_device *dev,
-const unsigned char *addr, u16 vid, u16 nlm_flags)
-{
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->parent;
-   int ret = -EOPNOTSUPP;
-
-   if (ds->drv->fdb_add)
-   ret = ds->drv->fdb_add(ds, p->port, addr, vid);
-
-   return ret;
-}
-
-static int dsa_slave_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
-struct net_device *dev,
-const unsigned char *addr, u16 vid)
-{
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->parent;
-   int ret = -EOPNOTSUPP;
-
-   if (ds->drv->fdb_del)
-   ret = ds->drv->fdb_del(ds, p->port, addr, vid);
-
-   return ret;
-}
-
-static int dsa_slave_fill_info(struct net_device *dev, struct sk_buff *skb,
-  const unsigned char *addr, u16 vid,
-  bool is_static,
-  u32 portid, u32 seq, int type,
-  unsigned int flags)
-{
-   struct nlmsghdr *nlh;
-   struct ndmsg *ndm;
-
-   nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
-   if (!nlh)
-   return -EMSGSIZE;
-
-   ndm = nlmsg_data(nlh);
-   ndm->ndm_family  = AF_BRIDGE;
-   ndm->ndm_pad1= 0;
-   ndm->ndm_pad2= 0;
-   ndm->ndm_flags   = NTF_EXT_LEARNED;

[PATCH net-next 1/4] net: switchdev: change fdb addr for a byte array

2015-08-03 Thread Vivien Didelot

The address in the switchdev_obj_fdb structure is currently represented
as a pointer. Replacing it with a 6-byte array allows switchdev to carry
addresses directly read from hardware registers, not stored by the
switch chip driver (as in Rocker).

Signed-off-by: Vivien Didelot 
---
 drivers/net/ethernet/rocker/rocker.c | 2 +-
 include/net/switchdev.h  | 2 +-
 net/bridge/br_fdb.c  | 2 +-
 net/switchdev/switchdev.c| 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index 4cd5a71..faa5db0 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -4543,7 +4543,7 @@ static int rocker_port_fdb_dump(const struct rocker_port 
*rocker_port,
hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) {
if (found->key.pport != rocker_port->pport)
continue;
-   fdb->addr = found->key.addr;
+   memcpy(fdb->addr, found->key.addr, ETH_ALEN);
fdb->vid = rocker_port_vlan_to_vid(rocker_port,
   found->key.vlan_id);
err = obj->cb(rocker_port->dev, obj);
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 89da893..e90e1a0 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -70,7 +70,7 @@ struct switchdev_obj {
u32 tb_id;
} ipv4_fib;
struct switchdev_obj_fdb {  /* PORT_FDB */
-   const unsigned char *addr;
+   u8 addr[ETH_ALEN];
u16 vid;
} fdb;
} u;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 9e9875d..2c64b6a 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -136,11 +136,11 @@ static void fdb_del_external_learn(struct 
net_bridge_fdb_entry *f)
struct switchdev_obj obj = {
.id = SWITCHDEV_OBJ_PORT_FDB,
.u.fdb = {
-   .addr = f->addr.addr,
.vid = f->vlan_id,
},
};
 
+   memcpy(obj.u.fdb.addr, f->addr.addr, ETH_ALEN);
switchdev_port_obj_del(f->dst->dev, &obj);
 }
 
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 33bafa2..28786e8 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -742,11 +742,11 @@ int switchdev_port_fdb_add(struct ndmsg *ndm, struct 
nlattr *tb[],
struct switchdev_obj obj = {
.id = SWITCHDEV_OBJ_PORT_FDB,
.u.fdb = {
-   .addr = addr,
.vid = vid,
},
};
 
+   memcpy(obj.u.fdb.addr, addr, ETH_ALEN);
return switchdev_port_obj_add(dev, &obj);
 }
 EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);
@@ -769,11 +769,11 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct 
nlattr *tb[],
struct switchdev_obj obj = {
.id = SWITCHDEV_OBJ_PORT_FDB,
.u.fdb = {
-   .addr = addr,
.vid = vid,
},
};
 
+   memcpy(obj.u.fdb.addr, addr, ETH_ALEN);
return switchdev_port_obj_del(dev, &obj);
 }
 EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);
-- 
2.4.6

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] net: dsa: fix EDSA frame from hwaccel frame

2015-08-03 Thread David Miller

From: Vivien Didelot 
Date: Tue, 4 Aug 2015 02:01:18 -0400 (EDT)

> Duly noted. Should I resend it?

Yes, please.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: net: dsa: support switchdev FDB objects

2015-08-03 Thread Vivien Didelot

Hi,

On Aug 4, 2015, at 1:54 AM, Vivien Didelot vivien.dide...@savoirfairelinux.com 
wrote:

> This patchset refactors the DSA and mv88e6xxx code to use the switchdev FDB
> objects.
> 
> The first two patches add minor but necessary changes to switchdev, the third
> one implements the switchdev glue in DSA for FDB routines, and the fourth one
> refactors the FDB access functions in the mv88e6xxx code.

For some reason the patch 4/4 didn't follow. I also missed the net-next
prefix, as mentioned by David earlier. Please ignore this series, I will
retry soon.

Thanks,
-v
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-next v2 1/2] openvswitch: Use regular GRE net_device instead of vport

2015-08-03 Thread Alexei Starovoitov

On Mon, Aug 03, 2015 at 10:58:02PM -0700, Pravin Shelar wrote:
> >> > feature detection based on netdev name?
> >> > meaning that there will be only one such device for the whole
> >> > host? and namespaces cannot have their own gre tunnels?
> >> > (since host 'gretap0' cannot be seen in netns)
> >> >
> >> gretap0 exists in every namespace. This device is created in GRE namespace 
> >> init.
> >
> > then all of them get to be in flow_based mode without being able to
> > change it?
> >
> Yes, But there is no side effect of this feature for the user of the device.

other than allocating metadata_dst for every incoming packet?
If you could actually do it for free then we wouldn't need a flag.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] net: dsa: fix EDSA frame from hwaccel frame

2015-08-03 Thread Vivien Didelot

Hi David,

On Aug 4, 2015, at 1:21 AM, David da...@davemloft.net wrote:

> From: Vivien Didelot 
> Date: Sun,  2 Aug 2015 21:46:02 -0400
> 
>> If the underlying network device features NETIF_F_HW_VLAN_CTAG_TX,
>> an EDSA frame is prepended with a 802.1q header once queued.
>> 
>> To fix this, push the VLAN tag to the payload if present, before
>> checking the frame protocol.
>> 
>> [note: we may prefer to access directly VLAN TCI from hwaccel frames,
>> but this approach is simpler.]
>> 
>> Signed-off-by: Vivien Didelot 
> 
> This is a bug fix so should target 'net', but you generated the patch
> against 'net-next'.
> 
> In any event, you should be explicit about the tree you are targeting
> in order to not waste my time like this, by simply specifying the
> tree in your "[PATCH xxx]" text in your subject line.   Either
> "[PATCH net]" or "[PATCH net-next]".

Duly noted. Should I resend it?

Thanks,
-v
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-next v2 1/2] openvswitch: Use regular GRE net_device instead of vport

2015-08-03 Thread Pravin Shelar

On Mon, Aug 3, 2015 at 10:54 PM, Alexei Starovoitov
 wrote:
> On Mon, Aug 03, 2015 at 10:51:02PM -0700, Pravin Shelar wrote:
>> On Mon, Aug 3, 2015 at 9:23 PM, Alexei Starovoitov
>>  wrote:
>> > On Mon, Aug 03, 2015 at 05:27:26PM -0700, Pravin B Shelar wrote:
>> >> With addition of flow based tunneling, there is no need to
>> >> have special GRE vport. Removes all of the OVS specific
>> >> GRE code and make OVS use a ip_gre net_device.
>> >> Minimal GRE vport is kept to handle compatibility with
>> >> current userspace application.
>> >>
>> >> Signed-off-by: Pravin B Shelar 
>> > ...
>> >> +#define GRE_TAP_FB_NAME "gretap0"
>> > ...
>> >> + /* fallback device is used for flow based tunneling. */
>> >> + if (!strcmp(dev->name, GRE_TAP_FB_NAME)) {
>> >> + struct ip_tunnel *t;
>> >> +
>> >> + t = netdev_priv(dev);
>> >> + t->flow_based_tunnel = true;
>> >> + eth_hw_addr_random(dev);
>> >> + netif_keep_dst(dev);
>> >> + }
>> >> +
>> >
>> > feature detection based on netdev name?
>> > meaning that there will be only one such device for the whole
>> > host? and namespaces cannot have their own gre tunnels?
>> > (since host 'gretap0' cannot be seen in netns)
>> >
>> gretap0 exists in every namespace. This device is created in GRE namespace 
>> init.
>
> then all of them get to be in flow_based mode without being able to
> change it?
>
Yes, But there is no side effect of this feature for the user of the device.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/4] net: switchdev: change fdb addr for a byte array

2015-08-03 Thread Vivien Didelot

The address in the switchdev_obj_fdb structure is currently represented
as a pointer. Replacing it with a 6-byte array allows switchdev to carry
addresses directly read from hardware registers, not stored by the
switch chip driver (as in Rocker).

Signed-off-by: Vivien Didelot 
---
 drivers/net/ethernet/rocker/rocker.c | 2 +-
 include/net/switchdev.h  | 2 +-
 net/bridge/br_fdb.c  | 2 +-
 net/switchdev/switchdev.c| 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index 4cd5a71..faa5db0 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -4543,7 +4543,7 @@ static int rocker_port_fdb_dump(const struct rocker_port 
*rocker_port,
hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) {
if (found->key.pport != rocker_port->pport)
continue;
-   fdb->addr = found->key.addr;
+   memcpy(fdb->addr, found->key.addr, ETH_ALEN);
fdb->vid = rocker_port_vlan_to_vid(rocker_port,
   found->key.vlan_id);
err = obj->cb(rocker_port->dev, obj);
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 89da893..e90e1a0 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -70,7 +70,7 @@ struct switchdev_obj {
u32 tb_id;
} ipv4_fib;
struct switchdev_obj_fdb {  /* PORT_FDB */
-   const unsigned char *addr;
+   u8 addr[ETH_ALEN];
u16 vid;
} fdb;
} u;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 9e9875d..2c64b6a 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -136,11 +136,11 @@ static void fdb_del_external_learn(struct 
net_bridge_fdb_entry *f)
struct switchdev_obj obj = {
.id = SWITCHDEV_OBJ_PORT_FDB,
.u.fdb = {
-   .addr = f->addr.addr,
.vid = f->vlan_id,
},
};
 
+   memcpy(obj.u.fdb.addr, f->addr.addr, ETH_ALEN);
switchdev_port_obj_del(f->dst->dev, &obj);
 }
 
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 33bafa2..28786e8 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -742,11 +742,11 @@ int switchdev_port_fdb_add(struct ndmsg *ndm, struct 
nlattr *tb[],
struct switchdev_obj obj = {
.id = SWITCHDEV_OBJ_PORT_FDB,
.u.fdb = {
-   .addr = addr,
.vid = vid,
},
};
 
+   memcpy(obj.u.fdb.addr, addr, ETH_ALEN);
return switchdev_port_obj_add(dev, &obj);
 }
 EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);
@@ -769,11 +769,11 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct 
nlattr *tb[],
struct switchdev_obj obj = {
.id = SWITCHDEV_OBJ_PORT_FDB,
.u.fdb = {
-   .addr = addr,
.vid = vid,
},
};
 
+   memcpy(obj.u.fdb.addr, addr, ETH_ALEN);
return switchdev_port_obj_del(dev, &obj);
 }
 EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);
-- 
2.4.6

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 3/4] net: dsa: add support for switchdev FDB objects

2015-08-03 Thread Vivien Didelot

Remove the fdb_{add,del,getnext} function pointer in favor of new
port_fdb_{add,del,getnext}.

Implement the switchdev_port_obj_{add,del,dump} functions in DSA to
support the SWITCHDEV_OBJ_PORT_FDB objects.

These functions are called from switchdev_port_bridge_{get,set,del}link.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/mv88e6171.c |   3 -
 drivers/net/dsa/mv88e6352.c |   3 -
 include/net/dsa.h   |  16 ++--
 net/dsa/slave.c | 221 
 4 files changed, 129 insertions(+), 114 deletions(-)

diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c
index 1c78084..cfa21ed 100644
--- a/drivers/net/dsa/mv88e6171.c
+++ b/drivers/net/dsa/mv88e6171.c
@@ -116,9 +116,6 @@ struct dsa_switch_driver mv88e6171_switch_driver = {
.port_join_bridge   = mv88e6xxx_join_bridge,
.port_leave_bridge  = mv88e6xxx_leave_bridge,
.port_stp_update= mv88e6xxx_port_stp_update,
-   .fdb_add= mv88e6xxx_port_fdb_add,
-   .fdb_del= mv88e6xxx_port_fdb_del,
-   .fdb_getnext= mv88e6xxx_port_fdb_getnext,
 };
 
 MODULE_ALIAS("platform:mv88e6171");
diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c
index af210ef..eb4630f 100644
--- a/drivers/net/dsa/mv88e6352.c
+++ b/drivers/net/dsa/mv88e6352.c
@@ -341,9 +341,6 @@ struct dsa_switch_driver mv88e6352_switch_driver = {
.port_join_bridge   = mv88e6xxx_join_bridge,
.port_leave_bridge  = mv88e6xxx_leave_bridge,
.port_stp_update= mv88e6xxx_port_stp_update,
-   .fdb_add= mv88e6xxx_port_fdb_add,
-   .fdb_del= mv88e6xxx_port_fdb_del,
-   .fdb_getnext= mv88e6xxx_port_fdb_getnext,
 };
 
 MODULE_ALIAS("platform:mv88e6172");
diff --git a/include/net/dsa.h b/include/net/dsa.h
index fbca63b..a090c8a 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -296,12 +296,16 @@ struct dsa_switch_driver {
 u32 br_port_mask);
int (*port_stp_update)(struct dsa_switch *ds, int port,
   u8 state);
-   int (*fdb_add)(struct dsa_switch *ds, int port,
-  const unsigned char *addr, u16 vid);
-   int (*fdb_del)(struct dsa_switch *ds, int port,
-  const unsigned char *addr, u16 vid);
-   int (*fdb_getnext)(struct dsa_switch *ds, int port,
-  unsigned char *addr, bool *is_static);
+
+   /*
+* Forwarding database
+*/
+   int (*port_fdb_add)(struct dsa_switch *ds, int port, u16 vid,
+   u8 addr[ETH_ALEN]);
+   int (*port_fdb_del)(struct dsa_switch *ds, int port, u16 vid,
+   u8 addr[ETH_ALEN]);
+   int (*port_fdb_getnext)(struct dsa_switch *ds, int port, u16 *vid,
+   u8 addr[ETH_ALEN], bool *is_static);
 };
 
 void register_switch_driver(struct dsa_switch_driver *type);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 0010c69..0f99a17 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "dsa_priv.h"
 
 /* slave mii_bus handling ***/
@@ -200,105 +201,6 @@ out:
return 0;
 }
 
-static int dsa_slave_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
-struct net_device *dev,
-const unsigned char *addr, u16 vid, u16 nlm_flags)
-{
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->parent;
-   int ret = -EOPNOTSUPP;
-
-   if (ds->drv->fdb_add)
-   ret = ds->drv->fdb_add(ds, p->port, addr, vid);
-
-   return ret;
-}
-
-static int dsa_slave_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
-struct net_device *dev,
-const unsigned char *addr, u16 vid)
-{
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->parent;
-   int ret = -EOPNOTSUPP;
-
-   if (ds->drv->fdb_del)
-   ret = ds->drv->fdb_del(ds, p->port, addr, vid);
-
-   return ret;
-}
-
-static int dsa_slave_fill_info(struct net_device *dev, struct sk_buff *skb,
-  const unsigned char *addr, u16 vid,
-  bool is_static,
-  u32 portid, u32 seq, int type,
-  unsigned int flags)
-{
-   struct nlmsghdr *nlh;
-   struct ndmsg *ndm;
-
-   nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
-   if (!nlh)
-   return -EMSGSIZE;
-
-   ndm = nlmsg_data(nlh);
-   ndm->ndm_family  = AF_BRIDGE;
-   ndm->ndm_pad1= 0;
-   ndm->ndm_pad2= 0;
-   ndm->ndm_flags   = NTF_EXT_LEARNED;

[PATCH 2/4] net: switchdev: support static FDB addresses

2015-08-03 Thread Vivien Didelot

This patch adds an is_static boolean to the switchdev_obj_fdb structure,
in order to set the ndm_state to either NUD_NOARP or NUD_REACHABLE.

Signed-off-by: Vivien Didelot 
---
 include/net/switchdev.h   | 1 +
 net/switchdev/switchdev.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index e90e1a0..0e296b8 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -72,6 +72,7 @@ struct switchdev_obj {
struct switchdev_obj_fdb {  /* PORT_FDB */
u8 addr[ETH_ALEN];
u16 vid;
+   bool is_static;
} fdb;
} u;
 };
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 28786e8..b75897c 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -810,7 +810,7 @@ static int switchdev_port_fdb_dump_cb(struct net_device 
*dev,
ndm->ndm_flags   = NTF_SELF;
ndm->ndm_type= 0;
ndm->ndm_ifindex = dev->ifindex;
-   ndm->ndm_state   = NUD_REACHABLE;
+   ndm->ndm_state   = obj->u.fdb.is_static ? NUD_NOARP : NUD_REACHABLE;
 
if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr))
goto nla_put_failure;
-- 
2.4.6

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

net: dsa: support switchdev FDB objects

2015-08-03 Thread Vivien Didelot

This patchset refactors the DSA and mv88e6xxx code to use the switchdev FDB
objects.

The first two patches add minor but necessary changes to switchdev, the third
one implements the switchdev glue in DSA for FDB routines, and the fourth one
refactors the FDB access functions in the mv88e6xxx code.

Below is an example (ports 0-2 belong to br0, ports 3-4 belong to br1):

# bridge fdb add 3c:97:0e:11:30:6e dev swp2
# bridge fdb add 3c:97:0e:11:40:78 dev swp3
# bridge fdb add 3c:97:0e:11:50:86 dev swp4
# bridge fdb del 3c:97:0e:11:40:78 dev swp3
# bridge fdb
01:00:5e:00:00:01 dev eth0 self permanent
01:00:5e:00:00:01 dev eth1 self permanent
00:50:d2:10:78:15 dev swp0 master br0 permanent
3c:97:0e:11:30:6e dev swp2 self static
00:50:d2:10:78:15 dev swp3 master br1 permanent
3c:97:0e:11:50:86 dev swp4 self static
# cat /sys/kernel/debug/dsa0/atu
# DB   T/P  Vec State Addr
# 001  Port 004   e   3c:97:0e:11:30:6e
# 004  Port 010   e   3c:97:0e:11:50:86

For the 88E6xxx switches, FIDs 1 to num_ports will be reserved for non-bridged
ports and bridge groups, and the remaining will be later used by VLANs.

This change is necessary to welcome the support for hardware VLANs (which will
follow soon).

Cheers,
-v

Vivien Didelot (4):
  net: switchdev: change fdb addr for a byte array
  net: switchdev: support static FDB addresses
  net: dsa: add support for switchdev FDB objects
  net: dsa: mv88e6xxx: refactor FDB routines

 drivers/net/dsa/mv88e6171.c  |   6 +-
 drivers/net/dsa/mv88e6352.c  |   6 +-
 drivers/net/dsa/mv88e6xxx.c  | 205 ++--
 drivers/net/dsa/mv88e6xxx.h  |  31 +++--
 drivers/net/ethernet/rocker/rocker.c |   2 +-
 include/net/dsa.h|  16 ++-
 include/net/switchdev.h  |   3 +-
 net/bridge/br_fdb.c  |   2 +-
 net/dsa/slave.c  | 221 +++
 net/switchdev/switchdev.c|   6 +-
 10 files changed, 308 insertions(+), 190 deletions(-)

-- 
2.4.6

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-next v2 1/2] openvswitch: Use regular GRE net_device instead of vport

2015-08-03 Thread Pravin Shelar

On Mon, Aug 3, 2015 at 9:59 PM, David Miller  wrote:
> From: Alexei Starovoitov 
> Date: Mon, 3 Aug 2015 21:23:40 -0700
>
>> On Mon, Aug 03, 2015 at 05:27:26PM -0700, Pravin B Shelar wrote:
>>> With addition of flow based tunneling, there is no need to
>>> have special GRE vport. Removes all of the OVS specific
>>> GRE code and make OVS use a ip_gre net_device.
>>> Minimal GRE vport is kept to handle compatibility with
>>> current userspace application.
>>>
>>> Signed-off-by: Pravin B Shelar 
>> ...
>>> +#define GRE_TAP_FB_NAME "gretap0"
>> ...
>>> +/* fallback device is used for flow based tunneling. */
>>> +if (!strcmp(dev->name, GRE_TAP_FB_NAME)) {
>>> +struct ip_tunnel *t;
>>> +
>>> +t = netdev_priv(dev);
>>> +t->flow_based_tunnel = true;
>>> +eth_hw_addr_random(dev);
>>> +netif_keep_dst(dev);
>>> +}
>>> +
>>
>> feature detection based on netdev name?
>> meaning that there will be only one such device for the whole
>> host? and namespaces cannot have their own gre tunnels?
>> (since host 'gretap0' cannot be seen in netns)
>
> Doing anything like this by netdev name is wrong.
>
> Pravin you will need to do this in some other way.
>
ok, I will add API to create flow-based GRE device.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-next v2 1/2] openvswitch: Use regular GRE net_device instead of vport

2015-08-03 Thread Alexei Starovoitov

On Mon, Aug 03, 2015 at 10:51:02PM -0700, Pravin Shelar wrote:
> On Mon, Aug 3, 2015 at 9:23 PM, Alexei Starovoitov
>  wrote:
> > On Mon, Aug 03, 2015 at 05:27:26PM -0700, Pravin B Shelar wrote:
> >> With addition of flow based tunneling, there is no need to
> >> have special GRE vport. Removes all of the OVS specific
> >> GRE code and make OVS use a ip_gre net_device.
> >> Minimal GRE vport is kept to handle compatibility with
> >> current userspace application.
> >>
> >> Signed-off-by: Pravin B Shelar 
> > ...
> >> +#define GRE_TAP_FB_NAME "gretap0"
> > ...
> >> + /* fallback device is used for flow based tunneling. */
> >> + if (!strcmp(dev->name, GRE_TAP_FB_NAME)) {
> >> + struct ip_tunnel *t;
> >> +
> >> + t = netdev_priv(dev);
> >> + t->flow_based_tunnel = true;
> >> + eth_hw_addr_random(dev);
> >> + netif_keep_dst(dev);
> >> + }
> >> +
> >
> > feature detection based on netdev name?
> > meaning that there will be only one such device for the whole
> > host? and namespaces cannot have their own gre tunnels?
> > (since host 'gretap0' cannot be seen in netns)
> >
> gretap0 exists in every namespace. This device is created in GRE namespace init.

then all of them get to be in flow_based mode without being able to
change it?

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-next v2 1/2] openvswitch: Use regular GRE net_device instead of vport

2015-08-03 Thread Pravin Shelar

On Mon, Aug 3, 2015 at 9:23 PM, Alexei Starovoitov
 wrote:
> On Mon, Aug 03, 2015 at 05:27:26PM -0700, Pravin B Shelar wrote:
>> With addition of flow based tunneling, there is no need to
>> have special GRE vport. Removes all of the OVS specific
>> GRE code and make OVS use a ip_gre net_device.
>> Minimal GRE vport is kept to handle compatibility with
>> current userspace application.
>>
>> Signed-off-by: Pravin B Shelar 
> ...
>> +#define GRE_TAP_FB_NAME "gretap0"
> ...
>> + /* fallback device is used for flow based tunneling. */
>> + if (!strcmp(dev->name, GRE_TAP_FB_NAME)) {
>> + struct ip_tunnel *t;
>> +
>> + t = netdev_priv(dev);
>> + t->flow_based_tunnel = true;
>> + eth_hw_addr_random(dev);
>> + netif_keep_dst(dev);
>> + }
>> +
>
> feature detection based on netdev name?
> meaning that there will be only one such device for the whole
> host? and namespaces cannot have their own gre tunnels?
> (since host 'gretap0' cannot be seen in netns)
>
gretap0 exists in every namespace. This device is created in GRE namespace init.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-next] mpls: Use definition for reserved label checks

2015-08-03 Thread David Miller

From: Robert Shearman 
Date: Mon, 3 Aug 2015 17:50:04 +0100

> In multiple locations there are checks for whether the label in hand
> is a reserved label or not using the arbitrary value of 16. Factor
> this out into a #define for better maintainability and for
> documentation.
> 
> Signed-off-by: Robert Shearman 
> ---
> Resend of an earlier version of this patch that was included as part
> of a larger series. Changes since that version:
>   - Move new #define into userspace header file in line with other
> well-defined label values. Rename to match.

Applied, thanks.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: BUG: null dereference in __skb_recv_datagram

2015-08-03 Thread Brenden Blanco

> [  318.244596] BUG: unable to handle kernel NULL pointer dereference
> at 008e
> [  318.245182] IP: [] __skb_recv_datagram+0xbc/0x5a0

Replying to myself, and adding commit interested parties...

I went through the git log for the function in question, and
positively identified that the following commit introduces the crash:

738ac1e net: Clone skb before setting peeked flag

Null dereference is at line 224 of net/core/datagram.c (according to
my objdump dis-assembly):

spin_lock_irqsave(&queue->lock, cpu_flags);
skb_queue_walk(queue, skb) {
last = skb;
*peeked = skb->peeked;

^---
if (flags & MSG_PEEK) {
if (_off >= skb->len && (skb->len || _off ||
 skb->peeked)) {

Beyond that, I'm probably out of my comfort zone, so if anyone has a
bright idea of a patch to try, I will gladly test it.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/1] net/ipv4: Enable flow-based ECMP

2015-08-03 Thread Stephen Hemminger

On Tue,  4 Aug 2015 13:28:47 +1200
Richard Laing  wrote:

> diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
> index 5fa643b..7db9f72 100644
> --- a/include/net/ip_fib.h
> +++ b/include/net/ip_fib.h
> @@ -117,6 +117,8 @@ struct fib_info {
>  #ifdef CONFIG_IP_ROUTE_MULTIPATH
>   int fib_power;
>  #endif
> + /* Cache the number of live nexthops for flow based ECMP calculation. */
> + int live_nexthops;

unsigned or u16 ? rather than risking sign issues.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v2 net-next 1/2] RDS-TCP: Make RDS-TCP work correctly when it is set up in a netns other than init_net

2015-08-03 Thread Sowmini Varadhan

Open the sockets calling sock_create_kern() with the correct struct net
pointer, and use that struct net pointer when verifying the
address passed to rds_bind().

Signed-off-by: Sowmini Varadhan 
---
v2: David Ahern comments.

 net/rds/bind.c|3 ++-
 net/rds/connection.c  |   16 ++--
 net/rds/ib.c  |2 +-
 net/rds/ib_cm.c   |5 +++--
 net/rds/iw.c  |2 +-
 net/rds/iw_cm.c   |5 +++--
 net/rds/rds.h |   23 +++
 net/rds/send.c|3 ++-
 net/rds/tcp.c |4 ++--
 net/rds/tcp_connect.c |3 ++-
 net/rds/tcp_listen.c  |   16 
 net/rds/transport.c   |4 ++--
 12 files changed, 59 insertions(+), 27 deletions(-)

diff --git a/net/rds/bind.c b/net/rds/bind.c
index 4ebd29c..dd666fb 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -185,7 +185,8 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, 
int addr_len)
ret = 0;
goto out;
}
-   trans = rds_trans_get_preferred(sin->sin_addr.s_addr);
+   trans = rds_trans_get_preferred(sock_net(sock->sk),
+   sin->sin_addr.s_addr);
if (!trans) {
ret = -EADDRNOTAVAIL;
rds_remove_bound(rs);
diff --git a/net/rds/connection.c b/net/rds/connection.c
index da6da57..d4fecb2 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -117,7 +117,8 @@ static void rds_conn_reset(struct rds_connection *conn)
  * For now they are not garbage collected once they're created.  They
  * are torn down as the module is removed, if ever.
  */
-static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
+static struct rds_connection *__rds_conn_create(struct net *net,
+   __be32 laddr, __be32 faddr,
   struct rds_transport *trans, gfp_t gfp,
   int is_outgoing)
 {
@@ -157,6 +158,7 @@ static struct rds_connection *__rds_conn_create(__be32 
laddr, __be32 faddr,
conn->c_faddr = faddr;
spin_lock_init(&conn->c_lock);
conn->c_next_tx_seq = 1;
+   rds_conn_net_set(conn, net);
 
init_waitqueue_head(&conn->c_waitq);
INIT_LIST_HEAD(&conn->c_send_queue);
@@ -174,7 +176,7 @@ static struct rds_connection *__rds_conn_create(__be32 
laddr, __be32 faddr,
 * can bind to the destination address then we'd rather the messages
 * flow through loopback rather than either transport.
 */
-   loop_trans = rds_trans_get_preferred(faddr);
+   loop_trans = rds_trans_get_preferred(net, faddr);
if (loop_trans) {
rds_trans_put(loop_trans);
conn->c_loopback = 1;
@@ -260,17 +262,19 @@ static struct rds_connection *__rds_conn_create(__be32 
laddr, __be32 faddr,
return conn;
 }
 
-struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr,
+struct rds_connection *rds_conn_create(struct net *net,
+  __be32 laddr, __be32 faddr,
   struct rds_transport *trans, gfp_t gfp)
 {
-   return __rds_conn_create(laddr, faddr, trans, gfp, 0);
+   return __rds_conn_create(net, laddr, faddr, trans, gfp, 0);
 }
 EXPORT_SYMBOL_GPL(rds_conn_create);
 
-struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
+struct rds_connection *rds_conn_create_outgoing(struct net *net,
+   __be32 laddr, __be32 faddr,
   struct rds_transport *trans, gfp_t gfp)
 {
-   return __rds_conn_create(laddr, faddr, trans, gfp, 1);
+   return __rds_conn_create(net, laddr, faddr, trans, gfp, 1);
 }
 EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
 
diff --git a/net/rds/ib.c b/net/rds/ib.c
index ba2dffe..1381422 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -317,7 +317,7 @@ static void rds_ib_ic_info(struct socket *sock, unsigned 
int len,
  * allowed to influence which paths have priority.  We could call userspace
  * asserting this policy "routing".
  */
-static int rds_ib_laddr_check(__be32 addr)
+static int rds_ib_laddr_check(struct net *net, __be32 addr)
 {
int ret;
struct rdma_cm_id *cm_id;
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 0da2a45..f40d8f5 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -448,8 +448,9 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
 (unsigned long long)be64_to_cpu(lguid),
 (unsigned long long)be64_to_cpu(fguid));
 
-   conn = rds_conn_create(dp->dp_daddr, dp->dp_saddr, &rds_ib_transport,
-  GFP_KERNEL);
+   /* RDS/IB is not currently netns aware, thus init_net */
+   conn = rds_conn_create(&init_net, dp->dp_daddr, dp->dp_saddr,
+  &rds_ib_transport, GFP_KERNEL);
if (IS_ERR(conn)) {

[PATCH v2 net-next 0/2] RDS-TCP: Network namespace support

2015-08-03 Thread Sowmini Varadhan

This patch series contains the set of changes to correctly set up 
the infra for PF_RDS sockets that use TCP as the transport in multiple
network namespaces.

Patch 1 in the series is the minimal set of changes to allow
a single instance of RDS-TCP to run in any (i.e init_net or other) net
namespace.  The changes in this patch set ensure that the execution of 
'modprobe [-r] rds_tcp' sets up the kernel TCP sockets 
relative to the current netns, so that RDS applications can send/recv
packets from that netns, and the netns can later be deleted cleanly.

Patch 2 of the series further allows multiple RDS-TCP instances,
one per network namespace. The changes in this patch allow dynamic
creation/tear-down of RDS-TCP client and server sockets  across all
current and future namespaces. 

v2 changes from RFC sent out earlier:
David Ahern comments in patch 1, net_device notifier in patch 2, 
patch 3 broken off and submitted separately.

Sowmini Varadhan (2):
  Make RDS-TCP work correctly when it is set up in a netns other than
init_net
  Support multiple RDS-TCP listen endpoints, one per netns.

 net/rds/bind.c|3 +-
 net/rds/connection.c  |   16 +++--
 net/rds/ib.c  |2 +-
 net/rds/ib_cm.c   |5 +-
 net/rds/iw.c  |2 +-
 net/rds/iw_cm.c   |5 +-
 net/rds/rds.h |   23 ++-
 net/rds/send.c|3 +-
 net/rds/tcp.c |  167 +++-
 net/rds/tcp.h |7 ++-
 net/rds/tcp_connect.c |9 ++-
 net/rds/tcp_listen.c  |   40 
 net/rds/transport.c   |4 +-
 13 files changed, 216 insertions(+), 70 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v2 net-next 2/2] RDS-TCP: Support multiple RDS-TCP listen endpoints, one per netns.

2015-08-03 Thread Sowmini Varadhan

Register pernet subsys init/stop functions that will set up
and tear down per-net RDS-TCP listen endpoints. Unregister
pernet subsys functions on 'modprobe -r' to clean up these
end points.

Enable keepalive on both accept and connect socket endpoints.
The keepalive timer expiration will ensure that client socket
endpoints will be removed as appropriate from the netns when
an interface is removed from a namespace.

Register a device notifier callback that will clean up all
sockets (and thus avoid the need to wait for keepalive timeout)
when the loopback device is unregistered from the netns indicating
that the netns is getting deleted.

Signed-off-by: Sowmini Varadhan 
---
v2: net_device notifier for synchronous cleanup of sockets.

 net/rds/tcp.c |  163 -
 net/rds/tcp.h |7 ++-
 net/rds/tcp_connect.c |6 +-
 net/rds/tcp_listen.c  |   38 +++-
 4 files changed, 164 insertions(+), 50 deletions(-)

diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 98f5de3..339392b 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -35,6 +35,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #include "rds.h"
 #include "tcp.h"
@@ -250,16 +253,7 @@ static void rds_tcp_destroy_conns(void)
}
 }
 
-static void rds_tcp_exit(void)
-{
-   rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
-   rds_tcp_listen_stop();
-   rds_tcp_destroy_conns();
-   rds_trans_unregister(&rds_tcp_transport);
-   rds_tcp_recv_exit();
-   kmem_cache_destroy(rds_tcp_conn_slab);
-}
-module_exit(rds_tcp_exit);
+static void rds_tcp_exit(void);
 
 struct rds_transport rds_tcp_transport = {
.laddr_check= rds_tcp_laddr_check,
@@ -281,6 +275,138 @@ struct rds_transport rds_tcp_transport = {
.t_prefer_loopback  = 1,
 };
 
+static int rds_tcp_netid;
+
+/* per-network namespace private data for this module */
+struct rds_tcp_net {
+   struct socket *rds_tcp_listen_sock;
+   struct work_struct rds_tcp_accept_w;
+};
+
+static void rds_tcp_accept_worker(struct work_struct *work)
+{
+   struct rds_tcp_net *rtn = container_of(work,
+  struct rds_tcp_net,
+  rds_tcp_accept_w);
+
+   while (rds_tcp_accept_one(rtn->rds_tcp_listen_sock) == 0)
+   cond_resched();
+}
+
+void rds_tcp_accept_work(struct sock *sk)
+{
+   struct net *net = sock_net(sk);
+   struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+
+   queue_work(rds_wq, &rtn->rds_tcp_accept_w);
+}
+
+static __net_init int rds_tcp_init_net(struct net *net)
+{
+   struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+
+   rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net);
+   if (!rtn->rds_tcp_listen_sock) {
+   pr_warn("could not set up listen sock\n");
+   return -EAFNOSUPPORT;
+   }
+   INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker);
+   return 0;
+}
+
+static void __net_exit rds_tcp_exit_net(struct net *net)
+{
+   struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+
+   /* If rds_tcp_exit_net() is called as a result of netns deletion,
+* the rds_tcp_kill_sock() device notifier would already have cleaned
+* up the listen socket, thus there is no work to do in this function.
+*
+* If rds_tcp_exit_net() is called as a result of module unload,
+* i.e., due to rds_tcp_exit() -> unregister_pernet_subsys(), then
+* we do need to clean up the listen socket here.
+*/
+   if (rtn->rds_tcp_listen_sock) {
+   rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
+   rtn->rds_tcp_listen_sock = NULL;
+   flush_work(&rtn->rds_tcp_accept_w);
+   }
+}
+
+static struct pernet_operations rds_tcp_net_ops = {
+   .init = rds_tcp_init_net,
+   .exit = rds_tcp_exit_net,
+   .id = &rds_tcp_netid,
+   .size = sizeof(struct rds_tcp_net),
+};
+
+static void rds_tcp_kill_sock(struct net *net)
+{
+   struct rds_tcp_connection *tc, *_tc;
+   struct sock *sk;
+   struct list_head tmp_list;
+   struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+
+   rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
+   rtn->rds_tcp_listen_sock = NULL;
+   flush_work(&rtn->rds_tcp_accept_w);
+   INIT_LIST_HEAD(&tmp_list);
+   spin_lock_irq(&rds_tcp_conn_lock);
+   list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
+   struct net *c_net = read_pnet(&tc->conn->c_net);
+
+   if (net != c_net || !tc->t_sock)
+   continue;
+   list_del(&tc->t_tcp_node);
+   list_add_tail(&tc->t_tcp_node, &tmp_list);
+   }
+   spin_unlock_irq(&rds_tcp_conn_lock);
+   list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) {
+   sk = tc->t_s

[PATCH net-next 2/2] rocker: use netdev_err after register_netdev

2015-08-03 Thread sfeldma

From: Scott Feldman 

After successful register_netdev, we can use netdev_err rather than the more
generic dev_err.

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index 0ab3a3b..4e8cad0 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -4985,7 +4985,7 @@ static int rocker_probe_port(struct rocker *rocker, 
unsigned int port_number)
 
err = rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE, 0);
if (err) {
-   dev_err(&pdev->dev, "install ig port table failed\n");
+   netdev_err(rocker_port->dev, "install ig port table failed\n");
goto err_port_ig_tbl;
}
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH net-next 1/2] rocker: NULL port if port probe fails

2015-08-03 Thread sfeldma

From: Scott Feldman 

Set port to NULL if port probe fails so we don't try to remove partially
initialized port on port probe err cleanup path.

Signed-off-by: Scott Feldman 
---
 drivers/net/ethernet/rocker/rocker.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index 7b4c347..0ab3a3b 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -5005,6 +5005,7 @@ err_untagged_vlan:
rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE,
   ROCKER_OP_FLAG_REMOVE);
 err_port_ig_tbl:
+   rocker->ports[port_number] = NULL;
unregister_netdev(dev);
 err_register_netdev:
free_netdev(dev);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-next 0/2] lwtunnel: encap locally-generated ipv4 packets

2015-08-03 Thread David Miller

From: Robert Shearman 
Date: Mon, 3 Aug 2015 17:39:19 +0100

> Locally-generated IPv4 packets, such as from applications running on
> the host or traceroute/ping currently don't have lwtunnel output
> redirected encap applied. However, they should do in the same way as
> for forwarded packets and this patch series addresses that.

Series applied, thanks.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] net: fec: fix initial runtime PM refcount

2015-08-03 Thread David Miller

From: Lucas Stach 
Date: Mon,  3 Aug 2015 17:50:11 +0200

> The clocks are initially active and thus the device is marked active.
> This still keeps the PM refcount at 0, the pm_runtime_put_autosuspend()
> call at the end of probe then leaves us with an invalid refcount of -1,
> which in turn leads to the device staying in suspended state even though
> netdev open had been called.
> 
> Fix this by initializing the refcount to be coherent with the initial
> device status.
> 
> Fixes:
> 8fff755e9f8 (net: fec: Ensure clocks are enabled while using mdio bus)
> 
> Signed-off-by: Lucas Stach 
> ---
> Please apply this as a fix for 4.2

I'm waiting for feedback to be given wrt. the runtime-pm issues.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] net: dsa: fix EDSA frame from hwaccel frame

2015-08-03 Thread David Miller

From: Vivien Didelot 
Date: Sun,  2 Aug 2015 21:46:02 -0400

> If the underlying network device features NETIF_F_HW_VLAN_CTAG_TX,
> an EDSA frame is prepended with a 802.1q header once queued.
> 
> To fix this, push the VLAN tag to the payload if present, before
> checking the frame protocol.
> 
> [note: we may prefer to access directly VLAN TCI from hwaccel frames,
> but this approach is simpler.]
> 
> Signed-off-by: Vivien Didelot 

This is a bug fix so should target 'net', but you generated the patch
against 'net-next'.

In any event, you should be explicit about the tree you are targeting
in order to not waste my time like this, by simply specifying the
tree in your "[PATCH xxx]" text in your subject line.   Either
"[PATCH net]" or "[PATCH net-next]".

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] xen-netback: Allocate fraglist early to avoid complex rollback

2015-08-03 Thread David Miller

From: Ross Lagerwall 
Date: Mon, 3 Aug 2015 15:38:03 +0100

> Determine if a fraglist is needed in the tx path, and allocate it if
> necessary before setting up the copy and map operations.
> Otherwise, undoing the copy and map operations is tricky.
> 
> This fixes a use-after-free: if allocating the fraglist failed, the copy
> and map operations that had been set up were still executed, writing
> over the data area of a freed skb.
> 
> Signed-off-by: Ross Lagerwall 

Applied, thanks.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net] udp: fix dst races with multicast early demux

2015-08-03 Thread David Miller

From: Eric Dumazet 
Date: Sat, 01 Aug 2015 12:14:33 +0200

> From: Eric Dumazet 
> 
> Multicast dst are not cached. They carry DST_NOCACHE.
> 
> As mentioned in commit f8864972126899 ("ipv4: fix dst race in
> sk_dst_get()"), these dst need special care before caching them
> into a socket.
> 
> Caching them is allowed only if their refcnt was not 0, ie we
> must use atomic_inc_not_zero()
> 
> Also, we must use READ_ONCE() to fetch sk->sk_rx_dst, as mentioned
> in commit d0c294c53a771 ("tcp: prevent fetching dst twice in early demux
> code")
> 
> Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux")
> Signed-off-by: Eric Dumazet 
> Reported-by: Gregory Hoggarth 
> Reported-by: Alex Gartrell 
> Cc: Michal Kubeček 
> ---
> David : I will be on vacation for following 7 days, no internet access.
> Please wait for tests done by Gregory & Alex before merging this ?
> Thanks !

Now that this has been tested by Gregory, applied and queued up for
-stable thanks!
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH net-next] net_sched: act_bpf: remove spinlock in fast path

2015-08-03 Thread Alexei Starovoitov

Similar to act_gact/act_mirred, act_bpf can be lockless in packet processing.

Also similar to gact/mirred there is a race between prog->filter and
prog->tcf_action. Meaning that the program being replaced may use
previous default action if it happened to return TC_ACT_UNSPEC.
act_mirred race between tcf_action and tcfm_dev is similar.
In all cases the race is harmless.
Long term we may want to improve the situation by replacing the whole
struct tc_action as single pointer instead of updating inner fields one by one.

Signed-off-by: Alexei Starovoitov 
---
 net/sched/act_bpf.c |   15 +--
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 1b97dabc621a..2b8c47200152 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -43,10 +43,8 @@ static int tcf_bpf(struct sk_buff *skb, const struct 
tc_action *act,
if (unlikely(!skb_mac_header_was_set(skb)))
return TC_ACT_UNSPEC;
 
-   spin_lock(&prog->tcf_lock);
-
-   prog->tcf_tm.lastuse = jiffies;
-   bstats_update(&prog->tcf_bstats, skb);
+   tcf_lastuse_update(&prog->tcf_tm);
+   bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
 
/* Needed here for accessing maps. */
rcu_read_lock();
@@ -77,7 +75,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct 
tc_action *act,
break;
case TC_ACT_SHOT:
action = filter_res;
-   prog->tcf_qstats.drops++;
+   qstats_drop_inc(this_cpu_ptr(prog->common.cpu_qstats));
break;
case TC_ACT_UNSPEC:
action = prog->tcf_action;
@@ -87,7 +85,6 @@ static int tcf_bpf(struct sk_buff *skb, const struct 
tc_action *act,
break;
}
 
-   spin_unlock(&prog->tcf_lock);
return action;
 }
 
@@ -294,7 +291,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 
if (!tcf_hash_check(parm->index, act, bind)) {
ret = tcf_hash_create(parm->index, est, act,
- sizeof(*prog), bind, false);
+ sizeof(*prog), bind, true);
if (ret < 0)
return ret;
 
@@ -325,7 +322,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
goto out;
 
prog = to_bpf(act);
-   spin_lock_bh(&prog->tcf_lock);
+   ASSERT_RTNL();
 
if (ret != ACT_P_CREATED)
tcf_bpf_prog_fill_cfg(prog, &old);
@@ -341,8 +338,6 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
prog->tcf_action = parm->action;
prog->filter = cfg.filter;
 
-   spin_unlock_bh(&prog->tcf_lock);
-
if (res == ACT_P_CREATED)
tcf_hash_insert(act);
else
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-next v2 1/2] openvswitch: Use regular GRE net_device instead of vport

2015-08-03 Thread David Miller

From: Alexei Starovoitov 
Date: Mon, 3 Aug 2015 21:23:40 -0700

> On Mon, Aug 03, 2015 at 05:27:26PM -0700, Pravin B Shelar wrote:
>> With addition of flow based tunneling, there is no need to
>> have special GRE vport. Removes all of the OVS specific
>> GRE code and make OVS use a ip_gre net_device.
>> Minimal GRE vport is kept to handle compatibility with
>> current userspace application.
>> 
>> Signed-off-by: Pravin B Shelar 
> ...
>> +#define GRE_TAP_FB_NAME "gretap0"
> ...
>> +/* fallback device is used for flow based tunneling. */
>> +if (!strcmp(dev->name, GRE_TAP_FB_NAME)) {
>> +struct ip_tunnel *t;
>> +
>> +t = netdev_priv(dev);
>> +t->flow_based_tunnel = true;
>> +eth_hw_addr_random(dev);
>> +netif_keep_dst(dev);
>> +}
>> +
> 
> feature detection based on netdev name?
> meaning that there will be only one such device for the whole
> host? and namespaces cannot have their own gre tunnels?
> (since host 'gretap0' cannot be seen in netns)

Doing anything like this by netdev name is wrong.

Pravin you will need to do this in some other way.


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] net_dbg_ratelimited: turn into no-op when !DEBUG

2015-08-03 Thread David Miller

From: Joe Perches 
Date: Mon, 03 Aug 2015 21:02:21 -0700

> On Mon, 2015-08-03 at 20:57 -0700, Joe Perches wrote:
>> On Tue, 2015-08-04 at 05:26 +0200, Jason A. Donenfeld wrote:
>> > This patch replaces calls to net_dbg_ratelimited when !DEBUG with
>> > no_printk, keeping with the idiom of all the other debug print helpers.
>> 
>> Makes sense, thanks Jason.
> 
> Perhaps better still would be to use if (0) no_printk so that
> the call and whatever argument calls the net_dbg_ratelimited
> makes are completely eliminated.

Agreed. Jason please respin your patch to work this way.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-next v2 1/2] openvswitch: Use regular GRE net_device instead of vport

2015-08-03 Thread Alexei Starovoitov

On Mon, Aug 03, 2015 at 05:27:26PM -0700, Pravin B Shelar wrote:
> With addition of flow based tunneling, there is no need to
> have special GRE vport. Removes all of the OVS specific
> GRE code and make OVS use a ip_gre net_device.
> Minimal GRE vport is kept to handle compatibility with
> current userspace application.
> 
> Signed-off-by: Pravin B Shelar 
...
> +#define GRE_TAP_FB_NAME "gretap0"
...
> + /* fallback device is used for flow based tunneling. */
> + if (!strcmp(dev->name, GRE_TAP_FB_NAME)) {
> + struct ip_tunnel *t;
> +
> + t = netdev_priv(dev);
> + t->flow_based_tunnel = true;
> + eth_hw_addr_random(dev);
> + netif_keep_dst(dev);
> + }
> +

feature detection based on netdev name?
meaning that there will be only one such device for the whole
host? and namespaces cannot have their own gre tunnels?
(since host 'gretap0' cannot be seen in netns)

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH net-next v2] openvswitch: Make 100 percents packets sampled when sampling rate is 1.

2015-08-03 Thread Wenyu Zhang

When the sampling rate is 1, the sampling probability is UINT32_MAX. The packet
should be sampled even if prandom_u32() generates the number UINT32_MAX.
And no packet needs to be sampled when the probability is 0.

Signed-off-by: Wenyu Zhang 
---
 net/openvswitch/actions.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index cf04c2f..c81bcf5 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -669,9 +669,11 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
 
for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
 a = nla_next(a, &rem)) {
+   uint32_t probability;
switch (nla_type(a)) {
case OVS_SAMPLE_ATTR_PROBABILITY:
-   if (prandom_u32() >= nla_get_u32(a))
+   probability = nla_get_u32(a);
+   if (!probability || prandom_u32() > probability)
return 0;
break;
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] net_dbg_ratelimited: turn into no-op when !DEBUG

2015-08-03 Thread Joe Perches

On Mon, 2015-08-03 at 20:57 -0700, Joe Perches wrote:
> On Tue, 2015-08-04 at 05:26 +0200, Jason A. Donenfeld wrote:
> > This patch replaces calls to net_dbg_ratelimited when !DEBUG with
> > no_printk, keeping with the idiom of all the other debug print helpers.
> 
> Makes sense, thanks Jason.

Perhaps better still would be to use if (0) no_printk so that
the call and whatever argument calls the net_dbg_ratelimited
makes are completely eliminated.


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] net_dbg_ratelimited: turn into no-op when !DEBUG

2015-08-03 Thread Joe Perches

On Tue, 2015-08-04 at 05:26 +0200, Jason A. Donenfeld wrote:
> The pr_debug family of functions turns into a no-op when -DDEBUG is not
> specified, opting instead to call "no_printk", which gets compiled to a
> no-op (but retains gcc's nice warnings about printf-style arguments).
> 
> The problem with net_dbg_ratelimited is that it is defined to be a
> variant of net_ratelimited_function, which expands to essentially:
> 
> if (net_ratelimit())
> pr_debug(fmt, ...);
> 
> When DEBUG is not defined, then this becomes,
> 
> if (net_ratelimit())
> ;
> 
> This seems benign, except it isn't. Firstly, there's the obvious
> overhead of calling net_ratelimit needlessly, which does quite some book
> keeping for the rate limiting. Given that the pr_debug and
> net_dbg_ratelimited family of functions are sprinkled liberally through
> performance critical code, with developers assuming they'll be compiled
> out to a no-op most of the time, we certainly do not want this needless
> book keeping. Secondly, and most visibly, even though no debug message
> is printed when DEBUG is not defined, if there is a flood of
> invocations, dmesg winds up peppered with messages such as
> "net_ratelimit: 320 callbacks suppressed". This is because our
> aforementioned net_ratelimit() function actually prints this text in
> some circumstances. It's especially odd to see this when there isn't any
> other accompanying debug message.
> 
> So, in sum, it doesn't make sense to have this function's current
> behavior, and instead it should match what every other debug family of
> functions in the kernel does with !DEBUG -- nothing.
> 
> This patch replaces calls to net_dbg_ratelimited when !DEBUG with
> no_printk, keeping with the idiom of all the other debug print helpers.

Makes sense, thanks Jason.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCHv2 net-next 0/4] add meminfo, bist status and misc. fixes

2015-08-03 Thread Hariprasad Shenai

Hi,

This patch series adds the following.
Add support to dump memory address range of various hw modules
Add support to dump edc bist status during ecc error
Read correct bits of who am i register for T6 adapter
and update T6 register range

This patch series has been created against net-next tree and includes
patches on cxgb4 and cxgb4vf driver.

We have included all the maintainers of respective drivers. Kindly review
the change and let us know in case of any review comments.

Thanks

V2: PATCH 3/4 ("cxgb4/cxgb4vf: read the correct bits of PL Who Am I
register") Fix switch statement in get_chip_type() and some more style
fixes based on review comment by Sergei Shtylyov 


Hariprasad Shenai (4):
  cxgb4: Add debugfs support to dump meminfo
  cxgb4: Add support to dump edc bist status
  cxgb4/cxgb4vf: read the correct bits of PL Who Am I register
  cxgb4: Update T6 register ranges

 drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 285 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c|  34 ++-
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c |  73 --
 drivers/net/ethernet/chelsio/cxgb4/t4_regs.h   | 131 +-
 drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c |   3 +-
 5 files changed, 506 insertions(+), 20 deletions(-)

-- 
2.3.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCHv2 net-next 4/4] cxgb4: Update T6 register ranges

2015-08-03 Thread Hariprasad Shenai

Signed-off-by: Hariprasad Shenai 
---
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 26 --
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c 
b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 5c63ceb..91750ad 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -1359,9 +1359,10 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t 
buf_size)
};
 
static const unsigned int t6_reg_ranges[] = {
-   0x1008, 0x114c,
+   0x1008, 0x1124,
+   0x1138, 0x114c,
0x1180, 0x11b4,
-   0x11fc, 0x1250,
+   0x11fc, 0x1254,
0x1280, 0x133c,
0x1800, 0x18fc,
0x3000, 0x302c,
@@ -1384,16 +1385,16 @@ void t4_get_regs(struct adapter *adap, void *buf, 
size_t buf_size)
0x5c10, 0x5ec0,
0x5ec8, 0x5ecc,
0x6000, 0x6040,
-   0x6058, 0x615c,
+   0x6058, 0x619c,
0x7700, 0x7798,
0x77c0, 0x7880,
0x78cc, 0x78fc,
0x7b00, 0x7c54,
0x7d00, 0x7efc,
-   0x8dc0, 0x8de0,
+   0x8dc0, 0x8de4,
0x8df8, 0x8e84,
0x8ea0, 0x8f88,
-   0x8fb8, 0x911c,
+   0x8fb8, 0x9124,
0x9400, 0x9470,
0x9600, 0x971c,
0x9800, 0x9808,
@@ -1413,9 +1414,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t 
buf_size)
0xdfc0, 0xdfe0,
0xe000, 0xf008,
0x11000, 0x11014,
-   0x11048, 0x0,
-   0x8, 0x1117c,
-   0x11190, 0x11264,
+   0x11048, 0x1117c,
+   0x11190, 0x11270,
0x11300, 0x1130c,
0x12000, 0x1206c,
0x19040, 0x1906c,
@@ -1500,9 +1500,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t 
buf_size)
0x1ff00, 0x1ff84,
0x1ffc0, 0x1ffc8,
0x3, 0x30070,
-   0x30100, 0x3015c,
-   0x30190, 0x301d0,
-   0x30200, 0x30318,
+   0x30100, 0x301d0,
+   0x30200, 0x30320,
0x30400, 0x3052c,
0x30540, 0x3061c,
0x30800, 0x30890,
@@ -1578,9 +1577,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t 
buf_size)
0x33c24, 0x33c50,
0x33cf0, 0x33cfc,
0x34000, 0x34070,
-   0x34100, 0x3415c,
-   0x34190, 0x341d0,
-   0x34200, 0x34318,
+   0x34100, 0x341d0,
+   0x34200, 0x34320,
0x34400, 0x3452c,
0x34540, 0x3461c,
0x34800, 0x34890,
-- 
2.3.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCHv2 net-next 2/4] cxgb4: Add support to dump edc bist status

2015-08-03 Thread Hariprasad Shenai

Add support to dump edc bist status for ECC data errors

Signed-off-by: Hariprasad Shenai 
---
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c   | 39 
 drivers/net/ethernet/chelsio/cxgb4/t4_regs.h |  5 ++--
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c 
b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 800bd48..b193295 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -345,6 +345,43 @@ int t4_wr_mbox_meat(struct adapter *adap, int mbox, const 
void *cmd, int size,
   FW_CMD_MAX_TIMEOUT);
 }
 
+static int t4_edc_err_read(struct adapter *adap, int idx)
+{
+   u32 edc_ecc_err_addr_reg;
+   u32 rdata_reg;
+
+   if (is_t4(adap->params.chip)) {
+   CH_WARN(adap, "%s: T4 NOT supported.\n", __func__);
+   return 0;
+   }
+   if (idx != 0 && idx != 1) {
+   CH_WARN(adap, "%s: idx %d NOT supported.\n", __func__, idx);
+   return 0;
+   }
+
+   edc_ecc_err_addr_reg = EDC_T5_REG(EDC_H_ECC_ERR_ADDR_A, idx);
+   rdata_reg = EDC_T5_REG(EDC_H_BIST_STATUS_RDATA_A, idx);
+
+   CH_WARN(adap,
+   "edc%d err addr 0x%x: 0x%x.\n",
+   idx, edc_ecc_err_addr_reg,
+   t4_read_reg(adap, edc_ecc_err_addr_reg));
+   CH_WARN(adap,
+   "bist: 0x%x, status %llx %llx %llx %llx %llx %llx %llx %llx 
%llx.\n",
+   rdata_reg,
+   (unsigned long long)t4_read_reg64(adap, rdata_reg),
+   (unsigned long long)t4_read_reg64(adap, rdata_reg + 8),
+   (unsigned long long)t4_read_reg64(adap, rdata_reg + 16),
+   (unsigned long long)t4_read_reg64(adap, rdata_reg + 24),
+   (unsigned long long)t4_read_reg64(adap, rdata_reg + 32),
+   (unsigned long long)t4_read_reg64(adap, rdata_reg + 40),
+   (unsigned long long)t4_read_reg64(adap, rdata_reg + 48),
+   (unsigned long long)t4_read_reg64(adap, rdata_reg + 56),
+   (unsigned long long)t4_read_reg64(adap, rdata_reg + 64));
+
+   return 0;
+}
+
 /**
  * t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window
  * @adap: the adapter
@@ -3283,6 +3320,8 @@ static void mem_intr_handler(struct adapter *adapter, int 
idx)
if (v & ECC_CE_INT_CAUSE_F) {
u32 cnt = ECC_CECNT_G(t4_read_reg(adapter, cnt_addr));
 
+   t4_edc_err_read(adapter, idx);
+
t4_write_reg(adapter, cnt_addr, ECC_CECNT_V(ECC_CECNT_M));
if (printk_ratelimit())
dev_warn(adapter->pdev_dev,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h 
b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
index 0626868..13ce018 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
@@ -2867,10 +2867,11 @@
 #define EDC_H_BIST_DATA_PATTERN_A  0x50010
 #define EDC_H_BIST_STATUS_RDATA_A  0x50028
 
+#define EDC_H_ECC_ERR_ADDR_A   0x50084
 #define EDC_T51_BASE_ADDR  0x50800
 
-#define EDC_STRIDE_T5 (EDC_T51_BASE_ADDR - EDC_T50_BASE_ADDR)
-#define EDC_REG_T5(reg, idx) (reg + EDC_STRIDE_T5 * idx)
+#define EDC_T5_STRIDE (EDC_T51_BASE_ADDR - EDC_T50_BASE_ADDR)
+#define EDC_T5_REG(reg, idx) (reg + EDC_T5_STRIDE * idx)
 
 #define PL_VF_REV_A 0x4
 #define PL_VF_WHOAMI_A 0x0
-- 
2.3.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCHv2 net-next 1/4] cxgb4: Add debugfs support to dump meminfo

2015-08-03 Thread Hariprasad Shenai

Add debug support to dump memory address ranges of various hardware
modules of the adapter.

Signed-off-by: Hariprasad Shenai 
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 285 +
 drivers/net/ethernet/chelsio/cxgb4/t4_regs.h   | 122 +
 2 files changed, 407 insertions(+)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index f701a6f..b657734 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -2275,6 +2275,290 @@ static const struct file_operations blocked_fl_fops = {
.llseek  = generic_file_llseek,
 };
 
+struct mem_desc {
+   unsigned int base;
+   unsigned int limit;
+   unsigned int idx;
+};
+
+static int mem_desc_cmp(const void *a, const void *b)
+{
+   return ((const struct mem_desc *)a)->base -
+  ((const struct mem_desc *)b)->base;
+}
+
+static void mem_region_show(struct seq_file *seq, const char *name,
+   unsigned int from, unsigned int to)
+{
+   char buf[40];
+
+   string_get_size((u64)to - from + 1, 1, STRING_UNITS_2, buf,
+   sizeof(buf));
+   seq_printf(seq, "%-15s %#x-%#x [%s]\n", name, from, to, buf);
+}
+
+static int meminfo_show(struct seq_file *seq, void *v)
+{
+   static const char * const memory[] = { "EDC0:", "EDC1:", "MC:",
+   "MC0:", "MC1:"};
+   static const char * const region[] = {
+   "DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:",
+   "Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:",
+   "Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:",
+   "TDDP region:", "TPT region:", "STAG region:", "RQ region:",
+   "RQUDP region:", "PBL region:", "TXPBL region:",
+   "DBVFIFO region:", "ULPRX state:", "ULPTX state:",
+   "On-chip queues:"
+   };
+
+   int i, n;
+   u32 lo, hi, used, alloc;
+   struct mem_desc avail[4];
+   struct mem_desc mem[ARRAY_SIZE(region) + 3];  /* up to 3 holes */
+   struct mem_desc *md = mem;
+   struct adapter *adap = seq->private;
+
+   for (i = 0; i < ARRAY_SIZE(mem); i++) {
+   mem[i].limit = 0;
+   mem[i].idx = i;
+   }
+
+   /* Find and sort the populated memory ranges */
+   i = 0;
+   lo = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A);
+   if (lo & EDRAM0_ENABLE_F) {
+   hi = t4_read_reg(adap, MA_EDRAM0_BAR_A);
+   avail[i].base = EDRAM0_BASE_G(hi) << 20;
+   avail[i].limit = avail[i].base + (EDRAM0_SIZE_G(hi) << 20);
+   avail[i].idx = 0;
+   i++;
+   }
+   if (lo & EDRAM1_ENABLE_F) {
+   hi = t4_read_reg(adap, MA_EDRAM1_BAR_A);
+   avail[i].base = EDRAM1_BASE_G(hi) << 20;
+   avail[i].limit = avail[i].base + (EDRAM1_SIZE_G(hi) << 20);
+   avail[i].idx = 1;
+   i++;
+   }
+
+   if (is_t5(adap->params.chip)) {
+   if (lo & EXT_MEM0_ENABLE_F) {
+   hi = t4_read_reg(adap, MA_EXT_MEMORY0_BAR_A);
+   avail[i].base = EXT_MEM0_BASE_G(hi) << 20;
+   avail[i].limit =
+   avail[i].base + (EXT_MEM0_SIZE_G(hi) << 20);
+   avail[i].idx = 3;
+   i++;
+   }
+   if (lo & EXT_MEM1_ENABLE_F) {
+   hi = t4_read_reg(adap, MA_EXT_MEMORY1_BAR_A);
+   avail[i].base = EXT_MEM1_BASE_G(hi) << 20;
+   avail[i].limit =
+   avail[i].base + (EXT_MEM1_SIZE_G(hi) << 20);
+   avail[i].idx = 4;
+   i++;
+   }
+   } else {
+   if (lo & EXT_MEM_ENABLE_F) {
+   hi = t4_read_reg(adap, MA_EXT_MEMORY_BAR_A);
+   avail[i].base = EXT_MEM_BASE_G(hi) << 20;
+   avail[i].limit =
+   avail[i].base + (EXT_MEM_SIZE_G(hi) << 20);
+   avail[i].idx = 2;
+   i++;
+   }
+   }
+   if (!i)/* no memory available */
+   return 0;
+   sort(avail, i, sizeof(struct mem_desc), mem_desc_cmp, NULL);
+
+   (md++)->base = t4_read_reg(adap, SGE_DBQ_CTXT_BADDR_A);
+   (md++)->base = t4_read_reg(adap, SGE_IMSG_CTXT_BADDR_A);
+   (md++)->base = t4_read_reg(adap, SGE_FLM_CACHE_BADDR_A);
+   (md++)->base = t4_read_reg(adap, TP_CMM_TCB_BASE_A);
+   (md++)->base = t4_read_reg(adap, TP_CMM_MM_BASE_A);
+   (md++)->base = t4_read_reg(adap, TP_CMM_TIMER_BASE_A);
+   (md++)->base = t4_read_reg(adap, TP_CMM_MM_RX_FLST_BASE_A);
+   (md++)->base = t4_read_r

[PATCHv2 net-next 3/4] cxgb4/cxgb4vf: read the correct bits of PL Who Am I register

2015-08-03 Thread Hariprasad Shenai

Read the correct bits of PL Who Am I for the Source PF field which has
changed in T6

Signed-off-by: Hariprasad Shenai 
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 34 -
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c  |  8 --
 drivers/net/ethernet/chelsio/cxgb4/t4_regs.h|  4 +++
 drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c  |  3 ++-
 4 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index d582e17..27e87b6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4551,6 +4551,32 @@ static void free_some_resources(struct adapter *adapter)
   NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA)
 #define SEGMENT_SIZE 128
 
+/* Map the adapter's PCI device ID and PL revision to a chip type code.
+ * Returns CHELSIO_CHIP_CODE(version, pl_rev) for T4, T5 and T6 devices, or
+ * -EINVAL after logging an error when the version is not one of those.
+ */
+static int get_chip_type(struct pci_dev *pdev, u32 pl_rev)
+{
+	u16 device_id;
+
+	/* Retrieve adapter's device ID */
+	pci_read_config_word(pdev, PCI_DEVICE_ID, &device_id);
+
+	/* The top four bits of the 16-bit device ID select the chip version. */
+	switch (device_id >> 12) {
+	case CHELSIO_T4:
+		return CHELSIO_CHIP_CODE(CHELSIO_T4, pl_rev);
+	case CHELSIO_T5:
+		return CHELSIO_CHIP_CODE(CHELSIO_T5, pl_rev);
+	case CHELSIO_T6:
+		return CHELSIO_CHIP_CODE(CHELSIO_T6, pl_rev);
+	default:
+		dev_err(&pdev->dev, "Device %d is not supported\n",
+			device_id);
+		return -EINVAL;
+	}
+}
+
 static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
int func, i, err, s_qpp, qpp, num_seg;
@@ -4558,6 +4584,8 @@ static int init_one(struct pci_dev *pdev, const struct 
pci_device_id *ent)
bool highdma = false;
struct adapter *adapter = NULL;
void __iomem *regs;
+   u32 whoami, pl_rev;
+   enum chip_type chip;
 
printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
 
@@ -4586,7 +4614,11 @@ static int init_one(struct pci_dev *pdev, const struct 
pci_device_id *ent)
goto out_unmap_bar0;
 
/* We control everything through one PF */
-   func = SOURCEPF_G(readl(regs + PL_WHOAMI_A));
+   whoami = readl(regs + PL_WHOAMI_A);
+   pl_rev = REV_G(readl(regs + PL_REV_A));
+   chip = get_chip_type(pdev, pl_rev);
+   func = CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5 ?
+   SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami);
if (func != ent->driver_data) {
iounmap(regs);
pci_disable_device(pdev);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c 
b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index b193295..5c63ceb 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -3529,7 +3529,9 @@ int t4_slow_intr_handler(struct adapter *adapter)
 void t4_intr_enable(struct adapter *adapter)
 {
u32 val = 0;
-   u32 pf = SOURCEPF_G(t4_read_reg(adapter, PL_WHOAMI_A));
+   u32 whoami = t4_read_reg(adapter, PL_WHOAMI_A);
+   u32 pf = CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5 ?
+   SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami);
 
if (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5)
val = ERR_DROPPED_DB_F | ERR_EGR_CTXT_PRIO_F | DBFIFO_HP_INT_F;
@@ -3554,7 +3556,9 @@ void t4_intr_enable(struct adapter *adapter)
  */
 void t4_intr_disable(struct adapter *adapter)
 {
-   u32 pf = SOURCEPF_G(t4_read_reg(adapter, PL_WHOAMI_A));
+   u32 whoami = t4_read_reg(adapter, PL_WHOAMI_A);
+   u32 pf = CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5 ?
+   SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami);
 
t4_write_reg(adapter, MYPF_REG(PL_PF_INT_ENABLE_A), 0);
t4_set_reg_field(adapter, PL_INT_MAP0_A, 1 << pf, 0);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h 
b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
index 13ce018..e444dc4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
@@ -2588,6 +2588,10 @@
 #define SOURCEPF_M    0x7U
 #define SOURCEPF_G(x) (((x) >> SOURCEPF_S) & SOURCEPF_M)
 
+#define T6_SOURCEPF_S    9
+#define T6_SOURCEPF_M    0x7U
+#define T6_SOURCEPF_G(x) (((x) >> T6_SOURCEPF_S) & T6_SOURCEPF_M)
+
 #define PL_INT_CAUSE_A 0x1940c
 
 #define ULP_TX_S27
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c 
b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
index 0db6dc9..63dd5fd 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
@@ -619,7 +619,8 @@ int t4vf_get_sge_params(struct adapter *adapter)
 */
whoami = t4_read_reg(adapter,
 T4VF_PL_BASE_ADDR + PL_VF_WHOAMI_A);

[PATCH] net_dbg_ratelimited: turn into no-op when !DEBUG

2015-08-03 Thread Jason A. Donenfeld

The pr_debug family of functions turns into a no-op when -DDEBUG is not
specified, opting instead to call "no_printk", which gets compiled to a
no-op (but retains gcc's nice warnings about printf-style arguments).

The problem with net_dbg_ratelimited is that it is defined to be a
variant of net_ratelimited_function, which expands to essentially:

if (net_ratelimit())
pr_debug(fmt, ...);

When DEBUG is not defined, then this becomes,

if (net_ratelimit())
;

This seems benign, except it isn't. Firstly, there's the obvious
overhead of calling net_ratelimit needlessly, which does quite some book
keeping for the rate limiting. Given that the pr_debug and
net_dbg_ratelimited family of functions are sprinkled liberally through
performance critical code, with developers assuming they'll be compiled
out to a no-op most of the time, we certainly do not want this needless
book keeping. Secondly, and most visibly, even though no debug message
is printed when DEBUG is not defined, if there is a flood of
invocations, dmesg winds up peppered with messages such as
"net_ratelimit: 320 callbacks suppressed". This is because our
aforementioned net_ratelimit() function actually prints this text in
some circumstances. It's especially odd to see this when there isn't any
other accompanying debug message.

So, in sum, it doesn't make sense to have this function's current
behavior, and instead it should match what every other debug family of
functions in the kernel does with !DEBUG -- nothing.

This patch replaces calls to net_dbg_ratelimited when !DEBUG with
no_printk, keeping with the idiom of all the other debug print helpers.

Signed-off-by: Jason A. Donenfeld 
---
 include/linux/net.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/include/linux/net.h b/include/linux/net.h
index 04aa068..500fdfe 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -239,8 +239,13 @@ do {   
\
net_ratelimited_function(pr_warn, fmt, ##__VA_ARGS__)
 #define net_info_ratelimited(fmt, ...) \
net_ratelimited_function(pr_info, fmt, ##__VA_ARGS__)
+#if defined(DEBUG)
 #define net_dbg_ratelimited(fmt, ...)  \
net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__)
+#else
+#define net_dbg_ratelimited(fmt, ...)  \
+   no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#endif
 
 bool __net_get_random_once(void *buf, int nbytes, bool *done,
   struct static_key *done_key);
-- 
2.4.5

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

BUG: null dereference in __skb_recv_datagram

2015-08-03 Thread Brenden Blanco

Hi Folks,

I just hit this one off of latest net-next tree, git hash
58da018053531b9cb91423a64f2a762ef0fe7456

I was running a set of tests from pyroute2 project:
https://github.com/svinota/pyroute2

A simple `sudo make test skip_tests=test_stress` in there hits the
issue every time.

[  318.244596] BUG: unable to handle kernel NULL pointer dereference
at 008e
[  318.245182] IP: [] __skb_recv_datagram+0xbc/0x5a0
[  318.245762] PGD 80999d067 PUD 7fc04a067 PMD 0
[  318.246336] Oops:  [#1]
[  318.262158] CPU: 3 PID: 1580 Comm: dnsmasq Not tainted 4.2.0-rc4-g58da018 #28
[  318.263143] Hardware name: MSI MS-7930/Z97S SLI PLUS (MS-7930),
BIOS V1.2 05/22/2014
[  318.264137] task: 880806c96200 ti: 8807fc09 task.ti:
8807fc09
[  318.265136] RIP: 0010:[] []
__skb_recv_datagram+0xbc/0x5a0
[  318.266153] RSP: 0018:8807fc093b98  EFLAGS: 00010082
[  318.267158] RAX: 0296 RBX:  RCX: 8807fc093c7c
[  318.268172] RDX: 0001 RSI:  RDI: 88080a4b88ac
[  318.269186] RBP: 8807fc093c68 R08: 8807fc093cb0 R09: 7000
[  318.270200] R10: 8807fc094000 R11: 0246 R12: 8807fc093c78
[  318.271217] R13: 88080a4b8800 R14: 88080a4b8898 R15: 
[  318.272236] FS:  7f52a582f700() GS:88082ecc()
knlGS:
[  318.273264] CS:  0010 DS:  ES:  CR0: 80050033
[  318.274286] CR2: 008e CR3: 00080824 CR4: 001406e0
[  318.275314] DR0:  DR1:  DR2: 
[  318.276323] DR3:  DR6: fffe0ff0 DR7: 0400
[  318.277311] Stack:
[  318.278282]  dead00200200 8808061e4300 88080a4d9100
00db
[  318.279291]  8808 8807fc093a78 880806c96200
8807fc094000
[  318.280287]  8807fc093c20 8807fc093cb0 
8807fc093c7c
[  318.281260] Call Trace:
[  318.282202]  [] ? poll_select_copy_remaining+0x140/0x140
[  318.283150]  [] skb_recv_datagram+0x3f/0x60
[  318.284077]  [] netlink_recvmsg+0x59/0x360
[  318.284984]  [] sock_recvmsg+0x13/0x20
[  318.285867]  [] ___sys_recvmsg+0xe3/0x210
[  318.286729]  [] ? fsnotify+0x316/0x4a0
[  318.287569]  [] __sys_recvmsg+0x57/0xa0
[  318.288389]  [] SyS_recvmsg+0x12/0x20
[  318.289186]  [] entry_SYSCALL_64_fastpath+0x12/0x71
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/1] net/ipv4: Enable flow-based ECMP

2015-08-03 Thread Richard Laing

Enable flow-based ECMP.

Currently, if equal-cost multipath is enabled, the kernel chooses between
equal-cost paths for each matching packet; essentially, packets are
round-robined between the routes. This means that packets from a single
flow can traverse different routes. If one of the routes experiences
congestion, this can result in delayed or out-of-order packets arriving
at the destination.

This patch allows packets to be routed based on their flow - packets
in the same flow will always use the same route. This prevents out of
order packets. There are other issues with round-robin based ECMP routing
related to variable path MTU handling and debugging. The default
behaviour is changed by this patch to enable flow based ECMP routing
rather than the previous round-robin routing. The behaviour can be
changed using a new sysctl option /net/ipv4/route/flow_based_ecmp.

See RFC2991 for more details on the problems associated with packet
based ECMP routing.

This patch relies on the skb hash value to select between routes. The
selection uses a hash-threshold algorithm (see RFC2992).

Signed-off-by: Richard Laing 
---

 include/net/flow.h   |8 
 include/net/ip_fib.h |4 
 include/net/route.h  |2 ++
 net/ipv4/fib_semantics.c |   30 ++
 net/ipv4/route.c |   19 +++
 5 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/include/net/flow.h b/include/net/flow.h
index 8109a15..b0a2524 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -79,6 +79,8 @@ struct flowi4 {
 #define fl4_ipsec_spi  uli.spi
 #define fl4_mh_typeuli.mht.type
 #define fl4_gre_keyuli.gre_key
+
+   __u32   flowi4_hash;
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
 
 static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
@@ -99,6 +101,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, 
int oif,
fl4->saddr = saddr;
fl4->fl4_dport = dport;
fl4->fl4_sport = sport;
+   fl4->flowi4_hash = 0;
 }
 
 /* Reset some input parameters after previous lookup */
@@ -182,6 +185,11 @@ static inline struct flowi *flowidn_to_flowi(struct 
flowidn *fldn)
return container_of(fldn, struct flowi, u.dn);
 }
 
+static inline void flowi4_set_flow_hash(struct flowi4 *fl, __u32 hash)
+{
+   fl->flowi4_hash = hash;
+}
+
 typedef unsigned long flow_compare_t;
 
 static inline size_t flow_key_size(u16 family)
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 5fa643b..7db9f72 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -117,6 +117,8 @@ struct fib_info {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
int fib_power;
 #endif
+   /* Cache the number of live nexthops for flow based ECMP calculation. */
+   int live_nexthops;
struct rcu_head rcu;
struct fib_nh   fib_nh[0];
 #define fib_devfib_nh[0].nh_dev
@@ -310,6 +312,8 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long 
event);
 int fib_sync_down_addr(struct net *net, __be32 local);
 int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
 void fib_select_multipath(struct fib_result *res);
+void fib_select_multipath_for_flow(struct fib_result *res,
+  const struct flowi4 *fl4);
 
 /* Exported by fib_trie.c */
 void fib_trie_init(void);
diff --git a/include/net/route.h b/include/net/route.h
index fe22d03..a00e606 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -252,6 +252,8 @@ static inline void ip_route_connect_init(struct flowi4 
*fl4, __be32 dst, __be32
 
flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE,
   protocol, flow_flags, dst, src, dport, sport);
+
+   flowi4_set_flow_hash(fl4, sk->sk_txhash);
 }
 
 static inline struct rtable *ip_route_connect(struct flowi4 *fl4,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 3a06586..0a56ad3 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -981,6 +981,7 @@ link_it:
head = &fib_info_devhash[hash];
hlist_add_head(&nexthop_nh->nh_hash, head);
} endfor_nexthops(fi)
+   fi->live_nexthops = fi->fib_nhs;
spin_unlock_bh(&fib_info_lock);
return fi;
 
@@ -1196,6 +1197,7 @@ int fib_sync_down_dev(struct net_device *dev, unsigned 
long event)
}
ret++;
}
+   fi->live_nexthops = fi->fib_nhs - dead;
}
 
return ret;
@@ -1331,6 +1333,7 @@ int fib_sync_up(struct net_device *dev, unsigned int 
nh_flags)
if (alive > 0) {
fi->fib_flags &= ~nh_flags;
ret++;
+   fi->live_nexthops = alive;
}
}
 
@@ -1397,4 +1400,31 @@ void fib_select_multipath(struct fib_result *res)

[PATCH net-next v2 2/2] gre: Remove support for sharing GRE protocol hook.

2015-08-03 Thread Pravin B Shelar

Support for sharing the GREPROTO_CISCO port was added so that
the OVS GRE port and kernel GRE devices can co-exist. After the
flow-based tunneling patches, OVS GRE protocol processing
is completely moved to the ip_gre module, so there is no need
for the GRE protocol hook. The following patch consolidates
GRE protocol related functions into the ip_gre module.

Signed-off-by: Pravin B Shelar 
---
 include/net/gre.h|  86 +++--
 net/ipv4/gre_demux.c | 201 +--
 net/ipv4/ip_gre.c| 215 +++
 3 files changed, 209 insertions(+), 293 deletions(-)

diff --git a/include/net/gre.h b/include/net/gre.h
index 4193fd7..b2572b7 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -4,92 +4,24 @@
 #include 
 #include 
 
-#define GREPROTO_CISCO 0
-#define GREPROTO_PPTP  1
-#define GREPROTO_MAX   2
-#define GRE_IP_PROTO_MAX   2
-
-struct gre_protocol {
-   int  (*handler)(struct sk_buff *skb);
-   void (*err_handler)(struct sk_buff *skb, u32 info);
-};
-
 struct gre_base_hdr {
__be16 flags;
__be16 protocol;
 };
 #define GRE_HEADER_SECTION 4
 
+struct gre_protocol {
+   int  (*handler)(struct sk_buff *skb);
+   void (*err_handler)(struct sk_buff *skb, u32 info);
+};
+
 int gre_add_protocol(const struct gre_protocol *proto, u8 version);
 int gre_del_protocol(const struct gre_protocol *proto, u8 version);
 
-struct gre_cisco_protocol {
-   int (*handler)(struct sk_buff *skb, const struct tnl_ptk_info *tpi);
-   int (*err_handler)(struct sk_buff *skb, u32 info,
-  const struct tnl_ptk_info *tpi);
-   u8 priority;
-};
-
-int gre_cisco_register(struct gre_cisco_protocol *proto);
-int gre_cisco_unregister(struct gre_cisco_protocol *proto);
+#define GREPROTO_CISCO 0
+#define GREPROTO_PPTP  1
+#define GREPROTO_MAX   2
+#define GRE_IP_PROTO_MAX   2
 
 #define GRE_TAP_FB_NAME "gretap0"
-
-static inline int ip_gre_calc_hlen(__be16 o_flags)
-{
-   int addend = 4;
-
-   if (o_flags&TUNNEL_CSUM)
-   addend += 4;
-   if (o_flags&TUNNEL_KEY)
-   addend += 4;
-   if (o_flags&TUNNEL_SEQ)
-   addend += 4;
-   return addend;
-}
-
-static inline __be16 gre_flags_to_tnl_flags(__be16 flags)
-{
-   __be16 tflags = 0;
-
-   if (flags & GRE_CSUM)
-   tflags |= TUNNEL_CSUM;
-   if (flags & GRE_ROUTING)
-   tflags |= TUNNEL_ROUTING;
-   if (flags & GRE_KEY)
-   tflags |= TUNNEL_KEY;
-   if (flags & GRE_SEQ)
-   tflags |= TUNNEL_SEQ;
-   if (flags & GRE_STRICT)
-   tflags |= TUNNEL_STRICT;
-   if (flags & GRE_REC)
-   tflags |= TUNNEL_REC;
-   if (flags & GRE_VERSION)
-   tflags |= TUNNEL_VERSION;
-
-   return tflags;
-}
-
-static inline __be16 tnl_flags_to_gre_flags(__be16 tflags)
-{
-   __be16 flags = 0;
-
-   if (tflags & TUNNEL_CSUM)
-   flags |= GRE_CSUM;
-   if (tflags & TUNNEL_ROUTING)
-   flags |= GRE_ROUTING;
-   if (tflags & TUNNEL_KEY)
-   flags |= GRE_KEY;
-   if (tflags & TUNNEL_SEQ)
-   flags |= GRE_SEQ;
-   if (tflags & TUNNEL_STRICT)
-   flags |= GRE_STRICT;
-   if (tflags & TUNNEL_REC)
-   flags |= GRE_REC;
-   if (tflags & TUNNEL_VERSION)
-   flags |= GRE_VERSION;
-
-   return flags;
-}
-
 #endif
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 77562e0..d9c552a 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -31,7 +31,6 @@
 #include 
 
 static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
-static struct gre_cisco_protocol __rcu *gre_cisco_proto_list[GRE_IP_PROTO_MAX];
 
 int gre_add_protocol(const struct gre_protocol *proto, u8 version)
 {
@@ -61,163 +60,6 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 
version)
 }
 EXPORT_SYMBOL_GPL(gre_del_protocol);
 
-static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
-   bool *csum_err)
-{
-   const struct gre_base_hdr *greh;
-   __be32 *options;
-   int hdr_len;
-
-   if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr
-   return -EINVAL;
-
-   greh = (struct gre_base_hdr *)skb_transport_header(skb);
-   if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
-   return -EINVAL;
-
-   tpi->flags = gre_flags_to_tnl_flags(greh->flags);
-   hdr_len = ip_gre_calc_hlen(tpi->flags);
-
-   if (!pskb_may_pull(skb, hdr_len))
-   return -EINVAL;
-
-   greh = (struct gre_base_hdr *)skb_transport_header(skb);
-   tpi->proto = greh->protocol;
-
-   options = (__be32 *)(greh + 1);
-   if (greh->flags & GRE_CSUM) {
-   if (skb_checksum_simple_validate(skb)) {
-   *csu

[PATCH net-next v2 1/2] openvswitch: Use regular GRE net_device instead of vport

2015-08-03 Thread Pravin B Shelar

With the addition of flow based tunneling, there is no need to
have a special GRE vport. Remove all of the OVS specific
GRE code and make OVS use an ip_gre net_device.
A minimal GRE vport is kept to handle compatibility with
current userspace applications.

Signed-off-by: Pravin B Shelar 
---
 include/net/gre.h  |  11 +-
 include/net/ip_tunnels.h   |   6 +-
 net/ipv4/gre_demux.c   |  34 --
 net/ipv4/ip_gre.c  | 185 -
 net/ipv4/ip_tunnel.c   |   6 +-
 net/ipv4/ipip.c|   2 +-
 net/ipv6/sit.c |   2 +-
 net/openvswitch/Kconfig|   1 -
 net/openvswitch/vport-gre.c| 230 -
 net/openvswitch/vport-netdev.c |   5 +-
 net/openvswitch/vport-netdev.h |   2 +
 net/openvswitch/vport.h|   2 +-
 12 files changed, 222 insertions(+), 264 deletions(-)

diff --git a/include/net/gre.h b/include/net/gre.h
index b531820..4193fd7 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -33,16 +33,7 @@ struct gre_cisco_protocol {
 int gre_cisco_register(struct gre_cisco_protocol *proto);
 int gre_cisco_unregister(struct gre_cisco_protocol *proto);
 
-void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
- int hdr_len);
-
-static inline struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
- bool csum)
-{
-   return iptunnel_handle_offloads(skb, csum,
-   csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
-}
-
+#define GRE_TAP_FB_NAME "gretap0"
 
 static inline int ip_gre_calc_hlen(__be16 o_flags)
 {
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 4798441..fc37624 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -82,6 +82,8 @@ struct ip_tunnel_dst {
__be32   saddr;
 };
 
+struct metadata_dst;
+
 struct ip_tunnel {
struct ip_tunnel __rcu  *next;
struct hlist_node hash_node;
@@ -115,6 +117,7 @@ struct ip_tunnel {
unsigned intprl_count;  /* # of entries in PRL */
int ip_tnl_net_id;
struct gro_cellsgro_cells;
+   boolflow_based_tunnel;
 };
 
 #define TUNNEL_CSUM__cpu_to_be16(0x01)
@@ -235,7 +238,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net 
*itn,
   __be32 key);
 
 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
- const struct tnl_ptk_info *tpi, bool log_ecn_error);
+ const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
+ bool log_ecn_error);
 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
 struct ip_tunnel_parm *p);
 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 4a7b5b2..77562e0 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -61,40 +61,6 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 
version)
 }
 EXPORT_SYMBOL_GPL(gre_del_protocol);
 
-void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
- int hdr_len)
-{
-   struct gre_base_hdr *greh;
-
-   skb_push(skb, hdr_len);
-
-   skb_reset_transport_header(skb);
-   greh = (struct gre_base_hdr *)skb->data;
-   greh->flags = tnl_flags_to_gre_flags(tpi->flags);
-   greh->protocol = tpi->proto;
-
-   if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
-   __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
-
-   if (tpi->flags&TUNNEL_SEQ) {
-   *ptr = tpi->seq;
-   ptr--;
-   }
-   if (tpi->flags&TUNNEL_KEY) {
-   *ptr = tpi->key;
-   ptr--;
-   }
-   if (tpi->flags&TUNNEL_CSUM &&
-   !(skb_shinfo(skb)->gso_type &
- (SKB_GSO_GRE|SKB_GSO_GRE_CSUM))) {
-   *ptr = 0;
-   *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
-skb->len, 0));
-   }
-   }
-}
-EXPORT_SYMBOL_GPL(gre_build_header);
-
 static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
bool *csum_err)
 {
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 5fd7064..31f2ec5 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -47,6 +48,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #if IS_ENABLED(CONFIG_IPV6)
 #include 
@@ -200,9 +202,29 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
return PACKET_RCVD;
 }
 
+static __be64 key_to_tunnel_id(__be32

[PATCH net-next v2 0/2] GRE: Use flow based tunneling for OVS GRE vport.

2015-08-03 Thread Pravin B Shelar

The following patches make use of the new flow based tunneling
API from the kernel. This allows us to directly use the netdev
based GRE tunnel implementation. While doing so I have
removed the GRE demux API which was targeted for OVS. Most
of the GRE protocol code is now consolidated in the ip_gre module.

Pravin B Shelar (2):
  openvswitch: Use regular GRE net_device instead of vport
  gre: Remove support for sharing GRE protocol hook.

 include/net/gre.h  |  97 ++
 include/net/ip_tunnels.h   |   6 +-
 net/ipv4/gre_demux.c   | 235 +---
 net/ipv4/ip_gre.c  | 400 ++---
 net/ipv4/ip_tunnel.c   |   6 +-
 net/ipv4/ipip.c|   2 +-
 net/ipv6/sit.c |   2 +-
 net/openvswitch/Kconfig|   1 -
 net/openvswitch/vport-gre.c| 230 +++-
 net/openvswitch/vport-netdev.c |   5 +-
 net/openvswitch/vport-netdev.h |   2 +
 net/openvswitch/vport.h|   2 +-
 12 files changed, 431 insertions(+), 557 deletions(-)

-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re:

2015-08-03 Thread Pravin Shelar

Somehow subject line got dropped, I am resending these patches.

On Mon, Aug 3, 2015 at 3:58 PM, Pravin B Shelar  wrote:
> Following patches make use of new flow based tunneling
> API from kernel. This allows us to directly use netdev
> based GRE tunnel implementation. While doing so I have
> removed GRE demux API which were targeted for OVS. Most
> of GRE protocol code is now consolidated in ip_gre module.
>
> Pravin B Shelar (2):
>   openvswitch: Use regular GRE net_device instead of vport
>   gre: Remove support for sharing GRE protocol hook.
>
>  include/net/gre.h  |  97 ++
>  include/net/ip_tunnels.h   |   6 +-
>  net/ipv4/gre_demux.c   | 235 +---
>  net/ipv4/ip_gre.c  | 400 
> ++---
>  net/ipv4/ip_tunnel.c   |   6 +-
>  net/ipv4/ipip.c|   2 +-
>  net/ipv6/sit.c |   2 +-
>  net/openvswitch/Kconfig|   1 -
>  net/openvswitch/vport-gre.c| 230 +++-
>  net/openvswitch/vport-netdev.c |   5 +-
>  net/openvswitch/vport-netdev.h |   2 +
>  net/openvswitch/vport.h|   2 +-
>  12 files changed, 431 insertions(+), 557 deletions(-)
>
> --
> 1.8.3.1
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] fddi: Use a more more typical logging style

2015-08-03 Thread Joe Perches

On Mon, 2015-08-03 at 16:05 -0700, David Miller wrote:
> From: Joe Perches 
> Date: Sun, 02 Aug 2015 21:27:45 -0700
> 
> > Use macros that don't require fixed argument counts so
> > format and arguments can be verified by the compiler.
> > 
> > Miscellanea:
> > 
> > o Remove a few #if uses to allow dynamic debug to always work
> > o whitespace neatening
> > 
> > Signed-off-by: Joe Perches 
> 
> This doesn't apply cleanly to net-next, please respin.

Apologies for that.  I used a newer version of the
Evolution email client (3.16.0) which corrupts tabs.
3.16.3 doesn't seem to do that.

I'll probably downgrade back to the old 3.12 version
though.  It doesn't send some attachments properly,
but the editor at least works well.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-next] bridge: mdb: fix vlan_enabled access when vlans are not configured

2015-08-03 Thread David Miller

From: Nikolay Aleksandrov 
Date: Tue,  4 Aug 2015 01:19:58 +0200

> From: Nikolay Aleksandrov 
> 
> Instead of trying to access br->vlan_enabled directly use the provided
> helper br_vlan_enabled().
> 
> Signed-off-by: Nikolay Aleksandrov 

Applied.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH net-next] bridge: mdb: fix vlan_enabled access when vlans are not configured

2015-08-03 Thread Nikolay Aleksandrov

From: Nikolay Aleksandrov 

Instead of trying to access br->vlan_enabled directly use the provided
helper br_vlan_enabled().

Signed-off-by: Nikolay Aleksandrov 
---
Sorry, forgot to change this before sending the patch.

 net/bridge/br_mdb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 1df3ef4a73b9..d747275fad18 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -490,7 +490,7 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr 
*nlh)
return -EINVAL;
 
pv = nbp_get_vlan_info(p);
-   if (br->vlan_enabled && pv && entry->vid == 0) {
+   if (br_vlan_enabled(br) && pv && entry->vid == 0) {
for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
entry->vid = vid;
err = __br_mdb_add(net, br, entry);
@@ -592,7 +592,7 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr 
*nlh)
return -EINVAL;
 
pv = nbp_get_vlan_info(p);
-   if (br->vlan_enabled && pv && entry->vid == 0) {
+   if (br_vlan_enabled(br) && pv && entry->vid == 0) {
for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
entry->vid = vid;
err = __br_mdb_del(br, entry);
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-next 1/1] e1000: remove dead e1000_init_eeprom_params calls.

2015-08-03 Thread Jeff Kirsher

On Fri, Jul 24, 2015 at 2:40 PM, Francois Romieu  wrote:
> The device probe method e1000_probe calls e1000_init_eeprom_params
> itself so there's no reason to call it again from e1000_do_write_eeprom
> or e1000_do_read_eeprom.
>
> The sentence above assumes that e1000_init_eeprom_params is effective
> but it's mostly dependent on "hw->mac_type": safe as e1000_probe bails
> out early if it can't set mac_type (see e1000_init_hw_struct, then
> e1000_set_mac_type).
>
> Btw, if effective, the removed paths would have been deadlock prone when
> e1000_eeprom_spi was set:
> -> e1000_write_eeprom (takes e1000_eeprom_lock)
>-> e1000_do_write_eeprom
>   -> e1000_init_eeprom_params
>  -> e1000_read_eeprom (takes e1000_eeprom_lock)
>
> (same narrative with e1000_read_eeprom -> e1000_do_read_eeprom etc.)
>
> As a final note, the candidate deadlock above can't happen in e1000_probe
> due to the way eeprom->word_size is set / tested.
>
> Signed-off-by: Francois Romieu 
> ---
>
> Untested. I have found it while looking at Joern's patch.
>
>  drivers/net/ethernet/intel/e1000/e1000_hw.c | 8 
>  1 file changed, 8 deletions(-)

Can you please send this to intel-wired-...@lists.osuosl.org mailing
list?  That is the mailing list created/used for these patches.  It
also helps me out by adding your patch to our patchworks project for
patches against Intel wired drivers.

Thanks in advance, sorry for the delayed response, was on vacation last week.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-next] act_bpf: properly support late binding of bpf action to a classifier

2015-08-03 Thread David Miller

From: Daniel Borkmann 
Date: Mon,  3 Aug 2015 16:21:57 +0200

> Since the introduction of the BPF action in d23b8ad8ab23 ("tc: add BPF
> based action"), late binding was not working as expected. I.e. setting
> the action part for a classifier only via 'bpf index ', where 
> is the index of an existing action, is being rejected by the kernel due
> to other missing parameters.
> 
> It doesn't make sense to require these parameters such as BPF opcodes
> etc, as they are not going to be used anyway: in this case, they're just
> allocated/parsed and then freed again w/o doing anything meaningful.
> 
> Instead, parse and verify the remaining parameters *after* the test on
> tcf_hash_check(), when we really know that we're dealing with creation
> of a new action or replacement of an existing one and where late binding
> is thus irrelevant.
> 
> After patch, test case is now working:
> 
>   FOO="1,6 0 0 4294967295,"
>   tc actions add action bpf bytecode "$FOO"
>   tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action bpf 
> index 1
>   tc actions show action bpf
> action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
> index 1 ref 2 bind 1
>   tc filter show dev foo
> filter protocol all pref 49152 bpf
> filter protocol all pref 49152 bpf handle 0x1 flowid 1:1 bytecode '1,6 0 
> 0 4294967295'
> action order 1: bpf bytecode '1,6 0 0 4294967295' default-action pipe
> index 1 ref 2 bind 1
> 
> Late binding of a BPF action can be useful for preloading maps (e.g. before
> they hit traffic) in case of eBPF programs, or to share a single eBPF action
> with multiple classifiers.
> 
> Signed-off-by: Daniel Borkmann 

Applied to net-next, thanks Daniel.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] fddi: Use a more more typical logging style

2015-08-03 Thread David Miller

From: Joe Perches 
Date: Sun, 02 Aug 2015 21:27:45 -0700

> Use macros that don't require fixed argument counts so
> format and arguments can be verified by the compiler.
> 
> Miscellanea:
> 
> o Remove a few #if uses to allow dynamic debug to always work
> o whitespace neatening
> 
> Signed-off-by: Joe Perches 

This doesn't apply cleanly to net-next, please respin.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/2] openvswitch: Use regular GRE net_device instead of vport

2015-08-03 Thread Pravin B Shelar

With the addition of flow based tunneling, there is no need to
have a special GRE vport. Remove all of the OVS specific
GRE code and make OVS use an ip_gre net_device.
A minimal GRE vport is kept to handle compatibility with
current userspace applications.

Signed-off-by: Pravin B Shelar 
---
 include/net/gre.h  |  11 +-
 include/net/ip_tunnels.h   |   6 +-
 net/ipv4/gre_demux.c   |  34 --
 net/ipv4/ip_gre.c  | 185 -
 net/ipv4/ip_tunnel.c   |   6 +-
 net/ipv4/ipip.c|   2 +-
 net/ipv6/sit.c |   2 +-
 net/openvswitch/Kconfig|   1 -
 net/openvswitch/vport-gre.c| 230 -
 net/openvswitch/vport-netdev.c |   5 +-
 net/openvswitch/vport-netdev.h |   2 +
 net/openvswitch/vport.h|   2 +-
 12 files changed, 222 insertions(+), 264 deletions(-)

diff --git a/include/net/gre.h b/include/net/gre.h
index b531820..4193fd7 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -33,16 +33,7 @@ struct gre_cisco_protocol {
 int gre_cisco_register(struct gre_cisco_protocol *proto);
 int gre_cisco_unregister(struct gre_cisco_protocol *proto);
 
-void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
- int hdr_len);
-
-static inline struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
- bool csum)
-{
-   return iptunnel_handle_offloads(skb, csum,
-   csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
-}
-
+#define GRE_TAP_FB_NAME "gretap0"
 
 static inline int ip_gre_calc_hlen(__be16 o_flags)
 {
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 4798441..fc37624 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -82,6 +82,8 @@ struct ip_tunnel_dst {
__be32   saddr;
 };
 
+struct metadata_dst;
+
 struct ip_tunnel {
struct ip_tunnel __rcu  *next;
struct hlist_node hash_node;
@@ -115,6 +117,7 @@ struct ip_tunnel {
unsigned intprl_count;  /* # of entries in PRL */
int ip_tnl_net_id;
struct gro_cellsgro_cells;
+   boolflow_based_tunnel;
 };
 
 #define TUNNEL_CSUM__cpu_to_be16(0x01)
@@ -235,7 +238,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net 
*itn,
   __be32 key);
 
 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
- const struct tnl_ptk_info *tpi, bool log_ecn_error);
+ const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
+ bool log_ecn_error);
 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
 struct ip_tunnel_parm *p);
 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 4a7b5b2..77562e0 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -61,40 +61,6 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 
version)
 }
 EXPORT_SYMBOL_GPL(gre_del_protocol);
 
-void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
- int hdr_len)
-{
-   struct gre_base_hdr *greh;
-
-   skb_push(skb, hdr_len);
-
-   skb_reset_transport_header(skb);
-   greh = (struct gre_base_hdr *)skb->data;
-   greh->flags = tnl_flags_to_gre_flags(tpi->flags);
-   greh->protocol = tpi->proto;
-
-   if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
-   __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
-
-   if (tpi->flags&TUNNEL_SEQ) {
-   *ptr = tpi->seq;
-   ptr--;
-   }
-   if (tpi->flags&TUNNEL_KEY) {
-   *ptr = tpi->key;
-   ptr--;
-   }
-   if (tpi->flags&TUNNEL_CSUM &&
-   !(skb_shinfo(skb)->gso_type &
- (SKB_GSO_GRE|SKB_GSO_GRE_CSUM))) {
-   *ptr = 0;
-   *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
-skb->len, 0));
-   }
-   }
-}
-EXPORT_SYMBOL_GPL(gre_build_header);
-
 static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
bool *csum_err)
 {
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 5fd7064..31f2ec5 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -47,6 +48,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #if IS_ENABLED(CONFIG_IPV6)
 #include 
@@ -200,9 +202,29 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
return PACKET_RCVD;
 }
 
+static __be64 key_to_tunnel_id(__be32

[net-next:master 173/173] net/bridge/br_mdb.c:493:8: error: 'struct net_bridge' has no member named 'vlan_enabled'

2015-08-03 Thread kbuild test robot

tree:   git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git master
head:   e44deb2f0cce9183ca94d14effd4170a35eec31d
commit: e44deb2f0cce9183ca94d14effd4170a35eec31d [173/173] bridge: mdb: add/del 
entry on all vlans if vlan_filter is enabled and vid is 0
config: sh-titan_defconfig (attached as .config)
reproduce:
  wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
  chmod +x ~/bin/make.cross
  git checkout e44deb2f0cce9183ca94d14effd4170a35eec31d
  # save the attached .config to linux build tree
  make.cross ARCH=sh 

All error/warnings (new ones prefixed by >>):

   net/bridge/br_mdb.c: In function 'br_mdb_add':
>> net/bridge/br_mdb.c:493:8: error: 'struct net_bridge' has no member named 
>> 'vlan_enabled'
   net/bridge/br_mdb.c: In function 'br_mdb_del':
   net/bridge/br_mdb.c:595:8: error: 'struct net_bridge' has no member named 
'vlan_enabled'

vim +493 net/bridge/br_mdb.c

   487  
   488  p = br_port_get_rtnl(pdev);
   489  if (!p || p->br != br || p->state == BR_STATE_DISABLED)
   490  return -EINVAL;
   491  
   492  pv = nbp_get_vlan_info(p);
 > 493  if (br->vlan_enabled && pv && entry->vid == 0) {
   494  for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
   495  entry->vid = vid;
   496  err = __br_mdb_add(net, br, entry);

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation
#
# Automatically generated file; DO NOT EDIT.
# Linux/sh 4.2.0-rc4 Kernel Configuration
#
CONFIG_SUPERH=y
CONFIG_SUPERH32=y
# CONFIG_SUPERH64 is not set
CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
CONFIG_RWSEM_GENERIC_SPINLOCK=y
CONFIG_GENERIC_BUG=y
CONFIG_GENERIC_HWEIGHT=y
# CONFIG_ARCH_SUSPEND_POSSIBLE is not set
CONFIG_ARCH_HIBERNATION_POSSIBLE=y
CONFIG_SYS_SUPPORTS_HUGETLBFS=y
CONFIG_SYS_SUPPORTS_PCI=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_HAVE_LATENCYTOP_SUPPORT=y
# CONFIG_ARCH_HAS_ILOG2_U32 is not set
# CONFIG_ARCH_HAS_ILOG2_U64 is not set
# CONFIG_NO_IOPORT_MAP is not set
CONFIG_DMA_NONCOHERENT=y
CONFIG_NEED_DMA_MAP_STATE=y
CONFIG_NEED_SG_DMA_LENGTH=y
CONFIG_PGTABLE_LEVELS=2
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
CONFIG_IRQ_WORK=y

#
# General setup
#
CONFIG_BROKEN_ON_SMP=y
CONFIG_INIT_ENV_ARG_LIMIT=32
CONFIG_CROSS_COMPILE=""
# CONFIG_COMPILE_TEST is not set
CONFIG_LOCALVERSION=""
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_HAVE_KERNEL_GZIP=y
CONFIG_HAVE_KERNEL_BZIP2=y
CONFIG_HAVE_KERNEL_LZMA=y
CONFIG_HAVE_KERNEL_XZ=y
CONFIG_HAVE_KERNEL_LZO=y
CONFIG_KERNEL_GZIP=y
# CONFIG_KERNEL_BZIP2 is not set
# CONFIG_KERNEL_LZMA is not set
# CONFIG_KERNEL_XZ is not set
# CONFIG_KERNEL_LZO is not set
CONFIG_DEFAULT_HOSTNAME="(none)"
CONFIG_SWAP=y
CONFIG_SYSVIPC=y
CONFIG_SYSVIPC_SYSCTL=y
CONFIG_POSIX_MQUEUE=y
CONFIG_POSIX_MQUEUE_SYSCTL=y
CONFIG_CROSS_MEMORY_ATTACH=y
# CONFIG_FHANDLE is not set
CONFIG_USELIB=y
# CONFIG_AUDIT is not set
CONFIG_HAVE_ARCH_AUDITSYSCALL=y

#
# IRQ subsystem
#
CONFIG_MAY_HAVE_SPARSE_IRQ=y
CONFIG_GENERIC_IRQ_SHOW=y
CONFIG_IRQ_DOMAIN=y
CONFIG_IRQ_FORCED_THREADING=y
CONFIG_SPARSE_IRQ=y
CONFIG_GENERIC_CLOCKEVENTS=y

#
# Timers subsystem
#
CONFIG_HZ_PERIODIC=y
# CONFIG_NO_HZ_IDLE is not set
# CONFIG_NO_HZ is not set
# CONFIG_HIGH_RES_TIMERS is not set

#
# CPU/Task time and stats accounting
#
CONFIG_TICK_CPU_ACCOUNTING=y
# CONFIG_BSD_PROCESS_ACCT is not set
# CONFIG_TASKSTATS is not set

#
# RCU Subsystem
#
CONFIG_TINY_RCU=y
# CONFIG_RCU_EXPERT is not set
CONFIG_SRCU=y
# CONFIG_TASKS_RCU is not set
# CONFIG_RCU_STALL_COMMON is not set
# CONFIG_TREE_RCU_TRACE is not set
# CONFIG_RCU_EXPEDITE_BOOT is not set
CONFIG_BUILD_BIN2C=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=16
# CONFIG_CGROUPS is not set
# CONFIG_CHECKPOINT_RESTORE is not set
CONFIG_NAMESPACES=y
CONFIG_UTS_NS=y
CONFIG_IPC_NS=y
# CONFIG_USER_NS is not set
CONFIG_PID_NS=y
CONFIG_NET_NS=y
# CONFIG_SCHED_AUTOGROUP is not set
# CONFIG_SYSFS_DEPRECATED is not set
# CONFIG_RELAY is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE=""
CONFIG_RD_GZIP=y
CONFIG_RD_BZIP2=y
CONFIG_RD_LZMA=y
CONFIG_RD_XZ=y
CONFIG_RD_LZO=y
CONFIG_RD_LZ4=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_SYSCTL=y
CONFIG_ANON_INODES=y
CONFIG_HAVE_UID16=y
CONFIG_BPF=y
# CONFIG_EXPERT is not set
CONFIG_UID16=y
CONFIG_MULTIUSER=y
CONFIG_SGETMASK_SYSCALL=y
CONFIG_SYSFS_SYSCALL=y
# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_KALLSYMS=y
# CONFIG_KALLSYMS_ALL is not set
CONFIG_PRINTK=y
CONFIG_BUG=y
CONFIG_ELF_CORE=y
CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
CONFIG_EPOLL=y
CONFIG_SIGNALFD=y
CONFIG_TIMERFD=y
CONFIG_EVENTFD=y
# CONFIG_BPF_SYSCALL is not set
CONFIG_SHMEM=y
CONFIG_AIO=y
CONFIG_ADVISE_SYSCALLS=y
CONFIG_PCI_QUIRKS=y
# CONFIG_EMBEDDED is not set
CONFIG_HAVE_PERF_EVENTS=y
CONFIG_PERF_USE_VMALLOC=y

#
# Kernel Performance Ev

[PATCH 2/2] gre: Remove support for sharing GRE protocol hook.

2015-08-03 Thread Pravin B Shelar

Support for sharing GREPROTO_CISCO port was added so that
OVS gre port and kernel GRE devices can co-exist. After
flow-based tunneling patches OVS GRE protocol processing
is completely moved to ip_gre module, so there is no need
for GRE protocol hook. Following patch consolidates
GRE protocol related functions into ip_gre module.

Signed-off-by: Pravin B Shelar 
---
 include/net/gre.h|  86 +++--
 net/ipv4/gre_demux.c | 201 +--
 net/ipv4/ip_gre.c| 215 +++
 3 files changed, 209 insertions(+), 293 deletions(-)

diff --git a/include/net/gre.h b/include/net/gre.h
index 4193fd7..b2572b7 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -4,92 +4,24 @@
 #include 
 #include 
 
-#define GREPROTO_CISCO 0
-#define GREPROTO_PPTP  1
-#define GREPROTO_MAX   2
-#define GRE_IP_PROTO_MAX   2
-
-struct gre_protocol {
-   int  (*handler)(struct sk_buff *skb);
-   void (*err_handler)(struct sk_buff *skb, u32 info);
-};
-
 struct gre_base_hdr {
__be16 flags;
__be16 protocol;
 };
 #define GRE_HEADER_SECTION 4
 
+struct gre_protocol {
+   int  (*handler)(struct sk_buff *skb);
+   void (*err_handler)(struct sk_buff *skb, u32 info);
+};
+
 int gre_add_protocol(const struct gre_protocol *proto, u8 version);
 int gre_del_protocol(const struct gre_protocol *proto, u8 version);
 
-struct gre_cisco_protocol {
-   int (*handler)(struct sk_buff *skb, const struct tnl_ptk_info *tpi);
-   int (*err_handler)(struct sk_buff *skb, u32 info,
-  const struct tnl_ptk_info *tpi);
-   u8 priority;
-};
-
-int gre_cisco_register(struct gre_cisco_protocol *proto);
-int gre_cisco_unregister(struct gre_cisco_protocol *proto);
+#define GREPROTO_CISCO 0
+#define GREPROTO_PPTP  1
+#define GREPROTO_MAX   2
+#define GRE_IP_PROTO_MAX   2
 
 #define GRE_TAP_FB_NAME "gretap0"
-
-static inline int ip_gre_calc_hlen(__be16 o_flags)
-{
-   int addend = 4;
-
-   if (o_flags&TUNNEL_CSUM)
-   addend += 4;
-   if (o_flags&TUNNEL_KEY)
-   addend += 4;
-   if (o_flags&TUNNEL_SEQ)
-   addend += 4;
-   return addend;
-}
-
-static inline __be16 gre_flags_to_tnl_flags(__be16 flags)
-{
-   __be16 tflags = 0;
-
-   if (flags & GRE_CSUM)
-   tflags |= TUNNEL_CSUM;
-   if (flags & GRE_ROUTING)
-   tflags |= TUNNEL_ROUTING;
-   if (flags & GRE_KEY)
-   tflags |= TUNNEL_KEY;
-   if (flags & GRE_SEQ)
-   tflags |= TUNNEL_SEQ;
-   if (flags & GRE_STRICT)
-   tflags |= TUNNEL_STRICT;
-   if (flags & GRE_REC)
-   tflags |= TUNNEL_REC;
-   if (flags & GRE_VERSION)
-   tflags |= TUNNEL_VERSION;
-
-   return tflags;
-}
-
-static inline __be16 tnl_flags_to_gre_flags(__be16 tflags)
-{
-   __be16 flags = 0;
-
-   if (tflags & TUNNEL_CSUM)
-   flags |= GRE_CSUM;
-   if (tflags & TUNNEL_ROUTING)
-   flags |= GRE_ROUTING;
-   if (tflags & TUNNEL_KEY)
-   flags |= GRE_KEY;
-   if (tflags & TUNNEL_SEQ)
-   flags |= GRE_SEQ;
-   if (tflags & TUNNEL_STRICT)
-   flags |= GRE_STRICT;
-   if (tflags & TUNNEL_REC)
-   flags |= GRE_REC;
-   if (tflags & TUNNEL_VERSION)
-   flags |= GRE_VERSION;
-
-   return flags;
-}
-
 #endif
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 77562e0..d9c552a 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -31,7 +31,6 @@
 #include 
 
 static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
-static struct gre_cisco_protocol __rcu *gre_cisco_proto_list[GRE_IP_PROTO_MAX];
 
 int gre_add_protocol(const struct gre_protocol *proto, u8 version)
 {
@@ -61,163 +60,6 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 
version)
 }
 EXPORT_SYMBOL_GPL(gre_del_protocol);
 
-static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
-   bool *csum_err)
-{
-   const struct gre_base_hdr *greh;
-   __be32 *options;
-   int hdr_len;
-
-   if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr
-   return -EINVAL;
-
-   greh = (struct gre_base_hdr *)skb_transport_header(skb);
-   if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
-   return -EINVAL;
-
-   tpi->flags = gre_flags_to_tnl_flags(greh->flags);
-   hdr_len = ip_gre_calc_hlen(tpi->flags);
-
-   if (!pskb_may_pull(skb, hdr_len))
-   return -EINVAL;
-
-   greh = (struct gre_base_hdr *)skb_transport_header(skb);
-   tpi->proto = greh->protocol;
-
-   options = (__be32 *)(greh + 1);
-   if (greh->flags & GRE_CSUM) {
-   if (skb_checksum_simple_validate(skb)) {
-   *csu

Re: [PATCH net-next 5/9] openvswitch: Add conntrack action

2015-08-03 Thread Joe Stringer

On 31 July 2015 at 19:08, Pravin Shelar  wrote:
> On Thu, Jul 30, 2015 at 11:12 AM, Joe Stringer  wrote:
>> +static void prepare_frag(struct vport *vport, struct sw_flow_key *key,
>> +struct sk_buff *skb)
>> +{
>> +   unsigned int hlen = ETH_HLEN;
>> +   struct ovs_frag_data *data;
>> +
>> +   data = this_cpu_ptr(&ovs_frag_data_storage);
>> +   data->dst = skb_dst(skb);
>> +   data->vport = vport;
>> +   data->key = key;
>> +   data->cb = *OVS_CB(skb);
>> +
>> +   if (key->eth.tci & htons(VLAN_TAG_PRESENT)) {
>> +   if (skb_vlan_tag_present(skb)) {
>> +   data->vlan_proto = skb->vlan_proto;
>> +   } else {
>> +   data->vlan_proto = vlan_eth_hdr(skb)->h_vlan_proto;
>> +   hlen += VLAN_HLEN;
>> +   }
>> +   }
> Not all actions keep flow key uptodate, so here you can access stale values.

Hmm, okay. Perhaps the right thing to handle all of these cases is to
just make a copy of everything up to the network offset, and restore
that after fragmentation.

>> if (unlikely(err)) {
>> -   kfree_skb(skb);
>> +   /* Hide stolen fragments from user space. */
>> +   if (err == -EINPROGRESS)
>> +   err = 0;
> This does not look safe for error returned from all cases, Can you
> check this case specifically for the CT action case.

I'll place it inside the CT action case.

Thanks for the review, will roll the other fixes into the next version.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[no subject]

2015-08-03 Thread Pravin B Shelar

Following patches make use of new flow based tunneling
API from kernel. This allows us to directly use netdev
based GRE tunnel implementation. While doing so I have
removed GRE demux API which was targeted for OVS. Most
of GRE protocol code is now consolidated in ip_gre module.

Pravin B Shelar (2):
  openvswitch: Use regular GRE net_device instead of vport
  gre: Remove support for sharing GRE protocol hook.

 include/net/gre.h  |  97 ++
 include/net/ip_tunnels.h   |   6 +-
 net/ipv4/gre_demux.c   | 235 +---
 net/ipv4/ip_gre.c  | 400 ++---
 net/ipv4/ip_tunnel.c   |   6 +-
 net/ipv4/ipip.c|   2 +-
 net/ipv6/sit.c |   2 +-
 net/openvswitch/Kconfig|   1 -
 net/openvswitch/vport-gre.c| 230 +++-
 net/openvswitch/vport-netdev.c |   5 +-
 net/openvswitch/vport-netdev.h |   2 +
 net/openvswitch/vport.h|   2 +-
 12 files changed, 431 insertions(+), 557 deletions(-)

-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 0/2] igb/ixgbe: Fix ordering of SR-IOV teardown

2015-08-03 Thread Jeff Kirsher

On Wed, 2015-07-29 at 14:31 -0700, David Miller wrote:
> From: Alex Williamson 
> Date: Wed, 29 Jul 2015 13:33:07 -0600
> 
> > I expect that's because of this patch that's in Jeff's dev-queue branch:
> > 
> > http://git.kernel.org/cgit/linux/kernel/git/jkirsher/next-queue.git/commit/?h=dev-queue&id=ddf766a812a13eca1116b5905e902184904266f9
> > 
> > I based these patches off that branch, assuming they'd take the same
> > route and avoid the merge conflict.  If you'd rather take these, I'll be
> > happy to respin.  Apologies for not noting the base branch in the
> > series.  Thanks,
> 
> No, that's fine, this would normally go via Jeff's tree anyways.
> 
> I just didn't see him take it so I assumed that it should go via me.

Sorry, was on vacation last week and cell coverage was spotty where I
was at.  I have picked up the series.


signature.asc
Description: This is a digitally signed message part

Re: [PATCH] net: dsa: mv88e6xxx: call _mv88e6xxx_stats_wait with SMI lock held

2015-08-03 Thread David Miller

From: Vivien Didelot 
Date: Mon,  3 Aug 2015 09:17:44 -0400

> At switch setup, _mv88e6xxx_stats_wait was called without holding the
> SMI mutex. Fix this by requesting the lock for this call.
> 
> Also, return the _mv88e6xxx_stats_wait code, since it may fail.
> 
> Signed-off-by: Vivien Didelot 

Applied to net-next, thanks.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-next] bridge: mdb: add/del entry on all vlans if vlan_filter is enabled and vid is 0

2015-08-03 Thread David Miller

From: Nikolay Aleksandrov 
Date: Mon,  3 Aug 2015 13:29:16 +0200

> From: Satish Ashok 
> 
> Before this patch when a vid was not specified, the entry was added with
> vid 0 which is useless when vlan_filtering is enabled. This patch makes
> the entry to be added on all configured vlans when vlan filtering is
> enabled and respectively deleted from all, if the entry vid is 0.
> This is also closer to the way fdb works with regard to vid 0 and vlan
> filtering.
 ...
> Signed-off-by: Satish Ashok 
> Signed-off-by: Nikolay Aleksandrov 

Applied, but as usual if any existing user ends up being broken I will
revert this.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH net] udp: fix dst races with multicast early demux

2015-08-03 Thread Gregory Hoggarth

Hi,

I have included this patch into my code and re-run our tests overnight, out of 
644 iterations we did not see the kernel crash. Previous reproduction rate we 
would have expected 4-6 crashes in this time.

So I think this fixes the issue we are seeing.

Thanks,
Greg


From: netdev-ow...@vger.kernel.org  on behalf of 
Eric Dumazet 
Sent: Saturday, 1 August 2015 10:14 p.m.
To: Gregory Hoggarth
Cc: Shawn Bohrer; netdev@vger.kernel.org; alexgartr...@gmail.com; Michal Kubeček
Subject: [PATCH net] udp: fix dst races with multicast early demux

From: Eric Dumazet 

Multicast dst are not cached. They carry DST_NOCACHE.

As mentioned in commit f8864972126899 ("ipv4: fix dst race in
sk_dst_get()"), these dst need special care before caching them
into a socket.

Caching them is allowed only if their refcnt was not 0, ie we
must use atomic_inc_not_zero()

Also, we must use READ_ONCE() to fetch sk->sk_rx_dst, as mentioned
in commit d0c294c53a771 ("tcp: prevent fetching dst twice in early demux
code")

Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux")
Signed-off-by: Eric Dumazet 
Reported-by: Gregory Hoggarth 
Reported-by: Alex Gartrell 
Cc: Michal Kubeček 
---
David : I will be on vacation for following 7 days, no internet access.
Please wait for tests done by Gregory & Alex before merging this ?
Thanks !

 net/ipv4/udp.c |   13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 83aa604f9273..1b8c5ba7d5f7 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1995,12 +1995,19 @@ void udp_v4_early_demux(struct sk_buff *skb)

skb->sk = sk;
skb->destructor = sock_efree;
-   dst = sk->sk_rx_dst;
+   dst = READ_ONCE(sk->sk_rx_dst);

if (dst)
dst = dst_check(dst, 0);
-   if (dst)
-   skb_dst_set_noref(skb, dst);
+   if (dst) {
+   /* DST_NOCACHE can not be used without taking a reference */
+   if (dst->flags & DST_NOCACHE) {
+   if (likely(atomic_inc_not_zero(&dst->__refcnt)))
+   skb_dst_set(skb, dst);
+   } else {
+   skb_dst_set_noref(skb, dst);
+   }
+   }
 }

 int udp_rcv(struct sk_buff *skb)


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RX overrun errors building on several of our hosts

2015-08-03 Thread Dan Finn

After implementing a new monitoring tool I've noticed that several of our 
physical servers have increasing RX errors, all seem to be classified as 
overruns.


The interfaces are Broadcom Corporation BCM57840 NetXtreme II 10 Gigabit 
Ethernet (rev 11) and we are using the bnx2x driver.  They are configured in a 
bond0 using mode 0 or balance-rr.  We are not seeing any errors on the switch 
and my guess is that this  is either a config issue or driver problem since 
it's happening on multiple servers.  All the interfaces appear to be connected 
at 10 gig full duplex.  The servers are Dell M620s.   I've gathered as much 
related info as I could think of that would be helpful,  it can be found in 
this paste:

http://pastebin.centos.org/31716/

I'm not entirely sure where to look next, any help would be much appreciated.

Thanks,
Dan
 --
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] 3c59x: Fix resource leaks in vortex_open

2015-08-03 Thread David Miller

From: Jia-Ju Bai 
Date: Mon,  3 Aug 2015 11:18:12 +0800

> When vortex_up is failed, the skb buffers allocated by __netdev_alloc_skb
> in vortex_open are not released, which may cause resource leaks.
> This bug has been submitted before.
> This patch modifies the error handling code to fix it.
> 
> Signed-off-by: Jia-Ju Bai 

Applied, thanks.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [patch] rds: fix an integer overflow test in rds_info_getsockopt()

2015-08-03 Thread David Miller

From: Dan Carpenter 
Date: Sat, 1 Aug 2015 15:33:26 +0300

> "len" is a signed integer.  We check that len is not negative, so it
> goes from zero to INT_MAX.  PAGE_SIZE is unsigned long so the comparison
> is type promoted to unsigned long.  ULONG_MAX - 4095 is higher than
> INT_MAX so the condition can never be true.
> 
> I don't know if this is harmful but it seems safe to limit "len" to
> INT_MAX - 4095.
> 
> Fixes: a8c879a7ee98 ('RDS: Info and stats')
> Signed-off-by: Dan Carpenter 

Applied, thanks Dan.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] Add BQL support for 3c59x, based on patch from Tino Reichardt.

2015-08-03 Thread David Miller

From: Loganaden Velvindron 
Date: Fri, 31 Jul 2015 23:13:13 -0700

> Tested on 3Com PCI 3c905C Tornado by running Flent multiple times.
> 
> Signed-off-by: Loganaden Velvindron 

Please format your Subject line correctly, it should be of
the form:

[PATCH $TREE] $SUBSYSTEM: $DESCRIPTION.

Where "TREE" is either 'net' or 'net-next'.  "SUBSYSTEM" is the subsystem or
driver name being changed, which here should be '3c59x' and then the
title line description of your patch.

Also, I am pretty sure you will need to add logic to
vortex_tx_timeout() since that resets the TX ring state.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] r8169: Permit users to change transmit and receive max pachet size

2015-08-03 Thread David Miller

From: "Corcodel.marian" 
Date: Tue, 04 Aug 2015 00:41:50 +0300

> A this moment these param is only for test and not for large utilization.

Then you can patch your local driver for "testing".

Your change doesn't belong upstream.  We're not going to litter drivers
with debugging hack options.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-next] mpls: Use definition for reserved label checks

2015-08-03 Thread roopa


On 8/3/15, 9:50 AM, Robert Shearman wrote:

In multiple locations there are checks for whether the label in hand
is a reserved label or not using the arbitrary value of 16. Factor
this out into a #define for better maintainability and for
documentation.

Signed-off-by: Robert Shearman 
---


Acked-by: Roopa Prabhu 

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-next 0/2] lwtunnel: encap locally-generated ipv4 packets

2015-08-03 Thread roopa


On 8/3/15, 9:39 AM, Robert Shearman wrote:

Locally-generated IPv4 packets, such as from applications running on
the host or traceroute/ping currently don't have lwtunnel output
redirected encap applied. However, they should do in the same way as
for forwarded packets and this patch series addresses that.

Robert Shearman (2):
   lwtunnel: set skb protocol and dev
   ipv4: apply lwtunnel encap for locally-generated packets

  net/core/lwtunnel.c | 12 ++--
  net/ipv4/route.c|  2 ++
  2 files changed, 12 insertions(+), 2 deletions(-)


Thanks for this patch Robert. Looks good.
I have been thinking of sending a similar patch out for this and
since i was also looking at ip fragmentation, I have a slightly 
different patch which I think should also take care of
encapsulating locally generated packets too. This patch moves the output 
redirection to after ip fragmentation.
What do you think about the below (I have briefly tested it. Was 
planning to test some more before sending it out as RFC) ?


[PATCH net-next] lwtunnel: move output redirection to after ip fragmentation

This patch adds tunnel headroom in lwtstate to make
sure we account for tunnel data in mtu calculations and
moves tunnel output redirection after ip fragmentation.

Signed-off-by: Roopa Prabhu 
---
 include/net/lwtunnel.h   |1 +
 net/ipv4/ip_output.c |4 
 net/ipv4/route.c |5 +++--
 net/mpls/mpls_iptunnel.c |1 +
 4 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index 918e03c..7816805 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -18,6 +18,7 @@ struct lwtunnel_state {
__u16   flags;
atomic_trefcnt;
int len;
+   __u16   headroom;
__u8data[0];
 };

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6bf89a6..ae3119f 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -73,6 +73,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -201,6 +202,9 @@ static int ip_finish_output2(struct sock *sk, struct 
sk_buff *skb)

skb = skb2;
}

+   if (lwtunnel_output_redirect(rt->rt_lwtstate))
+   return lwtunnel_output(sk, skb);
+
rcu_read_lock_bh();
nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d3964fa..4e07b9a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1234,6 +1234,9 @@ static unsigned int ipv4_mtu(const struct 
dst_entry *dst)


mtu = dst->dev->mtu;

+   if (lwtunnel_output_redirect(rt->rt_lwtstate))
+   mtu -= rt->rt_lwtstate->headroom;
+
if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
if (rt->rt_uses_gateway && mtu > 576)
mtu = 576;
@@ -1634,8 +1637,6 @@ static int __mkroute_input(struct sk_buff *skb,
rth->dst.output = ip_output;

rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
-   if (lwtunnel_output_redirect(rth->rt_lwtstate))
-   rth->dst.output = lwtunnel_output;
skb_dst_set(skb, &rth->dst);
 out:
err = 0;

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-next 0/4] Stacked vlan TSO for virtual devices

2015-08-03 Thread David Miller

From: Toshiaki Makita 
Date: Fri, 31 Jul 2015 15:03:23 +0900

> Basically virtual devices do not need to segment double tagged packets.
> This patch set adds TSO feature for double tagged packets to several
> virtual devices, which can be realized by simply setting
> .ndo_features_check to passthru_features_check.

Series applied, thank you.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-next] virtio_net: add gro capability

2015-08-03 Thread David Miller

From: Eric Dumazet 
Date: Fri, 31 Jul 2015 18:25:17 +0200

> From: Eric Dumazet 
> 
> Straightforward patch to add GRO processing to virtio_net.
> 
> napi_complete_done() usage allows more aggressive aggregation,
> opted-in by setting /sys/class/net/xxx/gro_flush_timeout
> 
> Tested:
> 
> Setting /sys/class/net/xxx/gro_flush_timeout to 1000 nsec,
> Rick Jones reported following results.
> 
> One VM of each on a pair of OpenStack compute nodes with E5-2650Lv3 CPUs
> and Intel 82599ES-based NICs. So, two "before" and two "after" VMs.
> The OpenStack compute nodes were running OpenStack Kilo, with VxLAN
> encapsulation being used through OVS so no GRO coming-up the host
> stack.  The compute nodes themselves were running a 3.14-based kernel.
> 
> Single-stream netperf, CPU utilizations and thus service demands are
> based on intra-guest reported CPU.
 ...
> Signed-off-by: Eric Dumazet 
> Tested-by: Rick Jones 

Applied.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [patch net-next 1/2] rocker: enable support for scattered packets

2015-08-03 Thread David Miller

From: Jiri Pirko 
Date: Sun,  2 Aug 2015 20:56:37 +0200

> From: Ido Schimmel 
> 
> rocker supports the transmission of scattered packets, so let the kernel
> know about it by setting the NETIF_F_SG bit in the device's features.
> 
> Signed-off-by: Ido Schimmel 
> Signed-off-by: Jiri Pirko 

Applied.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [patch net-next 2/2] rocker: linearize skb in case frags would not fit into tx descriptor

2015-08-03 Thread David Miller

From: Jiri Pirko 
Date: Sun,  2 Aug 2015 20:56:38 +0200

> Suggested-by: Scott Feldman 
> Signed-off-by: Jiri Pirko 

Applied.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-next] vxlan: expose COLLECT_METADATA flag to user space

2015-08-03 Thread Jesse Gross

On Fri, Jul 31, 2015 at 8:41 AM, Alexei Starovoitov  wrote:
> thanks. I think exposing collect_metadata for vxlan and in the future
> for other tunnel types is the clean enough way, though the other
> alternative would be to get rid of collect_metadata flag
> from the kernel and do it when flowmode flag is set. Thoughts?

This seems like a good idea to me - I'm not sure that flow based
tunnels are all that useful without metadata collection enabled and
the fewer interfaces that we have to create for each tunnel type, the
better.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Patch net] act_mirred: avoid calling tcf_hash_release() when binding

2015-08-03 Thread David Miller

From: Cong Wang 
Date: Thu, 30 Jul 2015 17:12:20 -0700

> When we share an action within a filter, the bind refcnt
> should increase, therefore we should not call tcf_hash_release().
> 
> Cc: Jamal Hadi Salim 
> Cc: Daniel Borkmann 
> Signed-off-by: Cong Wang 
> Signed-off-by: Cong Wang 

Applied, thanks.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] r8169: Permit users to change transmit and receive max pachet size

2015-08-03 Thread David Miller

From: "Corcodel.marian" 
Date: Tue, 04 Aug 2015 00:06:28 +0300

> Sorry bu these parameters is not covered by ethtool.

Then simply add such a generic facility, so other drivers can benefit
from it as well.

I'll be completely honest, and say that I don't anticipate that you
will implement the ethtool option properly, and that it will probably
take 5 or 6 iterations of review of such a patch before we'll be
willing to accept it.  But this is really what you will have to do.

Then you will need to submit a patch to 'ethtool' itself to support
the new options.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v2] openvswitch: Fix L4 checksum handling when dealing with IP fragments

2015-08-03 Thread David Miller

From: Glenn Griffin 
Date: Mon, 3 Aug 2015 09:56:54 -0700

> openvswitch modifies the L4 checksum of a packet when modifying
> the ip address. When an IP packet is fragmented only the first
> fragment contains an L4 header and checksum. Prior to this change
> openvswitch would modify all fragments, modifying application data
> in non-first fragments, causing checksum failures in the
> reassembled packet.
> 
> Signed-off-by: Glenn Griffin 
> ---
> Changes in v2:
>   - Compare frag_off in network byte order rather than host byte order

Applied and queued up for -stable.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v2 net-next 1/6] net: fix sk_mem_reclaim_partial()

2015-08-03 Thread Jason Baron



On 05/15/2015 03:39 PM, Eric Dumazet wrote:
> sk_mem_reclaim_partial() goal is to ensure each socket has
> one SK_MEM_QUANTUM forward allocation. This is needed both for
> performance and better handling of memory pressure situations in
> follow up patches.
> 
> SK_MEM_QUANTUM is currently a page, but might be reduced to 4096 bytes
> as some arches have 64KB pages.
> 
> Signed-off-by: Eric Dumazet 
> ---
>  include/net/sock.h | 6 +++---
>  net/core/sock.c| 9 +
>  2 files changed, 8 insertions(+), 7 deletions(-)
> 
> diff --git a/include/net/sock.h b/include/net/sock.h
> index d882f4c8e438..4581a60636f8 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -1368,7 +1368,7 @@ static inline struct inode *SOCK_INODE(struct socket 
> *socket)
>   * Functions for memory accounting
>   */
>  int __sk_mem_schedule(struct sock *sk, int size, int kind);
> -void __sk_mem_reclaim(struct sock *sk);
> +void __sk_mem_reclaim(struct sock *sk, int amount);
>  
>  #define SK_MEM_QUANTUM ((int)PAGE_SIZE)
>  #define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM)
> @@ -1409,7 +1409,7 @@ static inline void sk_mem_reclaim(struct sock *sk)
>   if (!sk_has_account(sk))
>   return;
>   if (sk->sk_forward_alloc >= SK_MEM_QUANTUM)
> - __sk_mem_reclaim(sk);
> + __sk_mem_reclaim(sk, sk->sk_forward_alloc);
>  }
>  
>  static inline void sk_mem_reclaim_partial(struct sock *sk)
> @@ -1417,7 +1417,7 @@ static inline void sk_mem_reclaim_partial(struct sock 
> *sk)
>   if (!sk_has_account(sk))
>   return;
>   if (sk->sk_forward_alloc > SK_MEM_QUANTUM)
> - __sk_mem_reclaim(sk);
> + __sk_mem_reclaim(sk, sk->sk_forward_alloc - 1);
>  }
>

Hi,

Was just looking at this again - this doesn't ensure the SK_MEM_QUANTUM
minimum as the comment suggests- should it be:

__sk_mem_reclaim(sk, sk->sk_forward_alloc - SK_MEM_QUANTUM);
                                            ^^^^^^^^^^^^^^

Or are you just trying to make sure it's not 0?

Thanks,

-Jason



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [patch -master] netfilter: xt_CT: checking for IS_ERR() instead of NULL

2015-08-03 Thread Joe Stringer

On 3 August 2015 at 13:24, Pablo Neira Ayuso  wrote:
> On Mon, Aug 03, 2015 at 11:30:16AM -0700, Joe Stringer wrote:
>> On 3 August 2015 at 11:29, Joe Stringer  wrote:
>> > On 30 July 2015 at 04:57, Pablo Neira Ayuso  wrote:
>> >> On Tue, Jul 28, 2015 at 01:42:28AM +0300, Dan Carpenter wrote:
>> >>> We recently changed this from nf_conntrack_alloc() to nf_ct_tmpl_alloc()
>> >>> so the error handling needs to be changed to check for NULL instead of
>> >>> IS_ERR().
>> >>>
>> >>> Fixes: 0838aa7fcfcd ('netfilter: fix netns dependencies with conntrack 
>> >>> templates')
>> >>> Signed-off-by: Dan Carpenter 
>> >>
>> >> Applied, thanks.
>> >>
>> >> I have also appended this chunk, since synproxy is also affected:
>> >>
>> >> --- a/net/netfilter/nf_synproxy_core.c
>> >> +++ b/net/netfilter/nf_synproxy_core.c
>> >> @@ -353,7 +353,7 @@ static int __net_init synproxy_net_init(struct net 
>> >> *net)
>> >> int err = -ENOMEM;
>> >>
>> >> ct = nf_ct_tmpl_alloc(net, 0, GFP_KERNEL);
>> >> -   if (IS_ERR(ct)) {
>> >> +   if (!ct) {
>> >> err = PTR_ERR(ct);
>> >> goto err1;
>> >> }
>> >
>> > Does PTR_ERR() implicitly interpret NULL as -ENOMEM? Seems like the
>> > fix applied here is a little different from the xt_CT fix.
>>
>> Just saw the initialization of err now, but this would be overridden
>> within the error checking statement.
>
> Right, I noticed before pushing out this change, the final applied
> patch is here:
>
> http://git.kernel.org/cgit/linux/kernel/git/pablo/nf.git/commit/?id=1a727c63612fc582370cf3dc01239d3d239743b5
>
> Let me know if you still have any concern, thanks Joe.

Looks fine. Apologies for the noise.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [patch -master] netfilter: xt_CT: checking for IS_ERR() instead of NULL

2015-08-03 Thread Pablo Neira Ayuso

On Mon, Aug 03, 2015 at 11:30:16AM -0700, Joe Stringer wrote:
> On 3 August 2015 at 11:29, Joe Stringer  wrote:
> > On 30 July 2015 at 04:57, Pablo Neira Ayuso  wrote:
> >> On Tue, Jul 28, 2015 at 01:42:28AM +0300, Dan Carpenter wrote:
> >>> We recently changed this from nf_conntrack_alloc() to nf_ct_tmpl_alloc()
> >>> so the error handling needs to be changed to check for NULL instead of
> >>> IS_ERR().
> >>>
> >>> Fixes: 0838aa7fcfcd ('netfilter: fix netns dependencies with conntrack 
> >>> templates')
> >>> Signed-off-by: Dan Carpenter 
> >>
> >> Applied, thanks.
> >>
> >> I have also appended this chunk, since synproxy is also affected:
> >>
> >> --- a/net/netfilter/nf_synproxy_core.c
> >> +++ b/net/netfilter/nf_synproxy_core.c
> >> @@ -353,7 +353,7 @@ static int __net_init synproxy_net_init(struct net 
> >> *net)
> >> int err = -ENOMEM;
> >>
> >> ct = nf_ct_tmpl_alloc(net, 0, GFP_KERNEL);
> >> -   if (IS_ERR(ct)) {
> >> +   if (!ct) {
> >> err = PTR_ERR(ct);
> >> goto err1;
> >> }
> >
> > Does PTR_ERR() implicitly interpret NULL as -ENOMEM? Seems like the
> > fix applied here is a little different from the xt_CT fix.
> 
> Just saw the initialization of err now, but this would be overridden
> within the error checking statement.

Right, I noticed before pushing out this change, the final applied
patch is here:

http://git.kernel.org/cgit/linux/kernel/git/pablo/nf.git/commit/?id=1a727c63612fc582370cf3dc01239d3d239743b5

Let me know if you still have any concern, thanks Joe.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [patch net-next 2/2] rocker: linearize skb in case frags would not fit into tx descriptor

2015-08-03 Thread Scott Feldman

On Sun, Aug 2, 2015 at 11:56 AM, Jiri Pirko  wrote:
> Suggested-by: Scott Feldman 
> Signed-off-by: Jiri Pirko 

Acked-by: Scott Feldman 
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [patch -master] netfilter: xt_CT: checking for IS_ERR() instead of NULL

2015-08-03 Thread Joe Stringer

On 3 August 2015 at 11:29, Joe Stringer  wrote:
> On 30 July 2015 at 04:57, Pablo Neira Ayuso  wrote:
>> On Tue, Jul 28, 2015 at 01:42:28AM +0300, Dan Carpenter wrote:
>>> We recently changed this from nf_conntrack_alloc() to nf_ct_tmpl_alloc()
>>> so the error handling needs to be changed to check for NULL instead of
>>> IS_ERR().
>>>
>>> Fixes: 0838aa7fcfcd ('netfilter: fix netns dependencies with conntrack 
>>> templates')
>>> Signed-off-by: Dan Carpenter 
>>
>> Applied, thanks.
>>
>> I have also appended this chunk, since synproxy is also affected:
>>
>> --- a/net/netfilter/nf_synproxy_core.c
>> +++ b/net/netfilter/nf_synproxy_core.c
>> @@ -353,7 +353,7 @@ static int __net_init synproxy_net_init(struct net *net)
>> int err = -ENOMEM;
>>
>> ct = nf_ct_tmpl_alloc(net, 0, GFP_KERNEL);
>> -   if (IS_ERR(ct)) {
>> +   if (!ct) {
>> err = PTR_ERR(ct);
>> goto err1;
>> }
>
> Does PTR_ERR() implicitly interpret NULL as -ENOMEM? Seems like the
> fix applied here is a little different from the xt_CT fix.

Just saw the initialization of err now, but this would be overridden
within the error checking statement.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [patch net-next 1/2] rocker: enable support for scattered packets

2015-08-03 Thread Scott Feldman

On Sun, Aug 2, 2015 at 11:56 AM, Jiri Pirko  wrote:
> From: Ido Schimmel 
>
> rocker supports the transmission of scattered packets, so let the kernel
> know about it by setting the NETIF_F_SG bit in the device's features.
>
> Signed-off-by: Ido Schimmel 
> Signed-off-by: Jiri Pirko 

Acked-by: Scott Feldman 
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [ovs-dev] [PATCH v1] netdev: Make 100 percents packets sampled when sampling rate is 1.

2015-08-03 Thread Jesse Gross

On Mon, Aug 3, 2015 at 11:18 AM, Pravin Shelar  wrote:
> On Mon, Aug 3, 2015 at 12:11 AM, Wenyu Zhang  wrote:
>> When sampling rate is 1, the sampling probability is UINT32_MAX. The packet
>> should be sampled even if prandom_u32() generates the number UINT32_MAX.
>> And no packet needs to be sampled when the probability is 0.
>>
>> Signed-off-by: Wenyu Zhang 
>> ---
>>  net/openvswitch/actions.c |4 +++-
>>  1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
>> index cf04c2f..03acb09 100644
>> --- a/net/openvswitch/actions.c
>> +++ b/net/openvswitch/actions.c
>> @@ -669,9 +669,11 @@ static int sample(struct datapath *dp, struct sk_buff 
>> *skb,
>>
>> for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
>>  a = nla_next(a, &rem)) {
>> +   uint32_t probability;
>> switch (nla_type(a)) {
>> case OVS_SAMPLE_ATTR_PROBABILITY:
>> -   if (prandom_u32() >= nla_get_u32(a))
>> +   probability = nla_get_u32(a);
>> +   if (!probability || probability > nla_get_u32(a))
>
> This condition does not look right to calculate sampling probability.

When you send v2, can you also make the subject more narrow
("openvswitch" instead of "netdev") and add the tree that you are
targeting ("[PATCH net]" in this case)?
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [patch -master] netfilter: xt_CT: checking for IS_ERR() instead of NULL

2015-08-03 Thread Joe Stringer

On 30 July 2015 at 04:57, Pablo Neira Ayuso  wrote:
> On Tue, Jul 28, 2015 at 01:42:28AM +0300, Dan Carpenter wrote:
>> We recently changed this from nf_conntrack_alloc() to nf_ct_tmpl_alloc()
>> so the error handling needs to be changed to check for NULL instead of
>> IS_ERR().
>>
>> Fixes: 0838aa7fcfcd ('netfilter: fix netns dependencies with conntrack 
>> templates')
>> Signed-off-by: Dan Carpenter 
>
> Applied, thanks.
>
> I have also appended this chunk, since synproxy is also affected:
>
> --- a/net/netfilter/nf_synproxy_core.c
> +++ b/net/netfilter/nf_synproxy_core.c
> @@ -353,7 +353,7 @@ static int __net_init synproxy_net_init(struct net *net)
> int err = -ENOMEM;
>
> ct = nf_ct_tmpl_alloc(net, 0, GFP_KERNEL);
> -   if (IS_ERR(ct)) {
> +   if (!ct) {
> err = PTR_ERR(ct);
> goto err1;
> }

Does PTR_ERR() implicitly interpret NULL as -ENOMEM? Seems like the
fix applied here is a little different from the xt_CT fix.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] net: fec: fix initial runtime PM refcount

2015-08-03 Thread Alan Stern

On Mon, 3 Aug 2015, Uwe Kleine-König wrote:

> Hello,
> 
> I have no clue about runtime-pm, but I added a few people to Cc: who
> should know better ...
> 
> Best regards
> Uwe
> 
> On Mon, Aug 03, 2015 at 06:15:54PM +0200, Andrew Lunn wrote:
> > On Mon, Aug 03, 2015 at 05:50:11PM +0200, Lucas Stach wrote:
> > > The clocks are initially active and thus the device is marked active.
> > > This still keeps the PM refcount at 0, the pm_runtime_put_autosuspend()
> > > call at the end of probe then leaves us with an invalid refcount of -1,
> > > which in turn leads to the device staying in suspended state even though
> > > netdev open had been called.
> > > 
> > > Fix this by initializing the refcount to be coherent with the initial
> > > device status.
> > > 
> > > Fixes:
> > > 8fff755e9f8 (net: fec: Ensure clocks are enabled while using mdio bus)
> > > 
> > > Signed-off-by: Lucas Stach 
> > > ---
> > > Please apply this as a fix for 4.2
> > > ---
> > >  drivers/net/ethernet/freescale/fec_main.c | 1 +
> > >  1 file changed, 1 insertion(+)
> > > 
> > > diff --git a/drivers/net/ethernet/freescale/fec_main.c 
> > > b/drivers/net/ethernet/freescale/fec_main.c
> > > index 32e3807c650e..271bb5862346 100644
> > > --- a/drivers/net/ethernet/freescale/fec_main.c
> > > +++ b/drivers/net/ethernet/freescale/fec_main.c
> > > @@ -3433,6 +3433,7 @@ fec_probe(struct platform_device *pdev)
> > >  
> > >   pm_runtime_set_autosuspend_delay(&pdev->dev, FEC_MDIO_PM_TIMEOUT);
> > >   pm_runtime_use_autosuspend(&pdev->dev);
> > > + pm_runtime_get_noresume(&pdev->dev);
> > >   pm_runtime_set_active(&pdev->dev);
> > >   pm_runtime_enable(&pdev->dev);
> > 
> > This might work, but is it the correct fix?

It looks reasonable to me.  It might also make sense to move all of
that pm_runtime_* stuff to the end of the probe routine.  Notice that
they don't get undone if register_netdev() fails.

> > Documentation/power/runtime_pm.txt says:
> > 
> > 534 In addition to that, the initial runtime PM status of all devices is
> > 535 'suspended', but it need not reflect the actual physical state of the 
> > device.
> > 536 Thus, if the device is initially active (i.e. it is able to process 
> > I/O), its
> > 537 runtime PM status must be changed to 'active', with the help of
> > 538 pm_runtime_set_active(), before pm_runtime_enable() is called for the 
> > device.
> > 
> > At the point we call the pm_runtime_ functions above, all the clocks
> > are ticking. So according to the documentation pm_runtime_set_active()
> > is the right thing to do. But it makes no mention of having to call
> > pm_runtime_get_noresume(). I would have expected pm_runtime_set_active()
> > to set the count to the correct value.

pm_runtime_set_active() doesn't change the usage count.  All it does is 
set the runtime PM status to "active".

A call to pm_runtime_get_noresume() (or something similar) is necessary
to balance the call to pm_runtime_put_autosuspend() at the end of the
probe routine.  Both the _get_ and the _put_ should be present or
neither should be.

For instance, an alternative way to accomplish the same result is to
replace pm_runtime_put_autosuspend() with pm_runtime_autosuspend().  
The only difference is that the usage counter would not be elevated
during the register_netdev() call, so in theory the device could be
suspended while that routine is running.  But if all the pm_runtime_*
calls were moved to the end of the probe function, even that couldn't
happen.

Alan Stern

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [ovs-dev] [PATCH v1] netdev: Make 100 percents packets sampled when sampling rate is 1.

2015-08-03 Thread Pravin Shelar

On Mon, Aug 3, 2015 at 12:11 AM, Wenyu Zhang  wrote:
> When sampling rate is 1, the sampling probability is UINT32_MAX. The packet
> should be sampled even if prandom_u32() generates the number UINT32_MAX.
> And no packet needs to be sampled when the probability is 0.
>
> Signed-off-by: Wenyu Zhang 
> ---
>  net/openvswitch/actions.c |4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
> index cf04c2f..03acb09 100644
> --- a/net/openvswitch/actions.c
> +++ b/net/openvswitch/actions.c
> @@ -669,9 +669,11 @@ static int sample(struct datapath *dp, struct sk_buff 
> *skb,
>
> for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
>  a = nla_next(a, &rem)) {
> +   uint32_t probability;
> switch (nla_type(a)) {
> case OVS_SAMPLE_ATTR_PROBABILITY:
> -   if (prandom_u32() >= nla_get_u32(a))
> +   probability = nla_get_u32(a);
> +   if (!probability || probability > nla_get_u32(a))

This condition does not look right to calculate sampling probability.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] net: fec: fix initial runtime PM refcount

2015-08-03 Thread Uwe Kleine-König

Hello,

I have no clue about runtime-pm, but I added a few people to Cc: who
should know better ...

Best regards
Uwe

On Mon, Aug 03, 2015 at 06:15:54PM +0200, Andrew Lunn wrote:
> On Mon, Aug 03, 2015 at 05:50:11PM +0200, Lucas Stach wrote:
> > The clocks are initially active and thus the device is marked active.
> > This still keeps the PM refcount at 0, the pm_runtime_put_autosuspend()
> > call at the end of probe then leaves us with an invalid refcount of -1,
> > which in turn leads to the device staying in suspended state even though
> > netdev open had been called.
> > 
> > Fix this by initializing the refcount to be coherent with the initial
> > device status.
> > 
> > Fixes:
> > 8fff755e9f8 (net: fec: Ensure clocks are enabled while using mdio bus)
> > 
> > Signed-off-by: Lucas Stach 
> > ---
> > Please apply this as a fix for 4.2
> > ---
> >  drivers/net/ethernet/freescale/fec_main.c | 1 +
> >  1 file changed, 1 insertion(+)
> > 
> > diff --git a/drivers/net/ethernet/freescale/fec_main.c 
> > b/drivers/net/ethernet/freescale/fec_main.c
> > index 32e3807c650e..271bb5862346 100644
> > --- a/drivers/net/ethernet/freescale/fec_main.c
> > +++ b/drivers/net/ethernet/freescale/fec_main.c
> > @@ -3433,6 +3433,7 @@ fec_probe(struct platform_device *pdev)
> >  
> > pm_runtime_set_autosuspend_delay(&pdev->dev, FEC_MDIO_PM_TIMEOUT);
> > pm_runtime_use_autosuspend(&pdev->dev);
> > +   pm_runtime_get_noresume(&pdev->dev);
> > pm_runtime_set_active(&pdev->dev);
> > pm_runtime_enable(&pdev->dev);
> 
> This might work, but is it the correct fix?
> 
> Documentation/power/runtime_pm.txt says:
> 
> 534 In addition to that, the initial runtime PM status of all devices is
> 535 'suspended', but it need not reflect the actual physical state of the 
> device.
> 536 Thus, if the device is initially active (i.e. it is able to process I/O), 
> its
> 537 runtime PM status must be changed to 'active', with the help of
> 538 pm_runtime_set_active(), before pm_runtime_enable() is called for the 
> device.
> 
> At the point we call the pm_runtime_ functions above, all the clocks
> are ticking. So according to the documentation pm_runtime_set_active()
> is the right thing to do. But it makes no mention of having to call
> pm_runtime_get_noresume(). I would have expected pm_runtime_set_active()
> to set the count to the correct value.

-- 
Pengutronix e.K.   | Uwe Kleine-König|
Industrial Linux Solutions | http://www.pengutronix.de/  |
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-next 0/4] bnx2x, cnic, bnx2fc: add support for BD

2015-08-03 Thread David Miller

From: Yuval Mintz 
Date: Mon, 3 Aug 2015 13:09:31 +

> Apparently I've made a typo in netdev's E-mail address.
> Dave - do you want me to re-send the entire series?

Yes.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/6] test_bpf: allow tests to specify an skb fragment.

2015-08-03 Thread Daniel Borkmann


On 08/03/2015 06:38 PM, Nicolas Schichan wrote:

On 08/03/2015 05:29 PM, Daniel Borkmann wrote:

On 08/03/2015 04:02 PM, Nicolas Schichan wrote:
We now have 286 tests, which is awesome!

Perhaps, we need to start thinking of a better test description method
soonish as the test_bpf.ko module grew to ~1.6M, i.e. whenever we add
to struct bpf_test, it adds memory overhead upon all test cases.


Indeed, test_bpf.ko is turning quite large (1.4M when compiled for ARM).

It looks like gzip is able to do wonders on the module though as I end up with
a 94.7K test_bpf.ko.gz file and if the modutils are compiled with
--enable-zlib, it will be gunzipped automatically before being loaded to the
kernel.


I think it just contains a lot of zero blocks, which then compress nicely.


I think that marking tests[] array as __initdata will help with the runtime
memory use if someone forgets to rmmod the test_bpf module after a completely
successful run.


Can be done, too, yep. Do you want to send a patch? ;)
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v2 net-next 0/4] gro: Fixes for tunnels and GRO

2015-08-03 Thread Tom Herbert

This patch set addresses some issues related to tunneling and GRO:

- Ensure headers are pulled into skb->head when putting 1st packet onto
  GRO list
- Fix remote checksum offload to properly deal with frag0 in GRO.
- Add support for GRO at VXLAN tunnel (call gro_cells)

Testing: Ran one netperf TCP_STREAM to highlight impact of different
configurations:

GUE
  Zero UDP checksum
4628.42 MBps
  UDP checksums enabled
6800.51 MBps
  UDP checksums and remote checksum offload
7663.82 MBps
  UDP checksums and remote checksum offload using no-partial
7287.25 MBps

VXLAN
  Zero UDP checksum
4112.02 MBps
  UDP checksums enabled
6785.80 MBps
  UDP checksums and remote checksum offload
7075.56 MBps

v2:
  - Drop "gro: Pull headers into skb head for 1st skb in gro list"
from patch set
  - In vxlan_remcsum and gue_remcsum return immediately if remcsum
processing was already done
  - Add gro callbacks for sit offload
  - Use WARN_ON_ONCE if we get a GUE protocol that does not have
GRO offload support


Tom Herbert (4):
  gro: Fix remcsum offload to deal with frags in GRO
  vxlan: GRO support at tunnel layer
  ipv6: Add gro functions to sit_offloads
  fou: Do WARN_ON_ONCE in gue_gro_receive for bad proto callbacks

 drivers/net/vxlan.c   | 32 
 include/linux/netdevice.h | 44 
 include/net/vxlan.h   |  1 +
 net/ipv4/fou.c| 30 +-
 net/ipv6/ip6_offload.c|  2 ++
 5 files changed, 64 insertions(+), 45 deletions(-)

-- 
1.8.5.6

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v2 net-next 1/4] gro: Fix remcsum offload to deal with frags in GRO

2015-08-03 Thread Tom Herbert

The remote checksum offload GRO did not consider the case that frag0
might be in use. This patch fixes that by accessing headers using the
skb_gro functions and not saving offsets relative to skb->head.

Signed-off-by: Tom Herbert 
---
 drivers/net/vxlan.c   | 23 +--
 include/linux/netdevice.h | 44 
 net/ipv4/fou.c| 28 
 3 files changed, 53 insertions(+), 42 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index e90f7a4..60b5b42 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -519,10 +519,10 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff 
*skb,
  u32 data, struct gro_remcsum *grc,
  bool nopartial)
 {
-   size_t start, offset, plen;
+   size_t start, offset;
 
if (skb->remcsum_offload)
-   return NULL;
+   return vh;
 
if (!NAPI_GRO_CB(skb)->csum_valid)
return NULL;
@@ -532,17 +532,8 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff 
*skb,
  offsetof(struct udphdr, check) :
  offsetof(struct tcphdr, check));
 
-   plen = hdrlen + offset + sizeof(u16);
-
-   /* Pull checksum that will be written */
-   if (skb_gro_header_hard(skb, off + plen)) {
-   vh = skb_gro_header_slow(skb, off + plen, off);
-   if (!vh)
-   return NULL;
-   }
-
-   skb_gro_remcsum_process(skb, (void *)vh + hdrlen,
-   start, offset, grc, nopartial);
+   vh = skb_gro_remcsum_process(skb, (void *)vh, off, hdrlen,
+start, offset, grc, nopartial);
 
skb->remcsum_offload = 1;
 
@@ -573,7 +564,6 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff 
**head,
goto out;
}
 
-   skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
 
flags = ntohl(vh->vx_flags);
@@ -588,6 +578,8 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff 
**head,
goto out;
}
 
+   skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
+
flush = 0;
 
for (p = *head; p; p = p->next) {
@@ -1110,6 +1102,9 @@ static struct vxlanhdr *vxlan_remcsum(struct sk_buff 
*skb, struct vxlanhdr *vh,
 {
size_t start, offset, plen;
 
+   if (skb->remcsum_offload)
+   return vh;
+
start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
offset = start + ((data & VXLAN_RCO_UDP) ?
  offsetof(struct udphdr, check) :
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 607b5f4..568d7ae 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2303,8 +2303,7 @@ __sum16 __skb_gro_checksum_complete(struct sk_buff *skb);
 
 static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb)
 {
-   return (NAPI_GRO_CB(skb)->gro_remcsum_start - skb_headroom(skb) ==
-   skb_gro_offset(skb));
+   return (NAPI_GRO_CB(skb)->gro_remcsum_start == skb_gro_offset(skb));
 }
 
 static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb,
@@ -2400,37 +2399,58 @@ static inline void skb_gro_remcsum_init(struct 
gro_remcsum *grc)
grc->delta = 0;
 }
 
-static inline void skb_gro_remcsum_process(struct sk_buff *skb, void *ptr,
-  int start, int offset,
-  struct gro_remcsum *grc,
-  bool nopartial)
+static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr,
+   unsigned int off, size_t hdrlen,
+   int start, int offset,
+   struct gro_remcsum *grc,
+   bool nopartial)
 {
__wsum delta;
+   size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
 
BUG_ON(!NAPI_GRO_CB(skb)->csum_valid);
 
if (!nopartial) {
-   NAPI_GRO_CB(skb)->gro_remcsum_start =
-   ((unsigned char *)ptr + start) - skb->head;
-   return;
+   NAPI_GRO_CB(skb)->gro_remcsum_start = off + hdrlen + start;
+   return ptr;
+   }
+
+   ptr = skb_gro_header_fast(skb, off);
+   if (skb_gro_header_hard(skb, off + plen)) {
+   ptr = skb_gro_header_slow(skb, off + plen, off);
+   if (!ptr)
+   return NULL;
}
 
-   delta = remcsum_adjust(ptr, NAPI_GRO_CB(skb)->csum, start, offset);
+   delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum,
+  sta

1 2 >

1 - 100 of 157 matches

Mail list logo